summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp | 18
1 files changed, 16 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index fd39b8a1350c..7a519117f248 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -463,6 +463,10 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
NewOpcode = AMDGPU::V_FMAAK_F16_fake16;
break;
+ case AMDGPU::V_FMA_F64_e64:
+ if (ST->hasFmaakFmamkF64Insts())
+ NewOpcode = AMDGPU::V_FMAAK_F64;
+ break;
}
}
@@ -497,6 +501,10 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
NewOpcode = AMDGPU::V_FMAMK_F16_fake16;
break;
+ case AMDGPU::V_FMA_F64_e64:
+ if (ST->hasFmaakFmamkF64Insts())
+ NewOpcode = AMDGPU::V_FMAMK_F64;
+ break;
}
}
@@ -961,7 +969,9 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_t16_e64 ||
- MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) {
+ MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64 ||
+ (MI.getOpcode() == AMDGPU::V_FMA_F64_e64 &&
+ ST->hasFmaakFmamkF64Insts())) {
shrinkMadFma(MI);
continue;
}
@@ -1058,7 +1068,11 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
// fold an immediate into the shrunk instruction as a literal operand. In
// GFX10 VOP3 instructions can take a literal operand anyway, so there is
// no advantage to doing this.
- if (ST->hasVOP3Literal() && !IsPostRA)
+ // However, if 64-bit literals are allowed we still need to shrink the
+ // instruction so that such a literal can be folded into it.
+ if (ST->hasVOP3Literal() &&
+ (!ST->has64BitLiterals() || AMDGPU::isTrue16Inst(MI.getOpcode())) &&
+ !IsPostRA)
continue;
if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&