diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp | 18 |
1 file changed, 16 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp index fd39b8a1350c..7a519117f248 100644 --- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp +++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp @@ -463,6 +463,10 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const { case AMDGPU::V_FMA_F16_gfx9_fake16_e64: NewOpcode = AMDGPU::V_FMAAK_F16_fake16; break; + case AMDGPU::V_FMA_F64_e64: + if (ST->hasFmaakFmamkF64Insts()) + NewOpcode = AMDGPU::V_FMAAK_F64; + break; } } @@ -497,6 +501,10 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const { case AMDGPU::V_FMA_F16_gfx9_fake16_e64: NewOpcode = AMDGPU::V_FMAMK_F16_fake16; break; + case AMDGPU::V_FMA_F64_e64: + if (ST->hasFmaakFmamkF64Insts()) + NewOpcode = AMDGPU::V_FMAMK_F64; + break; } } @@ -961,7 +969,9 @@ bool SIShrinkInstructions::run(MachineFunction &MF) { MI.getOpcode() == AMDGPU::V_FMA_F16_e64 || MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 || MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_t16_e64 || - MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) { + MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64 || + (MI.getOpcode() == AMDGPU::V_FMA_F64_e64 && + ST->hasFmaakFmamkF64Insts())) { shrinkMadFma(MI); continue; } @@ -1058,7 +1068,11 @@ bool SIShrinkInstructions::run(MachineFunction &MF) { // fold an immediate into the shrunk instruction as a literal operand. In // GFX10 VOP3 instructions can take a literal operand anyway, so there is // no advantage to doing this. - if (ST->hasVOP3Literal() && !IsPostRA) + // However, if 64-bit literals are allowed we still need to shrink it + // for such literal to be able to fold. + if (ST->hasVOP3Literal() && + (!ST->has64BitLiterals() || AMDGPU::isTrue16Inst(MI.getOpcode())) && + !IsPostRA) continue; if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) && |
