diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 16 |
1 file changed, 15 insertions, 1 deletion
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index 586de433ea28..3281eabcd4ad 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -5423,6 +5423,14 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, MI.eraseFromParent(); return BB; } + case AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM: { + // Set SCC to true, in case the barrier instruction gets converted to a NOP. + BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(), + TII->get(AMDGPU::S_CMP_EQ_U32)) + .addImm(0) + .addImm(0); + return BB; + } case AMDGPU::GET_GROUPSTATICSIZE: { assert(getTargetMachine().getTargetTriple().getOS() == Triple::AMDHSA || getTargetMachine().getTargetTriple().getOS() == Triple::AMDPAL); @@ -10858,7 +10866,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { (AS == AMDGPUAS::GLOBAL_ADDRESS && Subtarget->getScalarizeGlobalBehavior() && Load->isSimple() && isMemOpHasNoClobberedMemOperand(Load))) { - if ((!Op->isDivergent() || AMDGPUInstrInfo::isUniformMMO(MMO)) && + if ((!Op->isDivergent() || AMDGPU::isUniformMMO(MMO)) && Alignment >= Align(4) && NumElements < 32) { if (MemVT.isPow2VectorType() || (Subtarget->hasScalarDwordx3Loads() && NumElements == 3)) @@ -15181,6 +15189,12 @@ SDValue SITargetLowering::performFMulCombine(SDNode *N, EVT ScalarVT = VT.getScalarType(); EVT IntVT = VT.changeElementType(MVT::i32); + if (!N->isDivergent() && getSubtarget()->hasSALUFloatInsts() && + (ScalarVT == MVT::f32 || ScalarVT == MVT::f16)) { + // Prefer to use s_mul_f16/f32 instead of v_ldexp_f16/f32. + return SDValue(); + } + SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); |
