summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIISelLowering.cpp 16
1 files changed, 15 insertions, 1 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 586de433ea28..3281eabcd4ad 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5423,6 +5423,14 @@ SITargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MI.eraseFromParent();
return BB;
}
+ case AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM: {
+ // Set SCC to true, in case the barrier instruction gets converted to a NOP.
+ BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(),
+ TII->get(AMDGPU::S_CMP_EQ_U32))
+ .addImm(0)
+ .addImm(0);
+ return BB;
+ }
case AMDGPU::GET_GROUPSTATICSIZE: {
assert(getTargetMachine().getTargetTriple().getOS() == Triple::AMDHSA ||
getTargetMachine().getTargetTriple().getOS() == Triple::AMDPAL);
@@ -10858,7 +10866,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
(AS == AMDGPUAS::GLOBAL_ADDRESS &&
Subtarget->getScalarizeGlobalBehavior() && Load->isSimple() &&
isMemOpHasNoClobberedMemOperand(Load))) {
- if ((!Op->isDivergent() || AMDGPUInstrInfo::isUniformMMO(MMO)) &&
+ if ((!Op->isDivergent() || AMDGPU::isUniformMMO(MMO)) &&
Alignment >= Align(4) && NumElements < 32) {
if (MemVT.isPow2VectorType() ||
(Subtarget->hasScalarDwordx3Loads() && NumElements == 3))
@@ -15181,6 +15189,12 @@ SDValue SITargetLowering::performFMulCombine(SDNode *N,
EVT ScalarVT = VT.getScalarType();
EVT IntVT = VT.changeElementType(MVT::i32);
+ if (!N->isDivergent() && getSubtarget()->hasSALUFloatInsts() &&
+ (ScalarVT == MVT::f32 || ScalarVT == MVT::f16)) {
+ // Prefer to use s_mul_f16/f32 instead of v_ldexp_f16/f32.
+ return SDValue();
+ }
+
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);