diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 90 |
1 files changed, 88 insertions, 2 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index e64d2162441a..3d040fb705a8 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -4006,7 +4006,8 @@ SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine( case Intrinsic::amdgcn_rsq: case Intrinsic::amdgcn_rcp_legacy: case Intrinsic::amdgcn_rsq_legacy: - case Intrinsic::amdgcn_rsq_clamp: { + case Intrinsic::amdgcn_rsq_clamp: + case Intrinsic::amdgcn_tanh: { // FIXME: This is probably wrong. If src is an sNaN, it won't be quieted SDValue Src = N->getOperand(1); return Src.isUndef() ? Src : SDValue(); @@ -4842,11 +4843,94 @@ AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI, return SDValue(); } +// Detect when CMP and SELECT use the same constant and fold them to avoid +// loading the constant twice. Specifically handles patterns like: +// %cmp = icmp eq i32 %val, 4242 +// %sel = select i1 %cmp, i32 4242, i32 %other +// It can be optimized to reuse %val instead of 4242 in select. +static SDValue +foldCmpSelectWithSharedConstant(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, + const AMDGPUSubtarget *ST) { + SDValue Cond = N->getOperand(0); + SDValue TrueVal = N->getOperand(1); + SDValue FalseVal = N->getOperand(2); + + // Check if condition is a comparison. + if (Cond.getOpcode() != ISD::SETCC) + return SDValue(); + + SDValue LHS = Cond.getOperand(0); + SDValue RHS = Cond.getOperand(1); + ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); + + bool isFloatingPoint = LHS.getValueType().isFloatingPoint(); + bool isInteger = LHS.getValueType().isInteger(); + + // Handle simple floating-point and integer types only. + if (!isFloatingPoint && !isInteger) + return SDValue(); + + bool isEquality = CC == (isFloatingPoint ? ISD::SETOEQ : ISD::SETEQ); + bool isNonEquality = CC == (isFloatingPoint ? ISD::SETONE : ISD::SETNE); + if (!isEquality && !isNonEquality) + return SDValue(); + + SDValue ArgVal, ConstVal; + if ((isFloatingPoint && isa<ConstantFPSDNode>(RHS)) || + (isInteger && isa<ConstantSDNode>(RHS))) { + ConstVal = RHS; + ArgVal = LHS; + } else if ((isFloatingPoint && isa<ConstantFPSDNode>(LHS)) || + (isInteger && isa<ConstantSDNode>(LHS))) { + ConstVal = LHS; + ArgVal = RHS; + } else { + return SDValue(); + } + + // Check if constant should not be optimized - early return if not. + if (isFloatingPoint) { + const APFloat &Val = cast<ConstantFPSDNode>(ConstVal)->getValueAPF(); + const GCNSubtarget *GCNST = static_cast<const GCNSubtarget *>(ST); + + // Only optimize normal floating-point values (finite, non-zero, and + // non-subnormal as per IEEE 754), skip optimization for inlinable + // floating-point constants. + if (!Val.isNormal() || GCNST->getInstrInfo()->isInlineConstant(Val)) + return SDValue(); + } else { + int64_t IntVal = cast<ConstantSDNode>(ConstVal)->getSExtValue(); + + // Skip optimization for inlinable integer immediates. + // Inlinable immediates include: -16 to 64 (inclusive). + if (IntVal >= -16 && IntVal <= 64) + return SDValue(); + } + + // For equality and non-equality comparisons, patterns: + // select (setcc x, const), const, y -> select (setcc x, const), x, y + // select (setccinv x, const), y, const -> select (setccinv x, const), y, x + if (!(isEquality && TrueVal == ConstVal) && + !(isNonEquality && FalseVal == ConstVal)) + return SDValue(); + + SDValue SelectLHS = (isEquality && TrueVal == ConstVal) ? ArgVal : TrueVal; + SDValue SelectRHS = + (isNonEquality && FalseVal == ConstVal) ? ArgVal : FalseVal; + return DCI.DAG.getNode(ISD::SELECT, SDLoc(N), N->getValueType(0), Cond, + SelectLHS, SelectRHS); +} + SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const { if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0))) return Folded; + // Try to fold CMP + SELECT patterns with shared constants (both FP and + // integer). + if (SDValue Folded = foldCmpSelectWithSharedConstant(N, DCI, Subtarget)) + return Folded; + SDValue Cond = N->getOperand(0); if (Cond.getOpcode() != ISD::SETCC) return SDValue(); @@ -5733,6 +5817,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(BUILD_VERTICAL_VECTOR) NODE_NAME_CASE(CONST_DATA_PTR) NODE_NAME_CASE(PC_ADD_REL_OFFSET) + NODE_NAME_CASE(PC_ADD_REL_OFFSET64) NODE_NAME_CASE(LDS) NODE_NAME_CASE(DUMMY_CHAIN) NODE_NAME_CASE(LOAD_D16_HI) @@ -6196,7 +6281,8 @@ bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode( case Intrinsic::amdgcn_rsq: case Intrinsic::amdgcn_rcp_legacy: case Intrinsic::amdgcn_rsq_legacy: - case Intrinsic::amdgcn_rsq_clamp: { + case Intrinsic::amdgcn_rsq_clamp: + case Intrinsic::amdgcn_tanh: { if (SNaN) return true; |
