diff options
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 74 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/dagcombine-nsz-relaxations.ll | 48 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AArch64/fptosi-sitofp-to-ftrunc.ll | 85 |
3 files changed, 194 insertions, 13 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6bf9008c3d67..3b5d4ad11b6d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -17993,7 +17993,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // (fsub A, 0) -> A if (N1CFP && N1CFP->isZero()) { - if (!N1CFP->isNegative() || Flags.hasNoSignedZeros()) { + if (!N1CFP->isNegative() || Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(N0)) { return N0; } } @@ -18022,13 +18022,17 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { } } - if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() && - N1.getOpcode() == ISD::FADD) { + // X - (X + Y) -> -Y is valid when: + // 1. NoSignedZeros is enabled (globally or via flags), OR + // 2. Y is known to never be zero (preventing X == Y case that could produce signed zero) + if (Flags.hasAllowReassociation() && N1.getOpcode() == ISD::FADD) { // X - (X + Y) -> -Y - if (N0 == N1->getOperand(0)) + if (N0 == N1->getOperand(0) && + (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(N1->getOperand(1)))) return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1)); // X - (Y + X) -> -Y - if (N0 == N1->getOperand(1)) + if (N0 == N1->getOperand(1) && + (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(N1->getOperand(0)))) return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0)); } @@ -18337,8 +18341,9 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) { } if (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs()) { - if (N->getFlags().hasNoSignedZeros() || - (N2CFP && !N2CFP->isExactlyValue(-0.0))) { + if (N->getFlags().hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(N2) || + (N2CFP && !N2CFP->isExactlyValue(-0.0)) || + DAG.isKnownNeverZeroFloat(N2)) { if (N0CFP && N0CFP->isZero()) return N2; if (N1CFP && N1CFP->isZero()) @@ -18870,6 +18875,35 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) { return SDValue(); } +/// Check if a use of a floating-point value doesn't care about the sign of zero. +/// This allows us to optimize (sitofp (fptosi x)) -> ftrunc(x) even without +/// NoSignedZerosFPMath, as long as all uses are sign-insensitive. +static bool isSignInsensitiveUse(SDNode *Use, unsigned OperandNo, SelectionDAG &DAG) { + switch (Use->getOpcode()) { + // Comparisons: IEEE 754 specifies +0.0 == -0.0. + case ISD::SETCC: + // fabs always produces +0.0. + case ISD::FABS: + return true; + // Arithmetic with non-zero constants overwrites the sign. + case ISD::FADD: + case ISD::FSUB: { + SDValue Other = Use->getOperand(1 - OperandNo); + return DAG.isKnownNeverZeroFloat(Other); + } + default: + return false; + } +} + +/// Check if all uses of a value are insensitive to the sign of zero. +static bool allUsesSignInsensitive(SDValue V, SelectionDAG &DAG) { + return all_of(V->uses(), [&](SDUse &Use) { + SDNode *User = Use.getUser(); + unsigned OperandNo = Use.getOperandNo(); + return isSignInsensitiveUse(User, OperandNo, DAG); + }); +} static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, const TargetLowering &TLI) { @@ -18885,18 +18919,24 @@ static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG, if (!TLI.isOperationLegal(ISD::FTRUNC, VT)) return SDValue(); + // Check if we can ignore signed zeros via global flag OR use-based analysis. + bool CanIgnoreSignedZeros = DAG.getTarget().Options.NoSignedZerosFPMath || + allUsesSignInsensitive(SDValue(N, 0), DAG); + if (!CanIgnoreSignedZeros) + return SDValue(); + // fptosi/fptoui round towards zero, so converting from FP to integer and // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X SDValue N0 = N->getOperand(0); if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT && N0.getOperand(0).getValueType() == VT) { - if (DAG.getTarget().Options.NoSignedZerosFPMath) + if (CanIgnoreSignedZeros) return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0)); } if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT && N0.getOperand(0).getValueType() == VT) { - if (DAG.getTarget().Options.NoSignedZerosFPMath) + if (CanIgnoreSignedZeros) return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0)); // Strict math: use FABS to handle negative inputs correctly. @@ -19333,10 +19373,18 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't // know it was called from a context with a nsz flag if the input fsub does // not. - if (N0.getOpcode() == ISD::FSUB && N->getFlags().hasNoSignedZeros() && - N0.hasOneUse()) { - return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1), - N0.getOperand(0)); + if (N0.getOpcode() == ISD::FSUB && N0.hasOneUse()) { + SDValue X = N0.getOperand(0); + SDValue Y = N0.getOperand(1); + + // Safe if NoSignedZeros, or if we can prove X != Y (avoiding the -0.0 vs +0.0 issue) + // For now, we use a conservative check: if either operand is known never zero, + // then X - Y can't produce a signed zero from X == Y. + if (N->getFlags().hasNoSignedZeros() || + DAG.isKnownNeverZeroFloat(X) || + DAG.isKnownNeverZeroFloat(Y)) { + return DAG.getNode(ISD::FSUB, SDLoc(N), VT, Y, X); + } } if (SimplifyDemandedBits(SDValue(N, 0))) diff --git a/llvm/test/CodeGen/AArch64/dagcombine-nsz-relaxations.ll b/llvm/test/CodeGen/AArch64/dagcombine-nsz-relaxations.ll new file mode 100644 index 000000000000..3ccfb2b6e756 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/dagcombine-nsz-relaxations.ll @@ -0,0 +1,48 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mtriple=aarch64 < %s | FileCheck %s + +; Test DAGCombiner optimizations that can bypass NoSignedZerosFPMath requirement +; by using isKnownNeverZeroFloat analysis. + +; ===== Test 1: fsub A, 0 -> A ===== +; When A is known to be non-zero, we can eliminate the subtraction + +define double @fsub_nonzero_minus_zero(double %x) { +; CHECK-LABEL: fsub_nonzero_minus_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov d1, #1.00000000 +; CHECK-NEXT: fadd d0, d0, d1 +; CHECK-NEXT: ret + %add = fadd double %x, 1.0 + %sub = fsub double %add, 0.0 + ret double %sub +} + +; ===== Test 2: fneg(fsub(A, B)) -> fsub(B, A) ===== +; When A or B is known to be non-zero, we can swap the operands + +define double @fneg_fsub_nonzero_nonzero(double %x) { +; CHECK-LABEL: fneg_fsub_nonzero_nonzero: +; CHECK: // %bb.0: +; CHECK-NEXT: fmov d1, #1.00000000 +; CHECK-NEXT: fadd d0, d0, d1 +; CHECK-NEXT: fmov d1, #2.00000000 +; CHECK-NEXT: fsub d0, d1, d0 +; CHECK-NEXT: ret + %add = fadd double %x, 1.0 + %sub = fsub double %add, 2.0 + %neg = fneg double %sub + ret double %neg +} + +; Negative test: both could be same value, can't optimize +define double @fneg_fsub_maybe_equal(double %x, double %y) { +; CHECK-LABEL: fneg_fsub_maybe_equal: +; CHECK: // %bb.0: +; CHECK-NEXT: fsub d0, d0, d1 +; CHECK-NEXT: fneg d0, d0 +; CHECK-NEXT: ret + %sub = fsub double %x, %y + %neg = fneg double %sub + ret double %neg +} diff --git a/llvm/test/CodeGen/AArch64/fptosi-sitofp-to-ftrunc.ll b/llvm/test/CodeGen/AArch64/fptosi-sitofp-to-ftrunc.ll new file mode 100644 index 000000000000..cb4d3aaf8e5a --- /dev/null +++ b/llvm/test/CodeGen/AArch64/fptosi-sitofp-to-ftrunc.ll @@ -0,0 +1,85 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=aarch64 < %s | FileCheck %s + +; Test that (sitofp (fptosi x)) can be optimized to ftrunc when all uses are +; either insenstive to the sign-bit (comparison) or that they will fix the +; sign-bit when the original value is in the range of (-1.0, 0.0) and then the +; rounding mode can affect the result. + +define double @fptosi_sitofp_never_zero_add(double %x) { +; CHECK-LABEL: fptosi_sitofp_never_zero_add: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz d0, d0 +; CHECK-NEXT: fmov d1, #1.00000000 +; CHECK-NEXT: fadd d0, d0, d1 +; CHECK-NEXT: ret + %conv1 = fptosi double %x to i32 + %conv2 = sitofp i32 %conv1 to double + %add = fadd double %conv2, 1.0 + ret double %add +} + +define i1 @fptosi_sitofp_compare(double %x) { +; CHECK-LABEL: fptosi_sitofp_compare: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz d0, d0 +; CHECK-NEXT: fcmp d0, #0.0 +; CHECK-NEXT: cset w0, eq +; CHECK-NEXT: ret + %conv1 = fptosi double %x to i32 + %conv2 = sitofp i32 %conv1 to double + %cmp = fcmp oeq double %conv2, 0.0 + ret i1 %cmp +} + +define double @fptosi_sitofp_fabs(double %x) { +; CHECK-LABEL: fptosi_sitofp_fabs: +; CHECK: // %bb.0: +; CHECK-NEXT: frintz d0, d0 +; CHECK-NEXT: fabs d0, d0 +; CHECK-NEXT: ret + %conv1 = fptosi double %x to i32 + %conv2 = sitofp i32 %conv1 to double + %abs = call double @llvm.fabs.f64(double %conv2) + ret double %abs +} + +define double @fptosi_sitofp_mul(double %x) { +; CHECK-LABEL: fptosi_sitofp_mul: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: scvtf d0, w8 +; CHECK-NEXT: fadd d0, d0, d0 +; CHECK-NEXT: ret + %conv1 = fptosi double %x to i32 + %conv2 = sitofp i32 %conv1 to double + %mul = fmul double %conv2, 2.0 + ret double %mul +} + +define double @fptosi_sitofp_could_be_zero(double %x) { +; CHECK-LABEL: fptosi_sitofp_could_be_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: scvtf d0, w8 +; CHECK-NEXT: ret + %conv1 = fptosi double %x to i32 + %conv2 = sitofp i32 %conv1 to double + ret double %conv2 +} + +define double @fptosi_sitofp_add_zero(double %x) { +; CHECK-LABEL: fptosi_sitofp_add_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: movi d1, #0000000000000000 +; CHECK-NEXT: scvtf d0, w8 +; CHECK-NEXT: fadd d0, d0, d1 +; CHECK-NEXT: ret + %conv1 = fptosi double %x to i32 + %conv2 = sitofp i32 %conv1 to double + %add = fadd double %conv2, 0.0 + ret double %add +} + +declare double @llvm.fabs.f64(double) |
