summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp74
-rw-r--r--llvm/test/CodeGen/AArch64/dagcombine-nsz-relaxations.ll48
-rw-r--r--llvm/test/CodeGen/AArch64/fptosi-sitofp-to-ftrunc.ll85
3 files changed, 194 insertions, 13 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6bf9008c3d67..3b5d4ad11b6d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -17993,7 +17993,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// (fsub A, 0) -> A
if (N1CFP && N1CFP->isZero()) {
- if (!N1CFP->isNegative() || Flags.hasNoSignedZeros()) {
+    // A - (+0.0) is always A. A - (-0.0) equals A unless A is -0.0 (the result
+    // would be +0.0), so it also folds under nsz or when A is known non-zero.
+    if (!N1CFP->isNegative() || Flags.hasNoSignedZeros() ||
+        DAG.isKnownNeverZeroFloat(N0)) {
return N0;
}
}
@@ -18022,13 +18022,17 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
}
- if (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros() &&
- N1.getOpcode() == ISD::FADD) {
+ // X - (X + Y) -> -Y (requires reassociation) is valid when either:
+ // 1. the fsub carries the nsz flag, or
+ // 2. Y is known never to be zero. The sign of zero only matters when the
+ //    result is a zero, i.e. for Y == +0.0, where X - (X + Y) is +0.0 for a
+ //    non-zero X but -Y is -0.0.
+ if (Flags.hasAllowReassociation() && N1.getOpcode() == ISD::FADD) {
// X - (X + Y) -> -Y
- if (N0 == N1->getOperand(0))
+ if (N0 == N1->getOperand(0) &&
+ (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(N1->getOperand(1))))
return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1));
// X - (Y + X) -> -Y
- if (N0 == N1->getOperand(1))
+ if (N0 == N1->getOperand(1) &&
+ (Flags.hasNoSignedZeros() || DAG.isKnownNeverZeroFloat(N1->getOperand(0))))
return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0));
}
@@ -18337,8 +18341,9 @@ template <class MatchContextClass> SDValue DAGCombiner::visitFMA(SDNode *N) {
}
if (N->getFlags().hasNoNaNs() && N->getFlags().hasNoInfs()) {
- if (N->getFlags().hasNoSignedZeros() ||
- (N2CFP && !N2CFP->isExactlyValue(-0.0))) {
+    // Folding fma(0, X, N2) -> N2 is only wrong for N2 == -0.0 (a +0.0 product
+    // would turn the sum into +0.0), so an addend that is known never to be
+    // zero is as good as nsz. Check the constant before querying the DAG.
+    if (N->getFlags().hasNoSignedZeros() ||
+        (N2CFP && !N2CFP->isExactlyValue(-0.0)) ||
+        DAG.isKnownNeverZeroFloat(N2)) {
if (N0CFP && N0CFP->isZero())
return N2;
if (N1CFP && N1CFP->isZero())
@@ -18870,6 +18875,35 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
return SDValue();
}
+/// Return true if \p Use, which consumes a floating-point value as operand
+/// \p OperandNo, produces the same result whether that value is +0.0 or -0.0.
+/// This is what allows (sitofp (fptosi x)) -> ftrunc(x) without
+/// NoSignedZerosFPMath, provided every use is sign-insensitive.
+static bool isSignInsensitiveUse(SDNode *Use, unsigned OperandNo,
+                                 SelectionDAG &DAG) {
+  const unsigned Opc = Use->getOpcode();
+
+  // SETCC: IEEE 754 comparisons treat +0.0 and -0.0 as equal.
+  // FABS: a zero input always becomes +0.0.
+  if (Opc == ISD::SETCC || Opc == ISD::FABS)
+    return true;
+
+  // FADD/FSUB: an opposite operand that is known never to be zero overwrites
+  // whatever sign a zero on this side carries.
+  if (Opc == ISD::FADD || Opc == ISD::FSUB)
+    return DAG.isKnownNeverZeroFloat(Use->getOperand(1 - OperandNo));
+
+  // Every other user is conservatively assumed to observe the sign of zero.
+  return false;
+}
+
+/// Return true if no user of \p V can observe the sign of a zero, i.e. every
+/// use satisfies isSignInsensitiveUse. Trivially true when there are no uses.
+static bool allUsesSignInsensitive(SDValue V, SelectionDAG &DAG) {
+  for (SDUse &U : V->uses())
+    if (!isSignInsensitiveUse(U.getUser(), U.getOperandNo(), DAG))
+      return false;
+  return true;
+}
static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
const TargetLowering &TLI) {
@@ -18885,18 +18919,24 @@ static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
if (!TLI.isOperationLegal(ISD::FTRUNC, VT))
return SDValue();
+  // Signed zeros can be ignored either globally (NoSignedZerosFPMath) or when
+  // no user of this node can observe the sign of a zero result. Do not return
+  // early when this is false: the unsigned fold below announces a separate
+  // strict-math variant (via FABS) that has to stay reachable, and both folds
+  // already test CanIgnoreSignedZeros themselves.
+  bool CanIgnoreSignedZeros = DAG.getTarget().Options.NoSignedZerosFPMath ||
+                              allUsesSignInsensitive(SDValue(N, 0), DAG);
+
// fptosi/fptoui round towards zero, so converting from FP to integer and
// back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
SDValue N0 = N->getOperand(0);
if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
N0.getOperand(0).getValueType() == VT) {
- if (DAG.getTarget().Options.NoSignedZerosFPMath)
+ if (CanIgnoreSignedZeros)
return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
}
if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
N0.getOperand(0).getValueType() == VT) {
- if (DAG.getTarget().Options.NoSignedZerosFPMath)
+ if (CanIgnoreSignedZeros)
return DAG.getNode(ISD::FTRUNC, DL, VT, N0.getOperand(0));
// Strict math: use FABS to handle negative inputs correctly.
@@ -19333,10 +19373,18 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
// FIXME: This is duplicated in getNegatibleCost, but getNegatibleCost doesn't
// know it was called from a context with a nsz flag if the input fsub does
// not.
- if (N0.getOpcode() == ISD::FSUB && N->getFlags().hasNoSignedZeros() &&
- N0.hasOneUse()) {
- return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
- N0.getOperand(0));
+  // -(X - Y) -> (Y - X) is only valid under nsz: for X == Y the original is
+  // -(+0.0) == -0.0 while Y - X is +0.0. Knowing that X or Y is non-zero does
+  // not help, because two non-zero operands can still be equal (2.0 - 2.0), so
+  // isKnownNeverZeroFloat cannot stand in for the flag here.
+  if (N0.getOpcode() == ISD::FSUB && N->getFlags().hasNoSignedZeros() &&
+      N0.hasOneUse()) {
+    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0.getOperand(1),
+                       N0.getOperand(0));
}
if (SimplifyDemandedBits(SDValue(N, 0)))
diff --git a/llvm/test/CodeGen/AArch64/dagcombine-nsz-relaxations.ll b/llvm/test/CodeGen/AArch64/dagcombine-nsz-relaxations.ll
new file mode 100644
index 000000000000..3ccfb2b6e756
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/dagcombine-nsz-relaxations.ll
@@ -0,0 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+; Test DAGCombiner optimizations that can bypass NoSignedZerosFPMath requirement
+; by using isKnownNeverZeroFloat analysis.
+
+; ===== Test 1: fsub A, 0 -> A =====
+; When A is known to be non-zero, we can eliminate the subtraction
+; NOTE(review): the subtrahend below is +0.0, and the combine folds
+; (fsub A, +0.0) -> A unconditionally; only (fsub A, -0.0) depends on A being
+; known non-zero. This test may therefore not exercise the relaxed path --
+; consider a -0.0 subtrahend with a provably non-zero A. TODO confirm.
+
+define double @fsub_nonzero_minus_zero(double %x) {
+; CHECK-LABEL: fsub_nonzero_minus_zero:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d1, #1.00000000
+; CHECK-NEXT: fadd d0, d0, d1
+; CHECK-NEXT: ret
+ %add = fadd double %x, 1.0
+ %sub = fsub double %add, 0.0
+ ret double %sub
+}
+
+; ===== Test 2: fneg(fsub(A, B)) -> fsub(B, A) =====
+; When A or B is known to be non-zero, we can swap the operands
+; NOTE(review): a non-zero operand does not rule out A == B. For %x == 1.0,
+; %sub is 2.0 - 2.0 == +0.0 and %neg is -0.0, whereas the swapped
+; 2.0 - %add checked below evaluates to +0.0. Verify that this fold is sound
+; without nsz before relying on these assertions.
+
+define double @fneg_fsub_nonzero_nonzero(double %x) {
+; CHECK-LABEL: fneg_fsub_nonzero_nonzero:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d1, #1.00000000
+; CHECK-NEXT: fadd d0, d0, d1
+; CHECK-NEXT: fmov d1, #2.00000000
+; CHECK-NEXT: fsub d0, d1, d0
+; CHECK-NEXT: ret
+ %add = fadd double %x, 1.0
+ %sub = fsub double %add, 2.0
+ %neg = fneg double %sub
+ ret double %neg
+}
+
+; Negative test: both could be same value, can't optimize
+; (%x - %y is +0.0 when %x == %y, so the fneg result -0.0 differs from the
+; +0.0 that %y - %x would produce; the fsub and the fneg must both remain.)
+define double @fneg_fsub_maybe_equal(double %x, double %y) {
+; CHECK-LABEL: fneg_fsub_maybe_equal:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fsub d0, d0, d1
+; CHECK-NEXT: fneg d0, d0
+; CHECK-NEXT: ret
+ %sub = fsub double %x, %y
+ %neg = fneg double %sub
+ ret double %neg
+}
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sitofp-to-ftrunc.ll b/llvm/test/CodeGen/AArch64/fptosi-sitofp-to-ftrunc.ll
new file mode 100644
index 000000000000..cb4d3aaf8e5a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fptosi-sitofp-to-ftrunc.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+; Test that (sitofp (fptosi x)) can be optimized to ftrunc when all uses are
+; either insensitive to the sign bit (comparison, fabs) or overwrite it
+; (fadd/fsub with a non-zero operand). The two forms only differ when rounding
+; toward zero turns a negative input into a zero, i.e. for x in (-1.0, -0.0]:
+; ftrunc yields -0.0 there, while the integer round trip yields +0.0.
+
+; fadd with the non-zero constant 1.0: (+0.0 + 1.0) and (-0.0 + 1.0) are both
+; 1.0, so the round trip can be replaced by a truncation (frintz).
+define double @fptosi_sitofp_never_zero_add(double %x) {
+; CHECK-LABEL: fptosi_sitofp_never_zero_add:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintz d0, d0
+; CHECK-NEXT: fmov d1, #1.00000000
+; CHECK-NEXT: fadd d0, d0, d1
+; CHECK-NEXT: ret
+ %conv1 = fptosi double %x to i32
+ %conv2 = sitofp i32 %conv1 to double
+ %add = fadd double %conv2, 1.0
+ ret double %add
+}
+
+; fcmp treats +0.0 and -0.0 as equal, so the compare cannot observe the sign
+; of zero and the round trip can be replaced by a truncation (frintz).
+define i1 @fptosi_sitofp_compare(double %x) {
+; CHECK-LABEL: fptosi_sitofp_compare:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintz d0, d0
+; CHECK-NEXT: fcmp d0, #0.0
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %conv1 = fptosi double %x to i32
+ %conv2 = sitofp i32 %conv1 to double
+ %cmp = fcmp oeq double %conv2, 0.0
+ ret i1 %cmp
+}
+
+; fabs discards the sign, so a -0.0 from the truncation becomes +0.0 anyway
+; and the round trip can be replaced by a truncation (frintz).
+define double @fptosi_sitofp_fabs(double %x) {
+; CHECK-LABEL: fptosi_sitofp_fabs:
+; CHECK: // %bb.0:
+; CHECK-NEXT: frintz d0, d0
+; CHECK-NEXT: fabs d0, d0
+; CHECK-NEXT: ret
+ %conv1 = fptosi double %x to i32
+ %conv2 = sitofp i32 %conv1 to double
+ %abs = call double @llvm.fabs.f64(double %conv2)
+ ret double %abs
+}
+
+; Negative test: multiplying by 2.0 preserves the sign of zero
+; (-0.0 * 2.0 == -0.0), so both conversions must be kept.
+define double @fptosi_sitofp_mul(double %x) {
+; CHECK-LABEL: fptosi_sitofp_mul:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs w8, d0
+; CHECK-NEXT: scvtf d0, w8
+; CHECK-NEXT: fadd d0, d0, d0
+; CHECK-NEXT: ret
+ %conv1 = fptosi double %x to i32
+ %conv2 = sitofp i32 %conv1 to double
+ %mul = fmul double %conv2, 2.0
+ ret double %mul
+}
+
+; Negative test: the converted value is returned directly, so -0.0 versus
+; +0.0 is observable and both conversions must be kept.
+define double @fptosi_sitofp_could_be_zero(double %x) {
+; CHECK-LABEL: fptosi_sitofp_could_be_zero:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs w8, d0
+; CHECK-NEXT: scvtf d0, w8
+; CHECK-NEXT: ret
+ %conv1 = fptosi double %x to i32
+ %conv2 = sitofp i32 %conv1 to double
+ ret double %conv2
+}
+
+; Negative test: the other fadd operand is 0.0, which is not a known-non-zero
+; value, so the use is not treated as sign-insensitive and both conversions
+; are kept.
+define double @fptosi_sitofp_add_zero(double %x) {
+; CHECK-LABEL: fptosi_sitofp_add_zero:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs w8, d0
+; CHECK-NEXT: movi d1, #0000000000000000
+; CHECK-NEXT: scvtf d0, w8
+; CHECK-NEXT: fadd d0, d0, d1
+; CHECK-NEXT: ret
+ %conv1 = fptosi double %x to i32
+ %conv2 = sitofp i32 %conv1 to double
+ %add = fadd double %conv2, 0.0
+ ret double %add
+}
+
+declare double @llvm.fabs.f64(double)