summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMarius Kamp <msk@posteo.org>2025-11-18 18:36:18 +0100
committerGitHub <noreply@github.com>2025-11-18 17:36:18 +0000
commit94e9bfb80365de0c9c71303418b33ceb767f7cf9 (patch)
treee35c32b19506d58e82885067f3ee642d0cf65e92
parentb53371210fcf1f23d1f87e5727fdf1e9aefa674f (diff)
[AArch64] Reorder Comparison Trees to Facilitate CSE (#168064)
The AArch64 backend converts trees formed by conjunctions/disjunctions of comparisons into sequences of `CCMP` instructions. The implementation before this change checks whether a sub-tree must be processed first. If not, it processes the operations in the order they occur in the DAG. This may not be optimal if there is a corresponding `SUB` node for one of the comparisons. In this case, we should process this comparison first because we can then use the same instruction for the `SUB` node and the comparison. To achieve this, this commit comprises the following changes: - Extend `canEmitConjunction` with a new output parameter `PreferFirst`, which reports to the caller whether the sub-tree should preferably be processed first. - Set `PreferFirst` to `true` if we can find a corresponding `SUB` node in the DAG. - If we can process a sub-tree with `PreferFirst = true` first (i.e., we do not violate any `MustBeFirst` constraint by doing so), we swap the sub-trees. - The already existing code for performing the common subexpression elimination takes care to use only a single instruction for the comparison and the `SUB` node if possible. Closes #149685.
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp41
-rw-r--r--llvm/test/CodeGen/AArch64/ccmp-cse.ll139
2 files changed, 170 insertions, 10 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d21e19b2ecd4..8f41f230b552 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3886,22 +3886,30 @@ static SDValue emitConditionalComparison(SDValue LHS, SDValue RHS,
/// \param MustBeFirst Set to true if this subtree needs to be negated and we
/// cannot do the negation naturally. We are required to
/// emit the subtree first in this case.
+/// \param PreferFirst Set to true if processing this subtree first may
+/// result in more efficient code.
/// \param WillNegate Is true if are called when the result of this
/// subexpression must be negated. This happens when the
/// outer expression is an OR. We can use this fact to know
/// that we have a double negation (or (or ...) ...) that
/// can be implemented for free.
-static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
- bool &MustBeFirst, bool WillNegate,
+static bool canEmitConjunction(SelectionDAG &DAG, const SDValue Val,
+ bool &CanNegate, bool &MustBeFirst,
+ bool &PreferFirst, bool WillNegate,
unsigned Depth = 0) {
if (!Val.hasOneUse())
return false;
unsigned Opcode = Val->getOpcode();
if (Opcode == ISD::SETCC) {
- if (Val->getOperand(0).getValueType() == MVT::f128)
+ EVT VT = Val->getOperand(0).getValueType();
+ if (VT == MVT::f128)
return false;
CanNegate = true;
MustBeFirst = false;
+ // Designate this operation as a preferred first operation if the result
+ // of a SUB operation can be reused.
+ PreferFirst = DAG.doesNodeExist(ISD::SUB, DAG.getVTList(VT),
+ {Val->getOperand(0), Val->getOperand(1)});
return true;
}
// Protect against exponential runtime and stack overflow.
@@ -3913,11 +3921,15 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
SDValue O1 = Val->getOperand(1);
bool CanNegateL;
bool MustBeFirstL;
- if (!canEmitConjunction(O0, CanNegateL, MustBeFirstL, IsOR, Depth+1))
+ bool PreferFirstL;
+ if (!canEmitConjunction(DAG, O0, CanNegateL, MustBeFirstL, PreferFirstL,
+ IsOR, Depth + 1))
return false;
bool CanNegateR;
bool MustBeFirstR;
- if (!canEmitConjunction(O1, CanNegateR, MustBeFirstR, IsOR, Depth+1))
+ bool PreferFirstR;
+ if (!canEmitConjunction(DAG, O1, CanNegateR, MustBeFirstR, PreferFirstR,
+ IsOR, Depth + 1))
return false;
if (MustBeFirstL && MustBeFirstR)
@@ -3940,6 +3952,7 @@ static bool canEmitConjunction(const SDValue Val, bool &CanNegate,
CanNegate = false;
MustBeFirst = MustBeFirstL || MustBeFirstR;
}
+ PreferFirst = PreferFirstL || PreferFirstR;
return true;
}
return false;
@@ -4001,19 +4014,25 @@ static SDValue emitConjunctionRec(SelectionDAG &DAG, SDValue Val,
SDValue LHS = Val->getOperand(0);
bool CanNegateL;
bool MustBeFirstL;
- bool ValidL = canEmitConjunction(LHS, CanNegateL, MustBeFirstL, IsOR);
+ bool PreferFirstL;
+ bool ValidL = canEmitConjunction(DAG, LHS, CanNegateL, MustBeFirstL,
+ PreferFirstL, IsOR);
assert(ValidL && "Valid conjunction/disjunction tree");
(void)ValidL;
SDValue RHS = Val->getOperand(1);
bool CanNegateR;
bool MustBeFirstR;
- bool ValidR = canEmitConjunction(RHS, CanNegateR, MustBeFirstR, IsOR);
+ bool PreferFirstR;
+ bool ValidR = canEmitConjunction(DAG, RHS, CanNegateR, MustBeFirstR,
+ PreferFirstR, IsOR);
assert(ValidR && "Valid conjunction/disjunction tree");
(void)ValidR;
- // Swap sub-tree that must come first to the right side.
- if (MustBeFirstL) {
+ bool ShouldFirstL = PreferFirstL && !PreferFirstR && !MustBeFirstR;
+
+ // Swap sub-tree that must or should come first to the right side.
+ if (MustBeFirstL || ShouldFirstL) {
assert(!MustBeFirstR && "Valid conjunction/disjunction tree");
std::swap(LHS, RHS);
std::swap(CanNegateL, CanNegateR);
@@ -4069,7 +4088,9 @@ static SDValue emitConjunction(SelectionDAG &DAG, SDValue Val,
AArch64CC::CondCode &OutCC) {
bool DummyCanNegate;
bool DummyMustBeFirst;
- if (!canEmitConjunction(Val, DummyCanNegate, DummyMustBeFirst, false))
+ bool DummyPreferFirst;
+ if (!canEmitConjunction(DAG, Val, DummyCanNegate, DummyMustBeFirst,
+ DummyPreferFirst, false))
return SDValue();
return emitConjunctionRec(DAG, Val, OutCC, false, SDValue(), AArch64CC::AL);
diff --git a/llvm/test/CodeGen/AArch64/ccmp-cse.ll b/llvm/test/CodeGen/AArch64/ccmp-cse.ll
new file mode 100644
index 000000000000..657498172a04
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/ccmp-cse.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
+
+define i64 @test_single_or(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_single_or:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs x8, x2, x1
+; CHECK-NEXT: ccmp x2, x0, #2, hs
+; CHECK-NEXT: csel x0, xzr, x8, hi
+; CHECK-NEXT: ret
+ %cmp.match = icmp ult i64 %y, %x
+ %cmp.nomatch = icmp ugt i64 %y, %unrelated
+ %or.cond = or i1 %cmp.match, %cmp.nomatch
+ %sub.reuse = sub nuw i64 %y, %x
+ %res = select i1 %or.cond, i64 0, i64 %sub.reuse
+ ret i64 %res
+}
+
+define i64 @test_two_ors(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_two_ors:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs x8, x2, x1
+; CHECK-NEXT: ccmp x0, x1, #0, hs
+; CHECK-NEXT: ccmp x2, x0, #2, hs
+; CHECK-NEXT: csel x0, xzr, x8, hi
+; CHECK-NEXT: ret
+ %cmp.match = icmp ult i64 %y, %x
+ %cmp.nomatch1 = icmp ult i64 %unrelated, %x
+ %cmp.nomatch2 = icmp ugt i64 %y, %unrelated
+ %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2
+ %or.cond = or i1 %cmp.match, %or.nomatch
+ %sub.reuse = sub nuw i64 %y, %x
+ %res = select i1 %or.cond, i64 0, i64 %sub.reuse
+ ret i64 %res
+}
+
+define i64 @test_two_ors_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_two_ors_commuted:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs x8, x2, x1
+; CHECK-NEXT: ccmp x0, x1, #0, hs
+; CHECK-NEXT: ccmp x2, x0, #2, hs
+; CHECK-NEXT: csel x0, xzr, x8, hi
+; CHECK-NEXT: ret
+ %cmp.match = icmp ult i64 %y, %x
+ %cmp.nomatch1 = icmp ult i64 %unrelated, %x
+ %cmp.nomatch2 = icmp ugt i64 %y, %unrelated
+ %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2
+ %or.cond = or i1 %or.nomatch, %cmp.match
+ %sub.reuse = sub nuw i64 %y, %x
+ %res = select i1 %or.cond, i64 0, i64 %sub.reuse
+ ret i64 %res
+}
+
+define i64 @test_single_and(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_single_and:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs x8, x2, x1
+; CHECK-NEXT: ccmp x2, x0, #0, lo
+; CHECK-NEXT: csel x0, xzr, x8, hi
+; CHECK-NEXT: ret
+ %cmp.match = icmp ult i64 %y, %x
+ %cmp.nomatch = icmp ugt i64 %y, %unrelated
+ %and.cond = and i1 %cmp.match, %cmp.nomatch
+ %sub.reuse = sub nuw i64 %y, %x
+ %res = select i1 %and.cond, i64 0, i64 %sub.reuse
+ ret i64 %res
+}
+
+define i64 @test_single_or_sub_commuted(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_single_or_sub_commuted:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs x8, x1, x2
+; CHECK-NEXT: ccmp x2, x0, #2, ls
+; CHECK-NEXT: csel x0, xzr, x8, hi
+; CHECK-NEXT: ret
+ %cmp.match = icmp ult i64 %y, %x
+ %cmp.nomatch = icmp ugt i64 %y, %unrelated
+ %or.cond = or i1 %cmp.match, %cmp.nomatch
+ %sub.reuse = sub nuw i64 %x, %y
+ %res = select i1 %or.cond, i64 0, i64 %sub.reuse
+ ret i64 %res
+}
+
+; Negative test: We must negate the or operation, hence this must come first.
+define i64 @test_mustbefirst_overrides_preferfirst_negative(i64 %unrelated, i64 %x, i64 %y) nounwind {
+; CHECK-LABEL: test_mustbefirst_overrides_preferfirst_negative:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x2, x0
+; CHECK-NEXT: sub x8, x2, x1
+; CHECK-NEXT: ccmp x0, x1, #0, ls
+; CHECK-NEXT: ccmp x2, x1, #2, lo
+; CHECK-NEXT: csel x0, xzr, x8, lo
+; CHECK-NEXT: ret
+ %cmp.match = icmp ult i64 %y, %x
+ %cmp.nomatch1 = icmp ult i64 %unrelated, %x
+ %cmp.nomatch2 = icmp ugt i64 %y, %unrelated
+ %or.nomatch = or i1 %cmp.nomatch1, %cmp.nomatch2
+ %and.cond = and i1 %or.nomatch, %cmp.match
+ %sub.reuse = sub nuw i64 %y, %x
+ %res = select i1 %and.cond, i64 0, i64 %sub.reuse
+ ret i64 %res
+}
+
+; Negative test: There is no analogue of SUBS for floating point.
+define float @test_negative_float(float %unrelated, float %x, float %y) nounwind {
+; CHECK-LABEL: test_negative_float:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcmp s2, s0
+; CHECK-NEXT: fsub s0, s2, s1
+; CHECK-NEXT: movi d3, #0000000000000000
+; CHECK-NEXT: fccmp s2, s1, #8, le
+; CHECK-NEXT: fcsel s0, s3, s0, mi
+; CHECK-NEXT: ret
+ %cmp.nomatch1 = fcmp olt float %y, %x
+ %cmp.nomatch2 = fcmp ogt float %y, %unrelated
+ %or.cond = or i1 %cmp.nomatch1, %cmp.nomatch2
+ %sub.noreuse = fsub float %y, %x
+ %res = select i1 %or.cond, float 0.0, float %sub.noreuse
+ ret float %res
+}
+
+; Negative test: If both operands match a sub, do not reorder them.
+define i64 @test_prefer_right_negative(i64 %x, i64 %y, i64 %z) nounwind {
+; CHECK-LABEL: test_prefer_right_negative:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp x2, x0
+; CHECK-NEXT: ccmp x2, x1, #0, ls
+; CHECK-NEXT: csel x8, x0, x1, lo
+; CHECK-NEXT: sub x0, x2, x8
+; CHECK-NEXT: ret
+ %cmp.match1 = icmp ult i64 %z, %y
+ %cmp.match2 = icmp ugt i64 %z, %x
+ %or.cond = or i1 %cmp.match1, %cmp.match2
+ %sub.reuse1 = sub nuw i64 %z, %y
+ %sub.reuse2 = sub nuw i64 %z, %x
+ %res = select i1 %or.cond, i64 %sub.reuse2, i64 %sub.reuse1
+ ret i64 %res
+}