summary refs log tree commit diff
diff options
context:
space:
mode:
author Craig Topper <craig.topper@sifive.com> 2025-11-22 23:16:31 -0800
committer GitHub <noreply@github.com> 2025-11-22 23:16:31 -0800
commit b9107bfc1faa8aa74e736169626e0cf7eb0925ba (patch)
tree acae869b7c93d0eed0ddee7808a94d18f51dd0ef
parent d5f3ab8ec97786476a077b0c8e35c7c337dfddf2 (diff)
[RISCV] Support zilsd-4byte-align for i64 load/store in SelectionDAG. (#169182)
I think we need to keep the SelectionDAG code for volatile load/store so we should support 4 byte alignment when possible.
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelLowering.cpp 4
-rw-r--r-- llvm/lib/Target/RISCV/RISCVSubtarget.h 7
-rw-r--r-- llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp 4
-rw-r--r-- llvm/test/CodeGen/RISCV/zilsd.ll 91
4 files changed, 101 insertions, 5 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index dd3225507dde..bc1768ac26c2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8404,7 +8404,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
if (Store->isTruncatingStore())
return SDValue();
- if (!Subtarget.enableUnalignedScalarMem() && Store->getAlign() < 8)
+ if (Store->getAlign() < Subtarget.getZilsdAlign())
return SDValue();
SDLoc DL(Op);
@@ -14803,7 +14803,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() &&
"Unexpected custom legalisation");
- if (!Subtarget.enableUnalignedScalarMem() && Ld->getAlign() < 8)
+ if (Ld->getAlign() < Subtarget.getZilsdAlign())
return;
SDLoc DL(N);
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 29df53c6c989..b659bb96f2f1 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -237,6 +237,13 @@ public:
return 0;
}
+
+ Align getZilsdAlign() const { // Minimum alignment the i64 load/store lowering and the Zilsd pre-alloc pass compare an access against.
+ return Align(enableUnalignedScalarMem() ? 1 // Unaligned scalar memory enabled: any alignment is accepted.
+ : allowZilsd4ByteAlign() ? 4 // 4-byte alignment allowed via the tune feature.
+ : 8); // Default: 8-byte alignment.
+ }
+
unsigned getELen() const {
assert(hasVInstructions() && "Expected V extension");
return hasVInstructionsI64() ? 64 : 32;
diff --git a/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp
index 99e83fbb05a7..3b47903c351b 100644
--- a/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp
@@ -146,9 +146,7 @@ bool RISCVPreAllocZilsdOpt::runOnMachineFunction(MachineFunction &MF) {
// Check alignment: default is 8-byte, but allow 4-byte with tune feature
// If unaligned scalar memory is enabled, allow any alignment
- RequiredAlign = STI->enableUnalignedScalarMem() ? Align(1)
- : STI->allowZilsd4ByteAlign() ? Align(4)
- : Align(8);
+ RequiredAlign = STI->getZilsdAlign();
bool Modified = false;
for (auto &MBB : MF) {
Modified |= rescheduleLoadStoreInstrs(&MBB);
diff --git a/llvm/test/CodeGen/RISCV/zilsd.ll b/llvm/test/CodeGen/RISCV/zilsd.ll
index 7f2d2dd12030..27b1ff76f6f0 100644
--- a/llvm/test/CodeGen/RISCV/zilsd.ll
+++ b/llvm/test/CodeGen/RISCV/zilsd.ll
@@ -3,6 +3,8 @@
; RUN: | FileCheck -check-prefixes=CHECK,SLOW %s
; RUN: llc -mtriple=riscv32 -mattr=+zilsd,+unaligned-scalar-mem -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=CHECK,FAST %s
+; RUN: llc -mtriple=riscv32 -mattr=+zilsd,+zilsd-4byte-align -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=CHECK,4BYTEALIGN %s
define i64 @load(ptr %a) nounwind {
; CHECK-LABEL: load:
@@ -17,6 +19,28 @@ define i64 @load(ptr %a) nounwind {
ret i64 %2
}
+define i64 @load_align4(ptr %a) nounwind {
+; SLOW-LABEL: load_align4:
+; SLOW: # %bb.0:
+; SLOW-NEXT: lw a2, 80(a0)
+; SLOW-NEXT: lw a1, 84(a0)
+; SLOW-NEXT: mv a0, a2
+; SLOW-NEXT: ret
+;
+; FAST-LABEL: load_align4:
+; FAST: # %bb.0:
+; FAST-NEXT: ld a0, 80(a0)
+; FAST-NEXT: ret
+;
+; 4BYTEALIGN-LABEL: load_align4:
+; 4BYTEALIGN: # %bb.0:
+; 4BYTEALIGN-NEXT: ld a0, 80(a0)
+; 4BYTEALIGN-NEXT: ret
+ %1 = getelementptr i64, ptr %a, i32 10
+ %2 = load i64, ptr %1, align 4
+ ret i64 %2
+}
+
define void @store(ptr %a, i64 %b) nounwind {
; CHECK-LABEL: store:
; CHECK: # %bb.0:
@@ -31,6 +55,31 @@ define void @store(ptr %a, i64 %b) nounwind {
ret void
}
+define void @store_align4(ptr %a, i64 %b) nounwind {
+; SLOW-LABEL: store_align4:
+; SLOW: # %bb.0:
+; SLOW-NEXT: sw a1, 88(a0)
+; SLOW-NEXT: sw a2, 92(a0)
+; SLOW-NEXT: ret
+;
+; FAST-LABEL: store_align4:
+; FAST: # %bb.0:
+; FAST-NEXT: mv a3, a2
+; FAST-NEXT: mv a2, a1
+; FAST-NEXT: sd a2, 88(a0)
+; FAST-NEXT: ret
+;
+; 4BYTEALIGN-LABEL: store_align4:
+; 4BYTEALIGN: # %bb.0:
+; 4BYTEALIGN-NEXT: mv a3, a2
+; 4BYTEALIGN-NEXT: mv a2, a1
+; 4BYTEALIGN-NEXT: sd a2, 88(a0)
+; 4BYTEALIGN-NEXT: ret
+ %1 = getelementptr i64, ptr %a, i32 11
+ store i64 %b, ptr %1, align 4
+ ret void
+}
+
define i64 @load_unaligned(ptr %p) {
; SLOW-LABEL: load_unaligned:
; SLOW: # %bb.0:
@@ -60,6 +109,30 @@ define i64 @load_unaligned(ptr %p) {
; FAST: # %bb.0:
; FAST-NEXT: ld a0, 0(a0)
; FAST-NEXT: ret
+;
+; 4BYTEALIGN-LABEL: load_unaligned:
+; 4BYTEALIGN: # %bb.0:
+; 4BYTEALIGN-NEXT: lbu a1, 1(a0)
+; 4BYTEALIGN-NEXT: lbu a2, 2(a0)
+; 4BYTEALIGN-NEXT: lbu a3, 3(a0)
+; 4BYTEALIGN-NEXT: lbu a4, 0(a0)
+; 4BYTEALIGN-NEXT: slli a1, a1, 8
+; 4BYTEALIGN-NEXT: slli a2, a2, 16
+; 4BYTEALIGN-NEXT: slli a3, a3, 24
+; 4BYTEALIGN-NEXT: or a1, a1, a4
+; 4BYTEALIGN-NEXT: or a2, a3, a2
+; 4BYTEALIGN-NEXT: lbu a3, 5(a0)
+; 4BYTEALIGN-NEXT: lbu a4, 4(a0)
+; 4BYTEALIGN-NEXT: lbu a5, 6(a0)
+; 4BYTEALIGN-NEXT: lbu a0, 7(a0)
+; 4BYTEALIGN-NEXT: slli a3, a3, 8
+; 4BYTEALIGN-NEXT: or a3, a3, a4
+; 4BYTEALIGN-NEXT: slli a5, a5, 16
+; 4BYTEALIGN-NEXT: slli a0, a0, 24
+; 4BYTEALIGN-NEXT: or a5, a0, a5
+; 4BYTEALIGN-NEXT: or a0, a2, a1
+; 4BYTEALIGN-NEXT: or a1, a5, a3
+; 4BYTEALIGN-NEXT: ret
%res = load i64, ptr %p, align 1
ret i64 %res
}
@@ -89,6 +162,24 @@ define void @store_unaligned(ptr %p, i64 %v) {
; FAST-NEXT: mv a2, a1
; FAST-NEXT: sd a2, 0(a0)
; FAST-NEXT: ret
+;
+; 4BYTEALIGN-LABEL: store_unaligned:
+; 4BYTEALIGN: # %bb.0:
+; 4BYTEALIGN-NEXT: srli a3, a2, 24
+; 4BYTEALIGN-NEXT: srli a4, a2, 16
+; 4BYTEALIGN-NEXT: srli a5, a2, 8
+; 4BYTEALIGN-NEXT: srli a6, a1, 24
+; 4BYTEALIGN-NEXT: srli a7, a1, 16
+; 4BYTEALIGN-NEXT: sb a2, 4(a0)
+; 4BYTEALIGN-NEXT: sb a5, 5(a0)
+; 4BYTEALIGN-NEXT: sb a4, 6(a0)
+; 4BYTEALIGN-NEXT: sb a3, 7(a0)
+; 4BYTEALIGN-NEXT: srli a2, a1, 8
+; 4BYTEALIGN-NEXT: sb a1, 0(a0)
+; 4BYTEALIGN-NEXT: sb a2, 1(a0)
+; 4BYTEALIGN-NEXT: sb a7, 2(a0)
+; 4BYTEALIGN-NEXT: sb a6, 3(a0)
+; 4BYTEALIGN-NEXT: ret
store i64 %v, ptr %p, align 1
ret void
}