diff options
| author | Craig Topper <craig.topper@sifive.com> | 2025-11-22 23:16:31 -0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-11-22 23:16:31 -0800 |
| commit | b9107bfc1faa8aa74e736169626e0cf7eb0925ba (patch) | |
| tree | acae869b7c93d0eed0ddee7808a94d18f51dd0ef | |
| parent | d5f3ab8ec97786476a077b0c8e35c7c337dfddf2 (diff) | |
[RISCV] Support zilsd-4byte-align for i64 load/store in SelectionDAG. (#169182)
I think we need to keep the SelectionDAG code for volatile load/store, so
we should support 4-byte alignment when possible.
| -rw-r--r-- | llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 | ||||
| -rw-r--r-- | llvm/lib/Target/RISCV/RISCVSubtarget.h | 7 | ||||
| -rw-r--r-- | llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/RISCV/zilsd.ll | 91 |
4 files changed, 101 insertions, 5 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index dd3225507dde..bc1768ac26c2 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -8404,7 +8404,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, if (Store->isTruncatingStore()) return SDValue(); - if (!Subtarget.enableUnalignedScalarMem() && Store->getAlign() < 8) + if (Store->getAlign() < Subtarget.getZilsdAlign()) return SDValue(); SDLoc DL(Op); @@ -14803,7 +14803,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, assert(Subtarget.hasStdExtZilsd() && !Subtarget.is64Bit() && "Unexpected custom legalisation"); - if (!Subtarget.enableUnalignedScalarMem() && Ld->getAlign() < 8) + if (Ld->getAlign() < Subtarget.getZilsdAlign()) return; SDLoc DL(N); diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 29df53c6c989..b659bb96f2f1 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -237,6 +237,13 @@ public: return 0; } + + Align getZilsdAlign() const { + return Align(enableUnalignedScalarMem() ? 1 + : allowZilsd4ByteAlign() ? 4 + : 8); + } + unsigned getELen() const { assert(hasVInstructions() && "Expected V extension"); return hasVInstructionsI64() ? 64 : 32; diff --git a/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp index 99e83fbb05a7..3b47903c351b 100644 --- a/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVZilsdOptimizer.cpp @@ -146,9 +146,7 @@ bool RISCVPreAllocZilsdOpt::runOnMachineFunction(MachineFunction &MF) { // Check alignment: default is 8-byte, but allow 4-byte with tune feature // If unaligned scalar memory is enabled, allow any alignment - RequiredAlign = STI->enableUnalignedScalarMem() ? Align(1) - : STI->allowZilsd4ByteAlign() ? 
Align(4) - : Align(8); + RequiredAlign = STI->getZilsdAlign(); bool Modified = false; for (auto &MBB : MF) { Modified |= rescheduleLoadStoreInstrs(&MBB); diff --git a/llvm/test/CodeGen/RISCV/zilsd.ll b/llvm/test/CodeGen/RISCV/zilsd.ll index 7f2d2dd12030..27b1ff76f6f0 100644 --- a/llvm/test/CodeGen/RISCV/zilsd.ll +++ b/llvm/test/CodeGen/RISCV/zilsd.ll @@ -3,6 +3,8 @@ ; RUN: | FileCheck -check-prefixes=CHECK,SLOW %s ; RUN: llc -mtriple=riscv32 -mattr=+zilsd,+unaligned-scalar-mem -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=CHECK,FAST %s +; RUN: llc -mtriple=riscv32 -mattr=+zilsd,+zilsd-4byte-align -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefixes=CHECK,4BYTEALIGN %s define i64 @load(ptr %a) nounwind { ; CHECK-LABEL: load: @@ -17,6 +19,28 @@ define i64 @load(ptr %a) nounwind { ret i64 %2 } +define i64 @load_align4(ptr %a) nounwind { +; SLOW-LABEL: load_align4: +; SLOW: # %bb.0: +; SLOW-NEXT: lw a2, 80(a0) +; SLOW-NEXT: lw a1, 84(a0) +; SLOW-NEXT: mv a0, a2 +; SLOW-NEXT: ret +; +; FAST-LABEL: load_align4: +; FAST: # %bb.0: +; FAST-NEXT: ld a0, 80(a0) +; FAST-NEXT: ret +; +; 4BYTEALIGN-LABEL: load_align4: +; 4BYTEALIGN: # %bb.0: +; 4BYTEALIGN-NEXT: ld a0, 80(a0) +; 4BYTEALIGN-NEXT: ret + %1 = getelementptr i64, ptr %a, i32 10 + %2 = load i64, ptr %1, align 4 + ret i64 %2 +} + define void @store(ptr %a, i64 %b) nounwind { ; CHECK-LABEL: store: ; CHECK: # %bb.0: @@ -31,6 +55,31 @@ define void @store(ptr %a, i64 %b) nounwind { ret void } +define void @store_align4(ptr %a, i64 %b) nounwind { +; SLOW-LABEL: store_align4: +; SLOW: # %bb.0: +; SLOW-NEXT: sw a1, 88(a0) +; SLOW-NEXT: sw a2, 92(a0) +; SLOW-NEXT: ret +; +; FAST-LABEL: store_align4: +; FAST: # %bb.0: +; FAST-NEXT: mv a3, a2 +; FAST-NEXT: mv a2, a1 +; FAST-NEXT: sd a2, 88(a0) +; FAST-NEXT: ret +; +; 4BYTEALIGN-LABEL: store_align4: +; 4BYTEALIGN: # %bb.0: +; 4BYTEALIGN-NEXT: mv a3, a2 +; 4BYTEALIGN-NEXT: mv a2, a1 +; 4BYTEALIGN-NEXT: sd a2, 88(a0) +; 4BYTEALIGN-NEXT: ret + %1 = 
getelementptr i64, ptr %a, i32 11 + store i64 %b, ptr %1, align 4 + ret void +} + define i64 @load_unaligned(ptr %p) { ; SLOW-LABEL: load_unaligned: ; SLOW: # %bb.0: @@ -60,6 +109,30 @@ define i64 @load_unaligned(ptr %p) { ; FAST: # %bb.0: ; FAST-NEXT: ld a0, 0(a0) ; FAST-NEXT: ret +; +; 4BYTEALIGN-LABEL: load_unaligned: +; 4BYTEALIGN: # %bb.0: +; 4BYTEALIGN-NEXT: lbu a1, 1(a0) +; 4BYTEALIGN-NEXT: lbu a2, 2(a0) +; 4BYTEALIGN-NEXT: lbu a3, 3(a0) +; 4BYTEALIGN-NEXT: lbu a4, 0(a0) +; 4BYTEALIGN-NEXT: slli a1, a1, 8 +; 4BYTEALIGN-NEXT: slli a2, a2, 16 +; 4BYTEALIGN-NEXT: slli a3, a3, 24 +; 4BYTEALIGN-NEXT: or a1, a1, a4 +; 4BYTEALIGN-NEXT: or a2, a3, a2 +; 4BYTEALIGN-NEXT: lbu a3, 5(a0) +; 4BYTEALIGN-NEXT: lbu a4, 4(a0) +; 4BYTEALIGN-NEXT: lbu a5, 6(a0) +; 4BYTEALIGN-NEXT: lbu a0, 7(a0) +; 4BYTEALIGN-NEXT: slli a3, a3, 8 +; 4BYTEALIGN-NEXT: or a3, a3, a4 +; 4BYTEALIGN-NEXT: slli a5, a5, 16 +; 4BYTEALIGN-NEXT: slli a0, a0, 24 +; 4BYTEALIGN-NEXT: or a5, a0, a5 +; 4BYTEALIGN-NEXT: or a0, a2, a1 +; 4BYTEALIGN-NEXT: or a1, a5, a3 +; 4BYTEALIGN-NEXT: ret %res = load i64, ptr %p, align 1 ret i64 %res } @@ -89,6 +162,24 @@ define void @store_unaligned(ptr %p, i64 %v) { ; FAST-NEXT: mv a2, a1 ; FAST-NEXT: sd a2, 0(a0) ; FAST-NEXT: ret +; +; 4BYTEALIGN-LABEL: store_unaligned: +; 4BYTEALIGN: # %bb.0: +; 4BYTEALIGN-NEXT: srli a3, a2, 24 +; 4BYTEALIGN-NEXT: srli a4, a2, 16 +; 4BYTEALIGN-NEXT: srli a5, a2, 8 +; 4BYTEALIGN-NEXT: srli a6, a1, 24 +; 4BYTEALIGN-NEXT: srli a7, a1, 16 +; 4BYTEALIGN-NEXT: sb a2, 4(a0) +; 4BYTEALIGN-NEXT: sb a5, 5(a0) +; 4BYTEALIGN-NEXT: sb a4, 6(a0) +; 4BYTEALIGN-NEXT: sb a3, 7(a0) +; 4BYTEALIGN-NEXT: srli a2, a1, 8 +; 4BYTEALIGN-NEXT: sb a1, 0(a0) +; 4BYTEALIGN-NEXT: sb a2, 1(a0) +; 4BYTEALIGN-NEXT: sb a7, 2(a0) +; 4BYTEALIGN-NEXT: sb a6, 3(a0) +; 4BYTEALIGN-NEXT: ret store i64 %v, ptr %p, align 1 ret void } |
