From 128f850af4ac5face78be14f8321abd3f971da5b Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Thu, 30 Oct 2025 15:07:37 +0000 Subject: [lldb][test] Fix libc++ API tests on older Clang versions Both of these fail on our Clang-19 macOS bots. --- .../optional/TestDataFormatterLibcxxOptionalSimulator.py | 2 ++ .../cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/TestDataFormatterLibcxxOptionalSimulator.py b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/TestDataFormatterLibcxxOptionalSimulator.py index 3fefe87dcad9..7463f8897901 100644 --- a/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/TestDataFormatterLibcxxOptionalSimulator.py +++ b/lldb/test/API/functionalities/data-formatter/data-formatter-stl/libcxx-simulators/optional/TestDataFormatterLibcxxOptionalSimulator.py @@ -53,6 +53,8 @@ for r in range(2): # causing this test to fail. This was reverted in newer version of clang # with commit 52a9ba7ca. 
@skipIf(compiler="clang", compiler_version=["=", "17"]) + @skipIf(compiler="clang", compiler_version=["=", "18"]) + @skipIf(compiler="clang", compiler_version=["=", "19"]) @functools.wraps(LibcxxOptionalDataFormatterSimulatorTestCase._run_test) def test_method(self, defines=defines): LibcxxOptionalDataFormatterSimulatorTestCase._run_test(self, defines) diff --git a/lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py b/lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py index d8a729b322fe..2f942da604ff 100644 --- a/lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py +++ b/lldb/test/API/lang/cpp/libcxx-internals-recognizer/TestLibcxxInternalsRecognizer.py @@ -9,7 +9,7 @@ class LibCxxInternalsRecognizerTestCase(TestBase): NO_DEBUG_INFO_TESTCASE = True @add_test_categories(["libc++"]) - @skipIf(compiler="clang", compiler_version=["<", "19.0"]) + @skipIf(compiler="clang", compiler_version=["<=", "19.0"]) def test_frame_recognizer(self): """Test that implementation details of libc++ are hidden""" self.build() -- cgit v1.2.3 From f0d809261bf1d8f838056dbcc518d126e9b78b38 Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 30 Oct 2025 10:10:45 -0500 Subject: [flang] One more fix for dumping evaluate::Expr (#165730) Clang doesn't have "std::string_view" in the type list. --- flang/include/flang/Semantics/dump-expr.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flang/include/flang/Semantics/dump-expr.h b/flang/include/flang/Semantics/dump-expr.h index 2dbd4cb60be5..5a78e13b19e5 100644 --- a/flang/include/flang/Semantics/dump-expr.h +++ b/flang/include/flang/Semantics/dump-expr.h @@ -48,10 +48,11 @@ private: // "... 
[with T = xyz; std::string_view = ...]" #ifdef __clang__ std::string_view front("[T = "); + std::string_view back("]"); #else std::string_view front("[with T = "); -#endif std::string_view back("; std::string_view ="); +#endif #elif defined(_MSC_VER) #define DUMP_EXPR_SHOW_TYPE -- cgit v1.2.3 From 0030fac839566eb83bdb8a7ed61800ac021b2465 Mon Sep 17 00:00:00 2001 From: Ivan Kosarev Date: Thu, 30 Oct 2025 15:14:56 +0000 Subject: [AMDGPU][MC][NFC] Use the lit substitution to extract instruction codes in tests. (#165450) Instead of invoking sed directly. Partially reverts https://github.com/llvm/llvm-project/pull/119778 . --- llvm/test/MC/AMDGPU/gfx12_asm_vop1.s | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s index d85ea799ed3d..399a6441629c 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop1.s @@ -1,8 +1,8 @@ // NOTE: Assertions have been autogenerated by utils/update_mc_test_checks.py UTC_ARGS: --unique --version 5 // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding -comment-column=0 %s | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-ASM %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | sed -n 's#.*\(\[0x[0-9a-fx,]\{1,\}\]\)#\1#p' | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding -comment-column=0 | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize32,+real-true16 -disassemble -show-encoding -comment-column=0 | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s // RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding 
-comment-column=0 %s | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-ASM %s -// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | sed -n 's#.*\(\[0x[0-9a-fx,]\{1,\}\]\)#\1#p' | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding -comment-column=0 | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s +// RUN: llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -show-encoding %s | %extract-encodings | llvm-mc -triple=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64,+real-true16 -disassemble -show-encoding -comment-column=0 | FileCheck --strict-whitespace --check-prefixes=GFX12,GFX12-DIS %s v_bfrev_b32_e32 v5, v1 // GFX12: v_bfrev_b32_e32 v5, v1 ; encoding: [0x01,0x71,0x0a,0x7e] -- cgit v1.2.3 From ba5cde79aa05eeaa87d45cf472f3583fa9f93bff Mon Sep 17 00:00:00 2001 From: vangthao95 Date: Thu, 30 Oct 2025 08:19:12 -0700 Subject: [AMDGPU][GlobalISel] Fix issue with copy_scc_vcc on gfx7 (#165355) When selecting for G_AMDGPU_COPY_SCC_VCC, we use S_CMP_LG_U64 or S_CMP_LG_U32 for wave64 and wave32 respectively. However, on gfx7 we do not have the S_CMP_LG_U64 instruction. Work around this issue by using S_OR_B64 instead. 
--- .../Target/AMDGPU/AMDGPUInstructionSelector.cpp | 20 +++++-- .../AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll | 66 ++++++++++++++++++++++ .../AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir | 37 ++++++++++++ 3 files changed, 118 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index 9ce12243016f..aed325cf627b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -221,12 +221,22 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const { bool AMDGPUInstructionSelector::selectCOPY_SCC_VCC(MachineInstr &I) const { const DebugLoc &DL = I.getDebugLoc(); MachineBasicBlock *BB = I.getParent(); + Register VCCReg = I.getOperand(1).getReg(); + MachineInstr *Cmp; + + if (STI.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) { + unsigned CmpOpc = + STI.isWave64() ? AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32; + Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc)).addReg(VCCReg).addImm(0); + } else { + // For gfx7 and earlier, S_CMP_LG_U64 doesn't exist, so we use S_OR_B64 + // which sets SCC as a side effect. + Register DeadDst = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass); + Cmp = BuildMI(*BB, &I, DL, TII.get(AMDGPU::S_OR_B64), DeadDst) + .addReg(VCCReg) + .addReg(VCCReg); + } - unsigned CmpOpc = - STI.isWave64() ? 
AMDGPU::S_CMP_LG_U64 : AMDGPU::S_CMP_LG_U32; - MachineInstr *Cmp = BuildMI(*BB, &I, DL, TII.get(CmpOpc)) - .addReg(I.getOperand(1).getReg()) - .addImm(0); if (!constrainSelectedInstRegOperands(*Cmp, TII, TRI, RBI)) return false; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll new file mode 100644 index 000000000000..1a7ccf083568 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx700 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX7 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s + +define amdgpu_kernel void @fcmp_uniform_select(float %a, i32 %b, i32 %c, ptr addrspace(1) %out) { +; GFX7-LABEL: fcmp_uniform_select: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x9 +; GFX7-NEXT: s_load_dword s3, s[4:5], 0xb +; GFX7-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd +; GFX7-NEXT: s_mov_b32 s2, -1 +; GFX7-NEXT: s_waitcnt lgkmcnt(0) +; GFX7-NEXT: v_cmp_eq_f32_e64 s[4:5], s6, 0 +; GFX7-NEXT: s_or_b64 s[4:5], s[4:5], s[4:5] +; GFX7-NEXT: s_cselect_b32 s4, 1, 0 +; GFX7-NEXT: s_and_b32 s4, s4, 1 +; GFX7-NEXT: s_cmp_lg_u32 s4, 0 +; GFX7-NEXT: s_cselect_b32 s3, s7, s3 +; GFX7-NEXT: v_mov_b32_e32 v0, s3 +; GFX7-NEXT: s_mov_b32 s3, 0xf000 +; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX7-NEXT: s_endpgm +; +; GFX8-LABEL: fcmp_uniform_select: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX8-NEXT: s_load_dword s6, s[4:5], 0x2c +; GFX8-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x34 +; GFX8-NEXT: 
s_waitcnt lgkmcnt(0) +; GFX8-NEXT: v_cmp_eq_f32_e64 s[4:5], s0, 0 +; GFX8-NEXT: s_cmp_lg_u64 s[4:5], 0 +; GFX8-NEXT: s_cselect_b32 s0, 1, 0 +; GFX8-NEXT: s_and_b32 s0, s0, 1 +; GFX8-NEXT: s_cmp_lg_u32 s0, 0 +; GFX8-NEXT: s_cselect_b32 s0, s1, s6 +; GFX8-NEXT: v_mov_b32_e32 v0, s2 +; GFX8-NEXT: v_mov_b32_e32 v2, s0 +; GFX8-NEXT: v_mov_b32_e32 v1, s3 +; GFX8-NEXT: flat_store_dword v[0:1], v2 +; GFX8-NEXT: s_endpgm +; +; GFX11-LABEL: fcmp_uniform_select: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x2c +; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x34 +; GFX11-NEXT: v_mov_b32_e32 v1, 0 +; GFX11-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-NEXT: v_cmp_eq_f32_e64 s0, s0, 0 +; GFX11-NEXT: s_cmp_lg_u32 s0, 0 +; GFX11-NEXT: s_cselect_b32 s0, 1, 0 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-NEXT: s_and_b32 s0, s0, 1 +; GFX11-NEXT: s_cmp_lg_u32 s0, 0 +; GFX11-NEXT: s_cselect_b32 s0, s1, s6 +; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: global_store_b32 v1, v0, s[2:3] +; GFX11-NEXT: s_endpgm + %cmp = fcmp oeq float %a, 0.0 + %sel = select i1 %cmp, i32 %b, i32 %c + store i32 %sel, ptr addrspace(1) %out + ret void +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir new file mode 100644 index 000000000000..67cc0169af61 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-copy-scc-vcc.mir @@ -0,0 +1,37 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=gfx700 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX7 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx803 -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GF8 %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx1100 
-run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GFX11 %s + +--- +name: test_copy_scc_vcc +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0: + ; GFX7-LABEL: name: test_copy_scc_vcc + ; GFX7: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GFX7-NEXT: [[S_OR_B64_:%[0-9]+]]:sreg_64 = S_OR_B64 [[DEF]], [[DEF]], implicit-def $scc + ; GFX7-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc + ; GFX7-NEXT: $sgpr0 = COPY [[COPY]] + ; GFX7-NEXT: S_ENDPGM 0, implicit $sgpr0 + ; + ; GF8-LABEL: name: test_copy_scc_vcc + ; GF8: [[DEF:%[0-9]+]]:sreg_64_xexec = IMPLICIT_DEF + ; GF8-NEXT: S_CMP_LG_U64 [[DEF]], 0, implicit-def $scc + ; GF8-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc + ; GF8-NEXT: $sgpr0 = COPY [[COPY]] + ; GF8-NEXT: S_ENDPGM 0, implicit $sgpr0 + ; + ; GFX11-LABEL: name: test_copy_scc_vcc + ; GFX11: [[DEF:%[0-9]+]]:sreg_32_xm0_xexec = IMPLICIT_DEF + ; GFX11-NEXT: S_CMP_LG_U32 [[DEF]], 0, implicit-def $scc + ; GFX11-NEXT: [[COPY:%[0-9]+]]:sreg_32 = COPY $scc + ; GFX11-NEXT: $sgpr0 = COPY [[COPY]] + ; GFX11-NEXT: S_ENDPGM 0, implicit $sgpr0 + %0:vcc(s1) = G_IMPLICIT_DEF + %1:sgpr(s32) = G_AMDGPU_COPY_SCC_VCC %0 + $sgpr0 = COPY %1 + S_ENDPGM 0, implicit $sgpr0 +... -- cgit v1.2.3 From f5e175f06dacf6751f27f7a4bd9e9a3489e95a5e Mon Sep 17 00:00:00 2001 From: srcarroll <50210727+srcarroll@users.noreply.github.com> Date: Thu, 30 Oct 2025 10:20:19 -0500 Subject: [mlir][linalg] Genericize MapOp (#162742) This PR modifies the definition of `linalg::MapOp` so that it has the same structure of `linalg::GenericOp` and all other linalg ops. Mainly, it adds an `out` bbarg for the body of the op. Although the `out` arg is never used in the body, there doesn't seem to be much benefit in specializing the op to exclude it. In fact it only makes things more complicated because it doesn't align with the `GenericOp` structure. 
For example, `linalg-generalize-named-ops` avoided converting `linalg.map` purely because it didn't have the structure to do so. Moreover, although some fusion patterns are applied explicitly to `GenericOp`, we can change them to be applied to the base `LinalgOp` which will enable fusion for any fusion-compatible linalg op, but that requires the op having a generic structure. So these changes will enable us to use existing generic transformation patterns on `MapOp` that weren't possible before. They can either be applied to `MapOp` directly or applied after converting to `GenericOp`. --- .../mlir/Dialect/Linalg/IR/LinalgStructuredOps.td | 4 --- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 37 ++++++++++++++-------- .../Dialect/Linalg/Transforms/Generalization.cpp | 6 ++-- .../Transforms/BufferizableOpInterfaceImpl.cpp | 2 ++ mlir/test/Dialect/Linalg/canonicalize.mlir | 2 +- mlir/test/Dialect/Linalg/generalize-named-ops.mlir | 22 ++++++++----- mlir/test/Dialect/Linalg/invalid.mlir | 10 +++--- mlir/test/Dialect/Linalg/one-shot-bufferize.mlir | 2 +- mlir/test/Dialect/Linalg/roundtrip.mlir | 18 +++++------ .../vectorization/linalg-ops-with-patterns.mlir | 2 +- mlir/test/Dialect/Tensor/bufferize.mlir | 2 +- .../lower-to-loops-using-interface.mlir | 6 ++-- 12 files changed, 63 insertions(+), 50 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td index f3674c3eecfe..ecd036d452b2 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -293,10 +293,6 @@ def MapOp : LinalgStructuredBase_Op<"map", [ // Implement functions necessary for DestinationStyleOpInterface. 
MutableOperandRange getDpsInitsMutable() { return getInitMutable(); } - SmallVector getOpOperandsMatchingBBargs() { - return getDpsInputOperands(); - } - bool payloadUsesValueFromOperand(OpOperand * opOperand) { if (isDpsInit(opOperand)) return false; return !getMatchingBlockArgument(opOperand).use_empty(); diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index cbc565b0c8cb..3dc45edf4a23 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -1474,6 +1474,8 @@ void MapOp::getAsmBlockArgumentNames(Region &region, OpAsmSetValueNameFn setNameFn) { for (Value v : getRegionInputArgs()) setNameFn(v, "in"); + for (Value v : getRegionOutputArgs()) + setNameFn(v, "init"); } void MapOp::getAsmResultNames(function_ref setNameFn) { @@ -1495,14 +1497,14 @@ void MapOp::build( if (bodyBuild) buildGenericRegion(builder, result.location, *result.regions.front(), - inputs, /*outputs=*/{}, bodyBuild); + inputs, /*outputs=*/{init}, bodyBuild); } static void addBodyWithPayloadOp(OpAsmParser &parser, OperationState &result, const OperationName &payloadOpName, const NamedAttrList &payloadOpAttrs, ArrayRef operands, - bool initFirst = false) { + bool initFirst = false, bool mapInit = true) { OpBuilder b(parser.getContext()); Region *body = result.addRegion(); Block &block = body->emplaceBlock(); @@ -1516,12 +1518,13 @@ static void addBodyWithPayloadOp(OpAsmParser &parser, OperationState &result, // If initFirst flag is enabled, we consider init as the first position of // payload operands. 
if (initFirst) { - payloadOpOperands.push_back(block.getArguments().back()); + if (mapInit) + payloadOpOperands.push_back(block.getArguments().back()); for (const auto &arg : block.getArguments().drop_back()) payloadOpOperands.push_back(arg); } else { payloadOpOperands = {block.getArguments().begin(), - block.getArguments().end()}; + block.getArguments().end() - int(!mapInit)}; } Operation *payloadOp = b.create( @@ -1553,8 +1556,8 @@ ParseResult MapOp::parse(OpAsmParser &parser, OperationState &result) { if (payloadOpName.has_value()) { if (!result.operands.empty()) addBodyWithPayloadOp(parser, result, payloadOpName.value(), - payloadOpAttrs, - ArrayRef(result.operands).drop_back()); + payloadOpAttrs, ArrayRef(result.operands), false, + false); else result.addRegion(); } else { @@ -1570,7 +1573,11 @@ ParseResult MapOp::parse(OpAsmParser &parser, OperationState &result) { return success(); } -static bool canUseShortForm(Block *body, bool initFirst = false) { +static bool canUseShortForm(Block *body, bool initFirst = false, + bool mapInit = true) { + // `initFirst == true` implies that we want to map init arg + if (initFirst && !mapInit) + return false; // Check if the body can be printed in short form. The following 4 conditions // must be satisfied: @@ -1582,7 +1589,7 @@ static bool canUseShortForm(Block *body, bool initFirst = false) { // 2) The payload op must have the same number of operands as the number of // block arguments. 
if (payload.getNumOperands() == 0 || - payload.getNumOperands() != body->getNumArguments()) + payload.getNumOperands() != body->getNumArguments() - int(!mapInit)) return false; // 3) If `initFirst` is true (e.g., for reduction ops), the init block @@ -1600,7 +1607,8 @@ static bool canUseShortForm(Block *body, bool initFirst = false) { } } else { for (const auto &[operand, bbArg] : - llvm::zip(payload.getOperands(), body->getArguments())) { + llvm::zip(payload.getOperands(), + body->getArguments().drop_back(int(!mapInit)))) { if (bbArg != operand) return false; } @@ -1632,7 +1640,8 @@ static void printShortForm(OpAsmPrinter &p, Operation *payloadOp) { void MapOp::print(OpAsmPrinter &p) { Block *mapper = getBody(); - bool useShortForm = canUseShortForm(mapper); + bool useShortForm = + canUseShortForm(mapper, /*initFirst=*/false, /*mapInit*/ false); if (useShortForm) { printShortForm(p, &mapper->getOperations().front()); } @@ -1658,11 +1667,13 @@ LogicalResult MapOp::verify() { auto *bodyBlock = getBody(); auto blockArgs = bodyBlock->getArguments(); - // Checks if the number of `inputs` match the arity of the `mapper` region. - if (getInputs().size() != blockArgs.size()) + // Checks if the number of `inputs` + `init` match the arity of the `mapper` + // region. + if (getInputs().size() + 1 != blockArgs.size()) return emitOpError() << "expects number of operands to match the arity of " "mapper, but got: " - << getInputs().size() << " and " << blockArgs.size(); + << getInputs().size() + 1 << " and " + << blockArgs.size(); // The parameters of mapper should all match the element type of inputs. 
for (const auto &[bbArgType, inputArg] : diff --git a/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp index 3e31393fd51e..75bb1757a55f 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Generalization.cpp @@ -31,10 +31,8 @@ using namespace mlir; using namespace mlir::linalg; static LogicalResult generalizeNamedOpPrecondition(LinalgOp linalgOp) { - // Bailout if `linalgOp` is already a generic or a linalg.map. We cannot - // trivially generalize a `linalg.map`, as it does not use the output as - // region arguments in the block. - if (isa(linalgOp) || isa(linalgOp)) + // Bailout if `linalgOp` is already a generic. + if (isa(linalgOp)) return failure(); // Check if the operation has exactly one region. if (linalgOp->getNumRegions() != 1) { diff --git a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp index bce964e47a3b..c607ece418df 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp @@ -579,6 +579,7 @@ static Value lowerGenerateLikeOpBody(RewriterBase &rewriter, Location loc, linalg::MapOp::create(rewriter, loc, tensorType, /*inputs=*/ValueRange(), /*init=*/tensorDestination); Block &linalgBody = linalgOp.getMapper().emplaceBlock(); + linalgBody.addArgument(tensorType.getElementType(), loc); // Create linalg::IndexOps. rewriter.setInsertionPointToStart(&linalgBody); @@ -1068,6 +1069,7 @@ struct SplatOpInterface /*inputs=*/ValueRange(), /*init=*/*tensorAlloc); Block &linalgBody = linalgOp.getMapper().emplaceBlock(); + linalgBody.addArgument(tensorType.getElementType(), loc); // Create linalg::IndexOps. 
rewriter.setInsertionPointToStart(&linalgBody); diff --git a/mlir/test/Dialect/Linalg/canonicalize.mlir b/mlir/test/Dialect/Linalg/canonicalize.mlir index 26d2d98572f4..f4020ede4854 100644 --- a/mlir/test/Dialect/Linalg/canonicalize.mlir +++ b/mlir/test/Dialect/Linalg/canonicalize.mlir @@ -1423,7 +1423,7 @@ func.func @transpose_buffer(%input: memref, func.func @recursive_effect(%arg : tensor<1xf32>) { %init = arith.constant dense<0.0> : tensor<1xf32> %mapped = linalg.map ins(%arg:tensor<1xf32>) outs(%init :tensor<1xf32>) - (%in : f32) { + (%in : f32, %out: f32) { vector.print %in : f32 linalg.yield %in : f32 } diff --git a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir index ae07b1b82228..dcdd6c8db4b2 100644 --- a/mlir/test/Dialect/Linalg/generalize-named-ops.mlir +++ b/mlir/test/Dialect/Linalg/generalize-named-ops.mlir @@ -386,18 +386,24 @@ func.func @generalize_batch_reduce_gemm_bf16(%lhs: memref<7x8x9xbf16>, %rhs: mem // ----- -// CHECK-LABEL: generalize_linalg_map -func.func @generalize_linalg_map(%arg0: memref<1x8x8x8xf32>) { +func.func @generalize_linalg_map(%arg0: memref<1x8x8x8xf32>, %arg1: memref<1x8x8x8xf32>, %arg2: memref<1x8x8x8xf32>) { %cst = arith.constant 0.000000e+00 : f32 - // CHECK: linalg.map - // CHECK-NOT: linalg.generic - linalg.map outs(%arg0 : memref<1x8x8x8xf32>) - () { - linalg.yield %cst : f32 - } + linalg.map {arith.addf} ins(%arg0, %arg1: memref<1x8x8x8xf32>, memref<1x8x8x8xf32>) outs(%arg2 : memref<1x8x8x8xf32>) return } +// CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)> + +// CHECK: @generalize_linalg_map + +// CHECK: linalg.generic +// CHECK-SAME: indexing_maps = [#[[MAP0]], #[[MAP0]], #[[MAP0]]] +// CHECK-SAME: iterator_types = ["parallel", "parallel", "parallel", "parallel"]} +// CHECK-SAME: ins(%{{.+}}, %{{.+}} : memref<1x8x8x8xf32>, memref<1x8x8x8xf32>) outs(%{{.+}} : memref<1x8x8x8xf32> +// CHECK: ^{{.+}}(%[[BBARG0:.+]]: f32, %[[BBARG1:.+]]: 
f32, %[[BBARG2:.+]]: f32) +// CHECK: %[[ADD:.+]] = arith.addf %[[BBARG0]], %[[BBARG1]] : f32 +// CHECK: linalg.yield %[[ADD]] : f32 + // ----- func.func @generalize_add(%lhs: memref<7x14x21xf32>, %rhs: memref<7x14x21xf32>, diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir index 40bf4d19d6b9..fabc8e610612 100644 --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -681,7 +681,7 @@ func.func @map_binary_wrong_yield_operands( %add = linalg.map ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>) outs(%init:tensor<64xf32>) - (%lhs_elem: f32, %rhs_elem: f32) { + (%lhs_elem: f32, %rhs_elem: f32, %out: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f32 // expected-error @+1{{'linalg.yield' op expected number of yield values (2) to match the number of inits / outs operands of the enclosing LinalgOp (1)}} linalg.yield %0, %0: f32, f32 @@ -694,11 +694,11 @@ func.func @map_binary_wrong_yield_operands( func.func @map_input_mapper_arity_mismatch( %lhs: tensor<64xf32>, %rhs: tensor<64xf32>, %init: tensor<64xf32>) -> tensor<64xf32> { - // expected-error@+1{{'linalg.map' op expects number of operands to match the arity of mapper, but got: 2 and 3}} + // expected-error@+1{{'linalg.map' op expects number of operands to match the arity of mapper, but got: 3 and 4}} %add = linalg.map ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>) outs(%init:tensor<64xf32>) - (%lhs_elem: f32, %rhs_elem: f32, %extra_elem: f32) { + (%lhs_elem: f32, %rhs_elem: f32, %out: f32, %extra_elem: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f32 linalg.yield %0: f32 } @@ -714,7 +714,7 @@ func.func @map_input_mapper_type_mismatch( %add = linalg.map ins(%lhs, %rhs : tensor<64xf32>, tensor<64xf32>) outs(%init:tensor<64xf32>) - (%lhs_elem: f64, %rhs_elem: f64) { + (%lhs_elem: f64, %rhs_elem: f64, %out: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f64 linalg.yield %0: f64 } @@ -730,7 +730,7 @@ func.func @map_input_output_shape_mismatch( %add = 
linalg.map ins(%lhs, %rhs : tensor<64x64xf32>, tensor<64x64xf32>) outs(%init:tensor<32xf32>) - (%lhs_elem: f32, %rhs_elem: f32) { + (%lhs_elem: f32, %rhs_elem: f32, %out: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f32 linalg.yield %0: f32 } diff --git a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir index 1df15e85bac1..85cc1ffc2029 100644 --- a/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Linalg/one-shot-bufferize.mlir @@ -339,7 +339,7 @@ func.func @map_binary(%lhs: tensor<64xf32>, %rhs: tensor<64xf32>, %add = linalg.map ins(%lhs, %rhs: tensor<64xf32>, tensor<64xf32>) outs(%init:tensor<64xf32>) - (%lhs_elem: f32, %rhs_elem: f32) { + (%lhs_elem: f32, %rhs_elem: f32, %out: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f32 linalg.yield %0: f32 } diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir index 563013d4083a..74928920c695 100644 --- a/mlir/test/Dialect/Linalg/roundtrip.mlir +++ b/mlir/test/Dialect/Linalg/roundtrip.mlir @@ -341,7 +341,7 @@ func.func @mixed_parallel_reduced_results(%arg0 : tensor, func.func @map_no_inputs(%init: tensor<64xf32>) -> tensor<64xf32> { %add = linalg.map outs(%init:tensor<64xf32>) - () { + (%out: f32) { %0 = arith.constant 0.0: f32 linalg.yield %0: f32 } @@ -349,7 +349,7 @@ func.func @map_no_inputs(%init: tensor<64xf32>) -> tensor<64xf32> { } // CHECK-LABEL: func @map_no_inputs // CHECK: linalg.map outs -// CHECK-NEXT: () { +// CHECK-NEXT: (%[[OUT:.*]]: f32) { // CHECK-NEXT: arith.constant // CHECK-NEXT: linalg.yield // CHECK-NEXT: } @@ -361,7 +361,7 @@ func.func @map_binary(%lhs: tensor<64xf32>, %rhs: tensor<64xf32>, %add = linalg.map ins(%lhs, %rhs: tensor<64xf32>, tensor<64xf32>) outs(%init:tensor<64xf32>) - (%lhs_elem: f32, %rhs_elem: f32) { + (%lhs_elem: f32, %rhs_elem: f32, %out: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f32 linalg.yield %0: f32 } @@ -378,7 +378,7 @@ func.func @map_binary_memref(%lhs: 
memref<64xf32>, %rhs: memref<64xf32>, linalg.map ins(%lhs, %rhs: memref<64xf32>, memref<64xf32>) outs(%init:memref<64xf32>) - (%lhs_elem: f32, %rhs_elem: f32) { + (%lhs_elem: f32, %rhs_elem: f32, %out: f32) { %0 = arith.addf %lhs_elem, %rhs_elem: f32 linalg.yield %0: f32 } @@ -393,7 +393,7 @@ func.func @map_unary(%input: tensor<64xf32>, %init: tensor<64xf32>) -> tensor<64 %abs = linalg.map ins(%input:tensor<64xf32>) outs(%init:tensor<64xf32>) - (%input_elem: f32) { + (%input_elem: f32, %out: f32) { %0 = math.absf %input_elem: f32 linalg.yield %0: f32 } @@ -408,7 +408,7 @@ func.func @map_unary_memref(%input: memref<64xf32>, %init: memref<64xf32>) { linalg.map ins(%input:memref<64xf32>) outs(%init:memref<64xf32>) - (%input_elem: f32) { + (%input_elem: f32, %out: f32) { %0 = math.absf %input_elem: f32 linalg.yield %0: f32 } @@ -604,7 +604,7 @@ func.func @map_arith_with_attr(%lhs: tensor<64xf32>, %rhs: tensor<64xf32>, %add = linalg.map ins(%lhs, %rhs: tensor<64xf32>, tensor<64xf32>) outs(%init:tensor<64xf32>) - (%lhs_elem: f32, %rhs_elem: f32) { + (%lhs_elem: f32, %rhs_elem: f32, %out: f32) { %0 = arith.addf %lhs_elem, %rhs_elem fastmath : f32 linalg.yield %0: f32 } @@ -622,7 +622,7 @@ func.func @map_arith_with_attr(%lhs: tensor<64xf32>, %rhs: tensor<64xf32>, func.func @map_not_short_form_compatible(%lhs: tensor<1x32xf32>, %rhs: tensor<1x32xf32>, %init: tensor<1x32xf32>) -> tensor<1x32xf32> { %mapped = linalg.map ins(%lhs, %rhs : tensor<1x32xf32>, tensor<1x32xf32>) outs(%init : tensor<1x32xf32>) - (%in_1: f32, %in_2: f32) { + (%in_1: f32, %in_2: f32, %out: f32) { %1 = arith.maximumf %in_1, %in_2 : f32 linalg.yield %in_1 : f32 } @@ -634,7 +634,7 @@ func.func @map_not_short_form_compatible(%lhs: tensor<1x32xf32>, %rhs: tensor<1x // CHECK-NOT: linalg.map { arith.maximumf } ins(%[[LHS]] : tensor<1x32xf32> // CHECK: linalg.map ins(%[[LHS]], %[[RHS]] : tensor<1x32xf32>, tensor<1x32xf32>) // CHECK-SAME: outs(%[[INIT]] : tensor<1x32xf32>) -// CHECK-NEXT: (%[[IN1:.*]]: f32, 
%[[IN2:.*]]: f32) { +// CHECK-NEXT: (%[[IN1:.*]]: f32, %[[IN2:.*]]: f32, %[[OUT:.*]]: f32) { // CHECK-NEXT: %[[MAX_RESULT:.*]] = arith.maximumf %[[IN1]], %[[IN2]] : f32 // CHECK-NEXT: linalg.yield %[[IN1]] : f32 // CHECK-NEXT: } diff --git a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir index 93a03369be23..aa2c1da4b627 100644 --- a/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir +++ b/mlir/test/Dialect/Linalg/vectorization/linalg-ops-with-patterns.mlir @@ -356,7 +356,7 @@ func.func @vectorize_map(%arg0: memref<64xf32>, %arg1: memref<64xf32>, %arg2: memref<64xf32>) { linalg.map ins(%arg0, %arg1 : memref<64xf32>, memref<64xf32>) outs(%arg2 : memref<64xf32>) - (%in: f32, %in_0: f32) { + (%in: f32, %in_0: f32, %out: f32) { %0 = arith.addf %in, %in_0 : f32 linalg.yield %0 : f32 } diff --git a/mlir/test/Dialect/Tensor/bufferize.mlir b/mlir/test/Dialect/Tensor/bufferize.mlir index 296ca02564e3..5eb2360a29b8 100644 --- a/mlir/test/Dialect/Tensor/bufferize.mlir +++ b/mlir/test/Dialect/Tensor/bufferize.mlir @@ -728,7 +728,7 @@ func.func @tensor.concat_dynamic_nonconcat_dim(%f: tensor, %g: tensor // CHECK: %[[ALLOC_T:.*]] = bufferization.to_tensor %[[ALLOC]] // CHECK: %[[MAPPED:.*]] = linalg.map outs(%[[ALLOC_T]] : tensor) -// CHECK: () { +// CHECK: (%[[INIT:.*]]: f32) { // CHECK: linalg.yield %[[F]] : f32 // CHECK: } // CHECK: return %[[MAPPED]] : tensor diff --git a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir index 8cbee3cbb758..aa8882d21698 100644 --- a/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir +++ b/mlir/test/Interfaces/TilingInterface/lower-to-loops-using-interface.mlir @@ -257,10 +257,10 @@ module attributes {transform.with_named_sequence} { // ----- func.func @map(%lhs: memref<64xf32>, - %rhs: memref<64xf32>, %out: 
memref<64xf32>) { + %rhs: memref<64xf32>, %init: memref<64xf32>) { linalg.map ins(%lhs, %rhs : memref<64xf32>, memref<64xf32>) - outs(%out : memref<64xf32>) - (%in: f32, %in_0: f32) { + outs(%init : memref<64xf32>) + (%in: f32, %in_0: f32, %out: f32) { %0 = arith.addf %in, %in_0 : f32 linalg.yield %0 : f32 } -- cgit v1.2.3 -- cgit v1.2.3 From 521fb93ec2d0de8ee7cdee817260711459125ae8 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 30 Oct 2025 08:42:38 -0700 Subject: [RISCV] Support P extension ABSW instruction. (#165047) --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 1 + llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 14 +++++++++++++- llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 5 +++++ llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp | 1 + llvm/test/CodeGen/RISCV/rv64p.ll | 6 ++---- 5 files changed, 22 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 9a6afa1cd4ea..b25a05400fe3 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3995,6 +3995,7 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits, case RISCV::CTZW: case RISCV::CPOPW: case RISCV::SLLI_UW: + case RISCV::ABSW: case RISCV::FMV_W_X: case RISCV::FCVT_H_W: case RISCV::FCVT_H_W_INX: diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 1c930acd9c4a..56881f71934c 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -433,6 +433,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, if (Subtarget.hasStdExtP() || (Subtarget.hasVendorXCValu() && !Subtarget.is64Bit())) { setOperationAction(ISD::ABS, XLenVT, Legal); + if (Subtarget.is64Bit()) + setOperationAction(ISD::ABS, MVT::i32, Custom); } else if (Subtarget.hasShortForwardBranchOpt()) { // We can use PseudoCCSUB to implement ABS. 
setOperationAction(ISD::ABS, XLenVT, Legal); @@ -14816,8 +14818,16 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() && "Unexpected custom legalisation"); + if (Subtarget.hasStdExtP()) { + SDValue Src = + DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0)); + SDValue Abs = DAG.getNode(RISCVISD::ABSW, DL, MVT::i64, Src); + Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Abs)); + return; + } + if (Subtarget.hasStdExtZbb()) { - // Emit a special ABSW node that will be expanded to NEGW+MAX at isel. + // Emit a special node that will be expanded to NEGW+MAX at isel. // This allows us to remember that the result is sign extended. Expanding // to NEGW+MAX here requires a Freeze which breaks ComputeNumSignBits. SDValue Src = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, @@ -20290,6 +20300,7 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, break; } + case RISCVISD::ABSW: case RISCVISD::CLZW: case RISCVISD::CTZW: { // Only the lower 32 bits of the first operand are read @@ -21862,6 +21873,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( case RISCVISD::REMUW: case RISCVISD::ROLW: case RISCVISD::RORW: + case RISCVISD::ABSW: case RISCVISD::FCVT_W_RV64: case RISCVISD::FCVT_WU_RV64: case RISCVISD::STRICT_FCVT_W_RV64: diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td index cc085bb6c9fd..4cbbba3aa68c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td @@ -1461,5 +1461,10 @@ let Predicates = [HasStdExtP, IsRV32] in { // Codegen patterns //===----------------------------------------------------------------------===// +def riscv_absw : RVSDNode<"ABSW", SDTIntUnaryOp>; + let Predicates = [HasStdExtP] in def : PatGpr; + +let Predicates = [HasStdExtP, IsRV64] in +def : PatGpr; diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp 
index d08115b72977..ea98cdb4a1e6 100644 --- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp +++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp @@ -172,6 +172,7 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI, case RISCV::CTZW: case RISCV::CPOPW: case RISCV::SLLI_UW: + case RISCV::ABSW: case RISCV::FMV_W_X: case RISCV::FCVT_H_W: case RISCV::FCVT_H_W_INX: diff --git a/llvm/test/CodeGen/RISCV/rv64p.ll b/llvm/test/CodeGen/RISCV/rv64p.ll index cb07f945a582..f937f44f1332 100644 --- a/llvm/test/CodeGen/RISCV/rv64p.ll +++ b/llvm/test/CodeGen/RISCV/rv64p.ll @@ -297,8 +297,7 @@ declare i32 @llvm.abs.i32(i32, i1 immarg) define i32 @abs_i32(i32 %x) { ; CHECK-LABEL: abs_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: sext.w a0, a0 -; CHECK-NEXT: abs a0, a0 +; CHECK-NEXT: absw a0, a0 ; CHECK-NEXT: ret %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true) ret i32 %abs @@ -307,8 +306,7 @@ define i32 @abs_i32(i32 %x) { define signext i32 @abs_i32_sext(i32 signext %x) { ; CHECK-LABEL: abs_i32_sext: ; CHECK: # %bb.0: -; CHECK-NEXT: abs a0, a0 -; CHECK-NEXT: sext.w a0, a0 +; CHECK-NEXT: absw a0, a0 ; CHECK-NEXT: ret %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true) ret i32 %abs -- cgit v1.2.3 From b1d5a2a156e96c98360926da17c972f229b015d4 Mon Sep 17 00:00:00 2001 From: Anshil Gandhi <95053726+gandhi56@users.noreply.github.com> Date: Thu, 30 Oct 2025 11:45:02 -0400 Subject: [AMDGPU] Add regbankselect rules for G_ADD/SUB and variants (#159860) Add legalization rules for G_ADD, G_UADDO, G_UADDE and their SUB counterparts. 
--- .../Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp | 15 +- .../Target/AMDGPU/AMDGPURegBankLegalizeHelper.h | 1 + .../Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp | 14 +- .../lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h | 3 +- llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll | 612 +++++++++++++++++++++ .../AMDGPU/GlobalISel/regbankselect-add.mir | 524 ++++++++++++++++++ .../AMDGPU/GlobalISel/regbankselect-add.s16.mir | 19 +- .../AMDGPU/GlobalISel/regbankselect-add.v2s16.mir | 24 +- .../AMDGPU/GlobalISel/regbankselect-sext.mir | 8 + .../AMDGPU/GlobalISel/regbankselect-sub.mir | 479 +++++++++++++++- .../AMDGPU/GlobalISel/regbankselect-zext.mir | 8 + llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll | 535 ++++++++++++++++++ 12 files changed, 2218 insertions(+), 24 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp index 540756653dd2..b84c30ecaac0 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.cpp @@ -500,6 +500,16 @@ void RegBankLegalizeHelper::lowerUnpackMinMax(MachineInstr &MI) { MI.eraseFromParent(); } +void RegBankLegalizeHelper::lowerUnpackAExt(MachineInstr &MI) { + auto [Op1Lo, Op1Hi] = unpackAExt(MI.getOperand(1).getReg()); + auto [Op2Lo, Op2Hi] = unpackAExt(MI.getOperand(2).getReg()); + auto ResLo = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Op1Lo, Op2Lo}); + auto ResHi = B.buildInstr(MI.getOpcode(), {SgprRB_S32}, {Op1Hi, Op2Hi}); + B.buildBuildVectorTrunc(MI.getOperand(0).getReg(), + {ResLo.getReg(0), ResHi.getReg(0)}); + MI.eraseFromParent(); +} + static bool isSignedBFE(MachineInstr &MI) { if (GIntrinsic *GI = dyn_cast(&MI)) return (GI->is(Intrinsic::amdgcn_sbfe)); @@ -804,6 +814,8 @@ void 
RegBankLegalizeHelper::lower(MachineInstr &MI, } break; } + case UnpackAExt: + return lowerUnpackAExt(MI); case WidenMMOToS32: return widenMMOToS32(cast(MI)); } @@ -1120,7 +1132,8 @@ void RegBankLegalizeHelper::applyMappingDst( assert(RB == SgprRB); Register NewDst = MRI.createVirtualRegister(SgprRB_S32); Op.setReg(NewDst); - B.buildTrunc(Reg, NewDst); + if (!MRI.use_empty(Reg)) + B.buildTrunc(Reg, NewDst); break; } case InvalidMapping: { diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h index d937815bf471..ad3ff1d374ec 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeHelper.h @@ -124,6 +124,7 @@ private: void lowerSplitTo32Select(MachineInstr &MI); void lowerSplitTo32SExtInReg(MachineInstr &MI); void lowerUnpackMinMax(MachineInstr &MI); + void lowerUnpackAExt(MachineInstr &MI); }; } // end namespace AMDGPU diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp index a67b12a22589..01abd358ff59 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp @@ -470,7 +470,19 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST, .Uni(S16, {{Sgpr32Trunc}, {Sgpr32AExt, Sgpr32AExt}}) .Div(S16, {{Vgpr16}, {Vgpr16, Vgpr16}}) .Uni(S32, {{Sgpr32}, {Sgpr32, Sgpr32}}) - .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}}); + .Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}}) + .Uni(V2S16, {{SgprV2S16}, {SgprV2S16, SgprV2S16}, UnpackAExt}) + .Div(V2S16, {{VgprV2S16}, {VgprV2S16, VgprV2S16}}) + .Uni(S64, {{Sgpr64}, {Sgpr64, Sgpr64}}) + .Div(S64, {{Vgpr64}, {Vgpr64, Vgpr64}}); + + addRulesForGOpcs({G_UADDO, G_USUBO}, Standard) + .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, Sgpr32}}) + .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32}}); + + addRulesForGOpcs({G_UADDE, G_USUBE}, Standard) + .Uni(S32, {{Sgpr32, Sgpr32Trunc}, {Sgpr32, 
Sgpr32, Sgpr32AExtBoolInReg}}) + .Div(S32, {{Vgpr32, Vcc}, {Vgpr32, Vgpr32, Vcc}}); addRulesForGOpcs({G_MUL}, Standard).Div(S32, {{Vgpr32}, {Vgpr32, Vgpr32}}); diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h index 93e0efda77fd..030bd75f8cd1 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h +++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.h @@ -223,7 +223,8 @@ enum LoweringMethodID { UniCstExt, SplitLoad, WidenLoad, - WidenMMOToS32 + WidenMMOToS32, + UnpackAExt }; enum FastRulesTypes { diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll new file mode 100644 index 000000000000..e11720011af1 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/add.ll @@ -0,0 +1,612 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX12 %s + +define i16 @s_add_i16(i16 inreg %a, i16 inreg %b) { +; GFX7-LABEL: s_add_i16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_add_i32 s16, s16, s17 +; GFX7-NEXT: v_mov_b32_e32 v0, s16 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX9-LABEL: s_add_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_add_i32 s16, s16, s17 +; GFX9-NEXT: v_mov_b32_e32 v0, s16 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_add_i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_add_i32 s16, s16, s17 +; GFX8-NEXT: v_mov_b32_e32 v0, s16 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_add_i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_add_i32 s16, s16, s17 +; GFX10-NEXT: v_mov_b32_e32 v0, s16 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_add_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_add_i32 s0, s0, s1 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_add_i16: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_add_co_i32 s0, s0, s1 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = add i16 %a, %b + ret i16 %c +} + +define i16 @v_add_i16(i16 %a, i16 %b) { +; GFX7-LABEL: v_add_i16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_add_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_u16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_add_i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_add_u16_e32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_add_i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_add_nc_u16 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; 
+; GFX11-LABEL: v_add_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_nc_u16 v0.l, v0.l, v1.l +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_add_i16: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_add_nc_u16 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = add i16 %a, %b + ret i16 %c +} + +define i32 @s_add_i32(i32 inreg %a, i32 inreg %b) { +; GFX7-LABEL: s_add_i32: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_add_i32 s16, s16, s17 +; GFX7-NEXT: v_mov_b32_e32 v0, s16 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_add_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_add_i32 s16, s16, s17 +; GFX9-NEXT: v_mov_b32_e32 v0, s16 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_add_i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_add_i32 s16, s16, s17 +; GFX8-NEXT: v_mov_b32_e32 v0, s16 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_add_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_add_i32 s16, s16, s17 +; GFX10-NEXT: v_mov_b32_e32 v0, s16 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_add_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_add_i32 s0, s0, s1 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_add_i32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_add_co_i32 s0, s0, s1 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c 
= add i32 %a, %b + ret i32 %c +} + +define i32 @v_add_i32(i32 %a, i32 %b) { +; GFX7-LABEL: v_add_i32: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_add_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_add_i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_add_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_add_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_add_i32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_add_nc_u32_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = add i32 %a, %b + ret i32 %c +} + +define <2 x i16> @s_add_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) { +; GFX7-LABEL: s_add_v2i16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_add_i32 s16, s16, s18 +; GFX7-NEXT: s_add_i32 s17, s17, s19 +; GFX7-NEXT: v_mov_b32_e32 v0, s16 +; GFX7-NEXT: v_mov_b32_e32 v1, s17 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_add_v2i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_lshr_b32 s4, s16, 16 +; GFX9-NEXT: s_lshr_b32 s5, s17, 16 +; GFX9-NEXT: s_add_i32 s16, s16, s17 +; GFX9-NEXT: s_add_i32 s4, s4, s5 +; GFX9-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX9-NEXT: v_mov_b32_e32 v0, 
s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_add_v2i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_lshr_b32 s4, s16, 16 +; GFX8-NEXT: s_lshr_b32 s5, s17, 16 +; GFX8-NEXT: s_add_i32 s4, s4, s5 +; GFX8-NEXT: s_add_i32 s16, s16, s17 +; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 +; GFX8-NEXT: s_and_b32 s5, 0xffff, s16 +; GFX8-NEXT: s_lshl_b32 s4, s4, 16 +; GFX8-NEXT: s_or_b32 s4, s5, s4 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_add_v2i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_lshr_b32 s4, s16, 16 +; GFX10-NEXT: s_lshr_b32 s5, s17, 16 +; GFX10-NEXT: s_add_i32 s16, s16, s17 +; GFX10-NEXT: s_add_i32 s4, s4, s5 +; GFX10-NEXT: s_pack_ll_b32_b16 s4, s16, s4 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_add_v2i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_lshr_b32 s2, s0, 16 +; GFX11-NEXT: s_lshr_b32 s3, s1, 16 +; GFX11-NEXT: s_add_i32 s0, s0, s1 +; GFX11-NEXT: s_add_i32 s2, s2, s3 +; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s2 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_add_v2i16: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_lshr_b32 s2, s0, 16 +; GFX12-NEXT: s_lshr_b32 s3, s1, 16 +; GFX12-NEXT: s_add_co_i32 s0, s0, s1 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_add_co_i32 s2, s2, s3 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: s_pack_ll_b32_b16 s0, s0, s2 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = add <2 x i16> %a, %b + ret <2 x i16> %c +} + +define <2 x i16> @v_add_v2i16(<2 x i16> %a, <2 x i16> %b) { +; GFX7-LABEL: v_add_v2i16: +; GFX7: ; %bb.0: 
+; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT: v_add_i32_e32 v1, vcc, v1, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_add_v2i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_add_u16 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_add_v2i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_add_u16_e32 v2, v0, v1 +; GFX8-NEXT: v_add_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_add_v2i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_pk_add_u16 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_add_v2i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_pk_add_u16 v0, v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_add_v2i16: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_add_u16 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = add <2 x i16> %a, %b + ret <2 x i16> %c +} + +define i64 @s_add_i64(i64 inreg %a, i64 inreg %b) { +; GFX7-LABEL: s_add_i64: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_add_u32 s4, s16, s18 +; GFX7-NEXT: s_addc_u32 s5, s17, s19 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_add_i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_add_u32 s4, s16, s18 +; GFX9-NEXT: s_addc_u32 s5, s17, s19 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: s_setpc_b64 
s[30:31] +; +; GFX8-LABEL: s_add_i64: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_add_u32 s4, s16, s18 +; GFX8-NEXT: s_addc_u32 s5, s17, s19 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: v_mov_b32_e32 v1, s5 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_add_i64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s4, s16, s18 +; GFX10-NEXT: s_addc_u32 s5, s17, s19 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: v_mov_b32_e32 v1, s5 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_add_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_add_u32 s0, s0, s2 +; GFX11-NEXT: s_addc_u32 s1, s1, s3 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_add_i64: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3] +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = add i64 %a, %b + ret i64 %c +} + +define i64 @v_add_i64(i64 %a, i64 %b) { +; GFX7-LABEL: v_add_i64: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_add_i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_add_i64: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc 
+; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_add_i64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_add_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_add_i64: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX12-NEXT: s_wait_alu 0xfffd +; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null, v1, v3, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = add i64 %a, %b + ret i64 %c +} + +define void @s_uaddo_uadde(i64 inreg %a, i64 inreg %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) { +; GFX7-LABEL: s_uaddo_uadde: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_add_u32 s4, s16, s18 +; GFX7-NEXT: s_addc_u32 s5, s17, s19 +; GFX7-NEXT: v_mov_b32_e32 v4, s4 +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_cselect_b32 s8, 1, 0 +; GFX7-NEXT: v_mov_b32_e32 v5, s5 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: v_mov_b32_e32 v0, s8 +; GFX7-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_uaddo_uadde: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_add_u32 s4, s16, s18 +; GFX9-NEXT: s_addc_u32 s5, s17, s19 +; GFX9-NEXT: v_mov_b32_e32 v4, s4 +; GFX9-NEXT: s_cselect_b32 s6, 1, 0 +; GFX9-NEXT: v_mov_b32_e32 v5, s5 +; GFX9-NEXT: 
global_store_dwordx2 v[0:1], v[4:5], off +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: global_store_dword v[2:3], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_uaddo_uadde: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_add_u32 s4, s16, s18 +; GFX8-NEXT: s_addc_u32 s5, s17, s19 +; GFX8-NEXT: v_mov_b32_e32 v4, s4 +; GFX8-NEXT: s_cselect_b32 s6, 1, 0 +; GFX8-NEXT: v_mov_b32_e32 v5, s5 +; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[4:5] +; GFX8-NEXT: v_mov_b32_e32 v0, s6 +; GFX8-NEXT: flat_store_dword v[2:3], v0 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_uaddo_uadde: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_add_u32 s4, s16, s18 +; GFX10-NEXT: s_addc_u32 s5, s17, s19 +; GFX10-NEXT: s_cselect_b32 s6, 1, 0 +; GFX10-NEXT: v_mov_b32_e32 v4, s4 +; GFX10-NEXT: v_mov_b32_e32 v5, s5 +; GFX10-NEXT: v_mov_b32_e32 v6, s6 +; GFX10-NEXT: global_store_dwordx2 v[0:1], v[4:5], off +; GFX10-NEXT: global_store_dword v[2:3], v6, off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_uaddo_uadde: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_add_u32 s0, s0, s2 +; GFX11-NEXT: s_addc_u32 s1, s1, s3 +; GFX11-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0 +; GFX11-NEXT: v_mov_b32_e32 v6, s2 +; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off +; GFX11-NEXT: global_store_b32 v[2:3], v6, off +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_uaddo_uadde: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_add_co_u32 s0, s0, s2 +; GFX12-NEXT: s_add_co_ci_u32 s1, s1, s3 +; GFX12-NEXT: s_cselect_b32 s2, 1, 0 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: 
v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0 +; GFX12-NEXT: v_mov_b32_e32 v6, s2 +; GFX12-NEXT: global_store_b64 v[0:1], v[4:5], off +; GFX12-NEXT: global_store_b32 v[2:3], v6, off +; GFX12-NEXT: s_setpc_b64 s[30:31] + %uaddo = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %add = extractvalue {i64, i1} %uaddo, 0 + %of = extractvalue {i64, i1} %uaddo, 1 + %of32 = select i1 %of, i32 1, i32 0 + store i64 %add, ptr addrspace(1) %res + store i32 %of32, ptr addrspace(1) %carry + ret void +} + +define void @v_uaddo_uadde(i64 %a, i64 %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) { +; GFX7-LABEL: v_uaddo_uadde: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX7-NEXT: buffer_store_dwordx2 v[0:1], v[4:5], s[4:7], 0 addr64 +; GFX7-NEXT: buffer_store_dword v2, v[6:7], s[4:7], 0 addr64 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_uaddo_uadde: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: global_store_dwordx2 v[4:5], v[0:1], off +; GFX9-NEXT: global_store_dword v[6:7], v2, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_uaddo_uadde: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_addc_u32_e32 v1, vcc, v1, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: flat_store_dwordx2 v[4:5], v[0:1] +; GFX8-NEXT: flat_store_dword v[6:7], v2 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: 
v_uaddo_uadde: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX10-NEXT: global_store_dwordx2 v[4:5], v[0:1], off +; GFX10-NEXT: global_store_dword v[6:7], v2, off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_uaddo_uadde: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX11-NEXT: global_store_b64 v[4:5], v[0:1], off +; GFX11-NEXT: global_store_b32 v[6:7], v2, off +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_uaddo_uadde: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2 +; GFX12-NEXT: s_wait_alu 0xfffd +; GFX12-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX12-NEXT: s_wait_alu 0xfffd +; GFX12-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off +; GFX12-NEXT: global_store_b32 v[6:7], v2, off +; GFX12-NEXT: s_setpc_b64 s[30:31] + %uaddo = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %add = extractvalue {i64, i1} %uaddo, 0 + %of = extractvalue {i64, i1} %uaddo, 1 + %of32 = select i1 %of, i32 1, i32 0 + store i64 %add, ptr addrspace(1) %res + store i32 %of32, ptr addrspace(1) %carry + ret void +} + +declare {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir new file mode 100644 index 000000000000..097372a95746 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.mir 
@@ -0,0 +1,524 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s +--- +name: add_s16_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: add_s16_ss + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ADD]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC2]], [[TRUNC2]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_ADD %2, %3 + %5:_(s16) = G_AND %4, %4 +... + +--- +name: add_s16_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: add_s16_sv + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_ADD %2, %3 + %5:_(s16) = G_AND %4, %4 +... 
+ +--- +name: add_s16_vs +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: add_s16_vs + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[COPY2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_ADD %2, %3 + %5:_(s16) = G_AND %4, %4 +... + +--- +name: add_s16_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: add_s16_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_ADD %2, %3 + %5:_(s16) = G_AND %4, %4 +... 
+ +--- +name: add_s32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: add_s32_ss + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[ADD]], [[ADD]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = G_ADD %0, %1 + %3:_(s32) = G_AND %2, %2 +... + +--- +name: add_s32_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: add_s32_sv + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ADD]], [[ADD]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s32) = G_ADD %0, %1 + %3:_(s32) = G_AND %2, %2 +... + +--- +name: add_s32_vs +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: add_s32_vs + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ADD]], [[ADD]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(s32) = G_ADD %0, %1 + %3:_(s32) = G_AND %2, %2 +... 
+ +--- +name: add_s32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: add_s32_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s32) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[ADD]], [[ADD]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_ADD %0, %1 + %3:_(s32) = G_AND %2, %2 +... + +--- +name: add_s64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-LABEL: name: add_s64_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr2_sgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr2_sgpr3 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s64) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s64) = G_CONSTANT i64 255 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s64) = G_AND [[ADD]], [[ADD]] + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr2_sgpr3 + %2:_(s64) = G_ADD %0, %1 + %3:_(s64) = G_CONSTANT i64 255 + %4:_(s64) = G_AND %2, %2 +... 
+ +--- +name: add_s64_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; CHECK-LABEL: name: add_s64_sv + ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $vgpr0_vgpr1 + %2:_(s64) = G_ADD %0, %1 + %3:_(s64) = G_AND %2, %2 +... 
+ +--- +name: add_s64_vs +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; CHECK-LABEL: name: add_s64_vs + ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $sgpr0_sgpr1 + %2:_(s64) = G_ADD %0, %1 + %3:_(s64) = G_AND %2, %2 +... + +--- +name: add_s64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: add_s64_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s64) = G_ADD [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[ADD]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_ADD %0, %1 + %3:_(s64) = G_AND %2, %2 +... 
+ +--- +name: uaddo_s32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: uaddo_s32_ss + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:sgpr(s32), [[UADDO1:%[0-9]+]]:sgpr(s32) = G_UADDO [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[UADDO1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[SELECT]], [[UADDO]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32), %3:_(s1) = G_UADDO %0, %1 + %4:_(s32) = G_ZEXT %3 + %5:_(s32) = G_AND %4, %2 +... + +--- +name: uaddo_s32_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr1 + ; CHECK-LABEL: name: uaddo_s32_sv + ; CHECK: liveins: $sgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDO1]](s1), [[C]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDO]], [[SELECT]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32), %3:_(s1) = G_UADDO %0, %1 + %4:_(s32) = G_ZEXT %3 + %5:_(s32) = G_AND %2, %4 +... 
+ +--- +name: uaddo_s32_vs +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $sgpr1 + ; CHECK-LABEL: name: uaddo_s32_vs + ; CHECK: liveins: $vgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDO1]](s1), [[C]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDO]], [[SELECT]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32), %3:_(s1) = G_UADDO %0, %1 + %4:_(s32) = G_ZEXT %3 + %5:_(s32) = G_AND %2, %4 +... + +--- +name: uaddo_s32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: uaddo_s32_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[UADDO:%[0-9]+]]:vgpr(s32), [[UADDO1:%[0-9]+]]:vcc(s1) = G_UADDO [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDO1]](s1), [[C]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDO]], [[SELECT]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32), %3:_(s1) = G_UADDO %0, %1 + %4:_(s32) = G_ZEXT %3 + %5:_(s32) = G_AND %2, %4 +... 
+ +--- +name: uadde_s32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-LABEL: name: uadde_s32_ss + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[C]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[COPY]], [[COPY1]], [[AND]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND1]](s32), [[C]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE]], [[SELECT]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s1) = G_TRUNC %2 + %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3 + %6:_(s32) = G_ZEXT %5 + %7:_(s32) = G_AND %4, %6 +... 
+ +--- +name: uadde_s32_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr1, $sgpr2 + ; CHECK-LABEL: name: uadde_s32_sv + ; CHECK: liveins: $sgpr0, $vgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32) + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY3]], [[COPY1]], [[AMDGPU_COPY_VCC_SCC]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDE1]](s1), [[C]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDE]], [[SELECT]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s1) = G_TRUNC %2 + %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3 + %6:_(s32) = G_ZEXT %5 + %7:_(s32) = G_AND %4, %6 +... 
+ +--- +name: uadde_s32_vs +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $sgpr1, $sgpr2 + ; CHECK-LABEL: name: uadde_s32_vs + ; CHECK: liveins: $vgpr0, $sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32) + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY3]], [[AMDGPU_COPY_VCC_SCC]] + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDE1]](s1), [[C]], [[C1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UADDE]], [[SELECT]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s1) = G_TRUNC %2 + %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3 + %6:_(s32) = G_ZEXT %5 + %7:_(s32) = G_AND %4, %6 +... 
+ +--- +name: uadde_s32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-LABEL: name: uadde_s32_vv + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY2]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:vgpr(s32), [[UADDE1:%[0-9]+]]:vcc(s1) = G_UADDE [[COPY]], [[COPY1]], [[ICMP]] + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[UADDE1]](s1), [[C]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UADDE]], [[SELECT]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s1) = G_TRUNC %2 + %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3 + %6:_(s32) = G_ZEXT %5 + %7:_(s32) = G_AND %4, %6 +... 
+ +--- +name: uadde_s32_ss_scc_use +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-LABEL: name: uadde_s32_ss_scc_use + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[C]] + ; CHECK-NEXT: [[UADDE:%[0-9]+]]:sgpr(s32), [[UADDE1:%[0-9]+]]:sgpr(s32) = G_UADDE [[COPY]], [[COPY1]], [[AND]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE1]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND1]](s32), [[C]], [[C1]] + ; CHECK-NEXT: [[AND2:%[0-9]+]]:sgpr(s32) = G_AND [[UADDE]], [[SELECT]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s1) = G_TRUNC %2 + %4:_(s32), %5:_(s1) = G_UADDE %0, %1, %3 + %6:_(s32) = G_ZEXT %5 + %8:_(s32) = G_AND %4, %6 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir index 54ee69fcb220..30c958fcb192 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.s16.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s --- name: add_s16_ss legalized: true @@ -19,13 +18,13 @@ body: | ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16) ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[ANYEXT]], [[ANYEXT1]] ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[ADD]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[TRUNC2]](s16) + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC2]], [[TRUNC2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s16) = G_TRUNC %0 %3:_(s16) = G_TRUNC %1 %4:_(s16) = G_ADD %2, %3 - S_ENDPGM 0, implicit %4 + %5:_(s16) = G_AND %4, %4 ... --- @@ -44,13 +43,13 @@ body: | ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[COPY2]], [[TRUNC1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s16) = G_TRUNC %0 %3:_(s16) = G_TRUNC %1 %4:_(s16) = G_ADD %2, %3 - S_ENDPGM 0, implicit %4 + %5:_(s16) = G_AND %4, %4 ... 
--- @@ -69,13 +68,13 @@ body: | ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[COPY2]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s16) = G_TRUNC %0 %3:_(s16) = G_TRUNC %1 %4:_(s16) = G_ADD %2, %3 - S_ENDPGM 0, implicit %4 + %5:_(s16) = G_AND %4, %4 ... --- @@ -93,11 +92,11 @@ body: | ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(s16) = G_ADD [[TRUNC]], [[TRUNC1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](s16) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[ADD]], [[ADD]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 %3:_(s16) = G_TRUNC %1 %4:_(s16) = G_ADD %2, %3 - S_ENDPGM 0, implicit %4 + %5:_(s16) = G_AND %4, %4 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir index 97018fac13a8..01eb39111b0a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-add.v2s16.mir @@ -1,6 +1,5 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s -# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=regbankselect %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - | FileCheck %s --- name: add_v2s16_ss @@ -18,16 +17,19 @@ body: | ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32) ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>) - ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C1]](s32) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C]](s32) ; CHECK-NEXT: [[ADD:%[0-9]+]]:sgpr(s32) = G_ADD [[BITCAST]], [[BITCAST1]] ; CHECK-NEXT: [[ADD1:%[0-9]+]]:sgpr(s32) = G_ADD [[LSHR]], [[LSHR1]] ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[ADD]](s32), [[ADD1]](s32) - ; CHECK-NEXT: S_ENDPGM 0, implicit [[BUILD_VECTOR_TRUNC]](<2 x s16>) + ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR [[C1]](s16), [[C1]](s16) + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR]] %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $sgpr1 %2:_(<2 x s16>) = G_ADD %0, %1 - S_ENDPGM 0, implicit %2 
+ %3:_(s16) = G_CONSTANT i16 255 + %4:_(<2 x s16>) = G_BUILD_VECTOR %3, %3 + %5:_(<2 x s16>) = G_AND %2, %4 ... --- @@ -44,11 +46,11 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY2]], [[COPY1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](<2 x s16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ADD]], [[ADD]] %0:_(<2 x s16>) = COPY $sgpr0 %1:_(<2 x s16>) = COPY $vgpr0 %2:_(<2 x s16>) = G_ADD %0, %1 - S_ENDPGM 0, implicit %2 + %3:_(<2 x s16>) = G_AND %2, %2 ... --- @@ -65,9 +67,11 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ADD]], [[ADD]] %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $sgpr0 %2:_(<2 x s16>) = G_ADD %0, %1 + %3:_(<2 x s16>) = G_AND %2, %2 ... --- @@ -83,9 +87,9 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 ; CHECK-NEXT: [[ADD:%[0-9]+]]:vgpr(<2 x s16>) = G_ADD [[COPY]], [[COPY1]] - ; CHECK-NEXT: S_ENDPGM 0, implicit [[ADD]](<2 x s16>) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[ADD]], [[ADD]] %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_ADD %0, %1 - S_ENDPGM 0, implicit %2 + %3:_(<2 x s16>) = G_AND %2, %2 ... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir index 7378c9366ec3..e0e783e7a62f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sext.mir @@ -77,10 +77,14 @@ body: | ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C3]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 %3:_(s16) = G_SEXT %2 + %4:_(s16) = G_CONSTANT i16 255 + %5:_(s16) = G_AND %3, %4 ... --- @@ -215,9 +219,13 @@ body: | ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C1]], [[C2]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32) + ; CHECK-NEXT: [[C3:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C3]] %0:_(s32) = COPY $sgpr0 %1:_(s1) = G_TRUNC %0 %2:_(s16) = G_SEXT %1 + %3:_(s16) = G_CONSTANT i16 255 + %4:_(s16) = G_AND %2, %3 ... 
--- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir index b0199d3ad5cd..e3c01c0e7fcb 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-sub.mir @@ -1,5 +1,107 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -# RUN: llc -mtriple=amdgcn -mcpu=fiji -run-pass="amdgpu-regbankselect,amdgpu-regbanklegalize" %s -o - | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - -regbankselect-fast | FileCheck %s +# RUN: llc -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-regbankselect,amdgpu-regbanklegalize %s -verify-machineinstrs -o - -regbankselect-greedy | FileCheck %s + +--- +name: sub_s16_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: sub_s16_ss + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC]](s16) + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:sgpr(s32) = G_ANYEXT [[TRUNC1]](s16) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[ANYEXT]], [[ANYEXT1]] + ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SUB]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC2]], [[TRUNC2]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_SUB %2, %3 + %6:_(s16) = G_AND %4, %4 +... 
+ +--- +name: sub_s16_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: sub_s16_sv + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC]](s16) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s16) = G_SUB [[COPY2]], [[TRUNC1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[SUB]], [[SUB]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr0 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_SUB %2, %3 + %6:_(s16) = G_AND %4, %4 +... + +--- +name: sub_s16_vs +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: sub_s16_vs + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:sgpr(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s16) = COPY [[TRUNC1]](s16) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s16) = G_SUB [[TRUNC]], [[COPY2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[SUB]], [[SUB]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr0 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_SUB %2, %3 + %6:_(s16) = G_AND %4, %4 +... 
+ +--- +name: sub_s16_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: sub_s16_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY]](s32) + ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:vgpr(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s16) = G_SUB [[TRUNC]], [[TRUNC1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s16) = G_AND [[SUB]], [[SUB]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_SUB %2, %3 + %6:_(s16) = G_AND %4, %4 +... --- name: sub_s32_ss @@ -14,9 +116,11 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[SUB]], [[SUB]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s32) = G_SUB %0, %1 + %4:_(s32) = G_AND %2, %2 ... --- @@ -33,9 +137,11 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[SUB]], [[SUB]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $vgpr0 %2:_(s32) = G_SUB %0, %1 + %4:_(s32) = G_AND %2, %2 ... --- @@ -52,9 +158,11 @@ body: | ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[SUB]], [[SUB]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $sgpr0 %2:_(s32) = G_SUB %0, %1 + %4:_(s32) = G_AND %2, %2 ... 
--- @@ -70,7 +178,376 @@ body: | ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s32) = G_SUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[SUB]], [[SUB]] %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = G_SUB %0, %1 + %4:_(s32) = G_AND %2, %2 +... + +--- +name: sub_v2s16_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: sub_v2s16_ss + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr1 + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[LSHR:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK-NEXT: [[BITCAST1:%[0-9]+]]:sgpr(s32) = G_BITCAST [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:sgpr(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s32) = G_SUB [[BITCAST]], [[BITCAST1]] + ; CHECK-NEXT: [[SUB1:%[0-9]+]]:sgpr(s32) = G_SUB [[LSHR]], [[LSHR1]] + ; CHECK-NEXT: [[BUILD_VECTOR_TRUNC:%[0-9]+]]:sgpr(<2 x s16>) = G_BUILD_VECTOR_TRUNC [[SUB]](s32), [[SUB1]](s32) + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(<2 x s16>) = G_AND [[BUILD_VECTOR_TRUNC]], [[BUILD_VECTOR_TRUNC]] + %0:_(<2 x s16>) = COPY $sgpr0 + %1:_(<2 x s16>) = COPY $sgpr1 + %2:_(<2 x s16>) = G_SUB %0, %1 + %5:_(<2 x s16>) = G_AND %2, %2 +... 
+ +--- +name: sub_v2s16_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: sub_v2s16_sv + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY]](<2 x s16>) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x s16>) = G_SUB [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[SUB]], [[SUB]] + %0:_(<2 x s16>) = COPY $sgpr0 + %1:_(<2 x s16>) = COPY $vgpr0 + %2:_(<2 x s16>) = G_SUB %0, %1 + %5:_(<2 x s16>) = G_AND %2, %2 +... + +--- +name: sub_v2s16_vs +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr0 + ; CHECK-LABEL: name: sub_v2s16_vs + ; CHECK: liveins: $sgpr0, $vgpr0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(<2 x s16>) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(<2 x s16>) = COPY [[COPY1]](<2 x s16>) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x s16>) = G_SUB [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[SUB]], [[SUB]] + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $sgpr0 + %2:_(<2 x s16>) = G_SUB %0, %1 + %5:_(<2 x s16>) = G_AND %2, %2 +... + +--- +name: sub_v2s16_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: sub_v2s16_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(<2 x s16>) = COPY $vgpr1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(<2 x s16>) = G_SUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(<2 x s16>) = G_AND [[SUB]], [[SUB]] + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<2 x s16>) = G_SUB %0, %1 + %5:_(<2 x s16>) = G_AND %2, %2 +... 
+ +--- +name: sub_s64_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $sgpr0_sgpr1 + ; CHECK-LABEL: name: sub_s64_ss + ; CHECK: liveins: $sgpr0_sgpr1, $sgpr0_sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:sgpr(s64) = G_SUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s64) = G_AND [[SUB]], [[SUB]] + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $sgpr0_sgpr1 + %2:_(s64) = G_SUB %0, %1 + %4:_(s64) = G_AND %2, %2 +... + +--- +name: sub_s64_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; CHECK-LABEL: name: sub_s64_sv + ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY]](s64) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s64) = G_SUB [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(s64) = COPY $sgpr0_sgpr1 + %1:_(s64) = COPY $vgpr0_vgpr1 + %2:_(s64) = G_SUB %0, %1 + %4:_(s64) = G_AND %2, %2 +... 
+ +--- +name: sub_s64_vs +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; CHECK-LABEL: name: sub_s64_vs + ; CHECK: liveins: $sgpr0_sgpr1, $vgpr0_vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s64) = COPY [[COPY1]](s64) + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s64) = G_SUB [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $sgpr0_sgpr1 + %2:_(s64) = G_SUB %0, %1 + %4:_(s64) = G_AND %2, %2 +... + +--- +name: sub_s64_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-LABEL: name: sub_s64_vv + ; CHECK: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s64) = COPY $vgpr0_vgpr1 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s64) = COPY $vgpr2_vgpr3 + ; CHECK-NEXT: [[SUB:%[0-9]+]]:vgpr(s64) = G_SUB [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[UV:%[0-9]+]]:vgpr(s32), [[UV1:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64) + ; CHECK-NEXT: [[UV2:%[0-9]+]]:vgpr(s32), [[UV3:%[0-9]+]]:vgpr(s32) = G_UNMERGE_VALUES [[SUB]](s64) + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[UV]], [[UV2]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[UV1]], [[UV3]] + ; CHECK-NEXT: [[MV:%[0-9]+]]:vgpr(s64) = G_MERGE_VALUES [[AND]](s32), [[AND1]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_SUB %0, %1 + %4:_(s64) = G_AND %2, %2 +... 
+ +--- +name: usubo_s32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1 + ; CHECK-LABEL: name: usubo_s32_ss + ; CHECK: liveins: $sgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:sgpr(s32), [[USUBO1:%[0-9]+]]:sgpr(s32) = G_USUBO [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[USUBO]], [[USUBO]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32), %3:_(s1) = G_USUBO %0, %1 + %5:_(s32) = G_AND %2, %2 +... + +--- +name: usubo_s32_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr1 + ; CHECK-LABEL: name: usubo_s32_sv + ; CHECK: liveins: $sgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY2]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBO]], [[USUBO]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32), %3:_(s1) = G_USUBO %0, %1 + %5:_(s32) = G_AND %2, %2 +... + +--- +name: usubo_s32_vs +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $sgpr1 + ; CHECK-LABEL: name: usubo_s32_vs + ; CHECK: liveins: $vgpr0, $sgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY]], [[COPY2]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBO]], [[USUBO]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32), %3:_(s1) = G_USUBO %0, %1 + %5:_(s32) = G_AND %2, %2 +... 
+ +--- +name: usubo_s32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + ; CHECK-LABEL: name: usubo_s32_vv + ; CHECK: liveins: $vgpr0, $vgpr1 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[USUBO:%[0-9]+]]:vgpr(s32), [[USUBO1:%[0-9]+]]:vcc(s1) = G_USUBO [[COPY]], [[COPY1]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBO]], [[USUBO]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32), %3:_(s1) = G_USUBO %0, %1 + %5:_(s32) = G_AND %2, %2 +... + +--- +name: usube_s32_ss +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-LABEL: name: usube_s32_ss + ; CHECK: liveins: $sgpr0, $sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:sgpr(s32) = G_AND [[COPY2]], [[C]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:sgpr(s32), [[USUBE1:%[0-9]+]]:sgpr(s32) = G_USUBE [[COPY]], [[COPY1]], [[AND]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s32) = G_AND [[USUBE]], [[USUBE]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s1) = G_TRUNC %2 + %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3 + %7:_(s32) = G_AND %4, %4 +... 
+ +--- +name: usube_s32_sv +legalized: true + +body: | + bb.0: + liveins: $sgpr0, $vgpr1, $sgpr2 + ; CHECK-LABEL: name: usube_s32_sv + ; CHECK: liveins: $sgpr0, $vgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) + ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32) + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY3]], [[COPY1]], [[AMDGPU_COPY_VCC_SCC]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBE]], [[USUBE]] + %0:_(s32) = COPY $sgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s1) = G_TRUNC %2 + %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3 + %7:_(s32) = G_AND %4, %4 +... + +--- +name: usube_s32_vs +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $sgpr1, $sgpr2 + ; CHECK-LABEL: name: usube_s32_vs + ; CHECK: liveins: $vgpr0, $sgpr1, $sgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) + ; CHECK-NEXT: [[AMDGPU_COPY_VCC_SCC:%[0-9]+]]:vcc(s1) = G_AMDGPU_COPY_VCC_SCC [[COPY2]](s32) + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY3]], [[AMDGPU_COPY_VCC_SCC]] + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[USUBE]], [[USUBE]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $sgpr1 + %2:_(s32) = COPY $sgpr2 + %3:_(s1) = G_TRUNC %2 + %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3 + %7:_(s32) = G_AND %4, %4 +... 
+ +--- +name: usube_s32_vv +legalized: true + +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-LABEL: name: usube_s32_vv + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY $vgpr2 + ; CHECK-NEXT: [[C:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:vgpr(s32) = G_AND [[COPY2]], [[C]] + ; CHECK-NEXT: [[C1:%[0-9]+]]:vgpr(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:vcc(s1) = G_ICMP intpred(ne), [[AND]](s32), [[C1]] + ; CHECK-NEXT: [[USUBE:%[0-9]+]]:vgpr(s32), [[USUBE1:%[0-9]+]]:vcc(s1) = G_USUBE [[COPY]], [[COPY1]], [[ICMP]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:vgpr(s32) = G_AND [[USUBE]], [[USUBE]] + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s1) = G_TRUNC %2 + %4:_(s32), %5:_(s1) = G_USUBE %0, %1, %3 + %7:_(s32) = G_AND %4, %4 ... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir index 088c20a3137f..d4baa5fb864f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/regbankselect-zext.mir @@ -73,10 +73,14 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C]], [[C1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C2]] %0:_(s32) = COPY $sgpr0 %1:_(s32) = COPY $sgpr1 %2:_(s1) = G_ICMP intpred(eq), %0, %1 %3:_(s16) = G_ZEXT %2 + %4:_(s16) = G_CONSTANT i16 255 + %5:_(s16) = G_AND %3, %4 ... 
--- @@ -209,9 +213,13 @@ body: | ; CHECK-NEXT: [[C1:%[0-9]+]]:sgpr(s32) = G_CONSTANT i32 0 ; CHECK-NEXT: [[SELECT:%[0-9]+]]:sgpr(s32) = G_SELECT [[AND]](s32), [[C]], [[C1]] ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:sgpr(s16) = G_TRUNC [[SELECT]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:sgpr(s16) = G_CONSTANT i16 255 + ; CHECK-NEXT: [[AND1:%[0-9]+]]:sgpr(s16) = G_AND [[TRUNC]], [[C2]] %0:_(s32) = COPY $sgpr0 %1:_(s1) = G_TRUNC %0 %2:_(s16) = G_ZEXT %1 + %3:_(s16) = G_CONSTANT i16 255 + %4:_(s16) = G_AND %2, %3 ... --- diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll new file mode 100644 index 000000000000..8b5958daac16 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sub.ll @@ -0,0 +1,535 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=hawaii < %s | FileCheck -check-prefix=GFX7 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s +; RUN: llc -global-isel -new-reg-bank-select -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX12 %s + +define i16 @s_sub_i16(i16 inreg %a, i16 inreg %b) { +; GFX7-LABEL: s_sub_i16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_sub_i32 s4, s16, s17 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_sub_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_sub_i32 s4, s16, s17 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_sub_i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_sub_i32 s4, s16, s17 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_sub_i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_sub_i32 s4, s16, s17 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_sub_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_sub_i32 s0, s0, s1 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_sub_i16: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_sub_co_i32 s0, s0, s1 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = sub i16 %a, %b + ret i16 %c +} + +define i16 @v_sub_i16(i16 %a, i16 %b) { +; GFX7-LABEL: v_sub_i16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_sub_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_u16_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_sub_i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_sub_u16_e32 v0, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_sub_i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_sub_nc_u16 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_sub_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_nc_u16 v0.l, v0.l, v1.l +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_sub_i16: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_sub_nc_u16 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = sub i16 %a, %b + ret i16 %c +} + +define i32 @s_sub_i32(i32 inreg %a, i32 inreg %b) { +; GFX7-LABEL: s_sub_i32: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_sub_i32 s4, s16, s17 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_sub_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_sub_i32 s4, s16, s17 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_sub_i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_sub_i32 s4, s16, s17 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_sub_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_sub_i32 s4, s16, s17 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_sub_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_sub_i32 s0, s0, s1 +; GFX11-NEXT: v_mov_b32_e32 v0, s0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_sub_i32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_sub_co_i32 s0, s0, s1 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: v_mov_b32_e32 v0, s0 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = sub i32 %a, %b + ret i32 %c +} + +define i32 @v_sub_i32(i32 %a, i32 %b) { +; 
GFX7-LABEL: v_sub_i32: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v1 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_sub_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_u32_e32 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_sub_i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_sub_i32: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_sub_nc_u32_e32 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_sub_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_nc_u32_e32 v0, v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_sub_i32: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_sub_nc_u32_e32 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = sub i32 %a, %b + ret i32 %c +} + +; TODO: Add test for s_sub_v2i16. Instruction selector currently fails +; to handle G_UNMERGE_VALUES. 
+ +define <2 x i16> @v_sub_v2i16(<2 x i16> %a, <2 x i16> %b) { +; GFX7-LABEL: v_sub_v2i16: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT: v_sub_i32_e32 v1, vcc, v1, v3 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_sub_v2i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_sub_i16 v0, v0, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_sub_v2i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_sub_u16_e32 v2, v0, v1 +; GFX8-NEXT: v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 +; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_sub_v2i16: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_pk_sub_i16 v0, v0, v1 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_sub_v2i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_pk_sub_i16 v0, v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_sub_v2i16: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_pk_sub_i16 v0, v0, v1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = sub <2 x i16> %a, %b + ret <2 x i16> %c +} + +define i64 @s_sub_i64(i64 inreg %a, i64 inreg %b) { +; GFX7-LABEL: s_sub_i64: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_sub_u32 s4, s16, s18 +; GFX7-NEXT: s_subb_u32 s5, s17, s19 +; GFX7-NEXT: v_mov_b32_e32 v0, s4 +; GFX7-NEXT: v_mov_b32_e32 v1, s5 +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_sub_i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_sub_u32 s4, s16, s18 +; GFX9-NEXT: s_subb_u32 s5, 
s17, s19 +; GFX9-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_sub_i64: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_sub_u32 s4, s16, s18 +; GFX8-NEXT: s_subb_u32 s5, s17, s19 +; GFX8-NEXT: v_mov_b32_e32 v0, s4 +; GFX8-NEXT: v_mov_b32_e32 v1, s5 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_sub_i64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_sub_u32 s4, s16, s18 +; GFX10-NEXT: s_subb_u32 s5, s17, s19 +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: v_mov_b32_e32 v1, s5 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_sub_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_sub_u32 s0, s0, s2 +; GFX11-NEXT: s_subb_u32 s1, s1, s3 +; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_sub_i64: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_sub_nc_u64 s[0:1], s[0:1], s[2:3] +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = sub i64 %a, %b + ret i64 %c +} + +define i64 @v_sub_i64(i64 %a, i64 %b) { +; GFX7-LABEL: v_sub_i64: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_sub_i64: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_sub_i64: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_sub_i64: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_sub_i64: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX11-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_sub_i64: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX12-NEXT: s_wait_alu 0xfffd +; GFX12-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo +; GFX12-NEXT: s_setpc_b64 s[30:31] + %c = sub i64 %a, %b + ret i64 %c +} + +define void @s_usubo_usube(i64 inreg %a, i64 inreg %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) { +; GFX7-LABEL: s_usubo_usube: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: s_sub_u32 s4, s16, s18 +; GFX7-NEXT: s_subb_u32 s5, s17, s19 +; GFX7-NEXT: v_mov_b32_e32 v4, s4 +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_cselect_b32 s8, 1, 0 +; GFX7-NEXT: v_mov_b32_e32 v5, s5 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[0:1], s[4:7], 0 addr64 +; GFX7-NEXT: v_mov_b32_e32 v0, s8 +; GFX7-NEXT: buffer_store_dword v0, v[2:3], s[4:7], 0 addr64 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: s_usubo_usube: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: s_sub_u32 s4, s16, s18 +; GFX9-NEXT: s_subb_u32 s5, s17, s19 +; GFX9-NEXT: 
v_mov_b32_e32 v4, s4 +; GFX9-NEXT: s_cselect_b32 s6, 1, 0 +; GFX9-NEXT: v_mov_b32_e32 v5, s5 +; GFX9-NEXT: global_store_dwordx2 v[0:1], v[4:5], off +; GFX9-NEXT: v_mov_b32_e32 v0, s6 +; GFX9-NEXT: global_store_dword v[2:3], v0, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: s_usubo_usube: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: s_sub_u32 s4, s16, s18 +; GFX8-NEXT: s_subb_u32 s5, s17, s19 +; GFX8-NEXT: v_mov_b32_e32 v4, s4 +; GFX8-NEXT: s_cselect_b32 s6, 1, 0 +; GFX8-NEXT: v_mov_b32_e32 v5, s5 +; GFX8-NEXT: flat_store_dwordx2 v[0:1], v[4:5] +; GFX8-NEXT: v_mov_b32_e32 v0, s6 +; GFX8-NEXT: flat_store_dword v[2:3], v0 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: s_usubo_usube: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: s_sub_u32 s4, s16, s18 +; GFX10-NEXT: s_subb_u32 s5, s17, s19 +; GFX10-NEXT: s_cselect_b32 s6, 1, 0 +; GFX10-NEXT: v_mov_b32_e32 v4, s4 +; GFX10-NEXT: v_mov_b32_e32 v5, s5 +; GFX10-NEXT: v_mov_b32_e32 v6, s6 +; GFX10-NEXT: global_store_dwordx2 v[0:1], v[4:5], off +; GFX10-NEXT: global_store_dword v[2:3], v6, off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: s_usubo_usube: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_sub_u32 s0, s0, s2 +; GFX11-NEXT: s_subb_u32 s1, s1, s3 +; GFX11-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0 +; GFX11-NEXT: v_mov_b32_e32 v6, s2 +; GFX11-NEXT: global_store_b64 v[0:1], v[4:5], off +; GFX11-NEXT: global_store_b32 v[2:3], v6, off +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: s_usubo_usube: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: s_sub_co_u32 s0, s0, s2 +; GFX12-NEXT: 
s_sub_co_ci_u32 s1, s1, s3 +; GFX12-NEXT: s_cselect_b32 s2, 1, 0 +; GFX12-NEXT: s_wait_alu 0xfffe +; GFX12-NEXT: v_dual_mov_b32 v5, s1 :: v_dual_mov_b32 v4, s0 +; GFX12-NEXT: v_mov_b32_e32 v6, s2 +; GFX12-NEXT: global_store_b64 v[0:1], v[4:5], off +; GFX12-NEXT: global_store_b32 v[2:3], v6, off +; GFX12-NEXT: s_setpc_b64 s[30:31] + %usubo = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %sub = extractvalue {i64, i1} %usubo, 0 + %of = extractvalue {i64, i1} %usubo, 1 + %of32 = select i1 %of, i32 1, i32 0 + store i64 %sub, ptr addrspace(1) %res + store i32 %of32, ptr addrspace(1) %carry + ret void +} + +define void @v_usubo_usube(i64 %a, i64 %b, ptr addrspace(1) %res, ptr addrspace(1) %carry) { +; GFX7-LABEL: v_usubo_usube: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX7-NEXT: v_sub_i32_e32 v0, vcc, v0, v2 +; GFX7-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; GFX7-NEXT: s_mov_b32 s6, 0 +; GFX7-NEXT: s_mov_b32 s7, 0xf000 +; GFX7-NEXT: s_mov_b64 s[4:5], 0 +; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX7-NEXT: buffer_store_dwordx2 v[0:1], v[4:5], s[4:7], 0 addr64 +; GFX7-NEXT: buffer_store_dword v2, v[6:7], s[4:7], 0 addr64 +; GFX7-NEXT: s_waitcnt vmcnt(0) +; GFX7-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: v_usubo_usube: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v2 +; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v3, vcc +; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX9-NEXT: global_store_dwordx2 v[4:5], v[0:1], off +; GFX9-NEXT: global_store_dword v[6:7], v2, off +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: v_usubo_usube: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_sub_u32_e32 v0, vcc, v0, v2 +; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc +; GFX8-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc +; GFX8-NEXT: flat_store_dwordx2 v[4:5], v[0:1] +; GFX8-NEXT: 
flat_store_dword v[6:7], v2 +; GFX8-NEXT: s_waitcnt vmcnt(0) +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: v_usubo_usube: +; GFX10: ; %bb.0: +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX10-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX10-NEXT: global_store_dwordx2 v[4:5], v[0:1], off +; GFX10-NEXT: global_store_dword v[6:7], v2, off +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: v_usubo_usube: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX11-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX11-NEXT: global_store_b64 v[4:5], v[0:1], off +; GFX11-NEXT: global_store_b32 v[6:7], v2, off +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX12-LABEL: v_usubo_usube: +; GFX12: ; %bb.0: +; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 +; GFX12-NEXT: s_wait_expcnt 0x0 +; GFX12-NEXT: s_wait_samplecnt 0x0 +; GFX12-NEXT: s_wait_bvhcnt 0x0 +; GFX12-NEXT: s_wait_kmcnt 0x0 +; GFX12-NEXT: v_sub_co_u32 v0, vcc_lo, v0, v2 +; GFX12-NEXT: s_wait_alu 0xfffd +; GFX12-NEXT: v_sub_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo +; GFX12-NEXT: s_wait_alu 0xfffd +; GFX12-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX12-NEXT: global_store_b64 v[4:5], v[0:1], off +; GFX12-NEXT: global_store_b32 v[6:7], v2, off +; GFX12-NEXT: s_setpc_b64 s[30:31] + %usubo = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %sub = extractvalue {i64, i1} %usubo, 0 + %of = extractvalue {i64, i1} %usubo, 1 + %of32 = select i1 %of, i32 1, i32 0 + store i64 %sub, ptr addrspace(1) %res + store i32 %of32, ptr addrspace(1) %carry + ret void +} -- cgit v1.2.3 From a98295dbcf500a21ea10e2124b6521a3124da643 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 30 Oct 2025 08:50:33 -0700 Subject: [libc++] Fix localization failures on macOS 15.4 (#138744) 
This patch reverts e15025dd and 88e15b781 which were temporary measures until we had figured out the underlying issues. It turns out that recent OSes updated localization data, removing the need for several Apple-specific workarounds in the tests. Fixes #135385 --- .../ios.base.cons/dtor.uninitialized.pass.cpp | 6 +-- .../fstreams/filebuf.virtuals/setbuf.pass.cpp | 6 +-- .../istream.unformatted/sync.pass.cpp | 6 +-- .../locale.collate.byname/compare.pass.cpp | 16 ------- .../get_long_double_fr_FR.pass.cpp | 5 --- .../get_long_double_ru_RU.pass.cpp | 3 -- .../get_long_double_zh_CN.pass.cpp | 27 +++++------- .../put_long_double_fr_FR.pass.cpp | 5 --- .../put_long_double_ru_RU.pass.cpp | 3 -- .../put_long_double_zh_CN.pass.cpp | 43 +++++++++--------- .../locale.moneypunct.byname/curr_symbol.pass.cpp | 15 +------ .../locale.moneypunct.byname/grouping.pass.cpp | 5 --- .../locale.moneypunct.byname/neg_format.pass.cpp | 35 ++------------- .../locale.moneypunct.byname/pos_format.pass.cpp | 10 +---- .../facet.num.get.members/get_double.pass.cpp | 6 +-- .../facet.num.get.members/get_float.pass.cpp | 6 +-- .../facet.num.get.members/get_long_double.pass.cpp | 6 +-- .../locale.numpunct.byname/grouping.pass.cpp | 7 +-- .../locale.numpunct.byname/thousands_sep.pass.cpp | 5 +-- .../time.duration.nonmember/ostream.pass.cpp | 10 ----- .../std/time/time.syn/formatter.duration.pass.cpp | 51 ---------------------- .../std/time/time.syn/formatter.file_time.pass.cpp | 19 -------- .../std/time/time.syn/formatter.hh_mm_ss.pass.cpp | 35 --------------- .../time/time.syn/formatter.local_time.pass.cpp | 19 -------- .../std/time/time.syn/formatter.sys_time.pass.cpp | 19 -------- libcxx/test/support/locale_helpers.h | 12 +++++ libcxxabi/test/uncaught_exception.pass.cpp | 6 +-- 27 files changed, 68 insertions(+), 318 deletions(-) diff --git a/libcxx/test/libcxx/input.output/iostreams.base/ios.base/ios.base.cons/dtor.uninitialized.pass.cpp 
b/libcxx/test/libcxx/input.output/iostreams.base/ios.base/ios.base.cons/dtor.uninitialized.pass.cpp index f17c1483c4a9..16d66e3be14e 100644 --- a/libcxx/test/libcxx/input.output/iostreams.base/ios.base/ios.base.cons/dtor.uninitialized.pass.cpp +++ b/libcxx/test/libcxx/input.output/iostreams.base/ios.base/ios.base.cons/dtor.uninitialized.pass.cpp @@ -6,14 +6,12 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // UNSUPPORTED: no-exceptions // The fix for issue 57964 requires an updated dylib due to explicit // instantiations. That means Apple backdeployment targets remain broken. -// XFAIL: using-built-library-before-llvm-19 +// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added +// XFAIL: using-built-library-before-llvm-19 && !darwin // diff --git a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/setbuf.pass.cpp b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/setbuf.pass.cpp index 9d14abcedd42..00aa97a45cc2 100644 --- a/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/setbuf.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/fstreams/filebuf.virtuals/setbuf.pass.cpp @@ -6,16 +6,14 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // // basic_streambuf* setbuf(char_type* s, streamsize n) override; // This test requires the fix to https://llvm.org/PR60509 in the dylib, // which landed in 5afb937d8a30445642ccaf33866ee4cdd0713222. 
-// XFAIL: using-built-library-before-llvm-19 +// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added +// XFAIL: using-built-library-before-llvm-19 && !darwin #include #include diff --git a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp index 3b685950d36a..b04d2c07ebb1 100644 --- a/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp +++ b/libcxx/test/std/input.output/iostream.format/input.streams/istream.unformatted/sync.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // // int sync(); @@ -16,7 +13,8 @@ // The fix for bug 51497 and bug 51499 require and updated dylib due to // explicit instantiations. That means Apple backdeployment targets remain // broken. -// XFAIL: using-built-library-before-llvm-19 +// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added +// XFAIL: using-built-library-before-llvm-19 && !darwin #include #include diff --git a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp index 4905ed40f4a2..8ae6bc2d3ba6 100644 --- a/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.collate/locale.collate.byname/compare.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // Bionic has minimal locale support, investigate this later. 
// XFAIL: LIBCXX-ANDROID-FIXME @@ -56,14 +53,7 @@ int main(int, char**) ASSERT_COMPARE(std::string, "AAA", "BBB", -1); ASSERT_COMPARE(std::string, "bbb", "aaa", 1); ASSERT_COMPARE(std::string, "ccc", "ccc", 0); - -#if defined(__APPLE__) - // Apple's default collation is case-sensitive - ASSERT_COMPARE(std::string, "aaaaaaA", "BaaaaaA", 1); -#else - // Glibc, Windows, and FreeBSD's default collation is case-insensitive ASSERT_COMPARE(std::string, "aaaaaaA", "BaaaaaA", -1); -#endif } #ifndef TEST_HAS_NO_WIDE_CHARACTERS { @@ -73,13 +63,7 @@ int main(int, char**) ASSERT_COMPARE(std::wstring, L"AAA", L"BBB", -1); ASSERT_COMPARE(std::wstring, L"bbb", L"aaa", 1); ASSERT_COMPARE(std::wstring, L"ccc", L"ccc", 0); -#if defined(__APPLE__) - // Apple's default collation is case-sensitive - ASSERT_COMPARE(std::wstring, L"aaaaaaA", L"BaaaaaA", 1); -#else - // Glibc, Windows, and FreeBSD's default collation is case-insensitive ASSERT_COMPARE(std::wstring, L"aaaaaaA", L"BaaaaaA", -1); -#endif } #endif } diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp index ea6b07934510..c9ed59f3cb9a 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_fr_FR.pass.cpp @@ -6,11 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - -// XFAIL: darwin - // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd diff --git 
a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp index f98758d086de..371cf0e90c8d 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_ru_RU.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp index 6980b7ae77db..c86df7e6b53b 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.get/locale.money.get.members/get_long_double_zh_CN.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd @@ -158,7 +155,7 @@ int main(int, char**) std::noshowbase(ios); } { // negative one, showbase -#ifdef _AIX +#if defined(_AIX) || defined(__APPLE__) std::string v = "-" + currency_symbol + "0.01"; #else std::string v = currency_symbol + "-0.01"; @@ -172,7 +169,7 @@ int main(int, char**) assert(ex == -1); } { 
// negative one, showbase -#ifdef _AIX +#if defined(_AIX) || defined(__APPLE__) std::string v = "-" + currency_symbol + "0.01"; #else std::string v = currency_symbol + "-0.01"; @@ -212,7 +209,7 @@ int main(int, char**) std::noshowbase(ios); } { // negative, showbase -#ifdef _AIX +#if defined(_AIX) || defined(__APPLE__) std::string v = "-" + currency_symbol + "1,234,567.89"; #else std::string v = currency_symbol + "-1,234,567.89"; @@ -333,7 +330,7 @@ int main(int, char**) std::noshowbase(ios); } { // negative one, showbase -#if defined(TEST_HAS_GLIBC) || defined(_AIX) +#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) std::string v = "-" + currency_name + "0.01"; #else std::string v = currency_name + "-0.01"; @@ -348,7 +345,7 @@ int main(int, char**) assert(ex == -1); } { // negative one, showbase -#if defined(TEST_HAS_GLIBC) || defined(_AIX) +#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) std::string v = "-" + currency_name + "0.01"; #else std::string v = currency_name + "-0.01"; @@ -389,7 +386,7 @@ int main(int, char**) std::noshowbase(ios); } { // negative, showbase -#if defined(TEST_HAS_GLIBC) || defined(_AIX) +#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) std::string v = "-" + currency_name + "1,234,567.89"; #else std::string v = currency_name + "-1,234,567.89"; @@ -518,7 +515,7 @@ int main(int, char**) std::noshowbase(ios); } { // negative one, showbase -# ifdef _AIX +# if defined(_AIX) || defined(__APPLE__) std::wstring v = L"-" + w_currency_symbol + L"0.01"; # else std::wstring v = w_currency_symbol + L"-0.01"; @@ -532,7 +529,7 @@ int main(int, char**) assert(ex == -1); } { // negative one, showbase -# ifdef _AIX +# if defined(_AIX) || defined(__APPLE__) std::wstring v = L"-" + w_currency_symbol + L"0.01"; # else std::wstring v = w_currency_symbol + L"-0.01"; @@ -572,7 +569,7 @@ int main(int, char**) std::noshowbase(ios); } { // negative, showbase -# ifdef _AIX +# if defined(_AIX) || 
defined(__APPLE__) std::wstring v = L"-" + w_currency_symbol + L"1,234,567.89"; # else std::wstring v = w_currency_symbol + L"-1,234,567.89"; @@ -693,7 +690,7 @@ int main(int, char**) std::noshowbase(ios); } { // negative one, showbase -# if defined(TEST_HAS_GLIBC) || defined(_AIX) +# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) std::wstring v = L"-" + w_currency_name + L"0.01"; # else std::wstring v = w_currency_name + L"-0.01"; @@ -707,7 +704,7 @@ int main(int, char**) assert(ex == -1); } { // negative one, showbase -# if defined(TEST_HAS_GLIBC) || defined(_AIX) +# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) std::wstring v = L"-" + w_currency_name + L"0.01"; # else std::wstring v = w_currency_name + L"-0.01"; @@ -747,7 +744,7 @@ int main(int, char**) std::noshowbase(ios); } { // negative, showbase -# if defined(TEST_HAS_GLIBC) || defined(_AIX) +# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) std::wstring v = L"-" + w_currency_name + L"1,234,567.89"; # else std::wstring v = w_currency_name + L"-1,234,567.89"; diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp index 14745996b9fd..f9d7998b07ff 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_fr_FR.pass.cpp @@ -6,11 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - -// XFAIL: darwin - // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd diff --git 
a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp index 0455e5949c44..be1e39748846 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_ru_RU.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp index 68640fabb73b..25046a841708 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.money.put/locale.money.put.members/put_long_double_zh_CN.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd @@ -122,7 +119,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, '*', v); std::string ex(str, base(iter)); -#ifdef _AIX +#if defined(_AIX) || defined(__APPLE__) assert(ex == "-" + currency_symbol + "0.01"); #else assert(ex == currency_symbol 
+ "-0.01"); @@ -142,7 +139,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, '*', v); std::string ex(str, base(iter)); -#ifdef _AIX +#if defined(_AIX) || defined(__APPLE__) assert(ex == "-" + currency_symbol + "1,234,567.89"); #else assert(ex == currency_symbol + "-1,234,567.89"); @@ -156,7 +153,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, ' ', v); std::string ex(str, base(iter)); -#ifdef _AIX +#if defined(_AIX) || defined(__APPLE__) assert(ex == "-" + currency_symbol + "1,234,567.89" + currency_symbol_padding); #else assert(ex == currency_symbol + "-1,234,567.89" + currency_symbol_padding); @@ -171,7 +168,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, ' ', v); std::string ex(str, base(iter)); -#ifdef _AIX +#if defined(_AIX) || defined(__APPLE__) assert(ex == "-" + currency_symbol + currency_symbol_padding + "1,234,567.89"); #else assert(ex == currency_symbol + "-" + currency_symbol_padding + "1,234,567.89"); @@ -186,7 +183,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, ' ', v); std::string ex(str, base(iter)); -#ifdef _AIX +#if defined(_AIX) || defined(__APPLE__) assert(ex == currency_symbol_padding + "-" + currency_symbol + "1,234,567.89"); #else assert(ex == currency_symbol_padding + currency_symbol + "-1,234,567.89"); @@ -239,7 +236,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, '*', v); std::string ex(str, base(iter)); -#if defined(TEST_HAS_GLIBC) || defined(_AIX) +#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) assert(ex == "-" + currency_name + "0.01"); #else assert(ex == currency_name + "-0.01"); @@ -259,7 +256,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator iter = 
f.put(cpp17_output_iterator(str), true, ios, '*', v); std::string ex(str, base(iter)); -#if defined(TEST_HAS_GLIBC) || defined(_AIX) +#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) assert(ex == "-" + currency_name + "1,234,567.89"); #else assert(ex == currency_name + "-1,234,567.89"); @@ -273,7 +270,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, ' ', v); std::string ex(str, base(iter)); -#if defined(TEST_HAS_GLIBC) || defined(_AIX) +#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) assert(ex == "-" + currency_name + "1,234,567.89" + currency_name_padding); #else assert(ex == currency_name + "-1,234,567.89" + currency_name_padding); @@ -288,7 +285,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, ' ', v); std::string ex(str, base(iter)); -#if defined(TEST_HAS_GLIBC) || defined(_AIX) +#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) assert(ex == "-" + currency_name + currency_name_padding + "1,234,567.89"); #else assert(ex == currency_name + "-" + currency_name_padding + "1,234,567.89"); @@ -303,7 +300,7 @@ int main(int, char**) char str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, ' ', v); std::string ex(str, base(iter)); -#if defined(TEST_HAS_GLIBC) || defined(_AIX) +#if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) assert(ex == currency_name_padding + "-" + currency_name + "1,234,567.89"); #else assert(ex == currency_name_padding + currency_name + "-1,234,567.89"); @@ -366,7 +363,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, '*', v); std::wstring ex(str, base(iter)); -# ifdef _AIX +# if defined(_AIX) || defined(__APPLE__) assert(ex == L"-" + currency_symbol + L"0.01"); # else assert(ex == currency_symbol + L"-0.01"); @@ -386,7 +383,7 @@ int main(int, 
char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, '*', v); std::wstring ex(str, base(iter)); -# ifdef _AIX +# if defined(_AIX) || defined(__APPLE__) assert(ex == L"-" + currency_symbol + L"1,234,567.89"); # else assert(ex == currency_symbol + L"-1,234,567.89"); @@ -400,7 +397,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, ' ', v); std::wstring ex(str, base(iter)); -# ifdef _AIX +# if defined(_AIX) || defined(__APPLE__) assert(ex == L"-" + currency_symbol + L"1,234,567.89 "); # else assert(ex == currency_symbol + L"-1,234,567.89 "); @@ -415,7 +412,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, ' ', v); std::wstring ex(str, base(iter)); -# ifdef _AIX +# if defined(_AIX) || defined(__APPLE__) assert(ex == L"-" + currency_symbol + L" 1,234,567.89"); # else assert(ex == currency_symbol + L"- 1,234,567.89"); @@ -430,7 +427,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), false, ios, ' ', v); std::wstring ex(str, base(iter)); -# ifdef _AIX +# if defined(_AIX) || defined(__APPLE__) assert(ex == L" -" + currency_symbol + L"1,234,567.89"); # else assert(ex == L" " + currency_symbol + L"-1,234,567.89"); @@ -483,7 +480,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, '*', v); std::wstring ex(str, base(iter)); -# if defined(TEST_HAS_GLIBC) || defined(_AIX) +# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) assert(ex == L"-" + currency_name + L"0.01"); #else assert(ex == currency_name + L"-0.01"); @@ -503,7 +500,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, '*', v); std::wstring ex(str, base(iter)); -# if defined(TEST_HAS_GLIBC) || defined(_AIX) +# if 
defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) assert(ex == L"-" + currency_name + L"1,234,567.89"); #else assert(ex == currency_name + L"-1,234,567.89"); @@ -517,7 +514,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, ' ', v); std::wstring ex(str, base(iter)); -# if defined(TEST_HAS_GLIBC) || defined(_AIX) +# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) assert(ex == L"-" + currency_name + L"1,234,567.89" + currency_name_padding); #else assert(ex == currency_name + L"-1,234,567.89" + currency_name_padding); @@ -532,7 +529,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, ' ', v); std::wstring ex(str, base(iter)); -# if defined(TEST_HAS_GLIBC) || defined(_AIX) +# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) assert(ex == L"-" + currency_name + currency_name_padding + L"1,234,567.89"); #else assert(ex == currency_name + L"-" + currency_name_padding + L"1,234,567.89"); @@ -547,7 +544,7 @@ int main(int, char**) wchar_t str[100]; cpp17_output_iterator iter = f.put(cpp17_output_iterator(str), true, ios, ' ', v); std::wstring ex(str, base(iter)); -# if defined(TEST_HAS_GLIBC) || defined(_AIX) +# if defined(TEST_HAS_GLIBC) || defined(_AIX) || defined(__APPLE__) assert(ex == currency_name_padding + L"-" + currency_name + L"1,234,567.89"); #else assert(ex == currency_name_padding + currency_name + L"-1,234,567.89"); diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp index 9c1253d47acd..e7f0f29e8774 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp +++ 
b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/curr_symbol.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd @@ -117,11 +114,7 @@ int main(int, char**) { Fnf f(LOCALE_fr_FR_UTF_8, 1); -#ifdef __APPLE__ - assert(f.curr_symbol() == " Eu"); -#else assert(f.curr_symbol() == " \u20ac"); -#endif } { Fnt f(LOCALE_fr_FR_UTF_8, 1); @@ -130,11 +123,7 @@ int main(int, char**) #ifndef TEST_HAS_NO_WIDE_CHARACTERS { Fwf f(LOCALE_fr_FR_UTF_8, 1); -#ifdef __APPLE__ - assert(f.curr_symbol() == L" Eu"); -#else assert(f.curr_symbol() == L" \u20ac"); -#endif } { Fwt f(LOCALE_fr_FR_UTF_8, 1); @@ -164,7 +153,7 @@ int main(int, char**) { Fnf f(LOCALE_zh_CN_UTF_8, 1); -#ifdef _WIN32 +#if defined(_WIN32) || defined(__APPLE__) assert(f.curr_symbol() == "\xC2\xA5"); // \u00A5 #else assert(f.curr_symbol() == "\xEF\xBF\xA5"); // \uFFE5 @@ -177,7 +166,7 @@ int main(int, char**) #ifndef TEST_HAS_NO_WIDE_CHARACTERS { Fwf f(LOCALE_zh_CN_UTF_8, 1); -#ifdef _WIN32 +#if defined(_WIN32) || defined(__APPLE__) assert(f.curr_symbol() == L"\u00A5"); #else assert(f.curr_symbol() == L"\uFFE5"); diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/grouping.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/grouping.pass.cpp index 630b2739c88a..90dc6c4d7a2a 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/grouping.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/grouping.pass.cpp @@ -6,11 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - -// XFAIL: darwin -// // NetBSD 
does not support LC_MONETARY at the moment // XFAIL: netbsd diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/neg_format.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/neg_format.pass.cpp index a3e3d853524b..e9528147dfe6 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/neg_format.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/neg_format.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd @@ -82,14 +79,6 @@ void assert_sign_symbol_none_value(std::money_base::pattern p) assert(p.field[3] == std::money_base::value); } -void assert_value_none_symbol_sign(std::money_base::pattern p) -{ - assert(p.field[0] == std::money_base::value); - assert(p.field[1] == std::money_base::none); - assert(p.field[2] == std::money_base::symbol); - assert(p.field[3] == std::money_base::sign); -} - void assert_sign_value_none_symbol(std::money_base::pattern p) { assert(p.field[0] == std::money_base::sign); @@ -149,39 +138,23 @@ int main(int, char**) { Fnf f(LOCALE_fr_FR_UTF_8, 1); std::money_base::pattern p = f.neg_format(); -#ifdef __APPLE__ - assert_value_none_symbol_sign(p); -#else assert_sign_value_none_symbol(p); -#endif } { Fnt f(LOCALE_fr_FR_UTF_8, 1); std::money_base::pattern p = f.neg_format(); -#ifdef __APPLE__ - assert_value_none_symbol_sign(p); -#else assert_sign_value_none_symbol(p); -#endif } #ifndef TEST_HAS_NO_WIDE_CHARACTERS { Fwf f(LOCALE_fr_FR_UTF_8, 1); std::money_base::pattern p = f.neg_format(); -#ifdef __APPLE__ - assert_value_none_symbol_sign(p); -#else assert_sign_value_none_symbol(p); -#endif } { Fwt f(LOCALE_fr_FR_UTF_8, 1); std::money_base::pattern p = 
f.neg_format(); -#ifdef __APPLE__ - assert_value_none_symbol_sign(p); -#else assert_sign_value_none_symbol(p); -#endif } #endif // TEST_HAS_NO_WIDE_CHARACTERS @@ -211,7 +184,7 @@ int main(int, char**) { Fnf f(LOCALE_zh_CN_UTF_8, 1); std::money_base::pattern p = f.neg_format(); -#ifdef _AIX +#if defined(_AIX) || defined(__APPLE__) assert_sign_symbol_none_value(p); #else assert_symbol_sign_none_value(p); @@ -220,7 +193,7 @@ int main(int, char**) { Fnt f(LOCALE_zh_CN_UTF_8, 1); std::money_base::pattern p = f.neg_format(); -#if defined(_WIN32) || defined(__APPLE__) +#if defined(_WIN32) assert_symbol_sign_none_value(p); #else assert_sign_symbol_none_value(p); @@ -230,7 +203,7 @@ int main(int, char**) { Fwf f(LOCALE_zh_CN_UTF_8, 1); std::money_base::pattern p = f.neg_format(); -#ifdef _AIX +#if defined(_AIX) || defined(__APPLE__) assert_sign_symbol_none_value(p); #else assert_symbol_sign_none_value(p); @@ -239,7 +212,7 @@ int main(int, char**) { Fwt f(LOCALE_zh_CN_UTF_8, 1); std::money_base::pattern p = f.neg_format(); -#if defined(_WIN32) || defined(__APPLE__) +#if defined(_WIN32) assert_symbol_sign_none_value(p); #else assert_sign_symbol_none_value(p); diff --git a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/pos_format.pass.cpp b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/pos_format.pass.cpp index 671620a0c2f9..11832a7d8927 100644 --- a/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/pos_format.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.monetary/locale.moneypunct.byname/pos_format.pass.cpp @@ -5,7 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// + // NetBSD does not support LC_MONETARY at the moment // XFAIL: netbsd @@ -79,14 +79,6 @@ void assert_sign_symbol_none_value(std::money_base::pattern p) 
assert(p.field[3] == std::money_base::value); } -void assert_value_none_symbol_sign(std::money_base::pattern p) -{ - assert(p.field[0] == std::money_base::value); - assert(p.field[1] == std::money_base::none); - assert(p.field[2] == std::money_base::symbol); - assert(p.field[3] == std::money_base::sign); -} - void assert_sign_value_none_symbol(std::money_base::pattern p) { assert(p.field[0] == std::money_base::sign); diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp index 612d3738a373..31682fea43bc 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_double.pass.cpp @@ -6,12 +6,10 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // The fix for LWG2381 (https://github.com/llvm/llvm-project/pull/77948) changed behavior of // FP parsing. This requires 3e15c97fa3812993bdc319827a5c6d867b765ae8 in the dylib. 
-// XFAIL: using-built-library-before-llvm-19 +// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added +// XFAIL: using-built-library-before-llvm-19 && !darwin // diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp index 58bc9e5abef8..57eedc8633be 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_float.pass.cpp @@ -6,12 +6,10 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // The fix for LWG2381 (https://github.com/llvm/llvm-project/pull/77948) changed behavior of // FP parsing. This requires 3e15c97fa3812993bdc319827a5c6d867b765ae8 in the dylib. 
-// XFAIL: using-built-library-before-llvm-19 +// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added +// XFAIL: using-built-library-before-llvm-19 && !darwin // diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp index bf8bb651d6bc..8324ee317014 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.num.get/facet.num.get.members/get_long_double.pass.cpp @@ -6,12 +6,10 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // The fix for LWG2381 (https://github.com/llvm/llvm-project/pull/77948) changed behavior of // FP parsing. This requires 3e15c97fa3812993bdc319827a5c6d867b765ae8 in the dylib. 
-// XFAIL: using-built-library-before-llvm-19 +// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added +// XFAIL: using-built-library-before-llvm-19 && !darwin // diff --git a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/grouping.pass.cpp b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/grouping.pass.cpp index a87c5e0ace28..11ec75469c70 100644 --- a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/grouping.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/grouping.pass.cpp @@ -5,10 +5,7 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// - -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - +// // NetBSD does not support LC_NUMERIC at the moment // XFAIL: netbsd @@ -63,7 +60,7 @@ int main(int, char**) } { std::locale l(LOCALE_fr_FR_UTF_8); -#if defined(TEST_HAS_GLIBC) || defined(_WIN32) || defined(_AIX) +#if defined(TEST_HAS_GLIBC) || defined(_WIN32) || defined(_AIX) || defined(__APPLE__) const char* const group = "\3"; #else const char* const group = "\x7f"; diff --git a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp index ef39e8aa7b68..53f2c8554f3d 100644 --- a/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/facet.numpunct/locale.numpunct.byname/thousands_sep.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // NetBSD does not support LC_NUMERIC at the moment // XFAIL: 
netbsd @@ -69,7 +66,7 @@ int main(int, char**) // The below tests work around GLIBC's use of U202F as LC_NUMERIC thousands_sep. std::locale l(LOCALE_fr_FR_UTF_8); { -#if defined(_CS_GNU_LIBC_VERSION) || defined(_WIN32) || defined(_AIX) +#if defined(_CS_GNU_LIBC_VERSION) || defined(_WIN32) || defined(_AIX) || defined(__APPLE__) const char sep = ' '; #else const char sep = ','; diff --git a/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp b/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp index 4e84db9a84d7..97ac04275b0b 100644 --- a/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp +++ b/libcxx/test/std/time/time.duration/time.duration.nonmember/ostream.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: no-localization // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME @@ -83,17 +80,10 @@ static void test_values() { assert(stream_c_locale(1'000.123456s) == SV("1000.1235s")); if constexpr (std::same_as) { -#if defined(__APPLE__) - assert(stream_fr_FR_locale(-1'000'000s) == SV("-1000000s")); - assert(stream_fr_FR_locale(1'000'000s) == SV("1000000s")); - assert(stream_fr_FR_locale(-1'000.123456s) == SV("-1000,1235s")); - assert(stream_fr_FR_locale(1'000.123456s) == SV("1000,1235s")); -#else assert(stream_fr_FR_locale(-1'000'000s) == SV("-1 000 000s")); assert(stream_fr_FR_locale(1'000'000s) == SV("1 000 000s")); assert(stream_fr_FR_locale(-1'000.123456s) == SV("-1 000,1235s")); assert(stream_fr_FR_locale(1'000.123456s) == SV("1 000,1235s")); -#endif } else { #ifndef TEST_HAS_NO_WIDE_CHARACTERS assert(stream_fr_FR_locale(-1'000'000s) == L"-1" FR_THOU_SEP "000" FR_THOU_SEP "000s"); diff --git a/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp b/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp index 
973bce8f81d4..f1f7debed246 100644 --- a/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.duration.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: no-localization // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME @@ -408,19 +405,11 @@ static void test_valid_positive_integral_values() { "%OM='00'\t" "%S='00'\t" "%OS='00'\t" -# if defined(__APPLE__) - "%p='AM'\t" -# else "%p='午前'\t" -# endif "%R='00:00'\t" "%T='00:00:00'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='12:00:00 AM'\t" -# else "%r='12:00:00 午前'\t" -# endif "%X='00時00分00秒'\t" "%EX='00時00分00秒'\t" # elif defined(_WIN32) @@ -448,19 +437,11 @@ static void test_valid_positive_integral_values() { "%OM='59'\t" "%S='59'\t" "%OS='59'\t" -# if defined(__APPLE__) - "%p='AM'\t" -# else "%p='午前'\t" -# endif "%R='11:59'\t" "%T='11:59:59'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='11:59:59 AM'\t" -# else "%r='11:59:59 午前'\t" -# endif "%X='11時59分59秒'\t" "%EX='11時59分59秒'\t" # elif defined(_WIN32) @@ -488,19 +469,11 @@ static void test_valid_positive_integral_values() { "%OM='00'\t" "%S='00'\t" "%OS='00'\t" -# if defined(__APPLE__) - "%p='PM'\t" -# else "%p='午後'\t" -# endif "%R='12:00'\t" "%T='12:00:00'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='12:00:00 PM'\t" -# else "%r='12:00:00 午後'\t" -# endif "%X='12時00分00秒'\t" "%EX='12時00分00秒'\t" # else @@ -528,19 +501,11 @@ static void test_valid_positive_integral_values() { "%OM='59'\t" "%S='59'\t" "%OS='59'\t" -# if defined(__APPLE__) - "%p='PM'\t" -# else "%p='午後'\t" -# endif "%R='23:59'\t" "%T='23:59:59'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='11:59:59 PM'\t" -# else "%r='11:59:59 午後'\t" -# endif 
"%X='23時59分59秒'\t" "%EX='23時59分59秒'\t" # else @@ -568,19 +533,11 @@ static void test_valid_positive_integral_values() { "%OM='00'\t" "%S='00'\t" "%OS='00'\t" -# if defined(__APPLE__) - "%p='AM'\t" -# else "%p='午前'\t" -# endif "%R='00:00'\t" "%T='00:00:00'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='12:00:00 AM'\t" -# else "%r='12:00:00 午前'\t" -# endif "%X='00時00分00秒'\t" "%EX='00時00分00秒'\t" # elif defined(_WIN32) @@ -835,19 +792,11 @@ static void test_valid_negative_integral_values() { "%OM='59'\t" "%S='59'\t" "%OS='59'\t" -# if defined(__APPLE__) - "%p='PM'\t" -# else "%p='午後'\t" -# endif "%R='23:59'\t" "%T='23:59:59'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='11:59:59 PM'\t" -# else "%r='11:59:59 午後'\t" -# endif "%X='23時59分59秒'\t" "%EX='23時59分59秒'\t" # elif defined(_WIN32) diff --git a/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp b/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp index 28a972b19dce..e258c4161eda 100644 --- a/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.file_time.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: no-localization // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME @@ -695,19 +692,11 @@ static void test_valid_values_time() { "%OM='00'\t" "%S='00'\t" "%OS='00'\t" -# if defined(__APPLE__) - "%p='AM'\t" -# else "%p='午前'\t" -# endif "%R='00:00'\t" "%T='00:00:00'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='12:00:00 AM'\t" -# else "%r='12:00:00 午前'\t" -# endif "%X='00時00分00秒'\t" "%EX='00時00分00秒'\t" # elif defined(_WIN32) @@ -732,19 +721,11 @@ static void test_valid_values_time() { "%OM='31'\t" "%S='30.123'\t" "%OS='30.123'\t" -# if defined(__APPLE__) - "%p='PM'\t" -# else 
"%p='午後'\t" -# endif "%R='23:31'\t" "%T='23:31:30.123'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='11:31:30 PM'\t" -# else "%r='11:31:30 午後'\t" -# endif "%X='23時31分30秒'\t" "%EX='23時31分30秒'\t" # elif defined(_WIN32) diff --git a/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp b/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp index 82d9b4c7540a..bbd9c074bef2 100644 --- a/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.hh_mm_ss.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: no-localization // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME @@ -302,19 +299,11 @@ static void test_valid_values() { "%OM='00'\t" "%S='00'\t" "%OS='00'\t" -# if defined(__APPLE__) - "%p='AM'\t" -# else "%p='午前'\t" -# endif "%R='00:00'\t" "%T='00:00:00'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='12:00:00 AM'\t" -# else "%r='12:00:00 午前'\t" -# endif "%X='00時00分00秒'\t" "%EX='00時00分00秒'\t" # elif defined(_WIN32) @@ -339,19 +328,11 @@ static void test_valid_values() { "%OM='31'\t" "%S='30.123'\t" "%OS='30.123'\t" -# if defined(__APPLE__) - "%p='PM'\t" -# else "%p='午後'\t" -# endif "%R='23:31'\t" "%T='23:31:30.123'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='11:31:30 PM'\t" -# else "%r='11:31:30 午後'\t" -# endif "%X='23時31分30秒'\t" "%EX='23時31分30秒'\t" # elif defined(_WIN32) @@ -376,19 +357,11 @@ static void test_valid_values() { "%OM='02'\t" "%S='01.123456789012'\t" "%OS='01.123456789012'\t" -# if defined(__APPLE__) - "%p='AM'\t" -# else "%p='午前'\t" -# endif "%R='03:02'\t" "%T='03:02:01.123456789012'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='03:02:01 AM'\t" -# else "%r='03:02:01 午前'\t" -# endif 
"%X='03時02分01秒'\t" "%EX='03時02分01秒'\t" # elif defined(_WIN32) @@ -413,19 +386,11 @@ static void test_valid_values() { "%OM='01'\t" "%S='01'\t" "%OS='01'\t" -# if defined(__APPLE__) - "%p='AM'\t" -# else "%p='午前'\t" -# endif "%R='01:01'\t" "%T='01:01:01'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='01:01:01 AM'\t" -# else "%r='01:01:01 午前'\t" -# endif "%X='01時01分01秒'\t" "%EX='01時01分01秒'\t" # elif defined(_WIN32) diff --git a/libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp b/libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp index bd23337ccb31..ce3af8ec199a 100644 --- a/libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.local_time.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: no-localization // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME @@ -694,19 +691,11 @@ static void test_valid_values_time() { "%OM='00'\t" "%S='00'\t" "%OS='00'\t" -# if defined(__APPLE__) - "%p='AM'\t" -# else "%p='午前'\t" -# endif "%R='00:00'\t" "%T='00:00:00'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='12:00:00 AM'\t" -# else "%r='12:00:00 午前'\t" -# endif "%X='00時00分00秒'\t" "%EX='00時00分00秒'\t" # elif defined(_WIN32) @@ -731,19 +720,11 @@ static void test_valid_values_time() { "%OM='31'\t" "%S='30.123'\t" "%OS='30.123'\t" -# if defined(__APPLE__) - "%p='PM'\t" -# else "%p='午後'\t" -# endif "%R='23:31'\t" "%T='23:31:30.123'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='11:31:30 PM'\t" -# else "%r='11:31:30 午後'\t" -# endif "%X='23時31分30秒'\t" "%EX='23時31分30秒'\t" # elif defined(_WIN32) diff --git a/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp b/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp index 
9c9c8e0de1e9..9238f3daf1f8 100644 --- a/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp +++ b/libcxx/test/std/time/time.syn/formatter.sys_time.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // UNSUPPORTED: c++03, c++11, c++14, c++17 // UNSUPPORTED: no-localization // UNSUPPORTED: GCC-ALWAYS_INLINE-FIXME @@ -691,19 +688,11 @@ static void test_valid_values_time() { "%OM='00'\t" "%S='00'\t" "%OS='00'\t" -# if defined(__APPLE__) - "%p='AM'\t" -# else "%p='午前'\t" -# endif "%R='00:00'\t" "%T='00:00:00'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='12:00:00 AM'\t" -# else "%r='12:00:00 午前'\t" -# endif "%X='00時00分00秒'\t" "%EX='00時00分00秒'\t" # elif defined(_WIN32) @@ -728,19 +717,11 @@ static void test_valid_values_time() { "%OM='31'\t" "%S='30.123'\t" "%OS='30.123'\t" -# if defined(__APPLE__) - "%p='PM'\t" -# else "%p='午後'\t" -# endif "%R='23:31'\t" "%T='23:31:30.123'\t" # if defined(__APPLE__) || defined(__FreeBSD__) -# if defined(__APPLE__) - "%r='11:31:30 PM'\t" -# else "%r='11:31:30 午後'\t" -# endif "%X='23時31分30秒'\t" "%EX='23時31分30秒'\t" # elif defined(_WIN32) diff --git a/libcxx/test/support/locale_helpers.h b/libcxx/test/support/locale_helpers.h index 946c2fed0f3a..3cec7397e3d7 100644 --- a/libcxx/test/support/locale_helpers.h +++ b/libcxx/test/support/locale_helpers.h @@ -73,6 +73,12 @@ MultiStringType currency_symbol_ru_RU() { return MKSTR("\u20BD"); // U+20BD RUBLE SIGN #elif defined(_WIN32) || defined(__FreeBSD__) || defined(_AIX) return MKSTR("\u20BD"); // U+20BD RUBLE SIGN +#elif defined(__APPLE__) + if (__builtin_available(macOS 15.4, *)) { + return MKSTR("\u20BD"); // U+20BD RUBLE SIGN + } else { + return MKSTR("\u0440\u0443\u0431."); + } #else return MKSTR("\u0440\u0443\u0431."); #endif @@ -81,6 +87,12 @@ MultiStringType currency_symbol_ru_RU() { MultiStringType currency_symbol_zh_CN() { #if 
defined(_WIN32) return MKSTR("\u00A5"); // U+00A5 YEN SIGN +#elif defined(__APPLE__) + if (__builtin_available(macOS 15.4, *)) { + return MKSTR("\u00A5"); // U+00A5 YEN SIGN + } else { + return MKSTR("\uFFE5"); // U+FFE5 FULLWIDTH YEN SIGN + } #else return MKSTR("\uFFE5"); // U+FFE5 FULLWIDTH YEN SIGN #endif diff --git a/libcxxabi/test/uncaught_exception.pass.cpp b/libcxxabi/test/uncaught_exception.pass.cpp index 8e8468c43240..e97732006e11 100644 --- a/libcxxabi/test/uncaught_exception.pass.cpp +++ b/libcxxabi/test/uncaught_exception.pass.cpp @@ -6,9 +6,6 @@ // //===----------------------------------------------------------------------===// -// TODO(mordante) Investigate -// UNSUPPORTED: apple-clang - // UNSUPPORTED: no-exceptions // This tests that libc++abi still provides __cxa_uncaught_exception() for @@ -18,7 +15,8 @@ // to undefined symbols when linking against a libc++ that re-exports the symbols, // but running against a libc++ that doesn't. Fortunately, usage of __cxa_uncaught_exception() // in the wild seems to be close to non-existent. 
-// XFAIL: using-built-library-before-llvm-19 +// TODO: Remove && !darwin once availability markup for LLVM 19 on macOS has been added +// XFAIL: using-built-library-before-llvm-19 && !darwin #include #include -- cgit v1.2.3 From cc1022ca0bba0564fbfa1e194414593640d28852 Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Thu, 30 Oct 2025 09:03:45 -0700 Subject: [InstrProf] Remove deprecated -debug-info-correlate flag (#165289) --- .../Transforms/Instrumentation/PGOInstrumentation.h | 2 -- llvm/lib/Frontend/Driver/CodeGenOptions.cpp | 4 +--- .../Transforms/Instrumentation/InstrProfiling.cpp | 20 +++++--------------- .../Instrumentation/PGOInstrumentation.cpp | 2 +- 4 files changed, 7 insertions(+), 21 deletions(-) diff --git a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h index ced446dacb6c..9dcd4b53a0db 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h +++ b/llvm/include/llvm/Transforms/Instrumentation/PGOInstrumentation.h @@ -26,8 +26,6 @@ namespace llvm { -LLVM_ABI extern cl::opt DebugInfoCorrelate; - class Function; class Instruction; class Module; diff --git a/llvm/lib/Frontend/Driver/CodeGenOptions.cpp b/llvm/lib/Frontend/Driver/CodeGenOptions.cpp index df884908845d..b546e816419e 100644 --- a/llvm/lib/Frontend/Driver/CodeGenOptions.cpp +++ b/llvm/lib/Frontend/Driver/CodeGenOptions.cpp @@ -12,7 +12,6 @@ #include "llvm/TargetParser/Triple.h" namespace llvm { -extern llvm::cl::opt DebugInfoCorrelate; extern llvm::cl::opt ProfileCorrelate; } // namespace llvm @@ -64,8 +63,7 @@ TargetLibraryInfoImpl *createTLII(const llvm::Triple &TargetTriple, } std::string getDefaultProfileGenName() { - return llvm::DebugInfoCorrelate || - llvm::ProfileCorrelate != InstrProfCorrelator::NONE + return llvm::ProfileCorrelate != InstrProfCorrelator::NONE ? 
"default_%m.proflite" : "default_%m.profraw"; } diff --git a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp index 7795cce9d9d3..b5548d4f24a2 100644 --- a/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -69,14 +69,6 @@ namespace llvm { // Command line option to enable vtable value profiling. Defined in // ProfileData/InstrProf.cpp: -enable-vtable-value-profiling= extern cl::opt EnableVTableValueProfiling; -// TODO: Remove -debug-info-correlate in next LLVM release, in favor of -// -profile-correlate=debug-info. -cl::opt DebugInfoCorrelate( - "debug-info-correlate", - cl::desc("Use debug info to correlate profiles. (Deprecated, use " - "-profile-correlate=debug-info)"), - cl::init(false)); - LLVM_ABI cl::opt ProfileCorrelate( "profile-correlate", cl::desc("Use debug info or binary file to correlate profiles."), @@ -1047,7 +1039,7 @@ void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) { // in lightweight mode. We need to move the value profile pointer to the // Counter struct to get this working. 
assert( - !DebugInfoCorrelate && ProfileCorrelate == InstrProfCorrelator::NONE && + ProfileCorrelate == InstrProfCorrelator::NONE && "Value profiling is not yet supported with lightweight instrumentation"); GlobalVariable *Name = Ind->getName(); auto It = ProfileDataMap.find(Name); @@ -1504,7 +1496,7 @@ static inline Constant *getVTableAddrForProfData(GlobalVariable *GV) { } void InstrLowerer::getOrCreateVTableProfData(GlobalVariable *GV) { - assert(!DebugInfoCorrelate && + assert(ProfileCorrelate != InstrProfCorrelator::DEBUG_INFO && "Value profiling is not supported with lightweight instrumentation"); if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) return; @@ -1584,8 +1576,7 @@ GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc, // Use internal rather than private linkage so the counter variable shows up // in the symbol table when using debug info for correlation. - if ((DebugInfoCorrelate || - ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) && + if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO && TT.isOSBinFormatMachO() && Linkage == GlobalValue::PrivateLinkage) Linkage = GlobalValue::InternalLinkage; @@ -1691,8 +1682,7 @@ InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) { auto *CounterPtr = setupProfileSection(Inc, IPSK_cnts); PD.RegionCounters = CounterPtr; - if (DebugInfoCorrelate || - ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) { + if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) { LLVMContext &Ctx = M.getContext(); Function *Fn = Inc->getParent()->getParent(); if (auto *SP = Fn->getSubprogram()) { @@ -1737,7 +1727,7 @@ InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) { void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) { // When debug information is correlated to profile data, a data variable // is not needed. 
- if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) + if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) return; GlobalVariable *NamePtr = Inc->getName(); diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp index 71736cfa4d89..af53fa0bae46 100644 --- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -456,7 +456,7 @@ createIRLevelProfileFlagVar(Module &M, ProfileVersion |= VARIANT_MASK_INSTR_ENTRY; if (PGOInstrumentLoopEntries) ProfileVersion |= VARIANT_MASK_INSTR_LOOP_ENTRIES; - if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) + if (ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) ProfileVersion |= VARIANT_MASK_DBG_CORRELATE; if (PGOFunctionEntryCoverage) ProfileVersion |= -- cgit v1.2.3 From 916e8f74a8216e858699fc8533929c013fa3d018 Mon Sep 17 00:00:00 2001 From: Ryotaro Kasuga Date: Fri, 31 Oct 2025 01:06:30 +0900 Subject: [DA] Check nsw when extracting a constant operand of SCEVMul (#164408) Given a `SCEVMulExpr` such as `5 * %m`, `gcdMIVtest` in DA assumes the value as a multiple of 5 in a mathematical sense. However, this is not necessarily true if `5 * %m` overflows, especially because an odd number has an inverse modulo `2^64`. Such incorrect assumptions can lead to invalid analysis results. This patch stops unconditionally extracting a constant operand from `SCEVMulExpr`. Instead, it only allows this when the `SCEVMulExpr` has the `nsw` flag. 
--- llvm/lib/Analysis/DependenceAnalysis.cpp | 22 +++++++++++++--------- llvm/test/Analysis/DependenceAnalysis/GCD.ll | 6 +++--- .../Analysis/DependenceAnalysis/SymbolicSIV.ll | 4 ++-- .../DependenceAnalysis/compute-absolute-value.ll | 2 +- .../DependenceAnalysis/gcd-miv-overflow.ll | 15 ++++++--------- 5 files changed, 25 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Analysis/DependenceAnalysis.cpp b/llvm/lib/Analysis/DependenceAnalysis.cpp index 84ee8c0bf3e1..11d829492a10 100644 --- a/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -2854,14 +2854,18 @@ bool DependenceInfo::testMIV(const SCEV *Src, const SCEV *Dst, banerjeeMIVtest(Src, Dst, Loops, Result); } -// Given a product, e.g., 10*X*Y, returns the first constant operand, -// in this case 10. If there is no constant part, returns std::nullopt. -static std::optional getConstantPart(const SCEV *Expr) { +/// Given a SCEVMulExpr, returns its first operand if its first operand is a +/// constant and the product doesn't overflow in a signed sense. Otherwise, +/// returns std::nullopt. For example, given (10 * X * Y), it returns 10. +/// Notably, if it doesn't have nsw, the multiplication may overflow, and if +/// so, it may not a multiple of 10. 
+static std::optional getConstanCoefficient(const SCEV *Expr) { if (const auto *Constant = dyn_cast(Expr)) return Constant->getAPInt(); if (const auto *Product = dyn_cast(Expr)) if (const auto *Constant = dyn_cast(Product->getOperand(0))) - return Constant->getAPInt(); + if (Product->hasNoSignedWrap()) + return Constant->getAPInt(); return std::nullopt; } @@ -2887,7 +2891,7 @@ bool DependenceInfo::accumulateCoefficientsGCD(const SCEV *Expr, if (AddRec->getLoop() == CurLoop) { CurLoopCoeff = Step; } else { - std::optional ConstCoeff = getConstantPart(Step); + std::optional ConstCoeff = getConstanCoefficient(Step); // If the coefficient is the product of a constant and other stuff, we can // use the constant in the GCD computation. @@ -2940,7 +2944,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, const SCEV *Coeff = AddRec->getStepRecurrence(*SE); // If the coefficient is the product of a constant and other stuff, // we can use the constant in the GCD computation. - std::optional ConstCoeff = getConstantPart(Coeff); + std::optional ConstCoeff = getConstanCoefficient(Coeff); if (!ConstCoeff) return false; RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff->abs()); @@ -2958,7 +2962,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, const SCEV *Coeff = AddRec->getStepRecurrence(*SE); // If the coefficient is the product of a constant and other stuff, // we can use the constant in the GCD computation. - std::optional ConstCoeff = getConstantPart(Coeff); + std::optional ConstCoeff = getConstanCoefficient(Coeff); if (!ConstCoeff) return false; RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff->abs()); @@ -2979,7 +2983,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, } else if (const SCEVMulExpr *Product = dyn_cast(Operand)) { // Search for constant operand to participate in GCD; // If none found; return false. 
- std::optional ConstOp = getConstantPart(Product); + std::optional ConstOp = getConstanCoefficient(Product); if (!ConstOp) return false; ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD, ConstOp->abs()); @@ -3032,7 +3036,7 @@ bool DependenceInfo::gcdMIVtest(const SCEV *Src, const SCEV *Dst, Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff); // If the coefficient is the product of a constant and other stuff, // we can use the constant in the GCD computation. - std::optional ConstCoeff = getConstantPart(Delta); + std::optional ConstCoeff = getConstanCoefficient(Delta); if (!ConstCoeff) // The difference of the two coefficients might not be a product // or constant, in which case we give up on this direction. diff --git a/llvm/test/Analysis/DependenceAnalysis/GCD.ll b/llvm/test/Analysis/DependenceAnalysis/GCD.ll index 03343e7a9821..cb14d189afe4 100644 --- a/llvm/test/Analysis/DependenceAnalysis/GCD.ll +++ b/llvm/test/Analysis/DependenceAnalysis/GCD.ll @@ -254,7 +254,7 @@ define void @gcd4(ptr %A, ptr %B, i64 %M, i64 %N) nounwind uwtable ssp { ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4 ; CHECK-NEXT: da analyze - output [* *]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx16, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - flow [* *|<]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx16, align 4 --> Dst: %0 = load i32, ptr %arrayidx16, align 4 @@ -322,7 +322,7 @@ define void @gcd5(ptr %A, ptr %B, i64 %M, i64 %N) nounwind uwtable ssp { ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4 ; CHECK-NEXT: da analyze - output [* *]! 
; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx16, align 4 -; CHECK-NEXT: da analyze - flow [<> *]! +; CHECK-NEXT: da analyze - flow [* *|<]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.11, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx16, align 4 --> Dst: %0 = load i32, ptr %arrayidx16, align 4 @@ -390,7 +390,7 @@ define void @gcd6(i64 %n, ptr %A, ptr %B) nounwind uwtable ssp { ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store i32 %conv, ptr %arrayidx5, align 4 ; CHECK-NEXT: da analyze - output [* *]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: %2 = load i32, ptr %arrayidx9, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - flow [* *|<]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx5, align 4 --> Dst: store i32 %2, ptr %B.addr.12, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %2 = load i32, ptr %arrayidx9, align 4 --> Dst: %2 = load i32, ptr %arrayidx9, align 4 diff --git a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll index cdfaec76fa89..73a415baef4c 100644 --- a/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll +++ b/llvm/test/Analysis/DependenceAnalysis/SymbolicSIV.ll @@ -384,7 +384,7 @@ define void @symbolicsiv6(ptr %A, ptr %B, i64 %n, i64 %N, i64 %M) nounwind uwtab ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %0 = load i32, ptr %arrayidx7, align 4 -; CHECK-NEXT: da analyze - none! +; CHECK-NEXT: da analyze - flow [*|<]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %0, ptr %B.addr.02, align 4 ; CHECK-NEXT: da analyze - confused! 
; CHECK-NEXT: Src: %0 = load i32, ptr %arrayidx7, align 4 --> Dst: %0 = load i32, ptr %arrayidx7, align 4 @@ -440,7 +440,7 @@ define void @symbolicsiv7(ptr %A, ptr %B, i64 %n, i64 %N, i64 %M) nounwind uwtab ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %conv, ptr %arrayidx, align 4 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: %1 = load i32, ptr %arrayidx6, align 4 -; CHECK-NEXT: da analyze - flow [<>]! +; CHECK-NEXT: da analyze - flow [*|<]! ; CHECK-NEXT: Src: store i32 %conv, ptr %arrayidx, align 4 --> Dst: store i32 %1, ptr %B.addr.02, align 4 ; CHECK-NEXT: da analyze - confused! ; CHECK-NEXT: Src: %1 = load i32, ptr %arrayidx6, align 4 --> Dst: %1 = load i32, ptr %arrayidx6, align 4 diff --git a/llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll b/llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll index 64fad37ab699..783150af2cd1 100644 --- a/llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll +++ b/llvm/test/Analysis/DependenceAnalysis/compute-absolute-value.ll @@ -18,7 +18,7 @@ define void @unknown_sign(ptr %a, i64 %k) { ; CHECK-NEXT: Src: store i8 1, ptr %idx.0, align 1 --> Dst: store i8 1, ptr %idx.0, align 1 ; CHECK-NEXT: da analyze - none! ; CHECK-NEXT: Src: store i8 1, ptr %idx.0, align 1 --> Dst: store i8 2, ptr %idx.1, align 1 -; CHECK-NEXT: da analyze - output [<>]! +; CHECK-NEXT: da analyze - output [*|<]! ; CHECK-NEXT: Src: store i8 2, ptr %idx.1, align 1 --> Dst: store i8 2, ptr %idx.1, align 1 ; CHECK-NEXT: da analyze - none! 
; diff --git a/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll b/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll index 43f66dd7d097..9169ac323d83 100644 --- a/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll +++ b/llvm/test/Analysis/DependenceAnalysis/gcd-miv-overflow.ll @@ -13,23 +13,20 @@ ; offset1 += 3; ; } ; -; FIXME: DependenceAnalysis currently detects no dependency between the two -; stores, but it does exist. E.g., consider `m` is 12297829382473034411, which -; is a modular multiplicative inverse of 3 under modulo 2^64. Then `offset0` is -; effectively `i + 4`, so accesses will be as follows: +; Dependency exists between the two stores. E.g., consider `m` is +; 12297829382473034411, which is a modular multiplicative inverse of 3 under +; modulo 2^64. Then `offset0` is effectively `i + 4`, so accesses will be as +; follows: ; ; - A[offset0] : A[4], A[5], A[6], ... ; - A[offset1] : A[0], A[3], A[6], ... ; -; The root cause is that DA interprets `3*m` in non-modular arithmetic, which -; isn't necessarily true due to overflow. -; define void @gcdmiv_coef_ovfl(ptr %A, i64 %m) { ; CHECK-ALL-LABEL: 'gcdmiv_coef_ovfl' ; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1 ; CHECK-ALL-NEXT: da analyze - none! ; CHECK-ALL-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 -; CHECK-ALL-NEXT: da analyze - none! +; CHECK-ALL-NEXT: da analyze - output [*|<]! ; CHECK-ALL-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 ; CHECK-ALL-NEXT: da analyze - none! ; @@ -37,7 +34,7 @@ define void @gcdmiv_coef_ovfl(ptr %A, i64 %m) { ; CHECK-GCD-MIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 1, ptr %gep.0, align 1 ; CHECK-GCD-MIV-NEXT: da analyze - consistent output [*]! ; CHECK-GCD-MIV-NEXT: Src: store i8 1, ptr %gep.0, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 -; CHECK-GCD-MIV-NEXT: da analyze - none! 
+; CHECK-GCD-MIV-NEXT: da analyze - consistent output [*|<]! ; CHECK-GCD-MIV-NEXT: Src: store i8 2, ptr %gep.1, align 1 --> Dst: store i8 2, ptr %gep.1, align 1 ; CHECK-GCD-MIV-NEXT: da analyze - consistent output [*]! ; -- cgit v1.2.3 From 9a51879253dbc21bb3ad8f0b04345fe76f9d2097 Mon Sep 17 00:00:00 2001 From: Ebin-McW Date: Thu, 30 Oct 2025 21:52:42 +0530 Subject: [Flang] Solved issue with inline compiler directive (#143699) Issue was with pointer passing. Fixes #139297 --- flang/lib/Parser/prescan.cpp | 2 +- flang/test/Parser/inline-directives.f90 | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) create mode 100644 flang/test/Parser/inline-directives.f90 diff --git a/flang/lib/Parser/prescan.cpp b/flang/lib/Parser/prescan.cpp index 4739da0676fa..fd69404f313d 100644 --- a/flang/lib/Parser/prescan.cpp +++ b/flang/lib/Parser/prescan.cpp @@ -557,7 +557,7 @@ bool Prescanner::MustSkipToEndOfLine() const { return true; // skip over ignored columns in right margin (73:80) } else if (*at_ == '!' && !inCharLiteral_ && (!inFixedForm_ || tabInCurrentLine_ || column_ != 6)) { - return !IsCompilerDirectiveSentinel(at_); + return !IsCompilerDirectiveSentinel(at_ + 1); } else { return false; } diff --git a/flang/test/Parser/inline-directives.f90 b/flang/test/Parser/inline-directives.f90 new file mode 100644 index 000000000000..24d4f95759a6 --- /dev/null +++ b/flang/test/Parser/inline-directives.f90 @@ -0,0 +1,29 @@ +! RUN: %flang_fc1 -fdebug-unparse %s 2>&1 | FileCheck %s + +! Test that checks whether compiler directives can be inlined without mistaking it as comment. 
+ +module m +contains +#define MACRO(X) subroutine func1(X); real(2) :: X; !dir$ ignore_tkr(d) X; end subroutine func1; +MACRO(foo) + +!CHECK: SUBROUTINE func1 (foo) +!CHECK: !DIR$ IGNORE_TKR (d) foo +!CHECK: END SUBROUTINE func1 + + subroutine func2(foo) + real(2) :: foo; !dir$ ignore_tkr(d) foo; + end subroutine func2 + +!CHECK: SUBROUTINE func2 (foo) +!CHECK: !DIR$ IGNORE_TKR (d) foo +!CHECK: END SUBROUTINE func2 + + subroutine func3(foo) + real(2) :: foo; !dir$ ignore_tkr(d) foo; end subroutine func3; + +!CHECK: SUBROUTINE func3 (foo) +!CHECK: !DIR$ IGNORE_TKR (d) foo +!CHECK: END SUBROUTINE func3 + +end module -- cgit v1.2.3 From f81444637cddb1c3d4bf1d14ae999994e476b1ce Mon Sep 17 00:00:00 2001 From: David Spickett Date: Thu, 30 Oct 2025 16:21:24 +0000 Subject: [lldb][AArch64][test] Require SVE for some Linux tests These tests had only ever been run on SVE or SVE+SME systems. While investigating #138717 I found they failed on an SME only system. This happens because before the first stop we try to initialise SVE registers while outside of streaming mode. Which causes a SIGILL. To fix this, require SVE to be present. I could go in and make these work on SME only, but it's more complex and I will be adding SME only specific tests in future anyway. 
--- .../register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py b/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py index eb121ecbfdba..a985ebbced71 100644 --- a/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py +++ b/lldb/test/API/commands/register/register/aarch64_dynamic_regset/TestArm64DynamicRegsets.py @@ -97,6 +97,9 @@ class RegisterCommandsTestCase(TestBase): @skipIf(oslist=no_match(["linux"])) def test_aarch64_dynamic_regset_config(self): """Test AArch64 Dynamic Register sets configuration.""" + if not self.isAArch64SVE(): + self.skipTest("SVE must be present") + register_sets = self.setup_register_config_test() for registerSet in register_sets: @@ -259,6 +262,8 @@ class RegisterCommandsTestCase(TestBase): def test_aarch64_dynamic_regset_config_sme_write_za_to_enable(self): """Test that ZA and ZT0 (if present) shows as 0s when disabled and can be enabled by writing to ZA.""" + if not self.isAArch64SVE(): + self.skipTest("SVE must be present.") if not self.isAArch64SME(): self.skipTest("SME must be present.") @@ -270,6 +275,8 @@ class RegisterCommandsTestCase(TestBase): def test_aarch64_dynamic_regset_config_sme_write_zt0_to_enable(self): """Test that ZA and ZT0 (if present) shows as 0s when disabled and can be enabled by writing to ZT0.""" + if not self.isAArch64SVE(): + self.skipTest("SVE must be present.") if not self.isAArch64SME(): self.skipTest("SME must be present.") if not self.isAArch64SME2(): -- cgit v1.2.3 From 9b02901b26ff8ddf0cd82785c3ceb9ac587a4c29 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 30 Oct 2025 09:27:47 -0700 Subject: [lit] Add support for setting limits to unlimited This is used by a couple compiler-rt tests. 
Reviewers: petrhosek, ilovepi Reviewed By: ilovepi Pull Request: https://github.com/llvm/llvm-project/pull/165123 --- llvm/utils/lit/lit/TestRunner.py | 19 +++++++++++++++---- .../tests/Inputs/shtest-ulimit/ulimit_unlimited.txt | 6 ++++++ llvm/utils/lit/tests/shtest-ulimit.py | 8 +++++++- 3 files changed, 28 insertions(+), 5 deletions(-) create mode 100644 llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_unlimited.txt diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py index 4a9b3c618e4f..76beebd757a7 100644 --- a/llvm/utils/lit/lit/TestRunner.py +++ b/llvm/utils/lit/lit/TestRunner.py @@ -600,20 +600,31 @@ def executeBuiltinUmask(cmd, shenv): def executeBuiltinUlimit(cmd, shenv): """executeBuiltinUlimit - Change the current limits.""" - if os.name != "posix": + try: + # Try importing the resource module (available on POSIX systems) and + # emit an error where it does not exist (e.g., Windows). + import resource + except ImportError: raise InternalShellError(cmd, "'ulimit' not supported on this system") if len(cmd.args) != 3: raise InternalShellError(cmd, "'ulimit' requires two arguments") try: - new_limit = int(cmd.args[2]) + if cmd.args[2] == "unlimited": + new_limit = resource.RLIM_INFINITY + else: + new_limit = int(cmd.args[2]) except ValueError as err: raise InternalShellError(cmd, "Error: 'ulimit': %s" % str(err)) if cmd.args[1] == "-v": - shenv.ulimit["RLIMIT_AS"] = new_limit * 1024 + if new_limit != resource.RLIM_INFINITY: + new_limit = new_limit * 1024 + shenv.ulimit["RLIMIT_AS"] = new_limit elif cmd.args[1] == "-n": shenv.ulimit["RLIMIT_NOFILE"] = new_limit elif cmd.args[1] == "-s": - shenv.ulimit["RLIMIT_STACK"] = new_limit * 1024 + if new_limit != resource.RLIM_INFINITY: + new_limit = new_limit * 1024 + shenv.ulimit["RLIMIT_STACK"] = new_limit elif cmd.args[1] == "-f": shenv.ulimit["RLIMIT_FSIZE"] = new_limit else: diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_unlimited.txt 
b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_unlimited.txt new file mode 100644 index 000000000000..b8aa3d507171 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_unlimited.txt @@ -0,0 +1,6 @@ +# RUN: ulimit -f 5 +# RUN: %{python} %S/print_limits.py +# RUN: ulimit -f unlimited +# RUN: %{python} %S/print_limits.py +# Fail the test so that we can assert on the output. +# RUN: not echo return diff --git a/llvm/utils/lit/tests/shtest-ulimit.py b/llvm/utils/lit/tests/shtest-ulimit.py index 21e5a5e2491d..e15e19092030 100644 --- a/llvm/utils/lit/tests/shtest-ulimit.py +++ b/llvm/utils/lit/tests/shtest-ulimit.py @@ -11,7 +11,7 @@ # RUN: not %{lit} -a -v %{inputs}/shtest-ulimit --order=lexical \ # RUN: | FileCheck -DBASE_NOFILE_LIMIT=%{readfile:%t.nofile_limit} %s -# CHECK: -- Testing: 3 tests{{.*}} +# CHECK: -- Testing: 4 tests{{.*}} # CHECK-LABEL: FAIL: shtest-ulimit :: ulimit-bad-arg.txt ({{[^)]*}}) # CHECK: ulimit -n @@ -25,3 +25,9 @@ # CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_reset.txt ({{[^)]*}}) # CHECK: RLIMIT_NOFILE=[[BASE_NOFILE_LIMIT]] + +# CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_unlimited.txt ({{[^)]*}}) +# CHECK: ulimit -f 5 +# CHECK: RLIMIT_FSIZE=5 +# CHECK: ulimit -f unlimited +# CHECK: RLIMIT_FSIZE=-1 -- cgit v1.2.3 From 9ed889631a61acc09a1086dc5e5298ec1cd69776 Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Thu, 30 Oct 2025 16:30:59 +0000 Subject: [lldb][DWARF] Support DW_AT_bit_size on type tags (#165686) One (DWARF-spec compliant) example is: https://github.com/llvm/llvm-project/pull/164372, where we attach a `DW_AT_bit_size` to `_BitInt` types that can't be exactly described by a byte-size. This patch adds support for `DW_AT_bit_size` to `DWARFASTParserClang` when parsing type tags. Note, we don't use this bit-size yet, but will do so in follow-up patches. 
--- .../SymbolFile/DWARF/DWARFASTParserClang.cpp | 4 + .../Plugins/SymbolFile/DWARF/DWARFASTParserClang.h | 1 + .../SymbolFile/DWARF/DWARFASTParserClangTests.cpp | 90 ++++++++++++++++++++++ 3 files changed, 95 insertions(+) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp index 36bc17680f3f..c049829f3721 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp @@ -450,6 +450,10 @@ ParsedDWARFTypeAttributes::ParsedDWARFTypeAttributes(const DWARFDIE &die) { byte_size = form_value.Unsigned(); break; + case DW_AT_bit_size: + data_bit_size = form_value.Unsigned(); + break; + case DW_AT_alignment: alignment = form_value.Unsigned(); break; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h index da58f4c14622..f5f707129d67 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h @@ -574,6 +574,7 @@ struct ParsedDWARFTypeAttributes { lldb_private::plugin::dwarf::DWARFFormValue type; lldb::LanguageType class_language = lldb::eLanguageTypeUnknown; std::optional byte_size; + std::optional data_bit_size; std::optional alignment; size_t calling_convention = llvm::dwarf::DW_CC_normal; uint32_t bit_stride = 0; diff --git a/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp b/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp index 1abce6999874..064ed6d1d3e5 100644 --- a/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp +++ b/lldb/unittests/SymbolFile/DWARF/DWARFASTParserClangTests.cpp @@ -1651,3 +1651,93 @@ DWARF: EXPECT_EQ(param_die, ast_parser.GetObjectParameter(sub2, context_die)); } } + +TEST_F(DWARFASTParserClangTests, TestTypeBitSize) { + // Tests that we correctly parse DW_AT_bit_size of a DW_AT_base_type. 
+ + const char *yamldata = R"( +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_AARCH64 +DWARF: + debug_str: + - _BitInt(2) + debug_abbrev: + - ID: 0 + Table: + - Code: 0x1 + Tag: DW_TAG_compile_unit + Children: DW_CHILDREN_yes + Attributes: + - Attribute: DW_AT_language + Form: DW_FORM_data2 + - Code: 0x2 + Tag: DW_TAG_base_type + Children: DW_CHILDREN_no + Attributes: + - Attribute: DW_AT_name + Form: DW_FORM_strp + - Attribute: DW_AT_encoding + Form: DW_FORM_data1 + - Attribute: DW_AT_byte_size + Form: DW_FORM_data1 + - Attribute: DW_AT_bit_size + Form: DW_FORM_data1 + + debug_info: + - Version: 5 + UnitType: DW_UT_compile + AddrSize: 8 + Entries: + +# DW_TAG_compile_unit +# DW_AT_language [DW_FORM_data2] (DW_LANG_C_plus_plus) + + - AbbrCode: 0x1 + Values: + - Value: 0x04 + +# DW_TAG_base_type +# DW_AT_name [DW_FORM_strp] ('_BitInt(2)') + + - AbbrCode: 0x2 + Values: + - Value: 0x0 + - Value: 0x05 + - Value: 0x01 + - Value: 0x02 +... +)"; + + YAMLModuleTester t(yamldata); + + DWARFUnit *unit = t.GetDwarfUnit(); + ASSERT_NE(unit, nullptr); + const DWARFDebugInfoEntry *cu_entry = unit->DIE().GetDIE(); + ASSERT_EQ(cu_entry->Tag(), DW_TAG_compile_unit); + ASSERT_EQ(unit->GetDWARFLanguageType(), DW_LANG_C_plus_plus); + DWARFDIE cu_die(unit, cu_entry); + + auto holder = std::make_unique("ast"); + auto &ast_ctx = *holder->GetAST(); + DWARFASTParserClangStub ast_parser(ast_ctx); + + auto type_die = cu_die.GetFirstChild(); + ASSERT_TRUE(type_die.IsValid()); + ASSERT_EQ(type_die.Tag(), DW_TAG_base_type); + + ParsedDWARFTypeAttributes attrs(type_die); + EXPECT_EQ(attrs.byte_size.value_or(0), 1U); + EXPECT_EQ(attrs.data_bit_size.value_or(0), 2U); + + SymbolContext sc; + auto type_sp = + ast_parser.ParseTypeFromDWARF(sc, type_die, /*type_is_new_ptr=*/nullptr); + ASSERT_NE(type_sp, nullptr); + + EXPECT_EQ(llvm::expectedToOptional(type_sp->GetByteSize(nullptr)).value_or(0), + 1U); +} -- cgit v1.2.3 From 
1523332fbd43c47b76dc5117de3c5ac674d69b7d Mon Sep 17 00:00:00 2001 From: Erik Enikeev Date: Thu, 30 Oct 2025 19:36:55 +0300 Subject: [ARM] Mark function calls as possibly changing FPSCR (#160699) This patch does the same changes as D143001 for AArch64. This PR is part of the work on adding strict FP support in ARM, which was previously discussed in #137101. --- llvm/lib/Target/ARM/ARMISelLowering.cpp | 5 +++++ llvm/lib/Target/ARM/ARMISelLowering.h | 2 ++ llvm/test/CodeGen/ARM/strict-fp-func.ll | 13 +++++++++++++ 3 files changed, 20 insertions(+) create mode 100644 llvm/test/CodeGen/ARM/strict-fp-func.ll diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index a4d3d62e9f48..6b0653457cba 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -22109,6 +22109,11 @@ bool ARMTargetLowering::isComplexDeinterleavingOperationSupported( ScalarTy->isIntegerTy(32)); } +ArrayRef ARMTargetLowering::getRoundingControlRegisters() const { + static const MCPhysReg RCRegs[] = {ARM::FPSCR_RM}; + return RCRegs; +} + Value *ARMTargetLowering::createComplexDeinterleavingIR( IRBuilderBase &B, ComplexDeinterleavingOperation OperationType, ComplexDeinterleavingRotation Rotation, Value *InputA, Value *InputB, diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h index 357d2c5d2fad..bf3438b0d880 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/llvm/lib/Target/ARM/ARMISelLowering.h @@ -1009,6 +1009,8 @@ class VectorType; bool isUnsupportedFloatingType(EVT VT) const; + ArrayRef getRoundingControlRegisters() const override; + SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal, SDValue ARMcc, SDValue Flags, SelectionDAG &DAG) const; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, diff --git a/llvm/test/CodeGen/ARM/strict-fp-func.ll b/llvm/test/CodeGen/ARM/strict-fp-func.ll new file mode 100644 index 000000000000..39bb2b46bdac 
--- /dev/null +++ b/llvm/test/CodeGen/ARM/strict-fp-func.ll @@ -0,0 +1,13 @@ +; RUN: llc -mtriple arm-none-eabi -stop-after=finalize-isel %s -o - | FileCheck %s + +define float @func_02(float %x, float %y) strictfp nounwind { + %call = call float @func_01(float %x) strictfp + %res = call float @llvm.experimental.constrained.fadd.f32(float %call, float %y, metadata !"round.dynamic", metadata !"fpexcept.ignore") strictfp + ret float %res +} +; CHECK-LABEL: name: func_02 +; CHECK: BL @func_01, {{.*}}, implicit-def $fpscr_rm + + +declare float @func_01(float) +declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata) -- cgit v1.2.3 From 88cee4c73787c977b03b89f22309c2e52769e0ec Mon Sep 17 00:00:00 2001 From: Ellis Hoag Date: Thu, 30 Oct 2025 09:42:12 -0700 Subject: [profile] Use correct flag in InstrProf test (#165738) The `--debug-info-correlate` flag was removed in https://github.com/llvm/llvm-project/pull/165289, but I must have forgotten this test. Replace with `--profile-correlate=debug-info` to fix. 
--- .../test/profile/Linux/instrprof-debug-info-correlate-warnings.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate-warnings.c b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate-warnings.c index 5069c6340b64..25022f241a6d 100644 --- a/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate-warnings.c +++ b/compiler-rt/test/profile/Linux/instrprof-debug-info-correlate-warnings.c @@ -1,6 +1,6 @@ // Disable full debug info and verify that we get warnings during merging -// RUN: %clang_pgogen -o %t -gline-tables-only -mllvm --debug-info-correlate -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp +// RUN: %clang_pgogen -o %t -gline-tables-only -mllvm --profile-correlate=debug-info -mllvm --disable-vp=true %S/../Inputs/instrprof-debug-info-correlate-main.cpp %S/../Inputs/instrprof-debug-info-correlate-foo.cpp // RUN: env LLVM_PROFILE_FILE=%t.proflite %run %t // RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.proflite --max-debug-info-correlation-warnings=2 2>&1 >/dev/null | FileCheck %s --check-prefixes=CHECK,LIMIT --implicit-check-not=warning // RUN: llvm-profdata merge -o %t.profdata --debug-info=%t %t.proflite --max-debug-info-correlation-warnings=0 2>&1 >/dev/null | FileCheck %s --check-prefixes=CHECK,NOLIMIT --implicit-check-not=warning -- cgit v1.2.3 From 6b5afdc3ab3e2791baa1946acb4ee3f0b6db8ce3 Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Thu, 30 Oct 2025 09:44:25 -0700 Subject: [AMDGPU] Support bfloat comparison for ballot intrinsic (#165495) We do not have native instructions for direct bfloat comparisons. However, we can expand bfloat to float, and do float comparison instead. TODO: handle bfloat comparison for ballot intrinsic on global isel path. 
Fixes: SWDEV-563403 --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 10 ++++++++-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll | 21 +++++++++++++++++++++ llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll | 12 ++++++++++++ 3 files changed, 41 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp index b34ab2a7e08e..8bb28084159e 100644 --- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -7035,9 +7035,15 @@ static SDValue lowerBALLOTIntrinsic(const SITargetLowering &TLI, SDNode *N, SDLoc SL(N); if (Src.getOpcode() == ISD::SETCC) { + SDValue Op0 = Src.getOperand(0); + SDValue Op1 = Src.getOperand(1); + // Need to expand bfloat to float for comparison (setcc). + if (Op0.getValueType() == MVT::bf16) { + Op0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Op0); + Op1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Op1); + } // (ballot (ISD::SETCC ...)) -> (AMDGPUISD::SETCC ...) 
- return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Src.getOperand(0), - Src.getOperand(1), Src.getOperand(2)); + return DAG.getNode(AMDGPUISD::SETCC, SL, VT, Op0, Op1, Src.getOperand(2)); } if (const ConstantSDNode *Arg = dyn_cast(Src)) { // (ballot 0) -> 0 diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll index aa591d28eb34..c1f3a12dba57 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll @@ -591,3 +591,24 @@ exit: store i32 %ballot, ptr addrspace(1) %out ret void } + +define amdgpu_cs i32 @compare_bfloats(bfloat %x, bfloat %y) { +; GFX10-LABEL: compare_bfloats: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; GFX10-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; GFX10-NEXT: v_cmp_gt_f32_e64 s0, v0, v1 +; GFX10-NEXT: ; return to shader part epilog +; +; GFX11-LABEL: compare_bfloats: +; GFX11: ; %bb.0: +; GFX11-NEXT: v_mov_b16_e32 v2.l, 0 +; GFX11-NEXT: v_mov_b16_e32 v2.h, v1.l +; GFX11-NEXT: v_mov_b16_e32 v1.h, v0.l +; GFX11-NEXT: v_mov_b16_e32 v1.l, v2.l +; GFX11-NEXT: v_cmp_gt_f32_e64 s0, v1, v2 +; GFX11-NEXT: ; return to shader part epilog + %cmp = fcmp ogt bfloat %x, %y + %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp) + ret i32 %ballot +} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll index 30c2c260a327..827a01ff33d0 100644 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll +++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll @@ -557,3 +557,15 @@ exit: store i64 %ballot, ptr addrspace(1) %out ret void } + +define amdgpu_cs i64 @compare_bfloats(bfloat %x, bfloat %y) { +; CHECK-LABEL: compare_bfloats: +; CHECK: ; %bb.0: +; CHECK-NEXT: v_lshlrev_b32_e32 v1, 16, v1 +; CHECK-NEXT: v_lshlrev_b32_e32 v0, 16, v0 +; CHECK-NEXT: v_cmp_gt_f32_e64 s[0:1], v0, v1 +; CHECK-NEXT: ; return to shader part epilog + %cmp = fcmp ogt bfloat %x, %y + %ballot = 
call i64 @llvm.amdgcn.ballot.i64(i1 %cmp) + ret i64 %ballot +} -- cgit v1.2.3 From 9cf3e8a2aca696aa038be2b845e005a9fcfd8cdf Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 21 Aug 2025 12:32:28 -0700 Subject: [MLIR] Apply clang-tidy fixes for bugprone-argument-comment in TestTransformDialectExtension.cpp (NFC) --- mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp b/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp index 496f18bc49fa..61db9d2b4446 100644 --- a/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp +++ b/mlir/test/lib/Dialect/Transform/TestTransformDialectExtension.cpp @@ -797,7 +797,7 @@ DiagnosedSilenceableFailure mlir::test::TestProduceInvalidIR::applyToOne( // Provide some IR that does not verify. rewriter.setInsertionPointToStart(&target->getRegion(0).front()); TestDummyPayloadOp::create(rewriter, target->getLoc(), TypeRange(), - ValueRange(), /*failToVerify=*/true); + ValueRange(), /*fail_to_verify=*/true); return DiagnosedSilenceableFailure::success(); } -- cgit v1.2.3 From 24c75a21b8109908ed10d795d54c837b4621e941 Mon Sep 17 00:00:00 2001 From: Rana Pratap Reddy <109514914+ranapratap55@users.noreply.github.com> Date: Thu, 30 Oct 2025 22:20:28 +0530 Subject: [AMDGPU][Clang] Support for type inferring extended image builtins for AMDGPU (#164358) Introduces the builtins for extended image insts for amdgcn. 
--- clang/include/clang/Basic/Builtins.def | 1 + clang/include/clang/Basic/BuiltinsAMDGPU.def | 41 + clang/lib/AST/ASTContext.cpp | 5 + clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 81 +- clang/lib/Sema/SemaAMDGPU.cpp | 43 +- clang/test/CodeGen/builtins-extended-image.c | 1528 ++++++++++++++++++++ .../builtins-extended-image-param-gfx1100-err.cl | 227 +++ .../builtins-extended-image-param-gfx942-err.cl | 227 +++ 8 files changed, 2150 insertions(+), 3 deletions(-) create mode 100644 clang/test/CodeGen/builtins-extended-image.c create mode 100644 clang/test/SemaOpenCL/builtins-extended-image-param-gfx1100-err.cl create mode 100644 clang/test/SemaOpenCL/builtins-extended-image-param-gfx942-err.cl diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def index b856ad145824..3a5b72e20afa 100644 --- a/clang/include/clang/Basic/Builtins.def +++ b/clang/include/clang/Basic/Builtins.def @@ -43,6 +43,7 @@ // SJ -> sigjmp_buf // K -> ucontext_t // p -> pid_t +// e -> _Float16 for HIP/C++ and __fp16 for OpenCL // . -> "...". This may only occur at the end of the function list. 
// // Types may be prefixed with the following modifiers: diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def index f265d82efee7..36cb527a9c80 100644 --- a/clang/include/clang/Basic/BuiltinsAMDGPU.def +++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def @@ -967,6 +967,47 @@ TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f32_f32, "V4fifffQtV4ibii", "n TARGET_BUILTIN(__builtin_amdgcn_image_sample_3d_v4f16_f32, "V4hifffQtV4ibii", "nc", "image-insts") TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f32_f32, "V4fifffQtV4ibii", "nc", "image-insts") TARGET_BUILTIN(__builtin_amdgcn_image_sample_cube_v4f16_f32, "V4hifffQtV4ibii", "nc", "image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_1d_v4f32_f32, "V4fifQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_1d_v4f16_f32, "V4eifQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32, "V4fiffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32, "V4eiffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2d_f32_f32, "fiffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2d_v4f32_f32, "V4fiffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2d_v4f16_f32, "V4eiffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2darray_f32_f32, "fifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_3d_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts") 
+TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_3d_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_cube_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_lz_cube_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_1d_v4f32_f32, "V4fiffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_1d_v4f16_f32, "V4eiffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_1darray_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_1darray_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2d_f32_f32, "fifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2d_v4f32_f32, "V4fifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2d_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2darray_f32_f32, "fiffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2darray_v4f32_f32, "V4fiffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_2darray_v4f16_f32, "V4eiffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_3d_v4f32_f32, "V4fiffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_3d_v4f16_f32, "V4eiffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_cube_v4f32_f32, "V4fiffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_l_cube_v4f16_f32, "V4eiffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_1d_v4f32_f32, 
"V4fifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_1d_v4f16_f32, "V4eifffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_1darray_v4f32_f32, "V4fiffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_1darray_v4f16_f32, "V4eiffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2d_f32_f32, "fiffffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2d_v4f32_f32, "V4fiffffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2d_v4f16_f32, "V4eiffffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2darray_f32_f32, "fifffffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2darray_v4f32_f32, "V4fifffffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_2darray_v4f16_f32, "V4eifffffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_3d_v4f32_f32, "V4fifffffffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_sample_d_3d_v4f16_f32, "V4eifffffffffQtV4ibii", "nc", "extended-image-insts") +TARGET_BUILTIN(__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32, "V4fiffQtV4ibii", "nc", "extended-image-insts") #undef BUILTIN #undef TARGET_BUILTIN diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 687cd46773f4..2669f6245671 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -12403,6 +12403,11 @@ static QualType DecodeTypeFromStr(const char *&Str, const ASTContext &Context, // Read the base type. switch (*Str++) { default: llvm_unreachable("Unknown builtin type letter!"); + case 'e': + assert(HowLong == 0 && !Signed && !Unsigned && + "Bad modifiers used with 'e'!"); + Type = Context.getLangOpts().OpenCL ? 
Context.HalfTy : Context.Float16Ty; + break; case 'x': assert(HowLong == 0 && !Signed && !Unsigned && "Bad modifiers used with 'x'!"); diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp index f49a5af2c958..9eab70955b6b 100644 --- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp @@ -647,8 +647,8 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_ballot_w64: { llvm::Type *ResultType = ConvertType(E->getType()); llvm::Value *Src = EmitScalarExpr(E->getArg(0)); - Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, { ResultType }); - return Builder.CreateCall(F, { Src }); + Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_ballot, {ResultType}); + return Builder.CreateCall(F, {Src}); } case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w32: case AMDGPU::BI__builtin_amdgcn_inverse_ballot_w64: { @@ -1139,6 +1139,83 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: return emitAMDGCNImageOverloadedReturnType( *this, E, Intrinsic::amdgcn_image_sample_cube, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_lz_1d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_l_1d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_d_1d, false); + case 
clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_lz_2d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_l_2d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_d_2d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_lz_3d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_l_3d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_d_3d, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_lz_cube, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32: + case 
clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_l_cube, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_lz_1darray, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_l_1darray, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_d_1darray, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_lz_2darray, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_l_2darray, false); + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32: + case clang::AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_sample_d_2darray, false); + case 
clang::AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: + return emitAMDGCNImageOverloadedReturnType( + *this, E, Intrinsic::amdgcn_image_gather4_lz_2d, false); case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_16x16x128_f8f6f4: case AMDGPU::BI__builtin_amdgcn_mfma_scale_f32_32x32x64_f8f6f4: { llvm::FixedVectorType *VT = FixedVectorType::get(Builder.getInt32Ty(), 8); diff --git a/clang/lib/Sema/SemaAMDGPU.cpp b/clang/lib/Sema/SemaAMDGPU.cpp index e32f4376a5eb..139c4abc040d 100644 --- a/clang/lib/Sema/SemaAMDGPU.cpp +++ b/clang/lib/Sema/SemaAMDGPU.cpp @@ -153,7 +153,48 @@ bool SemaAMDGPU::CheckAMDGCNBuiltinFunctionCall(unsigned BuiltinID, case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f32_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_3d_v4f16_f32: case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f32_f32: - case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: { + case AMDGPU::BI__builtin_amdgcn_image_sample_cube_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_3d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_lz_cube_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f32_f32: + case 
AMDGPU::BI__builtin_amdgcn_image_sample_l_1d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_1darray_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_2d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_2darray_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_3d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_l_cube_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_1d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_1darray_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_2d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_2darray_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f32_f32: + case AMDGPU::BI__builtin_amdgcn_image_sample_d_3d_v4f16_f32: + case AMDGPU::BI__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32: { StringRef FeatureList( getASTContext().BuiltinInfo.getRequiredFeatures(BuiltinID)); if (!Builtin::evaluateRequiredTargetFeatures(FeatureList, diff --git a/clang/test/CodeGen/builtins-extended-image.c b/clang/test/CodeGen/builtins-extended-image.c new file mode 100644 index 000000000000..0dbf81dabd77 --- /dev/null +++ 
b/clang/test/CodeGen/builtins-extended-image.c @@ -0,0 +1,1528 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 -target-feature +extended-image-insts %s -emit-llvm -o - | FileCheck %s + +typedef int int4 __attribute__((ext_vector_type(4))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef _Float16 half4 __attribute__((ext_vector_type(4))); + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_r( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], 
align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_r(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(1, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_g( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: 
[[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 2, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_g(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(2, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_b( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: 
[[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 4, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 
test_amdgcn_image_gather4_lz_2d_v4f32_f32_b(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(4, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_gather4_lz_2d_v4f32_f32_a( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] 
= load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32.v8i32.v4i32(i32 8, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_a(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(8, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_1d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: 
[[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32 +// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP3]] +// +float4 test_amdgcn_image_sample_lz_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1d_v4f32_f32(100, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_1d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: 
[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_sample_l_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_1d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr 
[[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> 
@llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_d_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_2d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: 
store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_sample_lz_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_2d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32.v8i32.v4i32(i32 10, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_l_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2d_v4f32_f32(10, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_2d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x 
float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP6]], align 32 +// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr 
[[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP8]] +// +float4 test_amdgcn_image_sample_d_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2d_v4f32_f32(100, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_3d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// 
CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.3d.v4f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_lz_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_3d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_3d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) 
+// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.3d.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP6]] +// +float4 test_amdgcn_image_sample_l_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_3d_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 
110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_3d_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: 
[[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP9]], align 32 +// CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP11:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], float [[TMP7]], float [[TMP8]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP10]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP11]] +// +float4 test_amdgcn_image_sample_d_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_3d_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_cube_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: 
[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.cube.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_lz_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_cube_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_cube_v4f32_f32( +// 
CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load 
<8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.cube.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP6]] +// +float4 test_amdgcn_image_sample_l_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_cube_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_1darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> 
[[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.1darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP4]] +// +float4 test_amdgcn_image_sample_lz_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1darray_v4f32_f32(1, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_1darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: 
[[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.1darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_l_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> 
@test_amdgcn_image_sample_d_1darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], 
align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.1darray.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP6]] +// +float4 test_amdgcn_image_sample_d_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_lz_2darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.lz.2darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP5]] +// +float4 test_amdgcn_image_sample_lz_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_l_2darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: 
[[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.l.2darray.v4f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP6]] +// +float4 test_amdgcn_image_sample_l_2darray_v4f32_f32(float4 v4f32, float f32, int i32, 
__amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x float> @test_amdgcn_image_sample_d_2darray_v4f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: 
[[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP7]], align 32 +// CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = call <4 x float> @llvm.amdgcn.image.sample.d.2darray.v4f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x float> [[TMP9]] +// +float4 test_amdgcn_image_sample_d_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2darray_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_1d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP1]], align 32 +// CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.1d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP2]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP3]] +// +half4 test_amdgcn_image_sample_lz_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1d_v4f16_f32(100, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_1d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, 
align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.1d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 
test_amdgcn_image_sample_l_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_1d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load 
float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.1d.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_d_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_2d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: 
[[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_amdgcn_image_sample_lz_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_2d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, 
addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.2d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_l_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return 
__builtin_amdgcn_image_sample_l_2d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_2d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// 
CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP6]], align 32 +// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP8:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.2d.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP8]] +// +half4 test_amdgcn_image_sample_d_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_3d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = 
addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.3d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_lz_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_3d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_3d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: 
[[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = call <4 x half> 
@llvm.amdgcn.image.sample.l.3d.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP6]] +// +half4 test_amdgcn_image_sample_l_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_3d_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_3d_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// 
CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP9]], align 32 +// CHECK-NEXT: [[TMP10:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP11:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.3d.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], float [[TMP7]], float [[TMP8]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP10]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP11]] +// +half4 test_amdgcn_image_sample_d_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_3d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_cube_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] 
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.cube.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float 
[[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_lz_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_cube_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_cube_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr 
[[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.cube.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP6]] +// +half4 test_amdgcn_image_sample_l_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_cube_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_1darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr 
addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.1darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +half4 test_amdgcn_image_sample_lz_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1darray_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_1darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: 
[[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.1darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x 
i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_l_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_1darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr 
[[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.1darray.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP6]] +// +half4 test_amdgcn_image_sample_d_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_lz_2darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = 
addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.lz.2darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP5]] +// +half4 test_amdgcn_image_sample_lz_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_l_2darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr 
[[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr 
[[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.l.2darray.v4f16.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP6]] +// +half4 test_amdgcn_image_sample_l_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local <4 x half> @test_amdgcn_image_sample_d_2darray_v4f16_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca <4 x half>, align 8, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr 
[[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP7]], align 32 +// CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = call <4 x half> @llvm.amdgcn.image.sample.d.2darray.v4f16.f32.f32.v8i32.v4i32(i32 100, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret <4 x half> [[TMP9]] +// +half4 test_amdgcn_image_sample_d_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2darray_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_lz_2d_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: 
[[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP2]], align 32 +// CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.amdgcn.image.sample.lz.2d.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP3]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP4]] +// +float test_amdgcn_image_sample_lz_2d_f32_f32(float4 v4f32, float f32, 
int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2d_f32_f32(1, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_l_2d_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, 
ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.l.2d.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP5]] +// +float test_amdgcn_image_sample_l_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2d_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_d_2d_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast 
ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP6]], align 32 +// CHECK-NEXT: [[TMP7:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.amdgcn.image.sample.d.2d.f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP7]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP8]] +// +float test_amdgcn_image_sample_d_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2d_f32_f32(1, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_lz_2darray_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, 
addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP3]], align 32 +// CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.amdgcn.image.sample.lz.2darray.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP4]], i1 false, 
i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP5]] +// +float test_amdgcn_image_sample_lz_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2darray_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_l_2darray_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr 
[[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP4]], align 32 +// CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP5]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP6]] +// +float test_amdgcn_image_sample_l_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2darray_f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 120, 110); +} + +// CHECK-LABEL: define dso_local float @test_amdgcn_image_sample_d_2darray_f32_f32( +// CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, addrspace(5) +// CHECK-NEXT: [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5) +// CHECK-NEXT: [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5) +// CHECK-NEXT: [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5) +// CHECK-NEXT: [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, addrspace(5) +// CHECK-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr +// CHECK-NEXT: [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[V4F32_ADDR]] to ptr +// CHECK-NEXT: [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[F32_ADDR]] to ptr +// CHECK-NEXT: [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[I32_ADDR]] to ptr +// CHECK-NEXT: [[TEX_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[TEX_ADDR]] to ptr +// CHECK-NEXT: [[VEC4I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[VEC4I32_ADDR]] to ptr +// CHECK-NEXT: store <4 x float> [[V4F32]], ptr [[V4F32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: store float [[F32]], ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store i32 [[I32]], ptr [[I32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: store ptr [[TEX]], ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: store <4 x i32> [[VEC4I32]], ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP3:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP5:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[F32_ADDR_ASCAST]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TEX_ADDR_ASCAST]], align 32 +// CHECK-NEXT: [[TEX_RSRC_VAL:%.*]] = load <8 x i32>, ptr [[TMP7]], align 32 +// CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr [[VEC4I32_ADDR_ASCAST]], align 16 +// CHECK-NEXT: [[TMP9:%.*]] = call float @llvm.amdgcn.image.sample.d.2darray.f32.f32.f32.v8i32.v4i32(i32 1, float [[TMP0]], float [[TMP1]], float [[TMP2]], float [[TMP3]], float [[TMP4]], float [[TMP5]], float [[TMP6]], <8 x i32> [[TEX_RSRC_VAL]], <4 x i32> [[TMP8]], i1 false, i32 120, i32 110) +// CHECK-NEXT: ret float [[TMP9]] +// +float test_amdgcn_image_sample_d_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2darray_f32_f32(1, f32, f32, f32, f32, f32, f32, 
f32, tex, vec4i32, 0, 120, 110); +} diff --git a/clang/test/SemaOpenCL/builtins-extended-image-param-gfx1100-err.cl b/clang/test/SemaOpenCL/builtins-extended-image-param-gfx1100-err.cl new file mode 100644 index 000000000000..47dbdd4e5178 --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-extended-image-param-gfx1100-err.cl @@ -0,0 +1,227 @@ +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx1100 -target-feature +extended-image-insts -S -verify=expected -o - %s +// REQUIRES: amdgpu-registered-target + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +typedef int int4 __attribute__((ext_vector_type(4))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef half half4 __attribute__((ext_vector_type(4))); + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_r(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(1, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_g(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(2, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_b(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(4, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_a(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(8, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to 
'__builtin_amdgcn_image_gather4_lz_2d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_lz_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1d_v4f32_f32(i32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_1d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_l_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_1d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_d_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_1d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_lz_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_l_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, 103); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_d_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2d_v4f32_f32(i32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); 
//expected-error{{argument to '__builtin_amdgcn_image_sample_d_2d_v4f32_f32' must be a constant integer}} +} +float4 test_amdgcn_image_sample_lz_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_3d_v4f32_f32(i32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_3d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_l_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_3d_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_3d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_d_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_3d_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_3d_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_lz_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_cube_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_cube_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_l_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_cube_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_cube_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_lz_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return 
__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32(1, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_1darray_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_l_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_1darray_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_d_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_1darray_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_lz_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2darray_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_l_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2darray_v4f32_f32' must be a constant integer}} +} + +float4 test_amdgcn_image_sample_d_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2darray_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2darray_v4f32_f32' must be a constant integer}} +} + +half4 
test_amdgcn_image_sample_lz_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1d_v4f16_f32(23, f32, tex, vec4i32, 0, i32, 11); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_1d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_l_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1d_v4f16_f32(i32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_1d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_d_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1d_v4f16_f32(i32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_1d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_lz_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_l_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_d_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2d_v4f16_f32' must be a constant 
integer}} +} + +half4 test_amdgcn_image_sample_lz_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_3d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_3d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_l_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_3d_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_3d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_d_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_3d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_3d_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_lz_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_cube_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_cube_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_l_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_cube_v4f16_f32(i32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_cube_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_lz_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1darray_v4f16_f32(i32, f32, f32, tex, vec4i32, 0, f32, i32); 
//expected-error{{argument to '__builtin_amdgcn_image_sample_lz_1darray_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_l_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1darray_v4f16_f32(i32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_1darray_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_d_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_1darray_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_lz_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2darray_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_l_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2darray_v4f16_f32' must be a constant integer}} +} + +half4 test_amdgcn_image_sample_d_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2darray_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2darray_v4f16_f32' must be a constant integer}} +} + +float test_amdgcn_image_sample_lz_2d_f32_f32(float4 v4f32, float f32, int i32, 
__amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2d_f32_f32(1, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2d_f32_f32' must be a constant integer}} +} + +float test_amdgcn_image_sample_l_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2d_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2d_f32_f32' must be a constant integer}} +} + +float test_amdgcn_image_sample_d_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2d_f32_f32(1, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2d_f32_f32' must be a constant integer}} +} + +float test_amdgcn_image_sample_lz_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2darray_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_lz_2darray_f32_f32' must be a constant integer}} +} + +float test_amdgcn_image_sample_l_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2darray_f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_l_2darray_f32_f32' must be a constant integer}} +} + +float test_amdgcn_image_sample_d_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2darray_f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, f32, i32); //expected-error{{argument to '__builtin_amdgcn_image_sample_d_2darray_f32_f32' must be a constant integer}} +} diff --git 
a/clang/test/SemaOpenCL/builtins-extended-image-param-gfx942-err.cl b/clang/test/SemaOpenCL/builtins-extended-image-param-gfx942-err.cl new file mode 100644 index 000000000000..e60f8c70dc7c --- /dev/null +++ b/clang/test/SemaOpenCL/builtins-extended-image-param-gfx942-err.cl @@ -0,0 +1,227 @@ +// RUN: %clang_cc1 -triple amdgcn-- -target-cpu gfx942 -verify=GFX94 -S -o - %s +// REQUIRES: amdgpu-registered-target + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +typedef int int4 __attribute__((ext_vector_type(4))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef half half4 __attribute__((ext_vector_type(4))); + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_r(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(1, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_gather4_lz_2d_v4f32_f32_r' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_g(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(2, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_gather4_lz_2d_v4f32_f32_g' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_b(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(4, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_gather4_lz_2d_v4f32_f32_b' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_a(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_gather4_lz_2d_v4f32_f32(8, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_gather4_lz_2d_v4f32_f32_a' needs target feature extended-image-insts}} +} + +float4 
test_amdgcn_image_sample_lz_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1d_v4f32_f32(105, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_1d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_l_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_1d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_d_1d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1d_v4f32_f32(100, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_1d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_lz_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2d_v4f32_f32(100, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_l_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2d_v4f32_f32(10, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_d_2d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2d_v4f32_f32(105, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2d_v4f32_f32' needs target feature extended-image-insts}} +} +float4 
test_amdgcn_image_sample_lz_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_3d_v4f32_f32(105, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_3d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_l_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_3d_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_3d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_d_3d_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_3d_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_3d_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_lz_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_cube_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_cube_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_l_cube_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_cube_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_cube_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_lz_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1darray_v4f32_f32(1, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_1darray_v4f32_f32' 
needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_l_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_1darray_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_d_1darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_1darray_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_lz_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2darray_v4f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2darray_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_l_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2darray_v4f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2darray_v4f32_f32' needs target feature extended-image-insts}} +} + +float4 test_amdgcn_image_sample_d_2darray_v4f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2darray_v4f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2darray_v4f32_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_lz_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1d_v4f16_f32(105, f32, 
tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_1d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_l_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_1d_v4f16_f32(105, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_1d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_d_1d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1d_v4f16_f32(105, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_1d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_lz_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2d_v4f16_f32(100, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_l_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2d_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_d_2d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_lz_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_3d_v4f16_f32(100, f32, f32, f32, 
tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_3d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_l_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_3d_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_3d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_d_3d_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_3d_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_3d_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_lz_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_cube_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_cube_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_l_cube_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_cube_v4f16_f32(105, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_cube_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_lz_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_1darray_v4f16_f32(105, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_1darray_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_l_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return 
__builtin_amdgcn_image_sample_l_1darray_v4f16_f32(105, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_1darray_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_d_1darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_1darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_1darray_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_lz_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2darray_v4f16_f32(100, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2darray_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_l_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2darray_v4f16_f32(100, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2darray_v4f16_f32' needs target feature extended-image-insts}} +} + +half4 test_amdgcn_image_sample_d_2darray_v4f16_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2darray_v4f16_f32(100, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2darray_v4f16_f32' needs target feature extended-image-insts}} +} + +float test_amdgcn_image_sample_lz_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2d_f32_f32(1, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2d_f32_f32' needs target feature extended-image-insts}} +} + +float test_amdgcn_image_sample_l_2d_f32_f32(float4 
v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2d_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2d_f32_f32' needs target feature extended-image-insts}} +} + +float test_amdgcn_image_sample_d_2d_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2d_f32_f32(1, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2d_f32_f32' needs target feature extended-image-insts}} +} + +float test_amdgcn_image_sample_lz_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_lz_2darray_f32_f32(1, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_lz_2darray_f32_f32' needs target feature extended-image-insts}} +} + +float test_amdgcn_image_sample_l_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_l_2darray_f32_f32(1, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_l_2darray_f32_f32' needs target feature extended-image-insts}} +} + +float test_amdgcn_image_sample_d_2darray_f32_f32(float4 v4f32, float f32, int i32, __amdgpu_texture_t tex, int4 vec4i32) { + + return __builtin_amdgcn_image_sample_d_2darray_f32_f32(1, f32, f32, f32, f32, f32, f32, f32, tex, vec4i32, 0, 101, 121); //GFX94-error{{'test_amdgcn_image_sample_d_2darray_f32_f32' needs target feature extended-image-insts}} +} -- cgit v1.2.3 From 22079e3f3698d5c367c7b67f63de8c838791ae76 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Thu, 30 Oct 2025 10:00:51 -0700 Subject: [libc][hdrgen] Add extra_standards and license_text (#165459) This adds a few new features to hdrgen, all meant to facilitate using it with inputs and outputs that are outside the llvm-libc source 
tree. The new `extra_standards` field is a dictionary to augment the set of names that can be used in `standards` lists. The keys are the identifiers used in YAML ("stdc") and the values are the pretty names generated in the header comments ("Standard C"). This lets a libc project that's leveraging the llvm-libc sources along with its own code define new APIs outside the formal and de facto standards that llvm-libc draws its supported APIs from. The new `license_text` field is a list of lines of license text that replaces the standard LLVM license text used at the top of each generated header. This lets other projects use hdrgen with their own inputs to produce generated headers that are not tied to the LLVM project. Finally, for any function attributes that are not in a canonical list known to be provided by __llvm-libc-common.h, an include will be generated for "llvm-libc-macros/{attribute name}.h", expecting that file to define the "attribute" name as a macro. All this can be used immediately by builds that drive hdrgen and build libc code outside the LLVM CMake build. Future changes could add CMake plumbing to facilitate augmenting the LLVM CMake build of libc with outside sources via overlays and cache files. 
--- libc/utils/hdrgen/hdrgen/header.py | 71 +++++++++++++++++----- libc/utils/hdrgen/hdrgen/yaml_to_classes.py | 2 + libc/utils/hdrgen/tests/expected_output/custom.h | 21 +++++++ .../hdrgen/tests/expected_output/test_header.h | 1 + .../hdrgen/tests/expected_output/test_small.json | 1 + libc/utils/hdrgen/tests/input/custom-common.yaml | 6 ++ libc/utils/hdrgen/tests/input/custom.yaml | 13 ++++ libc/utils/hdrgen/tests/test_integration.py | 7 +++ 8 files changed, 106 insertions(+), 16 deletions(-) create mode 100644 libc/utils/hdrgen/tests/expected_output/custom.h create mode 100644 libc/utils/hdrgen/tests/input/custom-common.yaml create mode 100644 libc/utils/hdrgen/tests/input/custom.yaml diff --git a/libc/utils/hdrgen/hdrgen/header.py b/libc/utils/hdrgen/hdrgen/header.py index 715d4b7c9b7e..558ee5846920 100644 --- a/libc/utils/hdrgen/hdrgen/header.py +++ b/libc/utils/hdrgen/hdrgen/header.py @@ -35,6 +35,13 @@ NONIDENTIFIER = re.compile("[^a-zA-Z0-9_]+") COMMON_HEADER = PurePosixPath("__llvm-libc-common.h") +# These "attributes" are known macros defined in COMMON_HEADER. +# Others are found in "llvm-libc-macros/{name}.h". +COMMON_ATTRIBUTES = { + "_Noreturn", + "_Returns_twice", +} + # All the canonical identifiers are in lowercase for easy maintenance. # This maps them to the pretty descriptions to generate in header comments. LIBRARY_DESCRIPTIONS = { @@ -50,9 +57,7 @@ LIBRARY_DESCRIPTIONS = { HEADER_TEMPLATE = """\ //===-- {library} header <{header}> --===// // -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +{license_lines} // //===---------------------------------------------------------------------===// @@ -64,6 +69,12 @@ HEADER_TEMPLATE = """\ #endif // {guard} """ +LLVM_LICENSE_TEXT = [ + "Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.", + "See https://llvm.org/LICENSE.txt for license information.", + "SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception", +] + class HeaderFile: def __init__(self, name): @@ -74,8 +85,10 @@ class HeaderFile: self.enumerations = [] self.objects = [] self.functions = [] + self.extra_standards = {} self.standards = [] self.merge_yaml_files = [] + self.license_text = [] def add_macro(self, macro): self.macros.append(macro) @@ -98,6 +111,11 @@ class HeaderFile: self.enumerations = sorted(set(self.enumerations) | set(other.enumerations)) self.objects = sorted(set(self.objects) | set(other.objects)) self.functions = sorted(set(self.functions) | set(other.functions)) + self.extra_standards |= other.extra_standards + if self.license_text: + assert not other.license_text, "only one `license_text` allowed" + else: + self.license_text = other.license_text def all_types(self): return reduce( @@ -106,6 +124,13 @@ class HeaderFile: set(self.types), ) + def all_attributes(self): + return reduce( + lambda a, b: a | b, + [set(f.attributes) for f in self.functions], + set(), + ) + def all_standards(self): # FIXME: Only functions have the "standard" field, but all the entity # types should have one too. 
@@ -114,16 +139,24 @@ class HeaderFile: ) def includes(self): - return { - PurePosixPath("llvm-libc-macros") / macro.header - for macro in self.macros - if macro.header is not None - } | { - COMPILER_HEADER_TYPES.get( - typ.type_name, PurePosixPath("llvm-libc-types") / f"{typ.type_name}.h" - ) - for typ in self.all_types() - } + return ( + { + PurePosixPath("llvm-libc-macros") / macro.header + for macro in self.macros + if macro.header is not None + } + | { + COMPILER_HEADER_TYPES.get( + typ.type_name, + PurePosixPath("llvm-libc-types") / f"{typ.type_name}.h", + ) + for typ in self.all_types() + } + | { + PurePosixPath("llvm-libc-macros") / f"{attr}.h" + for attr in self.all_attributes() - COMMON_ATTRIBUTES + } + ) def header_guard(self): return "_LLVM_LIBC_" + "_".join( @@ -131,24 +164,29 @@ class HeaderFile: ) def library_description(self): + descriptions = LIBRARY_DESCRIPTIONS | self.extra_standards # If the header itself is in standard C, just call it that. if "stdc" in self.standards: - return LIBRARY_DESCRIPTIONS["stdc"] + return descriptions["stdc"] # If the header itself is in POSIX, just call it that. if "posix" in self.standards: - return LIBRARY_DESCRIPTIONS["posix"] + return descriptions["posix"] # Otherwise, consider the standards for each symbol as well. standards = self.all_standards() # Otherwise, it's described by all those that apply, but ignoring # "stdc" and "posix" since this is not a "stdc" or "posix" header. 
return " / ".join( sorted( - LIBRARY_DESCRIPTIONS[standard] + descriptions[standard] for standard in standards if standard not in {"stdc", "posix"} ) ) + def license_lines(self): + lines = self.license_text or LLVM_LICENSE_TEXT + return "\n".join([f"// {line}" for line in lines]) + def template(self, dir, files_read): if self.template_file is not None: # There's a custom template file, so just read it in and record @@ -162,6 +200,7 @@ class HeaderFile: library=self.library_description(), header=self.name, guard=self.header_guard(), + license_lines=self.license_lines(), ) def public_api(self): diff --git a/libc/utils/hdrgen/hdrgen/yaml_to_classes.py b/libc/utils/hdrgen/hdrgen/yaml_to_classes.py index ebe7781d449f..9eddbe615cbb 100644 --- a/libc/utils/hdrgen/hdrgen/yaml_to_classes.py +++ b/libc/utils/hdrgen/hdrgen/yaml_to_classes.py @@ -37,6 +37,8 @@ def yaml_to_classes(yaml_data, header_class, entry_points=None): header = header_class(header_name) header.template_file = yaml_data.get("header_template") header.standards = yaml_data.get("standards", []) + header.extra_standards = yaml_data.get("extra_standards", {}) + header.license_text = yaml_data.get("license_text", []) header.merge_yaml_files = yaml_data.get("merge_yaml_files", []) for macro_data in yaml_data.get("macros", []): diff --git a/libc/utils/hdrgen/tests/expected_output/custom.h b/libc/utils/hdrgen/tests/expected_output/custom.h new file mode 100644 index 000000000000..5f9ed231490f --- /dev/null +++ b/libc/utils/hdrgen/tests/expected_output/custom.h @@ -0,0 +1,21 @@ +//===-- Wile E. Coyote header --===// +// +// Caveat emptor. +// I never studied law. 
+// +//===---------------------------------------------------------------------===// + +#ifndef _LLVM_LIBC_CUSTOM_H +#define _LLVM_LIBC_CUSTOM_H + +#include "__llvm-libc-common.h" +#include "llvm-libc-types/meep.h" +#include "llvm-libc-types/road.h" + +__BEGIN_C_DECLS + +road runner(meep, meep) __NOEXCEPT; + +__END_C_DECLS + +#endif // _LLVM_LIBC_CUSTOM_H diff --git a/libc/utils/hdrgen/tests/expected_output/test_header.h b/libc/utils/hdrgen/tests/expected_output/test_header.h index 748c09808c12..49112a353f7b 100644 --- a/libc/utils/hdrgen/tests/expected_output/test_header.h +++ b/libc/utils/hdrgen/tests/expected_output/test_header.h @@ -12,6 +12,7 @@ #include "__llvm-libc-common.h" #include "llvm-libc-macros/float16-macros.h" +#include "llvm-libc-macros/CONST_FUNC_A.h" #include "llvm-libc-macros/test_more-macros.h" #include "llvm-libc-macros/test_small-macros.h" #include "llvm-libc-types/float128.h" diff --git a/libc/utils/hdrgen/tests/expected_output/test_small.json b/libc/utils/hdrgen/tests/expected_output/test_small.json index 9cc73d013a67..8502df23b9a4 100644 --- a/libc/utils/hdrgen/tests/expected_output/test_small.json +++ b/libc/utils/hdrgen/tests/expected_output/test_small.json @@ -4,6 +4,7 @@ "standards": [], "includes": [ "__llvm-libc-common.h", + "llvm-libc-macros/CONST_FUNC_A.h", "llvm-libc-macros/test_more-macros.h", "llvm-libc-macros/test_small-macros.h", "llvm-libc-types/float128.h", diff --git a/libc/utils/hdrgen/tests/input/custom-common.yaml b/libc/utils/hdrgen/tests/input/custom-common.yaml new file mode 100644 index 000000000000..909a3ba5163a --- /dev/null +++ b/libc/utils/hdrgen/tests/input/custom-common.yaml @@ -0,0 +1,6 @@ +license_text: + - Caveat emptor. + - I never studied law. + +extra_standards: + acme: Wile E. 
Coyote diff --git a/libc/utils/hdrgen/tests/input/custom.yaml b/libc/utils/hdrgen/tests/input/custom.yaml new file mode 100644 index 000000000000..7d3ff8ec421d --- /dev/null +++ b/libc/utils/hdrgen/tests/input/custom.yaml @@ -0,0 +1,13 @@ +merge_yaml_files: + - custom-common.yaml + +header: custom.h +standards: + - acme + +functions: + - name: runner + return_type: road + arguments: + - type: meep + - type: meep diff --git a/libc/utils/hdrgen/tests/test_integration.py b/libc/utils/hdrgen/tests/test_integration.py index bf393d26a810..c6e76d826a3a 100644 --- a/libc/utils/hdrgen/tests/test_integration.py +++ b/libc/utils/hdrgen/tests/test_integration.py @@ -59,6 +59,13 @@ class TestHeaderGenIntegration(unittest.TestCase): self.run_script(yaml_file, output_file) self.compare_files(output_file, expected_output_file) + def test_custom_license_and_standards(self): + yaml_file = self.source_dir / "input" / "custom.yaml" + expected_output_file = self.source_dir / "expected_output" / "custom.h" + output_file = self.output_dir / "custom.h" + self.run_script(yaml_file, output_file) + self.compare_files(output_file, expected_output_file) + def test_generate_json(self): yaml_file = self.source_dir / "input/test_small.yaml" expected_output_file = self.source_dir / "expected_output/test_small.json" -- cgit v1.2.3 From 3056727a1ad8d8860d5c4302dd6867e485da38c9 Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Thu, 30 Oct 2025 10:25:53 -0700 Subject: [clang][lex] Use `FileManager` to make prebuilt module paths absolute (#165347) This PR switches from using `llvm::sys::fs::make_absolute()` to `FileManager::makeAbsolutePath()` so that `FileSystemOptions` (i.e. the `-working-directory` option) and the `VFS`'s CWD have a say in how the prebuilt module paths are resolved. This matches how the rest of the compiler treats input files. 
--- clang/lib/Lex/HeaderSearch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/Lex/HeaderSearch.cpp b/clang/lib/Lex/HeaderSearch.cpp index 65c324c10ca5..f05c28fd7a12 100644 --- a/clang/lib/Lex/HeaderSearch.cpp +++ b/clang/lib/Lex/HeaderSearch.cpp @@ -221,7 +221,7 @@ std::string HeaderSearch::getPrebuiltModuleFileName(StringRef ModuleName, // file. for (const std::string &Dir : HSOpts.PrebuiltModulePaths) { SmallString<256> Result(Dir); - llvm::sys::fs::make_absolute(Result); + FileMgr.makeAbsolutePath(Result); if (ModuleName.contains(':')) // The separator of C++20 modules partitions (':') is not good for file // systems, here clang and gcc choose '-' by default since it is not a @@ -246,7 +246,7 @@ std::string HeaderSearch::getPrebuiltImplicitModuleFileName(Module *Module) { StringRef ModuleCacheHash = HSOpts.DisableModuleHash ? "" : getModuleHash(); for (const std::string &Dir : HSOpts.PrebuiltModulePaths) { SmallString<256> CachePath(Dir); - llvm::sys::fs::make_absolute(CachePath); + FileMgr.makeAbsolutePath(CachePath); llvm::sys::path::append(CachePath, ModuleCacheHash); std::string FileName = getCachedModuleFileNameImpl(ModuleName, ModuleMapPath, CachePath); -- cgit v1.2.3 From 0029815af7b8335054345e2f5aacf68ef9d18e81 Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Thu, 30 Oct 2025 13:27:41 -0400 Subject: [ADT] Support `.Default` with `nullptr` and `nullopt` values in TypeSwitch (#165724) In the previous implementation, this would fail for cases like `TypeSwitch>` because `std::nullopt` does not match `ResultT` exactly and the overload for callable types would be selected. Add new overloads that support `nullptr` and `std::nullopt`. These can be added alongside generic callables because we wouldn't want to call any 'null' function refs anyway. 
I selected the `nullptr` and `nullopt` specializations because of how often they appear in the codebase -- currently, you will see lots of code like `.Default(std::optional<T>())` that can be simplified with this patch. --- llvm/include/llvm/ADT/TypeSwitch.h | 17 +++++++++++++++ llvm/unittests/ADT/TypeSwitchTest.cpp | 41 +++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/llvm/include/llvm/ADT/TypeSwitch.h b/llvm/include/llvm/ADT/TypeSwitch.h index 5657303b0a1f..50ca1d5a6b5b 100644 --- a/llvm/include/llvm/ADT/TypeSwitch.h +++ b/llvm/include/llvm/ADT/TypeSwitch.h @@ -111,6 +111,7 @@ public: return std::move(*result); return defaultFn(this->value); } + /// As a default, return the given value. [[nodiscard]] ResultT Default(ResultT defaultResult) { if (result) @@ -118,6 +119,22 @@ public: return defaultResult; } + /// Default for pointer-like results types that accept `nullptr`. + template >> + [[nodiscard]] ResultT Default(std::nullptr_t) { + return Default(ResultT(nullptr)); + } + + /// Default for optional results types that accept `std::nullopt`. + template >> + [[nodiscard]] ResultT Default(std::nullopt_t) { + return Default(ResultT(std::nullopt)); + } + /// Declare default as unreachable, making sure that all cases were handled.
[[nodiscard]] ResultT DefaultUnreachable( const char *message = "Fell off the end of a type-switch") { diff --git a/llvm/unittests/ADT/TypeSwitchTest.cpp b/llvm/unittests/ADT/TypeSwitchTest.cpp index a7d934265c5f..b80122837c1a 100644 --- a/llvm/unittests/ADT/TypeSwitchTest.cpp +++ b/llvm/unittests/ADT/TypeSwitchTest.cpp @@ -142,3 +142,44 @@ TEST(TypeSwitchTest, DefaultUnreachableWithVoid) { EXPECT_DEATH((void)translate(DerivedD()), "Unhandled type"); #endif } + +TEST(TypeSwitchTest, DefaultNullopt) { + auto translate = [](auto value) { + return TypeSwitch>(&value) + .Case([](DerivedA *) { return 0; }) + .Default(std::nullopt); + }; + EXPECT_EQ(0, translate(DerivedA())); + EXPECT_EQ(std::nullopt, translate(DerivedD())); +} + +TEST(TypeSwitchTest, DefaultNullptr) { + float foo = 0.0f; + auto translate = [&](auto value) { + return TypeSwitch(&value) + .Case([&](DerivedA *) { return &foo; }) + .Default(nullptr); + }; + EXPECT_EQ(&foo, translate(DerivedA())); + EXPECT_EQ(nullptr, translate(DerivedD())); +} + +TEST(TypeSwitchTest, DefaultNullptrForPointerLike) { + struct Value { + void *ptr; + Value(const Value &other) : ptr(other.ptr) {} + Value(std::nullptr_t) : ptr(nullptr) {} + Value() : Value(nullptr) {} + }; + + float foo = 0.0f; + Value fooVal; + fooVal.ptr = &foo; + auto translate = [&](auto value) { + return TypeSwitch(&value) + .Case([&](DerivedA *) { return fooVal; }) + .Default(nullptr); + }; + EXPECT_EQ(&foo, translate(DerivedA()).ptr); + EXPECT_EQ(nullptr, translate(DerivedD()).ptr); +} -- cgit v1.2.3 From 17dbd8690e36f8e514fb47f4418f78420d0fc019 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Thu, 30 Oct 2025 16:41:37 +0000 Subject: Reland "[lldb-dap] Improving consistency of tests by removing concurrency." (#165688)" This reverts commit f205be095609aa61dfac3ae729406e0af2dcd15f. This new select mechanism has exposed the fact that the resources the Arm Linux bot has can vary a lot. 
We do limit it to a low number of parallel tests but in this case, I think it's write performance somewhere. Reland the changes since they work elsewhere, and disable lldb-dap tests on Arm Linux while I fix our buildbot. --- .../lldbsuite/test/tools/lldb-dap/dap_server.py | 206 ++++++++------------- .../test/tools/lldb-dap/lldbdap_testcase.py | 4 +- .../breakpoint-events/TestDAP_breakpointEvents.py | 30 ++- .../API/tools/lldb-dap/launch/TestDAP_launch.py | 2 +- .../lldb-dap/module-event/TestDAP_module_event.py | 88 ++++----- .../API/tools/lldb-dap/module/TestDAP_module.py | 8 +- .../lldb-dap/restart/TestDAP_restart_console.py | 24 ++- .../tools/lldb-dap/send-event/TestDAP_sendEvent.py | 2 +- 8 files changed, 161 insertions(+), 203 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py index d892c01f0bc7..8f3652172dfd 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/dap_server.py @@ -10,8 +10,8 @@ import string import subprocess import signal import sys -import threading import warnings +import selectors import time from typing import ( Any, @@ -139,35 +139,6 @@ def dump_memory(base_addr, data, num_per_line, outfile): outfile.write("\n") -def read_packet( - f: IO[bytes], trace_file: Optional[IO[str]] = None -) -> Optional[ProtocolMessage]: - """Decode a JSON packet that starts with the content length and is - followed by the JSON bytes from a file 'f'. Returns None on EOF. - """ - line = f.readline().decode("utf-8") - if len(line) == 0: - return None # EOF. 
- - # Watch for line that starts with the prefix - prefix = "Content-Length: " - if line.startswith(prefix): - # Decode length of JSON bytes - length = int(line[len(prefix) :]) - # Skip empty line - separator = f.readline().decode() - if separator != "": - Exception("malformed DAP content header, unexpected line: " + separator) - # Read JSON bytes - json_str = f.read(length).decode() - if trace_file: - trace_file.write("from adapter:\n%s\n" % (json_str)) - # Decode the JSON bytes into a python dictionary - return json.loads(json_str) - - raise Exception("unexpected malformed message from lldb-dap: " + line) - - def packet_type_is(packet, packet_type): return "type" in packet and packet["type"] == packet_type @@ -199,16 +170,8 @@ class DebugCommunication(object): self.log_file = log_file self.send = send self.recv = recv - - # Packets that have been received and processed but have not yet been - # requested by a test case. - self._pending_packets: List[Optional[ProtocolMessage]] = [] - # Received packets that have not yet been processed. - self._recv_packets: List[Optional[ProtocolMessage]] = [] - # Used as a mutex for _recv_packets and for notify when _recv_packets - # changes. 
- self._recv_condition = threading.Condition() - self._recv_thread = threading.Thread(target=self._read_packet_thread) + self.selector = selectors.DefaultSelector() + self.selector.register(recv, selectors.EVENT_READ) # session state self.init_commands = init_commands @@ -234,9 +197,6 @@ class DebugCommunication(object): # keyed by breakpoint id self.resolved_breakpoints: dict[str, Breakpoint] = {} - # trigger enqueue thread - self._recv_thread.start() - @classmethod def encode_content(cls, s: str) -> bytes: return ("Content-Length: %u\r\n\r\n%s" % (len(s), s)).encode("utf-8") @@ -252,17 +212,46 @@ class DebugCommunication(object): f"seq mismatch in response {command['seq']} != {response['request_seq']}" ) - def _read_packet_thread(self): - try: - while True: - packet = read_packet(self.recv, trace_file=self.trace_file) - # `packet` will be `None` on EOF. We want to pass it down to - # handle_recv_packet anyway so the main thread can handle unexpected - # termination of lldb-dap and stop waiting for new packets. - if not self._handle_recv_packet(packet): - break - finally: - dump_dap_log(self.log_file) + def _read_packet( + self, + timeout: float = DEFAULT_TIMEOUT, + ) -> Optional[ProtocolMessage]: + """Decode a JSON packet that starts with the content length and is + followed by the JSON bytes from self.recv. Returns None on EOF. + """ + + ready = self.selector.select(timeout) + if not ready: + warnings.warn( + "timeout occurred waiting for a packet, check if the test has a" + " negative assertion and see if it can be inverted.", + stacklevel=4, + ) + return None # timeout + + line = self.recv.readline().decode("utf-8") + if len(line) == 0: + return None # EOF. 
+ + # Watch for line that starts with the prefix + prefix = "Content-Length: " + if line.startswith(prefix): + # Decode length of JSON bytes + length = int(line[len(prefix) :]) + # Skip empty line + separator = self.recv.readline().decode() + if separator != "": + Exception("malformed DAP content header, unexpected line: " + separator) + # Read JSON bytes + json_str = self.recv.read(length).decode() + if self.trace_file: + self.trace_file.write( + "%s from adapter:\n%s\n" % (time.time(), json_str) + ) + # Decode the JSON bytes into a python dictionary + return json.loads(json_str) + + raise Exception("unexpected malformed message from lldb-dap: " + line) def get_modules( self, start_module: Optional[int] = None, module_count: Optional[int] = None @@ -310,34 +299,6 @@ class DebugCommunication(object): output += self.get_output(category, clear=clear) return output - def _enqueue_recv_packet(self, packet: Optional[ProtocolMessage]): - with self.recv_condition: - self.recv_packets.append(packet) - self.recv_condition.notify() - - def _handle_recv_packet(self, packet: Optional[ProtocolMessage]) -> bool: - """Handles an incoming packet. - - Called by the read thread that is waiting for all incoming packets - to store the incoming packet in "self._recv_packets" in a thread safe - way. This function will then signal the "self._recv_condition" to - indicate a new packet is available. - - Args: - packet: A new packet to store. - - Returns: - True if the caller should keep calling this function for more - packets. - """ - with self._recv_condition: - self._recv_packets.append(packet) - self._recv_condition.notify() - # packet is None on EOF - return packet is not None and not ( - packet["type"] == "response" and packet["command"] == "disconnect" - ) - def _recv_packet( self, *, @@ -361,46 +322,34 @@ class DebugCommunication(object): The first matching packet for the given predicate, if specified, otherwise None. 
""" - assert ( - threading.current_thread != self._recv_thread - ), "Must not be called from the _recv_thread" - - def process_until_match(): - self._process_recv_packets() - for i, packet in enumerate(self._pending_packets): - if packet is None: - # We need to return a truthy value to break out of the - # wait_for, use `EOFError` as an indicator of EOF. - return EOFError() - if predicate and predicate(packet): - self._pending_packets.pop(i) - return packet - - with self._recv_condition: - packet = self._recv_condition.wait_for(process_until_match, timeout) - return None if isinstance(packet, EOFError) else packet - - def _process_recv_packets(self) -> None: + deadline = time.time() + timeout + + while time.time() < deadline: + packet = self._read_packet(timeout=deadline - time.time()) + if packet is None: + return None + self._process_recv_packet(packet) + if not predicate or predicate(packet): + return packet + + def _process_recv_packet(self, packet) -> None: """Process received packets, updating the session state.""" - with self._recv_condition: - for packet in self._recv_packets: - if packet and ("seq" not in packet or packet["seq"] == 0): - warnings.warn( - f"received a malformed packet, expected 'seq != 0' for {packet!r}" - ) - # Handle events that may modify any stateful properties of - # the DAP session. - if packet and packet["type"] == "event": - self._handle_event(packet) - elif packet and packet["type"] == "request": - # Handle reverse requests and keep processing. - self._handle_reverse_request(packet) - # Move the packet to the pending queue. - self._pending_packets.append(packet) - self._recv_packets.clear() + if packet and ("seq" not in packet or packet["seq"] == 0): + warnings.warn( + f"received a malformed packet, expected 'seq != 0' for {packet!r}" + ) + # Handle events that may modify any stateful properties of + # the DAP session. 
+ if packet and packet["type"] == "event": + self._handle_event(packet) + elif packet and packet["type"] == "request": + # Handle reverse requests and keep processing. + self._handle_reverse_request(packet) def _handle_event(self, packet: Event) -> None: """Handle any events that modify debug session state we track.""" + self.events.append(packet) + event = packet["event"] body: Optional[Dict] = packet.get("body", None) @@ -453,6 +402,8 @@ class DebugCommunication(object): self.invalidated_event = packet elif event == "memory": self.memory_event = packet + elif event == "module": + self.module_events.append(packet) def _handle_reverse_request(self, request: Request) -> None: if request in self.reverse_requests: @@ -521,18 +472,14 @@ class DebugCommunication(object): Returns the seq number of the request. """ - # Set the seq for requests. - if packet["type"] == "request": - packet["seq"] = self.sequence - self.sequence += 1 - else: - packet["seq"] = 0 + packet["seq"] = self.sequence + self.sequence += 1 # Encode our command dictionary as a JSON string json_str = json.dumps(packet, separators=(",", ":")) if self.trace_file: - self.trace_file.write("to adapter:\n%s\n" % (json_str)) + self.trace_file.write("%s to adapter:\n%s\n" % (time.time(), json_str)) length = len(json_str) if length > 0: @@ -913,6 +860,8 @@ class DebugCommunication(object): if restartArguments: command_dict["arguments"] = restartArguments + # Clear state, the process is about to restart... + self._process_continued(True) response = self._send_recv(command_dict) # Caller must still call wait_for_stopped. 
return response @@ -1479,8 +1428,10 @@ class DebugCommunication(object): def terminate(self): self.send.close() - if self._recv_thread.is_alive(): - self._recv_thread.join() + self.recv.close() + self.selector.close() + if self.log_file: + dump_dap_log(self.log_file) def request_setInstructionBreakpoints(self, memory_reference=[]): breakpoints = [] @@ -1577,6 +1528,7 @@ class DebugAdapterServer(DebugCommunication): stdout=subprocess.PIPE, stderr=sys.stderr, env=adapter_env, + bufsize=0, ) if connection is None: diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py index 29935bb8046f..a897c1b01459 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py @@ -15,6 +15,8 @@ import base64 # DAP tests as a whole have been flakey on the Windows on Arm bot. See: # https://github.com/llvm/llvm-project/issues/137660 @skipIf(oslist=["windows"], archs=["aarch64"]) +# The Arm Linux bot needs stable resources before it can run these tests reliably. +@skipif(oslist=["linux]"], archs=["arm$"]) class DAPTestCaseBase(TestBase): # set timeout based on whether ASAN was enabled or not. Increase # timeout by a factor of 10 if ASAN is enabled. 
@@ -416,7 +418,7 @@ class DAPTestCaseBase(TestBase): return self.dap_server.wait_for_stopped() def continue_to_breakpoint(self, breakpoint_id: str): - self.continue_to_breakpoints((breakpoint_id)) + self.continue_to_breakpoints([breakpoint_id]) def continue_to_breakpoints(self, breakpoint_ids): self.do_continue() diff --git a/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py b/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py index beab4d6c1f5a..7b78541fb4f8 100644 --- a/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py +++ b/lldb/test/API/tools/lldb-dap/breakpoint-events/TestDAP_breakpointEvents.py @@ -81,24 +81,20 @@ class TestDAP_breakpointEvents(lldbdap_testcase.DAPTestCaseBase): breakpoint["verified"], "expect foo breakpoint to not be verified" ) - # Flush the breakpoint events. - self.dap_server.wait_for_breakpoint_events() - # Continue to the breakpoint - self.continue_to_breakpoints(dap_breakpoint_ids) + self.continue_to_breakpoint(foo_bp_id) + self.continue_to_next_stop() # foo_bp2 + self.continue_to_breakpoint(main_bp_id) + self.continue_to_exit() - verified_breakpoint_ids = [] - unverified_breakpoint_ids = [] - for breakpoint_event in self.dap_server.wait_for_breakpoint_events(): - breakpoint = breakpoint_event["body"]["breakpoint"] - id = breakpoint["id"] - if breakpoint["verified"]: - verified_breakpoint_ids.append(id) - else: - unverified_breakpoint_ids.append(id) + bp_events = [e for e in self.dap_server.events if e["event"] == "breakpoint"] - self.assertIn(main_bp_id, unverified_breakpoint_ids) - self.assertIn(foo_bp_id, unverified_breakpoint_ids) + main_bp_events = [ + e for e in bp_events if e["body"]["breakpoint"]["id"] == main_bp_id + ] + foo_bp_events = [ + e for e in bp_events if e["body"]["breakpoint"]["id"] == foo_bp_id + ] - self.assertIn(main_bp_id, verified_breakpoint_ids) - self.assertIn(foo_bp_id, verified_breakpoint_ids) + self.assertTrue(main_bp_events) + 
self.assertTrue(foo_bp_events) diff --git a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py index ca881f1d817c..09b13223e0a7 100644 --- a/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py +++ b/lldb/test/API/tools/lldb-dap/launch/TestDAP_launch.py @@ -156,6 +156,7 @@ class TestDAP_launch(lldbdap_testcase.DAPTestCaseBase): self.build_and_launch( program, debuggerRoot=program_parent_dir, initCommands=commands ) + self.continue_to_exit() output = self.get_console() self.assertTrue(output and len(output) > 0, "expect console output") lines = output.splitlines() @@ -171,7 +172,6 @@ class TestDAP_launch(lldbdap_testcase.DAPTestCaseBase): % (program_parent_dir, line[len(prefix) :]), ) self.assertTrue(found, "verified lldb-dap working directory") - self.continue_to_exit() def test_sourcePath(self): """ diff --git a/lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py b/lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py index 1f4afabbd161..9d1d17b704f7 100644 --- a/lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py +++ b/lldb/test/API/tools/lldb-dap/module-event/TestDAP_module_event.py @@ -1,58 +1,58 @@ -import dap_server +""" +Test 'module' events for dynamically loaded libraries. 
+""" + from lldbsuite.test.decorators import * from lldbsuite.test.lldbtest import * -from lldbsuite.test import lldbutil import lldbdap_testcase -import re class TestDAP_module_event(lldbdap_testcase.DAPTestCaseBase): + def lookup_module_id(self, name): + """Returns the identifier for the first module event starting with the given name.""" + for event in self.dap_server.module_events: + if self.get_dict_value(event, ["body", "module", "name"]).startswith(name): + return self.get_dict_value(event, ["body", "module", "id"]) + self.fail(f"No module events matching name={name}") + + def module_events(self, id): + """Finds all module events by identifier.""" + return [ + event + for event in self.dap_server.module_events + if self.get_dict_value(event, ["body", "module", "id"]) == id + ] + + def module_reasons(self, events): + """Returns the list of 'reason' values from the given events.""" + return [event["body"]["reason"] for event in events] + @skipIfWindows def test_module_event(self): + """ + Test that module events are fired on target load and when the list of + dynamic libraries updates while running. + """ program = self.getBuildArtifact("a.out") self.build_and_launch(program) + # We can analyze the order of events after the process exits. + self.continue_to_exit() - source = "main.cpp" - breakpoint1_line = line_number(source, "// breakpoint 1") - breakpoint2_line = line_number(source, "// breakpoint 2") - breakpoint3_line = line_number(source, "// breakpoint 3") + a_out_id = self.lookup_module_id("a.out") + a_out_events = self.module_events(id=a_out_id) - breakpoint_ids = self.set_source_breakpoints( - source, [breakpoint1_line, breakpoint2_line, breakpoint3_line] + self.assertIn( + "new", + self.module_reasons(a_out_events), + "Expected a.out to load during the debug session.", ) - self.continue_to_breakpoints(breakpoint_ids) - - # We're now stopped at breakpoint 1 before the dlopen. Flush all the module events. 
- event = self.dap_server.wait_for_event(["module"]) - while event is not None: - event = self.dap_server.wait_for_event(["module"]) - - # Continue to the second breakpoint, before the dlclose. - self.continue_to_breakpoints(breakpoint_ids) - - # Make sure we got a module event for libother. - event = self.dap_server.wait_for_event(["module"]) - self.assertIsNotNone(event, "didn't get a module event") - module_name = event["body"]["module"]["name"] - module_id = event["body"]["module"]["id"] - self.assertEqual(event["body"]["reason"], "new") - self.assertIn("libother", module_name) - - # Continue to the third breakpoint, after the dlclose. - self.continue_to_breakpoints(breakpoint_ids) - - # Make sure we got a module event for libother. - event = self.dap_server.wait_for_event(["module"]) - self.assertIsNotNone(event, "didn't get a module event") - reason = event["body"]["reason"] - self.assertEqual(reason, "removed") - self.assertEqual(event["body"]["module"]["id"], module_id) - - # The removed module event should omit everything but the module id and name - # as they are required fields. - module_data = event["body"]["module"] - required_keys = ["id", "name"] - self.assertListEqual(list(module_data.keys()), required_keys) - self.assertEqual(module_data["name"], "", "expects empty name.") - self.continue_to_exit() + libother_id = self.lookup_module_id( + "libother." # libother.so or libother.dylib based on OS. 
+ ) + libother_events = self.module_events(id=libother_id) + self.assertEqual( + self.module_reasons(libother_events), + ["new", "removed"], + "Expected libother to be loaded then unloaded during the debug session.", + ) diff --git a/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py b/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py index 0ed53dac5d86..2d00c512721c 100644 --- a/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py +++ b/lldb/test/API/tools/lldb-dap/module/TestDAP_module.py @@ -64,19 +64,18 @@ class TestDAP_module(lldbdap_testcase.DAPTestCaseBase): self.assertEqual(program, program_module["path"]) self.assertIn("addressRange", program_module) + self.continue_to_exit() + # Collect all the module names we saw as events. module_new_names = [] module_changed_names = [] - module_event = self.dap_server.wait_for_event(["module"]) - while module_event is not None: + for module_event in self.dap_server.module_events: reason = module_event["body"]["reason"] if reason == "new": module_new_names.append(module_event["body"]["module"]["name"]) elif reason == "changed": module_changed_names.append(module_event["body"]["module"]["name"]) - module_event = self.dap_server.wait_for_event(["module"]) - # Make sure we got an event for every active module. self.assertNotEqual(len(module_new_names), 0) for module in active_modules: @@ -86,7 +85,6 @@ class TestDAP_module(lldbdap_testcase.DAPTestCaseBase): # symbols got added. 
self.assertNotEqual(len(module_changed_names), 0) self.assertIn(program_module["name"], module_changed_names) - self.continue_to_exit() @skipIfWindows def test_modules(self): diff --git a/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py index e1ad1425a993..fa62ec243f5c 100644 --- a/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py +++ b/lldb/test/API/tools/lldb-dap/restart/TestDAP_restart_console.py @@ -30,7 +30,11 @@ class TestDAP_restart_console(lldbdap_testcase.DAPTestCaseBase): if reason == "entry": seen_stopped_event += 1 - self.assertEqual(seen_stopped_event, 1, "expect only one stopped entry event.") + self.assertEqual( + seen_stopped_event, + 1, + f"expect only one stopped entry event in {stopped_events}", + ) @skipIfAsan @skipIfWindows @@ -92,11 +96,13 @@ class TestDAP_restart_console(lldbdap_testcase.DAPTestCaseBase): self.build_and_launch(program, console="integratedTerminal", stopOnEntry=True) [bp_main] = self.set_function_breakpoints(["main"]) - self.dap_server.request_continue() # sends configuration done - stopped_events = self.dap_server.wait_for_stopped() + self.dap_server.request_configurationDone() + stopped_threads = list(self.dap_server.thread_stop_reasons.values()) # We should be stopped at the entry point. - self.assertGreaterEqual(len(stopped_events), 0, "expect stopped events") - self.verify_stopped_on_entry(stopped_events) + self.assertEqual( + len(stopped_threads), 1, "Expected the main thread to be stopped on entry." + ) + self.assertEqual(stopped_threads[0]["reason"], "entry") # Then, if we continue, we should hit the breakpoint at main. self.dap_server.request_continue() @@ -105,8 +111,12 @@ class TestDAP_restart_console(lldbdap_testcase.DAPTestCaseBase): # Restart and check that we still get a stopped event before reaching # main. 
self.dap_server.request_restart() - stopped_events = self.dap_server.wait_for_stopped() - self.verify_stopped_on_entry(stopped_events) + stopped_threads = list(self.dap_server.thread_stop_reasons.values()) + # We should be stopped at the entry point. + self.assertEqual( + len(stopped_threads), 1, "Expected the main thread to be stopped on entry." + ) + self.assertEqual(stopped_threads[0]["reason"], "entry") # continue to main self.dap_server.request_continue() diff --git a/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py b/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py index a01845669666..018402058917 100644 --- a/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py +++ b/lldb/test/API/tools/lldb-dap/send-event/TestDAP_sendEvent.py @@ -32,7 +32,7 @@ class TestDAP_sendEvent(lldbdap_testcase.DAPTestCaseBase): ], ) self.set_source_breakpoints(source, [breakpoint_line]) - self.continue_to_next_stop() + self.do_continue() custom_event = self.dap_server.wait_for_event( filter=["my-custom-event-no-body"] -- cgit v1.2.3 From eec44c0023bf71c0ba5eed7686799eeb40cdd0c5 Mon Sep 17 00:00:00 2001 From: David Spickett Date: Thu, 30 Oct 2025 17:33:05 +0000 Subject: [lldb][test] Fix typo in Arm Linux lldb-dap skip Fixes 17dbd8690e36f8e514fb47f4418f78420d0fc019. --- lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py index a897c1b01459..97c7f2d9e1b4 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py @@ -16,7 +16,7 @@ import base64 # https://github.com/llvm/llvm-project/issues/137660 @skipIf(oslist=["windows"], archs=["aarch64"]) # The Arm Linux bot needs stable resources before it can run these tests reliably. 
-@skipif(oslist=["linux]"], archs=["arm$"]) +@skipif(oslist=["linux"], archs=["arm$"]) class DAPTestCaseBase(TestBase): # set timeout based on whether ASAN was enabled or not. Increase # timeout by a factor of 10 if ASAN is enabled. -- cgit v1.2.3 From b1acd6df03d3e0dfaee00191a807142a65b2e212 Mon Sep 17 00:00:00 2001 From: Marcell Leleszi <59964679+mleleszi@users.noreply.github.com> Date: Thu, 30 Oct 2025 18:35:20 +0100 Subject: [libc] Remove faccessat entrypoint if faccessat2 syscall is not available (#164936) [#163091](https://github.com/llvm/llvm-project/issues/163091) Remove unistd.faccessat entrypoint for x86 linux if faccessat2 syscall is not available. Tested with non existent symbol and exclusion works. --- libc/CMakeLists.txt | 2 +- libc/config/linux/x86_64/exclude.txt | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt index 14718e2090bd..ae555a256ba6 100644 --- a/libc/CMakeLists.txt +++ b/libc/CMakeLists.txt @@ -363,7 +363,7 @@ elseif(LLVM_LIBC_FULL_BUILD) message(FATAL_ERROR "${LIBC_CONFIG_PATH}/headers.txt file not found and fullbuild requested.") endif() -# Check exclude.txt that appends to LIBC_EXCLUDE_ENTRYPOINTS list +# Check exclude.txt that appends to TARGET_LLVMLIBC_REMOVED_ENTRYPOINTS list if(EXISTS "${LIBC_CONFIG_PATH}/exclude.txt") include("${LIBC_CONFIG_PATH}/exclude.txt") endif() diff --git a/libc/config/linux/x86_64/exclude.txt b/libc/config/linux/x86_64/exclude.txt index 2c218b753b17..a0686310d21a 100644 --- a/libc/config/linux/x86_64/exclude.txt +++ b/libc/config/linux/x86_64/exclude.txt @@ -19,3 +19,11 @@ if(NOT has_sys_random) ) endif() endif() + +include(CheckSymbolExists) +check_symbol_exists(SYS_faccessat2 "sys/syscall.h" HAVE_SYS_FACCESSAT2) +if(NOT HAVE_SYS_FACCESSAT2) + list(APPEND TARGET_LLVMLIBC_REMOVED_ENTRYPOINTS + libc.src.unistd.faccessat + ) +endif() -- cgit v1.2.3 From 784b74c6ef8a7f6ffaa7ab250eb35696dd833426 Mon Sep 17 00:00:00 2001 From: Marcell Leleszi 
<59964679+mleleszi@users.noreply.github.com> Date: Thu, 30 Oct 2025 18:35:42 +0100 Subject: [libc] Fix off by one error in strftime (#165711) This patch fixes a bug in strftime's return value when the formatted output exactly fills the buffer, not including the null terminator. The previous check failed to account for the null terminator in this case, incorrectly returning the written count instead of 0. --- libc/src/time/strftime.cpp | 2 +- libc/src/time/strftime_l.cpp | 2 +- libc/test/src/time/strftime_test.cpp | 20 ++++++++++++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/libc/src/time/strftime.cpp b/libc/src/time/strftime.cpp index f36091bc9736..89b7d9bb7c1b 100644 --- a/libc/src/time/strftime.cpp +++ b/libc/src/time/strftime.cpp @@ -26,7 +26,7 @@ LLVM_LIBC_FUNCTION(size_t, strftime, int ret = strftime_core::strftime_main(&writer, format, timeptr); if (buffsz > 0) // if the buffsz is 0 the buffer may be a null pointer. wb.buff[wb.buff_cur] = '\0'; - return (ret < 0 || static_cast(ret) > buffsz) ? 0 : ret; + return (ret < 0 || static_cast(ret) >= buffsz) ? 0 : ret; } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/time/strftime_l.cpp b/libc/src/time/strftime_l.cpp index 201b85da39ee..409f8683b728 100644 --- a/libc/src/time/strftime_l.cpp +++ b/libc/src/time/strftime_l.cpp @@ -29,7 +29,7 @@ LLVM_LIBC_FUNCTION(size_t, strftime_l, int ret = strftime_core::strftime_main(&writer, format, timeptr); if (buffsz > 0) // if the buffsz is 0 the buffer may be a null pointer. wb.buff[wb.buff_cur] = '\0'; - return (ret < 0 || static_cast(ret) > buffsz) ? 0 : ret; + return (ret < 0 || static_cast(ret) >= buffsz) ? 
0 : ret; } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/src/time/strftime_test.cpp b/libc/test/src/time/strftime_test.cpp index cac7560b2b94..38176f77804d 100644 --- a/libc/test/src/time/strftime_test.cpp +++ b/libc/test/src/time/strftime_test.cpp @@ -2326,3 +2326,23 @@ TEST(LlvmLibcStrftimeTest, TimeFormatFullDateTime) { // size_t written = 0; // SimplePaddedNum spn; // } + +TEST(LlvmLibcStrftimeTest, BufferTooSmall) { + struct tm time; + char buffer[1]; + + time.tm_year = get_adjusted_year(2025); + time.tm_mon = 10; + time.tm_mday = 24; + + size_t written = + LIBC_NAMESPACE::strftime(buffer, sizeof(buffer), "%F", &time); + EXPECT_EQ(written, size_t{0}); + + char buffer2[10]; + + // The string "2025-11-24" is 10 chars, + // so strftime needs 10 + 1 bytes to write the string and the null terminator. + written = LIBC_NAMESPACE::strftime(buffer, sizeof(buffer2), "%F", &time); + EXPECT_EQ(written, size_t{0}); +} -- cgit v1.2.3 From e63f0f50fae479b4eaec98ac97de0745735a90b7 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Thu, 30 Oct 2025 10:37:15 -0700 Subject: [SLU][profcheck] Estimate branch weights in partial unswitch cases (#164035) In the case of a partial unswitch, we take the invariant part of an expression consisting of either conjunctions or disjunctions, and hoist it out of the loop, conditioning a branch on it (==the invariant part). We can't correctly calculate the branch probability of this new branch, but can use the probability of the existing branch as a bound. That would preserve block frequencies better than allowing for the default, static (50-50) probability for that branch. 
Issue #147390 --- llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp | 60 ++++++-- .../nontrivial-unswitch-profile.ll | 89 ++++++++++++ llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll | 11 +- .../SimpleLoopUnswitch/simple-unswitch-profile.ll | 157 +++++++++++++++++++++ 4 files changed, 305 insertions(+), 12 deletions(-) create mode 100644 llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll create mode 100644 llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll diff --git a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp index 5af6c96c56a0..bb6c879f4d47 100644 --- a/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp +++ b/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -81,6 +81,7 @@ STATISTIC( STATISTIC(NumInvariantConditionsInjected, "Number of invariant conditions injected and unswitched"); +namespace llvm { static cl::opt EnableNonTrivialUnswitch( "enable-nontrivial-unswitch", cl::init(false), cl::Hidden, cl::desc("Forcibly enables non-trivial loop unswitching rather than " @@ -131,11 +132,17 @@ static cl::opt InjectInvariantConditions( static cl::opt InjectInvariantConditionHotnesThreshold( "simple-loop-unswitch-inject-invariant-condition-hotness-threshold", - cl::Hidden, cl::desc("Only try to inject loop invariant conditions and " - "unswitch on them to eliminate branches that are " - "not-taken 1/ times or less."), + cl::Hidden, + cl::desc("Only try to inject loop invariant conditions and " + "unswitch on them to eliminate branches that are " + "not-taken 1/ times or less."), cl::init(16)); +static cl::opt EstimateProfile("simple-loop-unswitch-estimate-profile", + cl::Hidden, cl::init(true)); +extern cl::opt ProfcheckDisableMetadataFixes; +} // namespace llvm + AnalysisKey ShouldRunExtraSimpleLoopUnswitch::Key; namespace { struct CompareDesc { @@ -268,13 +275,42 @@ static bool areLoopExitPHIsLoopInvariant(const Loop &L, llvm_unreachable("Basic blocks should never be 
empty!"); } -/// Copy a set of loop invariant values \p ToDuplicate and insert them at the +/// Copy a set of loop invariant values \p Invariants and insert them at the /// end of \p BB and conditionally branch on the copied condition. We only /// branch on a single value. +/// We attempt to estimate the profile of the resulting conditional branch from +/// \p ComputeProfFrom, which is the original conditional branch we're +/// unswitching. +/// When \p Direction is true, the \p Invariants form a disjunction, and the +/// branch conditioned on it exits the loop on the "true" case. When \p +/// Direction is false, the \p Invariants form a conjunction and the branch +/// exits on the "false" case. static void buildPartialUnswitchConditionalBranch( BasicBlock &BB, ArrayRef Invariants, bool Direction, BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, bool InsertFreeze, - const Instruction *I, AssumptionCache *AC, const DominatorTree &DT) { + const Instruction *I, AssumptionCache *AC, const DominatorTree &DT, + const BranchInst &ComputeProfFrom) { + + SmallVector BranchWeights; + bool HasBranchWeights = EstimateProfile && !ProfcheckDisableMetadataFixes && + extractBranchWeights(ComputeProfFrom, BranchWeights); + // If Direction is true, that means we had a disjunction and that the "true" + // case exits. The probability of the disjunction of the subset of terms is at + // most as high as the original one. So, if the probability is higher than the + // one we'd assign in absence of a profile (i.e. 0.5), we will use 0.5, + // but if it's lower, we will use the original probability. + // Conversely, if Direction is false, that means we had a conjunction, and the + // probability of exiting is captured in the second branch weight. That + // probability is a disjunction (of the negation of the original terms). The + // same reasoning applies as above. + // Issue #165649: should we expect BFI to conserve, and use that to calculate + // the branch weights? 
+ if (HasBranchWeights && + static_cast(BranchWeights[Direction ? 0 : 1]) / + static_cast(sum_of(BranchWeights)) > + 0.5) + HasBranchWeights = false; + IRBuilder<> IRB(&BB); IRB.SetCurrentDebugLocation(DebugLoc::getCompilerGenerated()); @@ -287,8 +323,14 @@ static void buildPartialUnswitchConditionalBranch( Value *Cond = Direction ? IRB.CreateOr(FrozenInvariants) : IRB.CreateAnd(FrozenInvariants); - IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc, - Direction ? &NormalSucc : &UnswitchedSucc); + auto *BR = IRB.CreateCondBr( + Cond, Direction ? &UnswitchedSucc : &NormalSucc, + Direction ? &NormalSucc : &UnswitchedSucc, + HasBranchWeights ? ComputeProfFrom.getMetadata(LLVMContext::MD_prof) + : nullptr); + if (!HasBranchWeights) + setExplicitlyUnknownBranchWeightsIfProfiled( + *BR, *BR->getParent()->getParent(), DEBUG_TYPE); } /// Copy a set of loop invariant values, and conditionally branch on them. @@ -658,7 +700,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, " condition!"); buildPartialUnswitchConditionalBranch( *OldPH, Invariants, ExitDirection, *UnswitchedBB, *NewPH, - FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT); + FreezeLoopUnswitchCond, OldPH->getTerminator(), nullptr, DT, BI); } // Update the dominator tree with the added edge. 
@@ -2477,7 +2519,7 @@ static void unswitchNontrivialInvariants( else { buildPartialUnswitchConditionalBranch( *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, - FreezeLoopUnswitchCond, BI, &AC, DT); + FreezeLoopUnswitchCond, BI, &AC, DT, *BI); } DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH}); diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll new file mode 100644 index 000000000000..9cc417f6b874 --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch-profile.ll @@ -0,0 +1,89 @@ +; RUN: split-file %s %t +; RUN: cat %t/main.ll %t/probable-or.prof > %t/probable-or.ll +; RUN: cat %t/main.ll %t/probable-and.prof > %t/probable-and.ll +; RUN: opt -passes='loop(simple-loop-unswitch)' -S %t/probable-or.ll -o -| FileCheck %t/probable-or.prof +; RUN: opt -passes='loop(simple-loop-unswitch)' -S %t/probable-and.ll -o -| FileCheck %t/probable-and.prof + +;--- main.ll +declare i32 @a() +declare i32 @b() + +define i32 @or(ptr %ptr, i1 %cond) !prof !0 { +entry: + br label %loop_begin + +loop_begin: + %v1 = load i1, ptr %ptr + %cond_or = or i1 %v1, %cond + br i1 %cond_or, label %loop_a, label %loop_b, !prof !1 + +loop_a: + call i32 @a() + br label %latch + +loop_b: + call i32 @b() + br label %latch + +latch: + %v2 = load i1, ptr %ptr + br i1 %v2, label %loop_begin, label %loop_exit, !prof !2 + +loop_exit: + ret i32 0 +} + +define i32 @and(ptr %ptr, i1 %cond) !prof !0 { +entry: + br label %loop_begin + +loop_begin: + %v1 = load i1, ptr %ptr + %cond_and = and i1 %v1, %cond + br i1 %cond_and, label %loop_a, label %loop_b, !prof !1 + +loop_a: + call i32 @a() + br label %latch + +loop_b: + call i32 @b() + br label %latch + +latch: + %v2 = load i1, ptr %ptr + br i1 %v2, label %loop_begin, label %loop_exit, !prof !2 + +loop_exit: + ret i32 0 +} + +;--- probable-or.prof +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 
1, i32 1000} +!2 = !{!"branch_weights", i32 5, i32 7} +; CHECK-LABEL: @or +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond.fr = freeze i1 %cond +; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split, !prof !1 +; CHECK-LABEL: @and +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond.fr = freeze i1 %cond +; CHECK-NEXT: br i1 %cond.fr, label %entry.split, label %entry.split.us, !prof !3 +; CHECK: !1 = !{!"branch_weights", i32 1, i32 1000} +; CHECK: !3 = !{!"unknown", !"simple-loop-unswitch"} + +;--- probable-and.prof +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 1000, i32 1} +!2 = !{!"branch_weights", i32 5, i32 7} +; CHECK-LABEL: @or +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond.fr = freeze i1 %cond +; CHECK-NEXT: br i1 %cond.fr, label %entry.split.us, label %entry.split, !prof !1 +; CHECK-LABEL: @and +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond.fr = freeze i1 %cond +; CHECK-NEXT: br i1 %cond.fr, label %entry.split, label %entry.split.us, !prof !3 +; CHECK: !1 = !{!"unknown", !"simple-loop-unswitch"} +; CHECK: !3 = !{!"branch_weights", i32 1000, i32 1} diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll b/llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll index 0964c55d1dec..3760be4b26f2 100644 --- a/llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll +++ b/llvm/test/Transforms/SimpleLoopUnswitch/pr60736.ll @@ -1,4 +1,4 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals ; RUN: opt < %s -simple-loop-unswitch-inject-invariant-conditions=true -passes='loop(simple-loop-unswitch,loop-instsimplify)' -S | FileCheck %s define void @test() { @@ -7,7 +7,7 @@ define void @test() { ; CHECK-NEXT: [[TMP:%.*]] = call i1 @llvm.experimental.widenable.condition() ; CHECK-NEXT: [[TMP1:%.*]] = load atomic i32, ptr addrspace(1) poison unordered, align 8 ; CHECK-NEXT: [[TMP2:%.*]] = load atomic i32, ptr addrspace(1) poison 
unordered, align 8 -; CHECK-NEXT: br i1 [[TMP]], label [[BB_SPLIT:%.*]], label [[BB3_SPLIT_US:%.*]] +; CHECK-NEXT: br i1 [[TMP]], label [[BB_SPLIT:%.*]], label [[BB3_SPLIT_US:%.*]], !prof [[PROF0:![0-9]+]] ; CHECK: bb.split: ; CHECK-NEXT: br label [[BB3:%.*]] ; CHECK: bb3: @@ -19,7 +19,7 @@ define void @test() { ; CHECK-NEXT: [[TMP6_US:%.*]] = phi i32 [ poison, [[BB3_SPLIT_US]] ] ; CHECK-NEXT: [[TMP7_US:%.*]] = add nuw nsw i32 [[TMP6_US]], 2 ; CHECK-NEXT: [[TMP8_US:%.*]] = icmp ult i32 [[TMP7_US]], [[TMP2]] -; CHECK-NEXT: br i1 [[TMP8_US]], label [[BB9_US:%.*]], label [[BB16_SPLIT_US:%.*]], !prof [[PROF0:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP8_US]], label [[BB9_US:%.*]], label [[BB16_SPLIT_US:%.*]], !prof [[PROF0]] ; CHECK: bb9.us: ; CHECK-NEXT: br label [[BB17_SPLIT_US:%.*]] ; CHECK: bb16.split.us: @@ -96,3 +96,8 @@ declare i1 @llvm.experimental.widenable.condition() !0 = !{!"branch_weights", i32 1048576, i32 1} +;. +; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(inaccessiblemem: readwrite) } +;. +; CHECK: [[PROF0]] = !{!"branch_weights", i32 1048576, i32 1} +;. 
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll b/llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll new file mode 100644 index 000000000000..ec6baa5b3772 --- /dev/null +++ b/llvm/test/Transforms/SimpleLoopUnswitch/simple-unswitch-profile.ll @@ -0,0 +1,157 @@ +; RUN: split-file %s %t +; RUN: cat %t/main.ll %t/probable-or.prof > %t/probable-or.ll +; RUN: cat %t/main.ll %t/probable-and.prof > %t/probable-and.ll +; RUN: opt -passes='loop-mssa(simple-loop-unswitch)' -S %t/probable-or.ll -o - | FileCheck %t/probable-or.prof +; RUN: opt -passes='loop-mssa(simple-loop-unswitch)' -S %t/probable-and.ll -o - | FileCheck %t/probable-and.prof +; +; RUN: opt -passes='module(print),function(loop-mssa(simple-loop-unswitch)),module(print)' \ +; RUN: %t/probable-or.ll -disable-output -simple-loop-unswitch-estimate-profile=0 2>&1 | FileCheck %t/probable-or.prof --check-prefixes=PROFILE-COM,PROFILE-REF + +; RUN: opt -passes='module(print),function(loop-mssa(simple-loop-unswitch)),module(print)' \ +; RUN: %t/probable-or.ll -disable-output -simple-loop-unswitch-estimate-profile=1 2>&1 | FileCheck %t/probable-or.prof --check-prefixes=PROFILE-COM,PROFILE-CHK + +; RUN: opt -passes='module(print),function(loop-mssa(simple-loop-unswitch)),module(print)' \ +; RUN: %t/probable-and.ll -disable-output -simple-loop-unswitch-estimate-profile=0 2>&1 | FileCheck %t/probable-and.prof --check-prefixes=PROFILE-COM,PROFILE-REF + +; RUN: opt -passes='module(print),function(loop-mssa(simple-loop-unswitch)),module(print)' \ +; RUN: %t/probable-and.ll -disable-output -simple-loop-unswitch-estimate-profile=1 2>&1 | FileCheck %t/probable-and.prof --check-prefixes=PROFILE-COM,PROFILE-CHK + +;--- main.ll +declare void @some_func() noreturn + +define i32 @or(i1 %cond1, i32 %var1) !prof !0 { +entry: + br label %loop_begin + +loop_begin: + %var3 = phi i32 [%var1, %entry], [%var2, %do_something] + %cond2 = icmp eq i32 %var3, 10 + %cond.or = or i1 %cond1, %cond2 + br 
i1 %cond.or, label %loop_exit, label %do_something, !prof !1 + +do_something: + %var2 = add i32 %var3, 1 + call void @some_func() noreturn nounwind + br label %loop_begin + +loop_exit: + ret i32 0 +} + +define i32 @and(i1 %cond1, i32 %var1) !prof !0 { +entry: + br label %loop_begin + +loop_begin: + %var3 = phi i32 [%var1, %entry], [%var2, %do_something] + %cond2 = icmp eq i32 %var3, 10 + %cond.and = and i1 %cond1, %cond2 + br i1 %cond.and, label %do_something, label %loop_exit, !prof !1 + +do_something: + %var2 = add i32 %var3, 1 + call void @some_func() noreturn nounwind + br label %loop_begin + +loop_exit: + ret i32 0 +} + +;--- probable-or.prof +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 1, i32 1000} +; CHECK-LABEL: @or +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond1.fr = freeze i1 %cond1 +; CHECK-NEXT: br i1 %cond1.fr, label %loop_exit.split, label %entry.split, !prof !1 +; CHECK-LABEL: @and +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond1.fr = freeze i1 %cond1 +; CHECK-NEXT: br i1 %cond1.fr, label %entry.split, label %loop_exit.split, !prof !2 +; CHECK: !1 = !{!"branch_weights", i32 1, i32 1000} +; CHECK: !2 = !{!"unknown", !"simple-loop-unswitch"} + +; PROFILE-COM: Printing analysis results of BFI for function 'or': +; PROFILE-COM: block-frequency-info: or + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-COM: - loop_begin: {{.*}} count = 10010 + ; PROFILE-COM: - do_something: {{.*}} count = 10000 + ; PROFILE-COM: - loop_exit: {{.*}} count = 10 + +; PROFILE-COM: Printing analysis results of BFI for function 'and': +; PROFILE-COM: block-frequency-info: and + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-COM: - loop_begin: {{.*}} count = 10 + ; PROFILE-COM: - do_something: {{.*}} count = 0 + ; PROFILE-COM: - loop_exit: {{.*}} count = 10 + +; PROFILE-COM: Printing analysis results of BFI for function 'or': +; PROFILE-COM: block-frequency-info: or + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-REF: - entry.split: {{.*}} 
count = 5 + ; PROFILE-CHK: - entry.split: {{.*}} count = 10 + ; PROFILE-REF: - loop_begin: {{.*}} count = 5005 + ; PROFILE-CHK: - loop_begin: {{.*}} count = 10000 + ; PROFILE-REF: - do_something: {{.*}} count = 5000 + ; PROFILE-CHK: - do_something: {{.*}} count = 9990 + ; PROFILE-REF: - loop_exit: {{.*}} count = 5 + ; PROFILE-CHK: - loop_exit: {{.*}} count = 10 + ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10 + +; PROFILE-COM: Printing analysis results of BFI for function 'and': +; PROFILE-COM: block-frequency-info: and + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-COM: - entry.split: {{.*}} count = 5 + ; PROFILE-COM: - loop_begin: {{.*}} count = 5 + ; PROFILE-COM: - do_something: {{.*}} count = 0 + ; PROFILE-COM: - loop_exit: {{.*}} count = 5 + ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10 + +;--- probable-and.prof +!0 = !{!"function_entry_count", i32 10} +!1 = !{!"branch_weights", i32 1000, i32 1} +; CHECK-LABEL: @or +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond1.fr = freeze i1 %cond1 +; CHECK-NEXT: br i1 %cond1.fr, label %loop_exit.split, label %entry.split, !prof !1 +; CHECK-LABEL: @and +; CHECK-LABEL: entry: +; CHECK-NEXT: %cond1.fr = freeze i1 %cond1 +; CHECK-NEXT: br i1 %cond1.fr, label %entry.split, label %loop_exit.split, !prof !2 +; CHECK: !1 = !{!"unknown", !"simple-loop-unswitch"} +; CHECK: !2 = !{!"branch_weights", i32 1000, i32 1} +; PROFILE-COM: Printing analysis results of BFI for function 'or': +; PROFILE-COM: block-frequency-info: or + ; PROFILE-COM: - entry: {{.*}}, count = 10 + ; PROFILE-COM: - loop_begin: {{.*}}, count = 10 + ; PROFILE-COM: - do_something: {{.*}}, count = 0 + ; PROFILE-COM: - loop_exit: {{.*}}, count = 10 + +; PROFILE-COM: Printing analysis results of BFI for function 'and': +; PROFILE-COM: block-frequency-info: and + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-COM: - loop_begin: {{.*}} count = 10010 + ; PROFILE-COM: - do_something: {{.*}} count = 10000 + ; PROFILE-COM: - loop_exit: {{.*}} count = 
10 + +; PROFILE-COM: Printing analysis results of BFI for function 'or': +; PROFILE-COM: block-frequency-info: or + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-COM: - entry.split: {{.*}} count = 5 + ; PROFILE-COM: - loop_begin: {{.*}} count = 5 + ; PROFILE-COM: - do_something: {{.*}} count = 0 + ; PROFILE-COM: - loop_exit: {{.*}} count = 5 + ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10 + +; PROFILE-COM: Printing analysis results of BFI for function 'and': +; PROFILE-COM: block-frequency-info: and + ; PROFILE-COM: - entry: {{.*}} count = 10 + ; PROFILE-REF: - entry.split: {{.*}} count = 5 + ; PROFILE-CHK: - entry.split: {{.*}} count = 10 + ; PROFILE-REF: - loop_begin: {{.*}} count = 5005 + ; PROFILE-CHK: - loop_begin: {{.*}} count = 10000 + ; PROFILE-REF: - do_something: {{.*}} count = 5000 + ; PROFILE-CHK: - do_something: {{.*}} count = 9990 + ; PROFILE-REF: - loop_exit: {{.*}} count = 5 + ; PROFILE-CHK: - loop_exit: {{.*}} count = 10 + ; PROFILE-COM: - loop_exit.split: {{.*}} count = 10 -- cgit v1.2.3 From 8d9cd5bf0a7953f5f816853f82f8ef755b0dd05d Mon Sep 17 00:00:00 2001 From: Amara Emerson Date: Thu, 30 Oct 2025 10:42:36 -0700 Subject: Move GlobalISel sync up meeting information from "past" to current sync ups. --- llvm/docs/GettingInvolved.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/docs/GettingInvolved.rst b/llvm/docs/GettingInvolved.rst index 4b4b09ad87ab..039d61624093 100644 --- a/llvm/docs/GettingInvolved.rst +++ b/llvm/docs/GettingInvolved.rst @@ -223,6 +223,10 @@ what to add to your calendar invite. - `ics `__ `gcal `__ - + * - GlobalISel + - Every 2nd Tuesday of the month + - `gcal `__ + - `Meeting details/agenda `__ For event owners, our Discord bot also supports sending automated announcements @@ -254,10 +258,6 @@ the future. 
- `ics `__ `gcal `__ - `Minutes/docs `__ - * - GlobalISel - - Every 2nd Tuesday of the month - - `gcal `__ - - `Meeting details/agenda `__ * - Vector Predication - Every 2 weeks on Tuesdays, 3pm UTC - -- cgit v1.2.3 From 160058fc19a9bcb70feb442a755229838b4dbc7a Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 30 Oct 2025 17:42:28 +0000 Subject: [lit] Move ulimit_unlimited.txt test to non Darwin tests This fails on MacOS because setting it to unlimited there just sets the limit to the max value which causes differences that show up in the check lines. --- .../lit/tests/Inputs/shtest-ulimit-nondarwin/ulimit_unlimited.txt | 6 ++++++ llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_unlimited.txt | 6 ------ llvm/utils/lit/tests/shtest-ulimit-nondarwin.py | 8 +++++++- llvm/utils/lit/tests/shtest-ulimit.py | 8 +------- 4 files changed, 14 insertions(+), 14 deletions(-) create mode 100644 llvm/utils/lit/tests/Inputs/shtest-ulimit-nondarwin/ulimit_unlimited.txt delete mode 100644 llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_unlimited.txt diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit-nondarwin/ulimit_unlimited.txt b/llvm/utils/lit/tests/Inputs/shtest-ulimit-nondarwin/ulimit_unlimited.txt new file mode 100644 index 000000000000..4c687e306186 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-ulimit-nondarwin/ulimit_unlimited.txt @@ -0,0 +1,6 @@ +# RUN: ulimit -f 5 +# RUN: %{python} %S/../shtest-ulimit/print_limits.py +# RUN: ulimit -f unlimited +# RUN: %{python} %S/../shtest-ulimit/print_limits.py +# Fail the test so that we can assert on the output. 
+# RUN: not echo return diff --git a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_unlimited.txt b/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_unlimited.txt deleted file mode 100644 index b8aa3d507171..000000000000 --- a/llvm/utils/lit/tests/Inputs/shtest-ulimit/ulimit_unlimited.txt +++ /dev/null @@ -1,6 +0,0 @@ -# RUN: ulimit -f 5 -# RUN: %{python} %S/print_limits.py -# RUN: ulimit -f unlimited -# RUN: %{python} %S/print_limits.py -# Fail the test so that we can assert on the output. -# RUN: not echo return diff --git a/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py b/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py index 022e8b5f4189..893270ec68f6 100644 --- a/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py +++ b/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py @@ -6,10 +6,16 @@ # RUN: not %{lit} -a -v %{inputs}/shtest-ulimit-nondarwin | FileCheck %s -# CHECK: -- Testing: 1 tests{{.*}} +# CHECK: -- Testing: 2 tests{{.*}} # CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_okay.txt ({{[^)]*}}) # CHECK: ulimit -v 1048576 # CHECK: ulimit -s 256 # CHECK: RLIMIT_AS=1073741824 # CHECK: RLIMIT_STACK=262144 + +# CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_unlimited.txt ({{[^)]*}}) +# CHECK: ulimit -f 5 +# CHECK: RLIMIT_FSIZE=5 +# CHECK: ulimit -f unlimited +# CHECK: RLIMIT_FSIZE=-1 diff --git a/llvm/utils/lit/tests/shtest-ulimit.py b/llvm/utils/lit/tests/shtest-ulimit.py index e15e19092030..21e5a5e2491d 100644 --- a/llvm/utils/lit/tests/shtest-ulimit.py +++ b/llvm/utils/lit/tests/shtest-ulimit.py @@ -11,7 +11,7 @@ # RUN: not %{lit} -a -v %{inputs}/shtest-ulimit --order=lexical \ # RUN: | FileCheck -DBASE_NOFILE_LIMIT=%{readfile:%t.nofile_limit} %s -# CHECK: -- Testing: 4 tests{{.*}} +# CHECK: -- Testing: 3 tests{{.*}} # CHECK-LABEL: FAIL: shtest-ulimit :: ulimit-bad-arg.txt ({{[^)]*}}) # CHECK: ulimit -n @@ -25,9 +25,3 @@ # CHECK-LABEL: FAIL: shtest-ulimit :: ulimit_reset.txt ({{[^)]*}}) # CHECK: RLIMIT_NOFILE=[[BASE_NOFILE_LIMIT]] - -# CHECK-LABEL: FAIL: shtest-ulimit :: 
ulimit_unlimited.txt ({{[^)]*}}) -# CHECK: ulimit -f 5 -# CHECK: RLIMIT_FSIZE=5 -# CHECK: ulimit -f unlimited -# CHECK: RLIMIT_FSIZE=-1 -- cgit v1.2.3 From b73951f07a8fd84c7df2a733c7cd8130cef5e1d4 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 30 Oct 2025 10:31:53 -0700 Subject: [RISCV] Adjust stackmaps test to provide coverage for non-64 bit values --- llvm/test/CodeGen/RISCV/rv64-stackmap.ll | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rv64-stackmap.ll b/llvm/test/CodeGen/RISCV/rv64-stackmap.ll index d07f608bf789..c3183a1a3e03 100644 --- a/llvm/test/CodeGen/RISCV/rv64-stackmap.ll +++ b/llvm/test/CodeGen/RISCV/rv64-stackmap.ll @@ -290,9 +290,9 @@ define void @liveConstant() { ; CHECK-NEXT: .half 2 ; CHECK-NEXT: .half 0 ; CHECK-NEXT: .word -define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27) { +define void @spilledValue(i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i8 %l25, i16 zeroext %l26, i32 signext %l27) { entry: - call void (i64, i32, ptr, i32, ...) 
@llvm.experimental.patchpoint.void(i64 11, i32 28, ptr null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i64 %l25, i64 %l26, i64 %l27) + call void (i64, i32, ptr, i32, ...) @llvm.experimental.patchpoint.void(i64 11, i32 28, ptr null, i32 5, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3, i64 %arg4, i64 %l0, i64 %l1, i64 %l2, i64 %l3, i64 %l4, i64 %l5, i64 %l6, i64 %l7, i64 %l8, i64 %l9, i64 %l10, i64 %l11, i64 %l12, i64 %l13, i64 %l14, i64 %l15, i64 %l16, i64 %l17, i64 %l18, i64 %l19, i64 %l20, i64 %l21, i64 %l22, i64 %l23, i64 %l24, i8 %l25, i16 %l26, i32 %l27) ret void } -- cgit v1.2.3 From 6a10d1dab01f424a5baaabde150d26acfd8fe48e Mon Sep 17 00:00:00 2001 From: Ian Anderson Date: Thu, 30 Oct 2025 10:54:14 -0700 Subject: [clang][docs] assert.h is not a good candidate for a textual header (#165057) The C standard behavior of `assert` cannot be accomplished with clang modules, either as a normal modular header, or a textual header. As a normal modular header: #define NDEBUG #include <assert.h> This pattern doesn't work, NDEBUG has to be passed on the command line to take effect, and then will affect all `assert`s in the includer. As a textual header: #define NDEBUG #include <modular_header_that_has_an_assert.h> This pattern doesn't work for similar reasons, modular_header_that_has_an_assert.h captured the value of NDEBUG when its module built and won't pick it up from the includer. -DNDEBUG can be passed when building the module, but will similarly affect the entire module. This has the additional problem that every module will contain a declaration for `assert`, which can possibly conflict with each other if they use different values of NDEBUG. So really just doesn't work properly with clang modules.
Avoid the issue by not mentioning it in the Modules documentation, and use "X macros" as the example for textual headers. Don't use [extern_c] in the example modules, that should very rarely be used. Don't put multiple `header` declarations in a submodule, that has the confusing effect of "fusing" the headers. e.g. <errno.h> does not include <sys/errno.h>, but if it's in the same submodule, then an `#include <errno.h>` will mysteriously also include <sys/errno.h>. --- clang/docs/Modules.rst | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/clang/docs/Modules.rst b/clang/docs/Modules.rst index acbe45e0be97..e45ee9ff9eac 100644 --- a/clang/docs/Modules.rst +++ b/clang/docs/Modules.rst @@ -421,13 +421,7 @@ As an example, the module map file for the C standard library might look a bit l .. parsed-literal:: - module std [system] [extern_c] { - module assert { - textual header "assert.h" - header "bits/assert-decls.h" - export * - } - + module std [system] { module complex { header "complex.h" export * @@ -440,7 +434,6 @@ As an example, the module map file for the C standard library might look a bit l module errno { header "errno.h" - header "sys/errno.h" export * } @@ -673,14 +666,14 @@ of checking *use-declaration*\s, and must still be a lexically-valid header file. In the future, we intend to pre-tokenize such headers and include the token sequence within the prebuilt module representation. -A header with the ``exclude`` specifier is excluded from the module. It will not be included when the module is built, nor will it be considered to be part of the module, even if an ``umbrella`` header or directory would otherwise make it part of the module. +A header with the ``exclude`` specifier is excluded from the module. It will not be included when the module is built, nor will it be considered to be part of the module, even if an ``umbrella`` directory would otherwise make it part of the module.
-**Example:** The C header ``assert.h`` is an excellent candidate for a textual header, because it is meant to be included multiple times (possibly with different ``NDEBUG`` settings). However, declarations within it should typically be split into a separate modular header. +**Example:** A "X macro" header is an excellent candidate for a textual header, because it is can't be compiled standalone, and by itself does not contain any declarations. .. parsed-literal:: - module std [system] { - textual header "assert.h" + module MyLib [system] { + textual header "xmacros.h" } A given header shall not be referenced by more than one *header-declaration*. -- cgit v1.2.3 From 28e98b85019d39b67fff0b236269361ca2d47dc1 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 30 Oct 2025 11:28:11 -0700 Subject: [lit] Expand late substitutions before running builtins This enables the use of readfile substitutions for populating environment variables. This is necessary in some compiler-rt tests. Reviewers: pawosm-arm Reviewed By: pawosm-arm Pull Request: https://github.com/llvm/llvm-project/pull/165140 --- llvm/utils/lit/lit/TestRunner.py | 7 ++++--- llvm/utils/lit/tests/Inputs/shtest-readfile/env.txt | 6 ++++++ llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg | 1 + llvm/utils/lit/tests/shtest-readfile-external.py | 2 +- llvm/utils/lit/tests/shtest-readfile.py | 6 +++++- 5 files changed, 17 insertions(+), 5 deletions(-) create mode 100644 llvm/utils/lit/tests/Inputs/shtest-readfile/env.txt diff --git a/llvm/utils/lit/lit/TestRunner.py b/llvm/utils/lit/lit/TestRunner.py index 76beebd757a7..3176b1a25743 100644 --- a/llvm/utils/lit/lit/TestRunner.py +++ b/llvm/utils/lit/lit/TestRunner.py @@ -826,6 +826,10 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper): not_args = [] not_count = 0 not_crash = False + + # Expand all late substitutions. 
+ args = _expandLateSubstitutions(j, args, cmd_shenv.cwd) + while True: if args[0] == "env": # Create a copy of the global environment and modify it for @@ -875,9 +879,6 @@ def _executeShCmd(cmd, shenv, results, timeoutHelper): # Ensure args[0] is hashable. args[0] = expand_glob(args[0], cmd_shenv.cwd)[0] - # Expand all late substitutions. - args = _expandLateSubstitutions(j, args, cmd_shenv.cwd) - inproc_builtin = inproc_builtins.get(args[0], None) if inproc_builtin and (args[0] != "echo" or len(cmd.commands) == 1): # env calling an in-process builtin is useless, so we take the safe diff --git a/llvm/utils/lit/tests/Inputs/shtest-readfile/env.txt b/llvm/utils/lit/tests/Inputs/shtest-readfile/env.txt new file mode 100644 index 000000000000..3e1937375497 --- /dev/null +++ b/llvm/utils/lit/tests/Inputs/shtest-readfile/env.txt @@ -0,0 +1,6 @@ +## Tests that readfile works with the env builtin. +# RUN: echo -n "hello" > %t.1 +# RUN: env TEST=%{readfile:%t.1} %{python} -c "import os; print(os.environ['TEST'])" + +## Fail the test so we can assert on the output. +# RUN: not echo return \ No newline at end of file diff --git a/llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg b/llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg index ee496674fdb6..80af27f57d35 100644 --- a/llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg +++ b/llvm/utils/lit/tests/Inputs/shtest-readfile/lit.cfg @@ -10,6 +10,7 @@ use_lit_shell = lit.util.pythonize_bool(lit_shell_env) config.test_format = lit.formats.ShTest(execute_external=not use_lit_shell) config.test_source_root = None config.test_exec_root = None +config.substitutions.append(("%{python}", '"%s"' % (sys.executable))) # If we are testing with the external shell, remove the fake-externals from # PATH so that we use mkdir in the tests. 
diff --git a/llvm/utils/lit/tests/shtest-readfile-external.py b/llvm/utils/lit/tests/shtest-readfile-external.py index c00bff45c870..6fe1088efd67 100644 --- a/llvm/utils/lit/tests/shtest-readfile-external.py +++ b/llvm/utils/lit/tests/shtest-readfile-external.py @@ -6,7 +6,7 @@ # UNSUPPORTED: system-windows # RUN: env LIT_USE_INTERNAL_SHELL=0 not %{lit} -a -v %{inputs}/shtest-readfile | FileCheck -match-full-lines -DTEMP_PATH=%S/Inputs/shtest-readfile/Output %s -# CHECK: -- Testing: 4 tests{{.*}} +# CHECK: -- Testing: 5 tests{{.*}} # CHECK-LABEL: FAIL: shtest-readfile :: absolute-paths.txt ({{[^)]*}}) # CHECK: echo $(cat [[TEMP_PATH]]/absolute-paths.txt.tmp) && test -e [[TEMP_PATH]]/absolute-paths.txt.tmp {{.*}} diff --git a/llvm/utils/lit/tests/shtest-readfile.py b/llvm/utils/lit/tests/shtest-readfile.py index 66e3a042bf78..218da2257bcf 100644 --- a/llvm/utils/lit/tests/shtest-readfile.py +++ b/llvm/utils/lit/tests/shtest-readfile.py @@ -5,12 +5,16 @@ # RUN: env LIT_USE_INTERNAL_SHELL=1 not %{lit} -a -v %{inputs}/shtest-readfile | FileCheck -match-full-lines -DTEMP_PATH=%S%{fs-sep}Inputs%{fs-sep}shtest-readfile%{fs-sep}Output %s -# CHECK: -- Testing: 4 tests{{.*}} +# CHECK: -- Testing: 5 tests{{.*}} # CHECK-LABEL: FAIL: shtest-readfile :: absolute-paths.txt ({{[^)]*}}) # CHECK: echo hello # CHECK: # executed command: echo '%{readfile:[[TEMP_PATH]]{{[\\\/]}}absolute-paths.txt.tmp}' +# CHECK-LABEL: FAIL: shtest-readfile :: env.txt ({{[^)]*}}) +# CHECK: env TEST=hello {{.*}} -c "import os; print(os.environ['TEST'])" +# CHECK: # | hello + # CHECK-LABEL: FAIL: shtest-readfile :: file-does-not-exist.txt ({{[^)]*}}) # CHECK: # executed command: @echo 'echo %{readfile:/file/does/not/exist}' # CHECK: # | File specified in readfile substitution does not exist: {{.*}}/file/does/not/exist -- cgit v1.2.3 From 87673d3fa7398af1ae581121a1b971808407d778 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 30 Oct 2025 11:28:37 -0700 Subject: ELF: Rename 
RandomizePaddingSection to PaddingSection. This section type is about to be used by #147424 so let's give it a more generic name. Reviewers: smithp35, MaskRay Reviewed By: MaskRay Pull Request: https://github.com/llvm/llvm-project/pull/155540 --- lld/ELF/SyntheticSections.cpp | 7 +++---- lld/ELF/SyntheticSections.h | 4 ++-- lld/ELF/Writer.cpp | 7 +++---- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index bbf4b29a9fda..a4150ebfa165 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -2749,14 +2749,13 @@ RelroPaddingSection::RelroPaddingSection(Ctx &ctx) : SyntheticSection(ctx, ".relro_padding", SHT_NOBITS, SHF_ALLOC | SHF_WRITE, 1) {} -RandomizePaddingSection::RandomizePaddingSection(Ctx &ctx, uint64_t size, - OutputSection *parent) - : SyntheticSection(ctx, ".randomize_padding", SHT_PROGBITS, SHF_ALLOC, 1), +PaddingSection::PaddingSection(Ctx &ctx, uint64_t size, OutputSection *parent) + : SyntheticSection(ctx, ".padding", SHT_PROGBITS, SHF_ALLOC, 1), size(size) { this->parent = parent; } -void RandomizePaddingSection::writeTo(uint8_t *buf) { +void PaddingSection::writeTo(uint8_t *buf) { std::array filler = getParent()->getFiller(ctx); uint8_t *end = buf + size; for (; buf + 4 <= end; buf += 4) diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index ac3ec63f0a7a..38e68110e4bc 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -779,11 +779,11 @@ public: void writeTo(uint8_t *buf) override {} }; -class RandomizePaddingSection final : public SyntheticSection { +class PaddingSection final : public SyntheticSection { uint64_t size; public: - RandomizePaddingSection(Ctx &ctx, uint64_t size, OutputSection *parent); + PaddingSection(Ctx &ctx, uint64_t size, OutputSection *parent); size_t getSize() const override { return size; } void writeTo(uint8_t *buf) override; }; diff --git a/lld/ELF/Writer.cpp 
b/lld/ELF/Writer.cpp index 4fa80397cbfa..083b4fb1dbd2 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -1495,15 +1495,14 @@ static void randomizeSectionPadding(Ctx &ctx) { if (auto *isd = dyn_cast(bc)) { SmallVector tmp; if (os->ptLoad != curPtLoad) { - tmp.push_back(make( - ctx, g() % ctx.arg.maxPageSize, os)); + tmp.push_back( + make(ctx, g() % ctx.arg.maxPageSize, os)); curPtLoad = os->ptLoad; } for (InputSection *isec : isd->sections) { // Probability of inserting padding is 1 in 16. if (g() % 16 == 0) - tmp.push_back( - make(ctx, isec->addralign, os)); + tmp.push_back(make(ctx, isec->addralign, os)); tmp.push_back(isec); } isd->sections = std::move(tmp); -- cgit v1.2.3 From 01fbbda62c85cd7f42d15959a88dd71ec02a2586 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 30 Oct 2025 18:32:33 +0000 Subject: [LV] Strengthen assert: VPlan0 doesn't have WidenPHIs (NFC) (#165715) VPWidenCanonicalIV and VPBlend recipes are created by VPPredicator, and VPCanonicalIVPHI and VPInstruction recipes are created by VPlanConstruction. WidenPHIs are never created. --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 8ebc10808027..505fb435e91e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8340,11 +8340,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( &R) || (isa(&R) && !UnderlyingValue)) continue; - - // FIXME: VPlan0, which models a copy of the original scalar loop, should - // not use VPWidenPHIRecipe to model the phis. - assert((isa(&R) || isa(&R)) && - UnderlyingValue && "unsupported recipe"); + assert(isa(&R) && UnderlyingValue && "unsupported recipe"); // TODO: Gradually replace uses of underlying instruction by analyses on // VPlan. 
-- cgit v1.2.3 From 25afea74985d32999a896b24c74d6eab313cd0ea Mon Sep 17 00:00:00 2001 From: David Spickett Date: Thu, 30 Oct 2025 18:34:05 +0000 Subject: [lldb][test] Fix typo in lldb-dap skip for Arm 32-bit Fixes 17dbd8690e36f8e514fb47f4418f78420d0fc019 (again) --- lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py index 97c7f2d9e1b4..405e91fc2dc3 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-dap/lldbdap_testcase.py @@ -16,7 +16,7 @@ import base64 # https://github.com/llvm/llvm-project/issues/137660 @skipIf(oslist=["windows"], archs=["aarch64"]) # The Arm Linux bot needs stable resources before it can run these tests reliably. -@skipif(oslist=["linux"], archs=["arm$"]) +@skipIf(oslist=["linux"], archs=["arm$"]) class DAPTestCaseBase(TestBase): # set timeout based on whether ASAN was enabled or not. Increase # timeout by a factor of 10 if ASAN is enabled. -- cgit v1.2.3 From a24a7548b845eba93d8c03079f8c6fcd923306a6 Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Thu, 30 Oct 2025 11:49:22 -0700 Subject: [libc][hdrgen] Sort identifiers with leading underscores specially (#165745) This makes the sorting behavior more uniform: functions and macros are always sorted (separately), not only when merging. This changes the sort order used for functions and other things sorted by their symbol names. Symbols are sorted alphabetically without regard to leading underscores, and then for identifiers that differ only in the number of leading underscores, the fewer underscores the earlier in the sort order. For the functions declared in a generated header, adjacent names with and without underscores will be grouped together without blank lines. 
This is implemented by factoring the name field, equality, and sorting support out of the various entity classes into a new common superclass (hdrgen.Symbol). This uncovered YAML's requirement to quote the string "NULL" to avoid pyyaml parsing it as None (equivalent to Javascript null) rather than a string. --- libc/include/locale.yaml | 2 +- libc/include/stdio.yaml | 2 +- libc/include/stdlib.yaml | 2 +- libc/include/string.yaml | 2 +- libc/include/time.yaml | 2 +- libc/include/wchar.yaml | 8 ++--- libc/utils/hdrgen/hdrgen/enumeration.py | 16 ++------- libc/utils/hdrgen/hdrgen/function.py | 16 ++------- libc/utils/hdrgen/hdrgen/header.py | 14 +++++--- libc/utils/hdrgen/hdrgen/macro.py | 16 ++------- libc/utils/hdrgen/hdrgen/main.py | 1 + libc/utils/hdrgen/hdrgen/object.py | 16 ++------- libc/utils/hdrgen/hdrgen/symbol.py | 41 +++++++++++++++++++++++ libc/utils/hdrgen/hdrgen/type.py | 20 +++-------- libc/utils/hdrgen/tests/expected_output/sorting.h | 24 +++++++++++++ libc/utils/hdrgen/tests/input/sorting.yaml | 20 +++++++++++ libc/utils/hdrgen/tests/test_integration.py | 7 ++++ 17 files changed, 129 insertions(+), 80 deletions(-) create mode 100644 libc/utils/hdrgen/hdrgen/symbol.py create mode 100644 libc/utils/hdrgen/tests/expected_output/sorting.h create mode 100644 libc/utils/hdrgen/tests/input/sorting.yaml diff --git a/libc/include/locale.yaml b/libc/include/locale.yaml index 4566984ad83a..3c3998eb07aa 100644 --- a/libc/include/locale.yaml +++ b/libc/include/locale.yaml @@ -1,7 +1,7 @@ header: locale.h header_template: locale.h.def macros: - - macro_name: NULL + - macro_name: "NULL" macro_header: null-macro.h types: - type_name: locale_t diff --git a/libc/include/stdio.yaml b/libc/include/stdio.yaml index 394437ba3bbc..c50b4ecb0bf0 100644 --- a/libc/include/stdio.yaml +++ b/libc/include/stdio.yaml @@ -1,7 +1,7 @@ header: stdio.h header_template: stdio.h.def macros: - - macro_name: NULL + - macro_name: "NULL" macro_header: null-macro.h - macro_name: stdout 
macro_value: stdout diff --git a/libc/include/stdlib.yaml b/libc/include/stdlib.yaml index 3b2ff13c684b..495eb7e1317b 100644 --- a/libc/include/stdlib.yaml +++ b/libc/include/stdlib.yaml @@ -5,7 +5,7 @@ standards: merge_yaml_files: - stdlib-malloc.yaml macros: - - macro_name: NULL + - macro_name: "NULL" macro_header: null-macro.h types: - type_name: __atexithandler_t diff --git a/libc/include/string.yaml b/libc/include/string.yaml index 0bf297ee747a..22010f4afa81 100644 --- a/libc/include/string.yaml +++ b/libc/include/string.yaml @@ -2,7 +2,7 @@ header: string.h standards: - stdc macros: - - macro_name: NULL + - macro_name: "NULL" macro_header: null-macro.h types: - type_name: locale_t diff --git a/libc/include/time.yaml b/libc/include/time.yaml index 2f8024298fad..88e50d128823 100644 --- a/libc/include/time.yaml +++ b/libc/include/time.yaml @@ -1,7 +1,7 @@ header: time.h header_template: time.h.def macros: - - macro_name: NULL + - macro_name: "NULL" macro_header: null-macro.h types: - type_name: struct_timeval diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml index b8a0a748cd3a..c8b9e21b56b2 100644 --- a/libc/include/wchar.yaml +++ b/libc/include/wchar.yaml @@ -1,7 +1,7 @@ header: wchar.h header_template: wchar.h.def macros: - - macro_name: NULL + - macro_name: "NULL" macro_header: null-macro.h types: - type_name: FILE @@ -188,8 +188,8 @@ functions: standards: - stdc return_type: wchar_t * - arguments: - - type: wchar_t *__restrict + arguments: + - type: wchar_t *__restrict - type: const wchar_t *__restrict - type: size_t - name: wmemmove @@ -212,7 +212,7 @@ functions: standards: - stdc return_type: wchar_t * - arguments: + arguments: - type: wchar_t *__restrict - type: const wchar_t *__restrict - name: wcslcat diff --git a/libc/utils/hdrgen/hdrgen/enumeration.py b/libc/utils/hdrgen/hdrgen/enumeration.py index 198720826720..1e0f64aec1ed 100644 --- a/libc/utils/hdrgen/hdrgen/enumeration.py +++ b/libc/utils/hdrgen/hdrgen/enumeration.py @@ -6,24 +6,14 
@@ # # ==-------------------------------------------------------------------------==# -from functools import total_ordering +from hdrgen.symbol import Symbol -@total_ordering -class Enumeration: +class Enumeration(Symbol): def __init__(self, name, value): - self.name = name + super().__init__(name) self.value = value - def __eq__(self, other): - return self.name == other.name - - def __lt__(self, other): - return self.name < other.name - - def __hash__(self): - return self.name.__hash__() - def __str__(self): if self.value != None: return f"{self.name} = {self.value}" diff --git a/libc/utils/hdrgen/hdrgen/function.py b/libc/utils/hdrgen/hdrgen/function.py index f039996584e3..4de3406cc408 100644 --- a/libc/utils/hdrgen/hdrgen/function.py +++ b/libc/utils/hdrgen/hdrgen/function.py @@ -7,7 +7,7 @@ # ==-------------------------------------------------------------------------==# import re -from functools import total_ordering +from hdrgen.symbol import Symbol from hdrgen.type import Type @@ -37,14 +37,13 @@ KEYWORDS = [ NONIDENTIFIER = re.compile("[^a-zA-Z0-9_]+") -@total_ordering -class Function: +class Function(Symbol): def __init__( self, return_type, name, arguments, standards, guard=None, attributes=[] ): + super().__init__(name) assert return_type self.return_type = return_type - self.name = name self.arguments = [ arg if isinstance(arg, str) else arg["type"] for arg in arguments ] @@ -53,15 +52,6 @@ class Function: self.guard = guard self.attributes = attributes or [] - def __eq__(self, other): - return self.name == other.name - - def __lt__(self, other): - return self.name < other.name - - def __hash__(self): - return self.name.__hash__() - def signature_types(self): def collapse(type_string): assert type_string diff --git a/libc/utils/hdrgen/hdrgen/header.py b/libc/utils/hdrgen/hdrgen/header.py index 558ee5846920..f592327f06ad 100644 --- a/libc/utils/hdrgen/hdrgen/header.py +++ b/libc/utils/hdrgen/hdrgen/header.py @@ -147,8 +147,8 @@ class HeaderFile: } | { 
COMPILER_HEADER_TYPES.get( - typ.type_name, - PurePosixPath("llvm-libc-types") / f"{typ.type_name}.h", + typ.name, + PurePosixPath("llvm-libc-types") / f"{typ.name}.h", ) for typ in self.all_types() } @@ -227,7 +227,7 @@ class HeaderFile: ) ] - for macro in self.macros: + for macro in sorted(self.macros): # When there is nothing to define, the Macro object converts to str # as an empty string. Don't emit a blank line for those cases. if str(macro): @@ -242,7 +242,12 @@ class HeaderFile: content.append("\n__BEGIN_C_DECLS\n") current_guard = None - for function in self.functions: + last_name = None + for function in sorted(self.functions): + # If the last function's name was the same after underscores, + # elide the blank line between the declarations. + if last_name == function.name_without_underscores(): + content.pop() if function.guard == None and current_guard == None: content.append(str(function) + " __NOEXCEPT;") content.append("") @@ -264,6 +269,7 @@ class HeaderFile: content.append(f"#ifdef {current_guard}") content.append(str(function) + " __NOEXCEPT;") content.append("") + last_name = function.name_without_underscores() if current_guard != None: content.pop() content.append(f"#endif // {current_guard}") diff --git a/libc/utils/hdrgen/hdrgen/macro.py b/libc/utils/hdrgen/hdrgen/macro.py index e42e82845694..4664d9fb0049 100644 --- a/libc/utils/hdrgen/hdrgen/macro.py +++ b/libc/utils/hdrgen/hdrgen/macro.py @@ -6,25 +6,15 @@ # # ==-------------------------------------------------------------------------==# -from functools import total_ordering +from hdrgen.symbol import Symbol -@total_ordering -class Macro: +class Macro(Symbol): def __init__(self, name, value=None, header=None): - self.name = name + super().__init__(name) self.value = value self.header = header - def __eq__(self, other): - return self.name == other.name - - def __lt__(self, other): - return self.name < other.name - - def __hash__(self): - return self.name.__hash__() - def __str__(self): if 
self.header != None: return "" diff --git a/libc/utils/hdrgen/hdrgen/main.py b/libc/utils/hdrgen/hdrgen/main.py index 25df41e506a1..c12e89ef771d 100755 --- a/libc/utils/hdrgen/hdrgen/main.py +++ b/libc/utils/hdrgen/hdrgen/main.py @@ -105,6 +105,7 @@ def main(): return 2 header.merge(merge_from_header) + assert header.name, f"`header: name.h` line is required in {yaml_file}" return header if args.json: diff --git a/libc/utils/hdrgen/hdrgen/object.py b/libc/utils/hdrgen/hdrgen/object.py index a311c37168d6..a2ab496bed01 100644 --- a/libc/utils/hdrgen/hdrgen/object.py +++ b/libc/utils/hdrgen/hdrgen/object.py @@ -6,23 +6,13 @@ # # ==-------------------------------------------------------------------------==# -from functools import total_ordering +from hdrgen.symbol import Symbol -@total_ordering -class Object: +class Object(Symbol): def __init__(self, name, type): - self.name = name + super().__init__(name) self.type = type - def __eq__(self, other): - return self.name == other.name - - def __lt__(self, other): - return self.name < other.name - - def __hash__(self): - return self.name.__hash__() - def __str__(self): return f"extern {self.type} {self.name};" diff --git a/libc/utils/hdrgen/hdrgen/symbol.py b/libc/utils/hdrgen/hdrgen/symbol.py new file mode 100644 index 000000000000..28e9def128e4 --- /dev/null +++ b/libc/utils/hdrgen/hdrgen/symbol.py @@ -0,0 +1,41 @@ +# ====-- Symbol class for libc function headers----------------*- python -*--==# +# +# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# ==-------------------------------------------------------------------------==# + +from functools import total_ordering + + +@total_ordering +class Symbol: + """ + Symbol is the common superclass for each kind of entity named by an + identifier. 
It provides the name field, and defines sort ordering, + hashing, and equality based only on the name. The sorting is pretty + presentation order for identifiers, which is to say it first sorts + lexically but ignores leading underscores and secondarily sorts with the + fewest underscores first. + """ + + def __init__(self, name): + assert name + self.name = name + + def __eq__(self, other): + return self.name == other.name + + def __hash__(self): + return self.name.__hash__() + + def name_without_underscores(self): + return self.name.lstrip("_") + + def name_sort_key(self): + ident = self.name_without_underscores() + return ident, len(self.name) - len(ident) + + def __lt__(self, other): + return self.name_sort_key() < other.name_sort_key() diff --git a/libc/utils/hdrgen/hdrgen/type.py b/libc/utils/hdrgen/hdrgen/type.py index 0c0af8569c61..20c1881a9379 100644 --- a/libc/utils/hdrgen/hdrgen/type.py +++ b/libc/utils/hdrgen/hdrgen/type.py @@ -6,20 +6,10 @@ # # ==-------------------------------------------------------------------------==# -from functools import total_ordering +from hdrgen.symbol import Symbol -@total_ordering -class Type: - def __init__(self, type_name): - assert type_name - self.type_name = type_name - - def __eq__(self, other): - return self.type_name == other.type_name - - def __lt__(self, other): - return self.type_name < other.type_name - - def __hash__(self): - return self.type_name.__hash__() +class Type(Symbol): + # A type so far carries no specific information beyond its name. + def __init__(self, name): + super().__init__(name) diff --git a/libc/utils/hdrgen/tests/expected_output/sorting.h b/libc/utils/hdrgen/tests/expected_output/sorting.h new file mode 100644 index 000000000000..a091a421b2c3 --- /dev/null +++ b/libc/utils/hdrgen/tests/expected_output/sorting.h @@ -0,0 +1,24 @@ +//===-- Standard C header --===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#ifndef _LLVM_LIBC_SORTING_H +#define _LLVM_LIBC_SORTING_H + +#include "__llvm-libc-common.h" + +__BEGIN_C_DECLS + +void func_with_aliases(int) __NOEXCEPT; +void _func_with_aliases(int) __NOEXCEPT; +void __func_with_aliases(int) __NOEXCEPT; + +void gunk(const char *) __NOEXCEPT; + +__END_C_DECLS + +#endif // _LLVM_LIBC_SORTING_H diff --git a/libc/utils/hdrgen/tests/input/sorting.yaml b/libc/utils/hdrgen/tests/input/sorting.yaml new file mode 100644 index 000000000000..3c26cde9e6c4 --- /dev/null +++ b/libc/utils/hdrgen/tests/input/sorting.yaml @@ -0,0 +1,20 @@ +header: sorting.h +standards: + - stdc +functions: + - name: gunk + return_type: void + arguments: + - type: const char * + - name: _func_with_aliases + return_type: void + arguments: + - type: int + - name: func_with_aliases + return_type: void + arguments: + - type: int + - name: __func_with_aliases + return_type: void + arguments: + - type: int diff --git a/libc/utils/hdrgen/tests/test_integration.py b/libc/utils/hdrgen/tests/test_integration.py index c6e76d826a3a..b975d8ff007b 100644 --- a/libc/utils/hdrgen/tests/test_integration.py +++ b/libc/utils/hdrgen/tests/test_integration.py @@ -75,6 +75,13 @@ class TestHeaderGenIntegration(unittest.TestCase): self.compare_files(output_file, expected_output_file) + def test_sorting(self): + yaml_file = self.source_dir / "input" / "sorting.yaml" + expected_output_file = self.source_dir / "expected_output" / "sorting.h" + output_file = self.output_dir / "sorting.h" + self.run_script(yaml_file, output_file) + self.compare_files(output_file, expected_output_file) + def main(): parser = argparse.ArgumentParser(description="TestHeaderGenIntegration arguments") -- cgit v1.2.3 From 546e91bacf2686613908701397ecad0b47165384 Mon Sep 17 00:00:00 2001 From: Aiden Grossman 
Date: Thu, 30 Oct 2025 11:58:41 -0700 Subject: [ASan] Make tests work with internal shell Some minor adjustmenets around environment variables to make a handful of tests work with the internal shell that did not before. Reviewers: fmayer, alexander-shaposhnikov Reviewed By: fmayer, alexander-shaposhnikov Pull Request: https://github.com/llvm/llvm-project/pull/165141 --- compiler-rt/test/asan/TestCases/log-path_test.cpp | 3 ++- compiler-rt/test/asan/TestCases/scariness_score_test.cpp | 4 ++-- compiler-rt/test/asan/lit.cfg.py | 3 +++ compiler-rt/test/lit.common.cfg.py | 2 ++ 4 files changed, 9 insertions(+), 3 deletions(-) diff --git a/compiler-rt/test/asan/TestCases/log-path_test.cpp b/compiler-rt/test/asan/TestCases/log-path_test.cpp index 3c5ca114cfd7..6875d57c43cc 100644 --- a/compiler-rt/test/asan/TestCases/log-path_test.cpp +++ b/compiler-rt/test/asan/TestCases/log-path_test.cpp @@ -25,7 +25,8 @@ // RUN: FileCheck %s --check-prefix=CHECK-BAD-DIR < %t.out // Too long log_path. -// RUN: %env_asan_opts=log_path=`for((i=0;i<10000;i++)); do echo -n $i; done` \ +// RUN: %python -c "for i in range(0, 10000): print(i, end='')" > %t.long_log_path +// RUN: %env_asan_opts=log_path=%{readfile:%t.long_log_path} \ // RUN: not %run %t 2> %t.out // RUN: FileCheck %s --check-prefix=CHECK-LONG < %t.out diff --git a/compiler-rt/test/asan/TestCases/scariness_score_test.cpp b/compiler-rt/test/asan/TestCases/scariness_score_test.cpp index 9e55e33675fd..5d229cf38364 100644 --- a/compiler-rt/test/asan/TestCases/scariness_score_test.cpp +++ b/compiler-rt/test/asan/TestCases/scariness_score_test.cpp @@ -6,7 +6,7 @@ // RUN: %clangxx_asan -O0 -mllvm -asan-use-stack-safety=0 %s -o %t // On OSX and Windows, alloc_dealloc_mismatch=1 isn't 100% reliable, so it's // off by default. It's safe for these tests, though, so we turn it on. 
-// RUN: export %env_asan_opts=symbolize=0:detect_stack_use_after_return=1:handle_abort=1:print_scariness=1:alloc_dealloc_mismatch=1 +// RUN: %export_asan_opts=symbolize=0:detect_stack_use_after_return=1:handle_abort=1:print_scariness=1:alloc_dealloc_mismatch=1 // Make sure the stack is limited (may not be the default under GNU make) // RUN: ulimit -s 4096 // RUN: not %run %t 1 2>&1 | FileCheck %s --check-prefix=CHECK1 @@ -41,7 +41,7 @@ // RUN: %clangxx_asan -O0 %s -o %t -fsanitize-address-use-after-return=always -mllvm -asan-use-stack-safety=0 // On OSX and Windows, alloc_dealloc_mismatch=1 isn't 100% reliable, so it's // off by default. It's safe for these tests, though, so we turn it on. -// RUN: export %env_asan_opts=symbolize=0:handle_abort=1:print_scariness=1:alloc_dealloc_mismatch=1 +// RUN: %export_asan_opts=symbolize=0:handle_abort=1:print_scariness=1:alloc_dealloc_mismatch=1 // Make sure the stack is limited (may not be the default under GNU make) // RUN: ulimit -s 4096 // RUN: not %run %t 1 2>&1 | FileCheck %s --check-prefix=CHECK1 diff --git a/compiler-rt/test/asan/lit.cfg.py b/compiler-rt/test/asan/lit.cfg.py index 96201e679b0a..0194c720d003 100644 --- a/compiler-rt/test/asan/lit.cfg.py +++ b/compiler-rt/test/asan/lit.cfg.py @@ -41,6 +41,9 @@ if default_asan_opts_str: config.substitutions.append( ("%env_asan_opts=", "env ASAN_OPTIONS=" + default_asan_opts_str) ) +config.substitutions.append( + ("%export_asan_opts=", "export ASAN_OPTIONS=" + default_asan_opts_str) +) # Setup source root. config.test_source_root = os.path.dirname(__file__) diff --git a/compiler-rt/test/lit.common.cfg.py b/compiler-rt/test/lit.common.cfg.py index 8d147055293e..9d2f02189b8b 100644 --- a/compiler-rt/test/lit.common.cfg.py +++ b/compiler-rt/test/lit.common.cfg.py @@ -1066,3 +1066,5 @@ if config.compiler_id == "GNU": # llvm. 
config.substitutions.append(("%crt_src", config.compiler_rt_src_root)) config.substitutions.append(("%llvm_src", config.llvm_src_root)) + +config.substitutions.append(("%python", '"%s"' % (sys.executable))) -- cgit v1.2.3 From 9f64d75f8a03118ecdf62860411984d97a7080f8 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 30 Oct 2025 12:05:01 -0700 Subject: [Support] Simplify the continuation condition in encodeSLEB128 (NFC) (#165651) The boolean expression to determine if more bytes are needed for a signed LEB128 value is quite complex: !((((Value == 0 ) && ((Byte & 0x40) == 0)) || ((Value == -1) && ((Byte & 0x40) != 0)))) This patch simplifies it to an equivalent expression using a ternary operator, which is much easier to understand. --- llvm/include/llvm/Support/LEB128.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Support/LEB128.h b/llvm/include/llvm/Support/LEB128.h index 898b4ea1f19a..4e2262fb15c5 100644 --- a/llvm/include/llvm/Support/LEB128.h +++ b/llvm/include/llvm/Support/LEB128.h @@ -29,8 +29,7 @@ inline unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, uint8_t Byte = Value & 0x7f; // NOTE: this assumes that this signed shift is an arithmetic right shift. Value >>= 7; - More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) || - ((Value == -1) && ((Byte & 0x40) != 0)))); + More = Value != ((Byte & 0x40) ? -1 : 0); Count++; if (More || Count < PadTo) Byte |= 0x80; // Mark this byte to show that more bytes will follow. @@ -58,8 +57,7 @@ inline unsigned encodeSLEB128(int64_t Value, uint8_t *p, unsigned PadTo = 0) { uint8_t Byte = Value & 0x7f; // NOTE: this assumes that this signed shift is an arithmetic right shift. Value >>= 7; - More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) || - ((Value == -1) && ((Byte & 0x40) != 0)))); + More = Value != ((Byte & 0x40) ? -1 : 0); Count++; if (More || Count < PadTo) Byte |= 0x80; // Mark this byte to show that more bytes will follow. 
-- cgit v1.2.3 From a1db7775738bf050aa5cd5ab33604f3eee864e8e Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 30 Oct 2025 12:05:10 -0700 Subject: [Hexagon] Use default member initializations (NFC) (#165653) Identified with modernize-use-default-member-init. --- llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp | 6 +++--- llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp | 8 ++++---- llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp b/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp index 3b810d0b65fa..79863e1c3cb7 100644 --- a/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp +++ b/llvm/lib/Target/Hexagon/HexagonCopyHoisting.cpp @@ -34,7 +34,7 @@ class HexagonCopyHoisting : public MachineFunctionPass { public: static char ID; - HexagonCopyHoisting() : MachineFunctionPass(ID), MFN(nullptr), MRI(nullptr) {} + HexagonCopyHoisting() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "Hexagon Copy Hoisting"; } @@ -56,8 +56,8 @@ public: void moveCopyInstr(MachineBasicBlock *DestBB, std::pair Key, MachineInstr *MI); - MachineFunction *MFN; - MachineRegisterInfo *MRI; + MachineFunction *MFN = nullptr; + MachineRegisterInfo *MRI = nullptr; std::vector, MachineInstr *>> CopyMIList; }; diff --git a/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp b/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp index 93418f7e15e8..a10c93704a85 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp @@ -34,13 +34,13 @@ STATISTIC(HexagonNumStoreAbsConversions, namespace { class HexagonGenMemAbsolute : public MachineFunctionPass { - const HexagonInstrInfo *TII; - MachineRegisterInfo *MRI; - const TargetRegisterInfo *TRI; + const HexagonInstrInfo *TII = nullptr; + MachineRegisterInfo *MRI = nullptr; + const TargetRegisterInfo *TRI = nullptr; public: static char ID; - 
HexagonGenMemAbsolute() : MachineFunctionPass(ID), TII(0), MRI(0), TRI(0) {} + HexagonGenMemAbsolute() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "Hexagon Generate Load/Store Set Absolute Address Instruction"; diff --git a/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp b/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp index 71bdfc6657c5..5a85f348fdaf 100644 --- a/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp +++ b/llvm/lib/Target/Hexagon/HexagonTfrCleanup.cpp @@ -43,7 +43,7 @@ namespace { class HexagonTfrCleanup : public MachineFunctionPass { public: static char ID; - HexagonTfrCleanup() : MachineFunctionPass(ID), HII(0), TRI(0) {} + HexagonTfrCleanup() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "Hexagon TFR Cleanup"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.setPreservesAll(); @@ -52,8 +52,8 @@ public: bool runOnMachineFunction(MachineFunction &MF) override; private: - const HexagonInstrInfo *HII; - const TargetRegisterInfo *TRI; + const HexagonInstrInfo *HII = nullptr; + const TargetRegisterInfo *TRI = nullptr; typedef DenseMap ImmediateMap; -- cgit v1.2.3 From 2504f5f3c4e53af6050609292c48b08089b43607 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 30 Oct 2025 12:05:19 -0700 Subject: [llvm] Proofread HowToCrossCompileBuiltinsOnArm.rst (#165655) --- llvm/docs/HowToCrossCompileBuiltinsOnArm.rst | 30 ++++++++++++++-------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/llvm/docs/HowToCrossCompileBuiltinsOnArm.rst b/llvm/docs/HowToCrossCompileBuiltinsOnArm.rst index d7759ad8edd0..58599404d5cd 100644 --- a/llvm/docs/HowToCrossCompileBuiltinsOnArm.rst +++ b/llvm/docs/HowToCrossCompileBuiltinsOnArm.rst @@ -8,18 +8,18 @@ Introduction This document contains information about building and testing the builtins part of compiler-rt for an Arm target, from an x86_64 Linux machine. 
-While this document concentrates on Arm and Linux the general principles should +While this document concentrates on Arm and Linux, the general principles should apply to other targets supported by compiler-rt. Further contributions for other targets are welcome. The instructions in this document depend on libraries and programs external to -LLVM, there are many ways to install and configure these dependencies so you +LLVM. There are many ways to install and configure these dependencies, so you may need to adapt the instructions here to fit your own situation. Prerequisites ============= -In this use case we will be using cmake on a Debian-based Linux system, +In this use case, we will be using cmake on a Debian-based Linux system, cross-compiling from an x86_64 host to a hard-float Armv7-A target. We will be using as many of the LLVM tools as we can, but it is possible to use GNU equivalents. @@ -35,7 +35,7 @@ You will need: An existing sysroot is required because some of the builtins include C library headers and a sysroot is the easiest way to get those. -In this example we will be using ``ninja`` as the build tool. +In this example, we will be using ``ninja`` as the build tool. See https://compiler-rt.llvm.org/ for information about the dependencies on clang and LLVM. @@ -46,7 +46,7 @@ the source for LLVM and compiler-rt. ``qemu-arm`` should be available as a package for your Linux distribution. The most complicated of the prerequisites to satisfy is the ``arm-linux-gnueabihf`` -sysroot. In theory it is possible to use the Linux distributions multiarch +sysroot. In theory, it is possible to use the Linux distributions multiarch support to fulfill the dependencies for building but unfortunately due to ``/usr/local/include`` being added some host includes are selected. @@ -153,7 +153,7 @@ The cmake try compile stage fails At an early stage cmake will attempt to compile and link a simple C program to test if the toolchain is working. 
-This stage can often fail at link time if the ``--sysroot=``, ``--target`` or +This stage can often fail at link time if the ``--sysroot=``, ``--target``, or ``--gcc-toolchain=`` options are not passed to the compiler. Check the ``CMAKE__FLAGS`` and ``CMAKE__COMPILER_TARGET`` flags along with any of the specific CMake sysroot and toolchain options. @@ -165,7 +165,7 @@ to make sure it is working. For example:: Clang uses the host header files -------------------------------- -On debian based systems it is possible to install multiarch support for +On Debian-based systems, it is possible to install multiarch support for ``arm-linux-gnueabi`` and ``arm-linux-gnueabihf``. In many cases clang can successfully use this multiarch support when ``--gcc-toolchain=`` and ``--sysroot=`` are not supplied. Unfortunately clang adds ``/usr/local/include`` before @@ -177,8 +177,8 @@ use a separate ``arm-linux-gnueabihf`` toolchain. No target passed to clang ------------------------- -If clang is not given a target it will typically use the host target, this will -not understand the Arm assembly language files resulting in error messages such +If clang is not given a target, it will typically use the host target. This will +not understand the Arm assembly language files, resulting in error messages such as ``error: unknown directive .syntax unified``. You can check the clang invocation in the error message to see if there is no @@ -217,7 +217,7 @@ target to use is: * ``-DCMAKE_C_COMPILER_TARGET=arm-linux-gnueabi`` -Depending on whether you want to use floating point instructions or not you +Depending on whether you want to use floating point instructions or not, you may need extra c-flags such as ``-mfloat-abi=softfp`` for use of floating-point instructions, and ``-mfloat-abi=soft -mfpu=none`` for software floating-point emulation. @@ -241,7 +241,7 @@ To build and test the libraries using a similar method to Armv7-A is possible but more difficult. 
The main problems are: * There is not a ``qemu-arm`` user-mode emulator for bare-metal systems. - ``qemu-system-arm`` can be used but this is significantly more difficult + ``qemu-system-arm`` can be used, but this is significantly more difficult to setup. This document does not explain how to do this. * The targets to compile compiler-rt have the suffix ``-none-eabi``. This uses the BareMetal driver in clang and by default will not find the libraries @@ -252,8 +252,8 @@ that are supported on Armv7-A we can still get most of the value of running the tests using the same ``qemu-arm`` that we used for Armv7-A by building and running the test cases for Armv7-A but using the builtins compiled for Armv6-M, Armv7-M or Armv7E-M. This will test that the builtins can be linked -into a binary and execute the tests correctly but it will not catch if the -builtins use instructions that are supported on Armv7-A but not Armv6-M, +into a binary and execute the tests correctly, but it will not catch if the +builtins use instructions that are supported on Armv7-A but not on Armv6-M, Armv7-M and Armv7E-M. This requires a second ``arm-none-eabi`` toolchain for building the builtins. @@ -321,9 +321,9 @@ command for Armv7-A build and test:: The Armv6-M builtins will use the soft-float ABI. When compiling the tests for Armv7-A we must include ``"-mthumb -mfloat-abi=soft -mfpu=none"`` in the -test-c-flags. We must use an Armv7-A soft-float abi sysroot for ``qemu-arm``. +test-c-flags. We must use an Armv7-A soft-float ABI sysroot for ``qemu-arm``. -Depending on the linker used for the test cases you may encounter BuildAttribute +Depending on the linker used for the test cases, you may encounter BuildAttribute mismatches between the M-profile objects from compiler-rt and the A-profile objects from the test. 
The lld linker does not check the profile BuildAttribute so it can be used to link the tests by adding ``-fuse-ld=lld`` to the -- cgit v1.2.3 From cf85cf45236839b93ade5d7d777b3b2f81e873fb Mon Sep 17 00:00:00 2001 From: Ahmed Nour Date: Thu, 30 Oct 2025 22:07:00 +0300 Subject: [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - allow insertps intrinsic to be used in constexp (#165513) Resolves #165161 --- clang/include/clang/Basic/BuiltinsX86.td | 3 ++- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 42 +++++++++++++++++++++++++---- clang/lib/AST/ExprConstant.cpp | 46 +++++++++++++++++++++++++++----- clang/test/CodeGen/X86/sse41-builtins.c | 10 +++++++ 4 files changed, 88 insertions(+), 13 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 500aa85fe535..9e877b92eac6 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -328,7 +328,6 @@ let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorW } let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; def roundps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Constant int)">; def roundss : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant int)">; def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">; @@ -342,6 +341,8 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def insertps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, " + "_Vector<4, float>, _Constant char)">; def ptestz128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">; def ptestc128 diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp 
b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b3ab82da5e01..8b57b963c538 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3411,7 +3411,7 @@ static bool interp__builtin_x86_byteshift( static bool interp__builtin_ia32_shuffle_generic( InterpState &S, CodePtr OpPC, const CallExpr *Call, - llvm::function_ref(unsigned, unsigned)> + llvm::function_ref(unsigned, unsigned)> GetSourceIndex) { assert(Call->getNumArgs() == 3); @@ -3428,8 +3428,19 @@ static bool interp__builtin_ia32_shuffle_generic( for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) { auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); - const Pointer &Src = (SrcVecIdx == 0) ? A : B; - TYPE_SWITCH(ElemT, { Dst.elem(DstIdx) = Src.elem(SrcIdx); }); + + if (SrcIdx < 0) { + // Zero out this element + if (ElemT == PT_Float) { + Dst.elem(DstIdx) = Floating( + S.getASTContext().getFloatTypeSemantics(VecT->getElementType())); + } else { + INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem(DstIdx) = T::from(0); }); + } + } else { + const Pointer &Src = (SrcVecIdx == 0) ? A : B; + TYPE_SWITCH(ElemT, { Dst.elem(DstIdx) = Src.elem(SrcIdx); }); + } } Dst.initializeAllElements(); @@ -4382,7 +4393,8 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 1 : 0; unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return std::pair{SrcIdx, LaneOffset + Index}; + return std::pair{SrcIdx, + static_cast(LaneOffset + Index)}; }); case X86::BI__builtin_ia32_shufpd: case X86::BI__builtin_ia32_shufpd256: @@ -4400,7 +4412,27 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned SrcIdx = ElemInLane >= NumSelectableElems ? 
1 : 0; unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return std::pair{SrcIdx, LaneOffset + Index}; + return std::pair{SrcIdx, + static_cast(LaneOffset + Index)}; + }); + case X86::BI__builtin_ia32_insertps128: + return interp__builtin_ia32_shuffle_generic( + S, OpPC, Call, [](unsigned DstIdx, unsigned Mask) { + // Bits [3:0]: zero mask - if bit is set, zero this element + if ((Mask & (1 << DstIdx)) != 0) { + return std::pair{0, -1}; + } + // Bits [7:6]: select element from source vector Y (0-3) + // Bits [5:4]: select destination position (0-3) + unsigned SrcElem = (Mask >> 6) & 0x3; + unsigned DstElem = (Mask >> 4) & 0x3; + if (DstIdx == DstElem) { + // Insert element from source vector (B) at this position + return std::pair{1, static_cast(SrcElem)}; + } else { + // Copy from destination vector (A) + return std::pair{0, static_cast(DstIdx)}; + } }); case X86::BI__builtin_ia32_pshufb128: case X86::BI__builtin_ia32_pshufb256: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index d0404b957ab0..97eeba8b9d6c 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11621,7 +11621,7 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result, static bool evalShuffleGeneric( EvalInfo &Info, const CallExpr *Call, APValue &Out, - llvm::function_ref(unsigned, unsigned)> + llvm::function_ref(unsigned, unsigned)> GetSourceIndex) { const auto *VT = Call->getType()->getAs(); @@ -11644,8 +11644,16 @@ static bool evalShuffleGeneric( for (unsigned DstIdx = 0; DstIdx != NumElts; ++DstIdx) { auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask); - const APValue &Src = (SrcVecIdx == 0) ? 
A : B; - ResultElements.push_back(Src.getVectorElt(SrcIdx)); + + if (SrcIdx < 0) { + // Zero out this element + QualType ElemTy = VT->getElementType(); + ResultElements.push_back( + APValue(APFloat::getZero(Info.Ctx.getFloatTypeSemantics(ElemTy)))); + } else { + const APValue &Src = (SrcVecIdx == 0) ? A : B; + ResultElements.push_back(Src.getVectorElt(SrcIdx)); + } } Out = APValue(ResultElements.data(), ResultElements.size()); @@ -12438,7 +12446,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!evalShuffleGeneric( Info, E, R, [](unsigned DstIdx, - unsigned ShuffleMask) -> std::pair { + unsigned ShuffleMask) -> std::pair { constexpr unsigned LaneBits = 128u; unsigned NumElemPerLane = LaneBits / 32; unsigned NumSelectableElems = NumElemPerLane / 2; @@ -12451,7 +12459,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 0 : 1; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return {SrcIdx, LaneOffset + Index}; + return {SrcIdx, static_cast(LaneOffset + Index)}; })) return false; return Success(R, E); @@ -12463,7 +12471,7 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!evalShuffleGeneric( Info, E, R, [](unsigned DstIdx, - unsigned ShuffleMask) -> std::pair { + unsigned ShuffleMask) -> std::pair { constexpr unsigned LaneBits = 128u; unsigned NumElemPerLane = LaneBits / 64; unsigned NumSelectableElems = NumElemPerLane / 2; @@ -12476,7 +12484,31 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { unsigned BitIndex = (DstIdx * BitsPerElem) % MaskBits; unsigned SrcIdx = (ElemInLane < NumSelectableElems) ? 
0 : 1; unsigned Index = (ShuffleMask >> BitIndex) & IndexMask; - return {SrcIdx, LaneOffset + Index}; + return {SrcIdx, static_cast(LaneOffset + Index)}; + })) + return false; + return Success(R, E); + } + case X86::BI__builtin_ia32_insertps128: { + APValue R; + if (!evalShuffleGeneric( + Info, E, R, + [](unsigned DstIdx, unsigned Mask) -> std::pair { + // Bits [3:0]: zero mask - if bit is set, zero this element + if ((Mask & (1 << DstIdx)) != 0) { + return {0, -1}; + } + // Bits [7:6]: select element from source vector Y (0-3) + // Bits [5:4]: select destination position (0-3) + unsigned SrcElem = (Mask >> 6) & 0x3; + unsigned DstElem = (Mask >> 4) & 0x3; + if (DstIdx == DstElem) { + // Insert element from source vector (B) at this position + return {1, static_cast(SrcElem)}; + } else { + // Copy from destination vector (A) + return {0, static_cast(DstIdx)}; + } })) return false; return Success(R, E); diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index 62cd392824bb..35fa65a99836 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -307,6 +307,16 @@ __m128 test_mm_insert_ps(__m128 x, __m128 y) { return _mm_insert_ps(x, y, 4); } +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x10), 1.0f, 10.0f, 3.0f, 4.0f))); // Insert Y[0] into X[1] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x00), 10.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[0] into X[0] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x20), 1.0f, 2.0f, 10.0f, 4.0f))); // Insert Y[0] into X[2] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x30), 1.0f, 2.0f, 3.0f, 10.0f))); // 
Insert Y[0] into X[3] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x80), 30.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[2] into X[0] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x01), 0.0f, 2.0f, 3.0f, 4.0f))); // Insert Y[0] into X[0], zero X[0] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x0A), 10.0f, 0.0f, 3.0f, 0.0f))); // Insert Y[0] into X[0], zero X[1] and X[3] +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0x0F), 0.0f, 0.0f, 0.0f, 0.0f))); // Insert Y[0] into X[0], zero all +TEST_CONSTEXPR((match_m128(_mm_insert_ps(((__m128)(__v4sf){1.0f, 2.0f, 3.0f, 4.0f}), ((__m128)(__v4sf){10.0f, 20.0f, 30.0f, 40.0f}), 0xCF), 0.0f, 0.0f, 0.0f, 0.0f))); // Insert Y[3] into X[0], zero all + __m128i test_mm_max_epi8(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_max_epi8 // CHECK: call <16 x i8> @llvm.smax.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) -- cgit v1.2.3 From 3d42b48d5df80b89b630d0602b1c5d1e3d4b4683 Mon Sep 17 00:00:00 2001 From: lonely eagle <2020382038@qq.com> Date: Fri, 31 Oct 2025 03:07:21 +0800 Subject: [mlir][bufferize] Use resolveCallableInTable to cleanup getCalledFunction (NFC) (#165658) Simplify the implementation of `getCalledFunction` using `resolveCallableInTable`. 
--- .../Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp | 7 +------ .../Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp | 6 +----- 2 files changed, 2 insertions(+), 11 deletions(-) diff --git a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp index d9d69342e42a..8655ed3005a9 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/FuncBufferizableOpInterfaceImpl.cpp @@ -95,12 +95,7 @@ getBufferizedFunctionArgType(FuncOp funcOp, int64_t index, /// Return the FuncOp called by `callOp`. static FuncOp getCalledFunction(CallOpInterface callOp, SymbolTableCollection &symbolTables) { - SymbolRefAttr sym = - llvm::dyn_cast_if_present(callOp.getCallableForCallee()); - if (!sym) - return nullptr; - return dyn_cast_or_null( - symbolTables.lookupNearestSymbolFrom(callOp, sym)); + return dyn_cast_or_null(callOp.resolveCallableInTable(&symbolTables)); } /// Return the FuncOp called by `callOp`. diff --git a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp index aa53f94fe839..c233e24c2a15 100644 --- a/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp +++ b/mlir/lib/Dialect/Bufferization/Transforms/OneShotModuleBufferize.cpp @@ -285,12 +285,8 @@ static void removeBufferizationAttributes(BlockArgument bbArg) { static func::FuncOp getCalledFunction(func::CallOp callOp, mlir::SymbolTableCollection &symbolTable) { - SymbolRefAttr sym = - llvm::dyn_cast_if_present(callOp.getCallableForCallee()); - if (!sym) - return nullptr; return dyn_cast_or_null( - symbolTable.lookupNearestSymbolFrom(callOp, sym)); + callOp.resolveCallableInTable(&symbolTable)); } /// Return "true" if the given function signature has tensor semantics. 
-- cgit v1.2.3 From ba0be89cd2c08dfff483cbcbdb770dce44031b6c Mon Sep 17 00:00:00 2001 From: Jakub Kuderski Date: Thu, 30 Oct 2025 15:10:59 -0400 Subject: [mlir] Simplify Default cases in type switches. NFC. (#165767) Use default values instead of lambdas when possible. `std::nullopt` and `nullptr` can be used now because of https://github.com/llvm/llvm-project/pull/165724. --- mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp | 2 +- mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp | 2 +- mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp | 2 +- mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp | 2 +- mlir/lib/Dialect/Arith/IR/ArithOps.cpp | 2 +- mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp | 6 +++--- mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 2 +- mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 2 +- mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp | 2 +- mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp | 2 +- mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp | 2 +- mlir/lib/Dialect/Linalg/Transforms/SimplifyDepthwiseConv.cpp | 2 +- mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp | 2 +- mlir/lib/Dialect/MemRef/Transforms/FlattenMemRefs.cpp | 2 +- mlir/lib/Dialect/SCF/Transforms/LoopCanonicalization.cpp | 2 +- mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp | 4 ++-- mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp | 2 +- mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp | 2 +- mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp | 2 +- mlir/lib/TableGen/Type.cpp | 2 +- mlir/lib/Target/LLVMIR/DebugTranslation.cpp | 6 +++--- 21 files changed, 26 insertions(+), 26 deletions(-) diff --git a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp index 41e333c621ed..3a307a0756d9 100644 --- a/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp +++ b/mlir/lib/Conversion/AMDGPUToROCDL/AMDGPUToROCDL.cpp @@ -935,7 +935,7 @@ static std::optional mfmaTypeSelectCode(Type mlirElemType) { .Case([](Float6E2M3FNType) { 
return 2u; }) .Case([](Float6E3M2FNType) { return 3u; }) .Case([](Float4E2M1FNType) { return 4u; }) - .Default([](Type) { return std::nullopt; }); + .Default(std::nullopt); } /// If there is a scaled MFMA instruction for the input element types `aType` diff --git a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp index 247dba101cfc..cfdcd9cc2d86 100644 --- a/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp +++ b/mlir/lib/Conversion/ArithToAMDGPU/ArithToAMDGPU.cpp @@ -432,7 +432,7 @@ static Value getOriginalVectorValue(Value value) { current = op.getSource(); return false; }) - .Default([](Operation *) { return false; }); + .Default(false); if (!skipOp) { break; diff --git a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp index 25f1e1b184d6..425594b3382f 100644 --- a/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp +++ b/mlir/lib/Conversion/GPUToLLVMSPV/GPUToLLVMSPV.cpp @@ -259,7 +259,7 @@ struct GPUShuffleConversion final : ConvertOpToLLVMPattern { } return std::nullopt; }) - .Default([](auto) { return std::nullopt; }); + .Default(std::nullopt); } static std::optional getFuncName(gpu::ShuffleMode mode, diff --git a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp index e2c7d803e5a5..91c1aa55fdb4 100644 --- a/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp +++ b/mlir/lib/Conversion/VectorToXeGPU/VectorToXeGPU.cpp @@ -46,7 +46,7 @@ static bool isZeroConstant(Value val) { [](auto floatAttr) { return floatAttr.getValue().isZero(); }) .Case( [](auto intAttr) { return intAttr.getValue().isZero(); }) - .Default([](auto) { return false; }); + .Default(false); } static LogicalResult storeLoadPreconditions(PatternRewriter &rewriter, diff --git a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp index 898d76ce8d9b..980442efdf70 100644 --- 
a/mlir/lib/Dialect/Arith/IR/ArithOps.cpp +++ b/mlir/lib/Dialect/Arith/IR/ArithOps.cpp @@ -2751,7 +2751,7 @@ std::optional mlir::arith::getNeutralElement(Operation *op) { .Case([](arith::MaxSIOp op) { return AtomicRMWKind::maxs; }) .Case([](arith::MinSIOp op) { return AtomicRMWKind::mins; }) .Case([](arith::MulIOp op) { return AtomicRMWKind::muli; }) - .Default([](Operation *op) { return std::nullopt; }); + .Default(std::nullopt); if (!maybeKind) { return std::nullopt; } diff --git a/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp b/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp index d2c2138d6163..025d1acf8d6b 100644 --- a/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/EliminateBarriers.cpp @@ -330,7 +330,7 @@ static Value getBase(Value v) { v = op.getSrc(); return true; }) - .Default([](Operation *) { return false; }); + .Default(false); if (!shouldContinue) break; } @@ -354,7 +354,7 @@ static Value propagatesCapture(Operation *op) { .Case([](memref::TransposeOp transpose) { return transpose.getIn(); }) .Case( [](auto op) { return op.getSrc(); }) - .Default([](Operation *) { return Value(); }); + .Default(nullptr); } /// Returns `true` if the given operation is known to capture the given value, @@ -371,7 +371,7 @@ static std::optional getKnownCapturingStatus(Operation *op, Value v) { // These operations are known not to capture. .Case([](memref::DeallocOp) { return false; }) // By default, we don't know anything. 
- .Default([](Operation *) { return std::nullopt; }); + .Default(std::nullopt); } /// Returns `true` if the value may be captured by any of its users, i.e., if diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 3eae67f4c1f9..2731069d6ef5 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -698,7 +698,7 @@ static void destructureIndices(Type currType, ArrayRef indices, return structType.getBody()[memberIndex]; return nullptr; }) - .Default(Type(nullptr)); + .Default(nullptr); } } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp index cee943d2d86c..7d9058c26256 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp @@ -1111,7 +1111,7 @@ memsetCanUsesBeRemoved(MemsetIntr op, const MemorySlot &slot, .Case([](auto type) { return type.getWidth() % 8 == 0 && type.getWidth() > 0; }) - .Default([](Type) { return false; }); + .Default(false); if (!canConvertType) return false; diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp index ac35eea66e9d..ce93d18f56d3 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp @@ -798,7 +798,7 @@ static bool isCompatibleImpl(Type type, DenseSet &compatibleTypes) { // clang-format on .Case( [](Type type) { return isCompatiblePtrType(type); }) - .Default([](Type) { return false; }); + .Default(false); if (!result) compatibleTypes.erase(type); diff --git a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp index 8b8924448633..b09112bcf0bb 100644 --- a/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp +++ b/mlir/lib/Dialect/Linalg/TransformOps/LinalgTransformOps.cpp @@ -4499,7 +4499,7 @@ DiagnosedSilenceableFailure 
transform::DecomposeWinogradOp::applyToOne( maybeTransformed = decomposeWinogradOutputTransformOp(rewriter, op); return true; }) - .Default([&](Operation *op) { return false; }); + .Default(false); if (!supported) { DiagnosedSilenceableFailure diag = diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp index f05ffa8334d9..6519c4f64dd0 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp @@ -322,7 +322,7 @@ promoteSubViews(ImplicitLocOpBuilder &b, tmp = arith::ConstantOp::create(b, IntegerAttr::get(et, 0)); return complex::CreateOp::create(b, t, tmp, tmp); }) - .Default([](auto) { return Value(); }); + .Default(nullptr); if (!fillVal) return failure(); linalg::FillOp::create(b, fillVal, promotionInfo->fullLocalView); diff --git a/mlir/lib/Dialect/Linalg/Transforms/SimplifyDepthwiseConv.cpp b/mlir/lib/Dialect/Linalg/Transforms/SimplifyDepthwiseConv.cpp index 27ccf3c2ba14..6becc1f29afb 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/SimplifyDepthwiseConv.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/SimplifyDepthwiseConv.cpp @@ -89,7 +89,7 @@ matchAndReplaceDepthwiseConv(Operation *operation, Value input, Value kernel, ValueRange{input, collapsedKernel, iZp, kZp}, ValueRange{collapsedInit}, stride, dilation); }) - .Default([](Operation *op) { return nullptr; }); + .Default(nullptr); if (!newConv) return failure(); for (auto attr : preservedAttrs) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp index 0f317eac8fa4..cb6199f026e0 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -656,7 +656,7 @@ mlir::linalg::getCombinerOpKind(Operation *combinerOp) { [&](auto op) { return CombiningKind::MUL; }) .Case([&](auto op) { return CombiningKind::OR; }) .Case([&](auto op) { return CombiningKind::XOR; }) - 
.Default([&](auto op) { return std::nullopt; }); + .Default(std::nullopt); } /// Check whether `outputOperand` is a reduction with a single combiner diff --git a/mlir/lib/Dialect/MemRef/Transforms/FlattenMemRefs.cpp b/mlir/lib/Dialect/MemRef/Transforms/FlattenMemRefs.cpp index 1208fddf37e0..e6850890bf8f 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/FlattenMemRefs.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/FlattenMemRefs.cpp @@ -104,7 +104,7 @@ static Value getTargetMemref(Operation *op) { vector::MaskedStoreOp, vector::TransferReadOp, vector::TransferWriteOp>( [](auto op) { return op.getBase(); }) - .Default([](auto) { return Value{}; }); + .Default(nullptr); } template diff --git a/mlir/lib/Dialect/SCF/Transforms/LoopCanonicalization.cpp b/mlir/lib/Dialect/SCF/Transforms/LoopCanonicalization.cpp index 4ebd90dbcc1d..d380c46f7fbe 100644 --- a/mlir/lib/Dialect/SCF/Transforms/LoopCanonicalization.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/LoopCanonicalization.cpp @@ -55,7 +55,7 @@ static bool isShapePreserving(ForOp forOp, int64_t arg) { ? forOp.getInitArgs()[opResult.getResultNumber()] : Value(); }) - .Default([&](auto op) { return Value(); }); + .Default(nullptr); } return false; } diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp index 0c8114d5e957..938952ed273c 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVOps.cpp @@ -346,7 +346,7 @@ LogicalResult spirv::CompositeConstructOp::verify() { llvm::TypeSwitch(getType()) .Case( [](auto coopType) { return coopType.getElementType(); }) - .Default([](Type) { return nullptr; }); + .Default(nullptr); // Case 1. -- matrices. 
if (coopElementType) { @@ -1708,7 +1708,7 @@ LogicalResult spirv::MatrixTimesScalarOp::verify() { llvm::TypeSwitch(getMatrix().getType()) .Case( [](auto matrixType) { return matrixType.getElementType(); }) - .Default([](Type) { return nullptr; }); + .Default(nullptr); assert(elementType && "Unhandled type"); diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp index f895807ea1d1..d1e275d590f7 100644 --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVTypes.cpp @@ -731,7 +731,7 @@ std::optional SPIRVType::getSizeInBytes() { return *elementSize * type.getNumElements(); return std::nullopt; }) - .Default(std::optional()); + .Default(std::nullopt); } //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp index 88e1ab6ab1e4..cb9b7f6ec2fd 100644 --- a/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp +++ b/mlir/lib/Dialect/SPIRV/Transforms/SPIRVConversion.cpp @@ -1467,7 +1467,7 @@ mlir::spirv::getNativeVectorShape(Operation *op) { return TypeSwitch>>(op) .Case( [](auto typedOp) { return getNativeVectorShapeImpl(typedOp); }) - .Default([](Operation *) { return std::nullopt; }); + .Default(std::nullopt); } LogicalResult mlir::spirv::unrollVectorsInSignatures(Operation *op) { diff --git a/mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp b/mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp index 69e649d2eebe..bc4f5a5ac7f2 100644 --- a/mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp +++ b/mlir/lib/Dialect/Tensor/Transforms/RewriteAsConstant.cpp @@ -189,7 +189,7 @@ struct PadOpToConstant final : public OpRewritePattern { return constantFoldPadOp( rewriter, loc, inputAttr, integerAttr, *lowPad, *highPad); }) - .Default(Value()); + .Default(nullptr); if (!newOp) return rewriter.notifyMatchFailure(padTensorOp, diff --git 
a/mlir/lib/TableGen/Type.cpp b/mlir/lib/TableGen/Type.cpp index b31377e0de3e..0f1bf83d1987 100644 --- a/mlir/lib/TableGen/Type.cpp +++ b/mlir/lib/TableGen/Type.cpp @@ -56,7 +56,7 @@ std::optional TypeConstraint::getBuilderCall() const { StringRef value = init->getValue(); return value.empty() ? std::optional() : value; }) - .Default([](auto *) { return std::nullopt; }); + .Default(std::nullopt); } // Return the C++ type for this type (which may just be ::mlir::Type). diff --git a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp index eeb87253e5eb..e3bcf2749be1 100644 --- a/mlir/lib/Target/LLVMIR/DebugTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/DebugTranslation.cpp @@ -390,7 +390,7 @@ llvm::DISubrange *DebugTranslation::translateImpl(DISubrangeAttr attr) { .Case<>([&](LLVM::DIGlobalVariableAttr global) { return translate(global); }) - .Default([&](Attribute attr) { return nullptr; }); + .Default(nullptr); return metadata; }; return llvm::DISubrange::get(llvmCtx, getMetadataOrNull(attr.getCount()), @@ -420,10 +420,10 @@ DebugTranslation::translateImpl(DIGenericSubrangeAttr attr) { .Case([&](LLVM::DILocalVariableAttr local) { return translate(local); }) - .Case<>([&](LLVM::DIGlobalVariableAttr global) { + .Case([&](LLVM::DIGlobalVariableAttr global) { return translate(global); }) - .Default([&](Attribute attr) { return nullptr; }); + .Default(nullptr); return metadata; }; return llvm::DIGenericSubrange::get(llvmCtx, -- cgit v1.2.3 From 8067b5cff73ea96189b5bbca1110cfe4dce03552 Mon Sep 17 00:00:00 2001 From: Gedare Bloom Date: Thu, 30 Oct 2025 13:24:44 -0600 Subject: [clang-format] Add BreakAfterOpenBracket* and BreakBeforeCloseBracket* (#108332) Replace the `AlwaysBreak` and `BlockIndent` suboptions of `AlignAfterOpenBracket` with new style options `BreakAfterOpenBracket*` and `BreakBeforeCloseBracket*` for `*` in `BracedList` for braced list initializers, `if` for if conditional statements, `Loop` for loop control statements 
(for/while), `Switch` for switch statements, and `Function` for function calls/declarations/definitions. Deprecates `AlwaysBreak` and `BlockIndent`. Fixes #67738 Fixes #79176 Fixes #80123 Fixes #151844 --- clang/docs/ClangFormatStyleOptions.rst | 192 +++++++++++++++++++++------ clang/docs/ReleaseNotes.rst | 8 ++ clang/include/clang/Format/Format.h | 182 +++++++++++++++++++------ clang/lib/Format/ContinuationIndenter.cpp | 97 +++++++++----- clang/lib/Format/Format.cpp | 111 ++++++++++++++-- clang/lib/Format/FormatToken.cpp | 4 +- clang/lib/Format/FormatToken.h | 6 + clang/lib/Format/TokenAnnotator.cpp | 25 ++-- clang/unittests/Format/AlignBracketsTest.cpp | 58 +++++--- clang/unittests/Format/ConfigParseTest.cpp | 37 ++++-- clang/unittests/Format/FormatTest.cpp | 36 ++--- clang/unittests/Format/FormatTestJS.cpp | 2 +- 12 files changed, 566 insertions(+), 192 deletions(-) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 570cab262c11..0b4a4849f6cc 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -197,57 +197,29 @@ the configuration (without a prefix: ``Auto``). .. _AlignAfterOpenBracket: -**AlignAfterOpenBracket** (``BracketAlignmentStyle``) :versionbadge:`clang-format 3.8` :ref:`¶ ` +**AlignAfterOpenBracket** (``Boolean``) :versionbadge:`clang-format 3.8` :ref:`¶ ` If ``true``, horizontally aligns arguments after an open bracket. - This applies to round brackets (parentheses), angle brackets and square - brackets. - - Possible values: - - * ``BAS_Align`` (in configuration: ``Align``) - Align parameters on the open bracket, e.g.: - - .. code-block:: c++ - - someLongFunction(argument1, - argument2); - - * ``BAS_DontAlign`` (in configuration: ``DontAlign``) - Don't align, instead use ``ContinuationIndentWidth``, e.g.: - - .. 
code-block:: c++ - - someLongFunction(argument1, - argument2); - - * ``BAS_AlwaysBreak`` (in configuration: ``AlwaysBreak``) - Always break after an open bracket, if the parameters don't fit - on a single line, e.g.: - - .. code-block:: c++ - someLongFunction( - argument1, argument2); - - * ``BAS_BlockIndent`` (in configuration: ``BlockIndent``) - Always break after an open bracket, if the parameters don't fit - on a single line. Closing brackets will be placed on a new line. - E.g.: - - .. code-block:: c++ + .. code-block:: c++ - someLongFunction( - argument1, argument2 - ) + true: vs. false + someLongFunction(argument1, someLongFunction(argument1, + argument2); argument2); - .. note:: - - This currently only applies to braced initializer lists (when - ``Cpp11BracedListStyle`` is not ``Block``) and parentheses. + .. note:: + As of clang-format 22 this option is a bool with the previous + option of ``Align`` replaced with ``true``, ``DontAlign`` replaced + with ``false``, and the options of ``AlwaysBreak`` and ``BlockIndent`` + replaced with ``true`` and with setting of new style options using + ``BreakAfterOpenBracketBracedList``, ``BreakAfterOpenBracketFunction``, + ``BreakAfterOpenBracketIf``, ``BreakBeforeCloseBracketBracedList``, + ``BreakBeforeCloseBracketFunction``, and ``BreakBeforeCloseBracketIf``. + This applies to round brackets (parentheses), angle brackets and square + brackets. .. _AlignArrayOfStructures: @@ -2746,6 +2718,67 @@ the configuration (without a prefix: ``Auto``). @Mock DataLoad loader; +.. _BreakAfterOpenBracketBracedList: + +**BreakAfterOpenBracketBracedList** (``Boolean``) :versionbadge:`clang-format 22` :ref:`¶ ` + Force break after the left bracket of a braced initializer list (when + ``Cpp11BracedListStyle`` is ``true``) when the list exceeds the column + limit. + + .. code-block:: c++ + + true: false: + vector x { vs. vector x {1, + 1, 2, 3} 2, 3} + +.. 
_BreakAfterOpenBracketFunction: + +**BreakAfterOpenBracketFunction** (``Boolean``) :versionbadge:`clang-format 22` :ref:`¶ ` + Force break after the left parenthesis of a function (declaration, + definition, call) when the parameters exceed the column limit. + + .. code-block:: c++ + + true: false: + foo ( vs. foo (a, + a , b) b) + +.. _BreakAfterOpenBracketIf: + +**BreakAfterOpenBracketIf** (``Boolean``) :versionbadge:`clang-format 22` :ref:`¶ ` + Force break after the left parenthesis of an if control statement + when the expression exceeds the column limit. + + .. code-block:: c++ + + true: false: + if constexpr ( vs. if constexpr (a || + a || b) b) + +.. _BreakAfterOpenBracketLoop: + +**BreakAfterOpenBracketLoop** (``Boolean``) :versionbadge:`clang-format 22` :ref:`¶ ` + Force break after the left parenthesis of a loop control statement + when the expression exceeds the column limit. + + .. code-block:: c++ + + true: false: + while ( vs. while (a && + a && b) { b) { + +.. _BreakAfterOpenBracketSwitch: + +**BreakAfterOpenBracketSwitch** (``Boolean``) :versionbadge:`clang-format 22` :ref:`¶ ` + Force break after the left parenthesis of a switch control statement + when the expression exceeds the column limit. + + .. code-block:: c++ + + true: false: + switch ( vs. switch (a + + a + b) { b) { + .. _BreakAfterReturnType: **BreakAfterReturnType** (``ReturnTypeBreakingStyle``) :versionbadge:`clang-format 19` :ref:`¶ ` @@ -3383,6 +3416,79 @@ the configuration (without a prefix: ``Auto``). +.. _BreakBeforeCloseBracketBracedList: + +**BreakBeforeCloseBracketBracedList** (``Boolean``) :versionbadge:`clang-format 22` :ref:`¶ ` + Force break before the right bracket of a braced initializer list (when + ``Cpp11BracedListStyle`` is ``true``) when the list exceeds the column + limit. The break before the right bracket is only made if there is a + break after the opening bracket. + + .. code-block:: c++ + + true: false: + vector x { vs. vector x { + 1, 2, 3 1, 2, 3} + } + +.. 
_BreakBeforeCloseBracketFunction: + +**BreakBeforeCloseBracketFunction** (``Boolean``) :versionbadge:`clang-format 22` :ref:`¶ ` + Force break before the right parenthesis of a function (declaration, + definition, call) when the parameters exceed the column limit. + + .. code-block:: c++ + + true: false: + foo ( vs. foo ( + a , b a , b) + ) + +.. _BreakBeforeCloseBracketIf: + +**BreakBeforeCloseBracketIf** (``Boolean``) :versionbadge:`clang-format 22` :ref:`¶ ` + Force break before the right parenthesis of an if control statement + when the expression exceeds the column limit. The break before the + closing parenthesis is only made if there is a break after the opening + parenthesis. + + .. code-block:: c++ + + true: false: + if constexpr ( vs. if constexpr ( + a || b a || b ) + ) + +.. _BreakBeforeCloseBracketLoop: + +**BreakBeforeCloseBracketLoop** (``Boolean``) :versionbadge:`clang-format 22` :ref:`¶ ` + Force break before the right parenthesis of a loop control statement + when the expression exceeds the column limit. The break before the + closing parenthesis is only made if there is a break after the opening + parenthesis. + + .. code-block:: c++ + + true: false: + while ( vs. while ( + a && b a && b) { + ) { + +.. _BreakBeforeCloseBracketSwitch: + +**BreakBeforeCloseBracketSwitch** (``Boolean``) :versionbadge:`clang-format 22` :ref:`¶ ` + Force break before the right parenthesis of a switch control statement + when the expression exceeds the column limit. The break before the + closing parenthesis is only made if there is a break after the opening + parenthesis. + + .. code-block:: c++ + + true: false: + switch ( vs. switch ( + a + b a + b) { + ) { + .. 
_BreakBeforeConceptDeclarations: **BreakBeforeConceptDeclarations** (``BreakBeforeConceptDeclarationsStyle``) :versionbadge:`clang-format 12` :ref:`¶ ` diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 8435f367029a..ba737b9efb00 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -613,6 +613,14 @@ clang-format literals. - Add ``Leave`` suboption to ``IndentPPDirectives``. - Add ``AllowBreakBeforeQtProperty`` option. +- Add ``BreakAfterOpenBracketBracedList``, ``BreakAfterOpenBracketFunction``, + ``BreakAfterOpenBracketIf``, ``BreakAfterOpenBracketLoop``, + ``BreakAfterOpenBracketSwitch``, ``BreakBeforeCloseBracketBracedList``, + ``BreakBeforeCloseBracketFunction``, ``BreakBeforeCloseBracketIf``, + ``BreakBeforeCloseBracketLoop``, ``BreakBeforeCloseBracketSwitch`` options. +- Deprecate ``AlwaysBreak`` and ``BlockIndent`` suboptions from the + ``AlignAfterOpenBracket`` option, and make ``AlignAfterOpenBracket`` a + ``bool`` type. libclang -------- diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 2852c4a2916a..f246defc1fe8 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -62,49 +62,28 @@ struct FormatStyle { /// \version 3.3 int AccessModifierOffset; - /// Different styles for aligning after open brackets. 
- enum BracketAlignmentStyle : int8_t { - /// Align parameters on the open bracket, e.g.: - /// \code - /// someLongFunction(argument1, - /// argument2); - /// \endcode - BAS_Align, - /// Don't align, instead use ``ContinuationIndentWidth``, e.g.: - /// \code - /// someLongFunction(argument1, - /// argument2); - /// \endcode - BAS_DontAlign, - /// Always break after an open bracket, if the parameters don't fit - /// on a single line, e.g.: - /// \code - /// someLongFunction( - /// argument1, argument2); - /// \endcode - BAS_AlwaysBreak, - /// Always break after an open bracket, if the parameters don't fit - /// on a single line. Closing brackets will be placed on a new line. - /// E.g.: - /// \code - /// someLongFunction( - /// argument1, argument2 - /// ) - /// \endcode - /// - /// \note - /// This currently only applies to braced initializer lists (when - /// ``Cpp11BracedListStyle`` is not ``Block``) and parentheses. - /// \endnote - BAS_BlockIndent, - }; - /// If ``true``, horizontally aligns arguments after an open bracket. /// + /// \code + /// true: vs. false + /// someLongFunction(argument1, someLongFunction(argument1, + /// argument2); argument2); + /// \endcode + /// + /// \note + /// As of clang-format 22 this option is a bool with the previous + /// option of ``Align`` replaced with ``true``, ``DontAlign`` replaced + /// with ``false``, and the options of ``AlwaysBreak`` and ``BlockIndent`` + /// replaced with ``true`` and with setting of new style options using + /// ``BreakAfterOpenBracketBracedList``, ``BreakAfterOpenBracketFunction``, + /// ``BreakAfterOpenBracketIf``, ``BreakBeforeCloseBracketBracedList``, + /// ``BreakBeforeCloseBracketFunction``, and ``BreakBeforeCloseBracketIf``. + /// \endnote + /// /// This applies to round brackets (parentheses), angle brackets and square /// brackets. /// \version 3.8 - BracketAlignmentStyle AlignAfterOpenBracket; + bool AlignAfterOpenBracket; /// Different style for aligning array initializers. 
enum ArrayInitializerAlignmentStyle : int8_t { @@ -1708,6 +1687,57 @@ struct FormatStyle { /// \version 16 AttributeBreakingStyle BreakAfterAttributes; + /// Force break after the left bracket of a braced initializer list (when + /// ``Cpp11BracedListStyle`` is ``true``) when the list exceeds the column + /// limit. + /// \code + /// true: false: + /// vector x { vs. vector x {1, + /// 1, 2, 3} 2, 3} + /// \endcode + /// \version 22 + bool BreakAfterOpenBracketBracedList; + + /// Force break after the left parenthesis of a function (declaration, + /// definition, call) when the parameters exceed the column limit. + /// \code + /// true: false: + /// foo ( vs. foo (a, + /// a , b) b) + /// \endcode + /// \version 22 + bool BreakAfterOpenBracketFunction; + + /// Force break after the left parenthesis of an if control statement + /// when the expression exceeds the column limit. + /// \code + /// true: false: + /// if constexpr ( vs. if constexpr (a || + /// a || b) b) + /// \endcode + /// \version 22 + bool BreakAfterOpenBracketIf; + + /// Force break after the left parenthesis of a loop control statement + /// when the expression exceeds the column limit. + /// \code + /// true: false: + /// while ( vs. while (a && + /// a && b) { b) { + /// \endcode + /// \version 22 + bool BreakAfterOpenBracketLoop; + + /// Force break after the left parenthesis of a switch control statement + /// when the expression exceeds the column limit. + /// \code + /// true: false: + /// switch ( vs. switch (a + + /// a + b) { b) { + /// \endcode + /// \version 22 + bool BreakAfterOpenBracketSwitch; + /// The function declaration return type breaking style to use. /// \version 19 ReturnTypeBreakingStyle BreakAfterReturnType; @@ -2221,6 +2251,69 @@ struct FormatStyle { /// \version 3.7 BraceBreakingStyle BreakBeforeBraces; + /// Force break before the right bracket of a braced initializer list (when + /// ``Cpp11BracedListStyle`` is ``true``) when the list exceeds the column + /// limit. 
The break before the right bracket is only made if there is a + /// break after the opening bracket. + /// \code + /// true: false: + /// vector x { vs. vector x { + /// 1, 2, 3 1, 2, 3} + /// } + /// \endcode + /// \version 22 + bool BreakBeforeCloseBracketBracedList; + + /// Force break before the right parenthesis of a function (declaration, + /// definition, call) when the parameters exceed the column limit. + /// \code + /// true: false: + /// foo ( vs. foo ( + /// a , b a , b) + /// ) + /// \endcode + /// \version 22 + bool BreakBeforeCloseBracketFunction; + + /// Force break before the right parenthesis of an if control statement + /// when the expression exceeds the column limit. The break before the + /// closing parenthesis is only made if there is a break after the opening + /// parenthesis. + /// \code + /// true: false: + /// if constexpr ( vs. if constexpr ( + /// a || b a || b ) + /// ) + /// \endcode + /// \version 22 + bool BreakBeforeCloseBracketIf; + + /// Force break before the right parenthesis of a loop control statement + /// when the expression exceeds the column limit. The break before the + /// closing parenthesis is only made if there is a break after the opening + /// parenthesis. + /// \code + /// true: false: + /// while ( vs. while ( + /// a && b a && b) { + /// ) { + /// \endcode + /// \version 22 + bool BreakBeforeCloseBracketLoop; + + /// Force break before the right parenthesis of a switch control statement + /// when the expression exceeds the column limit. The break before the + /// closing parenthesis is only made if there is a break after the opening + /// parenthesis. + /// \code + /// true: false: + /// switch ( vs. switch ( + /// a + b a + b) { + /// ) { + /// \endcode + /// \version 22 + bool BreakBeforeCloseBracketSwitch; + /// Different ways to break before concept declarations. enum BreakBeforeConceptDeclarationsStyle : int8_t { /// Keep the template declaration line together with ``concept``. 
@@ -5530,10 +5623,23 @@ struct FormatStyle { BreakAdjacentStringLiterals == R.BreakAdjacentStringLiterals && BreakAfterAttributes == R.BreakAfterAttributes && BreakAfterJavaFieldAnnotations == R.BreakAfterJavaFieldAnnotations && + BreakAfterOpenBracketBracedList == + R.BreakAfterOpenBracketBracedList && + BreakAfterOpenBracketFunction == R.BreakAfterOpenBracketFunction && + BreakAfterOpenBracketIf == R.BreakAfterOpenBracketIf && + BreakAfterOpenBracketLoop == R.BreakAfterOpenBracketLoop && + BreakAfterOpenBracketSwitch == R.BreakAfterOpenBracketSwitch && BreakAfterReturnType == R.BreakAfterReturnType && BreakArrays == R.BreakArrays && BreakBeforeBinaryOperators == R.BreakBeforeBinaryOperators && BreakBeforeBraces == R.BreakBeforeBraces && + BreakBeforeCloseBracketBracedList == + R.BreakBeforeCloseBracketBracedList && + BreakBeforeCloseBracketFunction == + R.BreakBeforeCloseBracketFunction && + BreakBeforeCloseBracketIf == R.BreakBeforeCloseBracketIf && + BreakBeforeCloseBracketLoop == R.BreakBeforeCloseBracketLoop && + BreakBeforeCloseBracketSwitch == R.BreakBeforeCloseBracketSwitch && BreakBeforeConceptDeclarations == R.BreakBeforeConceptDeclarations && BreakBeforeInlineASMColon == R.BreakBeforeInlineASMColon && BreakBeforeTemplateCloser == R.BreakBeforeTemplateCloser && diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index e5abf833194d..9ab024a03fbd 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -356,9 +356,11 @@ bool ContinuationIndenter::canBreak(const LineState &State) { return CurrentState.BreakBeforeClosingBrace; } - // Allow breaking before the right parens with block indentation if there was - // a break after the left parens, which is tracked by BreakBeforeClosingParen. 
- if (Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent && + // Check need to break before the right parens if there was a break after + // the left parens, which is tracked by BreakBeforeClosingParen. + if ((Style.BreakBeforeCloseBracketFunction || + Style.BreakBeforeCloseBracketIf || Style.BreakBeforeCloseBracketLoop || + Style.BreakBeforeCloseBracketSwitch) && Current.is(tok::r_paren)) { return CurrentState.BreakBeforeClosingParen; } @@ -837,32 +839,38 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, return Tok.is(tok::l_brace) && Tok.isNot(BK_Block) && Style.Cpp11BracedListStyle != FormatStyle::BLS_Block; }; - if (Tok.isNoneOf(tok::l_paren, TT_TemplateOpener, tok::l_square) && - !IsStartOfBracedList()) { + if (IsStartOfBracedList()) + return Style.BreakAfterOpenBracketBracedList; + if (Tok.isNoneOf(tok::l_paren, TT_TemplateOpener, tok::l_square)) return false; - } if (!Tok.Previous) return true; if (Tok.Previous->isIf()) - return Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak; - return Tok.Previous->isNoneOf(TT_CastRParen, tok::kw_for, tok::kw_while, - tok::kw_switch) && - !(Style.isJavaScript() && Tok.Previous->is(Keywords.kw_await)); + return Style.BreakAfterOpenBracketIf; + if (Tok.Previous->isLoop(Style)) + return Style.BreakAfterOpenBracketLoop; + if (Tok.Previous->is(tok::kw_switch)) + return Style.BreakAfterOpenBracketSwitch; + if (Style.BreakAfterOpenBracketFunction) { + return !Tok.Previous->is(TT_CastRParen) && + !(Style.isJavaScript() && Tok.is(Keywords.kw_await)); + } + return false; }; auto IsFunctionCallParen = [](const FormatToken &Tok) { return Tok.is(tok::l_paren) && Tok.ParameterCount > 0 && Tok.Previous && Tok.Previous->is(tok::identifier); }; - auto IsInTemplateString = [this](const FormatToken &Tok) { + auto IsInTemplateString = [this](const FormatToken &Tok, bool NestBlocks) { if (!Style.isJavaScript()) return false; for (const auto *Prev = &Tok; Prev; Prev = Prev->Previous) { if 
(Prev->is(TT_TemplateString) && Prev->opensScope()) return true; - if (Prev->opensScope() || - (Prev->is(TT_TemplateString) && Prev->closesScope())) { - break; - } + if (Prev->opensScope() && !NestBlocks) + return false; + if (Prev->is(TT_TemplateString) && Prev->closesScope()) + return false; } return false; }; @@ -884,21 +892,25 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, Tok.isOneOf(tok::ellipsis, Keywords.kw_await))) { return true; } - if (const auto *Previous = Tok.Previous; - !Previous || (Previous->isNoneOf(TT_FunctionDeclarationLParen, - TT_LambdaDefinitionLParen) && - !IsFunctionCallParen(*Previous))) { + const auto *Previous = TokAfterLParen.Previous; + assert(Previous); // IsOpeningBracket(Previous) + if (Previous->Previous && + (Previous->Previous->isIf() || Previous->Previous->isLoop(Style) || + Previous->Previous->is(tok::kw_switch))) { + return false; + } + if (Previous->isNoneOf(TT_FunctionDeclarationLParen, + TT_LambdaDefinitionLParen) && + !IsFunctionCallParen(*Previous)) { return true; } - if (IsOpeningBracket(Tok) || IsInTemplateString(Tok)) + if (IsOpeningBracket(Tok) || IsInTemplateString(Tok, true)) return true; const auto *Next = Tok.Next; return !Next || Next->isMemberAccess() || Next->is(TT_FunctionDeclarationLParen) || IsFunctionCallParen(*Next); }; - if ((Style.AlignAfterOpenBracket == FormatStyle::BAS_AlwaysBreak || - Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent) && - IsOpeningBracket(Previous) && State.Column > getNewLineColumn(State) && + if (IsOpeningBracket(Previous) && State.Column > getNewLineColumn(State) && // Don't do this for simple (no expressions) one-argument function calls // as that feels like needlessly wasting whitespace, e.g.: // @@ -920,7 +932,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, // Note: This doesn't apply to macro expansion lines, which are MACRO( , , ) // with args as children of the '(' and ',' tokens. 
It does not make sense to // align the commas with the opening paren. - if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign && + if (Style.AlignAfterOpenBracket && !CurrentState.IsCSharpGenericTypeConstraint && Previous.opensScope() && Previous.isNoneOf(TT_ObjCMethodExpr, TT_RequiresClause, TT_TableGenDAGArgOpener, @@ -933,7 +945,7 @@ void ContinuationIndenter::addTokenOnCurrentLine(LineState &State, bool DryRun, Previous.Previous->isNoneOf(tok::identifier, tok::l_paren, BK_BracedInit))) || Previous.is(TT_VerilogMultiLineListLParen)) && - !IsInTemplateString(Current)) { + !IsInTemplateString(Current, false)) { CurrentState.Indent = State.Column + Spaces; CurrentState.IsAligned = true; } @@ -1271,8 +1283,20 @@ unsigned ContinuationIndenter::addTokenOnNewLine(LineState &State, } if (PreviousNonComment && PreviousNonComment->is(tok::l_paren)) { - CurrentState.BreakBeforeClosingParen = - Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent; + if (auto Previous = PreviousNonComment->Previous) { + if (Previous->isIf()) { + CurrentState.BreakBeforeClosingParen = Style.BreakBeforeCloseBracketIf; + } else if (Previous->isLoop(Style)) { + CurrentState.BreakBeforeClosingParen = + Style.BreakBeforeCloseBracketLoop; + } else if (Previous->is(tok::kw_switch)) { + CurrentState.BreakBeforeClosingParen = + Style.BreakBeforeCloseBracketSwitch; + } else { + CurrentState.BreakBeforeClosingParen = + Style.BreakBeforeCloseBracketFunction; + } + } } if (PreviousNonComment && PreviousNonComment->is(TT_TemplateOpener)) @@ -1416,13 +1440,17 @@ unsigned ContinuationIndenter::getNewLineColumn(const LineState &State) { State.Stack.size() > 1) { return State.Stack[State.Stack.size() - 2].LastSpace; } - if (Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent && - (Current.is(tok::r_paren) || - (Current.is(tok::r_brace) && Current.MatchingParen && - Current.MatchingParen->is(BK_BracedInit))) && + if (Style.BreakBeforeCloseBracketBracedList && Current.is(tok::r_brace) && + 
Current.MatchingParen && Current.MatchingParen->is(BK_BracedInit) && State.Stack.size() > 1) { return State.Stack[State.Stack.size() - 2].LastSpace; } + if ((Style.BreakBeforeCloseBracketFunction || + Style.BreakBeforeCloseBracketIf || Style.BreakBeforeCloseBracketLoop || + Style.BreakBeforeCloseBracketSwitch) && + Current.is(tok::r_paren) && State.Stack.size() > 1) { + return State.Stack[State.Stack.size() - 2].LastSpace; + } if (Style.BreakBeforeTemplateCloser && Current.is(TT_TemplateCloser) && State.Stack.size() > 1) { return State.Stack[State.Stack.size() - 2].LastSpace; @@ -1844,8 +1872,8 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, PrecedenceLevel < prec::Assignment) && (!Previous || Previous->isNot(tok::kw_return) || (!Style.isJava() && PrecedenceLevel > 0)) && - (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign || - PrecedenceLevel > prec::Comma || Current.NestingLevel == 0) && + (Style.AlignAfterOpenBracket || PrecedenceLevel > prec::Comma || + Current.NestingLevel == 0) && (!Style.isTableGen() || (Previous && Previous->isOneOf(TT_TableGenDAGArgListComma, TT_TableGenDAGArgListCommaToBreak)))) { @@ -1885,8 +1913,7 @@ void ContinuationIndenter::moveStatePastFakeLParens(LineState &State, if (PrecedenceLevel > prec::Unknown) NewParenState.LastSpace = std::max(NewParenState.LastSpace, State.Column); if (PrecedenceLevel != prec::Conditional && - Current.isNot(TT_UnaryOperator) && - Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) { + Current.isNot(TT_UnaryOperator) && Style.AlignAfterOpenBracket) { NewParenState.StartOfFunctionCall = State.Column; } diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index edd126c7724b..dd14fcd72922 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -32,6 +32,13 @@ using clang::format::FormatStyle; LLVM_YAML_IS_SEQUENCE_VECTOR(FormatStyle::RawStringFormat) +enum BracketAlignmentStyle : int8_t { + BAS_Align, + BAS_DontAlign, + 
BAS_AlwaysBreak, + BAS_BlockIndent +}; + namespace llvm { namespace yaml { template <> @@ -204,16 +211,16 @@ template <> struct MappingTraits { } }; -template <> struct ScalarEnumerationTraits { - static void enumeration(IO &IO, FormatStyle::BracketAlignmentStyle &Value) { - IO.enumCase(Value, "Align", FormatStyle::BAS_Align); - IO.enumCase(Value, "DontAlign", FormatStyle::BAS_DontAlign); - IO.enumCase(Value, "AlwaysBreak", FormatStyle::BAS_AlwaysBreak); - IO.enumCase(Value, "BlockIndent", FormatStyle::BAS_BlockIndent); +template <> struct ScalarEnumerationTraits { + static void enumeration(IO &IO, BracketAlignmentStyle &Value) { + IO.enumCase(Value, "Align", BAS_Align); + IO.enumCase(Value, "DontAlign", BAS_DontAlign); // For backward compatibility. - IO.enumCase(Value, "true", FormatStyle::BAS_Align); - IO.enumCase(Value, "false", FormatStyle::BAS_DontAlign); + IO.enumCase(Value, "true", BAS_Align); + IO.enumCase(Value, "false", BAS_DontAlign); + IO.enumCase(Value, "AlwaysBreak", BAS_AlwaysBreak); + IO.enumCase(Value, "BlockIndent", BAS_BlockIndent); } }; @@ -979,6 +986,54 @@ template <> struct MappingTraits { bool SpacesInCStyleCastParentheses = false; bool SpacesInParentheses = false; + if (IO.outputting()) { + IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket); + } else { + // For backward compatibility. 
+ BracketAlignmentStyle LocalBAS = BAS_Align; + if (IsGoogleOrChromium) { + FormatStyle::LanguageKind Language = Style.Language; + if (Language == FormatStyle::LK_None) + Language = ((FormatStyle *)IO.getContext())->Language; + if (Language == FormatStyle::LK_JavaScript) + LocalBAS = BAS_AlwaysBreak; + else if (Language == FormatStyle::LK_Java) + LocalBAS = BAS_DontAlign; + } else if (BasedOnStyle.equals_insensitive("webkit")) { + LocalBAS = BAS_DontAlign; + } + IO.mapOptional("AlignAfterOpenBracket", LocalBAS); + Style.BreakAfterOpenBracketBracedList = false; + Style.BreakAfterOpenBracketFunction = false; + Style.BreakAfterOpenBracketIf = false; + Style.BreakAfterOpenBracketLoop = false; + Style.BreakAfterOpenBracketSwitch = false; + Style.BreakBeforeCloseBracketBracedList = false; + Style.BreakBeforeCloseBracketFunction = false; + Style.BreakBeforeCloseBracketIf = false; + Style.BreakBeforeCloseBracketLoop = false; + Style.BreakBeforeCloseBracketSwitch = false; + + switch (LocalBAS) { + case BAS_DontAlign: + Style.AlignAfterOpenBracket = false; + break; + case BAS_BlockIndent: + Style.BreakBeforeCloseBracketBracedList = true; + Style.BreakBeforeCloseBracketFunction = true; + Style.BreakBeforeCloseBracketIf = true; + [[fallthrough]]; + case BAS_AlwaysBreak: + Style.BreakAfterOpenBracketBracedList = true; + Style.BreakAfterOpenBracketFunction = true; + Style.BreakAfterOpenBracketIf = true; + [[fallthrough]]; + case BAS_Align: + Style.AlignAfterOpenBracket = true; + break; + } + } + // For backward compatibility. 
if (!IO.outputting()) { IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlines); @@ -1014,7 +1069,6 @@ template <> struct MappingTraits { } IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset); - IO.mapOptional("AlignAfterOpenBracket", Style.AlignAfterOpenBracket); IO.mapOptional("AlignArrayOfStructures", Style.AlignArrayOfStructures); IO.mapOptional("AlignConsecutiveAssignments", Style.AlignConsecutiveAssignments); @@ -1079,10 +1133,29 @@ template <> struct MappingTraits { IO.mapOptional("BreakAfterAttributes", Style.BreakAfterAttributes); IO.mapOptional("BreakAfterJavaFieldAnnotations", Style.BreakAfterJavaFieldAnnotations); + IO.mapOptional("BreakAfterOpenBracketBracedList", + Style.BreakAfterOpenBracketBracedList); + IO.mapOptional("BreakAfterOpenBracketFunction", + Style.BreakAfterOpenBracketFunction); + IO.mapOptional("BreakAfterOpenBracketIf", Style.BreakAfterOpenBracketIf); + IO.mapOptional("BreakAfterOpenBracketLoop", + Style.BreakAfterOpenBracketLoop); + IO.mapOptional("BreakAfterOpenBracketSwitch", + Style.BreakAfterOpenBracketSwitch); IO.mapOptional("BreakAfterReturnType", Style.BreakAfterReturnType); IO.mapOptional("BreakArrays", Style.BreakArrays); IO.mapOptional("BreakBeforeBinaryOperators", Style.BreakBeforeBinaryOperators); + IO.mapOptional("BreakBeforeCloseBracketBracedList", + Style.BreakBeforeCloseBracketBracedList); + IO.mapOptional("BreakBeforeCloseBracketFunction", + Style.BreakBeforeCloseBracketFunction); + IO.mapOptional("BreakBeforeCloseBracketIf", + Style.BreakBeforeCloseBracketIf); + IO.mapOptional("BreakBeforeCloseBracketLoop", + Style.BreakBeforeCloseBracketLoop); + IO.mapOptional("BreakBeforeCloseBracketSwitch", + Style.BreakBeforeCloseBracketSwitch); IO.mapOptional("BreakBeforeConceptDeclarations", Style.BreakBeforeConceptDeclarations); IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); @@ -1561,7 +1634,7 @@ static void expandPresetsSpacesInParens(FormatStyle &Expanded) { FormatStyle 
getLLVMStyle(FormatStyle::LanguageKind Language) { FormatStyle LLVMStyle; LLVMStyle.AccessModifierOffset = -2; - LLVMStyle.AlignAfterOpenBracket = FormatStyle::BAS_Align; + LLVMStyle.AlignAfterOpenBracket = true; LLVMStyle.AlignArrayOfStructures = FormatStyle::AIAS_None; LLVMStyle.AlignConsecutiveAssignments = {}; LLVMStyle.AlignConsecutiveAssignments.PadOperators = true; @@ -1621,10 +1694,20 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.BreakAdjacentStringLiterals = true; LLVMStyle.BreakAfterAttributes = FormatStyle::ABS_Leave; LLVMStyle.BreakAfterJavaFieldAnnotations = false; + LLVMStyle.BreakAfterOpenBracketBracedList = false; + LLVMStyle.BreakAfterOpenBracketFunction = false; + LLVMStyle.BreakAfterOpenBracketIf = false; + LLVMStyle.BreakAfterOpenBracketLoop = false; + LLVMStyle.BreakAfterOpenBracketSwitch = false; LLVMStyle.BreakAfterReturnType = FormatStyle::RTBS_None; LLVMStyle.BreakArrays = true; LLVMStyle.BreakBeforeBinaryOperators = FormatStyle::BOS_None; LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; + LLVMStyle.BreakBeforeCloseBracketBracedList = false; + LLVMStyle.BreakBeforeCloseBracketFunction = false; + LLVMStyle.BreakBeforeCloseBracketIf = false; + LLVMStyle.BreakBeforeCloseBracketLoop = false; + LLVMStyle.BreakBeforeCloseBracketSwitch = false; LLVMStyle.BreakBeforeConceptDeclarations = FormatStyle::BBCDS_Always; LLVMStyle.BreakBeforeInlineASMColon = FormatStyle::BBIAS_OnlyMultiline; LLVMStyle.BreakBeforeTemplateCloser = false; @@ -1877,7 +1960,7 @@ FormatStyle getGoogleStyle(FormatStyle::LanguageKind Language) { GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; if (Language == FormatStyle::LK_Java) { - GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + GoogleStyle.AlignAfterOpenBracket = false; GoogleStyle.AlignOperands = FormatStyle::OAS_DontAlign; GoogleStyle.AlignTrailingComments = {}; GoogleStyle.AlignTrailingComments.Kind = FormatStyle::TCAS_Never; @@ -1889,7 +1972,9 @@ FormatStyle 
getGoogleStyle(FormatStyle::LanguageKind Language) { GoogleStyle.SpaceAfterCStyleCast = true; GoogleStyle.SpacesBeforeTrailingComments = 1; } else if (Language == FormatStyle::LK_JavaScript) { - GoogleStyle.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + GoogleStyle.BreakAfterOpenBracketBracedList = true; + GoogleStyle.BreakAfterOpenBracketFunction = true; + GoogleStyle.BreakAfterOpenBracketIf = true; GoogleStyle.AlignOperands = FormatStyle::OAS_DontAlign; GoogleStyle.AllowShortFunctionsOnASingleLine = FormatStyle::SFS_Empty; // TODO: still under discussion whether to switch to SLS_All. @@ -2026,7 +2111,7 @@ FormatStyle getMozillaStyle() { FormatStyle getWebKitStyle() { FormatStyle Style = getLLVMStyle(); Style.AccessModifierOffset = -4; - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + Style.AlignAfterOpenBracket = false; Style.AlignOperands = FormatStyle::OAS_DontAlign; Style.AlignTrailingComments = {}; Style.AlignTrailingComments.Kind = FormatStyle::TCAS_Never; diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index d1c62642efd4..28fdbcbf0e47 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -68,7 +68,7 @@ bool FormatToken::isBlockIndentedInitRBrace(const FormatStyle &Style) const { assert(MatchingParen); assert(MatchingParen->is(tok::l_brace)); if (Style.Cpp11BracedListStyle == FormatStyle::BLS_Block || - Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent) { + !Style.BreakBeforeCloseBracketBracedList) { return false; } const auto *LBrace = MatchingParen; @@ -198,7 +198,7 @@ void CommaSeparatedList::precomputeFormattingInfos(const FormatToken *Token) { return; // Column format doesn't really make sense if we don't align after brackets. 
- if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign) + if (!Style.AlignAfterOpenBracket) return; FormatToken *ItemBegin = Token->Next; diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index 6f3d24aefc1c..d833130a538f 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -666,6 +666,12 @@ public: (endsSequence(tok::identifier, tok::kw_if) && AllowConstexprMacro); } + bool isLoop(const FormatStyle &Style) const { + return isOneOf(tok::kw_for, tok::kw_while) || + (Style.isJavaScript() && isNot(tok::l_paren) && Previous && + Previous->is(tok::kw_for)); + } + bool closesScopeAfterBlock() const { if (getBlockKind() == BK_Block) return true; diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 021d8c658eb1..8e227da2a79a 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -4427,10 +4427,8 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, if (Left.is(tok::l_paren) && Style.PenaltyBreakOpenParenthesis != 0) return Style.PenaltyBreakOpenParenthesis; - if (Left.is(tok::l_paren) && InFunctionDecl && - Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign) { + if (Left.is(tok::l_paren) && InFunctionDecl && Style.AlignAfterOpenBracket) return 100; - } if (Left.is(tok::l_paren) && Left.Previous && (Left.Previous->isOneOf(tok::kw_for, tok::kw__Generic) || Left.Previous->isIf())) { @@ -4446,7 +4444,7 @@ unsigned TokenAnnotator::splitPenalty(const AnnotatedLine &Line, // If we aren't aligning after opening parens/braces we can always break // here unless the style does not want us to place all arguments on the // next line. 
- if (Style.AlignAfterOpenBracket == FormatStyle::BAS_DontAlign && + if (!Style.AlignAfterOpenBracket && (Left.ParameterCount <= 1 || Style.AllowAllArgumentsOnNextLine)) { return 0; } @@ -6226,24 +6224,31 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, (Right.isBlockIndentedInitRBrace(Style))); } - // We only break before r_paren if we're in a block indented context. + // We can break before r_paren if we're in a block indented context or + // a control statement with an explicit style option. if (Right.is(tok::r_paren)) { - if (Style.AlignAfterOpenBracket != FormatStyle::BAS_BlockIndent || - !Right.MatchingParen) { + if (!Right.MatchingParen) return false; - } auto Next = Right.Next; if (Next && Next->is(tok::r_paren)) Next = Next->Next; if (Next && Next->is(tok::l_paren)) return false; const FormatToken *Previous = Right.MatchingParen->Previous; - return !(Previous && (Previous->is(tok::kw_for) || Previous->isIf())); + if (!Previous) + return false; + if (Previous->isIf()) + return Style.BreakBeforeCloseBracketIf; + if (Previous->isLoop(Style)) + return Style.BreakBeforeCloseBracketLoop; + if (Previous->is(tok::kw_switch)) + return Style.BreakBeforeCloseBracketSwitch; + return Style.BreakBeforeCloseBracketFunction; } if (Left.isOneOf(tok::r_paren, TT_TrailingAnnotation) && Right.is(TT_TrailingAnnotation) && - Style.AlignAfterOpenBracket == FormatStyle::BAS_BlockIndent) { + Style.BreakBeforeCloseBracketFunction) { return false; } diff --git a/clang/unittests/Format/AlignBracketsTest.cpp b/clang/unittests/Format/AlignBracketsTest.cpp index ea8db51a4d18..10ca5fb7da1c 100644 --- a/clang/unittests/Format/AlignBracketsTest.cpp +++ b/clang/unittests/Format/AlignBracketsTest.cpp @@ -28,7 +28,7 @@ TEST_F(AlignBracketsTest, AlignsAfterOpenBracket) { "SomeLongVariableName->someFunction(foooooooo(aaaaaaaaaaaaaaa,\n" " aaaaaaaaaaaaaaaaaaaaa));"); FormatStyle Style = getLLVMStyle(); - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + 
Style.AlignAfterOpenBracket = false; verifyFormat("void aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(\n" " aaaaaaaaaaa aaaaaaaa, aaaaaaaaa aaaaaaa) {}", Style); @@ -64,7 +64,7 @@ TEST_F(AlignBracketsTest, AlignsAfterOpenBracket) { Style); Style.ColumnLimit = 80; - Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Style.BreakAfterOpenBracketFunction = true; Style.BinPackArguments = false; Style.BinPackParameters = FormatStyle::BPPS_OnePerLine; verifyFormat("void aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(\n" @@ -115,7 +115,9 @@ TEST_F(AlignBracketsTest, AlignsAfterOpenBracket) { " XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXZZZZZZZZZZZZZZZZZZZZZZZZZ()));", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + Style.BreakAfterOpenBracketFunction = true; + Style.BreakBeforeCloseBracketFunction = true; + Style.BreakBeforeCloseBracketBracedList = true; Style.BinPackArguments = false; Style.BinPackParameters = FormatStyle::BPPS_OnePerLine; verifyFormat("void aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa(\n" @@ -254,7 +256,8 @@ TEST_F(AlignBracketsTest, AlignAfterOpenBracketBlockIndent) { "argument5));", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + Style.BreakAfterOpenBracketFunction = true; + Style.BreakBeforeCloseBracketFunction = true; verifyFormat(Short, Style); verifyFormat( @@ -378,7 +381,8 @@ TEST_F(AlignBracketsTest, AlignAfterOpenBracketBlockIndentIfStatement) { "}", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + Style.BreakAfterOpenBracketFunction = true; + Style.BreakBeforeCloseBracketFunction = true; verifyFormat("if (foo()) {\n" " return;\n" @@ -440,7 +444,8 @@ TEST_F(AlignBracketsTest, AlignAfterOpenBracketBlockIndentForStatement) { "}", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + Style.BreakAfterOpenBracketFunction = true; + Style.BreakBeforeCloseBracketFunction = true; verifyFormat("for (int i = 0; i < 5; ++i) {\n" " doSomething();\n" @@ -457,7 +462,8 @@ 
TEST_F(AlignBracketsTest, AlignAfterOpenBracketBlockIndentForStatement) { TEST_F(AlignBracketsTest, AlignAfterOpenBracketBlockIndentInitializers) { auto Style = getLLVMStyleWithColumns(60); - Style.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + Style.BreakAfterOpenBracketBracedList = true; + Style.BreakBeforeCloseBracketBracedList = true; // Aggregate initialization. verifyFormat("int LooooooooooooooooooooooooongVariable[2] = {\n" " 10000000, 20000000\n" @@ -611,13 +617,13 @@ TEST_F(AlignBracketsTest, AllowAllArgumentsOnNextLineDontAlign) { StringRef Input = "functionCall(paramA, paramB, paramC);\n" "void functionDecl(int A, int B, int C);"; Style.AllowAllArgumentsOnNextLine = false; - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + Style.AlignAfterOpenBracket = false; verifyFormat(StringRef("functionCall(paramA, paramB,\n" " paramC);\n" "void functionDecl(int A, int B,\n" " int C);"), Input, Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_Align; + Style.AlignAfterOpenBracket = true; verifyFormat(StringRef("functionCall(paramA, paramB,\n" " paramC);\n" "void functionDecl(int A, int B,\n" @@ -625,13 +631,14 @@ TEST_F(AlignBracketsTest, AllowAllArgumentsOnNextLineDontAlign) { Input, Style); // However, BAS_AlwaysBreak and BAS_BlockIndent should take precedence over // AllowAllArgumentsOnNextLine. 
- Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Style.BreakAfterOpenBracketFunction = true; verifyFormat(StringRef("functionCall(\n" " paramA, paramB, paramC);\n" "void functionDecl(\n" " int A, int B, int C);"), Input, Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + Style.BreakAfterOpenBracketFunction = true; + Style.BreakBeforeCloseBracketFunction = true; verifyFormat("functionCall(\n" " paramA, paramB, paramC\n" ");\n" @@ -639,11 +646,12 @@ TEST_F(AlignBracketsTest, AllowAllArgumentsOnNextLineDontAlign) { " int A, int B, int C\n" ");", Input, Style); + Style.BreakBeforeCloseBracketFunction = false; // When AllowAllArgumentsOnNextLine is set, we prefer breaking before the // first argument. Style.AllowAllArgumentsOnNextLine = true; - Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Style.BreakAfterOpenBracketFunction = true; verifyFormat(StringRef("functionCall(\n" " paramA, paramB, paramC);\n" "void functionDecl(\n" @@ -651,13 +659,14 @@ TEST_F(AlignBracketsTest, AllowAllArgumentsOnNextLineDontAlign) { Input, Style); // It wouldn't fit on one line with aligned parameters so this setting // doesn't change anything for BAS_Align. - Style.AlignAfterOpenBracket = FormatStyle::BAS_Align; + Style.AlignAfterOpenBracket = true; + Style.BreakAfterOpenBracketFunction = false; verifyFormat(StringRef("functionCall(paramA, paramB,\n" " paramC);\n" "void functionDecl(int A, int B,\n" " int C);"), Input, Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + Style.BreakAfterOpenBracketFunction = true; verifyFormat(StringRef("functionCall(\n" " paramA, paramB, paramC);\n" "void functionDecl(\n" @@ -678,13 +687,14 @@ TEST_F(AlignBracketsTest, FormatsDeclarationBreakAlways) { // Ensure AlignAfterOpenBracket interacts correctly with BinPackParameters set // to BPPS_AlwaysOnePerLine. 
- BreakAlways.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + BreakAlways.BreakAfterOpenBracketFunction = true; verifyFormat( "void someLongFunctionName(\n" " int aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa,\n" " int b);", BreakAlways); - BreakAlways.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + BreakAlways.BreakAfterOpenBracketFunction = true; + BreakAlways.BreakBeforeCloseBracketFunction = true; verifyFormat( "void someLongFunctionName(\n" " int aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa,\n" @@ -734,7 +744,7 @@ TEST_F(AlignBracketsTest, FormatsDefinitionBreakAlways) { // Ensure AlignAfterOpenBracket interacts correctly with BinPackParameters set // to BPPS_AlwaysOnePerLine. - BreakAlways.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + BreakAlways.BreakAfterOpenBracketFunction = true; verifyFormat( "void someLongFunctionName(\n" " int aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa,\n" @@ -743,7 +753,8 @@ TEST_F(AlignBracketsTest, FormatsDefinitionBreakAlways) { " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa, b);\n" "}", BreakAlways); - BreakAlways.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + BreakAlways.BreakAfterOpenBracketFunction = true; + BreakAlways.BreakBeforeCloseBracketFunction = true; verifyFormat( "void someLongFunctionName(\n" " int aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa,\n" @@ -761,17 +772,17 @@ TEST_F(AlignBracketsTest, ParenthesesAndOperandAlignment) { verifyFormat("int a = f(aaaaaaaaaaaaaaaaaaaaaa &&\n" " bbbbbbbbbbbbbbbbbbbbbb);", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_Align; + Style.AlignAfterOpenBracket = true; Style.AlignOperands = FormatStyle::OAS_DontAlign; verifyFormat("int a = f(aaaaaaaaaaaaaaaaaaaaaa &&\n" " bbbbbbbbbbbbbbbbbbbbbb);", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + Style.AlignAfterOpenBracket = false; Style.AlignOperands = FormatStyle::OAS_Align; verifyFormat("int a = f(aaaaaaaaaaaaaaaaaaaaaa 
&&\n" " bbbbbbbbbbbbbbbbbbbbbb);", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + Style.AlignAfterOpenBracket = false; Style.AlignOperands = FormatStyle::OAS_DontAlign; verifyFormat("int a = f(aaaaaaaaaaaaaaaaaaaaaa &&\n" " bbbbbbbbbbbbbbbbbbbbbb);", @@ -781,7 +792,10 @@ TEST_F(AlignBracketsTest, ParenthesesAndOperandAlignment) { TEST_F(AlignBracketsTest, BlockIndentAndNamespace) { auto Style = getLLVMStyleWithColumns(120); Style.AllowShortNamespacesOnASingleLine = true; - Style.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + Style.BreakAfterOpenBracketFunction = true; + Style.BreakAfterOpenBracketBracedList = true; + Style.BreakBeforeCloseBracketFunction = true; + Style.BreakBeforeCloseBracketBracedList = true; verifyNoCrash( "namespace {\n" diff --git a/clang/unittests/Format/ConfigParseTest.cpp b/clang/unittests/Format/ConfigParseTest.cpp index 6488e38badee..43b21176962e 100644 --- a/clang/unittests/Format/ConfigParseTest.cpp +++ b/clang/unittests/Format/ConfigParseTest.cpp @@ -172,6 +172,16 @@ TEST(ConfigParseTest, ParsesConfigurationBools) { CHECK_PARSE_BOOL(BinPackLongBracedList); CHECK_PARSE_BOOL(BreakAdjacentStringLiterals); CHECK_PARSE_BOOL(BreakAfterJavaFieldAnnotations); + CHECK_PARSE_BOOL(BreakAfterOpenBracketBracedList); + CHECK_PARSE_BOOL(BreakAfterOpenBracketFunction); + CHECK_PARSE_BOOL(BreakAfterOpenBracketIf); + CHECK_PARSE_BOOL(BreakAfterOpenBracketLoop); + CHECK_PARSE_BOOL(BreakAfterOpenBracketSwitch); + CHECK_PARSE_BOOL(BreakBeforeCloseBracketBracedList); + CHECK_PARSE_BOOL(BreakBeforeCloseBracketFunction); + CHECK_PARSE_BOOL(BreakBeforeCloseBracketIf); + CHECK_PARSE_BOOL(BreakBeforeCloseBracketLoop); + CHECK_PARSE_BOOL(BreakBeforeCloseBracketSwitch); CHECK_PARSE_BOOL(BreakBeforeTemplateCloser); CHECK_PARSE_BOOL(BreakBeforeTernaryOperators); CHECK_PARSE_BOOL(BreakStringLiterals); @@ -533,20 +543,23 @@ TEST(ConfigParseTest, ParsesConfiguration) { CHECK_PARSE("EnumTrailingComma: Remove", EnumTrailingComma, 
FormatStyle::ETC_Remove); - Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; - CHECK_PARSE("AlignAfterOpenBracket: Align", AlignAfterOpenBracket, - FormatStyle::BAS_Align); - CHECK_PARSE("AlignAfterOpenBracket: DontAlign", AlignAfterOpenBracket, - FormatStyle::BAS_DontAlign); + Style.AlignAfterOpenBracket = false; + CHECK_PARSE("AlignAfterOpenBracket: Align", AlignAfterOpenBracket, true); + CHECK_PARSE("AlignAfterOpenBracket: DontAlign", AlignAfterOpenBracket, false); + // For backward compatibility: CHECK_PARSE("AlignAfterOpenBracket: AlwaysBreak", AlignAfterOpenBracket, - FormatStyle::BAS_AlwaysBreak); + true); + CHECK_PARSE("AlignAfterOpenBracket: AlwaysBreak\n" + "BreakAfterOpenBracketIf: false", + BreakAfterOpenBracketIf, false); + CHECK_PARSE("BreakAfterOpenBracketLoop: true\n" + "AlignAfterOpenBracket: AlwaysBreak", + BreakAfterOpenBracketLoop, true); + CHECK_PARSE("AlignAfterOpenBracket: false", AlignAfterOpenBracket, false); CHECK_PARSE("AlignAfterOpenBracket: BlockIndent", AlignAfterOpenBracket, - FormatStyle::BAS_BlockIndent); - // For backward compatibility: - CHECK_PARSE("AlignAfterOpenBracket: false", AlignAfterOpenBracket, - FormatStyle::BAS_DontAlign); - CHECK_PARSE("AlignAfterOpenBracket: true", AlignAfterOpenBracket, - FormatStyle::BAS_Align); + true); + Style.AlignAfterOpenBracket = false; + CHECK_PARSE("AlignAfterOpenBracket: true", AlignAfterOpenBracket, true); Style.AlignEscapedNewlines = FormatStyle::ENAS_Left; CHECK_PARSE("AlignEscapedNewlines: DontAlign", AlignEscapedNewlines, diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index d45babe1b82a..ca9e7925e5e9 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -5126,7 +5126,8 @@ TEST_F(FormatTest, DesignatedInitializers) { TEST_F(FormatTest, BracedInitializerIndentWidth) { auto Style = getLLVMStyleWithColumns(60); Style.BinPackArguments = true; - Style.AlignAfterOpenBracket = 
FormatStyle::BAS_AlwaysBreak; + Style.BreakAfterOpenBracketFunction = true; + Style.BreakAfterOpenBracketBracedList = true; Style.BracedInitializerIndentWidth = 6; // Non-initializing braces are unaffected by BracedInitializerIndentWidth. @@ -5302,7 +5303,8 @@ TEST_F(FormatTest, BracedInitializerIndentWidth) { Style); // Aligning after open braces unaffected by BracedInitializerIndentWidth. - Style.AlignAfterOpenBracket = FormatStyle::BAS_Align; + Style.AlignAfterOpenBracket = true; + Style.BreakAfterOpenBracketBracedList = false; verifyFormat("SomeStruct s{\"xxxxxxxxxxxxx\", \"yyyyyyyyyyyyy\",\n" " \"zzzzzzzzzzzzz\"};", Style); @@ -7459,7 +7461,7 @@ TEST_F(FormatTest, ExpressionIndentationBreakingBeforeOperators) { Style.IndentWidth = 4; Style.TabWidth = 4; Style.UseTab = FormatStyle::UT_Always; - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + Style.AlignAfterOpenBracket = false; Style.AlignOperands = FormatStyle::OAS_DontAlign; verifyFormat("return someVeryVeryLongConditionThatBarelyFitsOnALine\n" "\t&& (someOtherLongishConditionPart1\n" @@ -7470,7 +7472,7 @@ TEST_F(FormatTest, ExpressionIndentationBreakingBeforeOperators) { Style); Style = getLLVMStyleWithColumns(20); - Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Style.BreakAfterOpenBracketFunction = true; Style.BinPackParameters = FormatStyle::BPPS_OnePerLine; Style.BreakBeforeBinaryOperators = FormatStyle::BOS_NonAssignment; Style.ContinuationIndentWidth = 2; @@ -7632,7 +7634,7 @@ TEST_F(FormatTest, NoOperandAlignment) { " * cccccccccccccccccccccccccccccccccccc;", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + Style.AlignAfterOpenBracket = false; verifyFormat("return (a > b\n" " // comment1\n" " // comment2\n" @@ -11248,7 +11250,7 @@ TEST_F(FormatTest, BreakBeforeTemplateCloser) { TEST_F(FormatTest, WrapsTemplateParameters) { FormatStyle Style = getLLVMStyle(); - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + Style.AlignAfterOpenBracket = 
false; Style.BreakBeforeBinaryOperators = FormatStyle::BOS_None; verifyFormat( "template struct q {};\n" @@ -11256,7 +11258,7 @@ TEST_F(FormatTest, WrapsTemplateParameters) { " aaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaa>\n" " y;", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_DontAlign; + Style.AlignAfterOpenBracket = false; Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; verifyFormat( "template struct r {};\n" @@ -11264,7 +11266,7 @@ TEST_F(FormatTest, WrapsTemplateParameters) { " aaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaa>\n" " y;", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Style.BreakAfterOpenBracketFunction = true; Style.BreakBeforeBinaryOperators = FormatStyle::BOS_None; verifyFormat("template struct s {};\n" "extern s<\n" @@ -11274,7 +11276,7 @@ TEST_F(FormatTest, WrapsTemplateParameters) { "aaaaaaaaaaaaaaaaaaaaaa>\n" " y;", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Style.BreakAfterOpenBracketFunction = true; Style.BreakBeforeBinaryOperators = FormatStyle::BOS_All; verifyFormat("template struct t {};\n" "extern t<\n" @@ -14302,7 +14304,7 @@ TEST_F(FormatTest, LayoutCxx11BraceInitializers) { "};", NoBinPacking); - NoBinPacking.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + NoBinPacking.BreakAfterOpenBracketBracedList = true; verifyFormat("static uint8 CddDp83848Reg[] = {\n" " CDDDP83848_BMCR_REGISTER,\n" " CDDDP83848_BMSR_REGISTER,\n" @@ -15972,13 +15974,14 @@ TEST_F(FormatTest, BreaksStringLiteralOperands) { // In a function call with two operands, with AlignAfterOpenBracket enabled, // the first must be broken with a line break before it. 
FormatStyle Style = getLLVMStyleWithColumns(25); - Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Style.BreakAfterOpenBracketFunction = true; verifyFormat("someFunction(\n" " \"long long long \"\n" " \"long\",\n" " a);", "someFunction(\"long long long long\", a);", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + Style.BreakAfterOpenBracketFunction = true; + Style.BreakBeforeCloseBracketFunction = true; verifyFormat("someFunction(\n" " \"long long long \"\n" " \"long\",\n" @@ -17773,7 +17776,7 @@ TEST_F(FormatTest, ConfigurableSpacesInParens) { Spaces.ColumnLimit = 80; Spaces.IndentWidth = 4; - Spaces.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Spaces.BreakAfterOpenBracketFunction = true; verifyFormat("void foo( ) {\n" " size_t foo = (*(function))(\n" " Foooo, Barrrrr, Foooo, Barrrr, FoooooooooLooooong, " @@ -17798,7 +17801,8 @@ TEST_F(FormatTest, ConfigurableSpacesInParens) { "}", Spaces); - Spaces.AlignAfterOpenBracket = FormatStyle::BAS_BlockIndent; + Spaces.BreakAfterOpenBracketFunction = true; + Spaces.BreakBeforeCloseBracketFunction = true; verifyFormat("void foo( ) {\n" " size_t foo = (*(function))(\n" " Foooo, Barrrrr, Foooo, Barrrr, FoooooooooLooooong, " @@ -22827,7 +22831,7 @@ TEST_F(FormatTest, ConstructorInitializerIndentWidth) { ": aaaaaaaaaaaaa(aaaaaaaaaaaaaa), aaaaaaaaaaaaa(aaaaaaaaaaaaaa),\n" " aaaaaaaaaaaaa(aaaaaaaaaaaaaa) {}", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Style.BreakAfterOpenBracketFunction = true; verifyFormat( "SomeLongTemplateVariableName<\n" " aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa, aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa>", @@ -24082,7 +24086,7 @@ TEST_F(FormatTest, FormatsLambdas) { " return aFunkyFunctionCall(qux);\n" " }} {}", Style); - Style.AlignAfterOpenBracket = FormatStyle::BAS_AlwaysBreak; + Style.BreakAfterOpenBracketFunction = true; // FIXME: The following test should pass, but fails at the time of writing. 
#if 0 // As long as all the non-lambda arguments fit on a single line, AlwaysBreak diff --git a/clang/unittests/Format/FormatTestJS.cpp b/clang/unittests/Format/FormatTestJS.cpp index 91577b9a4916..4847151c14b3 100644 --- a/clang/unittests/Format/FormatTestJS.cpp +++ b/clang/unittests/Format/FormatTestJS.cpp @@ -2883,7 +2883,7 @@ TEST_F(FormatTestJS, DontBreakFieldsAsGoToLabels) { TEST_F(FormatTestJS, BreakAfterOpenBracket) { auto Style = getGoogleStyle(FormatStyle::LK_JavaScript); - EXPECT_EQ(Style.AlignAfterOpenBracket, FormatStyle::BAS_AlwaysBreak); + EXPECT_EQ(Style.BreakAfterOpenBracketFunction, true); verifyFormat("ctrl.onCopy(/** @type {!WizEvent}*/ (\n" " {event, targetElement: {el: () => selectedElement}}));", Style); -- cgit v1.2.3 From d2c5e49015fdc20cb4d6cd131edb3ebfd3dbf44b Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 30 Oct 2025 12:29:09 -0700 Subject: [NSan] Make Tests work with Internal Shell There was one test that was using a subshell. This is not supported by lit's internal shell. Rewrite the test to use the readfile substitution. Reviewers: alexander-shaposhnikov, fmayer Reviewed By: alexander-shaposhnikov, fmayer Pull Request: https://github.com/llvm/llvm-project/pull/165142 --- compiler-rt/test/nsan/Posix/allocator_mapping.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler-rt/test/nsan/Posix/allocator_mapping.cpp b/compiler-rt/test/nsan/Posix/allocator_mapping.cpp index 3a3e655e259d..a92962e16d9d 100644 --- a/compiler-rt/test/nsan/Posix/allocator_mapping.cpp +++ b/compiler-rt/test/nsan/Posix/allocator_mapping.cpp @@ -2,7 +2,8 @@ /// Test that a module constructor can not map memory over the NSan heap /// (without MAP_FIXED, of course). 
// RUN: %clangxx_nsan -O0 %s -o %t_1 -// RUN: %clangxx_nsan -O0 -DHEAP_ADDRESS=$(%run %t_1) %s -o %t_2 && %run %t_2 +// RUN: %run %t_1 > %t.heap_address +// RUN: %clangxx_nsan -O0 -DHEAP_ADDRESS=%{readfile:%t.heap_address} %s -o %t_2 && %run %t_2 #include #include -- cgit v1.2.3 From 291b8ce40d0de8f876ab3791740b98dff5961a93 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 30 Oct 2025 12:30:47 -0700 Subject: [XRay] Make Test Work with Internal Shell There was one test that set an environment variable without using env and also used a subshell. These are features the internal shell does not support. Rewrite the test to use readfile/env. Reviewers: fmayer, MaskRay Reviewed By: fmayer Pull Request: https://github.com/llvm/llvm-project/pull/165143 --- compiler-rt/test/xray/TestCases/Posix/fdr-single-thread.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/compiler-rt/test/xray/TestCases/Posix/fdr-single-thread.cpp b/compiler-rt/test/xray/TestCases/Posix/fdr-single-thread.cpp index b8803aedc885..36a4e65988f9 100644 --- a/compiler-rt/test/xray/TestCases/Posix/fdr-single-thread.cpp +++ b/compiler-rt/test/xray/TestCases/Posix/fdr-single-thread.cpp @@ -1,11 +1,12 @@ // RUN: %clangxx_xray -g -std=c++11 %s -o %t // RUN: rm -f fdr-logging-1thr-* -// RUN: XRAY_OPTIONS=XRAY_OPTIONS="verbosity=1 patch_premain=true \ +// RUN: env XRAY_OPTIONS=XRAY_OPTIONS="verbosity=1 patch_premain=true \ // RUN: xray_fdr_log=true \ // RUN: xray_fdr_log_func_duration_threshold_us=0 \ // RUN: xray_logfile_base=fdr-logging-1thr-" %run %t 2>&1 +// RUN: ls fdr-logging-1thr-* | head -n1 | tr -d '\n' > %t.xray_input // RUN: %llvm_xray convert --output-format=yaml --symbolize --instr_map=%t \ -// RUN: "`ls fdr-logging-1thr-* | head -n1`" | FileCheck %s +// RUN: "%{readfile:%t.xray_input}" | FileCheck %s // RUN: rm fdr-logging-1thr-* // UNSUPPORTED: target=arm{{.*}} -- cgit v1.2.3 From 39189c3e99b634a0a8b58cf89312d6d47d0a51ba Mon Sep 17 00:00:00 2001 From: Michael Jones 
Date: Thu, 30 Oct 2025 12:36:51 -0700 Subject: [libc] Fix strftime_test (#165770) A typo in #165711 caused sanitizer failures (the small buffer was used for the larger test). Renamed the variables to avoid the mistake in future. --- libc/test/src/time/strftime_test.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/libc/test/src/time/strftime_test.cpp b/libc/test/src/time/strftime_test.cpp index 38176f77804d..522215279190 100644 --- a/libc/test/src/time/strftime_test.cpp +++ b/libc/test/src/time/strftime_test.cpp @@ -2329,20 +2329,21 @@ TEST(LlvmLibcStrftimeTest, TimeFormatFullDateTime) { TEST(LlvmLibcStrftimeTest, BufferTooSmall) { struct tm time; - char buffer[1]; + char tiny_buffer[1]; time.tm_year = get_adjusted_year(2025); time.tm_mon = 10; time.tm_mday = 24; size_t written = - LIBC_NAMESPACE::strftime(buffer, sizeof(buffer), "%F", &time); + LIBC_NAMESPACE::strftime(tiny_buffer, sizeof(tiny_buffer), "%F", &time); EXPECT_EQ(written, size_t{0}); - char buffer2[10]; + char small_buffer[10]; // The string "2025-11-24" is 10 chars, // so strftime needs 10 + 1 bytes to write the string and the null terminator. - written = LIBC_NAMESPACE::strftime(buffer, sizeof(buffer2), "%F", &time); + written = + LIBC_NAMESPACE::strftime(small_buffer, sizeof(small_buffer), "%F", &time); EXPECT_EQ(written, size_t{0}); } -- cgit v1.2.3 From fdf5ece379188177be4329daa3fddca31bbedb8a Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 30 Oct 2025 12:42:27 -0700 Subject: [MSan] Make Test work with Internal Shell This test used a subshell which is not supported by lit's internal shell. Rewrite it to use the readfile substitution. 
Reviewers: thurstond, fmayer Reviewed By: thurstond, fmayer Pull Request: https://github.com/llvm/llvm-project/pull/165144 --- compiler-rt/test/msan/allocator_mapping.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler-rt/test/msan/allocator_mapping.cpp b/compiler-rt/test/msan/allocator_mapping.cpp index e7a12da48915..6eaba7e16a5b 100644 --- a/compiler-rt/test/msan/allocator_mapping.cpp +++ b/compiler-rt/test/msan/allocator_mapping.cpp @@ -3,7 +3,8 @@ // mapping the heap early, in __msan_init. // // RUN: %clangxx_msan -O0 %s -o %t_1 -// RUN: %clangxx_msan -O0 -DHEAP_ADDRESS=$(%run %t_1) %s -o %t_2 && %run %t_2 +// RUN: %run %t_1 > %t.heap_address +// RUN: %clangxx_msan -O0 -DHEAP_ADDRESS=%{readfile:%t.heap_address} %s -o %t_2 && %run %t_2 // // This test only makes sense for the 64-bit allocator. The 32-bit allocator // does not have a fixed mapping. Exclude platforms that use the 32-bit -- cgit v1.2.3 From 841d85f6ee61b536405561bc263f360df2f385eb Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 30 Oct 2025 12:46:01 -0700 Subject: [Profile] Rewrite Test to work with Internal Shell There was one test that used subshells to read a file. Replace those subshells with the readfile substitution. 
Reviewers: fmayer, mingmingl-llvm Reviewed By: mingmingl-llvm, fmayer Pull Request: https://github.com/llvm/llvm-project/pull/165145 --- compiler-rt/test/profile/instrprof-hostname.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler-rt/test/profile/instrprof-hostname.c b/compiler-rt/test/profile/instrprof-hostname.c index b77cf8df158b..c0b3426eeaa8 100644 --- a/compiler-rt/test/profile/instrprof-hostname.c +++ b/compiler-rt/test/profile/instrprof-hostname.c @@ -1,7 +1,7 @@ // RUN: %clang_profgen -o %t -O3 %s // RUN: env LLVM_PROFILE_FILE=%h.%t-%h.profraw_%h %run %t -// RUN: %run uname -n > %t.n -// RUN: llvm-profdata merge -o %t.profdata `cat %t.n`.%t-`cat %t.n`.profraw_`cat %t.n` +// RUN: %run uname -n | tr -d '\n' > %t.n +// RUN: llvm-profdata merge -o %t.profdata %{readfile:%t.n}.%t-%{readfile:%t.n}.profraw_%{readfile:%t.n} // RUN: %clang_profuse=%t.profdata -o - -S -emit-llvm %s | FileCheck %s // REQUIRES: shell -- cgit v1.2.3 From bb8261b7f3863350848751de9d5f3f52cf692962 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 30 Oct 2025 12:47:14 -0700 Subject: [MemProf] Make Test work with Internal Shell There is one test that uses a subshell to generate a long path name. Replace it with a python invocation and a readfile substitution. This helps move compiler-rt over to lit's internal shell. 
Reviewers: fmayer, snehasish, teresajohnson Reviewed By: fmayer, teresajohnson Pull Request: https://github.com/llvm/llvm-project/pull/165146 --- compiler-rt/test/memprof/TestCases/log_path_test.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler-rt/test/memprof/TestCases/log_path_test.cpp b/compiler-rt/test/memprof/TestCases/log_path_test.cpp index 664ab7939319..683ca67122c3 100644 --- a/compiler-rt/test/memprof/TestCases/log_path_test.cpp +++ b/compiler-rt/test/memprof/TestCases/log_path_test.cpp @@ -18,7 +18,8 @@ // RUN: %env_memprof_opts=print_text=true:log_path=/dev/null/INVALID not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-BAD-DIR --dump-input=always // Too long log_path. -// RUN: %env_memprof_opts=print_text=true:log_path=`for((i=0;i<10000;i++)); do echo -n $i; done` \ +// RUN: %python -c "for i in range(0, 10000): print(i, end='')" > %t.long_log_path +// RUN: %env_memprof_opts=print_text=true:log_path=%{readfile:%t.long_log_path} \ // RUN: not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-LONG --dump-input=always // Specifying the log name via the __memprof_profile_filename variable. -- cgit v1.2.3 From fe52f1d77d082aa967d1557aff021c1eeb7c528a Mon Sep 17 00:00:00 2001 From: wdx727 Date: Fri, 31 Oct 2025 04:11:08 +0800 Subject: Adding Matching and Inference Functionality to Propeller-PR3: Read basic block hashes from propeller profile. (#164223) Adding Matching and Inference Functionality to Propeller. For detailed information, please refer to the following RFC: https://discourse.llvm.org/t/rfc-adding-matching-and-inference-functionality-to-propeller/86238. This is the third PR, which is used to read basic block hashes from the propeller profile. 
The associated PRs are: PR1: https://github.com/llvm/llvm-project/pull/160706 PR2: https://github.com/llvm/llvm-project/pull/162963 co-authors: lifengxiang1025 [lifengxiang@kuaishou.com](mailto:lifengxiang@kuaishou.com); zcfh [wuminghui03@kuaishou.com](mailto:wuminghui03@kuaishou.com) Co-authored-by: lifengxiang1025 Co-authored-by: zcfh --- .../llvm/CodeGen/BasicBlockSectionsProfileReader.h | 4 ++ .../CodeGen/BasicBlockSectionsProfileReader.cpp | 19 ++++++++ .../CodeGen/X86/basic-block-sections-bb-hash.ll | 51 ++++++++++++++++++++++ .../X86/basic-block-sections-clusters-error.ll | 14 ++++++ 4 files changed, 88 insertions(+) create mode 100644 llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll diff --git a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h index 48650a6df22f..823753021ff7 100644 --- a/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h +++ b/llvm/include/llvm/CodeGen/BasicBlockSectionsProfileReader.h @@ -54,6 +54,10 @@ struct FunctionPathAndClusterInfo { DenseMap NodeCounts; // Edge counts for each edge, stored as a nested map. DenseMap> EdgeCounts; + // Hash for each basic block. The Hashes are stored for every original block + // (not cloned blocks), hence the map key being unsigned instead of + // UniqueBBID. + DenseMap BBHashes; }; class BasicBlockSectionsProfileReader { diff --git a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp index fbcd614b85d1..485b44ae4c4a 100644 --- a/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp +++ b/llvm/lib/CodeGen/BasicBlockSectionsProfileReader.cpp @@ -287,6 +287,25 @@ Error BasicBlockSectionsProfileReader::ReadV1Profile() { } continue; } + case 'h': { // Basic block hash secifier. + // Skip the profile when the profile iterator (FI) refers to the + // past-the-end element. 
+ if (FI == ProgramPathAndClusterInfo.end()) + continue; + for (auto BBIDHashStr : Values) { + auto [BBIDStr, HashStr] = BBIDHashStr.split(':'); + unsigned long long BBID = 0, Hash = 0; + if (getAsUnsignedInteger(BBIDStr, 10, BBID)) + return createProfileParseError(Twine("unsigned integer expected: '") + + BBIDStr + "'"); + if (getAsUnsignedInteger(HashStr, 16, Hash)) + return createProfileParseError( + Twine("unsigned integer expected in hex format: '") + HashStr + + "'"); + FI->second.BBHashes[BBID] = Hash; + } + continue; + } default: return createProfileParseError(Twine("invalid specifier: '") + Twine(Specifier) + "'"); diff --git a/llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll b/llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll new file mode 100644 index 000000000000..f46d6ed262b2 --- /dev/null +++ b/llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll @@ -0,0 +1,51 @@ +; BB section test with basic block hashes. +; +; RUN: llc %s -O0 -mtriple=x86_64-pc-linux -function-sections -filetype=obj -basic-block-address-map -emit-bb-hash -o %t.o +; RUN: obj2yaml %t.o -o %t.yaml +; +;; Profile for version 1: +; RUN: echo 'v1' > %t +; RUN: echo 'f foo' >> %t +; RUN: echo 'g 0:10,1:9,2:1 1:8,3:8 2:2,3:2 3:11' >> %t +; RUN: echo 'c 0 2 3' >> %t + +; These commands read BB hashes from SHT_LLVM_BB_ADDR_MAP +; and put them into the basic blocks sections profile. 
+; RUN: grep -E '^\s+(- ID:|Hash:)' %t.yaml | \ +; RUN: grep -B1 'Hash:' | \ +; RUN: sed 's/^\s*//; s/^- ID: *//; s/Hash: *0x//' | \ +; RUN: paste -d: - - | \ +; RUN: tr '\n' ' ' | \ +; RUN: sed 's/ $/\n/; s/^/h /' >> %t +; +; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t | FileCheck %s +; +define void @foo(i1 zeroext) nounwind { + %2 = alloca i8, align 1 + %3 = zext i1 %0 to i8 + store i8 %3, ptr %2, align 1 + %4 = load i8, ptr %2, align 1 + %5 = trunc i8 %4 to i1 + br i1 %5, label %6, label %8 + +6: ; preds = %1 + %7 = call i32 @bar() + br label %10 + +8: ; preds = %1 + %9 = call i32 @baz() + br label %10 + +10: ; preds = %8, %6 + ret void +} + +declare i32 @bar() #1 + +declare i32 @baz() #1 + +; CHECK: .section .text.foo,"ax",@progbits +; CHECK: callq baz +; CHECK: retq +; CHECK: .section .text.split.foo,"ax",@progbits +; CHECK: callq bar diff --git a/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll b/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll index 751ab76722c0..eb0a14b2820b 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-clusters-error.ll @@ -69,6 +69,20 @@ ; RUN: echo 'g 0:4,1:2:3' >> %t15 ; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t15 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR15 ; CHECK-ERROR15: LLVM ERROR: invalid profile {{.*}} at line 4: unsigned integer expected: '2:3' +; RUN: echo 'v1' > %t16 +; RUN: echo 'f dummy1' >> %t16 +; RUN: echo 'c 0 1' >> %t16 +; RUN: echo 'g 0:4,1:2' >> %t16 +; RUN: echo 'h a:1111111111111111 1:ffffffffffffffff' >> %t16 +; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t16 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR16 +; CHECK-ERROR16: LLVM ERROR: invalid profile {{.*}} at line 5: unsigned integer expected: 'a' +; RUN: echo 'v1' > %t17 +; RUN: echo 'f dummy1' >> %t17 +; RUN: echo 'c 0 1' >> 
%t17 +; RUN: echo 'g 0:4,1:2' >> %t17 +; RUN: echo 'h 0:111111111111111g 1:ffffffffffffffff' >> %t17 +; RUN: not --crash llc < %s -O0 -mtriple=x86_64 -function-sections -basic-block-sections=%t17 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR17 +; CHECK-ERROR17: LLVM ERROR: invalid profile {{.*}} at line 5: unsigned integer expected in hex format: '111111111111111g' define i32 @dummy1(i32 %x, i32 %y, i32 %z) { -- cgit v1.2.3 From e9368a056dff94815b3c43a0da78e7c1e5b3d4f4 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 30 Oct 2025 13:12:06 -0700 Subject: [SHT_LLVM_BB_ADDR] Implement ELF and YAML support for Propeller CFG data in PGO analysis map. (#164914) This PR implements the ELF support for PostLink CFG in PGO analysis map as discussed in [RFC](https://discourse.llvm.org/t/rfc-extending-the-pgo-analysis-map-with-propeller-cfg-frequencies/88617/2). A later PR will implement the Codegen Support. --- llvm/include/llvm/Object/ELFTypes.h | 42 ++++++---- llvm/include/llvm/ObjectYAML/ELFYAML.h | 4 +- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2 +- llvm/lib/Object/ELF.cpp | 26 +++++-- llvm/lib/ObjectYAML/ELFEmitter.cpp | 18 ++++- llvm/lib/ObjectYAML/ELFYAML.cpp | 4 +- .../ELF/bb-addr-map-feature-warning.test | 37 +++++++++ .../ELF/bb-addr-map-pgo-analysis-map.test | 17 +++-- .../obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml | 49 +++++++----- .../yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml | 25 +++--- llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml | 4 +- llvm/tools/llvm-readobj/ELFDumper.cpp | 4 + llvm/tools/obj2yaml/elf2yaml.cpp | 11 ++- llvm/unittests/Object/ELFObjectFileTest.cpp | 89 ++++++++++++---------- llvm/unittests/Object/ELFTypesTest.cpp | 38 ++++----- 15 files changed, 242 insertions(+), 128 deletions(-) create mode 100644 llvm/test/tools/llvm-readobj/ELF/bb-addr-map-feature-warning.test diff --git a/llvm/include/llvm/Object/ELFTypes.h b/llvm/include/llvm/Object/ELFTypes.h index e9a417d3d4fb..467ab6fd3c1e 100644 --- 
a/llvm/include/llvm/Object/ELFTypes.h +++ b/llvm/include/llvm/Object/ELFTypes.h @@ -834,30 +834,32 @@ struct BBAddrMap { bool OmitBBEntries : 1; bool CallsiteEndOffsets : 1; bool BBHash : 1; + bool PostLinkCfg : 1; bool hasPGOAnalysis() const { return FuncEntryCount || BBFreq || BrProb; } bool hasPGOAnalysisBBData() const { return BBFreq || BrProb; } // Encodes to minimum bit width representation. - uint8_t encode() const { - return (static_cast(FuncEntryCount) << 0) | - (static_cast(BBFreq) << 1) | - (static_cast(BrProb) << 2) | - (static_cast(MultiBBRange) << 3) | - (static_cast(OmitBBEntries) << 4) | - (static_cast(CallsiteEndOffsets) << 5) | - (static_cast(BBHash) << 6); + uint16_t encode() const { + return (static_cast(FuncEntryCount) << 0) | + (static_cast(BBFreq) << 1) | + (static_cast(BrProb) << 2) | + (static_cast(MultiBBRange) << 3) | + (static_cast(OmitBBEntries) << 4) | + (static_cast(CallsiteEndOffsets) << 5) | + (static_cast(BBHash) << 6) | + (static_cast(PostLinkCfg) << 7); } // Decodes from minimum bit width representation and validates no // unnecessary bits are used. 
- static Expected decode(uint8_t Val) { + static Expected decode(uint16_t Val) { Features Feat{ static_cast(Val & (1 << 0)), static_cast(Val & (1 << 1)), static_cast(Val & (1 << 2)), static_cast(Val & (1 << 3)), static_cast(Val & (1 << 4)), static_cast(Val & (1 << 5)), - static_cast(Val & (1 << 6))}; + static_cast(Val & (1 << 6)), static_cast(Val & (1 << 7))}; if (Feat.encode() != Val) return createStringError( std::error_code(), "invalid encoding for BBAddrMap::Features: 0x%x", @@ -867,10 +869,11 @@ struct BBAddrMap { bool operator==(const Features &Other) const { return std::tie(FuncEntryCount, BBFreq, BrProb, MultiBBRange, - OmitBBEntries, CallsiteEndOffsets, BBHash) == + OmitBBEntries, CallsiteEndOffsets, BBHash, PostLinkCfg) == std::tie(Other.FuncEntryCount, Other.BBFreq, Other.BrProb, Other.MultiBBRange, Other.OmitBBEntries, - Other.CallsiteEndOffsets, Other.BBHash); + Other.CallsiteEndOffsets, Other.BBHash, + Other.PostLinkCfg); } }; @@ -1010,23 +1013,30 @@ struct PGOAnalysisMap { /// probability associated with it. struct SuccessorEntry { /// Unique ID of this successor basic block. - uint32_t ID; + uint32_t ID = 0; /// Branch Probability of the edge to this successor taken from MBPI. BranchProbability Prob; + /// Raw edge count from the post link profile (e.g., from bolt or + /// propeller). + uint64_t PostLinkFreq = 0; bool operator==(const SuccessorEntry &Other) const { - return std::tie(ID, Prob) == std::tie(Other.ID, Other.Prob); + return std::tie(ID, Prob, PostLinkFreq) == + std::tie(Other.ID, Other.Prob, Other.PostLinkFreq); } }; /// Block frequency taken from MBFI BlockFrequency BlockFreq; + /// Raw block count taken from the post link profile (e.g., from bolt or + /// propeller). 
+ uint64_t PostLinkBlockFreq = 0; /// List of successors of the current block llvm::SmallVector Successors; bool operator==(const PGOBBEntry &Other) const { - return std::tie(BlockFreq, Successors) == - std::tie(Other.BlockFreq, Other.Successors); + return std::tie(BlockFreq, PostLinkBlockFreq, Successors) == + std::tie(Other.BlockFreq, PostLinkBlockFreq, Other.Successors); } }; diff --git a/llvm/include/llvm/ObjectYAML/ELFYAML.h b/llvm/include/llvm/ObjectYAML/ELFYAML.h index a7c7c7c436dc..a8236ca37b5e 100644 --- a/llvm/include/llvm/ObjectYAML/ELFYAML.h +++ b/llvm/include/llvm/ObjectYAML/ELFYAML.h @@ -166,7 +166,7 @@ struct BBAddrMapEntry { std::optional Hash; }; uint8_t Version; - llvm::yaml::Hex8 Feature; + llvm::yaml::Hex16 Feature; struct BBRangeEntry { llvm::yaml::Hex64 BaseAddress; @@ -203,8 +203,10 @@ struct PGOAnalysisMapEntry { struct SuccessorEntry { uint32_t ID; llvm::yaml::Hex32 BrProb; + std::optional PostLinkBrFreq; }; std::optional BBFreq; + std::optional PostLinkBBFreq; std::optional> Successors; }; std::optional FuncEntryCount; diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 8aa488f0efd8..f65d88a669f1 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1443,7 +1443,7 @@ getBBAddrMapFeature(const MachineFunction &MF, int NumMBBSectionRanges, MF.hasBBSections() && NumMBBSectionRanges > 1, // Use static_cast to avoid breakage of tests on windows. 
static_cast(BBAddrMapSkipEmitBBEntries), HasCalls, - static_cast(EmitBBHash)}; + static_cast(EmitBBHash), false}; } void AsmPrinter::emitBBAddrMapSection(const MachineFunction &MF) { diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 6da97f9b3755..354c51d66419 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -831,17 +831,17 @@ decodeBBAddrMapImpl(const ELFFile &EF, }; uint8_t Version = 0; - uint8_t Feature = 0; + uint16_t Feature = 0; BBAddrMap::Features FeatEnable{}; while (!ULEBSizeErr && !MetadataDecodeErr && Cur && Cur.tell() < Content.size()) { Version = Data.getU8(Cur); if (!Cur) break; - if (Version < 2 || Version > 4) + if (Version < 2 || Version > 5) return createError("unsupported SHT_LLVM_BB_ADDR_MAP version: " + Twine(static_cast(Version))); - Feature = Data.getU8(Cur); // Feature byte + Feature = Version < 5 ? Data.getU8(Cur) : Data.getU16(Cur); if (!Cur) break; auto FeatEnableOrErr = BBAddrMap::Features::decode(Feature); @@ -858,6 +858,11 @@ decodeBBAddrMapImpl(const ELFFile &EF, "basic block hash feature is enabled: version = " + Twine(static_cast(Version)) + " feature = " + Twine(static_cast(Feature))); + if (FeatEnable.PostLinkCfg && Version < 5) + return createError("version should be >= 5 for SHT_LLVM_BB_ADDR_MAP when " + "post link cfg feature is enabled: version = " + + Twine(static_cast(Version)) + + " feature = " + Twine(static_cast(Feature))); uint32_t NumBlocksInBBRange = 0; uint32_t NumBBRanges = 1; typename ELFFile::uintX_t RangeBaseAddress = 0; @@ -946,6 +951,10 @@ decodeBBAddrMapImpl(const ELFFile &EF, uint64_t BBF = FeatEnable.BBFreq ? readULEB128As(Data, Cur, ULEBSizeErr) : 0; + uint32_t PostLinkBBFreq = + FeatEnable.PostLinkCfg + ? 
readULEB128As(Data, Cur, ULEBSizeErr) + : 0; // Branch probability llvm::SmallVector @@ -955,13 +964,20 @@ decodeBBAddrMapImpl(const ELFFile &EF, for (uint64_t I = 0; I < SuccCount; ++I) { uint32_t BBID = readULEB128As(Data, Cur, ULEBSizeErr); uint32_t BrProb = readULEB128As(Data, Cur, ULEBSizeErr); + uint32_t PostLinkFreq = + FeatEnable.PostLinkCfg + ? readULEB128As(Data, Cur, ULEBSizeErr) + : 0; + if (PGOAnalyses) - Successors.push_back({BBID, BranchProbability::getRaw(BrProb)}); + Successors.push_back( + {BBID, BranchProbability::getRaw(BrProb), PostLinkFreq}); } } if (PGOAnalyses) - PGOBBEntries.push_back({BlockFrequency(BBF), std::move(Successors)}); + PGOBBEntries.push_back( + {BlockFrequency(BBF), PostLinkBBFreq, std::move(Successors)}); } if (PGOAnalyses) diff --git a/llvm/lib/ObjectYAML/ELFEmitter.cpp b/llvm/lib/ObjectYAML/ELFEmitter.cpp index 8b75fbe8291f..8530785d07c9 100644 --- a/llvm/lib/ObjectYAML/ELFEmitter.cpp +++ b/llvm/lib/ObjectYAML/ELFEmitter.cpp @@ -1465,13 +1465,19 @@ void ELFState::writeSectionContent( for (const auto &[Idx, E] : llvm::enumerate(*Section.Entries)) { // Write version and feature values. 
if (Section.Type == llvm::ELF::SHT_LLVM_BB_ADDR_MAP) { - if (E.Version > 4) + if (E.Version > 5) WithColor::warning() << "unsupported SHT_LLVM_BB_ADDR_MAP version: " << static_cast(E.Version) << "; encoding using the most recent version"; CBA.write(E.Version); - CBA.write(E.Feature); - SHeader.sh_size += 2; + SHeader.sh_size += 1; + if (E.Version < 5) { + CBA.write(static_cast(E.Feature)); + SHeader.sh_size += 1; + } else { + CBA.write(E.Feature, ELFT::Endianness); + SHeader.sh_size += 2; + } } auto FeatureOrErr = llvm::object::BBAddrMap::Features::decode(E.Feature); bool MultiBBRangeFeatureEnabled = false; @@ -1556,11 +1562,15 @@ void ELFState::writeSectionContent( for (const auto &PGOBBE : PGOBBEntries) { if (PGOBBE.BBFreq) SHeader.sh_size += CBA.writeULEB128(*PGOBBE.BBFreq); + if (FeatureOrErr->PostLinkCfg || PGOBBE.PostLinkBBFreq.has_value()) + SHeader.sh_size += CBA.writeULEB128(PGOBBE.PostLinkBBFreq.value_or(0)); if (PGOBBE.Successors) { SHeader.sh_size += CBA.writeULEB128(PGOBBE.Successors->size()); - for (const auto &[ID, BrProb] : *PGOBBE.Successors) { + for (const auto &[ID, BrProb, PostLinkBrFreq] : *PGOBBE.Successors) { SHeader.sh_size += CBA.writeULEB128(ID); SHeader.sh_size += CBA.writeULEB128(BrProb); + if (FeatureOrErr->PostLinkCfg || PostLinkBrFreq.has_value()) + SHeader.sh_size += CBA.writeULEB128(PostLinkBrFreq.value_or(0)); } } } diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp index f8a84b075b77..e5e5fc20728e 100644 --- a/llvm/lib/ObjectYAML/ELFYAML.cpp +++ b/llvm/lib/ObjectYAML/ELFYAML.cpp @@ -1886,7 +1886,7 @@ void MappingTraits::mapping( IO &IO, ELFYAML::BBAddrMapEntry &E) { assert(IO.getContext() && "The IO context is not initialized"); IO.mapRequired("Version", E.Version); - IO.mapOptional("Feature", E.Feature, Hex8(0)); + IO.mapOptional("Feature", E.Feature, Hex16(0)); IO.mapOptional("NumBBRanges", E.NumBBRanges); IO.mapOptional("BBRanges", E.BBRanges); } @@ -1920,6 +1920,7 @@ void MappingTraits::mapping( IO 
&IO, ELFYAML::PGOAnalysisMapEntry::PGOBBEntry &E) { assert(IO.getContext() && "The IO context is not initialized"); IO.mapOptional("BBFreq", E.BBFreq); + IO.mapOptional("PostLinkBBFreq", E.PostLinkBBFreq); IO.mapOptional("Successors", E.Successors); } @@ -1929,6 +1930,7 @@ void MappingTraits:: assert(IO.getContext() && "The IO context is not initialized"); IO.mapRequired("ID", E.ID); IO.mapRequired("BrProb", E.BrProb); + IO.mapOptional("PostLinkBrFreq", E.PostLinkBrFreq); } void MappingTraits::mapping(IO &IO, diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-feature-warning.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-feature-warning.test new file mode 100644 index 000000000000..24726c34d350 --- /dev/null +++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-feature-warning.test @@ -0,0 +1,37 @@ +## This test checks that we output a warning when the specified version is too old to support the given features. + +# RUN: yaml2obj %s -o %t +# RUN: llvm-readobj --bb-addr-map %t 2>&1 | FileCheck -DFILE=%t %s + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + +# CHECK: BBAddrMap [ +# CHECK-NEXT: warning: '[[FILE]]': unable to dump SHT_LLVM_BB_ADDR_MAP section with index 1: version should be >= 3 for SHT_LLVM_BB_ADDR_MAP when callsite offsets feature is enabled: version = 2 feature = 32 +Sections: + - Name: '.llvm_bb_addr_map (1)' + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 2 + Feature: 0x20 + +# CHECK: BBAddrMap [ +# CHECK-NEXT: warning: '[[FILE]]': unable to dump SHT_LLVM_BB_ADDR_MAP section with index 2: version should be >= 4 for SHT_LLVM_BB_ADDR_MAP when basic block hash feature is enabled: version = 3 feature = 64 + + - Name: '.llvm_bb_addr_map (2)' + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 3 + Feature: 0x40 + +# CHECK: BBAddrMap [ +# CHECK-NEXT: warning: '[[FILE]]': unable to dump SHT_LLVM_BB_ADDR_MAP section with index 3: version should be >= 5 for SHT_LLVM_BB_ADDR_MAP when post link cfg feature is 
enabled: version = 4 feature = 128 + + - Name: '.llvm_bb_addr_map (3)' + Type: SHT_LLVM_BB_ADDR_MAP + Entries: + - Version: 4 + Feature: 0x80 diff --git a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test index 5faafd4d83b2..8e9d2271b872 100644 --- a/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test +++ b/llvm/test/tools/llvm-readobj/ELF/bb-addr-map-pgo-analysis-map.test @@ -15,7 +15,7 @@ ## Check that a malformed section can be handled. # RUN: yaml2obj %s -DBITS=32 -DSIZE=24 -o %t2.o -# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck --match-full-lines %s -DOFFSET=0x00000018 -DFILE=%t2.o --check-prefix=TRUNCATED +# RUN: llvm-readobj %t2.o --bb-addr-map 2>&1 | FileCheck --match-full-lines %s -DOFFSET=0x00000015 -DFILE=%t2.o --check-prefix=TRUNCATED ## Check that missing features can be handled. # RUN: yaml2obj %s -DBITS=32 -DFEATURE=0x2 -o %t3.o @@ -59,17 +59,20 @@ # CHECK-NEXT: { # RAW-NEXT: Frequency: 100 # PRETTY-NEXT: Frequency: 1.0 +# CHECK-NEXT: PostLink Frequency: 10 # CHECK-NEXT: Successors [ # CHECK-NEXT: { # CHECK-NEXT: ID: 2 # RAW-NEXT: Probability: 0x80000000 # PRETTY-NEXT: Probability: 0x80000000 / 0x80000000 = 100.00% +# CHECK-NEXT: PostLink Probability: 7 # CHECK-NEXT: } # CHECK-NEXT: ] # CHECK-NEXT: } # CHECK-NEXT: { # RAW-NEXT: Frequency: 100 # PRETTY-NEXT: Frequency: 1.0 +# CHECK-NEXT: PostLink Frequency: 0 # CHECK-NEXT: Successors [ # CHECK-NEXT: ] # CHECK-NEXT: } @@ -172,8 +175,8 @@ Sections: ShSize: [[SIZE=]] Link: .text Entries: - - Version: 2 - Feature: 0x7 + - Version: 5 + Feature: 0x87 BBRanges: - BaseAddress: [[ADDR=0x11111]] BBEntries: @@ -197,10 +200,12 @@ Sections: PGOAnalyses: - FuncEntryCount: 100 PGOBBEntries: - - BBFreq: 100 + - BBFreq: 100 + PostLinkBBFreq: 10 Successors: - - ID: 2 - BrProb: 0x80000000 + - ID: 2 + BrProb: 0x80000000 + PostLinkBrFreq: 7 - BBFreq: 100 Successors: [] - FuncEntryCount: 8888 diff --git 
a/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml b/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml index 299bf463cf4b..645507af080c 100644 --- a/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml +++ b/llvm/test/tools/obj2yaml/ELF/bb-addr-map-pgo-analysis-map.yaml @@ -15,7 +15,7 @@ # VALID-NEXT: Type: SHT_LLVM_BB_ADDR_MAP # VALID-NEXT: Entries: # VALID-NEXT: - Version: 2 -# VALID-NEXT: Feature: 0x7 +# VALID-NEXT: Feature: 0x87 ## The 'BaseAddress' field is omitted when it's zero. # VALID-NEXT: BBRanges: # VALID-NEXT: - BBEntries: @@ -43,17 +43,23 @@ # VALID-NEXT: PGOAnalyses: # VALID-NEXT: - FuncEntryCount: 100 # VALID-NEXT: PGOBBEntries: -# VALID-NEXT: - BBFreq: 100 +# VALID-NEXT: - BBFreq: 100 +# VALID-NEXT: PostLinkBBFreq: 10 # VALID-NEXT: Successors: -# VALID-NEXT: - ID: 2 -# VALID-NEXT: BrProb: 0x80000000 -# VALID-NEXT: - ID: 4 -# VALID-NEXT: BrProb: 0x80000000 -# VALID-NEXT: - BBFreq: 50 +# VALID-NEXT: - ID: 2 +# VALID-NEXT: BrProb: 0x80000000 +# VALID-NEXT: PostLinkBrFreq: 7 +# VALID-NEXT: - ID: 4 +# VALID-NEXT: BrProb: 0x80000000 +# VALID-NEXT: PostLinkBrFreq: 0 +# VALID-NEXT: - BBFreq: 50 +# VALID-NEXT: PostLinkBBFreq: 0 # VALID-NEXT: Successors: -# VALID-NEXT: - ID: 4 -# VALID-NEXT: BrProb: 0xFFFFFFFF -# VALID-NEXT: - BBFreq: 100 +# VALID-NEXT: - ID: 4 +# VALID-NEXT: BrProb: 0xFFFFFFFF +# VALID-NEXT: PostLinkBrFreq: 0 +# VALID-NEXT: - BBFreq: 100 +# VALID-NEXT: PostLinkBBFreq: 3 # VALID-NEXT: Successors: [] # VALID-NEXT: PGOBBEntries: # VALID-NEXT: - BBFreq: 20 @@ -69,7 +75,7 @@ Sections: ShSize: [[SIZE=]] Entries: - Version: 2 - Feature: 0x7 + Feature: 0x87 BBRanges: - BaseAddress: 0x0 BBEntries: @@ -97,17 +103,20 @@ Sections: PGOAnalyses: - FuncEntryCount: 100 PGOBBEntries: - - BBFreq: 100 + - BBFreq: 100 + PostLinkBBFreq: 10 Successors: - - ID: 2 - BrProb: 0x80000000 - - ID: 4 - BrProb: 0x80000000 - - BBFreq: 50 + - ID: 2 + BrProb: 0x80000000 + PostLinkBrFreq: 7 + - ID: 4 + BrProb: 0x80000000 + - BBFreq: 50 
Successors: - - ID: 4 - BrProb: 0xFFFFFFFF - - BBFreq: 100 + - ID: 4 + BrProb: 0xFFFFFFFF + - BBFreq: 100 + PostLinkBBFreq: 3 Successors: [] - PGOBBEntries: - BBFreq: 20 diff --git a/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml b/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml index a4cb572e6d99..ac9c8d402b0a 100644 --- a/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml +++ b/llvm/test/tools/yaml2obj/ELF/bb-addr-map-pgo-analysis-map.yaml @@ -6,8 +6,9 @@ # Case 4: Specify Entries. # CHECK: Name: .llvm_bb_addr_map (1) # CHECK: SectionData ( -# CHECK-NEXT: 0000: 02072000 00000000 0000010B 010203E8 -# CHECK-NEXT: 0010: 07E80702 0CEEDDBB F70E0D91 A2C48801 +# CHECK-NEXT: 0000: 02872000 00000000 0000010B 010203E8 +# CHECK-NEXT: 0010: 07E80764 020CEEDD BBF70E28 0D91A2C4 +# CHECK-NEXT: 0020: 880100 # CHECK-NEXT: ) # Case 7: Not including a field which is enabled in feature doesn't emit value @@ -26,12 +27,12 @@ Sections: ## Test the following cases: ## 1) We can produce an .llvm_bb_addr_map section from a description with -## Entries and PGO Analysis data. +## Entries and PGO Analysis and Post Link data. - Name: '.llvm_bb_addr_map (1)' Type: SHT_LLVM_BB_ADDR_MAP Entries: - Version: 2 - Feature: 0x7 + Feature: 0x87 BBRanges: - BaseAddress: 0x0000000000000020 BBEntries: @@ -42,12 +43,14 @@ Sections: PGOAnalyses: - FuncEntryCount: 1000 PGOBBEntries: - - BBFreq: 1000 + - BBFreq: 1000 + PostLinkBBFreq: 100 Successors: - - ID: 12 - BrProb: 0xeeeeeeee - - ID: 13 - BrProb: 0x11111111 + - ID: 12 + BrProb: 0xeeeeeeee + PostLinkBrFreq: 40 + - ID: 13 + BrProb: 0x11111111 ## 2) According to feature we have FuncEntryCount but none is provided in yaml - Name: '.llvm_bb_addr_map (2)' @@ -66,7 +69,7 @@ Sections: ## Check that yaml2obj generates a warning when we use unsupported feature. 
# RUN: yaml2obj --docnum=2 %s 2>&1 | FileCheck %s --check-prefix=INVALID-FEATURE -# INVALID-FEATURE: warning: invalid encoding for BBAddrMap::Features: 0xf0 +# INVALID-FEATURE: warning: invalid encoding for BBAddrMap::Features: 0x100 --- !ELF FileHeader: @@ -79,4 +82,4 @@ Sections: Entries: - Version: 2 ## Specify unsupported feature - Feature: 0xF0 + Feature: 0x100 diff --git a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml index 339e419b3945..05d77d67e446 100644 --- a/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml +++ b/llvm/test/tools/yaml2obj/ELF/bb-addr-map.yaml @@ -220,7 +220,7 @@ Sections: ## Check that yaml2obj generates a warning when we use unsupported versions. # RUN: yaml2obj --docnum=3 %s 2>&1 | FileCheck %s --check-prefix=INVALID-VERSION -# INVALID-VERSION: warning: unsupported SHT_LLVM_BB_ADDR_MAP version: 5; encoding using the most recent version +# INVALID-VERSION: warning: unsupported SHT_LLVM_BB_ADDR_MAP version: 6; encoding using the most recent version --- !ELF FileHeader: @@ -232,4 +232,4 @@ Sections: Type: SHT_LLVM_BB_ADDR_MAP Entries: ## Specify unsupported version - - Version: 5 + - Version: 6 diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 423a11fd5b72..6f09da5a4099 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -8188,6 +8188,8 @@ void LLVMELFDumper::printBBAddrMaps(bool PrettyPGOAnalysis) { } else { W.printNumber("Frequency", PBBE.BlockFreq.getFrequency()); } + if (PAM.FeatEnable.PostLinkCfg) + W.printNumber("PostLink Frequency", PBBE.PostLinkBlockFreq); } if (PAM.FeatEnable.BrProb) { @@ -8200,6 +8202,8 @@ void LLVMELFDumper::printBBAddrMaps(bool PrettyPGOAnalysis) { } else { W.printHex("Probability", Succ.Prob.getNumerator()); } + if (PAM.FeatEnable.PostLinkCfg) + W.printNumber("PostLink Probability", Succ.PostLinkFreq); } } } diff --git a/llvm/tools/obj2yaml/elf2yaml.cpp 
b/llvm/tools/obj2yaml/elf2yaml.cpp index 68e18f6c7920..4364d15a8b45 100644 --- a/llvm/tools/obj2yaml/elf2yaml.cpp +++ b/llvm/tools/obj2yaml/elf2yaml.cpp @@ -895,7 +895,7 @@ ELFDumper::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { std::vector PGOAnalyses; DataExtractor::Cursor Cur(0); uint8_t Version = 0; - uint8_t Feature = 0; + uint16_t Feature = 0; uint64_t Address = 0; while (Cur && Cur.tell() < Content.size()) { if (Shdr->sh_type == ELF::SHT_LLVM_BB_ADDR_MAP) { @@ -905,7 +905,7 @@ ELFDumper::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { errc::invalid_argument, "invalid SHT_LLVM_BB_ADDR_MAP section version: " + Twine(static_cast(Version))); - Feature = Data.getU8(Cur); + Feature = Version < 5 ? Data.getU8(Cur) : Data.getU16(Cur); } uint64_t NumBBRanges = 1; uint64_t NumBlocks = 0; @@ -972,6 +972,8 @@ ELFDumper::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { auto &PGOBBEntry = PGOBBEntries.emplace_back(); if (FeatureOrErr->BBFreq) { PGOBBEntry.BBFreq = Data.getULEB128(Cur); + if (FeatureOrErr->PostLinkCfg) + PGOBBEntry.PostLinkBBFreq = Data.getULEB128(Cur); if (!Cur) break; } @@ -982,7 +984,10 @@ ELFDumper::dumpBBAddrMapSection(const Elf_Shdr *Shdr) { for (uint64_t SuccIdx = 0; Cur && SuccIdx < SuccCount; ++SuccIdx) { uint32_t ID = Data.getULEB128(Cur); uint32_t BrProb = Data.getULEB128(Cur); - SuccEntries.push_back({ID, BrProb}); + std::optional PostLinkBrFreq; + if (FeatureOrErr->PostLinkCfg) + PostLinkBrFreq = Data.getULEB128(Cur); + SuccEntries.push_back({ID, BrProb, PostLinkBrFreq}); } } } diff --git a/llvm/unittests/Object/ELFObjectFileTest.cpp b/llvm/unittests/Object/ELFObjectFileTest.cpp index d6a3ca53b215..1e2955ae40a6 100644 --- a/llvm/unittests/Object/ELFObjectFileTest.cpp +++ b/llvm/unittests/Object/ELFObjectFileTest.cpp @@ -531,7 +531,7 @@ Sections: // Check that we can detect unsupported versions. 
SmallString<128> UnsupportedVersionYamlString(CommonYamlString); UnsupportedVersionYamlString += R"( - - Version: 5 + - Version: 6 BBRanges: - BaseAddress: 0x11111 BBEntries: @@ -543,7 +543,7 @@ Sections: { SCOPED_TRACE("unsupported version"); DoCheck(UnsupportedVersionYamlString, - "unsupported SHT_LLVM_BB_ADDR_MAP version: 5"); + "unsupported SHT_LLVM_BB_ADDR_MAP version: 6"); } SmallString<128> ZeroBBRangesYamlString(CommonYamlString); @@ -1181,8 +1181,8 @@ Sections: Type: SHT_LLVM_BB_ADDR_MAP # Link: 0 (by default, can be overriden) Entries: - - Version: 2 - Feature: 0x7 + - Version: 5 + Feature: 0x87 BBRanges: - BaseAddress: 0x44444 BBEntries: @@ -1205,7 +1205,8 @@ Sections: PGOAnalyses: - FuncEntryCount: 1000 PGOBBEntries: - - BBFreq: 1000 + - BBFreq: 1000 + PostLinkBBFreq: 50 Successors: - ID: 1 BrProb: 0x22222222 @@ -1243,8 +1244,8 @@ Sections: Type: SHT_LLVM_BB_ADDR_MAP # Link: 0 (by default, can be overriden) Entries: - - Version: 2 - Feature: 0xc + - Version: 5 + Feature: 0x8c BBRanges: - BaseAddress: 0x66666 BBEntries: @@ -1265,8 +1266,9 @@ Sections: PGOAnalyses: - PGOBBEntries: - Successors: - - ID: 1 - BrProb: 0x22222222 + - ID: 1 + BrProb: 0x22222222 + PostLinkBrFreq: 7 - ID: 2 BrProb: 0xcccccccc - Successors: @@ -1278,59 +1280,66 @@ Sections: BBAddrMap E1 = { {{0x11111, {{1, 0x0, 0x1, {false, true, false, false, false}, {}, 0}}}}}; PGOAnalysisMap P1 = { - 892, {}, {true, false, false, false, false, false, false}}; + 892, {}, {true, false, false, false, false, false, false, false}}; BBAddrMap E2 = { {{0x22222, {{2, 0x0, 0x2, {false, false, true, false, false}, {}, 0}}}}}; PGOAnalysisMap P2 = {{}, - {{BlockFrequency(343), {}}}, - {false, true, false, false, false, false, false}}; + {{BlockFrequency(343), 0, {}}}, + {false, true, false, false, false, false, false, false}}; BBAddrMap E3 = { {{0x33333, {{0, 0x0, 0x3, {false, true, true, false, false}, {}, 0}, {1, 0x3, 0x3, {false, false, true, false, false}, {}, 0}, {2, 0x6, 0x3, {false, false, false, 
false, false}, {}, 0}}}}}; - PGOAnalysisMap P3 = {{}, - {{{}, - {{1, BranchProbability::getRaw(0x1111'1111)}, - {2, BranchProbability::getRaw(0xeeee'eeee)}}}, - {{}, {{2, BranchProbability::getRaw(0xffff'ffff)}}}, - {{}, {}}}, - {false, false, true, false, false, false, false}}; + PGOAnalysisMap P3 = { + {}, + {{{}, + 0, + {{1, BranchProbability::getRaw(0x1111'1111), 0}, + {2, BranchProbability::getRaw(0xeeee'eeee), 0}}}, + {{}, 0, {{2, BranchProbability::getRaw(0xffff'ffff), 0}}}, + {{}, 0, {}}}, + {false, false, true, false, false, false, false, false}}; BBAddrMap E4 = { {{0x44444, {{0, 0x0, 0x4, {false, false, false, true, true}, {}, 0}, {1, 0x4, 0x4, {false, false, false, false, false}, {}, 0}, {2, 0x8, 0x4, {false, false, false, false, false}, {}, 0}, {3, 0xc, 0x4, {false, false, false, false, false}, {}, 0}}}}}; - PGOAnalysisMap P4 = { - 1000, - {{BlockFrequency(1000), - {{1, BranchProbability::getRaw(0x2222'2222)}, - {2, BranchProbability::getRaw(0x3333'3333)}, - {3, BranchProbability::getRaw(0xaaaa'aaaa)}}}, - {BlockFrequency(133), - {{2, BranchProbability::getRaw(0x1111'1111)}, - {3, BranchProbability::getRaw(0xeeee'eeee)}}}, - {BlockFrequency(18), {{3, BranchProbability::getRaw(0xffff'ffff)}}}, - {BlockFrequency(1000), {}}}, - {true, true, true, false, false, false, false}}; + PGOAnalysisMap P4 = {1000, + {{BlockFrequency(1000), + 50, + {{1, BranchProbability::getRaw(0x2222'2222), 0}, + {2, BranchProbability::getRaw(0x3333'3333), 0}, + {3, BranchProbability::getRaw(0xaaaa'aaaa), 0}}}, + {BlockFrequency(133), + 0, + {{2, BranchProbability::getRaw(0x1111'1111), 0}, + {3, BranchProbability::getRaw(0xeeee'eeee), 0}}}, + {BlockFrequency(18), + 0, + {{3, BranchProbability::getRaw(0xffff'ffff), 0}}}, + {BlockFrequency(1000), 0, {}}}, + {true, true, true, false, false, false, false, true}}; BBAddrMap E5 = { {{0x55555, {{2, 0x0, 0x2, {false, false, true, false, false}, {}, 0}}}}}; PGOAnalysisMap P5 = { - {}, {}, {false, false, false, false, false, false, false}}; 
+ {}, {}, {false, false, false, false, false, false, false, false}}; BBAddrMap E6 = { {{0x66666, {{0, 0x0, 0x6, {false, true, true, false, false}, {}, 0}, {1, 0x6, 0x6, {false, false, true, false, false}, {}, 0}}}, {0x666661, {{2, 0x0, 0x6, {false, false, false, false, false}, {}, 0}}}}}; - PGOAnalysisMap P6 = {{}, - {{{}, - {{1, BranchProbability::getRaw(0x2222'2222)}, - {2, BranchProbability::getRaw(0xcccc'cccc)}}}, - {{}, {{2, BranchProbability::getRaw(0x8888'8888)}}}, - {{}, {}}}, - {false, false, true, true, false, false, false}}; + PGOAnalysisMap P6 = { + {}, + {{{}, + 0, + {{1, BranchProbability::getRaw(0x2222'2222), 7}, + {2, BranchProbability::getRaw(0xcccc'cccc), 0}}}, + {{}, 0, {{2, BranchProbability::getRaw(0x8888'8888), 0}}}, + {{}, 0, {}}}, + {false, false, true, true, false, false, false, true}}; std::vector Section0BBAddrMaps = {E4, E5, E6}; std::vector Section1BBAddrMaps = {E3}; @@ -1465,7 +1474,7 @@ Sections: DoCheckFails( TruncatedYamlString, /*TextSectionIndex=*/std::nullopt, "unable to read SHT_LLVM_BB_ADDR_MAP section with index 6: " - "unexpected end of data at offset 0xa while reading [0x3, 0xb)"); + "unexpected end of data at offset 0xa while reading [0x4, 0xc)"); // Check that we can read the other section's bb-address-maps which are // valid. 
DoCheckSucceeds(TruncatedYamlString, /*TextSectionIndex=*/2, diff --git a/llvm/unittests/Object/ELFTypesTest.cpp b/llvm/unittests/Object/ELFTypesTest.cpp index 1765e1500396..9e99b4a6d7bf 100644 --- a/llvm/unittests/Object/ELFTypesTest.cpp +++ b/llvm/unittests/Object/ELFTypesTest.cpp @@ -101,22 +101,24 @@ static_assert( "PGOAnalysisMap should use the same type for basic block ID as BBAddrMap"); TEST(ELFTypesTest, BBAddrMapFeaturesEncodingTest) { - const std::array Decoded = { - {{false, false, false, false, false, false, false}, - {true, false, false, false, false, false, false}, - {false, true, false, false, false, false, false}, - {false, false, true, false, false, false, false}, - {false, false, false, true, false, false, false}, - {true, true, false, false, false, false, false}, - {false, true, true, false, false, false, false}, - {false, true, true, true, false, false, false}, - {true, true, true, true, false, false, false}, - {false, false, false, false, true, false, false}, - {false, false, false, false, false, true, false}, - {false, false, false, false, false, false, true}}}; - const std::array Encoded = { + const std::array Decoded = { + {{false, false, false, false, false, false, false, false}, + {true, false, false, false, false, false, false, false}, + {false, true, false, false, false, false, false, false}, + {false, false, true, false, false, false, false, false}, + {false, false, false, true, false, false, false, false}, + {true, true, false, false, false, false, false, false}, + {false, true, true, false, false, false, false, false}, + {false, true, true, true, false, false, false, false}, + {true, true, true, true, false, false, false, false}, + {false, false, false, false, true, false, false, false}, + {false, false, false, false, false, true, false, false}, + {false, false, false, false, false, false, true, false}, + {false, false, false, false, false, false, false, true}, + {false, false, false, false, false, false, true, true}}}; + const 
std::array Encoded = { {0b0000, 0b0001, 0b0010, 0b0100, 0b1000, 0b0011, 0b0110, 0b1110, 0b1111, - 0b1'0000, 0b10'0000, 0b100'0000}}; + 0b1'0000, 0b10'0000, 0b100'0000, 0b1000'0000, 0b1100'0000}}; for (const auto &[Feat, EncodedVal] : llvm::zip(Decoded, Encoded)) EXPECT_EQ(Feat.encode(), EncodedVal); for (const auto &[Feat, EncodedVal] : llvm::zip(Decoded, Encoded)) { @@ -129,9 +131,9 @@ TEST(ELFTypesTest, BBAddrMapFeaturesEncodingTest) { TEST(ELFTypesTest, BBAddrMapFeaturesInvalidEncodingTest) { const std::array Errors = { - "invalid encoding for BBAddrMap::Features: 0x80", - "invalid encoding for BBAddrMap::Features: 0xf0"}; - const std::array Values = {{0b1000'0000, 0b1111'0000}}; + "invalid encoding for BBAddrMap::Features: 0x100", + "invalid encoding for BBAddrMap::Features: 0x1000"}; + const std::array Values = {{0b1'0000'0000, 0b1'0000'0000'0000}}; for (const auto &[Val, Error] : llvm::zip(Values, Errors)) { EXPECT_THAT_ERROR(BBAddrMap::Features::decode(Val).takeError(), FailedWithMessage(Error)); -- cgit v1.2.3 From 5b760562dd0d0c33c0cb3632487d14a1837aef17 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 30 Oct 2025 20:15:53 +0000 Subject: [X86] narrowBitOpRMW - add tests showing failure to fold to BTC/BTR/BTS RMW patterns (#165758) Failure to fold if the store's chain doesn't directly touch the RMW load source (we should be using reachesChainWithoutSideEffects to avoid this). 
Failure to fold if the stored value has additional uses (we could update other uses of the whole stored value to reload after the new narrow store) --- llvm/test/CodeGen/X86/bittest-big-integer.ll | 288 +++++++++++++++++++++++++++ 1 file changed, 288 insertions(+) diff --git a/llvm/test/CodeGen/X86/bittest-big-integer.ll b/llvm/test/CodeGen/X86/bittest-big-integer.ll index cc3dcf32ac0e..06e7d4773c58 100644 --- a/llvm/test/CodeGen/X86/bittest-big-integer.ll +++ b/llvm/test/CodeGen/X86/bittest-big-integer.ll @@ -1676,3 +1676,291 @@ define i1 @test_ne_i4096(ptr %word, i32 %position) nounwind { %cmp = icmp ne i4096 %test, 0 ret i1 %cmp } + +; Special Cases + +; Multiple uses of the stored value +define i1 @complement_cmpz_i128(ptr %word, i32 %position) nounwind { +; X86-LABEL: complement_cmpz_i128: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $64, %esp +; X86-NEXT: movzbl 12(%ebp), %ecx +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $1, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $12, %al +; X86-NEXT: negb %al +; X86-NEXT: movsbl %al, %esi +; X86-NEXT: movl 36(%esp,%esi), %eax +; X86-NEXT: movl 40(%esp,%esi), %edi +; X86-NEXT: movl %edi, %edx +; X86-NEXT: shldl %cl, %eax, %edx +; X86-NEXT: movl 32(%esp,%esi), %ebx +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 44(%esp,%esi), %esi +; X86-NEXT: shldl %cl, %edi, %esi +; X86-NEXT: movl %ebx, %edi +; X86-NEXT: shll %cl, %edi +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: shldl %cl, %ebx, %eax +; X86-NEXT: movl 8(%ebp), %ecx +; 
X86-NEXT: xorl 12(%ecx), %esi +; X86-NEXT: xorl 8(%ecx), %edx +; X86-NEXT: xorl 4(%ecx), %eax +; X86-NEXT: xorl (%ecx), %edi +; X86-NEXT: movl %edx, 8(%ecx) +; X86-NEXT: movl %esi, 12(%ecx) +; X86-NEXT: movl %edi, (%ecx) +; X86-NEXT: movl %eax, 4(%ecx) +; X86-NEXT: orl %esi, %eax +; X86-NEXT: orl %edx, %edi +; X86-NEXT: orl %eax, %edi +; X86-NEXT: setne %al +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; SSE-LABEL: complement_cmpz_i128: +; SSE: # %bb.0: +; SSE-NEXT: movl %esi, %ecx +; SSE-NEXT: movl $1, %eax +; SSE-NEXT: xorl %edx, %edx +; SSE-NEXT: shldq %cl, %rax, %rdx +; SSE-NEXT: shlq %cl, %rax +; SSE-NEXT: xorl %esi, %esi +; SSE-NEXT: testb $64, %cl +; SSE-NEXT: cmovneq %rax, %rdx +; SSE-NEXT: cmovneq %rsi, %rax +; SSE-NEXT: xorq 8(%rdi), %rdx +; SSE-NEXT: xorq (%rdi), %rax +; SSE-NEXT: movq %rax, (%rdi) +; SSE-NEXT: movq %rdx, 8(%rdi) +; SSE-NEXT: orq %rdx, %rax +; SSE-NEXT: setne %al +; SSE-NEXT: retq +; +; AVX2-LABEL: complement_cmpz_i128: +; AVX2: # %bb.0: +; AVX2-NEXT: movl %esi, %ecx +; AVX2-NEXT: movl $1, %eax +; AVX2-NEXT: xorl %edx, %edx +; AVX2-NEXT: shldq %cl, %rax, %rdx +; AVX2-NEXT: xorl %esi, %esi +; AVX2-NEXT: shlxq %rcx, %rax, %rax +; AVX2-NEXT: testb $64, %cl +; AVX2-NEXT: cmovneq %rax, %rdx +; AVX2-NEXT: cmovneq %rsi, %rax +; AVX2-NEXT: xorq 8(%rdi), %rdx +; AVX2-NEXT: xorq (%rdi), %rax +; AVX2-NEXT: movq %rax, (%rdi) +; AVX2-NEXT: movq %rdx, 8(%rdi) +; AVX2-NEXT: orq %rdx, %rax +; AVX2-NEXT: setne %al +; AVX2-NEXT: retq +; +; AVX512-LABEL: complement_cmpz_i128: +; AVX512: # %bb.0: +; AVX512-NEXT: movl %esi, %ecx +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: movl $1, %edx +; AVX512-NEXT: xorl %esi, %esi +; AVX512-NEXT: shldq %cl, %rdx, %rsi +; AVX512-NEXT: shlxq %rcx, %rdx, %rdx +; AVX512-NEXT: testb $64, %cl +; AVX512-NEXT: cmovneq %rdx, %rsi +; AVX512-NEXT: cmovneq %rax, %rdx +; AVX512-NEXT: xorq 8(%rdi), %rsi +; AVX512-NEXT: xorq (%rdi), 
%rdx +; AVX512-NEXT: movq %rdx, (%rdi) +; AVX512-NEXT: movq %rsi, 8(%rdi) +; AVX512-NEXT: orq %rsi, %rdx +; AVX512-NEXT: setne %al +; AVX512-NEXT: retq + %rem = and i32 %position, 127 + %ofs = zext nneg i32 %rem to i128 + %bit = shl nuw i128 1, %ofs + %ld = load i128, ptr %word + %res = xor i128 %ld, %bit + store i128 %res, ptr %word + %cmp = icmp ne i128 %res, 0 + ret i1 %cmp +} + +; Multiple loads in store chain +define i32 @reset_multiload_i128(ptr %word, i32 %position, ptr %p) nounwind { +; X86-LABEL: reset_multiload_i128: +; X86: # %bb.0: +; X86-NEXT: pushl %ebp +; X86-NEXT: movl %esp, %ebp +; X86-NEXT: pushl %ebx +; X86-NEXT: pushl %edi +; X86-NEXT: pushl %esi +; X86-NEXT: andl $-16, %esp +; X86-NEXT: subl $64, %esp +; X86-NEXT: movl 12(%ebp), %ecx +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $1, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl $0, {{[0-9]+}}(%esp) +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $12, %al +; X86-NEXT: negb %al +; X86-NEXT: movsbl %al, %edi +; X86-NEXT: movl 36(%esp,%edi), %edx +; X86-NEXT: movl 40(%esp,%edi), %ebx +; X86-NEXT: movl %ebx, %esi +; X86-NEXT: shldl %cl, %edx, %esi +; X86-NEXT: movl 32(%esp,%edi), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 44(%esp,%edi), %edi +; X86-NEXT: shldl %cl, %ebx, %edi +; X86-NEXT: movl %eax, %ebx +; X86-NEXT: # kill: def $cl killed $cl killed $ecx +; X86-NEXT: shll %cl, %ebx +; X86-NEXT: notl %ebx +; X86-NEXT: movl 16(%ebp), %eax +; X86-NEXT: movl (%eax), %eax +; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: movl 12(%ebp), %eax +; X86-NEXT: andl $96, %eax +; X86-NEXT: shrl $3, %eax +; X86-NEXT: movl 8(%ebp), %ecx +; X86-NEXT: movl (%ecx,%eax), %eax +; X86-NEXT: andl %ebx, (%ecx) +; X86-NEXT: movl 
12(%ebp), %ecx +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; X86-NEXT: shldl %cl, %ebx, %edx +; X86-NEXT: notl %edx +; X86-NEXT: movl 8(%ebp), %ebx +; X86-NEXT: andl %edx, 4(%ebx) +; X86-NEXT: notl %esi +; X86-NEXT: andl %esi, 8(%ebx) +; X86-NEXT: notl %edi +; X86-NEXT: andl %edi, 12(%ebx) +; X86-NEXT: btl %ecx, %eax +; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; X86-NEXT: jae .LBB22_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: .LBB22_2: +; X86-NEXT: leal -12(%ebp), %esp +; X86-NEXT: popl %esi +; X86-NEXT: popl %edi +; X86-NEXT: popl %ebx +; X86-NEXT: popl %ebp +; X86-NEXT: retl +; +; SSE-LABEL: reset_multiload_i128: +; SSE: # %bb.0: +; SSE-NEXT: movl %esi, %ecx +; SSE-NEXT: movl $1, %esi +; SSE-NEXT: xorl %r8d, %r8d +; SSE-NEXT: shldq %cl, %rsi, %r8 +; SSE-NEXT: xorl %eax, %eax +; SSE-NEXT: shlq %cl, %rsi +; SSE-NEXT: testb $64, %cl +; SSE-NEXT: cmovneq %rsi, %r8 +; SSE-NEXT: cmovneq %rax, %rsi +; SSE-NEXT: notq %r8 +; SSE-NEXT: notq %rsi +; SSE-NEXT: movl %ecx, %r9d +; SSE-NEXT: andl $96, %r9d +; SSE-NEXT: shrl $3, %r9d +; SSE-NEXT: movl (%rdi,%r9), %r9d +; SSE-NEXT: btl %ecx, %r9d +; SSE-NEXT: jb .LBB22_2 +; SSE-NEXT: # %bb.1: +; SSE-NEXT: movl (%rdx), %eax +; SSE-NEXT: .LBB22_2: +; SSE-NEXT: andq %r8, 8(%rdi) +; SSE-NEXT: andq %rsi, (%rdi) +; SSE-NEXT: # kill: def $eax killed $eax killed $rax +; SSE-NEXT: retq +; +; AVX2-LABEL: reset_multiload_i128: +; AVX2: # %bb.0: +; AVX2-NEXT: movl %esi, %ecx +; AVX2-NEXT: xorl %eax, %eax +; AVX2-NEXT: movl $1, %r8d +; AVX2-NEXT: xorl %esi, %esi +; AVX2-NEXT: shldq %cl, %r8, %rsi +; AVX2-NEXT: shlxq %rcx, %r8, %r8 +; AVX2-NEXT: testb $64, %cl +; AVX2-NEXT: cmovneq %r8, %rsi +; AVX2-NEXT: cmovneq %rax, %r8 +; AVX2-NEXT: notq %rsi +; AVX2-NEXT: notq %r8 +; AVX2-NEXT: movl %ecx, %r9d +; AVX2-NEXT: andl $96, %r9d +; AVX2-NEXT: shrl $3, %r9d +; AVX2-NEXT: movl (%rdi,%r9), %r9d +; AVX2-NEXT: btl %ecx, %r9d +; AVX2-NEXT: jb .LBB22_2 +; AVX2-NEXT: # %bb.1: +; 
AVX2-NEXT: movl (%rdx), %eax +; AVX2-NEXT: .LBB22_2: +; AVX2-NEXT: andq %rsi, 8(%rdi) +; AVX2-NEXT: andq %r8, (%rdi) +; AVX2-NEXT: # kill: def $eax killed $eax killed $rax +; AVX2-NEXT: retq +; +; AVX512-LABEL: reset_multiload_i128: +; AVX512: # %bb.0: +; AVX512-NEXT: movl %esi, %ecx +; AVX512-NEXT: movl $1, %r8d +; AVX512-NEXT: xorl %esi, %esi +; AVX512-NEXT: shldq %cl, %r8, %rsi +; AVX512-NEXT: xorl %eax, %eax +; AVX512-NEXT: shlxq %rcx, %r8, %r8 +; AVX512-NEXT: testb $64, %cl +; AVX512-NEXT: cmovneq %r8, %rsi +; AVX512-NEXT: cmovneq %rax, %r8 +; AVX512-NEXT: notq %rsi +; AVX512-NEXT: notq %r8 +; AVX512-NEXT: movl %ecx, %r9d +; AVX512-NEXT: andl $96, %r9d +; AVX512-NEXT: shrl $3, %r9d +; AVX512-NEXT: movl (%rdi,%r9), %r9d +; AVX512-NEXT: btl %ecx, %r9d +; AVX512-NEXT: jb .LBB22_2 +; AVX512-NEXT: # %bb.1: +; AVX512-NEXT: movl (%rdx), %eax +; AVX512-NEXT: .LBB22_2: +; AVX512-NEXT: andq %rsi, 8(%rdi) +; AVX512-NEXT: andq %r8, (%rdi) +; AVX512-NEXT: # kill: def $eax killed $eax killed $rax +; AVX512-NEXT: retq + %rem = and i32 %position, 127 + %ofs = zext nneg i32 %rem to i128 + %bit = shl nuw i128 1, %ofs + %mask = xor i128 %bit, -1 + %ld = load i128, ptr %word + %sel = load i32, ptr %p + %test = and i128 %ld, %bit + %res = and i128 %ld, %mask + %cmp = icmp eq i128 %test, 0 + store i128 %res, ptr %word + %ret = select i1 %cmp, i32 %sel, i32 0 + ret i32 %ret +} -- cgit v1.2.3 From 87616939190b1c0d322f0f3c1d69ba3626d18582 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 30 Oct 2025 13:19:44 -0700 Subject: [TSan] Make Test work with Internal Shell This test was using subshells to setup LD_LIBRARY_PATH properly. Use a python script and readfile substitutions to preserve the same behavior. 
Reviewers: vitalybuka, fmayer, thurstond Reviewed By: thurstond Pull Request: https://github.com/llvm/llvm-project/pull/165147 --- compiler-rt/test/tsan/ignore_lib0.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/compiler-rt/test/tsan/ignore_lib0.cpp b/compiler-rt/test/tsan/ignore_lib0.cpp index cba58c617703..1673e8df6c50 100644 --- a/compiler-rt/test/tsan/ignore_lib0.cpp +++ b/compiler-rt/test/tsan/ignore_lib0.cpp @@ -4,11 +4,13 @@ // RUN: %clangxx_tsan -O1 -fno-builtin %s -DLIB -fPIC -fno-sanitize=thread -shared -o %t-dir/libignore_lib0.so // RUN: %clangxx_tsan -O1 %s -L%t-dir -lignore_lib0 %link_libcxx_tsan -o %t // RUN: echo running w/o suppressions: -// RUN: env LD_LIBRARY_PATH=%t-dir${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} %deflake %run %t | FileCheck %s --check-prefix=CHECK-NOSUPP +// RUN: echo -n %t-dir > %t.ld_library_path +// RUN: python -c "if 'LD_LIBRARY_PATH' in __import__('os').environ: print(':' + __import__('os').environ['LD_LIBRARY_PATH'], end='')" >> %t.ld_library_path +// RUN: env LD_LIBRARY_PATH=%{readfile:%t.ld_library_path} %deflake %run %t | FileCheck %s --check-prefix=CHECK-NOSUPP // RUN: echo running with suppressions: -// RUN: env LD_LIBRARY_PATH=%t-dir${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} %env_tsan_opts=suppressions='%s.supp' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP +// RUN: env LD_LIBRARY_PATH=%{readfile:%t.ld_library_path} %env_tsan_opts=suppressions='%s.supp' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP // RUN: echo running with generic suppression of noninstrumented code: -// RUN: env LD_LIBRARY_PATH=%t-dir${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} %env_tsan_opts=ignore_noninstrumented_modules=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP +// RUN: env LD_LIBRARY_PATH=%{readfile:%t.ld_library_path} %env_tsan_opts=ignore_noninstrumented_modules=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP // Tests that interceptors coming from a library specified in 
called_from_lib // suppression are ignored. -- cgit v1.2.3 From 855a3e974dfaa317d65d9e66c3cadf76d470da2b Mon Sep 17 00:00:00 2001 From: Thomas Applencourt Date: Thu, 30 Oct 2025 15:26:13 -0500 Subject: [libclang/python] Add isFunctionInlined support (#162882) `cindex.py` was missing support for [isFunctionInlined](https://clang.llvm.org/doxygen/group__CINDEX__TYPES.html#ga963097b9aecabf5dce7554dff18b061d); this PR adds it. --------- Co-authored-by: Vlad Serebrennikov --- clang/bindings/python/clang/cindex.py | 8 ++++++++ clang/bindings/python/tests/cindex/test_cursor.py | 15 +++++++++++++++ clang/docs/ReleaseNotes.rst | 1 + 3 files changed, 24 insertions(+) diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py index 2786add27f5e..c48bc9c2eb7d 100644 --- a/clang/bindings/python/clang/cindex.py +++ b/clang/bindings/python/clang/cindex.py @@ -2362,6 +2362,13 @@ class Cursor(Structure): """ return conf.lib.clang_getFieldDeclBitWidth(self) # type: ignore [no-any-return] + @cursor_null_guard + def is_function_inlined(self) -> bool: + """ + Check if the function is inlined. 
+ """ + return bool(conf.lib.clang_Cursor_isFunctionInlined(self)) + @cursor_null_guard def has_attrs(self) -> bool: """ @@ -4310,6 +4317,7 @@ FUNCTION_LIST: list[LibFunc] = [ ("clang_Cursor_isAnonymous", [Cursor], bool), ("clang_Cursor_isAnonymousRecordDecl", [Cursor], bool), ("clang_Cursor_isBitField", [Cursor], bool), + ("clang_Cursor_isFunctionInlined", [Cursor], c_uint), ("clang_Location_isInSystemHeader", [SourceLocation], bool), ("clang_PrintingPolicy_dispose", [PrintingPolicy]), ("clang_PrintingPolicy_getProperty", [PrintingPolicy, c_int], c_uint), diff --git a/clang/bindings/python/tests/cindex/test_cursor.py b/clang/bindings/python/tests/cindex/test_cursor.py index eb0d1d50601a..7cb616a7ef14 100644 --- a/clang/bindings/python/tests/cindex/test_cursor.py +++ b/clang/bindings/python/tests/cindex/test_cursor.py @@ -784,6 +784,21 @@ int count(int a, int b){ cursor = get_cursor(tu, "reg") self.assertEqual(cursor.storage_class, StorageClass.REGISTER) + def test_function_inlined(self): + tu = get_tu( + """ +inline void f_inline(void); +void f_noninline(void); +int d_noninline; +""" + ) + cursor = get_cursor(tu, "f_inline") + self.assertEqual(cursor.is_function_inlined(), True) + cursor = get_cursor(tu, "f_noninline") + self.assertEqual(cursor.is_function_inlined(), False) + cursor = get_cursor(tu, "d_noninline") + self.assertEqual(cursor.is_function_inlined(), False) + def test_availability(self): tu = get_tu("class A { A(A const&) = delete; };", lang="cpp") diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index ba737b9efb00..73aaaad8b32e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -659,6 +659,7 @@ Sanitizers Python Binding Changes ---------------------- +- Exposed ``clang_Cursor_isFunctionInlined``. - Exposed ``clang_getCursorLanguage`` via ``Cursor.language``. 
- Add all missing ``CursorKind``s, ``TypeKind``s and ``ExceptionSpecificationKind``s from ``Index.h`` -- cgit v1.2.3 From 39f08eb997424626bd396a0529daf4ab816d19e6 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 30 Oct 2025 20:56:13 +0000 Subject: Revert "[TSan] Make Test work with Internal Shell" This reverts commit 87616939190b1c0d322f0f3c1d69ba3626d18582. This broke a buildbot. Reverting so I can ensure I'm committing with the proper fix given this didn't reproduce locally on my Linux box. https://lab.llvm.org/buildbot/#/builders/174/builds/26760 --- compiler-rt/test/tsan/ignore_lib0.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/compiler-rt/test/tsan/ignore_lib0.cpp b/compiler-rt/test/tsan/ignore_lib0.cpp index 1673e8df6c50..cba58c617703 100644 --- a/compiler-rt/test/tsan/ignore_lib0.cpp +++ b/compiler-rt/test/tsan/ignore_lib0.cpp @@ -4,13 +4,11 @@ // RUN: %clangxx_tsan -O1 -fno-builtin %s -DLIB -fPIC -fno-sanitize=thread -shared -o %t-dir/libignore_lib0.so // RUN: %clangxx_tsan -O1 %s -L%t-dir -lignore_lib0 %link_libcxx_tsan -o %t // RUN: echo running w/o suppressions: -// RUN: echo -n %t-dir > %t.ld_library_path -// RUN: python -c "if 'LD_LIBRARY_PATH' in __import__('os').environ: print(':' + __import__('os').environ['LD_LIBRARY_PATH'], end='')" >> %t.ld_library_path -// RUN: env LD_LIBRARY_PATH=%{readfile:%t.ld_library_path} %deflake %run %t | FileCheck %s --check-prefix=CHECK-NOSUPP // RUN: echo running with suppressions: -// RUN: env LD_LIBRARY_PATH=%{readfile:%t.ld_library_path} %env_tsan_opts=suppressions='%s.supp' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP // RUN: echo running with generic suppression of 
noninstrumented code: -// RUN: env LD_LIBRARY_PATH=%{readfile:%t.ld_library_path} %env_tsan_opts=ignore_noninstrumented_modules=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP +// RUN: env LD_LIBRARY_PATH=%t-dir${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} %env_tsan_opts=ignore_noninstrumented_modules=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP // Tests that interceptors coming from a library specified in called_from_lib // suppression are ignored. -- cgit v1.2.3 From 3475b3f97b3060880c31c8b8a440f47f9d654e49 Mon Sep 17 00:00:00 2001 From: Greg Clayton Date: Thu, 30 Oct 2025 14:08:44 -0700 Subject: Enable LLDB to load large dSYM files. (#164471) llvm-dsymutil can produce mach-o files where some sections in __DWARF exceed the 4GB barrier and subsequent sections in the dSYM will be inaccessible because the mach-o section_64 structure only has a 32 bit file offset. This patch enables LLDB to load a large dSYM file by figuring out when this happens and properly adjusting the file offset of the LLDB sections. I was unable to add a test as obj2yaml and yaml2obj are broken for mach-o files and they can't convert a yaml file back into a valid mach-o object file. Any suggestions for adding a test would be appreciated. 
--- .../Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp | 30 +++++++++++++++------ .../MachO/Inputs/section-overflow-binary | Bin 0 -> 344 bytes .../ObjectFile/MachO/section-overflow-binary.test | 13 +++++++++ 3 files changed, 35 insertions(+), 8 deletions(-) create mode 100644 lldb/test/Shell/ObjectFile/MachO/Inputs/section-overflow-binary create mode 100644 lldb/test/Shell/ObjectFile/MachO/section-overflow-binary.test diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 9cdb8467bfc6..c8e520d687f6 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -1674,6 +1674,10 @@ void ObjectFileMachO::ProcessSegmentCommand( uint32_t segment_sect_idx; const lldb::user_id_t first_segment_sectID = context.NextSectionIdx + 1; + // 64 bit mach-o files have sections with 32 bit file offsets. If any section + // data end will exceed UINT32_MAX, then we need to do some bookkeeping to + // ensure we can access this data correctly. + uint64_t section_offset_adjust = 0; const uint32_t num_u32s = load_cmd.cmd == LC_SEGMENT ? 7 : 8; for (segment_sect_idx = 0; segment_sect_idx < load_cmd.nsects; ++segment_sect_idx) { @@ -1697,6 +1701,16 @@ void ObjectFileMachO::ProcessSegmentCommand( // isn't stored in the abstracted Sections. m_mach_sections.push_back(sect64); + // Make sure we can load sections in mach-o files where some sections cross + // a 4GB boundary. llvm::MachO::section_64 have only 32 bit file offsets + // for the file offset of the section contents, so we need to track and + // sections that overflow and adjust the offsets accordingly. 
+ const uint64_t section_file_offset = + (uint64_t)sect64.offset + section_offset_adjust; + const uint64_t end_section_offset = (uint64_t)sect64.offset + sect64.size; + if (end_section_offset >= UINT32_MAX) + section_offset_adjust += end_section_offset & 0xFFFFFFFF00000000ull; + if (add_section) { ConstString section_name( sect64.sectname, strnlen(sect64.sectname, sizeof(sect64.sectname))); @@ -1736,13 +1750,13 @@ void ObjectFileMachO::ProcessSegmentCommand( } // Grow the section size as needed. - if (sect64.offset) { + if (section_file_offset) { const lldb::addr_t segment_min_file_offset = segment->GetFileOffset(); const lldb::addr_t segment_max_file_offset = segment_min_file_offset + segment->GetFileSize(); - const lldb::addr_t section_min_file_offset = sect64.offset; + const lldb::addr_t section_min_file_offset = section_file_offset; const lldb::addr_t section_max_file_offset = section_min_file_offset + sect64.size; const lldb::addr_t new_file_offset = @@ -1769,10 +1783,10 @@ void ObjectFileMachO::ProcessSegmentCommand( // other sections. sect64.addr, // File VM address == addresses as they are // found in the object file - sect64.size, // VM size in bytes of this section - sect64.offset, // Offset to the data for this section in + sect64.size, // VM size in bytes of this section + section_file_offset, // Offset to the data for this section in // the file - sect64.offset ? sect64.size : 0, // Size in bytes of + section_file_offset ? sect64.size : 0, // Size in bytes of // this section as // found in the file sect64.align, @@ -1792,14 +1806,14 @@ void ObjectFileMachO::ProcessSegmentCommand( SectionSP section_sp(new Section( segment_sp, module_sp, this, ++context.NextSectionIdx, section_name, sect_type, sect64.addr - segment_sp->GetFileAddress(), sect64.size, - sect64.offset, sect64.offset == 0 ? 0 : sect64.size, sect64.align, - sect64.flags)); + section_file_offset, section_file_offset == 0 ? 
0 : sect64.size, + sect64.align, sect64.flags)); // Set the section to be encrypted to match the segment bool section_is_encrypted = false; if (!segment_is_encrypted && load_cmd.filesize != 0) section_is_encrypted = context.EncryptedRanges.FindEntryThatContains( - sect64.offset) != nullptr; + section_file_offset) != nullptr; section_sp->SetIsEncrypted(segment_is_encrypted || section_is_encrypted); section_sp->SetPermissions(segment_permissions); diff --git a/lldb/test/Shell/ObjectFile/MachO/Inputs/section-overflow-binary b/lldb/test/Shell/ObjectFile/MachO/Inputs/section-overflow-binary new file mode 100644 index 000000000000..19dc2f4ac9ff Binary files /dev/null and b/lldb/test/Shell/ObjectFile/MachO/Inputs/section-overflow-binary differ diff --git a/lldb/test/Shell/ObjectFile/MachO/section-overflow-binary.test b/lldb/test/Shell/ObjectFile/MachO/section-overflow-binary.test new file mode 100644 index 000000000000..76c335f65a76 --- /dev/null +++ b/lldb/test/Shell/ObjectFile/MachO/section-overflow-binary.test @@ -0,0 +1,13 @@ +RUN: %lldb -b %p/Inputs/section-overflow-binary \ +RUN: -o 'script dwarf = lldb.target.module[0].sections[0]' \ +RUN: -o 'script section = dwarf.GetSubSectionAtIndex(0)' \ +RUN: -o "script print(f'{section.GetName()} file_offset=0x{section.GetFileOffset():016x}')" \ +RUN: -o 'script section = dwarf.GetSubSectionAtIndex(1)' \ +RUN: -o "script print(f'{section.GetName()} file_offset=0x{section.GetFileOffset():016x}')" \ +RUN: -o 'script section = dwarf.GetSubSectionAtIndex(2)' \ +RUN: -o "script print(f'{section.GetName()} file_offset=0x{section.GetFileOffset():016x}')" \ +RUN: | FileCheck %s + +CHECK: __debug_abbrev file_offset=0x00000000fffffff0 +CHECK: __debug_info file_offset=0x0000000100000010 +CHECK: __debug_line file_offset=0x0000000300000010 -- cgit v1.2.3 From 1079f7a66ad7bbaf845fb3bd97c00c6b17169d54 Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Thu, 30 Oct 2025 23:15:19 +0200 Subject: [NFCI][lldb][test] Add missing includes (#165772) 
`std::ref()` is provided in `<functional>` and with recent libc++ changes it no longer seems to be included transitively. Fix by including explicitly. --- lldb/test/Shell/Register/Inputs/x86-multithread-read.cpp | 1 + lldb/test/Shell/Register/Inputs/x86-multithread-write.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/lldb/test/Shell/Register/Inputs/x86-multithread-read.cpp b/lldb/test/Shell/Register/Inputs/x86-multithread-read.cpp index c5f571fc1d2c..0d2869c0c577 100644 --- a/lldb/test/Shell/Register/Inputs/x86-multithread-read.cpp +++ b/lldb/test/Shell/Register/Inputs/x86-multithread-read.cpp @@ -1,4 +1,5 @@ #include +#include <functional> #include #include diff --git a/lldb/test/Shell/Register/Inputs/x86-multithread-write.cpp b/lldb/test/Shell/Register/Inputs/x86-multithread-write.cpp index 320f9e938e5b..1f4e91acc4c0 100644 --- a/lldb/test/Shell/Register/Inputs/x86-multithread-write.cpp +++ b/lldb/test/Shell/Register/Inputs/x86-multithread-write.cpp @@ -1,6 +1,7 @@ #include #include #include +#include <functional> #include #include -- cgit v1.2.3 From 51c6a04496348fcdb8c68b02547a413a7ab44941 Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Thu, 30 Oct 2025 23:15:36 +0200 Subject: [NFCI][lldb] Omit redundant member initializer list (#164451) These all have member initializers of the same value so they're redundant. 
Fixes: 47b9aadb3215e914119d0c45827ea58cb7499204 --- lldb/include/lldb/Target/Process.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h index 8f5892e16ced..c1f9785e76f9 100644 --- a/lldb/include/lldb/Target/Process.h +++ b/lldb/include/lldb/Target/Process.h @@ -127,10 +127,7 @@ class ProcessAttachInfo : public ProcessInstanceInfo { public: ProcessAttachInfo() = default; - ProcessAttachInfo(const ProcessLaunchInfo &launch_info) - : m_resume_count(0), m_wait_for_launch(false), m_ignore_existing(true), - m_continue_once_attached(false), m_detach_on_error(true), - m_async(false) { + ProcessAttachInfo(const ProcessLaunchInfo &launch_info) { ProcessInfo::operator=(launch_info); SetProcessPluginName(launch_info.GetProcessPluginName()); SetResumeCount(launch_info.GetResumeCount()); -- cgit v1.2.3 From 4174429c591e0eebadcb2bbf77ca4b5a8a5baa83 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Thu, 30 Oct 2025 14:17:34 -0700 Subject: [GitHub] Move Copilot instructions for LLVM (#165763) GitHub allows specifying custom instructions for the GitHub Copilot reviewer [1]. Currently, we have a top level file, but GitHub supports having different instructions for different files, which requires creating an `instructions` subdirectory with multiple files and a path it applies to. This PR moves the top level file into a new `instructions` directory, and makes it apply to the `llvm/` subdirectory. I spoke with Mircea at the Dev Meeting and that should match his original intent. 
[1] https://docs.github.com/en/copilot/how-tos/use-copilot-agents/request-a-code-review/use-code-review#customizing-copilots-reviews-with-custom-instructions --- .github/copilot-instructions.md | 4 ---- .github/instructions/llvm.instructions.md | 8 ++++++++ 2 files changed, 8 insertions(+), 4 deletions(-) delete mode 100644 .github/copilot-instructions.md create mode 100644 .github/instructions/llvm.instructions.md diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md deleted file mode 100644 index 03748938700e..000000000000 --- a/.github/copilot-instructions.md +++ /dev/null @@ -1,4 +0,0 @@ -When performing a code review, pay close attention to code modifying a function's -control flow. Could the change result in the corruption of performance profile -data? Could the change result in invalid debug information, in particular for -branches and calls? diff --git a/.github/instructions/llvm.instructions.md b/.github/instructions/llvm.instructions.md new file mode 100644 index 000000000000..3f1308f51e67 --- /dev/null +++ b/.github/instructions/llvm.instructions.md @@ -0,0 +1,8 @@ +--- +applyTo: llvm/**/* +--- + +When performing a code review, pay close attention to code modifying a function's +control flow. Could the change result in the corruption of performance profile +data? Could the change result in invalid debug information, in particular for +branches and calls? -- cgit v1.2.3 From 21041c92925faf529717d96279b6d8f20b17c70c Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Thu, 30 Oct 2025 23:18:32 +0200 Subject: [NFCI][lldb][test] Fix mismatched C/C++ substitutions (#165773) Most of the cases were where a C++ file was being compiled with the C substitution. There were a few cases of the opposite though. LLDB seems to be the only real culprit in the LLVM codebase for these mismatches. Rest of the LLVM presumably sticks at least language-specific options in the common substitutions making the mistakes immediately apparent. 
I found these by using Clang frontend configuration files containing language-specific options for both C and C++ (e.g. `-std=c2y` and `-std=c++26`). --- .../functionalities/breakpoint/same_cu_name/Makefile | 8 ++++---- lldb/test/Shell/Breakpoint/jit-loader_jitlink_elf.test | 4 ++-- lldb/test/Shell/Breakpoint/jit-loader_rtdyld_elf.test | 4 ++-- .../Shell/Commands/command-image-dump-ast-colored.test | 2 +- lldb/test/Shell/Commands/command-image-dump-ast.test | 2 +- lldb/test/Shell/Commands/list-header.test | 8 ++++---- lldb/test/Shell/Error/cleanup.cpp | 2 +- lldb/test/Shell/Expr/TestExprLanguageNote.test | 2 +- lldb/test/Shell/Expr/TestLambdaExprImport.test | 2 +- lldb/test/Shell/ObjectFile/ELF/elf-memory.test | 2 +- .../verbose_trap-in-stl-callback-user-leaf.test | 2 +- .../Shell/Recognizer/verbose_trap-in-stl-callback.test | 2 +- .../Recognizer/verbose_trap-in-stl-max-depth.test | 2 +- .../Shell/Recognizer/verbose_trap-in-stl-nested.test | 2 +- lldb/test/Shell/Recognizer/verbose_trap-in-stl.test | 2 +- lldb/test/Shell/Recognizer/verbose_trap.test | 8 ++++---- lldb/test/Shell/Settings/TestChildCountTruncation.test | 2 +- lldb/test/Shell/Settings/TestChildDepthTruncation.test | 2 +- lldb/test/Shell/Settings/TestCxxFrameFormat.test | 2 +- lldb/test/Shell/Settings/TestCxxFrameFormatEmpty.test | 2 +- .../Settings/TestCxxFrameFormatMixedLanguages.test | 12 ++++++------ .../Settings/TestCxxFrameFormatPartialFailure.test | 2 +- .../Settings/TestFrameFormatFunctionBasename.test | 4 ++-- .../TestFrameFormatFunctionFormattedArguments.test | 4 ++-- .../Settings/TestFrameFormatFunctionQualifiers.test | 4 ++-- .../Shell/Settings/TestFrameFormatFunctionReturn.test | 4 ++-- .../Shell/Settings/TestFrameFormatFunctionScope.test | 4 ++-- .../Shell/Settings/TestFrameFormatFunctionSuffix.test | 2 +- .../TestFrameFormatFunctionTemplateArguments.test | 4 ++-- lldb/test/Shell/Settings/TestFrameFunctionInlined.test | 2 +- .../DWARF/split-dwarf-expression-eval-bug.cpp | 8 ++++---- 
.../Shell/SymbolFile/DWARF/x86/apple-index-is-used.cpp | 2 +- .../SymbolFile/DWARF/x86/debug-names-compressed.cpp | 2 +- .../SymbolFile/DWARF/x86/debug-types-debug-names.cpp | 2 +- .../DWARF/x86/debug-types-dwo-cross-reference.cpp | 4 ++-- .../SymbolFile/DWARF/x86/dwarf5-index-is-used.cpp | 2 +- .../SymbolFile/DWARF/x86/dwarf5-partial-index.cpp | 4 ++-- .../SymbolFile/DWARF/x86/dwo-not-found-warning.cpp | 2 +- .../SymbolFile/DWARF/x86/dwp-foreign-type-units.cpp | 4 ++-- .../Shell/SymbolFile/DWARF/x86/dwp-index-cache.cpp | 8 ++++---- .../SymbolFile/DWARF/x86/dwp-separate-debug-file.cpp | 6 +++--- .../Shell/SymbolFile/DWARF/x86/find-basic-function.cpp | 6 +++--- .../SymbolFile/DWARF/x86/find-basic-namespace.cpp | 6 +++--- .../Shell/SymbolFile/DWARF/x86/find-basic-type.cpp | 6 +++--- .../Shell/SymbolFile/DWARF/x86/find-basic-variable.cpp | 6 +++--- .../Shell/SymbolFile/DWARF/x86/find-function-regex.cpp | 6 +++--- .../SymbolFile/DWARF/x86/find-method-local-struct.cpp | 2 +- lldb/test/Shell/SymbolFile/DWARF/x86/find-method.cpp | 6 +++--- .../SymbolFile/DWARF/x86/find-qualified-variable.cpp | 2 +- .../Shell/SymbolFile/DWARF/x86/find-variable-dwo.cpp | 4 ++-- .../Shell/SymbolFile/DWARF/x86/find-variable-file.cpp | 18 +++++++++--------- .../Shell/SymbolFile/DWARF/x86/member-pointers.cpp | 2 +- .../Shell/SymbolFile/DWARF/x86/module-ownership.mm | 2 +- .../DWARF/x86/no_unique_address-with-bitfields.cpp | 2 +- .../SymbolFile/DWARF/x86/type-definition-search.cpp | 12 ++++++------ .../SymbolFile/DWARF/x86/type-unit-same-basename.cpp | 4 ++-- 56 files changed, 116 insertions(+), 116 deletions(-) diff --git a/lldb/test/API/functionalities/breakpoint/same_cu_name/Makefile b/lldb/test/API/functionalities/breakpoint/same_cu_name/Makefile index b19e7818601e..b508da24c682 100644 --- a/lldb/test/API/functionalities/breakpoint/same_cu_name/Makefile +++ b/lldb/test/API/functionalities/breakpoint/same_cu_name/Makefile @@ -4,16 +4,16 @@ LD_EXTRAS := ns1.o ns2.o ns3.o ns4.o a.out: main.o 
ns1.o ns2.o ns3.o ns4.o ns1.o: common.cpp - $(CC) -gdwarf -c -DNAMESPACE=ns1 -o $@ $< + $(CXX) -gdwarf -c -DNAMESPACE=ns1 -o $@ $< ns2.o: common.cpp - $(CC) -gdwarf -c -DNAMESPACE=ns2 -o $@ $< + $(CXX) -gdwarf -c -DNAMESPACE=ns2 -o $@ $< ns3.o: common.cpp - $(CC) -gdwarf -c -DNAMESPACE=ns3 -o $@ $< + $(CXX) -gdwarf -c -DNAMESPACE=ns3 -o $@ $< ns4.o: common.cpp - $(CC) -gdwarf -c -DNAMESPACE=ns4 -o $@ $< + $(CXX) -gdwarf -c -DNAMESPACE=ns4 -o $@ $< include Makefile.rules diff --git a/lldb/test/Shell/Breakpoint/jit-loader_jitlink_elf.test b/lldb/test/Shell/Breakpoint/jit-loader_jitlink_elf.test index 52c86fa5530b..9a972f1f1ece 100644 --- a/lldb/test/Shell/Breakpoint/jit-loader_jitlink_elf.test +++ b/lldb/test/Shell/Breakpoint/jit-loader_jitlink_elf.test @@ -3,8 +3,8 @@ # JITLink is the Orc-specific JIT linker implementation. # -# RUN: %clang -g -S -emit-llvm -fPIC --target=x86_64-unknown-unknown-elf \ -# RUN: -o %t.ll %p/Inputs/jitbp.cpp +# RUN: %clangxx -g -S -emit-llvm -fPIC --target=x86_64-unknown-unknown-elf \ +# RUN: -o %t.ll %p/Inputs/jitbp.cpp # RUN: %lldb -b -o 'settings set plugin.jit-loader.gdb.enable on' -o 'b jitbp' \ # RUN: -o 'run --jit-linker=jitlink %t.ll' lli | FileCheck %s diff --git a/lldb/test/Shell/Breakpoint/jit-loader_rtdyld_elf.test b/lldb/test/Shell/Breakpoint/jit-loader_rtdyld_elf.test index b34a5673936f..ae9402a51949 100644 --- a/lldb/test/Shell/Breakpoint/jit-loader_rtdyld_elf.test +++ b/lldb/test/Shell/Breakpoint/jit-loader_rtdyld_elf.test @@ -3,8 +3,8 @@ # RuntimeDyld can be used to link and load emitted code for both, MCJIT and Orc. 
# -# RUN: %clang -g -S -emit-llvm --target=x86_64-unknown-unknown-elf \ -# RUN: -o %t.ll %p/Inputs/jitbp.cpp +# RUN: %clangxx -g -S -emit-llvm --target=x86_64-unknown-unknown-elf \ +# RUN: -o %t.ll %p/Inputs/jitbp.cpp # # RUN: %lldb -b -o 'settings set plugin.jit-loader.gdb.enable on' -o 'b jitbp' \ # RUN: -o 'run --jit-kind=mcjit %t.ll' lli | FileCheck %s diff --git a/lldb/test/Shell/Commands/command-image-dump-ast-colored.test b/lldb/test/Shell/Commands/command-image-dump-ast-colored.test index 355ef6bb1d19..7fd70d234fbd 100644 --- a/lldb/test/Shell/Commands/command-image-dump-ast-colored.test +++ b/lldb/test/Shell/Commands/command-image-dump-ast-colored.test @@ -1,7 +1,7 @@ # Test AST dumping with and without color. # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Commands/command-image-dump-ast.test b/lldb/test/Shell/Commands/command-image-dump-ast.test index 3204022418cb..86fe1836a2c6 100644 --- a/lldb/test/Shell/Commands/command-image-dump-ast.test +++ b/lldb/test/Shell/Commands/command-image-dump-ast.test @@ -5,7 +5,7 @@ # UNSUPPORTED: system-windows # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Commands/list-header.test b/lldb/test/Shell/Commands/list-header.test index 53c4b786f181..27eaa1a4f29c 100644 --- a/lldb/test/Shell/Commands/list-header.test +++ b/lldb/test/Shell/Commands/list-header.test @@ -3,11 +3,11 @@ # XFAIL: target-windows ## Test that `list header.h:` works correctly when header is available. 
-## +## # RUN: split-file %s %t -# RUN: %clang_host -g %t/main_with_inlined.cc %t/foo.cc -o %t/main_with_inlined.out -# RUN: %clang_host -g %t/main_no_inlined.cc %t/foo.cc -o %t/main_no_inlined.out +# RUN: %clangxx_host -g %t/main_with_inlined.cc %t/foo.cc -o %t/main_with_inlined.out +# RUN: %clangxx_host -g %t/main_no_inlined.cc %t/foo.cc -o %t/main_no_inlined.out # RUN: %lldb %t/main_with_inlined.out -o "list foo.h:2" -o "exit" 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-INLINED @@ -19,7 +19,7 @@ # CHECK-INLINED: 2 extern int* ptr; # CHECK-INLINED: 3 void f(int x); -# CHECK-INLINED: 4 +# CHECK-INLINED: 4 # CHECK-INLINED: 5 inline void g(int x) { # CHECK-INLINED: 6 *ptr = x; // should crash here # CHECK-INLINED: 7 } diff --git a/lldb/test/Shell/Error/cleanup.cpp b/lldb/test/Shell/Error/cleanup.cpp index 6abc62dc4af9..1e83478a8333 100644 --- a/lldb/test/Shell/Error/cleanup.cpp +++ b/lldb/test/Shell/Error/cleanup.cpp @@ -1,5 +1,5 @@ // Test CommandObject is cleaned up even after commands fail due to not taking any argument. 
-// RUN: %clang_host -g %s -o %t +// RUN: %clangxx_host -g %s -o %t // RUN: %lldb -f %t -o "settings set interpreter.stop-command-source-on-error false" -s \ // RUN: %S/Inputs/cleanup.lldbinit int main() { return 0; } diff --git a/lldb/test/Shell/Expr/TestExprLanguageNote.test b/lldb/test/Shell/Expr/TestExprLanguageNote.test index e8e4e1399e45..e7da30816319 100644 --- a/lldb/test/Shell/Expr/TestExprLanguageNote.test +++ b/lldb/test/Shell/Expr/TestExprLanguageNote.test @@ -1,5 +1,5 @@ # RUN: split-file %s %t -# RUN: %clang_host -g %t/main.cpp -o %t.out +# RUN: %clangxx_host -g %t/main.cpp -o %t.out # # RUN: %lldb -x -b -o "settings set interpreter.stop-command-source-on-error false" \ # RUN: -s %t/no-target.input 2>&1 | FileCheck %s --check-prefix=CHECK-NO-TARGET diff --git a/lldb/test/Shell/Expr/TestLambdaExprImport.test b/lldb/test/Shell/Expr/TestLambdaExprImport.test index c57ce06453fe..b49a38036e56 100644 --- a/lldb/test/Shell/Expr/TestLambdaExprImport.test +++ b/lldb/test/Shell/Expr/TestLambdaExprImport.test @@ -3,7 +3,7 @@ # uses always). # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -o "settings set interpreter.stop-command-source-on-error false" \ # RUN: -x -b -s %t/commands.input %t.out 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/ObjectFile/ELF/elf-memory.test b/lldb/test/Shell/ObjectFile/ELF/elf-memory.test index 75a68edd2d34..170dc7682aab 100644 --- a/lldb/test/Shell/ObjectFile/ELF/elf-memory.test +++ b/lldb/test/Shell/ObjectFile/ELF/elf-memory.test @@ -11,7 +11,7 @@ // - verify that "image dump objfile" will dump the dynamic section of the // memory elf file and find the .dynamic string table. 
-// RUN: %clang_host %p/Inputs/memory-elf.cpp -g -O0 -o %t +// RUN: %clangxx_host %p/Inputs/memory-elf.cpp -g -O0 -o %t // RUN: %lldb %t -b \ // RUN: -o "b main" \ diff --git a/lldb/test/Shell/Recognizer/verbose_trap-in-stl-callback-user-leaf.test b/lldb/test/Shell/Recognizer/verbose_trap-in-stl-callback-user-leaf.test index 5a84c163453c..32b4095d9add 100644 --- a/lldb/test/Shell/Recognizer/verbose_trap-in-stl-callback-user-leaf.test +++ b/lldb/test/Shell/Recognizer/verbose_trap-in-stl-callback-user-leaf.test @@ -12,7 +12,7 @@ # UNSUPPORTED: system-windows # -# RUN: %clang_host -g -O0 %S/Inputs/verbose_trap-in-stl-callback-user-leaf.cpp -o %t.out +# RUN: %clangxx_host -g -O0 %S/Inputs/verbose_trap-in-stl-callback-user-leaf.cpp -o %t.out # RUN: %lldb -b -s %s %t.out | FileCheck %s --check-prefixes=CHECK run diff --git a/lldb/test/Shell/Recognizer/verbose_trap-in-stl-callback.test b/lldb/test/Shell/Recognizer/verbose_trap-in-stl-callback.test index b15bcb3a384f..c8c433c0a819 100644 --- a/lldb/test/Shell/Recognizer/verbose_trap-in-stl-callback.test +++ b/lldb/test/Shell/Recognizer/verbose_trap-in-stl-callback.test @@ -11,7 +11,7 @@ # UNSUPPORTED: system-windows # -# RUN: %clang_host -g -O0 %S/Inputs/verbose_trap-in-stl-callback.cpp -o %t.out +# RUN: %clangxx_host -g -O0 %S/Inputs/verbose_trap-in-stl-callback.cpp -o %t.out # RUN: %lldb -b -s %s %t.out | FileCheck %s --check-prefixes=CHECK run diff --git a/lldb/test/Shell/Recognizer/verbose_trap-in-stl-max-depth.test b/lldb/test/Shell/Recognizer/verbose_trap-in-stl-max-depth.test index 2ea6594643c9..d0789ac7dc67 100644 --- a/lldb/test/Shell/Recognizer/verbose_trap-in-stl-max-depth.test +++ b/lldb/test/Shell/Recognizer/verbose_trap-in-stl-max-depth.test @@ -4,7 +4,7 @@ # UNSUPPORTED: system-windows # -# RUN: %clang_host -g -O0 %S/Inputs/verbose_trap-in-stl-max-depth.cpp -o %t.out +# RUN: %clangxx_host -g -O0 %S/Inputs/verbose_trap-in-stl-max-depth.cpp -o %t.out # RUN: %lldb -b -s %s %t.out | FileCheck %s 
--check-prefixes=CHECK run diff --git a/lldb/test/Shell/Recognizer/verbose_trap-in-stl-nested.test b/lldb/test/Shell/Recognizer/verbose_trap-in-stl-nested.test index 81a492d1ed57..68a4ea612c0d 100644 --- a/lldb/test/Shell/Recognizer/verbose_trap-in-stl-nested.test +++ b/lldb/test/Shell/Recognizer/verbose_trap-in-stl-nested.test @@ -3,7 +3,7 @@ # UNSUPPORTED: system-windows # -# RUN: %clang_host -g -O0 %S/Inputs/verbose_trap-in-stl-nested.cpp -o %t.out +# RUN: %clangxx_host -g -O0 %S/Inputs/verbose_trap-in-stl-nested.cpp -o %t.out # RUN: %lldb -b -s %s %t.out | FileCheck %s --check-prefixes=CHECK run diff --git a/lldb/test/Shell/Recognizer/verbose_trap-in-stl.test b/lldb/test/Shell/Recognizer/verbose_trap-in-stl.test index dd08290174e3..bd4851146b40 100644 --- a/lldb/test/Shell/Recognizer/verbose_trap-in-stl.test +++ b/lldb/test/Shell/Recognizer/verbose_trap-in-stl.test @@ -3,7 +3,7 @@ # UNSUPPORTED: system-windows # -# RUN: %clang_host -g -O0 %S/Inputs/verbose_trap-in-stl.cpp -o %t.out +# RUN: %clangxx_host -g -O0 %S/Inputs/verbose_trap-in-stl.cpp -o %t.out # RUN: %lldb -b -s %s %t.out | FileCheck %s --check-prefixes=CHECK run diff --git a/lldb/test/Shell/Recognizer/verbose_trap.test b/lldb/test/Shell/Recognizer/verbose_trap.test index dafab7bdea68..ab0df082cc03 100644 --- a/lldb/test/Shell/Recognizer/verbose_trap.test +++ b/lldb/test/Shell/Recognizer/verbose_trap.test @@ -1,15 +1,15 @@ # UNSUPPORTED: system-windows # -# RUN: %clang_host -g -O0 %S/Inputs/verbose_trap.cpp -o %t.out -DVERBOSE_TRAP_TEST_CATEGORY=\"Foo\" -DVERBOSE_TRAP_TEST_MESSAGE=\"Bar\" +# RUN: %clangxx_host -g -O0 %S/Inputs/verbose_trap.cpp -o %t.out -DVERBOSE_TRAP_TEST_CATEGORY=\"Foo\" -DVERBOSE_TRAP_TEST_MESSAGE=\"Bar\" # RUN: %lldb -b -s %s %t.out | FileCheck %s --check-prefixes=CHECK,CHECK-BOTH # -# RUN: %clang_host -g -O0 %S/Inputs/verbose_trap.cpp -o %t.out -DVERBOSE_TRAP_TEST_CATEGORY=\"\" -DVERBOSE_TRAP_TEST_MESSAGE=\"Bar\" +# RUN: %clangxx_host -g -O0 %S/Inputs/verbose_trap.cpp -o %t.out 
-DVERBOSE_TRAP_TEST_CATEGORY=\"\" -DVERBOSE_TRAP_TEST_MESSAGE=\"Bar\" # RUN: %lldb -b -s %s %t.out | FileCheck %s --check-prefixes=CHECK,CHECK-MESSAGE_ONLY # -# RUN: %clang_host -g -O0 %S/Inputs/verbose_trap.cpp -o %t.out -DVERBOSE_TRAP_TEST_CATEGORY=\"Foo\" -DVERBOSE_TRAP_TEST_MESSAGE=\"\" +# RUN: %clangxx_host -g -O0 %S/Inputs/verbose_trap.cpp -o %t.out -DVERBOSE_TRAP_TEST_CATEGORY=\"Foo\" -DVERBOSE_TRAP_TEST_MESSAGE=\"\" # RUN: %lldb -b -s %s %t.out | FileCheck %s --check-prefixes=CHECK,CHECK-CATEGORY_ONLY # -# RUN: %clang_host -g -O0 %S/Inputs/verbose_trap.cpp -o %t.out -DVERBOSE_TRAP_TEST_CATEGORY=\"\" -DVERBOSE_TRAP_TEST_MESSAGE=\"\" +# RUN: %clangxx_host -g -O0 %S/Inputs/verbose_trap.cpp -o %t.out -DVERBOSE_TRAP_TEST_CATEGORY=\"\" -DVERBOSE_TRAP_TEST_MESSAGE=\"\" # RUN: %lldb -b -s %s %t.out | FileCheck %s --check-prefixes=CHECK,CHECK-NONE run diff --git a/lldb/test/Shell/Settings/TestChildCountTruncation.test b/lldb/test/Shell/Settings/TestChildCountTruncation.test index da6436cb5ca2..b66d0df98306 100644 --- a/lldb/test/Shell/Settings/TestChildCountTruncation.test +++ b/lldb/test/Shell/Settings/TestChildCountTruncation.test @@ -2,7 +2,7 @@ # when target.max-children-count wasn't explicitly set. # RUN: split-file %s %t -# RUN: %clang_host -g %t/main.cpp -o %t.out +# RUN: %clangxx_host -g %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/dwim-commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s --check-prefix=DWIM # diff --git a/lldb/test/Shell/Settings/TestChildDepthTruncation.test b/lldb/test/Shell/Settings/TestChildDepthTruncation.test index 12f5661600ae..7e4fbbef9e45 100644 --- a/lldb/test/Shell/Settings/TestChildDepthTruncation.test +++ b/lldb/test/Shell/Settings/TestChildDepthTruncation.test @@ -2,7 +2,7 @@ # when target.max-children-depth wasn't explicitly set. 
# RUN: split-file %s %t -# RUN: %clang_host -g %t/main.cpp -o %t.out +# RUN: %clangxx_host -g %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/dwim-commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s --check-prefix=DWIM # diff --git a/lldb/test/Shell/Settings/TestCxxFrameFormat.test b/lldb/test/Shell/Settings/TestCxxFrameFormat.test index d70db582e975..3ee92d53492f 100644 --- a/lldb/test/Shell/Settings/TestCxxFrameFormat.test +++ b/lldb/test/Shell/Settings/TestCxxFrameFormat.test @@ -3,7 +3,7 @@ # Test the plugin.cplusplus.display.function-name-format setting. # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Settings/TestCxxFrameFormatEmpty.test b/lldb/test/Shell/Settings/TestCxxFrameFormatEmpty.test index 0a6d2723ded3..a0550b733d78 100644 --- a/lldb/test/Shell/Settings/TestCxxFrameFormatEmpty.test +++ b/lldb/test/Shell/Settings/TestCxxFrameFormatEmpty.test @@ -5,7 +5,7 @@ # ${function.name-with-args}. # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Settings/TestCxxFrameFormatMixedLanguages.test b/lldb/test/Shell/Settings/TestCxxFrameFormatMixedLanguages.test index bafd36f5ae17..679d6e4d5abe 100644 --- a/lldb/test/Shell/Settings/TestCxxFrameFormatMixedLanguages.test +++ b/lldb/test/Shell/Settings/TestCxxFrameFormatMixedLanguages.test @@ -4,9 +4,9 @@ # when interoperating multiple languages. 
# RUN: split-file %s %t -# RUN: %clangxx_host -x c -c -g %t/lib.c -o %t.clib.o +# RUN: %clang_host -x c -c -g %t/lib.c -o %t.clib.o # RUN: %clangxx_host -c -g %t/lib.cpp -o %t.cxxlib.o -# RUN: %clangxx_host %t/main.m %t.cxxlib.o %t.clib.o -o %t.out +# RUN: %clang_host %t/main.m %t.cxxlib.o %t.clib.o -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 | FileCheck %s #--- lib.c @@ -47,7 +47,7 @@ break set -n method run bt -# CHECK: custom-frame 'this affects C++ only' -# CHECK: custom-frame 'this affects C++ only' -# CHECK: custom-frame 'func' -# CHECK: custom-frame 'main' +# CHECK: custom-frame 'this affects C++ only' +# CHECK: custom-frame 'this affects C++ only' +# CHECK: custom-frame 'func' +# CHECK: custom-frame 'main' diff --git a/lldb/test/Shell/Settings/TestCxxFrameFormatPartialFailure.test b/lldb/test/Shell/Settings/TestCxxFrameFormatPartialFailure.test index e914ff7a010d..f279f07afcda 100644 --- a/lldb/test/Shell/Settings/TestCxxFrameFormatPartialFailure.test +++ b/lldb/test/Shell/Settings/TestCxxFrameFormatPartialFailure.test @@ -5,7 +5,7 @@ # were successful. # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Settings/TestFrameFormatFunctionBasename.test b/lldb/test/Shell/Settings/TestFrameFormatFunctionBasename.test index c0008e50927b..56ec09e2f951 100644 --- a/lldb/test/Shell/Settings/TestFrameFormatFunctionBasename.test +++ b/lldb/test/Shell/Settings/TestFrameFormatFunctionBasename.test @@ -3,11 +3,11 @@ # Test the ${function.basename} frame-format variable. 
# RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s # -# RUN: %clang_host -O0 %t/main.cpp -o %t-nodebug.out +# RUN: %clangxx_host -O0 %t/main.cpp -o %t-nodebug.out # RUN: %lldb -x -b -s %t/commands.input %t-nodebug.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Settings/TestFrameFormatFunctionFormattedArguments.test b/lldb/test/Shell/Settings/TestFrameFormatFunctionFormattedArguments.test index 04f51701a2a2..f20fc8ca77ae 100644 --- a/lldb/test/Shell/Settings/TestFrameFormatFunctionFormattedArguments.test +++ b/lldb/test/Shell/Settings/TestFrameFormatFunctionFormattedArguments.test @@ -3,11 +3,11 @@ # Test the ${function.formatted-arguments} frame-format variable. # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s # -# RUN: %clang_host -O0 %t/main.cpp -o %t-nodebug.out +# RUN: %clangxx_host -O0 %t/main.cpp -o %t-nodebug.out # RUN: %lldb -x -b -s %t/commands.input %t-nodebug.out -o exit 2>&1 \ # RUN: | FileCheck %s --check-prefix=CHECK-NODEBUG diff --git a/lldb/test/Shell/Settings/TestFrameFormatFunctionQualifiers.test b/lldb/test/Shell/Settings/TestFrameFormatFunctionQualifiers.test index b1dfe834c1de..d05e60b0e8d1 100644 --- a/lldb/test/Shell/Settings/TestFrameFormatFunctionQualifiers.test +++ b/lldb/test/Shell/Settings/TestFrameFormatFunctionQualifiers.test @@ -3,11 +3,11 @@ # Test the ${function.qualifiers} frame-format variable. 
# RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s # -# RUN: %clang_host -O0 %t/main.cpp -o %t-nodebug.out +# RUN: %clangxx_host -O0 %t/main.cpp -o %t-nodebug.out # RUN: %lldb -x -b -s %t/commands.input %t-nodebug.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Settings/TestFrameFormatFunctionReturn.test b/lldb/test/Shell/Settings/TestFrameFormatFunctionReturn.test index f913162a1aa6..bb78258aba75 100644 --- a/lldb/test/Shell/Settings/TestFrameFormatFunctionReturn.test +++ b/lldb/test/Shell/Settings/TestFrameFormatFunctionReturn.test @@ -4,11 +4,11 @@ # frame-format variables. # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s # -# RUN: %clang_host -O0 %t/main.cpp -o %t-nodebug.out +# RUN: %clangxx_host -O0 %t/main.cpp -o %t-nodebug.out # RUN: %lldb -x -b -s %t/commands.input %t-nodebug.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Settings/TestFrameFormatFunctionScope.test b/lldb/test/Shell/Settings/TestFrameFormatFunctionScope.test index a28c16f95a9e..f4a17661c360 100644 --- a/lldb/test/Shell/Settings/TestFrameFormatFunctionScope.test +++ b/lldb/test/Shell/Settings/TestFrameFormatFunctionScope.test @@ -3,11 +3,11 @@ # Test the ${function.scope} frame-format variable. 
# RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s # -# RUN: %clang_host -O0 %t/main.cpp -o %t-nodebug.out +# RUN: %clangxx_host -O0 %t/main.cpp -o %t-nodebug.out # RUN: %lldb -x -b -s %t/commands.input %t-nodebug.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Settings/TestFrameFormatFunctionSuffix.test b/lldb/test/Shell/Settings/TestFrameFormatFunctionSuffix.test index 4609a0412a0a..5883c722f333 100644 --- a/lldb/test/Shell/Settings/TestFrameFormatFunctionSuffix.test +++ b/lldb/test/Shell/Settings/TestFrameFormatFunctionSuffix.test @@ -3,7 +3,7 @@ # Test the ${function.suffix} frame-format variable. # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Settings/TestFrameFormatFunctionTemplateArguments.test b/lldb/test/Shell/Settings/TestFrameFormatFunctionTemplateArguments.test index ac8a32820c88..a09a9610f48d 100644 --- a/lldb/test/Shell/Settings/TestFrameFormatFunctionTemplateArguments.test +++ b/lldb/test/Shell/Settings/TestFrameFormatFunctionTemplateArguments.test @@ -3,11 +3,11 @@ # Test the ${function.template-arguments} frame-format variable. 
# RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s # -# RUN: %clang_host -O0 %t/main.cpp -o %t-nodebug.out +# RUN: %clangxx_host -O0 %t/main.cpp -o %t-nodebug.out # RUN: %lldb -x -b -s %t/commands.input %t-nodebug.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/Settings/TestFrameFunctionInlined.test b/lldb/test/Shell/Settings/TestFrameFunctionInlined.test index 5db34b416085..1bb7ab486bcf 100644 --- a/lldb/test/Shell/Settings/TestFrameFunctionInlined.test +++ b/lldb/test/Shell/Settings/TestFrameFunctionInlined.test @@ -6,7 +6,7 @@ # REQUIRES: (system-windows && lld) || !system-windows # RUN: split-file %s %t -# RUN: %clang_host -g -gdwarf %t/main.cpp -o %t.out %if system-windows %{-fuse-ld=lld%} +# RUN: %clangxx_host -g -gdwarf %t/main.cpp -o %t.out %if system-windows %{-fuse-ld=lld%} # RUN: %lldb -x -b -s %t/commands.input %t.out -o exit 2>&1 \ # RUN: | FileCheck %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/split-dwarf-expression-eval-bug.cpp b/lldb/test/Shell/SymbolFile/DWARF/split-dwarf-expression-eval-bug.cpp index 4a8004ddd287..b02eea6bbc4f 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/split-dwarf-expression-eval-bug.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/split-dwarf-expression-eval-bug.cpp @@ -7,10 +7,10 @@ // UNSUPPORTED: system-darwin, system-windows -// RUN: %clang_host -c -gsplit-dwarf -g %s -o %t1.o -DONE -// RUN: %clang_host -c -gsplit-dwarf -g %s -o %t2.o -DTWO -// RUN: %clang_host -c -gsplit-dwarf -g %s -o %t3.o -DTHREE -// RUN: %clang_host %t1.o %t2.o %t3.o -o %t +// RUN: %clangxx_host -c -gsplit-dwarf -g %s -o %t1.o -DONE +// RUN: %clangxx_host -c -gsplit-dwarf -g %s -o %t2.o -DTWO +// RUN: %clangxx_host -c -gsplit-dwarf -g %s -o %t3.o -DTHREE +// RUN: %clangxx_host %t1.o %t2.o %t3.o -o %t // RUN: %lldb %t -o "br set -n foo" -o run -o "expression 
bool_in_first_cu" -o exit \ // RUN: | FileCheck %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/apple-index-is-used.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/apple-index-is-used.cpp index 5bcb2cbcbbe2..8ef2e56ba3d4 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/apple-index-is-used.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/apple-index-is-used.cpp @@ -1,5 +1,5 @@ // Test that we use the apple indexes. -// RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx -gdwarf-4 +// RUN: %clangxx %s -g -c -o %t --target=x86_64-apple-macosx -gdwarf-4 // RUN: lldb-test symbols %t | FileCheck %s // CHECK: .apple_names index present diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/debug-names-compressed.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/debug-names-compressed.cpp index 4dcbb4715220..53c3d3daa40c 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/debug-names-compressed.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/debug-names-compressed.cpp @@ -3,7 +3,7 @@ // REQUIRES: lld, zlib -// RUN: %clang -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames %s +// RUN: %clangxx -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames %s // RUN: ld.lld %t.o -o %t --compress-debug-sections=zlib // RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix NAMES // RUN: lldb-test symbols --find=variable --name=foo %t | FileCheck %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-debug-names.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-debug-names.cpp index 2b7a928c89a8..acc34dd41688 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-debug-names.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-debug-names.cpp @@ -6,7 +6,7 @@ // REQUIRES: lld -// RUN: %clang %s -target x86_64-pc-linux -gdwarf-5 -fdebug-types-section \ +// RUN: %clangxx %s -target x86_64-pc-linux -gdwarf-5 -fdebug-types-section \ // RUN: -gpubnames -fno-limit-debug-info -c -o %t.o // RUN: ld.lld %t.o -o %t // RUN: %lldb %t -o "type lookup stype" -b | 
FileCheck %s --check-prefix=BASE diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-dwo-cross-reference.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-dwo-cross-reference.cpp index 0e29cb3e7f16..bc863fb64a9c 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-dwo-cross-reference.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/debug-types-dwo-cross-reference.cpp @@ -3,9 +3,9 @@ // REQUIRES: lld -// RUN: %clang %s -target x86_64-pc-linux -fno-standalone-debug -g \ +// RUN: %clangxx %s -target x86_64-pc-linux -fno-standalone-debug -g \ // RUN: -fdebug-types-section -gsplit-dwarf -c -o %t1.o -DONE -// RUN: %clang %s -target x86_64-pc-linux -fno-standalone-debug -g \ +// RUN: %clangxx %s -target x86_64-pc-linux -fno-standalone-debug -g \ // RUN: -fdebug-types-section -gsplit-dwarf -c -o %t2.o -DTWO // RUN: llvm-dwarfdump %t1.dwo -debug-types -debug-info | FileCheck --check-prefix=ONEUNIT %s // RUN: llvm-dwarfdump %t2.dwo -debug-types -debug-info | FileCheck --check-prefix=ONEUNIT %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/dwarf5-index-is-used.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/dwarf5-index-is-used.cpp index d6ac23716f6c..2fdb1d8d7ca7 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/dwarf5-index-is-used.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/dwarf5-index-is-used.cpp @@ -2,7 +2,7 @@ // REQUIRES: lld -// RUN: %clang %s -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames +// RUN: %clangxx %s -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames // RUN: ld.lld %t.o -o %t // RUN: lldb-test symbols %t | FileCheck %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/dwarf5-partial-index.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/dwarf5-partial-index.cpp index ab84415f61b2..a739dfde48aa 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/dwarf5-partial-index.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/dwarf5-partial-index.cpp @@ -3,9 +3,9 @@ // REQUIRES: lld -// RUN: %clang %s -c -o %t-1.o --target=x86_64-pc-linux 
-DONE -gdwarf-5 -gpubnames +// RUN: %clangxx %s -c -o %t-1.o --target=x86_64-pc-linux -DONE -gdwarf-5 -gpubnames // RUN: llvm-readobj --sections %t-1.o | FileCheck %s --check-prefix NAMES -// RUN: %clang %s -c -o %t-2.o --target=x86_64-pc-linux -DTWO -gdwarf-5 -gno-pubnames +// RUN: %clangxx %s -c -o %t-2.o --target=x86_64-pc-linux -DTWO -gdwarf-5 -gno-pubnames // RUN: ld.lld %t-1.o %t-2.o -o %t // RUN: lldb-test symbols --find=variable --name=foo %t | FileCheck %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/dwo-not-found-warning.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/dwo-not-found-warning.cpp index 929e11f80e34..36eb299f0663 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/dwo-not-found-warning.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/dwo-not-found-warning.cpp @@ -1,4 +1,4 @@ -// RUN: %clang --target=x86_64-pc-linux -g -gsplit-dwarf -c %s -o %t.o +// RUN: %clangxx --target=x86_64-pc-linux -g -gsplit-dwarf -c %s -o %t.o // RUN: rm %t.dwo // RUN: %lldb %t.o -o "br set -n main" -o exit 2>&1 | FileCheck %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-foreign-type-units.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-foreign-type-units.cpp index 9251930d7d13..7fbc4f98e797 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-foreign-type-units.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-foreign-type-units.cpp @@ -16,9 +16,9 @@ // type unit comes from by looking at the DW_AT_dwo_name attribute in the // DW_TAG_type_unit. 
-// RUN: %clang -target x86_64-pc-linux -gdwarf-5 -gsplit-dwarf \ +// RUN: %clangxx -target x86_64-pc-linux -gdwarf-5 -gsplit-dwarf \ // RUN: -fdebug-types-section -gpubnames -c %s -o %t.main.o -// RUN: %clang -target x86_64-pc-linux -gdwarf-5 -gsplit-dwarf -DVARIANT \ +// RUN: %clangxx -target x86_64-pc-linux -gdwarf-5 -gsplit-dwarf -DVARIANT \ // RUN: -fdebug-types-section -gpubnames -c %s -o %t.foo.o // RUN: ld.lld %t.main.o %t.foo.o -o %t diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-index-cache.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-index-cache.cpp index 3e97c3fb1ebc..3edcd8f180a1 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-index-cache.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-index-cache.cpp @@ -14,8 +14,8 @@ // complete DWARF index. // Test that if we don't have .debug_names, that we save a full DWARF index. -// RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -DMAIN=1 -c %s -o %t.main.o -// RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -DMAIN=0 -c %s -o %t.foo.o +// RUN: %clangxx -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -DMAIN=1 -c %s -o %t.main.o +// RUN: %clangxx -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -DMAIN=0 -c %s -o %t.foo.o // RUN: ld.lld %t.main.o %t.foo.o -o %t.nonames // RUN: llvm-dwp %t.main.dwo %t.foo.dwo -o %t.nonames.dwp // RUN: rm %t.main.dwo %t.foo.dwo @@ -35,8 +35,8 @@ // Test that if we have one .o file with .debug_names and one without, that we // save a partial DWARF index. 
-// RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -DMAIN=1 -c %s -o %t.main.o -gpubnames -// RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -DMAIN=0 -c %s -o %t.foo.o +// RUN: %clangxx -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -DMAIN=1 -c %s -o %t.main.o -gpubnames +// RUN: %clangxx -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -DMAIN=0 -c %s -o %t.foo.o // RUN: ld.lld %t.main.o %t.foo.o -o %t.somenames // RUN: llvm-dwp %t.main.dwo %t.foo.dwo -o %t.somenames.dwp // RUN: rm %t.main.dwo %t.foo.dwo diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-separate-debug-file.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-separate-debug-file.cpp index 888e96bbb10a..f625fda2087d 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-separate-debug-file.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/dwp-separate-debug-file.cpp @@ -1,7 +1,7 @@ // REQUIRES: lld, python // Now test with DWARF5 -// RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -c %s -o %t.dwarf5.o +// RUN: %clangxx -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -c %s -o %t.dwarf5.o // RUN: ld.lld %t.dwarf5.o -o %t.dwarf5 // RUN: llvm-dwp %t.dwarf5.dwo -o %t.dwarf5.dwp // RUN: rm %t.dwarf5.dwo @@ -64,7 +64,7 @@ // RUN: -b %t.dwarf5.debug 2>&1 | FileCheck %s -check-prefix=NODWP // Now test with DWARF4 -// RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-4 -c %s -o %t.dwarf4.o +// RUN: %clangxx -target x86_64-pc-linux -gsplit-dwarf -gdwarf-4 -c %s -o %t.dwarf4.o // RUN: ld.lld %t.dwarf4.o -o %t.dwarf4 // RUN: llvm-dwp %t.dwarf4.dwo -o %t.dwarf4.dwp // RUN: rm %t.dwarf4.dwo @@ -128,7 +128,7 @@ // Test if we have a GNU build ID in our main executable and in our debug file, // and we have a .dwp file that doesn't, that we can still load our .dwp file. 
-// RUN: %clang -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -c %s -o %t.o +// RUN: %clangxx -target x86_64-pc-linux -gsplit-dwarf -gdwarf-5 -c %s -o %t.o // RUN: ld.lld %t.o --build-id=md5 -o %t // RUN: llvm-dwp %t.dwo -o %t.dwp // RUN: rm %t.dwo diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-function.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-function.cpp index c42f9fe0b8b5..a00b2bd9506e 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-function.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-function.cpp @@ -1,6 +1,6 @@ // REQUIRES: lld -// RUN: %clang %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames +// RUN: %clangxx %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames // RUN: ld.lld %t.o -o %t // RUN: lldb-test symbols --name=foo --find=function --function-flags=base %t | \ // RUN: FileCheck --check-prefix=BASE %s @@ -19,7 +19,7 @@ // RUN: lldb-test symbols --name=not_there --find=function %t | \ // RUN: FileCheck --check-prefix=EMPTY %s // -// RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx +// RUN: %clangxx %s -g -c -o %t --target=x86_64-apple-macosx // RUN: lldb-test symbols --name=foo --find=function --function-flags=base %t | \ // RUN: FileCheck --check-prefix=BASE %s // RUN: lldb-test symbols --name=foo --find=function --function-flags=method %t | \ @@ -39,7 +39,7 @@ // RUN: lldb-test symbols --name=not_there --find=function %t | \ // RUN: FileCheck --check-prefix=EMPTY %s -// RUN: %clang %s -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames +// RUN: %clangxx %s -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames // RUN: ld.lld %t.o -o %t // RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix NAMES // RUN: lldb-test symbols --name=foo --find=function --function-flags=base %t | \ diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-namespace.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-namespace.cpp index 13d50af7ef60..14c73c3e82ef 100644 --- 
a/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-namespace.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-namespace.cpp @@ -1,6 +1,6 @@ // REQUIRES: lld -// RUN: %clang %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames +// RUN: %clangxx %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames // RUN: ld.lld %t.o -o %t // RUN: lldb-test symbols --name=foo --find=namespace %t | \ // RUN: FileCheck --check-prefix=FOO %s @@ -9,7 +9,7 @@ // RUN: lldb-test symbols --name=not_there --find=namespace %t | \ // RUN: FileCheck --check-prefix=EMPTY %s // -// RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx +// RUN: %clangxx %s -g -c -o %t --target=x86_64-apple-macosx // RUN: lldb-test symbols --name=foo --find=namespace %t | \ // RUN: FileCheck --check-prefix=FOO %s // RUN: lldb-test symbols --name=foo --find=namespace --context=context %t | \ @@ -17,7 +17,7 @@ // RUN: lldb-test symbols --name=not_there --find=namespace %t | \ // RUN: FileCheck --check-prefix=EMPTY %s -// RUN: %clang %s -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames +// RUN: %clangxx %s -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames // RUN: ld.lld %t.o -o %t // RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix NAMES // RUN: lldb-test symbols --name=foo --find=namespace %t | \ diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-type.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-type.cpp index af4920660872..315fab344dfe 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-type.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-type.cpp @@ -1,6 +1,6 @@ // REQUIRES: lld -// RUN: %clang %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames +// RUN: %clangxx %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames // RUN: ld.lld %t.o -o %t // RUN: lldb-test symbols --name=foo --find=type %t | \ // RUN: FileCheck --check-prefix=NAME %s @@ -11,7 +11,7 @@ // RUN: lldb-test symbols --name=not_there --find=type %t | \ // RUN: 
FileCheck --check-prefix=EMPTY %s // -// RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx +// RUN: %clangxx %s -g -c -o %t --target=x86_64-apple-macosx // RUN: lldb-test symbols --name=foo --find=type %t | \ // RUN: FileCheck --check-prefix=NAME %s // RUN: lldb-test symbols --name=::foo --find=type %t | \ @@ -21,7 +21,7 @@ // RUN: lldb-test symbols --name=not_there --find=type %t | \ // RUN: FileCheck --check-prefix=EMPTY %s -// RUN: %clang %s -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames +// RUN: %clangxx %s -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames // RUN: ld.lld %t.o -o %t // RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix NAMES // RUN: lldb-test symbols --name=foo --find=type %t | \ diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-variable.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-variable.cpp index e46fa14489d3..b6e2252c2840 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-variable.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/find-basic-variable.cpp @@ -1,6 +1,6 @@ // REQUIRES: lld -// RUN: %clang %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames +// RUN: %clangxx %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames // RUN: ld.lld %t.o -o %t // RUN: lldb-test symbols --name=foo --find=variable --context=context %t | \ // RUN: FileCheck --check-prefix=CONTEXT %s @@ -11,7 +11,7 @@ // RUN: lldb-test symbols --name=not_there --find=variable %t | \ // RUN: FileCheck --check-prefix=EMPTY %s // -// RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx +// RUN: %clangxx %s -g -c -o %t --target=x86_64-apple-macosx // RUN: lldb-test symbols --name=foo --find=variable --context=context %t | \ // RUN: FileCheck --check-prefix=CONTEXT %s // RUN: lldb-test symbols --name=foo --find=variable %t | \ @@ -21,7 +21,7 @@ // RUN: lldb-test symbols --name=not_there --find=variable %t | \ // RUN: FileCheck --check-prefix=EMPTY %s // -// RUN: %clang %s -g -c -o %t.o 
--target=x86_64-pc-linux -gdwarf-5 -gpubnames +// RUN: %clangxx %s -g -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames // RUN: ld.lld %t.o -o %t // RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix NAMES // RUN: lldb-test symbols --name=foo --find=variable --context=context %t | \ diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-function-regex.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-function-regex.cpp index be267596fb37..5c7ad844f660 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/find-function-regex.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/find-function-regex.cpp @@ -1,13 +1,13 @@ // REQUIRES: lld -// RUN: %clang %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames +// RUN: %clangxx %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames // RUN: ld.lld %t.o -o %t // RUN: lldb-test symbols --name=f.o --regex --find=function %t | FileCheck %s // -// RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx +// RUN: %clangxx %s -g -c -o %t --target=x86_64-apple-macosx // RUN: lldb-test symbols --name=f.o --regex --find=function %t | FileCheck %s -// RUN: %clang %s -g -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames +// RUN: %clangxx %s -g -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames // RUN: ld.lld %t.o -o %t // RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix NAMES // RUN: lldb-test symbols --name=f.o --regex --find=function %t | FileCheck %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-method-local-struct.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-method-local-struct.cpp index 3da4a4a23f8a..46553a83081e 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/find-method-local-struct.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/find-method-local-struct.cpp @@ -1,4 +1,4 @@ -// RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx +// RUN: %clangxx %s -g -c -o %t --target=x86_64-apple-macosx // RUN: lldb-test symbols --name=foo --find=function --function-flags=method %t | \ // RUN: 
FileCheck %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-method.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-method.cpp index 9f8b3df2f31a..26faf8907b4a 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/find-method.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/find-method.cpp @@ -1,15 +1,15 @@ // REQUIRES: lld -// RUN: %clang %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames +// RUN: %clangxx %s -g -c -o %t.o --target=x86_64-pc-linux -gno-pubnames // RUN: ld.lld %t.o -o %t // RUN: lldb-test symbols --name=foo --find=function --function-flags=method %t | \ // RUN: FileCheck %s // -// RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx +// RUN: %clangxx %s -g -c -o %t --target=x86_64-apple-macosx // RUN: lldb-test symbols --name=foo --find=function --function-flags=method %t | \ // RUN: FileCheck %s -// RUN: %clang %s -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames +// RUN: %clangxx %s -c -o %t.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames // RUN: ld.lld %t.o -o %t // RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix NAMES // RUN: lldb-test symbols --name=foo --find=function --function-flags=method %t | \ diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-qualified-variable.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-qualified-variable.cpp index 1ad3e7fbadf5..e3f9ce308b75 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/find-qualified-variable.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/find-qualified-variable.cpp @@ -1,4 +1,4 @@ -// RUN: %clang %s -g -c -o %t --target=x86_64-apple-macosx +// RUN: %clangxx %s -g -c -o %t --target=x86_64-apple-macosx // RUN: lldb-test symbols --name=A::foo --find=variable %t | FileCheck %s // CHECK: Found 1 variables: diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-variable-dwo.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-variable-dwo.cpp index b5d35e4f7883..250b34377acd 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/find-variable-dwo.cpp +++ 
b/lldb/test/Shell/SymbolFile/DWARF/x86/find-variable-dwo.cpp @@ -1,9 +1,9 @@ // REQUIRES: lld -// RUN: %clang %s -gdwarf-5 -gpubnames -gsplit-dwarf -c -emit-llvm -o - --target=x86_64-pc-linux -DONE | \ +// RUN: %clangxx %s -gdwarf-5 -gpubnames -gsplit-dwarf -c -emit-llvm -o - --target=x86_64-pc-linux -DONE | \ // RUN: llc -filetype=obj -split-dwarf-file=%t-1.dwo -o %t-1.o // RUN: llvm-objcopy --split-dwo=%t-1.dwo %t-1.o -// RUN: %clang %s -gdwarf-5 -gpubnames -gsplit-dwarf -c -emit-llvm -o - --target=x86_64-pc-linux -DTWO | \ +// RUN: %clangxx %s -gdwarf-5 -gpubnames -gsplit-dwarf -c -emit-llvm -o - --target=x86_64-pc-linux -DTWO | \ // RUN: llc -filetype=obj -split-dwarf-file=%t-2.dwo -o %t-2.o // RUN: llvm-objcopy --split-dwo=%t-2.dwo %t-2.o // RUN: ld.lld %t-1.o %t-2.o -o %t diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/find-variable-file.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/find-variable-file.cpp index f1a9a4eb12d0..3a8cf89ac367 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/find-variable-file.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/find-variable-file.cpp @@ -1,7 +1,7 @@ // REQUIRES: lld -// RUN: %clang -g -c -o %t-1.o --target=x86_64-pc-linux -gno-pubnames %s -// RUN: %clang -g -c -o %t-2.o --target=x86_64-pc-linux -gno-pubnames %S/Inputs/find-variable-file-2.cpp +// RUN: %clangxx -g -c -o %t-1.o --target=x86_64-pc-linux -gno-pubnames %s +// RUN: %clangxx -g -c -o %t-2.o --target=x86_64-pc-linux -gno-pubnames %S/Inputs/find-variable-file-2.cpp // RUN: ld.lld %t-1.o %t-2.o -o %t // RUN: lldb-test symbols --file=find-variable-file.cpp --find=variable %t | \ // RUN: FileCheck --check-prefix=ONE %s @@ -10,16 +10,16 @@ // Run the same test with split-dwarf. This is interesting because the two // split compile units will have the same offset (0). 
-// RUN: %clang -g -c -o %t-1.o --target=x86_64-pc-linux -gsplit-dwarf %s -// RUN: %clang -g -c -o %t-2.o --target=x86_64-pc-linux -gsplit-dwarf %S/Inputs/find-variable-file-2.cpp +// RUN: %clangxx -g -c -o %t-1.o --target=x86_64-pc-linux -gsplit-dwarf %s +// RUN: %clangxx -g -c -o %t-2.o --target=x86_64-pc-linux -gsplit-dwarf %S/Inputs/find-variable-file-2.cpp // RUN: ld.lld %t-1.o %t-2.o -o %t // RUN: lldb-test symbols --file=find-variable-file.cpp --find=variable %t | \ // RUN: FileCheck --check-prefix=ONE %s // RUN: lldb-test symbols --file=find-variable-file-2.cpp --find=variable %t | \ // RUN: FileCheck --check-prefix=TWO %s -// RUN: %clang -c -o %t-1.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames %s -// RUN: %clang -c -o %t-2.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames %S/Inputs/find-variable-file-2.cpp +// RUN: %clangxx -c -o %t-1.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames %s +// RUN: %clangxx -c -o %t-2.o --target=x86_64-pc-linux -gdwarf-5 -gpubnames %S/Inputs/find-variable-file-2.cpp // RUN: ld.lld %t-1.o %t-2.o -o %t // RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix NAMES // RUN: lldb-test symbols --file=find-variable-file.cpp --find=variable %t | \ @@ -29,9 +29,9 @@ // Run the same test with split dwarf and pubnames to check whether we can find // the compile unit using the name index if it is split. 
-// RUN: %clang -c -o %t-1.o --target=x86_64-pc-linux -gdwarf-5 -gsplit-dwarf -gpubnames %s -// RUN: %clang -c -o %t-2.o --target=x86_64-pc-linux -gdwarf-5 -gsplit-dwarf -gpubnames %S/Inputs/find-variable-file-2.cpp -// RUN: %clang -c -o %t-3.o --target=x86_64-pc-linux -gdwarf-5 -gsplit-dwarf -gpubnames %S/Inputs/find-variable-file-3.cpp +// RUN: %clangxx -c -o %t-1.o --target=x86_64-pc-linux -gdwarf-5 -gsplit-dwarf -gpubnames %s +// RUN: %clangxx -c -o %t-2.o --target=x86_64-pc-linux -gdwarf-5 -gsplit-dwarf -gpubnames %S/Inputs/find-variable-file-2.cpp +// RUN: %clangxx -c -o %t-3.o --target=x86_64-pc-linux -gdwarf-5 -gsplit-dwarf -gpubnames %S/Inputs/find-variable-file-3.cpp // RUN: ld.lld %t-1.o %t-2.o %t-3.o -o %t // RUN: llvm-readobj --sections %t | FileCheck %s --check-prefix NAMES // RUN: lldb-test symbols --file=find-variable-file.cpp --find=variable %t | \ diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/member-pointers.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/member-pointers.cpp index a12892305798..00805770af11 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/member-pointers.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/member-pointers.cpp @@ -1,7 +1,7 @@ // REQUIRES: lld // Itanium ABI: -// RUN: %clang --target=x86_64-pc-linux -gdwarf -c -o %t_linux.o %s +// RUN: %clangxx --target=x86_64-pc-linux -gdwarf -c -o %t_linux.o %s // RUN: %lldb -f %t_linux.o -b -o "target variable s1 s2 m1 m2 v1 v2 v3 v4" | FileCheck --check-prefix=CHECK-GNU %s // // CHECK-GNU: (void (Single1::*)()) s1 = 0x00000000000000000000000000000000 diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/module-ownership.mm b/lldb/test/Shell/SymbolFile/DWARF/x86/module-ownership.mm index 2dec109a781c..27aa1365ab54 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/module-ownership.mm +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/module-ownership.mm @@ -1,5 +1,5 @@ // RUN: rm -rf %t.cache -// RUN: %clang --target=x86_64-apple-macosx -g -gmodules -Wno-objc-root-class \ +// RUN: %clangxx 
--target=x86_64-apple-macosx -g -gmodules -Wno-objc-root-class \ // RUN: -fmodules -fmodules-cache-path=%t.cache \ // RUN: -c -o %t.o %s -I%S/Inputs // RUN: lldb-test symbols -dump-clang-ast %t.o | FileCheck --check-prefix CHECK-ANON-S1 %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/no_unique_address-with-bitfields.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/no_unique_address-with-bitfields.cpp index 297fb82caee5..8f530c803a40 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/no_unique_address-with-bitfields.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/no_unique_address-with-bitfields.cpp @@ -1,4 +1,4 @@ -// RUN: %clang --target=x86_64-apple-macosx -c -gdwarf -o %t %s +// RUN: %clangxx --target=x86_64-apple-macosx -c -gdwarf -o %t %s // RUN: %lldb %t \ // RUN: -o "target var global" \ // RUN: -o "target var global2" \ diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/type-definition-search.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/type-definition-search.cpp index 5a40a6e0fbc2..5ab45eefd221 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/type-definition-search.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/type-definition-search.cpp @@ -4,18 +4,18 @@ // REQUIRES: lld -// RUN: %clang --target=x86_64-pc-linux -c %s -o %t-n-a.o -g -gsimple-template-names -DFILE_A -// RUN: %clang --target=x86_64-pc-linux -c %s -o %t-n-b.o -g -gsimple-template-names -DFILE_B +// RUN: %clangxx --target=x86_64-pc-linux -c %s -o %t-n-a.o -g -gsimple-template-names -DFILE_A +// RUN: %clangxx --target=x86_64-pc-linux -c %s -o %t-n-b.o -g -gsimple-template-names -DFILE_B // RUN: ld.lld %t-n-a.o %t-n-b.o -o %t-n // RUN: %lldb %t-n -o "target variable --ptr-depth 1 --show-types both_a both_b" -o exit | FileCheck %s -// RUN: %clang --target=x86_64-pc-linux -c %s -o %t-t-a.o -g -fdebug-types-section -DFILE_A -// RUN: %clang --target=x86_64-pc-linux -c %s -o %t-t-b.o -g -fdebug-types-section -DFILE_B +// RUN: %clangxx --target=x86_64-pc-linux -c %s -o %t-t-a.o -g -fdebug-types-section 
-DFILE_A +// RUN: %clangxx --target=x86_64-pc-linux -c %s -o %t-t-b.o -g -fdebug-types-section -DFILE_B // RUN: ld.lld %t-t-a.o %t-t-b.o -o %t-t // RUN: %lldb %t-t -o "target variable --ptr-depth 1 --show-types both_a both_b" -o exit | FileCheck %s -// RUN: %clang --target=x86_64-pc-linux -c %s -o %t-tn-a.o -g -fdebug-types-section -gsimple-template-names -DFILE_A -// RUN: %clang --target=x86_64-pc-linux -c %s -o %t-tn-b.o -g -fdebug-types-section -gsimple-template-names -DFILE_B +// RUN: %clangxx --target=x86_64-pc-linux -c %s -o %t-tn-a.o -g -fdebug-types-section -gsimple-template-names -DFILE_A +// RUN: %clangxx --target=x86_64-pc-linux -c %s -o %t-tn-b.o -g -fdebug-types-section -gsimple-template-names -DFILE_B // RUN: ld.lld %t-tn-a.o %t-tn-b.o -o %t-tn // RUN: %lldb %t-tn -o "target variable --ptr-depth 1 --show-types both_a both_b" -o exit | FileCheck %s diff --git a/lldb/test/Shell/SymbolFile/DWARF/x86/type-unit-same-basename.cpp b/lldb/test/Shell/SymbolFile/DWARF/x86/type-unit-same-basename.cpp index f7f5a30aaba9..f9fd5b5e5225 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/x86/type-unit-same-basename.cpp +++ b/lldb/test/Shell/SymbolFile/DWARF/x86/type-unit-same-basename.cpp @@ -5,8 +5,8 @@ // REQUIRES: lld -// RUN: %clang --target=x86_64-pc-linux -c %s -o %t-a.o -g -fdebug-types-section -flimit-debug-info -DFILE_A -// RUN: %clang --target=x86_64-pc-linux -c %s -o %t-b.o -g -fdebug-types-section -flimit-debug-info -DFILE_B +// RUN: %clangxx --target=x86_64-pc-linux -c %s -o %t-a.o -g -fdebug-types-section -flimit-debug-info -DFILE_A +// RUN: %clangxx --target=x86_64-pc-linux -c %s -o %t-b.o -g -fdebug-types-section -flimit-debug-info -DFILE_B // RUN: ld.lld -z undefs %t-a.o %t-b.o -o %t // RUN: %lldb %t -o "target variable x" -o exit | FileCheck %s -- cgit v1.2.3 From 4afb0e6f51768ff8560a8cb5d0de5bb7867ff6a5 Mon Sep 17 00:00:00 2001 From: Rahman Lavaee Date: Thu, 30 Oct 2025 21:05:45 +0000 Subject: Simplify the basic-block-sections-bb-hash.ll test. 
The original test which uses grep,sed,tr commands fails on darwin: https://github.com/llvm/llvm-project/issues/165781 --- .../test/CodeGen/X86/basic-block-sections-bb-hash.ll | 20 ++++---------------- 1 file changed, 4 insertions(+), 16 deletions(-) diff --git a/llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll b/llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll index f46d6ed262b2..293b48d7dc5d 100644 --- a/llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll +++ b/llvm/test/CodeGen/X86/basic-block-sections-bb-hash.ll @@ -1,23 +1,11 @@ -; BB section test with basic block hashes. -; -; RUN: llc %s -O0 -mtriple=x86_64-pc-linux -function-sections -filetype=obj -basic-block-address-map -emit-bb-hash -o %t.o -; RUN: obj2yaml %t.o -o %t.yaml -; -;; Profile for version 1: +;; BB section test with basic block hashes. + +;; basic block sections Profile with bb hashes ; RUN: echo 'v1' > %t ; RUN: echo 'f foo' >> %t ; RUN: echo 'g 0:10,1:9,2:1 1:8,3:8 2:2,3:2 3:11' >> %t ; RUN: echo 'c 0 2 3' >> %t - -; These commands read BB hashes from SHT_LLVM_BB_ADDR_MAP -; and put them into the basic blocks sections profile. 
-; RUN: grep -E '^\s+(- ID:|Hash:)' %t.yaml | \ -; RUN: grep -B1 'Hash:' | \ -; RUN: sed 's/^\s*//; s/^- ID: *//; s/Hash: *0x//' | \ -; RUN: paste -d: - - | \ -; RUN: tr '\n' ' ' | \ -; RUN: sed 's/ $/\n/; s/^/h /' >> %t -; +; RUN: echo 'h 0:64863A11B5CA0000 1:54F1E80D6B270006 2:54F1F4E66B270008 3:C8BC6041A2CB0009' >> %t ; RUN: llc < %s -O0 -mtriple=x86_64-pc-linux -function-sections -basic-block-sections=%t | FileCheck %s ; define void @foo(i1 zeroext) nounwind { -- cgit v1.2.3 From 6c1678abce2c31b0db22634aa19368095a75ca77 Mon Sep 17 00:00:00 2001 From: Baranov Victor Date: Fri, 31 Oct 2025 00:42:31 +0300 Subject: [CI] Remove unused variable in code-format job (#165454) `comments` were never used plus generated pylint error --- llvm/utils/git/code-format-helper.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/utils/git/code-format-helper.py b/llvm/utils/git/code-format-helper.py index dff7f78ce64a..f6b28f480b8a 100755 --- a/llvm/utils/git/code-format-helper.py +++ b/llvm/utils/git/code-format-helper.py @@ -486,8 +486,6 @@ def hook_main(): if fmt.has_tool(): if not fmt.run(args.changed_files, args): failed_fmts.append(fmt.name) - if fmt.comment: - comments.append(fmt.comment) else: print(f"Couldn't find {fmt.name}, can't check " + fmt.friendly_name.lower()) -- cgit v1.2.3 From c46bfed1a484d30cd251a9a225649d74e3bf0af5 Mon Sep 17 00:00:00 2001 From: Ebuka Ezike Date: Thu, 30 Oct 2025 21:43:53 +0000 Subject: [lldb] Add alternative SBThread::GetStopDescription (#165379) The function signature for `GetStopDescription` is `lldb::SBThread::GetStopDescription(char *dst_or_null, size_t len)`. To get a description you need to call the function a first time to get the buffer size, and a second time to get the description. This is a little worse from the Python side, as the signature is `lldb.SBThread.GetStopDescription(int: len) -> list[str]`: the user has to pass the maximum possible size, with no way of checking whether it is enough. 
This patch adds a new api `lldb.SBThread.GetStopDescription(desc: lldb.SBStream()) -> bool` `bool lldb::SBThread::GetStopDescription(lldb::SBStream &description)` which handles this case. Adds new Test case for lua. --- lldb/bindings/lua/lua-typemaps.swig | 20 ++++++++++++- lldb/bindings/python/python-typemaps.swig | 18 +++++++++++ lldb/include/lldb/API/SBThread.h | 8 +++++ lldb/source/API/SBThread.cpp | 35 ++++++++++++++++++---- lldb/test/API/lua_api/TestThreadAPI.lua | 25 ++++++++++++++++ .../python_api/default-constructor/sb_thread.py | 1 + lldb/test/API/python_api/thread/TestThreadAPI.py | 5 ++++ 7 files changed, 105 insertions(+), 7 deletions(-) create mode 100644 lldb/test/API/lua_api/TestThreadAPI.lua diff --git a/lldb/bindings/lua/lua-typemaps.swig b/lldb/bindings/lua/lua-typemaps.swig index 56756936a532..f2a740141936 100644 --- a/lldb/bindings/lua/lua-typemaps.swig +++ b/lldb/bindings/lua/lua-typemaps.swig @@ -121,9 +121,27 @@ LLDB_NUMBER_TYPEMAP(enum SWIGTYPE); $1 = (char *)malloc($2); } +// Disable default type checking for this method to avoid SWIG dispatch issues. +// +// Problem: SBThread::GetStopDescription has two overloads: +// 1. GetStopDescription(char* dst_or_null, size_t dst_len) +// 2. GetStopDescription(lldb::SBStream& stream) +// +// SWIG generates a dispatch function to select the correct overload based on argument types. +// see https://www.swig.org/Doc4.0/SWIGDocumentation.html#Typemaps_overloading. +// However, this dispatcher doesn't consider typemaps that transform function signatures. +// +// In lua, our typemap converts GetStopDescription(char*, size_t) to GetStopDescription(int). +// The dispatcher still checks against the original (char*, size_t) signature instead of +// the transformed (int) signature, causing type matching to fail. +// This only affects SBThread::GetStopDescription since the type check also matches +// the argument name, which is unique to this function. 
+%typemap(typecheck, precedence=SWIG_TYPECHECK_POINTER) (char *dst_or_null, size_t dst_len) "" + %typemap(argout) (char *dst_or_null, size_t dst_len) { lua_pop(L, 1); // Blow away the previous result - lua_pushlstring(L, (const char *)$1, $result); + llvm::StringRef ref($1); + lua_pushlstring(L, (const char *)$1, ref.size()); free($1); // SWIG_arg was already incremented } diff --git a/lldb/bindings/python/python-typemaps.swig b/lldb/bindings/python/python-typemaps.swig index 715914fe745f..4d3a95768f2f 100644 --- a/lldb/bindings/python/python-typemaps.swig +++ b/lldb/bindings/python/python-typemaps.swig @@ -224,6 +224,24 @@ AND call SWIG_fail at the same time, because it will result in a double free. } $1 = (char *)malloc($2); } + +// Disable default type checking for this method to avoid SWIG dispatch issues. +// +// Problem: SBThread::GetStopDescription has two overloads: +// 1. GetStopDescription(char* dst_or_null, size_t dst_len) +// 2. GetStopDescription(lldb::SBStream& stream) +// +// SWIG generates a dispatch function to select the correct overload based on argument types. +// see https://www.swig.org/Doc4.0/SWIGDocumentation.html#Typemaps_overloading. +// However, this dispatcher doesn't consider typemaps that transform function signatures. +// +// In Python, our typemap converts GetStopDescription(char*, size_t) to GetStopDescription(int). +// The dispatcher still checks against the original (char*, size_t) signature instead of +// the transformed (int) signature, causing type matching to fail. +// This only affects SBThread::GetStopDescription since the type check also matches +// the argument name, which is unique to this function. 
+%typemap(typecheck, precedence=SWIG_TYPECHECK_POINTER) (char *dst_or_null, size_t dst_len) "" + %typemap(argout) (char *dst_or_null, size_t dst_len) { Py_XDECREF($result); /* Blow away any previous result */ llvm::StringRef ref($1); diff --git a/lldb/include/lldb/API/SBThread.h b/lldb/include/lldb/API/SBThread.h index e9fe5858d125..2411dfd37651 100644 --- a/lldb/include/lldb/API/SBThread.h +++ b/lldb/include/lldb/API/SBThread.h @@ -81,6 +81,14 @@ public: SBThreadCollection GetStopReasonExtendedBacktraces(InstrumentationRuntimeType type); + /// Gets a human-readable description of why the thread stopped. + /// + /// \param stream Output stream to receive the stop description text + /// \return + /// true if obtained and written to the stream, + // false if there was an error retrieving the description. + bool GetStopDescription(lldb::SBStream &stream) const; + size_t GetStopDescription(char *dst_or_null, size_t dst_len); SBValue GetStopReturnValue(); diff --git a/lldb/source/API/SBThread.cpp b/lldb/source/API/SBThread.cpp index 4e4aa48bc9a2..f58a1b52afa9 100644 --- a/lldb/source/API/SBThread.cpp +++ b/lldb/source/API/SBThread.cpp @@ -239,11 +239,34 @@ SBThread::GetStopReasonExtendedBacktraces(InstrumentationRuntimeType type) { return threads; } -size_t SBThread::GetStopDescription(char *dst, size_t dst_len) { - LLDB_INSTRUMENT_VA(this, dst, dst_len); +bool SBThread::GetStopDescription(lldb::SBStream &stream) const { + LLDB_INSTRUMENT_VA(this, stream); + + if (!m_opaque_sp) + return false; + + llvm::Expected exe_ctx = + GetStoppedExecutionContext(m_opaque_sp); + if (!exe_ctx) { + LLDB_LOG_ERROR(GetLog(LLDBLog::API), exe_ctx.takeError(), "{0}"); + return false; + } + + if (!exe_ctx->HasThreadScope()) + return false; + + Stream &strm = stream.ref(); + const std::string stop_desc = exe_ctx->GetThreadPtr()->GetStopDescription(); + strm.PutCString(stop_desc); + + return true; +} + +size_t SBThread::GetStopDescription(char *dst_or_null, size_t dst_len) { + 
LLDB_INSTRUMENT_VA(this, dst_or_null, dst_len); - if (dst) - *dst = 0; + if (dst_or_null) + *dst_or_null = 0; llvm::Expected exe_ctx = GetStoppedExecutionContext(m_opaque_sp); @@ -259,8 +282,8 @@ size_t SBThread::GetStopDescription(char *dst, size_t dst_len) { if (thread_stop_desc.empty()) return 0; - if (dst) - return ::snprintf(dst, dst_len, "%s", thread_stop_desc.c_str()) + 1; + if (dst_or_null) + return ::snprintf(dst_or_null, dst_len, "%s", thread_stop_desc.c_str()) + 1; // NULL dst passed in, return the length needed to contain the // description. diff --git a/lldb/test/API/lua_api/TestThreadAPI.lua b/lldb/test/API/lua_api/TestThreadAPI.lua new file mode 100644 index 000000000000..5a38d0ba9192 --- /dev/null +++ b/lldb/test/API/lua_api/TestThreadAPI.lua @@ -0,0 +1,25 @@ +_T = require('lua_lldb_test').create_test('TestThreadAPI') + +function _T:TestGetStopDescription() + local target = self:create_target() + local breakpoint = target:BreakpointCreateByName("main", "a.out") + assertTrue(breakpoint:IsValid() and breakpoint:GetNumLocations() == 1) + + local process = target:LaunchSimple({ 'arg1', 'arg2' }, nil, nil) + local thread = get_stopped_thread(process, lldb.eStopReasonBreakpoint) + assertNotNil(thread) + assertTrue(thread:IsValid()) + + assertEqual("breakpoint", thread:GetStopDescription(string.len("breakpoint") + 1)) + assertEqual("break", thread:GetStopDescription(string.len("break") + 1)) + assertEqual("b", thread:GetStopDescription(string.len("b") + 1)) + assertEqual("breakpoint 1.1", thread:GetStopDescription(string.len("breakpoint 1.1") + 100)) + + -- Test stream variation + local stream = lldb.SBStream() + assertTrue(thread:GetStopDescription(stream)) + assertNotNil(stream) + assertEqual("breakpoint 1.1", stream:GetData()) +end + +os.exit(_T:run()) diff --git a/lldb/test/API/python_api/default-constructor/sb_thread.py b/lldb/test/API/python_api/default-constructor/sb_thread.py index 34eb3db852c3..4252fa0321ff 100644 --- 
a/lldb/test/API/python_api/default-constructor/sb_thread.py +++ b/lldb/test/API/python_api/default-constructor/sb_thread.py @@ -10,6 +10,7 @@ def fuzz_obj(obj): obj.GetStopReasonDataCount() obj.GetStopReasonDataAtIndex(100) obj.GetStopDescription(256) + obj.GetStopDescription(lldb.SBStream()) obj.GetThreadID() obj.GetIndexID() obj.GetName() diff --git a/lldb/test/API/python_api/thread/TestThreadAPI.py b/lldb/test/API/python_api/thread/TestThreadAPI.py index 5583434a742a..acad7583eec1 100644 --- a/lldb/test/API/python_api/thread/TestThreadAPI.py +++ b/lldb/test/API/python_api/thread/TestThreadAPI.py @@ -138,6 +138,11 @@ class ThreadAPITestCase(TestBase): "breakpoint 1.1", thread.GetStopDescription(len("breakpoint 1.1") + 100) ) + # Test the stream variation + stream = lldb.SBStream() + self.assertTrue(thread.GetStopDescription(stream)) + self.assertEqual("breakpoint 1.1", stream.GetData()) + def step_out_of_malloc_into_function_b(self, exe_name): """Test Python SBThread.StepOut() API to step out of a malloc call where the call site is at function b().""" exe = self.getBuildArtifact(exe_name) -- cgit v1.2.3 From b2d12d6f2ba60a2f5c76cf535e2b215443a9c221 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 30 Oct 2025 15:46:19 -0700 Subject: [VPlan] Extend getSCEVForVPV, use to compute VPReplicateRecipe cost. (#161276) Update getSCEVExprForVPValue to handle more complex expressions, to use it in VPReplicateRecipe::computeCost. In particular, it supports constructing SCEV expressions for GetElementPtr VPReplicateRecipes, with operands that are VPScalarIVStepsRecipe, VPDerivedIVRecipe and VPCanonicalIVRecipe. If we hit a sub-expression we don't support yet, we return SCEVCouldNotCompute. Note that the SCEV expression is valid for VF = 1: we only support constructing AddRecs for VPCanonicalIVRecipe, which is an AddRec starting at 0 and stepping by 1. 
The returned SCEV expressions could be converted to a VF specific one, by rewriting the AddRecs to ones with the appropriate step. Note that the logic for constructing SCEVs for GetElementPtr was directly ported from ScalarEvolution.cpp. Another thing to note is that we construct SCEV expression purely by looking at the operation of the recipe and its translated operands, w/o accessing the underlying IR (the exception being getting the source element type for GEPs). PR: https://github.com/llvm/llvm-project/pull/161276 --- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 14 ++++--- llvm/lib/Transforms/Vectorize/VPlanHelpers.h | 5 ++- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 23 +++++++----- llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 49 ++++++++++++++++++++++++- llvm/lib/Transforms/Vectorize/VPlanUtils.h | 3 +- 5 files changed, 74 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 505fb435e91e..25bf49db0e07 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -3908,7 +3908,7 @@ void LoopVectorizationPlanner::emitInvalidCostRemarks( continue; VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind, - *CM.PSE.getSE()); + *CM.PSE.getSE(), OrigLoop); precomputeCosts(*Plan, VF, CostCtx); auto Iter = vp_depth_first_deep(Plan->getVectorLoopRegion()->getEntry()); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly(Iter)) { @@ -4166,7 +4166,7 @@ VectorizationFactor LoopVectorizationPlanner::selectVectorizationFactor() { // Add on other costs that are modelled in VPlan, but not in the legacy // cost model. 
VPCostContext CostCtx(CM.TTI, *CM.TLI, *P, CM, CM.CostKind, - *CM.PSE.getSE()); + *CM.PSE.getSE(), OrigLoop); VPRegionBlock *VectorRegion = P->getVectorLoopRegion(); assert(VectorRegion && "Expected to have a vector region!"); for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly( @@ -6876,7 +6876,8 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF, InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan, ElementCount VF) const { - VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE()); + VPCostContext CostCtx(CM.TTI, *CM.TLI, Plan, CM, CM.CostKind, *PSE.getSE(), + OrigLoop); InstructionCost Cost = precomputeCosts(Plan, VF, CostCtx); // Now compute and add the VPlan-based cost. @@ -7110,12 +7111,13 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() { // case, don't trigger the assertion, as the extra simplifications may cause a // different VF to be picked by the VPlan-based cost model. VPCostContext CostCtx(CM.TTI, *CM.TLI, BestPlan, CM, CM.CostKind, - *CM.PSE.getSE()); + *CM.PSE.getSE(), OrigLoop); precomputeCosts(BestPlan, BestFactor.Width, CostCtx); // Verify that the VPlan-based and legacy cost models agree, except for VPlans // with early exits and plans with additional VPlan simplifications. The // legacy cost model doesn't properly model costs for such loops. assert((BestFactor.Width == LegacyVF.Width || BestPlan.hasEarlyExit() || + !Legal->getLAI()->getSymbolicStrides().empty() || planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width), CostCtx, OrigLoop, BestFactor.Width) || @@ -8441,7 +8443,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( // and mulacc-reduction are implemented. 
if (!CM.foldTailWithEVL()) { VPCostContext CostCtx(CM.TTI, *CM.TLI, *Plan, CM, CM.CostKind, - *CM.PSE.getSE()); + *CM.PSE.getSE(), OrigLoop); VPlanTransforms::runPass(VPlanTransforms::convertToAbstractRecipes, *Plan, CostCtx, Range); } @@ -9911,7 +9913,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { bool ForceVectorization = Hints.getForce() == LoopVectorizeHints::FK_Enabled; VPCostContext CostCtx(CM.TTI, *CM.TLI, LVP.getPlanFor(VF.Width), CM, - CM.CostKind, *CM.PSE.getSE()); + CM.CostKind, *CM.PSE.getSE(), L); if (!ForceVectorization && !isOutsideLoopWorkProfitable(Checks, VF, L, PSE, CostCtx, LVP.getPlanFor(VF.Width), SEL, diff --git a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h index 2aaabd9ebdd0..965426f86ff2 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanHelpers.h +++ b/llvm/lib/Transforms/Vectorize/VPlanHelpers.h @@ -350,13 +350,14 @@ struct VPCostContext { SmallPtrSet SkipCostComputation; TargetTransformInfo::TargetCostKind CostKind; ScalarEvolution &SE; + const Loop *L; VPCostContext(const TargetTransformInfo &TTI, const TargetLibraryInfo &TLI, const VPlan &Plan, LoopVectorizationCostModel &CM, TargetTransformInfo::TargetCostKind CostKind, - ScalarEvolution &SE) + ScalarEvolution &SE, const Loop *L) : TTI(TTI), TLI(TLI), Types(Plan), LLVMCtx(Plan.getContext()), CM(CM), - CostKind(CostKind), SE(SE) {} + CostKind(CostKind), SE(SE), L(L) {} /// Return the cost for \p UI with \p VF using the legacy cost model as /// fallback until computing the cost of all recipes migrates to VPlan. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index 9a63c802047e..bde62dd6dd4b 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -3167,26 +3167,30 @@ bool VPReplicateRecipe::shouldPack() const { }); } -/// Returns true if \p Ptr is a pointer computation for which the legacy cost -/// model computes a SCEV expression when computing the address cost. -static bool shouldUseAddressAccessSCEV(const VPValue *Ptr) { +/// Returns a SCEV expression for \p Ptr if it is a pointer computation for +/// which the legacy cost model computes a SCEV expression when computing the +/// address cost. Computing SCEVs for VPValues is incomplete and returns +/// SCEVCouldNotCompute in cases the legacy cost model can compute SCEVs. In +/// those cases we fall back to the legacy cost model. Otherwise return nullptr. +static const SCEV *getAddressAccessSCEV(const VPValue *Ptr, ScalarEvolution &SE, + const Loop *L) { auto *PtrR = Ptr->getDefiningRecipe(); if (!PtrR || !((isa(PtrR) && cast(PtrR)->getOpcode() == Instruction::GetElementPtr) || isa(PtrR) || match(Ptr, m_GetElementPtr(m_VPValue(), m_VPValue())))) - return false; + return nullptr; // We are looking for a GEP where all indices are either loop invariant or // inductions. for (VPValue *Opd : drop_begin(PtrR->operands())) { if (!Opd->isDefinedOutsideLoopRegions() && !isa(Opd)) - return false; + return nullptr; } - return true; + return vputils::getSCEVExprForVPValue(Ptr, SE, L); } /// Returns true if \p V is used as part of the address of another load or @@ -3354,9 +3358,8 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, bool IsLoad = UI->getOpcode() == Instruction::Load; const VPValue *PtrOp = getOperand(!IsLoad); - // TODO: Handle cases where we need to pass a SCEV to - // getAddressComputationCost. 
- if (shouldUseAddressAccessSCEV(PtrOp)) + const SCEV *PtrSCEV = getAddressAccessSCEV(PtrOp, Ctx.SE, Ctx.L); + if (isa_and_nonnull(PtrSCEV)) break; Type *ValTy = Ctx.Types.inferScalarType(IsLoad ? this : getOperand(0)); @@ -3374,7 +3377,7 @@ InstructionCost VPReplicateRecipe::computeCost(ElementCount VF, InstructionCost ScalarCost = ScalarMemOpCost + Ctx.TTI.getAddressComputationCost( PtrTy, UsedByLoadStoreAddress ? nullptr : &Ctx.SE, - nullptr, Ctx.CostKind); + PtrSCEV, Ctx.CostKind); if (isSingleScalar()) return ScalarCost; diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index 4db92e7def3e..54348c6e3448 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -75,7 +75,8 @@ bool vputils::isHeaderMask(const VPValue *V, const VPlan &Plan) { B == Plan.getBackedgeTakenCount(); } -const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) { +const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V, + ScalarEvolution &SE, const Loop *L) { if (V->isLiveIn()) { if (Value *LiveIn = V->getLiveInIRValue()) return SE.getSCEV(LiveIn); @@ -86,6 +87,52 @@ const SCEV *vputils::getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE) { return TypeSwitch(V->getDefiningRecipe()) .Case( [](const VPExpandSCEVRecipe *R) { return R->getSCEV(); }) + .Case([&SE, L](const VPCanonicalIVPHIRecipe *R) { + if (!L) + return SE.getCouldNotCompute(); + const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), SE, L); + return SE.getAddRecExpr(Start, SE.getOne(Start->getType()), L, + SCEV::FlagAnyWrap); + }) + .Case([&SE, L](const VPDerivedIVRecipe *R) { + const SCEV *Start = getSCEVExprForVPValue(R->getOperand(0), SE, L); + const SCEV *IV = getSCEVExprForVPValue(R->getOperand(1), SE, L); + const SCEV *Scale = getSCEVExprForVPValue(R->getOperand(2), SE, L); + if (any_of(ArrayRef({Start, IV, Scale}), IsaPred)) + return SE.getCouldNotCompute(); + + return 
SE.getAddExpr(SE.getTruncateOrSignExtend(Start, IV->getType()), + SE.getMulExpr(IV, SE.getTruncateOrSignExtend( + Scale, IV->getType()))); + }) + .Case([&SE, L](const VPScalarIVStepsRecipe *R) { + const SCEV *IV = getSCEVExprForVPValue(R->getOperand(0), SE, L); + const SCEV *Step = getSCEVExprForVPValue(R->getOperand(1), SE, L); + if (isa(IV) || isa(Step)) + return SE.getCouldNotCompute(); + return SE.getMulExpr(SE.getTruncateOrSignExtend(IV, Step->getType()), + Step); + }) + .Case([&SE, L](const VPReplicateRecipe *R) { + if (R->getOpcode() != Instruction::GetElementPtr) + return SE.getCouldNotCompute(); + + const SCEV *Base = getSCEVExprForVPValue(R->getOperand(0), SE, L); + if (isa(Base)) + return SE.getCouldNotCompute(); + + SmallVector IndexExprs; + for (VPValue *Index : drop_begin(R->operands())) { + const SCEV *IndexExpr = getSCEVExprForVPValue(Index, SE, L); + if (isa(IndexExpr)) + return SE.getCouldNotCompute(); + IndexExprs.push_back(IndexExpr); + } + + Type *SrcElementTy = cast(R->getUnderlyingInstr()) + ->getSourceElementType(); + return SE.getGEPExpr(Base, IndexExprs, SrcElementTy); + }) .Default([&SE](const VPRecipeBase *) { return SE.getCouldNotCompute(); }); } diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.h b/llvm/lib/Transforms/Vectorize/VPlanUtils.h index 37cd413da907..c21a0e70c139 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.h +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.h @@ -37,7 +37,8 @@ VPValue *getOrCreateVPValueForSCEVExpr(VPlan &Plan, const SCEV *Expr); /// Return the SCEV expression for \p V. Returns SCEVCouldNotCompute if no /// SCEV expression could be constructed. -const SCEV *getSCEVExprForVPValue(VPValue *V, ScalarEvolution &SE); +const SCEV *getSCEVExprForVPValue(const VPValue *V, ScalarEvolution &SE, + const Loop *L = nullptr); /// Returns true if \p VPV is a single scalar, either because it produces the /// same value for all lanes or only has its first lane used. 
-- cgit v1.2.3 From 1e3a1ce911d1e5e3804b63e3ba3059c36eb697e5 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Thu, 30 Oct 2025 15:50:45 -0700 Subject: Add tests for CWG issues 6, 212, 232, 2823. (#165633) Unfortunately this adds two more "no"s to cxx_dr_status for 232 and 2823. --------- Co-authored-by: Vlad Serebrennikov --- clang/test/CXX/drs/cwg0xx.cpp | 2 ++ clang/test/CXX/drs/cwg28xx.cpp | 18 +++++++++++++++ clang/test/CXX/drs/cwg2xx.cpp | 35 +++++++++++++++++++++++++++++ clang/test/CXX/drs/cwg6.cpp | 51 ++++++++++++++++++++++++++++++++++++++++++ clang/www/cxx_dr_status.html | 8 +++---- 5 files changed, 110 insertions(+), 4 deletions(-) create mode 100644 clang/test/CXX/drs/cwg6.cpp diff --git a/clang/test/CXX/drs/cwg0xx.cpp b/clang/test/CXX/drs/cwg0xx.cpp index 805be67f2dc1..10a4f1d6add3 100644 --- a/clang/test/CXX/drs/cwg0xx.cpp +++ b/clang/test/CXX/drs/cwg0xx.cpp @@ -90,6 +90,8 @@ namespace cwg5 { // cwg5: 3.1 const C c = e; } // namespace cwg5 +// cwg6 is in cwg6.cpp + namespace cwg7 { // cwg7: 3.4 class A { public: ~A(); }; class B : virtual private A {}; // #cwg7-B diff --git a/clang/test/CXX/drs/cwg28xx.cpp b/clang/test/CXX/drs/cwg28xx.cpp index a6b2b99e0c3f..d0ee191ef23d 100644 --- a/clang/test/CXX/drs/cwg28xx.cpp +++ b/clang/test/CXX/drs/cwg28xx.cpp @@ -61,6 +61,24 @@ namespace cwg2819 { // cwg2819: 19 c++26 #endif } // namespace cwg2819 +namespace cwg2823 { // cwg2823: no +#if __cplusplus >= 201103L + constexpr int *p = 0; + constexpr int *q1 = &*p; + // expected-error@-1 {{constexpr variable 'q1' must be initialized by a constant expression}} + // expected-note@-2 {{dereferencing a null pointer is not allowed in a constant expression}} + // FIXME: invalid: dereferencing a null pointer. + constexpr int *q2 = &p[0]; + + int arr[32]; + constexpr int *r = arr; + // FIXME: invalid: dereferencing a past-the-end pointer. + constexpr int *s1 = &*(r + 32); + // FIXME: invalid: dereferencing a past-the-end pointer. 
+ constexpr int *s2 = &r[32]; +#endif +} + namespace cwg2847 { // cwg2847: 19 review 2024-03-01 #if __cplusplus >= 202002L diff --git a/clang/test/CXX/drs/cwg2xx.cpp b/clang/test/CXX/drs/cwg2xx.cpp index 37186e3c3f20..a4995ddc2c58 100644 --- a/clang/test/CXX/drs/cwg2xx.cpp +++ b/clang/test/CXX/drs/cwg2xx.cpp @@ -230,6 +230,38 @@ namespace cwg211 { // cwg211: 2.7 }; } // namespace cwg211 +namespace cwg212 { // cwg212: 2.7 + template struct Base; + template struct Derived; + + int *overload(void*); + float *overload(Base*); + double *overload(Base*); + + void f(Derived *p) { + // OK, calls void* overload. + int *a = overload(p); + + Base *q = p; + // expected-error@-1 {{cannot initialize a variable of type 'Base *' with an lvalue of type 'Derived *'}} + } + + template struct Base {}; + template struct Derived : Base {}; + + void g(Derived *p) { + // OK, instantiates and calls Base* overlod. + double *b = overload(p); + (void)b; + } + + void h(Derived *p) { + // OK, instantiates and converts. + Base *q = p; + (void)q; + } +} + namespace cwg213 { // cwg213: 2.7 template struct A : T { void h(T t) { @@ -593,6 +625,9 @@ namespace cwg231 { // cwg231: 2.7 } } // namespace cwg231 +// 232 is NAD; the desired behavior is described in 2823. 
+// cwg232: dup 2823 + // cwg234: na // cwg235: na diff --git a/clang/test/CXX/drs/cwg6.cpp b/clang/test/CXX/drs/cwg6.cpp new file mode 100644 index 000000000000..4752e72034c7 --- /dev/null +++ b/clang/test/CXX/drs/cwg6.cpp @@ -0,0 +1,51 @@ +// RUN: %clang_cc1 -std=c++98 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++11 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++14 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++17 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++20 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++23 %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK +// RUN: %clang_cc1 -std=c++2c %s -triple x86_64-linux-gnu -emit-llvm -o - -fexceptions -fcxx-exceptions -pedantic-errors | FileCheck %s --check-prefixes CHECK + +#if __cplusplus == 199711L +#define static_assert(expr) __extension__ _Static_assert(expr) +#define noexcept throw() +#endif + +namespace cwg6 { // cwg6: 2.7 +#if __cplusplus >= 201103L +struct Counter { + int copies; + constexpr Counter(int copies) : copies(copies) {} + constexpr Counter(const Counter& other) : copies(other.copies + 1) {} +}; + +// Passing an lvalue by value makes a non-elidable copy. 
+constexpr int PassByValue(Counter c) { return c.copies; } +constexpr int PassByValue2(Counter c) { return PassByValue(c); } +constexpr int PassByValue3(Counter c) { return PassByValue2(c); } +static_assert(PassByValue(Counter(0)) == 0, "expect no copies"); +static_assert(PassByValue2(Counter(0)) == 1, "expect 1 copy"); +static_assert(PassByValue3(Counter(0)) == 2, "expect 2 copies"); +#endif + +struct A { + A() noexcept; + A(const A&) noexcept; + ~A() noexcept; +}; + +inline void f(A a) noexcept {} + +// CHECK-LABEL: define {{.*}} @_ZN4cwg64callEv +void call() { + A a; + // We copy the parameter here, even though object is not mutated by f and + // otherwise satisfies the criteria for the proposed CWG6 optimization. + // CHECK: call {{.*}} @_ZN4cwg61AC1ERKS0_( + // CHECK: call {{.*}} @_ZN4cwg61fENS_1AE( + f(a); + // CHECK: call {{.*}} @_ZN4cwg61AD1Ev( + // CHECK: call {{.*}} @_ZN4cwg61AD1Ev( +} + +} // namespace cwg6 diff --git a/clang/www/cxx_dr_status.html b/clang/www/cxx_dr_status.html index ae9b28ee625c..0312c9dfc066 100755 --- a/clang/www/cxx_dr_status.html +++ b/clang/www/cxx_dr_status.html @@ -81,7 +81,7 @@ 6 NAD Should the optimization that allows a class object to alias another object also allow the case of a parameter in an inline function to alias its argument? - Unknown + Yes 7 @@ -1318,7 +1318,7 @@ accessible? 212 CD4 Implicit instantiation is not described clearly enough - Unknown + Yes 213 @@ -1438,7 +1438,7 @@ accessible? 232 NAD Is indirection through a null pointer undefined behavior? - Unknown + Duplicate of 2823 233 @@ -16790,7 +16790,7 @@ objects 2823 CD7 Implicit undefined behavior when dereferencing pointers - Unknown + No 2824 -- cgit v1.2.3 From f7a21a837c57ef182bee364923e92308d36c2d0d Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Thu, 30 Oct 2025 15:52:50 -0700 Subject: [libc] Remove optimization flags on entrypoints (#165782) Optimization flags are now handled through a common flag. These are no longer necessary. 
Fixes #112409 --- libc/src/fenv/CMakeLists.txt | 32 ----------------- libc/src/math/amdgpu/CMakeLists.txt | 70 ------------------------------------ libc/src/math/generic/CMakeLists.txt | 28 +-------------- libc/src/math/nvptx/CMakeLists.txt | 58 ------------------------------ 4 files changed, 1 insertion(+), 187 deletions(-) diff --git a/libc/src/fenv/CMakeLists.txt b/libc/src/fenv/CMakeLists.txt index c5431b1b9d55..f36884597796 100644 --- a/libc/src/fenv/CMakeLists.txt +++ b/libc/src/fenv/CMakeLists.txt @@ -6,8 +6,6 @@ add_entrypoint_object( fegetround.h DEPENDS libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -18,8 +16,6 @@ add_entrypoint_object( fesetround.h DEPENDS libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -30,8 +26,6 @@ add_entrypoint_object( feclearexcept.h DEPENDS libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -42,8 +36,6 @@ add_entrypoint_object( feraiseexcept.h DEPENDS libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -54,8 +46,6 @@ add_entrypoint_object( fetestexcept.h DEPENDS libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -67,8 +57,6 @@ add_entrypoint_object( DEPENDS libc.hdr.types.fexcept_t libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -80,8 +68,6 @@ add_entrypoint_object( DEPENDS libc.hdr.types.fenv_t libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -93,8 +79,6 @@ add_entrypoint_object( DEPENDS libc.hdr.types.fenv_t libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -107,8 +91,6 @@ add_entrypoint_object( libc.hdr.fenv_macros libc.hdr.types.fexcept_t libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -119,8 +101,6 @@ add_entrypoint_object( fesetexcept.h DEPENDS libc.src.__support.FPUtil.fenv_impl - 
COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -133,8 +113,6 @@ add_entrypoint_object( libc.hdr.fenv_macros libc.hdr.types.fexcept_t libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -147,8 +125,6 @@ add_entrypoint_object( libc.hdr.fenv_macros libc.hdr.types.fenv_t libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -161,8 +137,6 @@ add_entrypoint_object( libc.hdr.fenv_macros libc.hdr.types.fenv_t libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -173,8 +147,6 @@ add_entrypoint_object( feenableexcept.h DEPENDS libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -185,8 +157,6 @@ add_entrypoint_object( fedisableexcept.h DEPENDS libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -197,6 +167,4 @@ add_entrypoint_object( fegetexcept.h DEPENDS libc.src.__support.FPUtil.fenv_impl - COMPILE_OPTIONS - -O2 ) diff --git a/libc/src/math/amdgpu/CMakeLists.txt b/libc/src/math/amdgpu/CMakeLists.txt index e2cd3b99c303..d05d519b74b4 100644 --- a/libc/src/math/amdgpu/CMakeLists.txt +++ b/libc/src/math/amdgpu/CMakeLists.txt @@ -4,8 +4,6 @@ add_entrypoint_object( ceil.cpp HDRS ../ceil.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -14,8 +12,6 @@ add_entrypoint_object( ceilf.cpp HDRS ../ceilf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -24,8 +20,6 @@ add_entrypoint_object( copysign.cpp HDRS ../copysign.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -34,8 +28,6 @@ add_entrypoint_object( copysignf.cpp HDRS ../copysignf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -44,8 +36,6 @@ add_entrypoint_object( fabs.cpp HDRS ../fabs.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -54,8 +44,6 @@ add_entrypoint_object( fabsf.cpp HDRS ../fabsf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -64,8 +52,6 @@ add_entrypoint_object( floor.cpp HDRS ../floor.h - COMPILE_OPTIONS - 
-O2 ) add_entrypoint_object( @@ -74,8 +60,6 @@ add_entrypoint_object( floorf.cpp HDRS ../floorf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -84,8 +68,6 @@ add_entrypoint_object( fma.cpp HDRS ../fma.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -94,8 +76,6 @@ add_entrypoint_object( fmaf.cpp HDRS ../fmaf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -104,8 +84,6 @@ add_entrypoint_object( fmax.cpp HDRS ../fmax.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -114,8 +92,6 @@ add_entrypoint_object( fmaxf.cpp HDRS ../fmaxf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -124,8 +100,6 @@ add_entrypoint_object( fmin.cpp HDRS ../fmin.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -134,8 +108,6 @@ add_entrypoint_object( fminf.cpp HDRS ../fminf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -144,8 +116,6 @@ add_entrypoint_object( fmod.cpp HDRS ../fmod.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -154,8 +124,6 @@ add_entrypoint_object( fmodf.cpp HDRS ../fmodf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -164,8 +132,6 @@ add_entrypoint_object( nearbyint.cpp HDRS ../nearbyint.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -174,8 +140,6 @@ add_entrypoint_object( nearbyintf.cpp HDRS ../nearbyintf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -184,8 +148,6 @@ add_entrypoint_object( remainder.cpp HDRS ../remainder.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -194,8 +156,6 @@ add_entrypoint_object( remainderf.cpp HDRS ../remainderf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -204,8 +164,6 @@ add_entrypoint_object( rint.cpp HDRS ../rint.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -214,8 +172,6 @@ add_entrypoint_object( rintf.cpp HDRS ../rintf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -224,8 +180,6 @@ add_entrypoint_object( round.cpp HDRS ../round.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -234,8 +188,6 @@ add_entrypoint_object( sqrt.cpp HDRS ../sqrt.h 
- COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -244,8 +196,6 @@ add_entrypoint_object( sqrtf.cpp HDRS ../sqrtf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -254,8 +204,6 @@ add_entrypoint_object( trunc.cpp HDRS ../trunc.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -264,8 +212,6 @@ add_entrypoint_object( truncf.cpp HDRS ../truncf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -274,8 +220,6 @@ add_entrypoint_object( frexp.cpp HDRS ../frexp.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -284,8 +228,6 @@ add_entrypoint_object( frexpf.cpp HDRS ../frexpf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -294,8 +236,6 @@ add_entrypoint_object( scalbn.cpp HDRS ../scalbn.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -304,8 +244,6 @@ add_entrypoint_object( scalbnf.cpp HDRS ../scalbnf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -314,8 +252,6 @@ add_entrypoint_object( ldexp.cpp HDRS ../ldexp.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -324,8 +260,6 @@ add_entrypoint_object( ldexpf.cpp HDRS ../ldexpf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -336,7 +270,6 @@ add_entrypoint_object( ../tgamma.h COMPILE_OPTIONS ${bitcode_link_flags} - -O2 ) add_entrypoint_object( @@ -347,7 +280,6 @@ add_entrypoint_object( ../tgammaf.h COMPILE_OPTIONS ${bitcode_link_flags} - -O2 ) add_entrypoint_object( @@ -358,7 +290,6 @@ add_entrypoint_object( ../lgamma.h COMPILE_OPTIONS ${bitcode_link_flags} - -O2 ) add_entrypoint_object( @@ -369,5 +300,4 @@ add_entrypoint_object( ../lgamma_r.h COMPILE_OPTIONS ${bitcode_link_flags} - -O2 ) diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 6068c36e558e..c048a64db6bc 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -2662,8 +2662,6 @@ add_entrypoint_object( ../fmaximum_mag.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -2674,8 +2672,6 @@ 
add_entrypoint_object( ../fmaximum_magf.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -2686,8 +2682,6 @@ add_entrypoint_object( ../fmaximum_magl.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -2735,8 +2729,6 @@ add_entrypoint_object( ../fmaximum_mag_num.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -2747,8 +2739,6 @@ add_entrypoint_object( ../fmaximum_mag_numf.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -2759,8 +2749,6 @@ add_entrypoint_object( ../fmaximum_mag_numl.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -2954,8 +2942,6 @@ add_entrypoint_object( ../fminimum_mag.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -2966,8 +2952,6 @@ add_entrypoint_object( ../fminimum_magf.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -2978,8 +2962,6 @@ add_entrypoint_object( ../fminimum_magl.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -3027,8 +3009,6 @@ add_entrypoint_object( ../fminimum_mag_num.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -3039,8 +3019,6 @@ add_entrypoint_object( ../fminimum_mag_numf.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -3051,8 +3029,6 @@ add_entrypoint_object( ../fminimum_mag_numl.h DEPENDS libc.src.__support.FPUtil.basic_operations - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -4306,7 +4282,7 @@ add_entrypoint_object( libc.hdr.errno_macros libc.hdr.fenv_macros libc.src.__support.FPUtil.except_value_utils - libc.src.__support.FPUtil.fenv_impl + 
libc.src.__support.FPUtil.fenv_impl libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.rounding_mode libc.src.__support.macros.optimization @@ -4546,8 +4522,6 @@ add_entrypoint_object( atan.cpp HDRS ../atan.h - COMPILE_OPTIONS - -O3 DEPENDS libc.src.__support.math.atan ) diff --git a/libc/src/math/nvptx/CMakeLists.txt b/libc/src/math/nvptx/CMakeLists.txt index fcb2870b4bb1..e27c316ff20c 100644 --- a/libc/src/math/nvptx/CMakeLists.txt +++ b/libc/src/math/nvptx/CMakeLists.txt @@ -4,8 +4,6 @@ add_entrypoint_object( ceil.cpp HDRS ../ceil.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -14,8 +12,6 @@ add_entrypoint_object( ceilf.cpp HDRS ../ceilf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -24,8 +20,6 @@ add_entrypoint_object( copysign.cpp HDRS ../copysign.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -34,8 +28,6 @@ add_entrypoint_object( copysignf.cpp HDRS ../copysignf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -44,8 +36,6 @@ add_entrypoint_object( fabs.cpp HDRS ../fabs.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -54,8 +44,6 @@ add_entrypoint_object( fabsf.cpp HDRS ../fabsf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -64,8 +52,6 @@ add_entrypoint_object( floor.cpp HDRS ../floor.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -74,8 +60,6 @@ add_entrypoint_object( floorf.cpp HDRS ../floorf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -84,8 +68,6 @@ add_entrypoint_object( fma.cpp HDRS ../fma.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -94,8 +76,6 @@ add_entrypoint_object( fmaf.cpp HDRS ../fmaf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -104,8 +84,6 @@ add_entrypoint_object( fmax.cpp HDRS ../fmax.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -114,8 +92,6 @@ add_entrypoint_object( fmaxf.cpp HDRS ../fmaxf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -124,8 +100,6 @@ add_entrypoint_object( fmin.cpp HDRS ../fmin.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( 
@@ -134,8 +108,6 @@ add_entrypoint_object( fminf.cpp HDRS ../fminf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -144,8 +116,6 @@ add_entrypoint_object( fmod.cpp HDRS ../fmod.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -154,8 +124,6 @@ add_entrypoint_object( fmodf.cpp HDRS ../fmodf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -164,8 +132,6 @@ add_entrypoint_object( nearbyint.cpp HDRS ../nearbyint.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -174,8 +140,6 @@ add_entrypoint_object( nearbyintf.cpp HDRS ../nearbyintf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -184,8 +148,6 @@ add_entrypoint_object( remainder.cpp HDRS ../remainder.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -194,8 +156,6 @@ add_entrypoint_object( remainderf.cpp HDRS ../remainderf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -204,8 +164,6 @@ add_entrypoint_object( rint.cpp HDRS ../rint.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -214,8 +172,6 @@ add_entrypoint_object( rintf.cpp HDRS ../rintf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -224,8 +180,6 @@ add_entrypoint_object( round.cpp HDRS ../round.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -234,8 +188,6 @@ add_entrypoint_object( sqrt.cpp HDRS ../sqrt.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -244,8 +196,6 @@ add_entrypoint_object( sqrtf.cpp HDRS ../sqrtf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -254,8 +204,6 @@ add_entrypoint_object( trunc.cpp HDRS ../trunc.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -264,8 +212,6 @@ add_entrypoint_object( truncf.cpp HDRS ../truncf.h - COMPILE_OPTIONS - -O2 ) add_entrypoint_object( @@ -276,7 +222,6 @@ add_entrypoint_object( ../tgamma.h COMPILE_OPTIONS ${bitcode_link_flags} - -O2 ) add_entrypoint_object( @@ -287,7 +232,6 @@ add_entrypoint_object( ../tgammaf.h COMPILE_OPTIONS ${bitcode_link_flags} - -O2 ) add_entrypoint_object( @@ -298,7 +242,6 @@ add_entrypoint_object( ../lgamma.h 
COMPILE_OPTIONS ${bitcode_link_flags} - -O2 ) add_entrypoint_object( @@ -309,5 +252,4 @@ add_entrypoint_object( ../lgamma_r.h COMPILE_OPTIONS ${bitcode_link_flags} - -O2 ) -- cgit v1.2.3 From 9077522b2b2bf1a1ce2fca60ae76536ed1bbeb26 Mon Sep 17 00:00:00 2001 From: Razvan Lupusoru Date: Thu, 30 Oct 2025 16:17:00 -0700 Subject: [acc][flang] Define hasUnknownDimensions in MappableType (#165794) The MappableType interface currently defines a `generateAccBounds` method which examines a variable and generates `acc.bounds` operations that encode its dimensions. The implementation can extract bounds information in various ways: either from the MLIR type itself or by analyzing the IR to find dimension information from defining operations. However, we need to distinguish between cases where dimensional information is not directly available from the type itself. This new `hasUnknownDimensions` API returns true when the MLIR type does not encode dimensional information and there is no associated descriptor or metadata that would make this information extractable from the visible ssa value the represents the variable. The expected use case is calling `generateAccBounds` only when this returns true, as it indicates that bounds must be extracted from the IR (by walking back from current variable to its defining spots or its descriptor). This supports cases such as raw references to arrays with non-constant bounds (e.g., explicit-shape arrays in Fortran where bounds are passed as arguments). This functionality could also be leveraged for CIR VLA support in the future. 
For FIR types: - Box types return false (descriptor encodes dimensions) - Reference types check if the pointee has dynamic size using fir::hasDynamicSize() --- .../OpenACC/Support/FIROpenACCTypeInterfaces.h | 2 ++ .../OpenACC/Support/FIROpenACCTypeInterfaces.cpp | 22 ++++++++++++++++++++++ flang/test/Fir/OpenACC/openacc-mappable.fir | 5 +++++ flang/test/lib/OpenACC/TestOpenACCInterfaces.cpp | 4 ++++ .../mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td | 12 ++++++++++++ mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp | 4 +++- 6 files changed, 48 insertions(+), 1 deletion(-) diff --git a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h index 4817ed933ba0..3167c554abbd 100644 --- a/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h +++ b/flang/include/flang/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.h @@ -60,6 +60,8 @@ struct OpenACCMappableModel getOffsetInBytes(mlir::Type type, mlir::Value var, mlir::ValueRange accBounds, const mlir::DataLayout &dataLayout) const; + bool hasUnknownDimensions(mlir::Type type) const; + llvm::SmallVector generateAccBounds(mlir::Type type, mlir::Value var, mlir::OpBuilder &builder) const; diff --git a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp index ed9e41c74375..ae0f5fb8197f 100644 --- a/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp +++ b/flang/lib/Optimizer/OpenACC/Support/FIROpenACCTypeInterfaces.cpp @@ -193,6 +193,28 @@ OpenACCMappableModel::getOffsetInBytes( mlir::Type type, mlir::Value var, mlir::ValueRange accBounds, const mlir::DataLayout &dataLayout) const; +template +bool OpenACCMappableModel::hasUnknownDimensions(mlir::Type type) const { + assert(fir::isa_ref_type(type) && "expected FIR reference type"); + return fir::hasDynamicSize(fir::unwrapRefType(type)); +} + +template bool 
OpenACCMappableModel::hasUnknownDimensions( + mlir::Type type) const; + +template bool OpenACCMappableModel::hasUnknownDimensions( + mlir::Type type) const; + +template bool OpenACCMappableModel::hasUnknownDimensions( + mlir::Type type) const; + +template <> +bool OpenACCMappableModel::hasUnknownDimensions( + mlir::Type type) const { + // Descriptor-based entities have dimensions encoded. + return false; +} + static llvm::SmallVector generateSeqTyAccBounds(fir::SequenceType seqType, mlir::Value var, mlir::OpBuilder &builder) { diff --git a/flang/test/Fir/OpenACC/openacc-mappable.fir b/flang/test/Fir/OpenACC/openacc-mappable.fir index 05df35a48290..00fe2574da62 100644 --- a/flang/test/Fir/OpenACC/openacc-mappable.fir +++ b/flang/test/Fir/OpenACC/openacc-mappable.fir @@ -21,11 +21,13 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<2xi64>, // CHECK: Mappable: !fir.box> // CHECK: Type category: array // CHECK: Size: 40 + // CHECK: Has unknown dimensions: false // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "arr", structured = false} // CHECK: Pointer-like and Mappable: !fir.ref> // CHECK: Type category: array // CHECK: Size: 40 + // CHECK: Has unknown dimensions: false // This second test exercises argument of explicit-shape arrays in following forms: // `real :: arr1(nn), arr2(2:nn), arr3(10)` @@ -62,6 +64,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<2xi64>, // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "arr1", structured = false} // CHECK: Pointer-like and Mappable: !fir.ref> // CHECK: Type category: array + // CHECK: Has unknown dimensions: true // CHECK: Shape: %{{.*}} = fir.shape %[[EXTENT1:.*]] : (index) -> !fir.shape<1> // CHECK: Bound[0]: %{{.*}} = acc.bounds lowerbound(%[[LB1:.*]] : index) upperbound(%[[UB1:.*]] : index) extent(%{{.*}} : index) stride(%c1{{.*}} : index) startIdx(%c1{{.*}} : index) // CHECK: Lower bound: %[[LB1]] = arith.constant 
0 : index @@ -70,6 +73,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<2xi64>, // CHECK: Visiting: %{{.*}} = acc.copyin varPtr(%{{.*}} : !fir.ref>) -> !fir.ref> {name = "arr2", structured = false} // CHECK: Pointer-like and Mappable: !fir.ref> // CHECK: Type category: array + // CHECK: Has unknown dimensions: true // CHECK: Shape: %{{.*}} = fir.shape_shift %c2{{.*}}, %[[EXTENT2:.*]] : (index, index) -> !fir.shapeshift<1> // CHECK: Bound[0]: %{{.*}} = acc.bounds lowerbound(%[[LB2:.*]] : index) upperbound(%[[UB2:.*]] : index) extent(%{{.*}} : index) stride(%c1{{.*}} : index) startIdx(%c2{{.*}} : index) // CHECK: Lower bound: %[[LB2]] = arith.constant 0 : index @@ -80,6 +84,7 @@ module attributes {dlti.dl_spec = #dlti.dl_spec : vector<2xi64>, // CHECK: Type category: array // CHECK: Size: 40 // CHECK: Offset: 0 + // CHECK: Has unknown dimensions: false // CHECK: Shape: %{{.*}} = fir.shape %[[EXTENT3:.*]] : (index) -> !fir.shape<1> // CHECK: Bound[0]: %{{.*}} = acc.bounds lowerbound(%[[LB3:.*]] : index) upperbound(%[[UB3:.*]] : index) extent(%c10{{.*}} : index) stride(%c1{{.*}} : index) startIdx(%c1{{.*}} : index) // CHECK: Lower bound: %[[LB3]] = arith.constant 0 : index diff --git a/flang/test/lib/OpenACC/TestOpenACCInterfaces.cpp b/flang/test/lib/OpenACC/TestOpenACCInterfaces.cpp index 9a80e3b1a9ae..072aee5ba269 100644 --- a/flang/test/lib/OpenACC/TestOpenACCInterfaces.cpp +++ b/flang/test/lib/OpenACC/TestOpenACCInterfaces.cpp @@ -100,6 +100,10 @@ struct TestFIROpenACCInterfaces } } + llvm::errs() << "\t\tHas unknown dimensions: " + << (mappableTy.hasUnknownDimensions() ? 
"true" : "false") + << "\n"; + if (auto declareOp = dyn_cast_if_present(var.getDefiningOp())) { llvm::errs() << "\t\tShape: " << declareOp.getShape() << "\n"; diff --git a/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td b/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td index 93e9e3d0689f..d1bbc7f206ce 100644 --- a/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td +++ b/mlir/include/mlir/Dialect/OpenACC/OpenACCTypeInterfaces.td @@ -259,6 +259,18 @@ def OpenACC_MappableTypeInterface : TypeInterface<"MappableType"> { return {}; }] >, + InterfaceMethod< + /*description=*/[{ + Returns true if the dimensions of this type are not known. This occurs + when the MLIR type does not encode dimensional information and there is + no associated descriptor or metadata in the current entity that would + make this information extractable. For example, an opaque pointer type + pointing to an array without dimension information would have unknown + dimensions. + }], + /*retTy=*/"bool", + /*methodName=*/"hasUnknownDimensions" + >, InterfaceMethod< /*description=*/[{ Returns explicit `acc.bounds` operations that envelop the whole diff --git a/mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp b/mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp index 6ac9a873e615..d6203b97e00d 100644 --- a/mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp +++ b/mlir/unittests/Dialect/OpenACC/OpenACCOpsTest.cpp @@ -766,7 +766,9 @@ void testShortDataEntryOpBuildersMappableVar(OpBuilder &b, MLIRContext &context, struct IntegerOpenACCMappableModel : public mlir::acc::MappableType::ExternalModel {}; + IntegerType> { + bool hasUnknownDimensions(mlir::Type type) const { return false; } +}; TEST_F(OpenACCOpsTest, mappableTypeBuilderDataEntry) { // First, set up the test by attaching MappableInterface to IntegerType. 
-- cgit v1.2.3 From 1099d2839e72902937f460a2eaa2055ab565b7a8 Mon Sep 17 00:00:00 2001 From: Tom Stellard Date: Thu, 30 Oct 2025 16:20:54 -0700 Subject: workflows/release-binaries: Drop x86_64 Mac OS builds (#165645) We don't have the resources to test this and the builds are very expensive. If someone is interested in providing x86_64 macOS they can submit this as third-party binaries or provide resources to test the builds. --- .github/workflows/release-binaries-all.yml | 1 - .github/workflows/release-binaries.yml | 11 ----------- 2 files changed, 12 deletions(-) diff --git a/.github/workflows/release-binaries-all.yml b/.github/workflows/release-binaries-all.yml index 0b52a08202f1..eef49b5e3625 100644 --- a/.github/workflows/release-binaries-all.yml +++ b/.github/workflows/release-binaries-all.yml @@ -90,7 +90,6 @@ jobs: runs-on: - ubuntu-22.04 - ubuntu-22.04-arm - - macos-13 - macos-14 uses: ./.github/workflows/release-binaries.yml diff --git a/.github/workflows/release-binaries.yml b/.github/workflows/release-binaries.yml index 814592626525..fa73b9d9fe8d 100644 --- a/.github/workflows/release-binaries.yml +++ b/.github/workflows/release-binaries.yml @@ -21,7 +21,6 @@ on: options: - ubuntu-22.04 - ubuntu-22.04-arm - - macos-13 - macos-14 workflow_call: @@ -130,8 +129,6 @@ jobs: target_cmake_flags="$target_cmake_flags -DBOOTSTRAP_BOOTSTRAP_COMPILER_RT_ENABLE_IOS=OFF" if [ "$RUNNER_ARCH" = "ARM64" ]; then arches=arm64 - else - arches=x86_64 fi target_cmake_flags="$target_cmake_flags -DBOOTSTRAP_BOOTSTRAP_DARWIN_osx_ARCHS=$arches -DBOOTSTRAP_BOOTSTRAP_DARWIN_osx_BUILTIN_ARCHS=$arches" fi @@ -147,14 +144,6 @@ jobs: build_runs_on="depot-${{ inputs.runs-on }}-16" test_runs_on=$build_runs_on ;; - macos-13) - if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then - build_runs_on="${{ inputs.runs-on }}" - else - build_runs_on="macos-13-large" - fi - test_runs_on="${{ inputs.runs-on }}" - ;; macos-14) if [ "$GITHUB_EVENT_NAME" = "pull_request" ]; then build_runs_on="${{ 
inputs.runs-on }}" -- cgit v1.2.3 From 45b1a4bb8d0f77a336730c9da7f15820c2869611 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Thu, 30 Oct 2025 16:37:00 -0700 Subject: Add to llvm-libc-types headers that need it. (#165798) We need `` to support having "bool" members inside pthread structs that may get included through `` from C code prior to C23. --- libc/include/llvm-libc-types/__barrier_type.h | 2 ++ libc/include/llvm-libc-types/pthread_barrierattr_t.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/libc/include/llvm-libc-types/__barrier_type.h b/libc/include/llvm-libc-types/__barrier_type.h index 59712619e917..5752f832f04b 100644 --- a/libc/include/llvm-libc-types/__barrier_type.h +++ b/libc/include/llvm-libc-types/__barrier_type.h @@ -9,6 +9,8 @@ #ifndef LLVM_LIBC_TYPES__BARRIER_TYPE_H #define LLVM_LIBC_TYPES__BARRIER_TYPE_H +#include + typedef struct __attribute__((aligned(8 /* alignof (Barrier) */))) { unsigned expected; unsigned waiting; diff --git a/libc/include/llvm-libc-types/pthread_barrierattr_t.h b/libc/include/llvm-libc-types/pthread_barrierattr_t.h index 064be5bfb672..b62fdc0f72e1 100644 --- a/libc/include/llvm-libc-types/pthread_barrierattr_t.h +++ b/libc/include/llvm-libc-types/pthread_barrierattr_t.h @@ -9,6 +9,8 @@ #ifndef LLVM_LIBC_TYPES_PTHREAD_BARRIERATTR_T_H #define LLVM_LIBC_TYPES_PTHREAD_BARRIERATTR_T_H +#include + typedef struct { bool pshared; } pthread_barrierattr_t; -- cgit v1.2.3 From 09318c6bffcfe9790d73469297833ef45876baa0 Mon Sep 17 00:00:00 2001 From: agozillon Date: Fri, 31 Oct 2025 00:54:31 +0100 Subject: [MLIR][OpenMP] Fix and simplify bounds offset calculation for 1-D GEP offsets (#165486) Currently this is being calculated incorrectly and will result in incorrect index offsets in more complicated array slices. This PR tries to address it by refactoring and changing the calculation to be more correct. 
--- .../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 55 +++++++------------ ...omptarget-record-type-with-ptr-member-host.mlir | 3 +- .../fortran/descriptor-array-slice-map.f90 | 61 ++++++++++++++++++++++ 3 files changed, 81 insertions(+), 38 deletions(-) create mode 100644 offload/test/offloading/fortran/descriptor-array-slice-map.f90 diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index f28454075f1d..8edec990eaab 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -4084,12 +4084,13 @@ static omp::MapInfoOp getFirstOrLastMappedMemberPtr(omp::MapInfoOp mapInfo, /// /// Fortran /// map(tofrom: array(2:5, 3:2)) -/// or -/// C++ -/// map(tofrom: array[1:4][2:3]) +/// /// We must calculate the initial pointer offset to pass across, this function /// performs this using bounds. /// +/// TODO/WARNING: This only supports Fortran's column major indexing currently +/// as is noted in the note below and comments in the function, we must extend +/// this function when we add a C++ frontend. /// NOTE: which while specified in row-major order it currently needs to be /// flipped for Fortran's column order array allocation and access (as /// opposed to C++'s row-major, hence the backwards processing where order is @@ -4125,46 +4126,28 @@ calculateBoundsOffset(LLVM::ModuleTranslation &moduleTranslation, // with a pointer that's being treated like an array and we have the // underlying type e.g. an i32, or f64 etc, e.g. a fortran descriptor base // address (pointer pointing to the actual data) so we must caclulate the - // offset using a single index which the following two loops attempts to - // compute. - - // Calculates the size offset we need to make per row e.g. 
first row or - // column only needs to be offset by one, but the next would have to be - // the previous row/column offset multiplied by the extent of current row. + // offset using a single index which the following loop attempts to + // compute using the standard column-major algorithm e.g for a 3D array: // - // For example ([1][10][100]): + // ((((c_idx * b_len) + b_idx) * a_len) + a_idx) // - // - First row/column we move by 1 for each index increment - // - Second row/column we move by 1 (first row/column) * 10 (extent/size of - // current) for 10 for each index increment - // - Third row/column we would move by 10 (second row/column) * - // (extent/size of current) 100 for 1000 for each index increment - std::vector dimensionIndexSizeOffset{builder.getInt64(1)}; - for (size_t i = 1; i < bounds.size(); ++i) { - if (auto boundOp = dyn_cast_if_present( - bounds[i].getDefiningOp())) { - dimensionIndexSizeOffset.push_back(builder.CreateMul( - moduleTranslation.lookupValue(boundOp.getExtent()), - dimensionIndexSizeOffset[i - 1])); - } - } - - // Now that we have calculated how much we move by per index, we must - // multiply each lower bound offset in indexes by the size offset we - // have calculated in the previous and accumulate the results to get - // our final resulting offset. + // It is of note that it's doing column-major rather than row-major at the + // moment, but having a way for the frontend to indicate which major format + // to use or standardizing/canonicalizing the order of the bounds to compute + // the offset may be useful in the future when there's other frontends with + // different formats. 
+ std::vector dimensionIndexSizeOffset; for (int i = bounds.size() - 1; i >= 0; --i) { if (auto boundOp = dyn_cast_if_present( bounds[i].getDefiningOp())) { - if (idx.empty()) - idx.emplace_back(builder.CreateMul( - moduleTranslation.lookupValue(boundOp.getLowerBound()), - dimensionIndexSizeOffset[i])); + if (i == ((int)bounds.size() - 1)) + idx.emplace_back( + moduleTranslation.lookupValue(boundOp.getLowerBound())); else idx.back() = builder.CreateAdd( - idx.back(), builder.CreateMul(moduleTranslation.lookupValue( - boundOp.getLowerBound()), - dimensionIndexSizeOffset[i])); + builder.CreateMul(idx.back(), moduleTranslation.lookupValue( + boundOp.getExtent())), + moduleTranslation.lookupValue(boundOp.getLowerBound())); } } } diff --git a/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir b/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir index a1e415c35e4b..9640f03311af 100644 --- a/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir +++ b/mlir/test/Target/LLVMIR/omptarget-record-type-with-ptr-member-host.mlir @@ -81,9 +81,8 @@ module attributes {omp.is_target_device = false, omp.target_triples = ["amdgcn-a // CHECK: %[[ARR_SECT_SIZE:.*]] = mul i64 %[[ARR_SECT_SIZE1]], 4 // CHECK: %[[LFULL_ARR:.*]] = load ptr, ptr @full_arr, align 8 // CHECK: %[[FULL_ARR_PTR:.*]] = getelementptr inbounds float, ptr %[[LFULL_ARR]], i64 0 -// CHECK: %[[ARR_SECT_OFFSET1:.*]] = mul i64 %[[ARR_SECT_OFFSET2]], 1 // CHECK: %[[LARR_SECT:.*]] = load ptr, ptr @sect_arr, align 8 -// CHECK: %[[ARR_SECT_PTR:.*]] = getelementptr inbounds i32, ptr %[[LARR_SECT]], i64 %[[ARR_SECT_OFFSET1]] +// CHECK: %[[ARR_SECT_PTR:.*]] = getelementptr inbounds i32, ptr %[[LARR_SECT]], i64 %[[ARR_SECT_OFFSET2]] // CHECK: %[[SCALAR_PTR_LOAD:.*]] = load ptr, ptr %[[SCALAR_BASE]], align 8 // CHECK: %[[FULL_ARR_DESC_SIZE:.*]] = sdiv exact i64 48, ptrtoint (ptr getelementptr (i8, ptr null, i32 1) to i64) // CHECK: %[[FULL_ARR_SIZE_CMP:.*]] = icmp eq ptr 
%[[FULL_ARR_PTR]], null diff --git a/offload/test/offloading/fortran/descriptor-array-slice-map.f90 b/offload/test/offloading/fortran/descriptor-array-slice-map.f90 new file mode 100644 index 000000000000..69abb320adc3 --- /dev/null +++ b/offload/test/offloading/fortran/descriptor-array-slice-map.f90 @@ -0,0 +1,61 @@ +! Offloading test which aims to test that an allocatable/descriptor type map +! will allow the appropriate slicing behaviour. +! REQUIRES: flang, amdgpu + +subroutine slice_writer(n, a, b, c) + implicit none + integer, intent(in) :: n + real(8), intent(in) :: a(n) + real(8), intent(in) :: b(n) + real(8), intent(out) :: c(n) + integer :: i + + !$omp target teams distribute parallel do + do i=1,n + c(i) = b(i) + a(i) + end do +end subroutine slice_writer + +! RUN: %libomptarget-compile-fortran-run-and-check-generic +program main + implicit none + real(kind=8), allocatable :: a(:,:,:) + integer :: i, j, k, idx, idx1, idx2, idx3 + + i=50 + j=100 + k=2 + + allocate(a(1:i,1:j,1:k)) + + do idx1=1, i + do idx2=1, j + do idx3=1, k + a(idx1,idx2,idx3) = idx2 + end do + end do + end do + + do idx=1,k + !$omp target enter data map(alloc: a(1:i,:, idx)) + + !$omp target update to(a(1:i, 1:30, idx), & + !$omp& a(1:i, 61:100, idx)) + + call slice_writer(i, a(:, 1, idx), a(:, 61, idx), a(:, 31, idx)) + call slice_writer(i, a(:, 30, idx), a(:, 100, idx), a(:, 60, idx)) + + !$omp target update from(a(1:i, 31:60, idx)) + !$omp target exit data map(delete: a(1:i, :, idx)) + + print *, a(1, 31, idx), a(2, 31, idx), a(i, 31, idx) + print *, a(1, 60, idx), a(2, 60, idx), a(i, 60, idx) + enddo + + deallocate(a) +end program + +! CHECK: 62. 62. 62. +! CHECK: 130. 130. 130. +! CHECK: 62. 62. 62. +! CHECK: 130. 130. 130. 
-- cgit v1.2.3 From 1f702d4ee855b1622a3ab289a9ac8b6742e3901c Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Fri, 31 Oct 2025 00:01:50 +0000 Subject: [lit] Mark ulimit test as unsupported on Solaris 160058fc19a9bcb70feb442a755229838b4dbc7a broke the Solaris bots because they do not support RLIMIT_FSIZE despite it being in POSIX 2004. Disable it there for now as the loss of test coverage should not be significant. --- llvm/utils/lit/tests/shtest-ulimit-nondarwin.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py b/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py index 893270ec68f6..286fd3d7e173 100644 --- a/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py +++ b/llvm/utils/lit/tests/shtest-ulimit-nondarwin.py @@ -2,7 +2,7 @@ # ulimit does not work on non-POSIX platforms. # These tests are specific to options that Darwin does not support. -# UNSUPPORTED: system-windows, system-darwin, system-aix +# UNSUPPORTED: system-windows, system-darwin, system-aix, system-solaris # RUN: not %{lit} -a -v %{inputs}/shtest-ulimit-nondarwin | FileCheck %s -- cgit v1.2.3 From c8fd662bfa83b3cfa1c7a37ffaec8fac9bb36ccf Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Thu, 30 Oct 2025 17:18:18 -0700 Subject: Update Qualcomm email addresses. (#165799) Updating email IDs to align with employer mandate. 
--- clang/AreaTeamMembers.txt | 2 +- clang/Maintainers.rst | 2 +- llvm/Maintainers.md | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/clang/AreaTeamMembers.txt b/clang/AreaTeamMembers.txt index 964d11e79f69..2928943f4753 100644 --- a/clang/AreaTeamMembers.txt +++ b/clang/AreaTeamMembers.txt @@ -13,5 +13,5 @@ rnk@google.com (email), rnk (Discourse), rnk (GitHub), rnk (Discord) Other Members ------------- Eli Friedman -efriedma@quicinc.com> (email), efriedma-quic (Discourse), efriedma-quic (GitHub) +efriedma@qti.qualcomm.com> (email), efriedma-quic (Discourse), efriedma-quic (GitHub) diff --git a/clang/Maintainers.rst b/clang/Maintainers.rst index 8fb2201aae16..1d16ea9fe563 100644 --- a/clang/Maintainers.rst +++ b/clang/Maintainers.rst @@ -46,7 +46,7 @@ Clang LLVM IR generation | rjmccall\@apple.com (email), rjmccall (Phabricator), rjmccall (GitHub) | Eli Friedman -| efriedma\@quicinc.com (email), efriedma (Phabricator), efriedma-quic (GitHub) +| efriedma\@qti.qualcomm.com (email), efriedma (Phabricator), efriedma-quic (GitHub) | Anton Korobeynikov | anton\@korobeynikov.info (email), asl (Phabricator), asl (GitHub) diff --git a/llvm/Maintainers.md b/llvm/Maintainers.md index e52259236fc1..1eba955f9d6e 100644 --- a/llvm/Maintainers.md +++ b/llvm/Maintainers.md @@ -197,7 +197,7 @@ david.green@arm.com (email), [davemgreen](https://github.com/davemgreen) (GitHub Amara Emerson (esp. AArch64 GlobalISel) \ amara@apple.com (email), [aemerson](https://github.com/aemerson) (GitHub) \ Eli Friedman (esp. 
ARM64EC) \ -efriedma@quicinc.com (email), [efriedma-quic](https://github.com/efriedma-quic) (GitHub) \ +efriedma@qti.qualcomm.com (email), [efriedma-quic](https://github.com/efriedma-quic) (GitHub) \ Sjoerd Meijer \ smeijer@nvidia.com (email), [sjoerdmeijer](https://github.com/sjoerdmeijer) (GitHub) \ Nashe Mncube \ @@ -246,7 +246,7 @@ mail@justinbogner.com (email), [bogner](https://github.com/bogner) (GitHub) #### Hexagon backend Sundeep Kushwaha \ -sundeepk@quicinc.com (email), [SundeepKushwaha](https://github.com/SundeepKushwaha) (GitHub) +sundeepk@qti.qualcomm.com (email), [SundeepKushwaha](https://github.com/SundeepKushwaha) (GitHub) #### Lanai backend -- cgit v1.2.3 From f8b5f86cd9c1a6b10c7cf161075eae5b32e18126 Mon Sep 17 00:00:00 2001 From: Aiden Grossman Date: Thu, 30 Oct 2025 17:30:22 -0700 Subject: Reapply "[TSan] Make Test work with Internal Shell" This reverts commit 39f08eb997424626bd396a0529daf4ab816d19e6. This was causing buildbot failures because we were using an explicit python call instead of the python substitution. This leads to failures on platforms that do not have a binary called python. 
--- compiler-rt/test/tsan/ignore_lib0.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/compiler-rt/test/tsan/ignore_lib0.cpp b/compiler-rt/test/tsan/ignore_lib0.cpp index cba58c617703..9c4919022b51 100644 --- a/compiler-rt/test/tsan/ignore_lib0.cpp +++ b/compiler-rt/test/tsan/ignore_lib0.cpp @@ -4,11 +4,13 @@ // RUN: %clangxx_tsan -O1 -fno-builtin %s -DLIB -fPIC -fno-sanitize=thread -shared -o %t-dir/libignore_lib0.so // RUN: %clangxx_tsan -O1 %s -L%t-dir -lignore_lib0 %link_libcxx_tsan -o %t // RUN: echo running w/o suppressions: -// RUN: env LD_LIBRARY_PATH=%t-dir${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} %deflake %run %t | FileCheck %s --check-prefix=CHECK-NOSUPP +// RUN: echo -n %t-dir > %t.ld_library_path +// RUN: %python -c "if 'LD_LIBRARY_PATH' in __import__('os').environ: print(':' + __import__('os').environ['LD_LIBRARY_PATH'], end='')" >> %t.ld_library_path +// RUN: env LD_LIBRARY_PATH=%{readfile:%t.ld_library_path} %deflake %run %t | FileCheck %s --check-prefix=CHECK-NOSUPP // RUN: echo running with suppressions: -// RUN: env LD_LIBRARY_PATH=%t-dir${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} %env_tsan_opts=suppressions='%s.supp' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP +// RUN: env LD_LIBRARY_PATH=%{readfile:%t.ld_library_path} %env_tsan_opts=suppressions='%s.supp' %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP // RUN: echo running with generic suppression of noninstrumented code: -// RUN: env LD_LIBRARY_PATH=%t-dir${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH} %env_tsan_opts=ignore_noninstrumented_modules=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP +// RUN: env LD_LIBRARY_PATH=%{readfile:%t.ld_library_path} %env_tsan_opts=ignore_noninstrumented_modules=1 %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-WITHSUPP // Tests that interceptors coming from a library specified in called_from_lib // suppression are ignored. 
-- cgit v1.2.3 From 73b092ff3398321b91f913c66bf9fe3bbf4d097a Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Fri, 31 Oct 2025 10:07:38 +0800 Subject: [libc++][test] Make `deallocate_size.pass.cpp` MSVC-friendly (#165162) This patch contains several changes to `deallocate_size.pass.cpp`: 1. `static_cast`-ing some parameters to `size_t` to avoid narrowing. 2. Changing the type of loop variable `i` to `unsigned int` avoid signedness mismatch with the constructor parameter. 3. Separately counting allocations and deallocations in variables `allocated_` and `deallocated_`, and changing their type to `uint64_t`. 4. Avoiding `assert`-ing count of allocations when a `basic_string` is allocated, just `assert`-ing after destruction instead. --- .../string.capacity/deallocate_size.pass.cpp | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/libcxx/test/std/strings/basic.string/string.capacity/deallocate_size.pass.cpp b/libcxx/test/std/strings/basic.string/string.capacity/deallocate_size.pass.cpp index 00f9e2b84678..ecdc39701641 100644 --- a/libcxx/test/std/strings/basic.string/string.capacity/deallocate_size.pass.cpp +++ b/libcxx/test/std/strings/basic.string/string.capacity/deallocate_size.pass.cpp @@ -12,12 +12,14 @@ #include #include +#include #include #include #include "test_macros.h" -static int allocated_; +static std::uint64_t allocated_; +static std::uint64_t deallocated_; template struct test_alloc { @@ -41,12 +43,12 @@ struct test_alloc { pointer allocate(size_type n, const void* = nullptr) { allocated_ += n; - return std::allocator().allocate(n); + return std::allocator().allocate(static_cast(n)); } void deallocate(pointer p, size_type s) { - allocated_ -= s; - std::allocator().deallocate(p, s); + deallocated_ += s; + std::allocator().deallocate(p, static_cast(s)); } template @@ -64,14 +66,13 @@ struct test_alloc { template void test() { - for (int i = 1; i < 1000; ++i) { - using Str = std::basic_string, test_alloc >; + for 
(unsigned int i = 1; i < 1000; ++i) { { - Str s(i, 't'); - assert(allocated_ == 0 || allocated_ >= i); + std::basic_string, test_alloc > s(i, 't'); + (void)s; } + assert(allocated_ == deallocated_); } - assert(allocated_ == 0); } int main(int, char**) { -- cgit v1.2.3 From 683b00bb50a12d6e15427bb316c25f4d38070148 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Fri, 31 Oct 2025 02:22:55 +0000 Subject: [VPlan] Limit VPScalarIVSteps to step == 1 in getSCEVExprForVPValue. For now, just support VPScalarIVSteps with step == 1 in getSCEVExprForVPValue. This fixes a crash when the step would be != 1. --- llvm/lib/Transforms/Vectorize/VPlanUtils.cpp | 3 +- .../AArch64/replicating-load-store-costs.ll | 108 +++++++++++++++++++++ 2 files changed, 110 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp index 54348c6e3448..8c23e78693db 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanUtils.cpp @@ -108,7 +108,8 @@ const SCEV *vputils::getSCEVExprForVPValue(const VPValue *V, .Case([&SE, L](const VPScalarIVStepsRecipe *R) { const SCEV *IV = getSCEVExprForVPValue(R->getOperand(0), SE, L); const SCEV *Step = getSCEVExprForVPValue(R->getOperand(1), SE, L); - if (isa(IV) || isa(Step)) + if (isa(IV) || isa(Step) || + !Step->isOne()) return SE.getCouldNotCompute(); return SE.getMulExpr(SE.getTruncateOrSignExtend(IV, Step->getType()), Step); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll index 7f345133f51d..68cfc659e1e9 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll @@ -660,6 +660,114 @@ exit: ret i32 %red } + +define i32 @test_or_reduction_with_stride_2(i32 %scale, ptr %src) { +; CHECK-LABEL: define i32 
@test_or_reduction_with_stride_2( +; CHECK-SAME: i32 [[SCALE:%.*]], ptr [[SRC:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i32> poison, i32 [[SCALE]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i32> [[BROADCAST_SPLATINSERT]], <16 x i32> poison, <16 x i32> zeroinitializer +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i32> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP66:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2 +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[OFFSET_IDX]], 2 +; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[OFFSET_IDX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 6 +; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 10 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 12 +; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 14 +; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 16 +; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 18 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 20 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], 22 +; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[OFFSET_IDX]], 24 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[OFFSET_IDX]], 26 +; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[OFFSET_IDX]], 28 +; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[OFFSET_IDX]], 30 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP17:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP2]] +; CHECK-NEXT: [[TMP19:%.*]] = getelementptr [32 x i8], ptr 
[[SRC]], i64 [[TMP3]] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP4]] +; CHECK-NEXT: [[TMP21:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP5]] +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP6]] +; CHECK-NEXT: [[TMP23:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP7]] +; CHECK-NEXT: [[TMP24:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP8]] +; CHECK-NEXT: [[TMP25:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP9]] +; CHECK-NEXT: [[TMP26:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP27:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP11]] +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP12]] +; CHECK-NEXT: [[TMP29:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP13]] +; CHECK-NEXT: [[TMP30:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP14]] +; CHECK-NEXT: [[TMP31:%.*]] = getelementptr [32 x i8], ptr [[SRC]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP32:%.*]] = load i8, ptr [[TMP16]], align 1 +; CHECK-NEXT: [[TMP33:%.*]] = load i8, ptr [[TMP17]], align 1 +; CHECK-NEXT: [[TMP34:%.*]] = load i8, ptr [[TMP18]], align 1 +; CHECK-NEXT: [[TMP35:%.*]] = load i8, ptr [[TMP19]], align 1 +; CHECK-NEXT: [[TMP36:%.*]] = load i8, ptr [[TMP20]], align 1 +; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[TMP21]], align 1 +; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[TMP23]], align 1 +; CHECK-NEXT: [[TMP40:%.*]] = load i8, ptr [[TMP24]], align 1 +; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr [[TMP25]], align 1 +; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[TMP26]], align 1 +; CHECK-NEXT: [[TMP43:%.*]] = load i8, ptr [[TMP27]], align 1 +; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[TMP28]], align 1 +; CHECK-NEXT: [[TMP45:%.*]] = load i8, ptr [[TMP29]], align 1 +; CHECK-NEXT: [[TMP46:%.*]] = load i8, ptr [[TMP30]], align 1 +; CHECK-NEXT: [[TMP47:%.*]] = load i8, 
ptr [[TMP31]], align 1 +; CHECK-NEXT: [[TMP48:%.*]] = insertelement <16 x i8> poison, i8 [[TMP32]], i32 0 +; CHECK-NEXT: [[TMP49:%.*]] = insertelement <16 x i8> [[TMP48]], i8 [[TMP33]], i32 1 +; CHECK-NEXT: [[TMP50:%.*]] = insertelement <16 x i8> [[TMP49]], i8 [[TMP34]], i32 2 +; CHECK-NEXT: [[TMP51:%.*]] = insertelement <16 x i8> [[TMP50]], i8 [[TMP35]], i32 3 +; CHECK-NEXT: [[TMP52:%.*]] = insertelement <16 x i8> [[TMP51]], i8 [[TMP36]], i32 4 +; CHECK-NEXT: [[TMP53:%.*]] = insertelement <16 x i8> [[TMP52]], i8 [[TMP37]], i32 5 +; CHECK-NEXT: [[TMP54:%.*]] = insertelement <16 x i8> [[TMP53]], i8 [[TMP38]], i32 6 +; CHECK-NEXT: [[TMP55:%.*]] = insertelement <16 x i8> [[TMP54]], i8 [[TMP39]], i32 7 +; CHECK-NEXT: [[TMP56:%.*]] = insertelement <16 x i8> [[TMP55]], i8 [[TMP40]], i32 8 +; CHECK-NEXT: [[TMP57:%.*]] = insertelement <16 x i8> [[TMP56]], i8 [[TMP41]], i32 9 +; CHECK-NEXT: [[TMP58:%.*]] = insertelement <16 x i8> [[TMP57]], i8 [[TMP42]], i32 10 +; CHECK-NEXT: [[TMP59:%.*]] = insertelement <16 x i8> [[TMP58]], i8 [[TMP43]], i32 11 +; CHECK-NEXT: [[TMP60:%.*]] = insertelement <16 x i8> [[TMP59]], i8 [[TMP44]], i32 12 +; CHECK-NEXT: [[TMP61:%.*]] = insertelement <16 x i8> [[TMP60]], i8 [[TMP45]], i32 13 +; CHECK-NEXT: [[TMP62:%.*]] = insertelement <16 x i8> [[TMP61]], i8 [[TMP46]], i32 14 +; CHECK-NEXT: [[TMP63:%.*]] = insertelement <16 x i8> [[TMP62]], i8 [[TMP47]], i32 15 +; CHECK-NEXT: [[TMP64:%.*]] = sext <16 x i8> [[TMP63]] to <16 x i32> +; CHECK-NEXT: [[TMP65:%.*]] = mul <16 x i32> [[BROADCAST_SPLAT]], [[TMP64]] +; CHECK-NEXT: [[TMP66]] = or <16 x i32> [[TMP65]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; CHECK-NEXT: [[TMP67:%.*]] = icmp eq i64 [[INDEX_NEXT]], 48 +; CHECK-NEXT: br i1 [[TMP67]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[TMP68:%.*]] = call i32 @llvm.vector.reduce.or.v16i32(<16 x i32> [[TMP66]]) +; CHECK-NEXT: br label 
%[[SCALAR_PH:.*]] +; CHECK: [[SCALAR_PH]]: +; +entry: + br label %loop + +loop: + %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ] + %reduction = phi i32 [ %reduction.next, %loop ], [ 0, %entry ] + %gep = getelementptr [32 x i8], ptr %src, i64 %iv + %load = load i8, ptr %gep, align 1 + %sext = sext i8 %load to i32 + %mul = mul i32 %scale, %sext + %reduction.next = or i32 %mul, %reduction + %iv.next = add i64 %iv, 2 + %cmp = icmp eq i64 %iv.next, 100 + br i1 %cmp, label %exit, label %loop + +exit: + ret i32 %reduction.next +} + attributes #0 = { "target-cpu"="neoverse-512tvb" } !0 = !{!1, !2, i64 0} -- cgit v1.2.3 From d9e5e725ed33e462477d5559ffece0d08e9c8dad Mon Sep 17 00:00:00 2001 From: Zhaoxin Yang Date: Fri, 31 Oct 2025 10:25:51 +0800 Subject: [LoongArch] Lowering flog2 to flogb (#162978) According to LoongArch ISA Volume 1 V1.11, FLOGB.S/D is unsupported in LA32. --- .../Target/LoongArch/LoongArchFloat32InstrInfo.td | 1 + .../Target/LoongArch/LoongArchFloat64InstrInfo.td | 1 + .../lib/Target/LoongArch/LoongArchISelLowering.cpp | 10 +- .../lib/Target/LoongArch/LoongArchLASXInstrInfo.td | 3 + llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td | 3 + .../test/CodeGen/LoongArch/ir-instruction/flog2.ll | 8 +- .../CodeGen/LoongArch/lasx/ir-instruction/flog2.ll | 258 ++------------------- .../CodeGen/LoongArch/lsx/ir-instruction/flog2.ll | 156 ++----------- 8 files changed, 48 insertions(+), 392 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td index 690dd73014e5..e86b21cf849c 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat32InstrInfo.td @@ -365,6 +365,7 @@ def : Pat<(f32 (uint_to_fp (i64 (sexti32 (i64 GPR:$src))))), // FP Rounding let Predicates = [HasBasicF, IsLA64] in { def : PatFpr; +def : PatFpr; } // Predicates = [HasBasicF, IsLA64] let Predicates = [HasBasicF, IsLA32] in { diff --git 
a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td index daefbaa52d42..2e88254aab4d 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchFloat64InstrInfo.td @@ -348,6 +348,7 @@ def : Pat<(bitconvert FPR64:$src), (MOVFR2GR_D FPR64:$src)>; // FP Rounding let Predicates = [HasBasicD, IsLA64] in { def : PatFpr; +def : PatFpr; } // Predicates = [HasBasicD, IsLA64] /// Pseudo-instructions needed for the soft-float ABI with LA32D diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp index 80c96c6dc8eb..a6de839de7c2 100644 --- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp @@ -244,8 +244,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_TO_BF16, MVT::f32, Subtarget.isSoftFPABI() ? LibCall : Custom); - if (Subtarget.is64Bit()) + if (Subtarget.is64Bit()) { setOperationAction(ISD::FRINT, MVT::f32, Legal); + setOperationAction(ISD::FLOG2, MVT::f32, Legal); + } if (!Subtarget.hasBasicD()) { setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); @@ -291,8 +293,10 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FP_TO_BF16, MVT::f64, Subtarget.isSoftFPABI() ? LibCall : Custom); - if (Subtarget.is64Bit()) + if (Subtarget.is64Bit()) { setOperationAction(ISD::FRINT, MVT::f64, Legal); + setOperationAction(ISD::FLOG2, MVT::f64, Legal); + } } // Set operations for 'LSX' feature. 
@@ -362,6 +366,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FMA, VT, Legal); setOperationAction(ISD::FSQRT, VT, Legal); setOperationAction(ISD::FNEG, VT, Legal); + setOperationAction(ISD::FLOG2, VT, Legal); setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, ISD::SETUGE, ISD::SETUGT}, VT, Expand); @@ -443,6 +448,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM, setOperationAction(ISD::FMA, VT, Legal); setOperationAction(ISD::FSQRT, VT, Legal); setOperationAction(ISD::FNEG, VT, Legal); + setOperationAction(ISD::FLOG2, VT, Legal); setCondCodeAction({ISD::SETGE, ISD::SETGT, ISD::SETOGE, ISD::SETOGT, ISD::SETUGE, ISD::SETUGT}, VT, Expand); diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index 613dea6093f5..ddf91ca54e1e 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -1593,6 +1593,9 @@ def : Pat<(fma_nsz (fneg v4f64:$xj), v4f64:$xk, v4f64:$xa), // XVFSQRT_{S/D} defm : PatXrF; +// XVFLOGB_{S/D} +defm : PatXrF; + // XVRECIP_{S/D} def : Pat<(fdiv vsplatf32_fpimm_eq_1, v8f32:$xj), (XVFRECIP_S v8f32:$xj)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index 4619c6bd248a..ba1204d62057 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -1783,6 +1783,9 @@ def : Pat<(fma_nsz (fneg v2f64:$vj), v2f64:$vk, v2f64:$va), // VFSQRT_{S/D} defm : PatVrF; +// VFLOGB_{S/D} +defm : PatVrF; + // VFRECIP_{S/D} def : Pat<(fdiv vsplatf32_fpimm_eq_1, v4f32:$vj), (VFRECIP_S v4f32:$vj)>; diff --git a/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll index 93fcd421e4bd..e02a2e7cce9b 100644 --- a/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll +++ 
b/llvm/test/CodeGen/LoongArch/ir-instruction/flog2.ll @@ -12,8 +12,8 @@ define float @flog2_s(float %x) nounwind { ; ; LA64-LABEL: flog2_s: ; LA64: # %bb.0: -; LA64-NEXT: pcaddu18i $t8, %call36(log2f) -; LA64-NEXT: jr $t8 +; LA64-NEXT: flogb.s $fa0, $fa0 +; LA64-NEXT: ret %y = call float @llvm.log2.f32(float %x) ret float %y } @@ -25,8 +25,8 @@ define double @flog2_d(double %x) nounwind { ; ; LA64-LABEL: flog2_d: ; LA64: # %bb.0: -; LA64-NEXT: pcaddu18i $t8, %call36(log2) -; LA64-NEXT: jr $t8 +; LA64-NEXT: flogb.d $fa0, $fa0 +; LA64-NEXT: ret %y = call double @llvm.log2.f64(double %x) ret double %y } diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll index 68f2e3ab488e..6b5f5751e570 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/flog2.ll @@ -1,166 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefix=LA64 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s declare <8 x float> @llvm.log2.v8f32(<8 x float>) declare <4 x double> @llvm.log2.v4f64(<4 x double>) define void @flog2_v8f32(ptr %res, ptr %a) nounwind { -; LA32-LABEL: flog2_v8f32: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -128 -; LA32-NEXT: st.w $ra, $sp, 124 # 4-byte Folded Spill -; LA32-NEXT: st.w $fp, $sp, 120 # 4-byte Folded Spill -; LA32-NEXT: xvld $xr0, $a1, 0 -; LA32-NEXT: xvst $xr0, $sp, 80 # 32-byte Folded Spill -; LA32-NEXT: move $fp, $a0 -; LA32-NEXT: xvpickve.w $xr0, $xr0, 5 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT: vst 
$vr0, $sp, 48 # 16-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.w $xr0, $xr0, 4 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0 -; LA32-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload -; LA32-NEXT: vextrins.w $vr0, $vr1, 16 -; LA32-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.w $xr0, $xr0, 6 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload -; LA32-NEXT: vextrins.w $vr1, $vr0, 32 -; LA32-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.w $xr0, $xr0, 7 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload -; LA32-NEXT: vextrins.w $vr1, $vr0, 48 -; LA32-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.w $xr0, $xr0, 1 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.w $xr0, $xr0, 0 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $xr0 -; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT: vextrins.w $vr0, $vr1, 16 -; LA32-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.w $xr0, $xr0, 2 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # 
kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload -; LA32-NEXT: vextrins.w $vr1, $vr0, 32 -; LA32-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.w $xr0, $xr0, 3 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload -; LA32-NEXT: vextrins.w $vr1, $vr0, 48 -; LA32-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload -; LA32-NEXT: xvpermi.q $xr1, $xr0, 2 -; LA32-NEXT: xvst $xr1, $fp, 0 -; LA32-NEXT: ld.w $fp, $sp, 120 # 4-byte Folded Reload -; LA32-NEXT: ld.w $ra, $sp, 124 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 128 -; LA32-NEXT: ret -; -; LA64-LABEL: flog2_v8f32: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -128 -; LA64-NEXT: st.d $ra, $sp, 120 # 8-byte Folded Spill -; LA64-NEXT: st.d $fp, $sp, 112 # 8-byte Folded Spill -; LA64-NEXT: xvld $xr0, $a1, 0 -; LA64-NEXT: xvst $xr0, $sp, 80 # 32-byte Folded Spill -; LA64-NEXT: move $fp, $a0 -; LA64-NEXT: xvpickve.w $xr0, $xr0, 5 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: vst $vr0, $sp, 48 # 16-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 4 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0 -; LA64-NEXT: vld $vr1, $sp, 48 # 16-byte Folded Reload -; LA64-NEXT: vextrins.w $vr0, $vr1, 16 -; LA64-NEXT: xvst $xr0, $sp, 48 # 32-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 6 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, 
%call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload -; LA64-NEXT: vextrins.w $vr1, $vr0, 32 -; LA64-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 7 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: xvld $xr1, $sp, 48 # 32-byte Folded Reload -; LA64-NEXT: vextrins.w $vr1, $vr0, 48 -; LA64-NEXT: xvst $xr1, $sp, 48 # 32-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 1 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $xr0 -; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT: vextrins.w $vr0, $vr1, 16 -; LA64-NEXT: xvst $xr0, $sp, 16 # 32-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 2 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload -; LA64-NEXT: vextrins.w $vr1, $vr0, 32 -; LA64-NEXT: xvst $xr1, $sp, 16 # 32-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 80 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.w $xr0, $xr0, 3 -; LA64-NEXT: # kill: def $f0 killed $f0 
killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: xvld $xr1, $sp, 16 # 32-byte Folded Reload -; LA64-NEXT: vextrins.w $vr1, $vr0, 48 -; LA64-NEXT: xvld $xr0, $sp, 48 # 32-byte Folded Reload -; LA64-NEXT: xvpermi.q $xr1, $xr0, 2 -; LA64-NEXT: xvst $xr1, $fp, 0 -; LA64-NEXT: ld.d $fp, $sp, 112 # 8-byte Folded Reload -; LA64-NEXT: ld.d $ra, $sp, 120 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 128 -; LA64-NEXT: ret +; CHECK-LABEL: flog2_v8f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvflogb.s $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret entry: %v = load <8 x float>, ptr %a %r = call <8 x float> @llvm.log2.v8f32(<8 x float> %v) @@ -169,93 +20,12 @@ entry: } define void @flog2_v4f64(ptr %res, ptr %a) nounwind { -; LA32-LABEL: flog2_v4f64: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -112 -; LA32-NEXT: st.w $ra, $sp, 108 # 4-byte Folded Spill -; LA32-NEXT: st.w $fp, $sp, 104 # 4-byte Folded Spill -; LA32-NEXT: xvld $xr0, $a1, 0 -; LA32-NEXT: xvst $xr0, $sp, 64 # 32-byte Folded Spill -; LA32-NEXT: move $fp, $a0 -; LA32-NEXT: xvpickve.d $xr0, $xr0, 3 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT: bl log2 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA32-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.d $xr0, $xr0, 2 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT: bl log2 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 -; LA32-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload -; LA32-NEXT: vextrins.d $vr0, $vr1, 16 -; LA32-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.d $xr0, $xr0, 1 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT: bl log2 -; 
LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA32-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA32-NEXT: xvpickve.d $xr0, $xr0, 0 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA32-NEXT: bl log2 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 -; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT: vextrins.d $vr0, $vr1, 16 -; LA32-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload -; LA32-NEXT: xvpermi.q $xr0, $xr1, 2 -; LA32-NEXT: xvst $xr0, $fp, 0 -; LA32-NEXT: ld.w $fp, $sp, 104 # 4-byte Folded Reload -; LA32-NEXT: ld.w $ra, $sp, 108 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 112 -; LA32-NEXT: ret -; -; LA64-LABEL: flog2_v4f64: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -112 -; LA64-NEXT: st.d $ra, $sp, 104 # 8-byte Folded Spill -; LA64-NEXT: st.d $fp, $sp, 96 # 8-byte Folded Spill -; LA64-NEXT: xvld $xr0, $a1, 0 -; LA64-NEXT: xvst $xr0, $sp, 64 # 32-byte Folded Spill -; LA64-NEXT: move $fp, $a0 -; LA64-NEXT: xvpickve.d $xr0, $xr0, 3 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA64-NEXT: vst $vr0, $sp, 32 # 16-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.d $xr0, $xr0, 2 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 -; LA64-NEXT: vld $vr1, $sp, 32 # 16-byte Folded Reload -; LA64-NEXT: vextrins.d $vr0, $vr1, 16 -; LA64-NEXT: xvst $xr0, $sp, 32 # 32-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.d $xr0, $xr0, 1 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2) -; LA64-NEXT: jirl $ra, $ra, 0 -; 
LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA64-NEXT: xvld $xr0, $sp, 64 # 32-byte Folded Reload -; LA64-NEXT: xvpickve.d $xr0, $xr0, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $xr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0 -; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT: vextrins.d $vr0, $vr1, 16 -; LA64-NEXT: xvld $xr1, $sp, 32 # 32-byte Folded Reload -; LA64-NEXT: xvpermi.q $xr0, $xr1, 2 -; LA64-NEXT: xvst $xr0, $fp, 0 -; LA64-NEXT: ld.d $fp, $sp, 96 # 8-byte Folded Reload -; LA64-NEXT: ld.d $ra, $sp, 104 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 112 -; LA64-NEXT: ret +; CHECK-LABEL: flog2_v4f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xvld $xr0, $a1, 0 +; CHECK-NEXT: xvflogb.d $xr0, $xr0 +; CHECK-NEXT: xvst $xr0, $a0, 0 +; CHECK-NEXT: ret entry: %v = load <4 x double>, ptr %a %r = call <4 x double> @llvm.log2.v4f64(<4 x double> %v) diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll index e5e75ec617b5..87cc7c6dbc70 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/flog2.ll @@ -1,98 +1,17 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefix=LA32 -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefix=LA64 +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s declare <4 x float> @llvm.log2.v4f32(<4 x float>) declare <2 x double> @llvm.log2.v2f64(<2 x double>) define void @flog2_v4f32(ptr %res, ptr %a) nounwind { -; LA32-LABEL: flog2_v4f32: -; LA32: # %bb.0: # %entry -; 
LA32-NEXT: addi.w $sp, $sp, -48 -; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill -; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill -; LA32-NEXT: vld $vr0, $a1, 0 -; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA32-NEXT: move $fp, $a0 -; LA32-NEXT: vreplvei.w $vr0, $vr0, 1 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill -; LA32-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT: vreplvei.w $vr0, $vr0, 0 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload -; LA32-NEXT: vextrins.w $vr0, $vr1, 16 -; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill -; LA32-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT: vreplvei.w $vr0, $vr0, 2 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload -; LA32-NEXT: vextrins.w $vr1, $vr0, 32 -; LA32-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill -; LA32-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT: vreplvei.w $vr0, $vr0, 3 -; LA32-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; LA32-NEXT: bl log2f -; LA32-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA32-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload -; LA32-NEXT: vextrins.w $vr1, $vr0, 48 -; LA32-NEXT: vst $vr1, $fp, 0 -; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload -; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 48 -; LA32-NEXT: ret -; -; LA64-LABEL: flog2_v4f32: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -48 -; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64-NEXT: vld $vr0, $a1, 0 -; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte 
Folded Spill -; LA64-NEXT: move $fp, $a0 -; LA64-NEXT: vreplvei.w $vr0, $vr0, 1 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill -; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT: vreplvei.w $vr0, $vr0, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload -; LA64-NEXT: vextrins.w $vr0, $vr1, 16 -; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill -; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT: vreplvei.w $vr0, $vr0, 2 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload -; LA64-NEXT: vextrins.w $vr1, $vr0, 32 -; LA64-NEXT: vst $vr1, $sp, 0 # 16-byte Folded Spill -; LA64-NEXT: vld $vr0, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT: vreplvei.w $vr0, $vr0, 3 -; LA64-NEXT: # kill: def $f0 killed $f0 killed $vr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2f) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0 killed $f0 def $vr0 -; LA64-NEXT: vld $vr1, $sp, 0 # 16-byte Folded Reload -; LA64-NEXT: vextrins.w $vr1, $vr0, 48 -; LA64-NEXT: vst $vr1, $fp, 0 -; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 48 -; LA64-NEXT: ret +; CHECK-LABEL: flog2_v4f32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vflogb.s $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret entry: %v = load <4 x float>, ptr %a %r = call <4 x float> @llvm.log2.v4f32(<4 x float> %v) @@ -101,59 
+20,12 @@ entry: } define void @flog2_v2f64(ptr %res, ptr %a) nounwind { -; LA32-LABEL: flog2_v2f64: -; LA32: # %bb.0: # %entry -; LA32-NEXT: addi.w $sp, $sp, -48 -; LA32-NEXT: st.w $ra, $sp, 44 # 4-byte Folded Spill -; LA32-NEXT: st.w $fp, $sp, 40 # 4-byte Folded Spill -; LA32-NEXT: vld $vr0, $a1, 0 -; LA32-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill -; LA32-NEXT: move $fp, $a0 -; LA32-NEXT: vreplvei.d $vr0, $vr0, 1 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 -; LA32-NEXT: bl log2 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA32-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA32-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload -; LA32-NEXT: vreplvei.d $vr0, $vr0, 0 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 -; LA32-NEXT: bl log2 -; LA32-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA32-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA32-NEXT: vextrins.d $vr0, $vr1, 16 -; LA32-NEXT: vst $vr0, $fp, 0 -; LA32-NEXT: ld.w $fp, $sp, 40 # 4-byte Folded Reload -; LA32-NEXT: ld.w $ra, $sp, 44 # 4-byte Folded Reload -; LA32-NEXT: addi.w $sp, $sp, 48 -; LA32-NEXT: ret -; -; LA64-LABEL: flog2_v2f64: -; LA64: # %bb.0: # %entry -; LA64-NEXT: addi.d $sp, $sp, -48 -; LA64-NEXT: st.d $ra, $sp, 40 # 8-byte Folded Spill -; LA64-NEXT: st.d $fp, $sp, 32 # 8-byte Folded Spill -; LA64-NEXT: vld $vr0, $a1, 0 -; LA64-NEXT: vst $vr0, $sp, 0 # 16-byte Folded Spill -; LA64-NEXT: move $fp, $a0 -; LA64-NEXT: vreplvei.d $vr0, $vr0, 1 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 def $vr0 -; LA64-NEXT: vst $vr0, $sp, 16 # 16-byte Folded Spill -; LA64-NEXT: vld $vr0, $sp, 0 # 16-byte Folded Reload -; LA64-NEXT: vreplvei.d $vr0, $vr0, 0 -; LA64-NEXT: # kill: def $f0_64 killed $f0_64 killed $vr0 -; LA64-NEXT: pcaddu18i $ra, %call36(log2) -; LA64-NEXT: jirl $ra, $ra, 0 -; LA64-NEXT: # kill: def $f0_64 killed 
$f0_64 def $vr0 -; LA64-NEXT: vld $vr1, $sp, 16 # 16-byte Folded Reload -; LA64-NEXT: vextrins.d $vr0, $vr1, 16 -; LA64-NEXT: vst $vr0, $fp, 0 -; LA64-NEXT: ld.d $fp, $sp, 32 # 8-byte Folded Reload -; LA64-NEXT: ld.d $ra, $sp, 40 # 8-byte Folded Reload -; LA64-NEXT: addi.d $sp, $sp, 48 -; LA64-NEXT: ret +; CHECK-LABEL: flog2_v2f64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: vld $vr0, $a1, 0 +; CHECK-NEXT: vflogb.d $vr0, $vr0 +; CHECK-NEXT: vst $vr0, $a0, 0 +; CHECK-NEXT: ret entry: %v = load <2 x double>, ptr %a %r = call <2 x double> @llvm.log2.v2f64(<2 x double> %v) -- cgit v1.2.3 From 27eabd5219b34438096499fe92386e1bcc31c40c Mon Sep 17 00:00:00 2001 From: ZhaoQi Date: Fri, 31 Oct 2025 10:27:01 +0800 Subject: [LoongArch] Add patterns to support vector type average instructions generation (#161079) NOTE: For simplicity and convenience, `v2i64/v4i64` types on LA32 is not optimized. If hoping to implement this in the future, special handling for `bitcast` and `build_vector` is needed. --- .../lib/Target/LoongArch/LoongArchLASXInstrInfo.td | 18 +++ llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td | 30 +++++ .../CodeGen/LoongArch/lasx/ir-instruction/avg.ll | 146 +++++++++++---------- .../CodeGen/LoongArch/lsx/ir-instruction/avg.ll | 146 +++++++++++---------- 4 files changed, 208 insertions(+), 132 deletions(-) diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td index ddf91ca54e1e..ca4ee5f89573 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td @@ -2027,6 +2027,24 @@ def : Pat<(v4i32(fp_to_uint v4f64:$vj)), (XVFTINTRZ_LU_D v4f64:$vj)), sub_128)>; +// XVAVG_{B/H/W/D/BU/HU/WU/DU}, XVAVGR_{B/H/W/D/BU/HU/WU/DU} +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrPat; 
+defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrPat; + // abs def : Pat<(abs v32i8:$xj), (XVSIGNCOV_B v32i8:$xj, v32i8:$xj)>; def : Pat<(abs v16i16:$xj), (XVSIGNCOV_H v16i16:$xj, v16i16:$xj)>; diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td index ba1204d62057..92402baa0fa0 100644 --- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td @@ -1518,6 +1518,18 @@ multiclass InsertExtractPatV2 { } } +multiclass VAvgPat { + def : Pat<(OpNode (vt (add vt:$vj, vt:$vk)), (vt (vsplat_imm_eq_1))), + (!cast(Inst) vt:$vj, vt:$vk)>; +} + +multiclass VAvgrPat { + def : Pat<(OpNode (vt (add (vt (add vt:$vj, vt:$vk)), + (vt (vsplat_imm_eq_1)))), + (vt (vsplat_imm_eq_1))), + (!cast(Inst) vt:$vj, vt:$vk)>; +} + let Predicates = [HasExtLSX] in { // VADD_{B/H/W/D} @@ -2157,6 +2169,24 @@ def : Pat<(f32 f32imm_vldi:$in), def : Pat<(f64 f64imm_vldi:$in), (f64 (EXTRACT_SUBREG (VLDI (to_f64imm_vldi f64imm_vldi:$in)), sub_64))>; +// VAVG_{B/H/W/D/BU/HU/WU/DU}, VAVGR_{B/H/W/D/BU/HU/WU/DU} +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrPat; +defm : VAvgrPat; + // abs def : Pat<(abs v16i8:$vj), (VSIGNCOV_B v16i8:$vj, v16i8:$vj)>; def : Pat<(abs v8i16:$vj), (VSIGNCOV_H v8i16:$vj, v8i16:$vj)>; diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll index 2a5a8fa05d64..5c5c19935080 100644 --- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll +++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/avg.ll @@ -1,14 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s -; 
RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64 define void @xvavg_b(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK-LABEL: xvavg_b: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1 +; CHECK-NEXT: xvavg.b $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -25,8 +24,7 @@ define void @xvavg_h(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 -; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1 +; CHECK-NEXT: xvavg.h $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -43,8 +41,7 @@ define void @xvavg_w(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 -; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1 +; CHECK-NEXT: xvavg.w $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -57,14 +54,22 @@ entry: } define void @xvavg_d(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: xvavg_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 -; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: xvavg_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: xvld $xr1, $a2, 0 +; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1 +; LA32-NEXT: xvsrai.d $xr0, $xr0, 1 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: xvavg_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvld $xr1, $a2, 0 +; 
LA64-NEXT: xvavg.d $xr0, $xr0, $xr1 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %va = load <4 x i64>, ptr %a %vb = load <4 x i64>, ptr %b @@ -79,8 +84,7 @@ define void @xvavg_bu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1 +; CHECK-NEXT: xvavg.bu $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -97,8 +101,7 @@ define void @xvavg_hu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 -; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1 +; CHECK-NEXT: xvavg.hu $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -115,8 +118,7 @@ define void @xvavg_wu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 -; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1 +; CHECK-NEXT: xvavg.wu $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -129,14 +131,22 @@ entry: } define void @xvavg_du(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: xvavg_du: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 -; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: xvavg_du: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: xvld $xr1, $a2, 0 +; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1 +; LA32-NEXT: xvsrli.d $xr0, $xr0, 1 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: xvavg_du: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvld $xr1, $a2, 0 +; LA64-NEXT: xvavg.du $xr0, $xr0, $xr1 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %va = load <4 x 
i64>, ptr %a %vb = load <4 x i64>, ptr %b @@ -151,9 +161,7 @@ define void @xvavgr_b(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 1 -; CHECK-NEXT: xvsrai.b $xr0, $xr0, 1 +; CHECK-NEXT: xvavgr.b $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -171,9 +179,7 @@ define void @xvavgr_h(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 -; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 1 -; CHECK-NEXT: xvsrai.h $xr0, $xr0, 1 +; CHECK-NEXT: xvavgr.h $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -191,9 +197,7 @@ define void @xvavgr_w(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 -; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 1 -; CHECK-NEXT: xvsrai.w $xr0, $xr0, 1 +; CHECK-NEXT: xvavgr.w $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -207,15 +211,23 @@ entry: } define void @xvavgr_d(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: xvavgr_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 -; CHECK-NEXT: xvaddi.du $xr0, $xr0, 1 -; CHECK-NEXT: xvsrai.d $xr0, $xr0, 1 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: xvavgr_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: xvld $xr1, $a2, 0 +; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1 +; LA32-NEXT: xvaddi.du $xr0, $xr0, 1 +; LA32-NEXT: xvsrai.d $xr0, $xr0, 1 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: xvavgr_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvld $xr1, $a2, 0 +; LA64-NEXT: xvavgr.d 
$xr0, $xr0, $xr1 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %va = load <4 x i64>, ptr %a %vb = load <4 x i64>, ptr %b @@ -231,9 +243,7 @@ define void @xvavgr_bu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.b $xr0, $xr0, $xr1 -; CHECK-NEXT: xvaddi.bu $xr0, $xr0, 1 -; CHECK-NEXT: xvsrli.b $xr0, $xr0, 1 +; CHECK-NEXT: xvavgr.bu $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -251,9 +261,7 @@ define void @xvavgr_hu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.h $xr0, $xr0, $xr1 -; CHECK-NEXT: xvaddi.hu $xr0, $xr0, 1 -; CHECK-NEXT: xvsrli.h $xr0, $xr0, 1 +; CHECK-NEXT: xvavgr.hu $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -271,9 +279,7 @@ define void @xvavgr_wu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xvld $xr0, $a1, 0 ; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.w $xr0, $xr0, $xr1 -; CHECK-NEXT: xvaddi.wu $xr0, $xr0, 1 -; CHECK-NEXT: xvsrli.w $xr0, $xr0, 1 +; CHECK-NEXT: xvavgr.wu $xr0, $xr0, $xr1 ; CHECK-NEXT: xvst $xr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -287,15 +293,23 @@ entry: } define void @xvavgr_du(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: xvavgr_du: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: xvld $xr0, $a1, 0 -; CHECK-NEXT: xvld $xr1, $a2, 0 -; CHECK-NEXT: xvadd.d $xr0, $xr0, $xr1 -; CHECK-NEXT: xvaddi.du $xr0, $xr0, 1 -; CHECK-NEXT: xvsrli.d $xr0, $xr0, 1 -; CHECK-NEXT: xvst $xr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: xvavgr_du: +; LA32: # %bb.0: # %entry +; LA32-NEXT: xvld $xr0, $a1, 0 +; LA32-NEXT: xvld $xr1, $a2, 0 +; LA32-NEXT: xvadd.d $xr0, $xr0, $xr1 +; LA32-NEXT: xvaddi.du $xr0, $xr0, 1 +; LA32-NEXT: xvsrli.d $xr0, $xr0, 1 +; LA32-NEXT: xvst $xr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: xvavgr_du: +; LA64: # 
%bb.0: # %entry +; LA64-NEXT: xvld $xr0, $a1, 0 +; LA64-NEXT: xvld $xr1, $a2, 0 +; LA64-NEXT: xvavgr.du $xr0, $xr0, $xr1 +; LA64-NEXT: xvst $xr0, $a0, 0 +; LA64-NEXT: ret entry: %va = load <4 x i64>, ptr %a %vb = load <4 x i64>, ptr %b diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll index 20b8898436cc..334af22edee5 100644 --- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll +++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/avg.ll @@ -1,14 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 -; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s -; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s +; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA32 +; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s --check-prefixes=CHECK,LA64 define void @vavg_b(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK-LABEL: vavg_b: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrai.b $vr0, $vr0, 1 +; CHECK-NEXT: vavg.b $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -25,8 +24,7 @@ define void @vavg_h(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrai.h $vr0, $vr0, 1 +; CHECK-NEXT: vavg.h $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -43,8 +41,7 @@ define void @vavg_w(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrai.w $vr0, $vr0, 1 +; CHECK-NEXT: vavg.w $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -57,14 +54,22 
@@ entry: } define void @vavg_d(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: vavg_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrai.d $vr0, $vr0, 1 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: vavg_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: vld $vr1, $a2, 0 +; LA32-NEXT: vadd.d $vr0, $vr0, $vr1 +; LA32-NEXT: vsrai.d $vr0, $vr0, 1 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: vavg_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vld $vr1, $a2, 0 +; LA64-NEXT: vavg.d $vr0, $vr0, $vr1 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret entry: %va = load <2 x i64>, ptr %a %vb = load <2 x i64>, ptr %b @@ -79,8 +84,7 @@ define void @vavg_bu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrli.b $vr0, $vr0, 1 +; CHECK-NEXT: vavg.bu $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -97,8 +101,7 @@ define void @vavg_hu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrli.h $vr0, $vr0, 1 +; CHECK-NEXT: vavg.hu $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -115,8 +118,7 @@ define void @vavg_wu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrli.w $vr0, $vr0, 1 +; CHECK-NEXT: vavg.wu $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -129,14 +131,22 @@ entry: } define void @vavg_du(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: vavg_du: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a1, 
0 -; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 -; CHECK-NEXT: vsrli.d $vr0, $vr0, 1 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: vavg_du: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: vld $vr1, $a2, 0 +; LA32-NEXT: vadd.d $vr0, $vr0, $vr1 +; LA32-NEXT: vsrli.d $vr0, $vr0, 1 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: vavg_du: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vld $vr1, $a2, 0 +; LA64-NEXT: vavg.du $vr0, $vr0, $vr1 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret entry: %va = load <2 x i64>, ptr %a %vb = load <2 x i64>, ptr %b @@ -151,9 +161,7 @@ define void @vavgr_b(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vaddi.bu $vr0, $vr0, 1 -; CHECK-NEXT: vsrai.b $vr0, $vr0, 1 +; CHECK-NEXT: vavgr.b $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -171,9 +179,7 @@ define void @vavgr_h(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 -; CHECK-NEXT: vaddi.hu $vr0, $vr0, 1 -; CHECK-NEXT: vsrai.h $vr0, $vr0, 1 +; CHECK-NEXT: vavgr.h $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -191,9 +197,7 @@ define void @vavgr_w(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 -; CHECK-NEXT: vaddi.wu $vr0, $vr0, 1 -; CHECK-NEXT: vsrai.w $vr0, $vr0, 1 +; CHECK-NEXT: vavgr.w $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -207,15 +211,23 @@ entry: } define void @vavgr_d(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: vavgr_d: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vld $vr1, 
$a2, 0 -; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 -; CHECK-NEXT: vaddi.du $vr0, $vr0, 1 -; CHECK-NEXT: vsrai.d $vr0, $vr0, 1 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: vavgr_d: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: vld $vr1, $a2, 0 +; LA32-NEXT: vadd.d $vr0, $vr0, $vr1 +; LA32-NEXT: vaddi.du $vr0, $vr0, 1 +; LA32-NEXT: vsrai.d $vr0, $vr0, 1 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: vavgr_d: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vld $vr1, $a2, 0 +; LA64-NEXT: vavgr.d $vr0, $vr0, $vr1 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret entry: %va = load <2 x i64>, ptr %a %vb = load <2 x i64>, ptr %b @@ -231,9 +243,7 @@ define void @vavgr_bu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.b $vr0, $vr0, $vr1 -; CHECK-NEXT: vaddi.bu $vr0, $vr0, 1 -; CHECK-NEXT: vsrli.b $vr0, $vr0, 1 +; CHECK-NEXT: vavgr.bu $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -251,9 +261,7 @@ define void @vavgr_hu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.h $vr0, $vr0, $vr1 -; CHECK-NEXT: vaddi.hu $vr0, $vr0, 1 -; CHECK-NEXT: vsrli.h $vr0, $vr0, 1 +; CHECK-NEXT: vavgr.hu $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -271,9 +279,7 @@ define void @vavgr_wu(ptr %res, ptr %a, ptr %b) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vld $vr0, $a1, 0 ; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.w $vr0, $vr0, $vr1 -; CHECK-NEXT: vaddi.wu $vr0, $vr0, 1 -; CHECK-NEXT: vsrli.w $vr0, $vr0, 1 +; CHECK-NEXT: vavgr.wu $vr0, $vr0, $vr1 ; CHECK-NEXT: vst $vr0, $a0, 0 ; CHECK-NEXT: ret entry: @@ -287,15 +293,23 @@ entry: } define void @vavgr_du(ptr %res, ptr %a, ptr %b) nounwind { -; CHECK-LABEL: vavgr_du: -; CHECK: # %bb.0: # %entry -; 
CHECK-NEXT: vld $vr0, $a1, 0 -; CHECK-NEXT: vld $vr1, $a2, 0 -; CHECK-NEXT: vadd.d $vr0, $vr0, $vr1 -; CHECK-NEXT: vaddi.du $vr0, $vr0, 1 -; CHECK-NEXT: vsrli.d $vr0, $vr0, 1 -; CHECK-NEXT: vst $vr0, $a0, 0 -; CHECK-NEXT: ret +; LA32-LABEL: vavgr_du: +; LA32: # %bb.0: # %entry +; LA32-NEXT: vld $vr0, $a1, 0 +; LA32-NEXT: vld $vr1, $a2, 0 +; LA32-NEXT: vadd.d $vr0, $vr0, $vr1 +; LA32-NEXT: vaddi.du $vr0, $vr0, 1 +; LA32-NEXT: vsrli.d $vr0, $vr0, 1 +; LA32-NEXT: vst $vr0, $a0, 0 +; LA32-NEXT: ret +; +; LA64-LABEL: vavgr_du: +; LA64: # %bb.0: # %entry +; LA64-NEXT: vld $vr0, $a1, 0 +; LA64-NEXT: vld $vr1, $a2, 0 +; LA64-NEXT: vavgr.du $vr0, $vr0, $vr1 +; LA64-NEXT: vst $vr0, $a0, 0 +; LA64-NEXT: ret entry: %va = load <2 x i64>, ptr %a %vb = load <2 x i64>, ptr %b -- cgit v1.2.3 From 6ba2127a5cbe87a97a01ee73b2ef2c4681203b8c Mon Sep 17 00:00:00 2001 From: Jinjie Huang Date: Fri, 31 Oct 2025 10:29:00 +0800 Subject: [BOLT] Add constant island check in scanExternalRefs() (#165577) The [previous patch](https://github.com/llvm/llvm-project/pull/163418) has added a check to prevent adding an entry point into a constant island, but only for successfully disassembled functions. Because scanExternalRefs() is also called when a function fails to be disassembled or is skipped, it can still attempt to add an entry point at constant islands. The same issue may occur without a check for it. So, this patch complements the 'constant island' check in scanExternalRefs(). 
--- bolt/lib/Core/BinaryFunction.cpp | 16 +++++++++++++--- bolt/test/AArch64/constant-island-entry.s | 9 +++++++-- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/bolt/lib/Core/BinaryFunction.cpp b/bolt/lib/Core/BinaryFunction.cpp index 84023efe1084..fbe186454351 100644 --- a/bolt/lib/Core/BinaryFunction.cpp +++ b/bolt/lib/Core/BinaryFunction.cpp @@ -1699,9 +1699,19 @@ bool BinaryFunction::scanExternalRefs() { const uint64_t FunctionOffset = TargetAddress - TargetFunction->getAddress(); - BranchTargetSymbol = - FunctionOffset ? TargetFunction->addEntryPointAtOffset(FunctionOffset) - : TargetFunction->getSymbol(); + if (!TargetFunction->isInConstantIsland(TargetAddress)) { + BranchTargetSymbol = + FunctionOffset + ? TargetFunction->addEntryPointAtOffset(FunctionOffset) + : TargetFunction->getSymbol(); + } else { + TargetFunction->setIgnored(); + BC.outs() << "BOLT-WARNING: Ignoring entry point at address 0x" + << Twine::utohexstr(Address) + << " in constant island of function " << *TargetFunction + << '\n'; + continue; + } } // Can't find more references. Not creating relocations since we are not diff --git a/bolt/test/AArch64/constant-island-entry.s b/bolt/test/AArch64/constant-island-entry.s index 6567114eb980..7f8449deea13 100644 --- a/bolt/test/AArch64/constant-island-entry.s +++ b/bolt/test/AArch64/constant-island-entry.s @@ -1,10 +1,15 @@ -// This test checks that we ignore functions which add an entry point that -// is in a constant island. +## This test checks that we ignore functions which add an entry point that +## is in a constant island. # RUN: llvm-mc -filetype=obj -triple aarch64-unknown-unknown %s -o %t.o # RUN: %clang %cflags %t.o -pie -Wl,-q -o %t.exe + +## Check when the caller is successfully disassembled. # RUN: llvm-bolt %t.exe -o %t.bolt 2>&1 | FileCheck %s +## Skip caller to check the identical warning is triggered from ScanExternalRefs(). 
+# RUN: llvm-bolt %t.exe -o %t.bolt -skip-funcs=caller 2>&1 | FileCheck %s + # CHECK: BOLT-WARNING: Ignoring entry point at address 0x{{[0-9a-f]+}} in constant island of function func .globl func -- cgit v1.2.3 From 56777e7da2cb30f72a3ddc9861a2fbe3b9adbc6b Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Fri, 31 Oct 2025 10:30:29 +0800 Subject: [SimplifyCFG] Avoid use-after-free when removing incoming values from PHI nodes (#165744) `PHINode::removeIncomingValue` removes itself when there are no incoming edges. Then we cannot use it to retrieve the next instruction. Closes https://github.com/llvm/llvm-project/issues/165301. --- llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 8 ++++---- llvm/test/Transforms/SimplifyCFG/pr165301.ll | 26 ++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Transforms/SimplifyCFG/pr165301.ll diff --git a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp index b03fb6213d61..7f6d779687e9 100644 --- a/llvm/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyCFG.cpp @@ -5977,14 +5977,14 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI, } // Prune obsolete incoming values off the successors' PHI nodes. 
- for (auto BBI = Dest->begin(); isa(BBI); ++BBI) { + for (auto &PHI : make_early_inc_range(Dest->phis())) { unsigned PreviousEdges = Cases->size(); if (Dest == SI->getDefaultDest()) ++PreviousEdges; for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I) - cast(BBI)->removeIncomingValue(SI->getParent()); + PHI.removeIncomingValue(SI->getParent()); } - for (auto BBI = OtherDest->begin(); isa(BBI); ++BBI) { + for (auto &PHI : make_early_inc_range(OtherDest->phis())) { unsigned PreviousEdges = OtherCases->size(); if (OtherDest == SI->getDefaultDest()) ++PreviousEdges; @@ -5993,7 +5993,7 @@ bool SimplifyCFGOpt::turnSwitchRangeIntoICmp(SwitchInst *SI, if (NewBI->isUnconditional()) ++E; for (unsigned I = 0; I != E; ++I) - cast(BBI)->removeIncomingValue(SI->getParent()); + PHI.removeIncomingValue(SI->getParent()); } // Clean up the default block - it may have phis or other instructions before diff --git a/llvm/test/Transforms/SimplifyCFG/pr165301.ll b/llvm/test/Transforms/SimplifyCFG/pr165301.ll new file mode 100644 index 000000000000..4a539d77af3c --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/pr165301.ll @@ -0,0 +1,26 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6 +; RUN: opt -S -passes="simplifycfg" < %s | FileCheck %s + +; Make sure there's no use after free when removing incoming values from PHI nodes + +define i32 @pr165301(i1 %cond) { +; CHECK-LABEL: define i32 @pr165301( +; CHECK-SAME: i1 [[COND:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br label %[[SWITCHBB:.*]] +; CHECK: [[SWITCHBB]]: +; CHECK-NEXT: br label %[[SWITCHBB]] +; +entry: + br label %switchbb + +switchbb: + switch i1 %cond, label %default [ + i1 false, label %switchbb + i1 true, label %switchbb + ] + +default: + %phi.lcssa = phi i32 [ 0, %switchbb ] + ret i32 %phi.lcssa +} -- cgit v1.2.3 From bf99f6693e6163256ba4a80ea571ff9e210ee4c6 Mon Sep 17 00:00:00 2001 From: quic-likaid Date: Fri, 31 Oct 2025 10:43:07 +0800 Subject: lsan: fix 
allocator on arm64 Android (#165656) The default config is too large for arm64 Android devices, which are typically configured with 39-bit address space. This change brings it inline with sanitizer_allocator_test.cpp. --- compiler-rt/lib/lsan/lsan_allocator.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/compiler-rt/lib/lsan/lsan_allocator.h b/compiler-rt/lib/lsan/lsan_allocator.h index 556b9f56a4a4..2d0ea0b46fe0 100644 --- a/compiler-rt/lib/lsan/lsan_allocator.h +++ b/compiler-rt/lib/lsan/lsan_allocator.h @@ -93,6 +93,10 @@ using LSanSizeClassMap = DefaultSizeClassMap; const uptr kAllocatorSpace = 0x600000000000ULL; const uptr kAllocatorSize = 0x40000000000ULL; // 4T. using LSanSizeClassMap = DefaultSizeClassMap; +# elif SANITIZER_ANDROID && defined(__aarch64__) +const uptr kAllocatorSpace = 0x3000000000ULL; +const uptr kAllocatorSize = 0x2000000000ULL; +using LSanSizeClassMap = VeryCompactSizeClassMap; # else const uptr kAllocatorSpace = 0x500000000000ULL; const uptr kAllocatorSize = 0x40000000000ULL; // 4T. -- cgit v1.2.3 From e65d52ab5ab9e58a3b6d3bce470c04f2db2cd078 Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Fri, 31 Oct 2025 10:51:58 +0800 Subject: [clang-tidy][readability-redundant-parentheses] add option to prevent widely used work around (#164827) Part of #164125 Add a new option to ignore some decls. 
--------- Co-authored-by: EugeneZelenko --- .../readability/RedundantParenthesesCheck.cpp | 25 ++++++++++++++++++---- .../readability/RedundantParenthesesCheck.h | 7 ++++-- .../checks/readability/redundant-parentheses.rst | 13 +++++++++++ .../checkers/readability/redundant-parentheses.cpp | 9 ++++++++ 4 files changed, 48 insertions(+), 6 deletions(-) diff --git a/clang-tools-extra/clang-tidy/readability/RedundantParenthesesCheck.cpp b/clang-tools-extra/clang-tidy/readability/RedundantParenthesesCheck.cpp index 0ab59fff39d8..874b9618bd88 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantParenthesesCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/RedundantParenthesesCheck.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "RedundantParenthesesCheck.h" +#include "../utils/Matchers.h" +#include "../utils/OptionsUtils.h" #include "clang/AST/Expr.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" @@ -32,15 +34,30 @@ AST_MATCHER(ParenExpr, isInMacro) { } // namespace +RedundantParenthesesCheck::RedundantParenthesesCheck(StringRef Name, + ClangTidyContext *Context) + : ClangTidyCheck(Name, Context), + AllowedDecls(utils::options::parseStringList( + Options.get("AllowedDecls", "std::max;std::min"))) {} + +void RedundantParenthesesCheck::storeOptions( + ClangTidyOptions::OptionMap &Opts) { + Options.store(Opts, "AllowedDecls", + utils::options::serializeStringList(AllowedDecls)); +} + void RedundantParenthesesCheck::registerMatchers(MatchFinder *Finder) { const auto ConstantExpr = expr(anyOf(integerLiteral(), floatLiteral(), characterLiteral(), cxxBoolLiteral(), stringLiteral(), cxxNullPtrLiteralExpr())); Finder->addMatcher( - parenExpr(subExpr(anyOf(parenExpr(), ConstantExpr, declRefExpr())), - unless(anyOf(isInMacro(), - // sizeof(...) is common used. 
- hasParent(unaryExprOrTypeTraitExpr())))) + parenExpr( + subExpr(anyOf(parenExpr(), ConstantExpr, + declRefExpr(to(namedDecl(unless( + matchers::matchesAnyListedName(AllowedDecls))))))), + unless(anyOf(isInMacro(), + // sizeof(...) is common used. + hasParent(unaryExprOrTypeTraitExpr())))) .bind("dup"), this); } diff --git a/clang-tools-extra/clang-tidy/readability/RedundantParenthesesCheck.h b/clang-tools-extra/clang-tidy/readability/RedundantParenthesesCheck.h index 9a0409b83fff..2638a09730f7 100644 --- a/clang-tools-extra/clang-tidy/readability/RedundantParenthesesCheck.h +++ b/clang-tools-extra/clang-tidy/readability/RedundantParenthesesCheck.h @@ -20,13 +20,16 @@ namespace clang::tidy::readability { /// https://clang.llvm.org/extra/clang-tidy/checks/readability/redundant-parentheses.html class RedundantParenthesesCheck : public ClangTidyCheck { public: - RedundantParenthesesCheck(StringRef Name, ClangTidyContext *Context) - : ClangTidyCheck(Name, Context) {} + RedundantParenthesesCheck(StringRef Name, ClangTidyContext *Context); + void storeOptions(ClangTidyOptions::OptionMap &Opts) override; void registerMatchers(ast_matchers::MatchFinder *Finder) override; void check(const ast_matchers::MatchFinder::MatchResult &Result) override; bool isLanguageVersionSupported(const LangOptions &LangOpts) const override { return LangOpts.CPlusPlus | LangOpts.C99; } + +private: + const std::vector AllowedDecls; }; } // namespace clang::tidy::readability diff --git a/clang-tools-extra/docs/clang-tidy/checks/readability/redundant-parentheses.rst b/clang-tools-extra/docs/clang-tidy/checks/readability/redundant-parentheses.rst index 23d975e64649..20e3891c72d7 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/readability/redundant-parentheses.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/readability/redundant-parentheses.rst @@ -27,3 +27,16 @@ affect the semantics. .. code-block:: c++ int a = (1 * 2) + 3; // no warning + +Options +------- + +.. 
option:: AllowedDecls + + Semicolon-separated list of regular expressions matching names of declarations + to ignore when parentheses surround them. Declarations can include variables + or functions. The default is `std::max;std::min`. + + Some STL library functions may have the same name as a widely used function-like + macro, for example ``std::max`` and the ``max`` macro. A workaround to distinguish + them is to parenthesize the function name, which prevents macro expansion. diff --git a/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-parentheses.cpp b/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-parentheses.cpp index 926cb118c77c..c77608c66469 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-parentheses.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/readability/redundant-parentheses.cpp @@ -62,3 +62,12 @@ void exceptions() { // CHECK-MESSAGES: :[[@LINE-1]]:11: warning: redundant parentheses around expression [readability-redundant-parentheses] // CHECK-FIXES: alignof(3); } + +namespace std { + template T max(T, T); + template T min(T, T); +} // namespace std +void ignoreStdMaxMin() { + (std::max)(1,2); + (std::min)(1,2); +} -- cgit v1.2.3 From 067e20506901d10166235b20f70344310bf716e8 Mon Sep 17 00:00:00 2001 From: Yu Hao Date: Thu, 30 Oct 2025 20:18:08 -0700 Subject: [clang][transformer] Change `name` range-selector to return `Error` instead of an invalid range. (#164715) Previously, when the text in the selected range was different from the decl's name, `name` returned an invalid range, which could cause crashes if `name` was nested in other range selectors that assumed always valid ranges. With this change, `name` returns an `Error` if it can't get the range. 
--- clang/lib/Tooling/Transformer/RangeSelector.cpp | 8 ++++++-- clang/unittests/Tooling/RangeSelectorTest.cpp | 25 +++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/clang/lib/Tooling/Transformer/RangeSelector.cpp b/clang/lib/Tooling/Transformer/RangeSelector.cpp index 171c786bc366..b4bdec1fcdd6 100644 --- a/clang/lib/Tooling/Transformer/RangeSelector.cpp +++ b/clang/lib/Tooling/Transformer/RangeSelector.cpp @@ -205,8 +205,12 @@ RangeSelector transformer::name(std::string ID) { // `foo` for which this range will be too short. Doing so will // require subcasing `NamedDecl`, because it doesn't provide virtual // access to the \c DeclarationNameInfo. - if (tooling::getText(R, *Result.Context) != D->getName()) - return CharSourceRange(); + StringRef Text = tooling::getText(R, *Result.Context); + if (Text != D->getName()) + return llvm::make_error( + llvm::errc::not_supported, + "range selected by name(node id=" + ID + "): '" + Text + + "' is different from decl name '" + D->getName() + "'"); return R; } if (const auto *E = Node.get()) { diff --git a/clang/unittests/Tooling/RangeSelectorTest.cpp b/clang/unittests/Tooling/RangeSelectorTest.cpp index adf5e74ea319..a1fcbb023832 100644 --- a/clang/unittests/Tooling/RangeSelectorTest.cpp +++ b/clang/unittests/Tooling/RangeSelectorTest.cpp @@ -527,6 +527,31 @@ TEST(RangeSelectorTest, NameOpDeclRefError) { AllOf(HasSubstr(Ref), HasSubstr("requires property 'identifier'"))))); } +TEST(RangeSelectorTest, NameOpDeclInMacroArg) { + StringRef Code = R"cc( + #define MACRO(name) int name; + MACRO(x) + )cc"; + const char *ID = "id"; + TestMatch Match = matchCode(Code, varDecl().bind(ID)); + EXPECT_THAT_EXPECTED(select(name(ID), Match), HasValue("x")); +} + +TEST(RangeSelectorTest, NameOpDeclInMacroBodyError) { + StringRef Code = R"cc( + #define MACRO int x; + MACRO + )cc"; + const char *ID = "id"; + TestMatch Match = matchCode(Code, varDecl().bind(ID)); + EXPECT_THAT_EXPECTED( + 
name(ID)(Match.Result), + Failed(testing::Property( + &StringError::getMessage, + AllOf(HasSubstr("range selected by name(node id="), + HasSubstr("' is different from decl name 'x'"))))); +} + TEST(RangeSelectorTest, CallArgsOp) { const StringRef Code = R"cc( struct C { -- cgit v1.2.3 From 42a8ff877d47131ecb1280a1cc7e5e3c3bca6952 Mon Sep 17 00:00:00 2001 From: Alexey Samsonov Date: Thu, 30 Oct 2025 20:30:49 -0700 Subject: [libc] Add "struct tm" declaration to <wchar.h> (#165795) `<wchar.h>` should at least include the forward declaration of `struct tm`, since it's needed for the `wcsftime` declaration (also, see https://man7.org/linux/man-pages/man0/wchar.h.0p.html). Even though we don't yet have `wcsftime`, some downstream users (notably - libcxx) expect to see the `struct tm` declaration there, to re-declare it under `std` namespace: https://github.com/llvm/llvm-project/blob/c46bfed1a484d30cd251a9a225649d74e3bf0af5/libcxx/include/cwchar#L135 So, add this type declaration to llvm-libc version of `wchar.h` now. --- libc/include/wchar.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml index c8b9e21b56b2..fb5b19b523b3 100644 --- a/libc/include/wchar.yaml +++ b/libc/include/wchar.yaml @@ -6,6 +6,10 @@ macros: types: - type_name: FILE - type_name: size_t + # TODO: Remove this once we have a function declaration using "struct tm" + # (wcsftime). We're declaring it here now, since libc++ expects + # forward-declaration of "struct tm" in the header. 
+ - type_name: struct_tm - type_name: wint_t - type_name: wchar_t - type_name: mbstate_t -- cgit v1.2.3 From 5ba0b91a6236342ab136e302b07597de82dce133 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Thu, 23 Oct 2025 15:00:19 +0900 Subject: RuntimeLibcalls: Whitespace fix --- llvm/include/llvm/IR/RuntimeLibcalls.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/IR/RuntimeLibcalls.td b/llvm/include/llvm/IR/RuntimeLibcalls.td index 7be1b654ca72..24c1b035d0dd 100644 --- a/llvm/include/llvm/IR/RuntimeLibcalls.td +++ b/llvm/include/llvm/IR/RuntimeLibcalls.td @@ -1585,7 +1585,7 @@ def __aeabi_f2ulz : RuntimeLibcallImpl; // CallingConv::ARM_AA // RTABI chapter 4.1.2, Table 7 def __aeabi_d2f : RuntimeLibcallImpl; // CallingConv::ARM_AAPCS def __aeabi_d2h : RuntimeLibcallImpl; // CallingConv::ARM_AAPCS -def __aeabi_f2d : RuntimeLibcallImpl; // CallingConv::ARM_AAPCS +def __aeabi_f2d : RuntimeLibcallImpl; // CallingConv::ARM_AAPCS // Integer to floating-point conversions. // RTABI chapter 4.1.2, Table 8 -- cgit v1.2.3