summary | refs | log | tree | commit | diff
path: root/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.atomic.buffer.load.ll
diff options
context:
space:
mode:
author Christudasan Devadasan <christudasan.devadasan@amd.com> 2025-11-21 19:42:17 +0530
committer GitHub <noreply@github.com> 2025-11-21 19:42:17 +0530
commit a2dc4e02e7ba77ddcb0afca0304535d8f142c98b (patch)
tree 4604ba453f6b4eb4252afcb2dc5602143e01a4f4 /llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.atomic.buffer.load.ll
parent 49995b2af0abbec1095031dfe9eb049945b6d4f7 (diff)
[AMDGPU] Enable multi-group xnack replay in hardware (GFX1250) (#169016)
This patch enables the multi-group xnack replay mode by configuring the hardware MODE register at kernel entry. This aligns the hardware behavior with the compiler's existing multi-group s_wait_xcnt insertion logic.
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.atomic.buffer.load.ll')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.atomic.buffer.load.ll 14
1 files changed, 14 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.atomic.buffer.load.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.atomic.buffer.load.ll
index 1e4b43d1f4fc..c2c8580de937 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.atomic.buffer.load.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.struct.ptr.atomic.buffer.load.ll
@@ -34,6 +34,7 @@ define amdgpu_kernel void @struct_ptr_atomic_buffer_load_i32(ptr addrspace(8) %p
;
; GFX12-LABEL: struct_ptr_atomic_buffer_load_i32:
; GFX12: ; %bb.0: ; %bb
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: s_load_b32 s6, s[4:5], 0x34
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -85,6 +86,7 @@ define amdgpu_kernel void @struct_ptr_atomic_buffer_load_i32_const_idx(ptr addrs
;
; GFX12-LABEL: struct_ptr_atomic_buffer_load_i32_const_idx:
; GFX12: ; %bb.0: ; %bb
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX12-NEXT: v_mov_b32_e32 v1, 15
@@ -137,6 +139,7 @@ define amdgpu_kernel void @struct_ptr_atomic_buffer_load_i32_off(ptr addrspace(8
;
; GFX12-LABEL: struct_ptr_atomic_buffer_load_i32_off:
; GFX12: ; %bb.0: ; %bb
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: s_load_b32 s6, s[4:5], 0x34
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -191,6 +194,7 @@ define amdgpu_kernel void @struct_ptr_atomic_buffer_load_i32_soff(ptr addrspace(
;
; GFX12-LABEL: struct_ptr_atomic_buffer_load_i32_soff:
; GFX12: ; %bb.0: ; %bb
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: s_load_b32 s6, s[4:5], 0x34
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -245,6 +249,7 @@ define amdgpu_kernel void @struct_ptr_atomic_buffer_load_i32_dlc(ptr addrspace(8
;
; GFX12-LABEL: struct_ptr_atomic_buffer_load_i32_dlc:
; GFX12: ; %bb.0: ; %bb
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: s_load_b32 s6, s[4:5], 0x34
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -299,6 +304,7 @@ define amdgpu_kernel void @struct_ptr_nonatomic_buffer_load_i32(ptr addrspace(8)
;
; GFX12-LABEL: struct_ptr_nonatomic_buffer_load_i32:
; GFX12: ; %bb.0: ; %bb
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: s_load_b32 s6, s[4:5], 0x34
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -354,6 +360,7 @@ define amdgpu_kernel void @struct_ptr_atomic_buffer_load_i64(ptr addrspace(8) %p
;
; GFX12-SDAG-TRUE16-LABEL: struct_ptr_atomic_buffer_load_i64:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %bb
+; GFX12-SDAG-TRUE16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX12-SDAG-TRUE16-NEXT: s_clause 0x1
; GFX12-SDAG-TRUE16-NEXT: s_load_b32 s6, s[4:5], 0x34
; GFX12-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -377,6 +384,7 @@ define amdgpu_kernel void @struct_ptr_atomic_buffer_load_i64(ptr addrspace(8) %p
;
; GFX12-GISEL-TRUE16-LABEL: struct_ptr_atomic_buffer_load_i64:
; GFX12-GISEL-TRUE16: ; %bb.0: ; %bb
+; GFX12-GISEL-TRUE16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX12-GISEL-TRUE16-NEXT: s_clause 0x1
; GFX12-GISEL-TRUE16-NEXT: s_load_b32 s6, s[4:5], 0x34
; GFX12-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -433,6 +441,7 @@ define amdgpu_kernel void @struct_ptr_atomic_buffer_load_v2i16(ptr addrspace(8)
;
; GFX12-LABEL: struct_ptr_atomic_buffer_load_v2i16:
; GFX12: ; %bb.0: ; %bb
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: s_load_b32 s6, s[4:5], 0x34
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -559,6 +568,7 @@ define amdgpu_kernel void @struct_ptr_atomic_buffer_load_v4i16(ptr addrspace(8)
;
; GFX12-SDAG-TRUE16-LABEL: struct_ptr_atomic_buffer_load_v4i16:
; GFX12-SDAG-TRUE16: ; %bb.0: ; %bb
+; GFX12-SDAG-TRUE16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX12-SDAG-TRUE16-NEXT: s_clause 0x1
; GFX12-SDAG-TRUE16-NEXT: s_load_b32 s6, s[4:5], 0x34
; GFX12-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -582,6 +592,7 @@ define amdgpu_kernel void @struct_ptr_atomic_buffer_load_v4i16(ptr addrspace(8)
;
; GFX12-FAKE16-LABEL: struct_ptr_atomic_buffer_load_v4i16:
; GFX12-FAKE16: ; %bb.0: ; %bb
+; GFX12-FAKE16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX12-FAKE16-NEXT: s_clause 0x1
; GFX12-FAKE16-NEXT: s_load_b32 s6, s[4:5], 0x34
; GFX12-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -607,6 +618,7 @@ define amdgpu_kernel void @struct_ptr_atomic_buffer_load_v4i16(ptr addrspace(8)
;
; GFX12-GISEL-TRUE16-LABEL: struct_ptr_atomic_buffer_load_v4i16:
; GFX12-GISEL-TRUE16: ; %bb.0: ; %bb
+; GFX12-GISEL-TRUE16-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX12-GISEL-TRUE16-NEXT: s_clause 0x1
; GFX12-GISEL-TRUE16-NEXT: s_load_b32 s6, s[4:5], 0x34
; GFX12-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -664,6 +676,7 @@ define amdgpu_kernel void @struct_ptr_atomic_buffer_load_v4i32(ptr addrspace(8)
;
; GFX12-LABEL: struct_ptr_atomic_buffer_load_v4i32:
; GFX12: ; %bb.0: ; %bb
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: s_load_b32 s6, s[4:5], 0x34
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -721,6 +734,7 @@ define amdgpu_kernel void @struct_ptr_atomic_buffer_load_ptr(ptr addrspace(8) %p
;
; GFX12-LABEL: struct_ptr_atomic_buffer_load_ptr:
; GFX12: ; %bb.0: ; %bb
+; GFX12-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1
; GFX12-NEXT: s_clause 0x1
; GFX12-NEXT: s_load_b32 s6, s[4:5], 0x34
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x24