diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll b/llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll index e0ea08d27697..7b9faf47793c 100644 --- a/llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll +++ b/llvm/test/CodeGen/AMDGPU/scale-offset-smem.ll @@ -5,6 +5,7 @@ define amdgpu_ps float @s_load_b32_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) { ; GCN-LABEL: s_load_b32_idxprom: ; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GCN-NEXT: s_load_b32 s0, s[0:1], s2 offset:0x0 scale_offset ; GCN-NEXT: s_wait_kmcnt 0x0 ; GCN-NEXT: v_mov_b32_e32 v0, s0 @@ -21,6 +22,7 @@ entry: define amdgpu_ps float @s_load_b32_idx32(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) { ; SDAG-LABEL: s_load_b32_idx32: ; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; SDAG-NEXT: s_ashr_i32 s3, s2, 31 ; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 2 @@ -32,6 +34,7 @@ define amdgpu_ps float @s_load_b32_idx32(ptr addrspace(4) align 4 inreg %p, i32 ; ; GISEL-LABEL: s_load_b32_idx32: ; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GISEL-NEXT: s_ashr_i32 s3, s2, 31 ; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GISEL-NEXT: s_lshl_b64 s[2:3], s[2:3], 2 @@ -50,6 +53,7 @@ entry: define amdgpu_ps float @s_load_b32_idxprom_wrong_stride(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) { ; SDAG-LABEL: s_load_b32_idxprom_wrong_stride: ; SDAG: ; %bb.0: ; %entry +; SDAG-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; SDAG-NEXT: s_mov_b32 s3, 0 ; SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; SDAG-NEXT: s_lshl_b64 s[2:3], s[2:3], 3 @@ -61,6 +65,7 @@ define amdgpu_ps float @s_load_b32_idxprom_wrong_stride(ptr addrspace(4) align 4 ; ; GISEL-LABEL: s_load_b32_idxprom_wrong_stride: ; GISEL: ; %bb.0: ; %entry +; GISEL-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GISEL-NEXT: s_mov_b32 s3, 0 ; GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) ; GISEL-NEXT: s_lshl_b64 s[2:3], s[2:3], 3 @@ -80,6 +85,7 @@ entry: define amdgpu_ps float @s_load_b16_idxprom_ioffset(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) { ; GCN-LABEL: s_load_b16_idxprom_ioffset: ; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GCN-NEXT: s_load_u16 s0, s[0:1], s2 offset:0x20 scale_offset ; GCN-NEXT: s_wait_kmcnt 0x0 ; GCN-NEXT: v_mov_b32_e32 v0, s0 @@ -97,6 +103,7 @@ entry: define amdgpu_ps <2 x float> @s_load_b64_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) { ; GCN-LABEL: s_load_b64_idxprom: ; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GCN-NEXT: s_load_b64 s[4:5], s[0:1], s2 offset:0x0 scale_offset ; GCN-NEXT: s_wait_kmcnt 0x0 ; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 @@ -111,6 +118,7 @@ entry: define amdgpu_ps <3 x float> @s_load_b96_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) { ; GCN-LABEL: s_load_b96_idxprom: ; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GCN-NEXT: s_load_b96 s[4:6], s[0:1], s2 offset:0x0 scale_offset ; GCN-NEXT: s_wait_kmcnt 0x0 ; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 @@ -126,6 +134,7 @@ entry: define amdgpu_ps <4 x float> @s_load_b128_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) { ; GCN-LABEL: s_load_b128_idxprom: ; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GCN-NEXT: s_load_b128 s[4:7], s[0:1], s2 offset:0x0 scale_offset ; GCN-NEXT: s_wait_kmcnt 0x0 ; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 @@ -141,6 +150,7 @@ entry: define amdgpu_ps <8 x float> @s_load_b256_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) { ; GCN-LABEL: s_load_b256_idxprom: ; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GCN-NEXT: s_load_b256 s[4:11], s[0:1], s2 offset:0x0 scale_offset ; GCN-NEXT: s_wait_kmcnt 0x0 ; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 @@ -158,6 +168,7 @@ entry: define amdgpu_ps <16 x float> @s_load_b512_idxprom(ptr addrspace(4) align 4 inreg %p, i32 inreg %idx) { ; GCN-LABEL: s_load_b512_idxprom: ; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GCN-NEXT: s_load_b512 s[4:19], s[0:1], s2 offset:0x0 scale_offset ; GCN-NEXT: s_wait_kmcnt 0x0 ; GCN-NEXT: v_dual_mov_b32 v0, s4 :: v_dual_mov_b32 v1, s5 @@ -179,6 +190,7 @@ entry: define amdgpu_ps float @s_load_b32_idxprom_range(ptr addrspace(4) align 4 inreg %p) { ; GCN-LABEL: s_load_b32_idxprom_range: ; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0 ; GCN-NEXT: s_wait_kmcnt 0x0 ; GCN-NEXT: s_load_b32 s0, s[0:1], s2 offset:0x0 scale_offset @@ -196,6 +208,7 @@ entry: define amdgpu_ps float @s_load_b32_idxprom_range_ioffset(ptr addrspace(4) align 4 inreg %p) { ; GCN-LABEL: s_load_b32_idxprom_range_ioffset: ; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0 ; GCN-NEXT: s_wait_kmcnt 0x0 ; GCN-NEXT: s_load_b32 s0, s[0:1], s2 offset:0x40 scale_offset @@ -216,6 +229,7 @@ entry: define amdgpu_ps float @s_load_b8_idxprom_range_ioffset(ptr addrspace(4) align 4 inreg %p) { ; GCN-LABEL: s_load_b8_idxprom_range_ioffset: ; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0 ; GCN-NEXT: s_wait_kmcnt 0x0 ; GCN-NEXT: s_load_u8 s0, s[0:1], s2 offset:0x10 @@ -236,6 +250,7 @@ entry: define amdgpu_ps float @s_load_b16_idxprom_range(ptr addrspace(4) align 4 inreg %p) { ; GCN-LABEL: s_load_b16_idxprom_range: ; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0 ; GCN-NEXT: s_wait_kmcnt 0x0 ; GCN-NEXT: s_load_u16 s0, s[0:1], s2 offset:0x0 scale_offset @@ -255,6 +270,7 @@ entry: define amdgpu_ps float @s_load_b16_idxprom_range_ioffset(ptr addrspace(4) align 4 inreg %p) { ; GCN-LABEL: s_load_b16_idxprom_range_ioffset: ; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0 ; GCN-NEXT: s_wait_kmcnt 0x0 ; GCN-NEXT: s_load_u16 s0, s[0:1], s2 offset:0x20 scale_offset @@ -275,6 +291,7 @@ entry: define amdgpu_ps <2 x float> @s_load_b64_idxprom_range(ptr addrspace(4) align 4 inreg %p) { ; GCN-LABEL: s_load_b64_idxprom_range: ; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GCN-NEXT: s_load_b32 s4, s[0:1], 0x0 ; GCN-NEXT: s_wait_kmcnt 0x0 ; GCN-NEXT: s_load_b64 s[2:3], s[0:1], s4 offset:0x0 scale_offset @@ -292,6 +309,7 @@ entry: define amdgpu_ps <3 x float> @s_load_b96_idxprom_range(ptr addrspace(4) align 4 inreg %p) { ; GCN-LABEL: s_load_b96_idxprom_range: ; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0 ; GCN-NEXT: s_wait_kmcnt 0x0 ; GCN-NEXT: s_load_b96 s[4:6], s[0:1], s2 offset:0x0 scale_offset @@ -310,6 +328,7 @@ entry: define amdgpu_ps <4 x float> @s_load_b128_idxprom_range(ptr addrspace(4) align 4 inreg %p) { ; GCN-LABEL: s_load_b128_idxprom_range: ; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0 ; GCN-NEXT: s_wait_kmcnt 0x0 ; GCN-NEXT: s_load_b128 s[4:7], s[0:1], s2 offset:0x0 scale_offset @@ -328,6 +347,7 @@ entry: define amdgpu_ps <8 x float> @s_load_b256_idxprom_range(ptr addrspace(4) align 4 inreg %p) { ; GCN-LABEL: s_load_b256_idxprom_range: ; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0 ; GCN-NEXT: s_wait_kmcnt 0x0 ; GCN-NEXT: s_load_b256 s[4:11], s[0:1], s2 offset:0x0 scale_offset @@ -348,6 +368,7 @@ entry: define amdgpu_ps <16 x float> @s_load_b512_idxprom_range(ptr addrspace(4) align 4 inreg %p) { ; GCN-LABEL: s_load_b512_idxprom_range: ; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE, 25, 1), 1 ; GCN-NEXT: s_load_b32 s2, s[0:1], 0x0 ; GCN-NEXT: s_wait_kmcnt 0x0 ; GCN-NEXT: s_load_b512 s[4:19], s[0:1], s2 offset:0x0 scale_offset |
