summaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen/AMDGPU/mul_int24.ll
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/mul_int24.ll')
-rw-r--r--llvm/test/CodeGen/AMDGPU/mul_int24.ll49
1 files changed, 25 insertions, 24 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/mul_int24.ll b/llvm/test/CodeGen/AMDGPU/mul_int24.ll
index 10d4eb029ee3..36dabd858c70 100644
--- a/llvm/test/CodeGen/AMDGPU/mul_int24.ll
+++ b/llvm/test/CodeGen/AMDGPU/mul_int24.ll
@@ -459,18 +459,18 @@ define amdgpu_kernel void @test_smul24_i64_square(ptr addrspace(1) %out, i32 %a,
define amdgpu_kernel void @test_smul24_i33(ptr addrspace(1) %out, i33 %a, i33 %b) #0 {
; SI-LABEL: test_smul24_i33:
; SI: ; %bb.0: ; %entry
-; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
-; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_mov_b64 s[6:7], s[2:3]
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NEXT: s_load_dword s6, s[4:5], 0xd
+; SI-NEXT: s_load_dword s4, s[4:5], 0xb
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
-; SI-NEXT: s_bfe_i32 s4, s4, 0x180000
+; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_bfe_i32 s5, s6, 0x180000
-; SI-NEXT: v_mov_b32_e32 v0, s4
-; SI-NEXT: s_mul_i32 s4, s5, s4
-; SI-NEXT: v_mul_hi_i32_i24_e32 v1, s5, v0
-; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: s_bfe_i32 s4, s4, 0x180000
+; SI-NEXT: v_mov_b32_e32 v0, s5
+; SI-NEXT: s_mul_i32 s5, s4, s5
+; SI-NEXT: v_mul_hi_i32_i24_e32 v1, s4, v0
+; SI-NEXT: v_mov_b32_e32 v0, s5
; SI-NEXT: v_lshl_b64 v[0:1], v[0:1], 31
; SI-NEXT: v_ashr_i64 v[0:1], v[0:1], 31
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
@@ -478,11 +478,12 @@ define amdgpu_kernel void @test_smul24_i33(ptr addrspace(1) %out, i33 %a, i33 %b
;
; VI-LABEL: test_smul24_i33:
; VI: ; %bb.0: ; %entry
-; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
+; VI-NEXT: s_load_dword s2, s[4:5], 0x2c
+; VI-NEXT: s_load_dword s3, s[4:5], 0x34
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_bfe_i32 s2, s2, 0x180000
-; VI-NEXT: s_bfe_i32 s3, s4, 0x180000
+; VI-NEXT: s_bfe_i32 s3, s3, 0x180000
; VI-NEXT: v_mov_b32_e32 v0, s3
; VI-NEXT: v_mul_hi_i32_i24_e32 v1, s2, v0
; VI-NEXT: v_mul_i32_i24_e32 v0, s2, v0
@@ -569,28 +570,28 @@ entry:
define amdgpu_kernel void @test_smulhi24_i33(ptr addrspace(1) %out, i33 %a, i33 %b) {
; SI-LABEL: test_smulhi24_i33:
; SI: ; %bb.0: ; %entry
-; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd
-; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_mov_b64 s[6:7], s[2:3]
+; SI-NEXT: s_load_dword s6, s[4:5], 0xd
+; SI-NEXT: s_load_dword s7, s[4:5], 0xb
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
-; SI-NEXT: v_mov_b32_e32 v0, s4
-; SI-NEXT: v_mul_hi_i32_i24_e32 v0, s6, v0
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_mov_b32_e32 v0, s6
+; SI-NEXT: v_mul_hi_i32_i24_e32 v0, s7, v0
; SI-NEXT: v_and_b32_e32 v0, 1, v0
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: test_smulhi24_i33:
; VI: ; %bb.0: ; %entry
-; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
-; VI-NEXT: s_waitcnt lgkmcnt(0)
-; VI-NEXT: s_mov_b64 s[6:7], s[2:3]
-; VI-NEXT: v_mov_b32_e32 v0, s4
-; VI-NEXT: v_mul_hi_i32_i24_e32 v0, s6, v0
+; VI-NEXT: s_load_dword s6, s[4:5], 0x34
+; VI-NEXT: s_load_dword s7, s[4:5], 0x2c
+; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s6
+; VI-NEXT: v_mul_hi_i32_i24_e32 v0, s7, v0
; VI-NEXT: v_and_b32_e32 v0, 1, v0
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm