diff options
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll | 282 |
1 files changed, 117 insertions, 165 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll index cae833b0d64e..0e1bbbd1ea92 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/insertelement.i16.ll @@ -123,9 +123,8 @@ define amdgpu_ps void @insertelement_v_v2i16_s_s(ptr addrspace(1) %ptr, i16 inre ; GFX8-NEXT: s_lshl_b32 s0, s0, 4 ; GFX8-NEXT: s_lshl_b32 s1, s1, s0 ; GFX8-NEXT: s_lshl_b32 s0, 0xffff, s0 -; GFX8-NEXT: s_not_b32 s0, s0 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_and_b32_e32 v2, s0, v0 +; GFX8-NEXT: v_bfi_b32 v2, s0, 0, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: v_or_b32_e32 v2, s1, v2 @@ -143,11 +142,10 @@ define amdgpu_ps void @insertelement_v_v2i16_s_s(ptr addrspace(1) %ptr, i16 inre ; GFX7-NEXT: s_lshl_b32 s0, s0, 4 ; GFX7-NEXT: s_lshl_b32 s1, s1, s0 ; GFX7-NEXT: s_lshl_b32 s0, 0xffff, s0 -; GFX7-NEXT: s_not_b32 s0, s0 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 +; GFX7-NEXT: v_bfi_b32 v0, s0, 0, v0 ; GFX7-NEXT: v_or_b32_e32 v0, s1, v0 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX7-NEXT: s_endpgm @@ -302,9 +300,8 @@ define amdgpu_ps void @insertelement_s_v2i16_s_v(ptr addrspace(4) inreg %ptr, i1 ; GFX8-NEXT: v_mov_b32_e32 v1, 0xffff ; GFX8-NEXT: v_lshlrev_b32_e64 v2, v0, s1 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v0, v1 -; GFX8-NEXT: v_not_b32_e32 v0, v0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: v_and_b32_e32 v3, s0, v0 +; GFX8-NEXT: v_bfi_b32 v3, v0, 0, s0 ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: v_or_b32_e32 v2, v3, v2 @@ -319,9 +316,8 @@ define amdgpu_ps void @insertelement_s_v2i16_s_v(ptr addrspace(4) inreg %ptr, i1 ; GFX7-NEXT: s_and_b32 s1, s4, 0xffff ; GFX7-NEXT: v_lshl_b32_e32 v1, s1, v0 ; GFX7-NEXT: v_lshl_b32_e32 v0, 0xffff, v0 -; GFX7-NEXT: v_not_b32_e32 v0, v0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 +; GFX7-NEXT: v_bfi_b32 v0, v0, 0, s0 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: s_mov_b64 s[0:1], 0 ; GFX7-NEXT: s_mov_b32 s2, -1 @@ -393,9 +389,8 @@ define amdgpu_ps void @insertelement_s_v2i16_v_v(ptr addrspace(4) inreg %ptr, i1 ; GFX8-NEXT: v_mov_b32_e32 v2, 0xffff ; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v1, v2 -; GFX8-NEXT: v_not_b32_e32 v0, v0 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) -; GFX8-NEXT: v_and_b32_e32 v2, s0, v0 +; GFX8-NEXT: v_bfi_b32 v2, v0, 0, s0 ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v3 @@ -410,9 +405,8 @@ define amdgpu_ps void @insertelement_s_v2i16_v_v(ptr addrspace(4) inreg %ptr, i1 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, v1, v0 ; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1 -; GFX7-NEXT: v_not_b32_e32 v1, v1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) -; GFX7-NEXT: v_and_b32_e32 v1, s0, v1 +; GFX7-NEXT: v_bfi_b32 v1, v1, 0, s0 ; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX7-NEXT: s_mov_b64 s[0:1], 0 ; GFX7-NEXT: s_mov_b32 s2, -1 @@ -482,12 +476,11 @@ define amdgpu_ps void @insertelement_v_v2i16_s_v(ptr addrspace(1) %ptr, i16 inre ; GFX8-NEXT: v_and_b32_e32 v2, 1, v2 ; GFX8-NEXT: v_mov_b32_e32 v1, 0xffff ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 4, v2 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, v2, v1 ; GFX8-NEXT: s_and_b32 s0, s2, 0xffff -; GFX8-NEXT: v_not_b32_e32 v1, v1 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, v2, v1 ; GFX8-NEXT: v_lshlrev_b32_e64 v3, v2, s0 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_and_b32_e32 v2, v0, v1 +; GFX8-NEXT: v_bfi_b32 v2, v1, 0, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v3 @@ -505,11 +498,10 @@ define amdgpu_ps void @insertelement_v_v2i16_s_v(ptr addrspace(1) %ptr, i16 inre ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 4, v1 ; GFX7-NEXT: v_lshl_b32_e32 v2, s0, v1 ; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1 -; GFX7-NEXT: v_not_b32_e32 v1, v1 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_and_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_bfi_b32 v0, v1, 0, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX7-NEXT: s_endpgm @@ -576,10 +568,9 @@ define amdgpu_ps void @insertelement_v_v2i16_v_s(ptr addrspace(1) %ptr, i16 %val ; GFX8-NEXT: s_lshl_b32 s0, s0, 4 ; GFX8-NEXT: v_mov_b32_e32 v1, s0 ; GFX8-NEXT: s_lshl_b32 s0, 0xffff, s0 -; GFX8-NEXT: s_not_b32 s0, s0 ; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_and_b32_e32 v3, s0, v0 +; GFX8-NEXT: v_bfi_b32 v3, s0, 0, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: v_or_b32_e32 v2, v3, v2 @@ -597,11 +588,10 @@ define amdgpu_ps void @insertelement_v_v2i16_v_s(ptr addrspace(1) %ptr, i16 %val ; GFX7-NEXT: s_lshl_b32 s0, s0, 4 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, s0, v1 ; GFX7-NEXT: s_lshl_b32 s0, 0xffff, s0 -; GFX7-NEXT: s_not_b32 s0, s0 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 +; GFX7-NEXT: v_bfi_b32 v0, s0, 0, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 ; GFX7-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX7-NEXT: s_endpgm @@ -668,10 +658,9 @@ define amdgpu_ps void @insertelement_v_v2i16_v_v(ptr addrspace(1) %ptr, i16 %val ; GFX8-NEXT: v_mov_b32_e32 v1, 0xffff ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 4, v3 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, v3, v1 -; GFX8-NEXT: v_not_b32_e32 v1, v1 ; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX8-NEXT: s_waitcnt vmcnt(0) -; GFX8-NEXT: v_and_b32_e32 v3, v0, v1 +; GFX8-NEXT: v_bfi_b32 v3, v1, 0, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: v_or_b32_e32 v2, v3, v2 @@ -689,11 +678,10 @@ define amdgpu_ps void @insertelement_v_v2i16_v_v(ptr addrspace(1) %ptr, i16 %val ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 4, v1 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, v1, v2 ; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1 -; GFX7-NEXT: v_not_b32_e32 v1, v1 ; GFX7-NEXT: s_mov_b64 s[0:1], 0 ; GFX7-NEXT: s_mov_b32 s2, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) -; GFX7-NEXT: v_and_b32_e32 v0, v0, v1 +; GFX7-NEXT: v_bfi_b32 v0, v1, 0, v0 ; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 ; GFX7-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7-NEXT: s_endpgm @@ -820,19 +808,18 @@ define amdgpu_ps void @insertelement_v_v4i16_s_s(ptr addrspace(1) %ptr, i16 inre ; GFX8-LABEL: insertelement_v_v4i16_s_s: ; GFX8: ; %bb.0: ; GFX8-NEXT: flat_load_dwordx2 v[0:1], v[0:1] -; GFX8-NEXT: s_and_b32 s1, s3, 1 ; GFX8-NEXT: s_lshr_b32 s0, s3, 1 +; GFX8-NEXT: s_and_b32 s1, s3, 1 ; GFX8-NEXT: s_and_b32 s2, s2, 0xffff ; GFX8-NEXT: s_lshl_b32 s1, s1, 4 +; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 ; GFX8-NEXT: s_lshl_b32 s2, s2, s1 ; GFX8-NEXT: s_lshl_b32 s1, 0xffff, s1 -; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 -; GFX8-NEXT: s_not_b32 s1, s1 ; GFX8-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-NEXT: v_mov_b32_e32 v3, 0 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc -; GFX8-NEXT: v_and_b32_e32 v4, s1, v4 +; GFX8-NEXT: v_bfi_b32 v4, s1, 0, v4 ; GFX8-NEXT: v_or_b32_e32 v4, s2, v4 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s0, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[0:1] @@ -846,19 +833,18 @@ define amdgpu_ps void @insertelement_v_v4i16_s_s(ptr addrspace(1) %ptr, i16 inre ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_and_b32 s1, s3, 1 ; GFX7-NEXT: s_lshr_b32 s0, s3, 1 +; GFX7-NEXT: s_and_b32 s1, s3, 1 ; GFX7-NEXT: s_and_b32 s2, s2, 0xffff ; GFX7-NEXT: s_lshl_b32 s1, s1, 4 +; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 ; GFX7-NEXT: s_lshl_b32 s2, s2, s1 ; GFX7-NEXT: s_lshl_b32 s1, 0xffff, s1 -; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 -; GFX7-NEXT: s_not_b32 s1, s1 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc -; GFX7-NEXT: v_and_b32_e32 v2, s1, v2 +; GFX7-NEXT: v_bfi_b32 v2, s1, 0, v2 ; GFX7-NEXT: v_or_b32_e32 v2, s2, v2 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], s0, 0 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] @@ -1090,8 +1076,7 @@ define amdgpu_ps void @insertelement_s_v4i16_s_v(ptr addrspace(4) inreg %ptr, i1 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX8-NEXT: v_lshlrev_b32_e64 v3, v0, s2 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v0, v4 -; GFX8-NEXT: v_not_b32_e32 v0, v0 -; GFX8-NEXT: v_and_b32_e32 v0, v1, v0 +; GFX8-NEXT: v_bfi_b32 v0, v0, 0, v1 ; GFX8-NEXT: v_or_b32_e32 v4, v0, v3 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 @@ -1117,8 +1102,7 @@ define amdgpu_ps void @insertelement_s_v4i16_s_v(ptr addrspace(4) inreg %ptr, i1 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc ; GFX7-NEXT: v_lshl_b32_e32 v3, s2, v0 ; GFX7-NEXT: v_lshl_b32_e32 v0, 0xffff, v0 -; GFX7-NEXT: v_not_b32_e32 v0, v0 -; GFX7-NEXT: v_and_b32_e32 v0, v1, v0 +; GFX7-NEXT: v_bfi_b32 v0, v0, 0, v1 ; GFX7-NEXT: v_or_b32_e32 v3, v0, v3 ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -1228,8 +1212,7 @@ define amdgpu_ps void @insertelement_s_v4i16_v_v(ptr addrspace(4) inreg %ptr, i1 ; GFX8-NEXT: v_mov_b32_e32 v4, 0xffff ; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, v1, v4 -; GFX8-NEXT: v_not_b32_e32 v1, v1 -; GFX8-NEXT: v_and_b32_e32 v1, v3, v1 +; GFX8-NEXT: v_bfi_b32 v1, v1, 0, v3 ; GFX8-NEXT: v_or_b32_e32 v4, v1, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s0 ; GFX8-NEXT: v_mov_b32_e32 v1, s1 @@ -1246,17 +1229,16 @@ define amdgpu_ps void @insertelement_s_v4i16_v_v(ptr addrspace(4) inreg %ptr, i1 ; GFX7-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x0 ; GFX7-NEXT: v_lshrrev_b32_e32 v2, 1, v1 ; GFX7-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 4, v1 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v3, s0 ; GFX7-NEXT: v_mov_b32_e32 v4, s1 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc ; GFX7-NEXT: v_lshlrev_b32_e32 v0, v1, v0 ; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1 -; GFX7-NEXT: v_cndmask_b32_e32 v3, v3, v4, vcc -; GFX7-NEXT: v_not_b32_e32 v1, v1 -; GFX7-NEXT: v_and_b32_e32 v1, v3, v1 +; GFX7-NEXT: v_bfi_b32 v1, v1, 0, v3 ; GFX7-NEXT: v_or_b32_e32 v3, v1, v0 ; GFX7-NEXT: v_mov_b32_e32 v0, s0 ; GFX7-NEXT: v_mov_b32_e32 v1, s1 @@ -1356,16 +1338,15 @@ define amdgpu_ps void @insertelement_v_v4i16_s_v(ptr addrspace(1) %ptr, i16 inre ; GFX8-NEXT: v_mov_b32_e32 v5, 0xffff ; GFX8-NEXT: s_and_b32 s0, s2, 0xffff ; GFX8-NEXT: v_lshlrev_b32_e32 v2, 4, v2 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6 ; GFX8-NEXT: v_lshlrev_b32_e64 v7, v2, s0 ; GFX8-NEXT: v_lshlrev_b32_e32 v2, v2, v5 -; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v6 -; GFX8-NEXT: v_not_b32_e32 v2, v2 ; GFX8-NEXT: v_mov_b32_e32 v3, 0 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v6 ; GFX8-NEXT: v_mov_b32_e32 v4, 0 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_cndmask_b32_e32 v5, v0, v1, vcc -; GFX8-NEXT: v_and_b32_e32 v2, v5, v2 +; GFX8-NEXT: v_bfi_b32 v2, v2, 0, v5 ; GFX8-NEXT: v_or_b32_e32 v2, v2, v7 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc @@ -1382,16 +1363,15 @@ define amdgpu_ps void @insertelement_v_v4i16_s_v(ptr addrspace(1) %ptr, i16 inre ; GFX7-NEXT: v_and_b32_e32 v2, 1, v2 ; GFX7-NEXT: s_and_b32 s0, s2, 0xffff ; GFX7-NEXT: v_lshlrev_b32_e32 v2, 4, v2 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3 ; GFX7-NEXT: v_lshl_b32_e32 v4, s0, v2 ; GFX7-NEXT: v_lshl_b32_e32 v2, 0xffff, v2 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3 -; GFX7-NEXT: v_not_b32_e32 v2, v2 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v3 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cndmask_b32_e32 v5, v0, v1, vcc -; GFX7-NEXT: v_and_b32_e32 v2, v5, v2 +; GFX7-NEXT: v_bfi_b32 v2, v2, 0, v5 ; GFX7-NEXT: v_or_b32_e32 v2, v2, v4 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc @@ -1479,15 +1459,14 @@ define amdgpu_ps void @insertelement_v_v4i16_v_s(ptr addrspace(1) %ptr, i16 %val ; GFX8-NEXT: s_lshr_b32 s0, s2, 1 ; GFX8-NEXT: s_lshl_b32 s1, s1, 4 ; GFX8-NEXT: v_mov_b32_e32 v5, s1 -; GFX8-NEXT: s_lshl_b32 s1, 0xffff, s1 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 +; GFX8-NEXT: s_lshl_b32 s1, 0xffff, s1 ; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; GFX8-NEXT: s_not_b32 s1, s1 ; GFX8-NEXT: v_mov_b32_e32 v3, 0 ; GFX8-NEXT: v_mov_b32_e32 v4, 0 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_cndmask_b32_e32 v5, v0, v1, vcc -; GFX8-NEXT: v_and_b32_e32 v5, s1, v5 +; GFX8-NEXT: v_bfi_b32 v5, s1, 0, v5 ; GFX8-NEXT: v_or_b32_e32 v2, v5, v2 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s0, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] @@ -1501,19 +1480,18 @@ define amdgpu_ps void @insertelement_v_v4i16_v_s(ptr addrspace(1) %ptr, i16 %val ; GFX7-NEXT: s_mov_b32 s7, 0xf000 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: buffer_load_dwordx2 v[0:1], v[0:1], s[4:7], 0 addr64 -; GFX7-NEXT: s_and_b32 s1, s2, 1 ; GFX7-NEXT: s_lshr_b32 s0, s2, 1 +; GFX7-NEXT: s_and_b32 s1, s2, 1 ; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; GFX7-NEXT: s_lshl_b32 s1, s1, 4 +; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, s1, v2 ; GFX7-NEXT: s_lshl_b32 s1, 0xffff, s1 -; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s0, 1 -; GFX7-NEXT: s_not_b32 s1, s1 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cndmask_b32_e32 v3, v0, v1, vcc -; GFX7-NEXT: v_and_b32_e32 v3, s1, v3 +; GFX7-NEXT: v_bfi_b32 v3, s1, 0, v3 ; GFX7-NEXT: v_or_b32_e32 v2, v3, v2 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], s0, 0 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] @@ -1601,16 +1579,15 @@ define amdgpu_ps void @insertelement_v_v4i16_v_v(ptr addrspace(1) %ptr, i16 %val ; GFX8-NEXT: v_and_b32_e32 v3, 1, v3 ; GFX8-NEXT: v_mov_b32_e32 v6, 0xffff ; GFX8-NEXT: v_lshlrev_b32_e32 v3, 4, v3 +; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7 ; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX8-NEXT: v_lshlrev_b32_e32 v3, v3, v6 -; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v7 -; GFX8-NEXT: v_not_b32_e32 v3, v3 ; GFX8-NEXT: v_mov_b32_e32 v4, 0 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v7 ; GFX8-NEXT: v_mov_b32_e32 v5, 0 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_cndmask_b32_e32 v6, v0, v1, vcc -; GFX8-NEXT: v_and_b32_e32 v3, v6, v3 +; GFX8-NEXT: v_bfi_b32 v3, v3, 0, v6 ; GFX8-NEXT: v_or_b32_e32 v2, v3, v2 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc @@ -1627,16 +1604,15 @@ define amdgpu_ps void @insertelement_v_v4i16_v_v(ptr addrspace(1) %ptr, i16 %val ; GFX7-NEXT: v_and_b32_e32 v3, 1, v3 ; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; GFX7-NEXT: v_lshlrev_b32_e32 v3, 4, v3 +; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, v3, v2 ; GFX7-NEXT: v_lshl_b32_e32 v3, 0xffff, v3 -; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 -; GFX7-NEXT: v_not_b32_e32 v3, v3 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v4 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_cndmask_b32_e32 v5, v0, v1, vcc -; GFX7-NEXT: v_and_b32_e32 v3, v5, v3 +; GFX7-NEXT: v_bfi_b32 v3, v3, 0, v5 ; GFX7-NEXT: v_or_b32_e32 v2, v3, v2 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc @@ -1910,14 +1886,13 @@ define amdgpu_ps void @insertelement_v_v8i16_s_s(ptr addrspace(1) %ptr, i16 inre ; GFX8-LABEL: insertelement_v_v8i16_s_s: ; GFX8: ; %bb.0: ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] -; GFX8-NEXT: s_and_b32 s0, s3, 1 ; GFX8-NEXT: s_lshr_b32 s4, s3, 1 +; GFX8-NEXT: s_and_b32 s0, s3, 1 ; GFX8-NEXT: s_and_b32 s1, s2, 0xffff ; GFX8-NEXT: s_lshl_b32 s0, s0, 4 -; GFX8-NEXT: s_lshl_b32 s5, s1, s0 -; GFX8-NEXT: s_lshl_b32 s0, 0xffff, s0 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s4, 1 -; GFX8-NEXT: s_not_b32 s6, s0 +; GFX8-NEXT: s_lshl_b32 s5, s1, s0 +; GFX8-NEXT: s_lshl_b32 s6, 0xffff, s0 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s4, 2 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[2:3], s4, 3 ; GFX8-NEXT: v_mov_b32_e32 v4, 0 @@ -1926,7 +1901,7 @@ define amdgpu_ps void @insertelement_v_v8i16_s_s(ptr addrspace(1) %ptr, i16 inre ; GFX8-NEXT: v_cndmask_b32_e32 v6, v0, v1, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v2, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v3, s[2:3] -; GFX8-NEXT: v_and_b32_e32 v6, s6, v6 +; GFX8-NEXT: v_bfi_b32 v6, s6, 0, v6 ; GFX8-NEXT: v_or_b32_e32 v6, s5, v6 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], s4, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v6, s[4:5] @@ -1942,14 +1917,13 @@ define amdgpu_ps void @insertelement_v_v8i16_s_s(ptr addrspace(1) %ptr, i16 inre ; GFX7-NEXT: s_mov_b32 s11, 0xf000 ; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: buffer_load_dwordx4 v[0:3], v[0:1], s[8:11], 0 addr64 -; GFX7-NEXT: s_and_b32 s0, s3, 1 ; GFX7-NEXT: s_lshr_b32 s4, s3, 1 +; GFX7-NEXT: s_and_b32 s0, s3, 1 ; GFX7-NEXT: s_and_b32 s1, s2, 0xffff ; GFX7-NEXT: s_lshl_b32 s0, s0, 4 -; GFX7-NEXT: s_lshl_b32 s5, s1, s0 -; GFX7-NEXT: s_lshl_b32 s0, 0xffff, s0 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s4, 1 -; GFX7-NEXT: s_not_b32 s6, s0 +; GFX7-NEXT: s_lshl_b32 s5, s1, s0 +; GFX7-NEXT: s_lshl_b32 s6, 0xffff, s0 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], s4, 2 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], s4, 3 ; GFX7-NEXT: s_mov_b64 s[8:9], 0 @@ -1958,7 +1932,7 @@ define amdgpu_ps void @insertelement_v_v8i16_s_s(ptr addrspace(1) %ptr, i16 inre ; GFX7-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc ; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v2, s[0:1] ; GFX7-NEXT: v_cndmask_b32_e64 v4, v4, v3, s[2:3] -; GFX7-NEXT: v_and_b32_e32 v4, s6, v4 +; GFX7-NEXT: v_bfi_b32 v4, s6, 0, v4 ; GFX7-NEXT: v_or_b32_e32 v4, s5, v4 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], s4, 0 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] @@ -2263,17 +2237,16 @@ define amdgpu_ps void @insertelement_s_v8i16_s_v(ptr addrspace(4) inreg %ptr, i1 ; GFX8-NEXT: v_mov_b32_e32 v2, s9 ; GFX8-NEXT: v_mov_b32_e32 v3, s10 ; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX8-NEXT: v_mov_b32_e32 v5, s11 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] +; GFX8-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v4 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, 4, v0 ; GFX8-NEXT: s_and_b32 s4, s4, 0xffff ; GFX8-NEXT: v_mov_b32_e32 v3, 0xffff -; GFX8-NEXT: v_mov_b32_e32 v5, s11 -; GFX8-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v4 +; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[2:3] ; GFX8-NEXT: v_lshlrev_b32_e64 v2, v0, s4 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v0, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[2:3] -; GFX8-NEXT: v_not_b32_e32 v0, v0 -; GFX8-NEXT: v_and_b32_e32 v0, v1, v0 +; GFX8-NEXT: v_bfi_b32 v0, v0, 0, v1 ; GFX8-NEXT: v_or_b32_e32 v6, v0, v2 ; GFX8-NEXT: v_mov_b32_e32 v0, s8 ; GFX8-NEXT: v_mov_b32_e32 v1, s9 @@ -2294,23 +2267,22 @@ define amdgpu_ps void @insertelement_s_v8i16_s_v(ptr addrspace(4) inreg %ptr, i1 ; GFX7-NEXT: s_load_dwordx4 s[8:11], s[2:3], 0x0 ; GFX7-NEXT: v_lshrrev_b32_e32 v4, 1, v0 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 -; GFX7-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v4 +; GFX7-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v1, s8 ; GFX7-NEXT: v_mov_b32_e32 v2, s9 ; GFX7-NEXT: v_mov_b32_e32 v3, s10 ; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 4, v0 -; GFX7-NEXT: s_and_b32 s4, s4, 0xffff ; GFX7-NEXT: v_mov_b32_e32 v5, s11 ; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] ; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; GFX7-NEXT: s_and_b32 s4, s4, 0xffff +; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[2:3] ; GFX7-NEXT: v_lshl_b32_e32 v2, s4, v0 ; GFX7-NEXT: v_lshl_b32_e32 v0, 0xffff, v0 -; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[2:3] -; GFX7-NEXT: v_not_b32_e32 v0, v0 -; GFX7-NEXT: v_and_b32_e32 v0, v1, v0 +; GFX7-NEXT: v_bfi_b32 v0, v0, 0, v1 ; GFX7-NEXT: v_or_b32_e32 v5, v0, v2 ; GFX7-NEXT: v_mov_b32_e32 v0, s8 ; GFX7-NEXT: v_mov_b32_e32 v1, s9 @@ -2441,23 +2413,22 @@ define amdgpu_ps void @insertelement_s_v8i16_v_v(ptr addrspace(4) inreg %ptr, i1 ; GFX8-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 ; GFX8-NEXT: v_lshrrev_b32_e32 v4, 1, v1 ; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 -; GFX8-NEXT: v_and_b32_e32 v1, 1, v1 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v4 +; GFX8-NEXT: v_and_b32_e32 v1, 1, v1 ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mov_b32_e32 v2, s4 ; GFX8-NEXT: v_mov_b32_e32 v3, s5 ; GFX8-NEXT: v_mov_b32_e32 v5, s6 ; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 4, v1 -; GFX8-NEXT: v_mov_b32_e32 v3, 0xffff ; GFX8-NEXT: v_mov_b32_e32 v6, s7 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[0:1] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v4 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 4, v1 +; GFX8-NEXT: v_mov_b32_e32 v3, 0xffff +; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[2:3] ; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, v1, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[2:3] -; GFX8-NEXT: v_not_b32_e32 v1, v1 -; GFX8-NEXT: v_and_b32_e32 v1, v2, v1 +; GFX8-NEXT: v_bfi_b32 v1, v1, 0, v2 ; GFX8-NEXT: v_or_b32_e32 v6, v1, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s4 ; GFX8-NEXT: v_mov_b32_e32 v1, s5 @@ -2478,23 +2449,22 @@ define amdgpu_ps void @insertelement_s_v8i16_v_v(ptr addrspace(4) inreg %ptr, i1 ; GFX7-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 ; GFX7-NEXT: v_lshrrev_b32_e32 v4, 1, v1 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v4 -; GFX7-NEXT: v_and_b32_e32 v1, 1, v1 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v4 +; GFX7-NEXT: v_and_b32_e32 v1, 1, v1 ; GFX7-NEXT: s_waitcnt lgkmcnt(0) ; GFX7-NEXT: v_mov_b32_e32 v2, s4 ; GFX7-NEXT: v_mov_b32_e32 v3, s5 ; GFX7-NEXT: v_mov_b32_e32 v5, s6 ; GFX7-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 4, v1 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 ; GFX7-NEXT: v_mov_b32_e32 v6, s7 ; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[0:1] ; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v4 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 4, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[2:3] ; GFX7-NEXT: v_lshlrev_b32_e32 v0, v1, v0 ; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1 -; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[2:3] -; GFX7-NEXT: v_not_b32_e32 v1, v1 -; GFX7-NEXT: v_and_b32_e32 v1, v2, v1 +; GFX7-NEXT: v_bfi_b32 v1, v1, 0, v2 ; GFX7-NEXT: v_or_b32_e32 v5, v1, v0 ; GFX7-NEXT: v_mov_b32_e32 v0, s4 ; GFX7-NEXT: v_mov_b32_e32 v1, s5 @@ -2628,7 +2598,6 @@ define amdgpu_ps void @insertelement_v_v8i16_s_v(ptr addrspace(1) %ptr, i16 inre ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v2, v0 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v1 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v1 -; GFX8-NEXT: v_not_b32_e32 v0, v0 ; GFX8-NEXT: v_mov_b32_e32 v7, 0 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 ; GFX8-NEXT: v_mov_b32_e32 v8, 0 @@ -2636,7 +2605,7 @@ define amdgpu_ps void @insertelement_v_v8i16_s_v(ptr addrspace(1) %ptr, i16 inre ; GFX8-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v5, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[2:3] -; GFX8-NEXT: v_and_b32_e32 v0, v2, v0 +; GFX8-NEXT: v_bfi_b32 v0, v0, 0, v2 ; GFX8-NEXT: v_or_b32_e32 v9, v0, v9 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v3, v9, s[4:5] ; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v9, vcc @@ -2658,9 +2627,8 @@ define amdgpu_ps void @insertelement_v_v8i16_s_v(ptr addrspace(1) %ptr, i16 inre ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; GFX7-NEXT: v_lshl_b32_e32 v2, s0, v1 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v0 -; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0 -; GFX7-NEXT: v_not_b32_e32 v1, v1 +; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 ; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: s_mov_b32 s10, -1 @@ -2668,7 +2636,7 @@ define amdgpu_ps void @insertelement_v_v8i16_s_v(ptr addrspace(1) %ptr, i16 inre ; GFX7-NEXT: v_cndmask_b32_e32 v7, v3, v4, vcc ; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v5, s[0:1] ; GFX7-NEXT: v_cndmask_b32_e64 v7, v7, v6, s[2:3] -; GFX7-NEXT: v_and_b32_e32 v1, v7, v1 +; GFX7-NEXT: v_bfi_b32 v1, v1, 0, v7 ; GFX7-NEXT: v_or_b32_e32 v7, v1, v2 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v3, v7, s[4:5] ; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v7, vcc @@ -2773,13 +2741,12 @@ define amdgpu_ps void @insertelement_v_v8i16_v_s(ptr addrspace(1) %ptr, i16 %val ; GFX8-LABEL: insertelement_v_v8i16_v_s: ; GFX8: ; %bb.0: ; GFX8-NEXT: flat_load_dwordx4 v[3:6], v[0:1] -; GFX8-NEXT: s_and_b32 s0, s2, 1 ; GFX8-NEXT: s_lshr_b32 s4, s2, 1 +; GFX8-NEXT: s_and_b32 s0, s2, 1 ; GFX8-NEXT: s_lshl_b32 s0, s0, 4 -; GFX8-NEXT: v_mov_b32_e32 v0, s0 -; GFX8-NEXT: s_lshl_b32 s0, 0xffff, s0 ; GFX8-NEXT: v_cmp_eq_u32_e64 vcc, s4, 1 -; GFX8-NEXT: s_not_b32 s5, s0 +; GFX8-NEXT: v_mov_b32_e32 v0, s0 +; GFX8-NEXT: s_lshl_b32 s5, 0xffff, s0 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], s4, 2 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[2:3], s4, 3 ; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 @@ -2789,7 +2756,7 @@ define amdgpu_ps void @insertelement_v_v8i16_v_s(ptr addrspace(1) %ptr, i16 %val ; GFX8-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[2:3] -; GFX8-NEXT: v_and_b32_e32 v1, s5, v1 +; GFX8-NEXT: v_bfi_b32 v1, s5, 0, v1 ; GFX8-NEXT: v_or_b32_e32 v9, v1, v0 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], s4, 0 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v3, v9, s[4:5] @@ -2805,14 +2772,13 @@ define amdgpu_ps void @insertelement_v_v8i16_v_s(ptr addrspace(1) %ptr, i16 %val ; GFX7-NEXT: s_mov_b32 s11, 0xf000 ; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: buffer_load_dwordx4 v[3:6], v[0:1], s[8:11], 0 addr64 -; GFX7-NEXT: s_and_b32 s0, s2, 1 ; GFX7-NEXT: s_lshr_b32 s4, s2, 1 +; GFX7-NEXT: s_and_b32 s0, s2, 1 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v2 ; GFX7-NEXT: s_lshl_b32 s0, s0, 4 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, s0, v0 -; GFX7-NEXT: s_lshl_b32 s0, 0xffff, s0 ; GFX7-NEXT: v_cmp_eq_u32_e64 vcc, s4, 1 -; GFX7-NEXT: s_not_b32 s5, s0 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, s0, v0 +; GFX7-NEXT: s_lshl_b32 s5, 0xffff, s0 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], s4, 2 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], s4, 3 ; GFX7-NEXT: s_mov_b64 s[8:9], 0 @@ -2821,7 +2787,7 @@ define amdgpu_ps void @insertelement_v_v8i16_v_s(ptr addrspace(1) %ptr, i16 %val ; GFX7-NEXT: v_cndmask_b32_e32 v1, v3, v4, vcc ; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[0:1] ; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[2:3] -; GFX7-NEXT: v_and_b32_e32 v1, s5, v1 +; GFX7-NEXT: v_bfi_b32 v1, s5, 0, v1 ; GFX7-NEXT: v_or_b32_e32 v7, v1, v0 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], s4, 0 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v3, v7, s[4:5] @@ -2935,7 +2901,6 @@ define amdgpu_ps void @insertelement_v_v8i16_v_v(ptr addrspace(1) %ptr, i16 %val ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v3, v0 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v1 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v1 -; GFX8-NEXT: v_not_b32_e32 v0, v0 ; GFX8-NEXT: v_mov_b32_e32 v8, 0 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v1 ; GFX8-NEXT: v_mov_b32_e32 v9, 0 @@ -2943,7 +2908,7 @@ define amdgpu_ps void @insertelement_v_v8i16_v_v(ptr addrspace(1) %ptr, i16 %val ; GFX8-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc ; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[0:1] ; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[2:3] -; GFX8-NEXT: v_and_b32_e32 v0, v3, v0 +; GFX8-NEXT: v_bfi_b32 v0, v0, 0, v3 ; GFX8-NEXT: v_or_b32_e32 v3, v0, v2 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, v3, s[4:5] ; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc @@ -2959,15 +2924,14 @@ define amdgpu_ps void @insertelement_v_v8i16_v_v(ptr addrspace(1) %ptr, i16 %val ; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: buffer_load_dwordx4 v[4:7], v[0:1], s[8:11], 0 addr64 ; GFX7-NEXT: v_lshrrev_b32_e32 v0, 1, v3 -; GFX7-NEXT: v_and_b32_e32 v1, 1, v3 ; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; GFX7-NEXT: v_and_b32_e32 v1, 1, v3 +; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v0 ; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 4, v1 -; GFX7-NEXT: v_cmp_eq_u32_e64 s[0:1], 2, v0 +; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, v1, v2 ; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1 -; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0 -; GFX7-NEXT: v_not_b32_e32 v1, v1 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 ; GFX7-NEXT: s_mov_b64 s[8:9], 0 ; GFX7-NEXT: s_mov_b32 s10, -1 @@ -2975,7 +2939,7 @@ define amdgpu_ps void @insertelement_v_v8i16_v_v(ptr addrspace(1) %ptr, i16 %val ; GFX7-NEXT: v_cndmask_b32_e32 v3, v4, v5, vcc ; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v6, s[0:1] ; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v7, s[2:3] -; GFX7-NEXT: v_and_b32_e32 v1, v3, v1 +; GFX7-NEXT: v_bfi_b32 v1, v1, 0, v3 ; GFX7-NEXT: v_or_b32_e32 v3, v1, v2 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, v3, s[4:5] ; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc @@ -3283,19 +3247,18 @@ define amdgpu_ps void @insertelement_v_v16i16_s_s(ptr addrspace(1) %ptr, i16 inr ; GFX8-NEXT: flat_load_dwordx4 v[0:3], v[0:1] ; GFX8-NEXT: flat_load_dwordx4 v[4:7], v[4:5] ; GFX8-NEXT: s_and_b32 s0, s3, 1 +; GFX8-NEXT: s_lshr_b32 m0, s3, 1 ; GFX8-NEXT: s_and_b32 s1, s2, 0xffff ; GFX8-NEXT: s_lshl_b32 s0, s0, 4 -; GFX8-NEXT: s_lshr_b32 m0, s3, 1 ; GFX8-NEXT: s_lshl_b32 s1, s1, s0 ; GFX8-NEXT: s_lshl_b32 s0, 0xffff, s0 -; GFX8-NEXT: s_not_b32 s0, s0 ; GFX8-NEXT: v_mov_b32_e32 v8, 0 ; GFX8-NEXT: v_mov_b32_e32 v9, 0 ; GFX8-NEXT: v_mov_b32_e32 v10, 16 ; GFX8-NEXT: v_mov_b32_e32 v11, 0 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_movrels_b32_e32 v12, v0 -; GFX8-NEXT: v_and_b32_e32 v12, s0, v12 +; GFX8-NEXT: v_bfi_b32 v12, s0, 0, v12 ; GFX8-NEXT: v_or_b32_e32 v12, s1, v12 ; GFX8-NEXT: v_movreld_b32_e32 v0, v12 ; GFX8-NEXT: flat_store_dwordx4 v[8:9], v[0:3] @@ -3310,17 +3273,16 @@ define amdgpu_ps void @insertelement_v_v16i16_s_s(ptr addrspace(1) %ptr, i16 inr ; GFX7-NEXT: buffer_load_dwordx4 v[2:5], v[0:1], s[4:7], 0 addr64 ; GFX7-NEXT: buffer_load_dwordx4 v[6:9], v[0:1], s[4:7], 0 addr64 offset:16 ; GFX7-NEXT: s_and_b32 s0, s3, 1 +; GFX7-NEXT: s_lshr_b32 m0, s3, 1 ; GFX7-NEXT: s_and_b32 s1, s2, 0xffff ; GFX7-NEXT: s_lshl_b32 s0, s0, 4 -; GFX7-NEXT: s_lshr_b32 m0, s3, 1 ; GFX7-NEXT: s_lshl_b32 s1, s1, s0 ; GFX7-NEXT: s_lshl_b32 s0, 0xffff, s0 -; GFX7-NEXT: s_not_b32 s0, s0 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_movrels_b32_e32 v0, v2 -; GFX7-NEXT: v_and_b32_e32 v0, s0, v0 +; GFX7-NEXT: v_bfi_b32 v0, s0, 0, v0 ; GFX7-NEXT: v_or_b32_e32 v0, s1, v0 ; GFX7-NEXT: v_movreld_b32_e32 v2, v0 ; GFX7-NEXT: buffer_store_dwordx4 v[2:5], off, s[4:7], 0 @@ -3644,21 +3606,20 @@ define amdgpu_ps void @insertelement_s_v16i16_s_v(ptr addrspace(4) inreg %ptr, i ; GFX8-NEXT: v_mov_b32_e32 v6, s21 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[14:15] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v8 -; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX8-NEXT: v_mov_b32_e32 v7, s22 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[6:7] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v8 -; GFX8-NEXT: v_lshlrev_b32_e32 v0, 4, v0 -; GFX8-NEXT: s_and_b32 s4, s4, 0xffff -; GFX8-NEXT: v_mov_b32_e32 v3, 0xffff +; GFX8-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX8-NEXT: v_mov_b32_e32 v9, s23 ; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[8:9] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v8 +; GFX8-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; GFX8-NEXT: s_and_b32 s4, s4, 0xffff +; GFX8-NEXT: v_mov_b32_e32 v3, 0xffff +; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[10:11] ; GFX8-NEXT: v_lshlrev_b32_e64 v2, v0, s4 ; GFX8-NEXT: v_lshlrev_b32_e32 v0, v0, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[10:11] -; GFX8-NEXT: v_not_b32_e32 v0, v0 -; GFX8-NEXT: v_and_b32_e32 v0, v1, v0 +; GFX8-NEXT: v_bfi_b32 v0, v0, 0, v1 ; GFX8-NEXT: v_or_b32_e32 v9, v0, v2 ; GFX8-NEXT: v_mov_b32_e32 v0, s16 ; GFX8-NEXT: v_mov_b32_e32 v1, s17 @@ -3705,20 +3666,19 @@ define amdgpu_ps void @insertelement_s_v16i16_s_v(ptr addrspace(4) inreg %ptr, i ; GFX7-NEXT: v_mov_b32_e32 v6, s21 ; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[14:15] ; GFX7-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v8 -; GFX7-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX7-NEXT: v_mov_b32_e32 v7, s22 ; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v6, s[6:7] ; GFX7-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v8 -; GFX7-NEXT: v_lshlrev_b32_e32 v0, 4, v0 -; GFX7-NEXT: s_and_b32 s4, s4, 0xffff +; GFX7-NEXT: v_and_b32_e32 v0, 1, v0 ; GFX7-NEXT: v_mov_b32_e32 v9, s23 ; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v7, s[8:9] ; GFX7-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v8 +; GFX7-NEXT: v_lshlrev_b32_e32 v0, 4, v0 +; GFX7-NEXT: s_and_b32 s4, s4, 0xffff +; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[10:11] ; GFX7-NEXT: v_lshl_b32_e32 v2, s4, v0 ; GFX7-NEXT: v_lshl_b32_e32 v0, 0xffff, v0 -; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, v9, s[10:11] -; GFX7-NEXT: v_not_b32_e32 v0, v0 -; GFX7-NEXT: v_and_b32_e32 v0, v1, v0 +; GFX7-NEXT: v_bfi_b32 v0, v0, 0, v1 ; GFX7-NEXT: v_or_b32_e32 v9, v0, v2 ; GFX7-NEXT: v_mov_b32_e32 v0, s16 ; GFX7-NEXT: v_mov_b32_e32 v1, s17 @@ -3936,20 +3896,19 @@ define amdgpu_ps void @insertelement_s_v16i16_v_v(ptr addrspace(4) inreg %ptr, i ; GFX8-NEXT: v_mov_b32_e32 v7, s17 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v8 -; GFX8-NEXT: v_and_b32_e32 v1, 1, v1 ; GFX8-NEXT: v_mov_b32_e32 v9, s18 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v7, s[6:7] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v8 -; GFX8-NEXT: v_lshlrev_b32_e32 v1, 4, v1 -; GFX8-NEXT: v_mov_b32_e32 v3, 0xffff +; GFX8-NEXT: v_and_b32_e32 v1, 1, v1 ; GFX8-NEXT: v_mov_b32_e32 v10, s19 ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[8:9] ; GFX8-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v8 +; GFX8-NEXT: v_lshlrev_b32_e32 v1, 4, v1 +; GFX8-NEXT: v_mov_b32_e32 v3, 0xffff +; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[10:11] ; GFX8-NEXT: v_lshlrev_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 ; GFX8-NEXT: v_lshlrev_b32_e32 v1, v1, v3 -; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[10:11] -; GFX8-NEXT: v_not_b32_e32 v1, v1 -; GFX8-NEXT: v_and_b32_e32 v1, v2, v1 +; GFX8-NEXT: v_bfi_b32 v1, v1, 0, v2 ; GFX8-NEXT: v_or_b32_e32 v9, v1, v0 ; GFX8-NEXT: v_mov_b32_e32 v0, s12 ; GFX8-NEXT: v_mov_b32_e32 v1, s13 @@ -3996,20 +3955,19 @@ define amdgpu_ps void @insertelement_s_v16i16_v_v(ptr addrspace(4) inreg %ptr, i ; GFX7-NEXT: v_mov_b32_e32 v7, s17 ; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v6, s[4:5] ; GFX7-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v8 -; GFX7-NEXT: v_and_b32_e32 v1, 1, v1 ; GFX7-NEXT: v_mov_b32_e32 v9, s18 ; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v7, s[6:7] ; GFX7-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v8 -; GFX7-NEXT: v_lshlrev_b32_e32 v1, 4, v1 -; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7-NEXT: v_and_b32_e32 v1, 1, v1 ; GFX7-NEXT: v_mov_b32_e32 v10, s19 ; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[8:9] ; GFX7-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v8 +; GFX7-NEXT: v_lshlrev_b32_e32 v1, 4, v1 +; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[10:11] ; GFX7-NEXT: v_lshlrev_b32_e32 v0, v1, v0 ; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1 -; GFX7-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[10:11] -; GFX7-NEXT: v_not_b32_e32 v1, v1 -; GFX7-NEXT: v_and_b32_e32 v1, v2, v1 +; GFX7-NEXT: v_bfi_b32 v1, v1, 0, v2 ; GFX7-NEXT: v_or_b32_e32 v9, v1, v0 ; GFX7-NEXT: v_mov_b32_e32 v0, s12 ; GFX7-NEXT: v_mov_b32_e32 v1, s13 @@ -4216,7 +4174,6 @@ define amdgpu_ps void @insertelement_v_v16i16_s_v(ptr addrspace(1) %ptr, i16 inr ; GFX8-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v1 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v1 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v1 -; GFX8-NEXT: v_not_b32_e32 v0, v0 ; GFX8-NEXT: v_mov_b32_e32 v11, 0 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v1 ; GFX8-NEXT: v_mov_b32_e32 v12, 0 @@ -4231,7 +4188,7 @@ define amdgpu_ps void @insertelement_v_v16i16_s_v(ptr addrspace(1) %ptr, i16 inr ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[6:7] ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v9, s[8:9] ; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v10, s[10:11] -; GFX8-NEXT: v_and_b32_e32 v0, v2, v0 +; GFX8-NEXT: v_bfi_b32 v0, v0, 0, v2 ; GFX8-NEXT: v_or_b32_e32 v15, v0, v15 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v3, v15, s[12:13] ; GFX8-NEXT: v_cndmask_b32_e32 v1, v4, v15, vcc @@ -4263,9 +4220,8 @@ define amdgpu_ps void @insertelement_v_v16i16_s_v(ptr addrspace(1) %ptr, i16 inr ; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], 4, v0 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v0 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v0 -; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v0 -; GFX7-NEXT: v_not_b32_e32 v1, v1 +; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v0 ; GFX7-NEXT: s_mov_b64 s[16:17], 0 ; GFX7-NEXT: s_mov_b32 s18, -1 @@ -4278,7 +4234,7 @@ define amdgpu_ps void @insertelement_v_v16i16_s_v(ptr addrspace(1) %ptr, i16 inr ; GFX7-NEXT: v_cndmask_b32_e64 v11, v11, v8, s[6:7] ; GFX7-NEXT: v_cndmask_b32_e64 v11, v11, v9, s[8:9] ; GFX7-NEXT: v_cndmask_b32_e64 v11, v11, v10, s[10:11] -; GFX7-NEXT: v_and_b32_e32 v1, v11, v1 +; GFX7-NEXT: v_bfi_b32 v1, v1, 0, v11 ; GFX7-NEXT: v_or_b32_e32 v11, v1, v2 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v3, v11, s[12:13] ; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v11, vcc @@ -4452,14 +4408,13 @@ define amdgpu_ps void @insertelement_v_v16i16_v_s(ptr addrspace(1) %ptr, i16 %va ; GFX8-NEXT: v_mov_b32_e32 v13, s0 ; GFX8-NEXT: s_lshl_b32 s0, 0xffff, s0 ; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v13, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0 -; GFX8-NEXT: s_not_b32 s0, s0 ; GFX8-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 ; GFX8-NEXT: v_mov_b32_e32 v11, 16 ; GFX8-NEXT: v_mov_b32_e32 v12, 0 ; GFX8-NEXT: s_waitcnt vmcnt(0) ; GFX8-NEXT: v_movrels_b32_e32 v13, v3 -; GFX8-NEXT: v_and_b32_e32 v13, s0, v13 +; GFX8-NEXT: v_bfi_b32 v13, s0, 0, v13 ; GFX8-NEXT: v_or_b32_e32 v2, v13, v2 ; GFX8-NEXT: v_movreld_b32_e32 v3, v2 ; GFX8-NEXT: flat_store_dwordx4 v[0:1], v[3:6] @@ -4474,17 +4429,16 @@ define amdgpu_ps void @insertelement_v_v16i16_v_s(ptr addrspace(1) %ptr, i16 %va ; GFX7-NEXT: buffer_load_dwordx4 v[3:6], v[0:1], s[4:7], 0 addr64 ; GFX7-NEXT: buffer_load_dwordx4 v[7:10], v[0:1], s[4:7], 0 addr64 offset:16 ; GFX7-NEXT: s_and_b32 s0, s2, 1 +; GFX7-NEXT: s_lshr_b32 m0, s2, 1 ; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v2 ; GFX7-NEXT: s_lshl_b32 s0, s0, 4 -; GFX7-NEXT: s_lshr_b32 m0, s2, 1 ; GFX7-NEXT: v_lshlrev_b32_e32 v0, s0, v0 ; GFX7-NEXT: s_lshl_b32 s0, 0xffff, s0 -; GFX7-NEXT: s_not_b32 s0, s0 ; GFX7-NEXT: s_mov_b64 s[4:5], 0 ; GFX7-NEXT: s_mov_b32 s6, -1 ; GFX7-NEXT: s_waitcnt vmcnt(0) ; GFX7-NEXT: v_movrels_b32_e32 v1, v3 -; GFX7-NEXT: v_and_b32_e32 v1, s0, v1 +; GFX7-NEXT: v_bfi_b32 v1, s0, 0, v1 ; GFX7-NEXT: v_or_b32_e32 v0, v1, v0 ; GFX7-NEXT: v_movreld_b32_e32 v3, v0 ; GFX7-NEXT: buffer_store_dwordx4 v[3:6], off, s[4:7], 0 @@ -4611,7 +4565,6 @@ define amdgpu_ps void @insertelement_v_v16i16_v_v(ptr addrspace(1) %ptr, i16 %va ; GFX8-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v1 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v1 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v1 -; GFX8-NEXT: v_not_b32_e32 v0, v0 ; GFX8-NEXT: v_mov_b32_e32 v12, 0 ; GFX8-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v1 ; GFX8-NEXT: v_mov_b32_e32 v13, 0 @@ -4626,7 +4579,7 @@ define amdgpu_ps void @insertelement_v_v16i16_v_v(ptr addrspace(1) %ptr, i16 %va ; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[6:7] ; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v10, s[8:9] ; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[10:11] -; GFX8-NEXT: v_and_b32_e32 v0, v3, v0 +; GFX8-NEXT: v_bfi_b32 v0, v0, 0, v3 ; GFX8-NEXT: v_or_b32_e32 v16, v0, v2 ; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, v16, s[12:13] ; GFX8-NEXT: v_cndmask_b32_e32 v1, v5, v16, vcc @@ -4654,13 +4607,12 @@ define amdgpu_ps void @insertelement_v_v16i16_v_v(ptr addrspace(1) %ptr, i16 %va ; GFX7-NEXT: v_cmp_eq_u32_e64 s[2:3], 3, v0 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[4:5], 4, v0 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[6:7], 5, v0 +; GFX7-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v0 ; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2 ; GFX7-NEXT: v_lshlrev_b32_e32 v1, 4, v1 -; GFX7-NEXT: v_cmp_eq_u32_e64 s[8:9], 6, v0 +; GFX7-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v0 ; GFX7-NEXT: v_lshlrev_b32_e32 v2, v1, v2 ; GFX7-NEXT: v_lshl_b32_e32 v1, 0xffff, v1 -; GFX7-NEXT: v_cmp_eq_u32_e64 s[10:11], 7, v0 -; GFX7-NEXT: v_not_b32_e32 v1, v1 ; GFX7-NEXT: v_cmp_eq_u32_e64 s[12:13], 0, v0 ; GFX7-NEXT: s_mov_b64 s[16:17], 0 ; GFX7-NEXT: s_mov_b32 s18, -1 @@ -4673,7 +4625,7 @@ define amdgpu_ps void @insertelement_v_v16i16_v_v(ptr addrspace(1) %ptr, i16 %va ; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[6:7] ; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v10, s[8:9] ; GFX7-NEXT: v_cndmask_b32_e64 v3, v3, v11, s[10:11] -; GFX7-NEXT: v_and_b32_e32 v1, v3, v1 +; GFX7-NEXT: v_bfi_b32 v1, v1, 0, v3 ; GFX7-NEXT: v_or_b32_e32 v12, v1, v2 ; GFX7-NEXT: v_cndmask_b32_e64 v0, v4, v12, s[12:13] ; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v12, vcc |
