summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPetar Avramovic <Petar.Avramovic@amd.com>2025-11-17 18:42:52 +0100
committerPetar Avramovic <Petar.Avramovic@amd.com>2025-11-18 14:33:55 +0100
commit9e70882cad3857c0dfbc5285d567b921ef0280e7 (patch)
tree266b8b6b514b1d0330696efd6eba913103fd7708
parent3d5d32c6058807008e579dd5ea2faced33a7943b (diff)
AMDGPU/GlobalISel: Combine S16 copy-trunc-readanylane-anyextusers/petar-avramovic/readanylane-combine
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp8
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll7
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll6
3 files changed, 8 insertions, 13 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
index 907f8300de6d..396d64625fb5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalize.cpp
@@ -173,6 +173,14 @@ Register AMDGPURegBankLegalizeCombiner::getReadAnyLaneSrc(Register Src) {
if (mi_match(Src, MRI, m_GAMDGPUReadAnyLane(m_Reg(RALSrc))))
return RALSrc;
+ // RALSrc = G_ANYEXT S16Src
+ // TruncSrc = G_AMDGPU_READANYLANE RALSrc
+ // Src = G_TRUNC TruncSrc
+ if (mi_match(Src, MRI,
+ m_GTrunc(m_GAMDGPUReadAnyLane(m_GAnyExt(m_Reg(RALSrc)))))) {
+ return RALSrc;
+ }
+
// TruncSrc = G_AMDGPU_READANYLANE RALSrc
// AextSrc = G_TRUNC TruncSrc
// Src = G_ANYEXT AextSrc
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
index 4361e5c11370..27005e7aa175 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform-in-vgpr.ll
@@ -1070,9 +1070,6 @@ define amdgpu_ps void @load_divergent_P3_i16(ptr addrspace(3) inreg %ptra, ptr a
; GFX11-True16-NEXT: v_mov_b32_e32 v1, s0
; GFX11-True16-NEXT: ds_load_u16_d16 v1, v1
; GFX11-True16-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-True16-NEXT: v_readfirstlane_b32 s0, v1
-; GFX11-True16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-True16-NEXT: v_mov_b16_e32 v1.l, s0
; GFX11-True16-NEXT: ds_store_b16 v0, v1
; GFX11-True16-NEXT: s_endpgm
;
@@ -1089,10 +1086,6 @@ define amdgpu_ps void @load_divergent_P3_i16(ptr addrspace(3) inreg %ptra, ptr a
; GFX12-True16-NEXT: v_mov_b32_e32 v1, s0
; GFX12-True16-NEXT: ds_load_u16_d16 v1, v1
; GFX12-True16-NEXT: s_wait_dscnt 0x0
-; GFX12-True16-NEXT: v_readfirstlane_b32 s0, v1
-; GFX12-True16-NEXT: s_wait_alu 0xf1ff
-; GFX12-True16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX12-True16-NEXT: v_mov_b16_e32 v1.l, s0
; GFX12-True16-NEXT: ds_store_b16 v0, v1
; GFX12-True16-NEXT: s_endpgm
;
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
index bf36deac3338..9bf140cf744d 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/load-uniform.ll
@@ -13,9 +13,6 @@ define amdgpu_ps void @load_uniform_P1_i16_gfx12(ptr addrspace(1) inreg %ptra, p
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_load_d16_b16 v2, v2, s[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
; GFX11-NEXT: s_endpgm
;
@@ -312,9 +309,6 @@ define amdgpu_ps void @load_uniform_P4_i16_gfx12(ptr addrspace(4) inreg %ptra, p
; GFX11-NEXT: v_mov_b32_e32 v2, 0
; GFX11-NEXT: global_load_d16_b16 v2, v2, s[0:1]
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_readfirstlane_b32 s0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_mov_b16_e32 v2.l, s0
; GFX11-NEXT: global_store_b16 v[0:1], v2, off
; GFX11-NEXT: s_endpgm
;