diff options
| author | Mingming Liu <mingmingl@google.com> | 2025-09-10 15:25:31 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-10 15:25:31 -0700 |
| commit | 1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch) | |
| tree | 57f4b1f313c8cf74eed8819870f39c36ea263c68 /llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll | |
| parent | 898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff) | |
| parent | b8cefcb601ddaa18482555c4ff363c01a270c2fe (diff) | |
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-format (branch: users/mingmingl-llvm/samplefdo-profile-format)
Diffstat (limited to 'llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll')
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll | 151 |
1 files changed, 89 insertions, 62 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll index 5674ae328406..db08cb132a3d 100644 --- a/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll +++ b/llvm/test/CodeGen/AMDGPU/fneg-modifier-casting.ll @@ -38,17 +38,29 @@ define <2 x i32> @fneg_xor_select_v2i32(<2 x i1> %cond, <2 x i32> %arg0, <2 x i3 ; GCN-NEXT: v_cndmask_b32_e64 v1, -v5, -v3, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: fneg_xor_select_v2i32: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX11-NEXT: v_and_b32_e32 v1, 1, v1 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX11-NEXT: v_cndmask_b32_e64 v0, -v4, -v2, vcc_lo -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 -; GFX11-NEXT: v_cndmask_b32_e64 v1, -v5, -v3, vcc_lo -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: fneg_xor_select_v2i32: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 1, v0.l +; GFX11-TRUE16-NEXT: v_and_b16 v1.l, 1, v1.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v0, -v4, -v2, vcc_lo +; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v1.l +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, -v5, -v3, vcc_lo +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: fneg_xor_select_v2i32: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX11-FAKE16-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v0, -v4, -v2, vcc_lo 
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, -v5, -v3, vcc_lo +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %select = select <2 x i1> %cond, <2 x i32> %arg0, <2 x i32> %arg1 %fneg = xor <2 x i32> %select, <i32 -2147483648, i32 -2147483648> ret <2 x i32> %fneg @@ -131,19 +143,34 @@ define <2 x i64> @fneg_xor_select_v2i64(<2 x i1> %cond, <2 x i64> %arg0, <2 x i6 ; GCN-NEXT: v_cndmask_b32_e64 v3, -v9, -v5, s[4:5] ; GCN-NEXT: s_setpc_b64 s[30:31] ; -; GFX11-LABEL: fneg_xor_select_v2i64: -; GFX11: ; %bb.0: -; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) -; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX11-NEXT: v_dual_cndmask_b32 v0, v6, v2 :: v_dual_and_b32 v1, 1, v1 -; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v1 -; GFX11-NEXT: v_cndmask_b32_e64 v1, -v7, -v3, vcc_lo -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, v4, s0 -; GFX11-NEXT: v_cndmask_b32_e64 v3, -v9, -v5, s0 -; GFX11-NEXT: s_setpc_b64 s[30:31] +; GFX11-TRUE16-LABEL: fneg_xor_select_v2i64: +; GFX11-TRUE16: ; %bb.0: +; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 1, v0.l +; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 1, v1.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.l +; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 1, v0.h +; GFX11-TRUE16-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v2, v8, v4, s0 +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v1, -v7, -v3, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b32_e64 v3, -v9, -v5, s0 +; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-FAKE16-LABEL: fneg_xor_select_v2i64: +; GFX11-FAKE16: ; %bb.0: +; GFX11-FAKE16-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-FAKE16-NEXT: v_and_b32_e32 v0, 1, v0 +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 +; GFX11-FAKE16-NEXT: v_dual_cndmask_b32 v0, v6, v2 :: v_dual_and_b32 v1, 1, v1 +; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v1 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v1, -v7, -v3, vcc_lo +; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v2, v8, v4, s0 +; GFX11-FAKE16-NEXT: v_cndmask_b32_e64 v3, -v9, -v5, s0 +; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31] %select = select <2 x i1> %cond, <2 x i64> %arg0, <2 x i64> %arg1 %fneg = xor <2 x i64> %select, <i64 9223372036854775808, i64 9223372036854775808> ret <2 x i64> %fneg @@ -218,11 +245,11 @@ define <2 x i16> @fneg_xor_select_v2i16(<2 x i1> %cond, <2 x i16> %arg0, <2 x i1 ; GFX11-TRUE16-LABEL: fneg_xor_select_v2i16: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 1, v1 +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 1, v0.l +; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 1, v1.l ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v1 +; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.l +; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 1, v0.h ; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, vcc_lo ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) ; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, s0 @@ -755,24 +782,24 @@ define <2 x half> @select_fneg_select_v2f16(<2 x i1> %cond0, <2 x i1> %cond1, <2 ; GFX11-TRUE16-LABEL: select_fneg_select_v2f16: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 1, v1 
-; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX11-TRUE16-NEXT: v_xor_b32_e32 v4, 0x80008000, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 1, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 -; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 1, v3 -; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, v4.h, v5.h, vcc_lo +; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 1, v1.l +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 1, v0.l +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v4 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v4.l, v5.l, s0 -; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v1 -; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v0.h, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, s0 +; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.h +; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 1, v0.l +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 1, v3.l +; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 1, v2.l +; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, v1.h, v5.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v1.l, v5.l, s0 +; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 1, v0.h +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v2.h, vcc_lo +; 
GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v2.l, s0 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-FAKE16-LABEL: select_fneg_select_v2f16: @@ -861,24 +888,24 @@ define <2 x i16> @select_fneg_xor_select_v2i16(<2 x i1> %cond0, <2 x i1> %cond1, ; GFX11-TRUE16-LABEL: select_fneg_xor_select_v2i16: ; GFX11-TRUE16: ; %bb.0: ; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 1, v1 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 1, v0 -; GFX11-TRUE16-NEXT: v_xor_b32_e32 v4, 0x80008000, v4 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 1, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) -; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 -; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v0 -; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 1, v3 -; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, v4.h, v5.h, vcc_lo +; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 1, v1.l +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 1, v0.l +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v4 ; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) -; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v4.l, v5.l, s0 -; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 -; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v2 -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) -; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v1 -; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v0.h, vcc_lo -; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) -; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v0.l, s0 +; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.h +; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 1, v0.l +; GFX11-TRUE16-NEXT: v_and_b16 v0.l, 1, v3.l +; GFX11-TRUE16-NEXT: v_and_b16 v0.h, 1, v2.l +; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.h, v1.h, v5.h, vcc_lo +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v1.l, v1.l, 
v5.l, s0 +; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.l +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 1, v0.h +; GFX11-TRUE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v1 +; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v2.h, vcc_lo +; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v2.l, s0 ; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-FAKE16-LABEL: select_fneg_xor_select_v2i16: |
