diff options
| author | Austin Kerbow <Austin.Kerbow@amd.com> | 2025-09-01 23:05:40 -0700 |
|---|---|---|
| committer | Austin Kerbow <Austin.Kerbow@amd.com> | 2025-09-01 23:15:39 -0700 |
| commit | 33e36db9bf7e02f72bca0b9e495f7a223c9abb12 (patch) | |
| tree | 8db808a0ab75445e4e85b96bc04d1be85e85dde4 | |
| parent | 57365952ef3a0b5cd79b5af095c46579b409f7a2 (diff) | |
[AMDGPU] Fix gfx950 Trans32 latency (branch: users/kerbowa/gfx950-trans32-lat-fix)
The WriteTrans32 latency is updated from 4 to 2 on gfx950.
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SISchedule.td | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/AMDGPU/bf16.ll | 12 |
2 files changed, 11 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td index 8eecb1c1019a..9319d43e2ce1 100644 --- a/llvm/lib/Target/AMDGPU/SISchedule.td +++ b/llvm/lib/Target/AMDGPU/SISchedule.td @@ -172,7 +172,6 @@ multiclass SICommonWriteRes { def : HWVALUWriteRes<Write32Bit, 1>; def : HWVALUWriteRes<WriteFloatCvt, 4>; - def : HWVALUWriteRes<WriteTrans32, 4>; def : HWVALUWriteRes<WriteQuarterRate32, 4>; let ReleaseAtCycles = [4] in @@ -231,6 +230,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 1>; def : HWVALUWriteRes<WriteDouble, 4>; def : HWVALUWriteRes<WriteDoubleAdd, 2>; def : HWVALUWriteRes<WriteDoubleCvt, 4>; +def : HWVALUWriteRes<WriteTrans32, 4>; def : HWVALUWriteRes<WriteTrans64, 4>; } // End RetireOOO = 1 @@ -249,6 +249,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 16>; def : HWVALUWriteRes<WriteDouble, 16>; def : HWVALUWriteRes<WriteDoubleAdd, 8>; def : HWVALUWriteRes<WriteDoubleCvt, 4>; +def : HWVALUWriteRes<WriteTrans32, 4>; def : HWVALUWriteRes<WriteTrans64, 16>; } // End RetireOOO = 1 @@ -269,6 +270,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 1>; def : HWVALUWriteRes<WriteDouble, 1>; def : HWVALUWriteRes<WriteDoubleAdd, 1>; def : HWVALUWriteRes<WriteDoubleCvt, 1>; +def : HWVALUWriteRes<WriteTrans32, 4>; def : HWVALUWriteRes<WriteTrans64, 4>; def : HWVALUWriteRes<WriteIntMul, 1>; def : HWVALUWriteRes<Write64Bit, 1>; @@ -292,6 +294,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 1>; def : HWVALUWriteRes<WriteDouble, 1>; def : HWVALUWriteRes<WriteDoubleAdd, 1>; def : HWVALUWriteRes<WriteDoubleCvt, 1>; +def : HWVALUWriteRes<WriteTrans32, 4>; def : HWVALUWriteRes<WriteTrans64, 4>; def : HWVALUWriteRes<WriteIntMul, 1>; def : HWVALUWriteRes<Write64Bit, 1>; @@ -326,6 +329,7 @@ def : HWVALUWriteRes<WriteFloatFMA, 1>; def : HWVALUWriteRes<WriteDouble, 1>; def : HWVALUWriteRes<WriteDoubleAdd, 1>; def : HWVALUWriteRes<WriteDoubleCvt, 1>; +def : HWVALUWriteRes<WriteTrans32, 2>; def : HWVALUWriteRes<WriteTrans64, 4>; def : HWVALUWriteRes<WriteIntMul, 1>; def : 
HWVALUWriteRes<Write64Bit, 1>; diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll index 10e523d1a0cf..e8a4329e7f5c 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -30434,15 +30434,15 @@ define bfloat @v_exp_bf16(bfloat %a) { ; GFX950-NEXT: v_sub_f32_e32 v3, v1, v2 ; GFX950-NEXT: v_fma_f32 v1, v0, s0, -v1 ; GFX950-NEXT: v_fmamk_f32 v1, v0, 0x32a5705f, v1 +; GFX950-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX950-NEXT: v_add_f32_e32 v1, v3, v1 ; GFX950-NEXT: v_exp_f32_e32 v1, v1 -; GFX950-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX950-NEXT: s_mov_b32 s0, 0xc2ce8ed0 +; GFX950-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX950-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 ; GFX950-NEXT: s_mov_b32 s0, 0x42b17218 -; GFX950-NEXT: v_ldexp_f32 v1, v1, v2 -; GFX950-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX950-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX950-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX950-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v0 ; GFX950-NEXT: s_nop 1 ; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc @@ -30834,15 +30834,15 @@ define bfloat @v_exp10_bf16(bfloat %a) { ; GFX950-NEXT: v_sub_f32_e32 v3, v1, v2 ; GFX950-NEXT: v_fma_f32 v1, v0, s0, -v1 ; GFX950-NEXT: v_fmamk_f32 v1, v0, 0x33979a37, v1 +; GFX950-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX950-NEXT: v_add_f32_e32 v1, v3, v1 ; GFX950-NEXT: v_exp_f32_e32 v1, v1 -; GFX950-NEXT: v_cvt_i32_f32_e32 v2, v2 ; GFX950-NEXT: s_mov_b32 s0, 0xc23369f4 +; GFX950-NEXT: v_ldexp_f32 v1, v1, v2 ; GFX950-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 ; GFX950-NEXT: s_mov_b32 s0, 0x421a209b -; GFX950-NEXT: v_ldexp_f32 v1, v1, v2 -; GFX950-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX950-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX950-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GFX950-NEXT: v_cmp_nlt_f32_e32 vcc, s0, v0 ; GFX950-NEXT: s_nop 1 ; GFX950-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc |
