summary | refs | log | tree | commit | diff
path: root/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
diff options
context:
space:
mode:
author: Nicolai Hähnle <nicolai.haehnle@amd.com> 2025-11-19 18:00:32 -0800
committer: Nicolai Hähnle <nicolai.haehnle@amd.com> 2025-11-21 11:05:26 -0800
commit: e673cdaee95d870dd5e2fa13ab064f6dbd0ba273 (patch)
tree: 8d4ebe0df83984917f49cccef42797dda2a7e1ce /llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
parent: 459939f82086d02c39f5d6eeae141c25f9932d40 (diff)
VectorCombine: Improve the insert/extract fold in the narrowing case (branch: users/nhaehnle/spr/main/c151bb04)
Keeping the extracted element in a natural position in the narrowed vector has two beneficial effects:

1. It makes the narrowing shuffles cheaper (at least on AMDGPU), which allows the insert/extract fold to trigger.
2. It makes the narrowing shuffles in a chain of extract/insert operations compatible with each other, which allows foldLengthChangingShuffles to successfully recognize a chain that can be folded.

There are minor X86 test changes that look reasonable to me. The IR change for AVX2 in llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll doesn't change the assembly generated by `llc -mtriple=x86_64-- -mattr=AVX2` at all.

commit-id:c151bb04
Diffstat (limited to 'llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll')
-rw-r--r-- llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll 17
1 files changed, 2 insertions, 15 deletions
diff --git a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
index eaab7199a3cf..442a93689a79 100644
--- a/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
+++ b/llvm/test/Transforms/VectorCombine/AMDGPU/extract-insert-i8.ll
@@ -91,21 +91,8 @@ entry:
define <8 x i8> @extract_insert_chain_shortening(<32 x i8> %in) {
; OPT-LABEL: define <8 x i8> @extract_insert_chain_shortening(
; OPT-SAME: <32 x i8> [[IN:%.*]]) #[[ATTR0]] {
-; OPT-NEXT: [[I_1:%.*]] = extractelement <32 x i8> [[IN]], i64 17
-; OPT-NEXT: [[I_2:%.*]] = extractelement <32 x i8> [[IN]], i64 18
-; OPT-NEXT: [[I_3:%.*]] = extractelement <32 x i8> [[IN]], i64 19
-; OPT-NEXT: [[I_5:%.*]] = extractelement <32 x i8> [[IN]], i64 21
-; OPT-NEXT: [[I_6:%.*]] = extractelement <32 x i8> [[IN]], i64 22
-; OPT-NEXT: [[I_7:%.*]] = extractelement <32 x i8> [[IN]], i64 23
-; OPT-NEXT: [[O_0:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> poison, <8 x i32> <i32 16, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; OPT-NEXT: [[O_1:%.*]] = insertelement <8 x i8> [[O_0]], i8 [[I_1]], i32 1
-; OPT-NEXT: [[O_2:%.*]] = insertelement <8 x i8> [[O_1]], i8 [[I_2]], i32 2
-; OPT-NEXT: [[O_3:%.*]] = insertelement <8 x i8> [[O_2]], i8 [[I_3]], i32 3
-; OPT-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> poison, <8 x i32> <i32 20, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
-; OPT-NEXT: [[O_4:%.*]] = shufflevector <8 x i8> [[O_3]], <8 x i8> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7>
-; OPT-NEXT: [[O_5:%.*]] = insertelement <8 x i8> [[O_4]], i8 [[I_5]], i32 5
-; OPT-NEXT: [[O_6:%.*]] = insertelement <8 x i8> [[O_5]], i8 [[I_6]], i32 6
-; OPT-NEXT: [[O_7:%.*]] = insertelement <8 x i8> [[O_6]], i8 [[I_7]], i32 7
+; OPT-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[IN]], <32 x i8> poison, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; OPT-NEXT: [[O_7:%.*]] = shufflevector <8 x i8> poison, <8 x i8> [[TMP1]], <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; OPT-NEXT: ret <8 x i8> [[O_7]]
;
%i.0 = extractelement <32 x i8> %in, i64 16