diff options
| author | Hongyu Chen <xxs_chy@outlook.com> | 2025-11-22 03:33:53 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-11-22 03:33:53 +0800 |
| commit | 3fec26e3294ae0f276ff08fd810850421444588c (patch) | |
| tree | c16de5a0253651e2767e2e0763c2ece4fcddefa6 | |
| parent | 69589dd2c0b34a664c24f7ffbb084d2eea848ab6 (diff) | |
[DAGCombiner] Don't optimize insert_vector_elt into shuffle if implicit truncation exists (#169022)
Fixes #169017
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll | 26 |
2 files changed, 30 insertions, 0 deletions
```diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ca2070e958fa..6b79dbb46cad 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -23469,6 +23469,10 @@ SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
   EVT SubVecVT = SubVec.getValueType();
   EVT VT = DestVec.getValueType();
   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
+  // Bail out if the inserted value is larger than the vector element, as
+  // insert_vector_elt performs an implicit truncation in this case.
+  if (InsertVal.getValueType() != VT.getVectorElementType())
+    return SDValue();
   // If the source only has a single vector element, the cost of creating adding
   // it to a vector is likely to exceed the cost of a insert_vector_elt.
   if (NumSrcElts == 1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index 3a5b3719931a..79286c0304e0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -1143,6 +1143,32 @@ define <4 x half> @insertelt_v4f16_idx(<4 x half> %a, half %y, i32 zeroext %idx)
   %b = insertelement <4 x half> %a, half %y, i32 %idx
   ret <4 x half> %b
 }
+
+define <2 x i8> @pr169017(<4 x i16> %vecinit, <2 x i8> %dst_vec) {
+; CHECK-LABEL: pr169017:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vmv.x.s a0, v8
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, tu, ma
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+;
+; VISNI-LABEL: pr169017:
+; VISNI: # %bb.0: # %entry
+; VISNI-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; VISNI-NEXT: vmv.x.s a0, v8
+; VISNI-NEXT: vsetvli zero, zero, e8, mf8, tu, ma
+; VISNI-NEXT: vmv.s.x v9, a0
+; VISNI-NEXT: vmv1r.v v8, v9
+; VISNI-NEXT: ret
+entry:
+  %cast = bitcast <4 x i16> %vecinit to i64
+  %trunc = trunc i64 %cast to i8
+  %2 = insertelement <2 x i8> %dst_vec, i8 %trunc, i64 0
+  ret <2 x i8> %2
+}
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; ZVFHMINRV32: {{.*}}
 ; ZVFHMINRV64: {{.*}}
```
