summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp')
-rw-r--r--llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp436
1 files changed, 177 insertions, 259 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index a6ff22c4b391..dd68a5556cdb 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -14,6 +14,7 @@
#include "RISCVISelLowering.h"
#include "RISCVSubtarget.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
@@ -68,6 +69,89 @@ static const Intrinsic::ID ScalableVlsegIntrIds[] = {
Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
Intrinsic::riscv_vlseg8_mask};
+static const Intrinsic::ID FixedVssegIntrIds[] = {
+ Intrinsic::riscv_seg2_store_mask, Intrinsic::riscv_seg3_store_mask,
+ Intrinsic::riscv_seg4_store_mask, Intrinsic::riscv_seg5_store_mask,
+ Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask,
+ Intrinsic::riscv_seg8_store_mask};
+
+static const Intrinsic::ID ScalableVssegIntrIds[] = {
+ Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
+ Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
+ Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
+ Intrinsic::riscv_vsseg8_mask};
+
+static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
+ assert(N);
+ if (N == 1)
+ return true;
+
+ using namespace PatternMatch;
+ // Right now we're only recognizing the simplest pattern.
+ uint64_t C;
+ if (match(V, m_CombineOr(m_ConstantInt(C),
+ m_NUWMul(m_Value(), m_ConstantInt(C)))) &&
+ C && C % N == 0)
+ return true;
+
+ if (isPowerOf2_32(N)) {
+ KnownBits KB = llvm::computeKnownBits(V, DL);
+ return KB.countMinTrailingZeros() >= Log2_32(N);
+ }
+
+ return false;
+}
+
+/// Do the common operand retrieval and validation required by the
+/// routines below.
+static bool getMemOperands(unsigned Factor, VectorType *VTy, Type *XLenTy,
+ Instruction *I, Value *&Ptr, Value *&Mask,
+ Value *&VL, Align &Alignment) {
+
+ IRBuilder<> Builder(I);
+ const DataLayout &DL = I->getDataLayout();
+ ElementCount EC = VTy->getElementCount();
+ if (auto *LI = dyn_cast<LoadInst>(I)) {
+ assert(LI->isSimple());
+ Ptr = LI->getPointerOperand();
+ Alignment = LI->getAlign();
+ assert(!Mask && "Unexpected mask on a load");
+ Mask = Builder.getAllOnesMask(EC);
+ VL = isa<FixedVectorType>(VTy) ? Builder.CreateElementCount(XLenTy, EC)
+ : Constant::getAllOnesValue(XLenTy);
+ return true;
+ }
+ if (auto *SI = dyn_cast<StoreInst>(I)) {
+ assert(SI->isSimple());
+ Ptr = SI->getPointerOperand();
+ Alignment = SI->getAlign();
+ assert(!Mask && "Unexpected mask on a store");
+ Mask = Builder.getAllOnesMask(EC);
+ VL = isa<FixedVectorType>(VTy) ? Builder.CreateElementCount(XLenTy, EC)
+ : Constant::getAllOnesValue(XLenTy);
+ return true;
+ }
+ auto *VPLdSt = cast<VPIntrinsic>(I);
+ assert((VPLdSt->getIntrinsicID() == Intrinsic::vp_load ||
+ VPLdSt->getIntrinsicID() == Intrinsic::vp_store) &&
+ "Unexpected intrinsic");
+ Ptr = VPLdSt->getMemoryPointerParam();
+ Alignment = VPLdSt->getPointerAlignment().value_or(
+ DL.getABITypeAlign(VTy->getElementType()));
+
+ assert(Mask && "vp.load and vp.store needs a mask!");
+
+ Value *WideEVL = VPLdSt->getVectorLengthParam();
+ // Conservatively check if EVL is a multiple of factor, otherwise some
+ // (trailing) elements might be lost after the transformation.
+ if (!isMultipleOfN(WideEVL, I->getDataLayout(), Factor))
+ return false;
+
+ auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
+ VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
+ return true;
+}
+
/// Lower an interleaved load into a vlsegN intrinsic.
///
/// E.g. Lower an interleaved load (Factor = 2):
@@ -81,21 +165,25 @@ static const Intrinsic::ID ScalableVlsegIntrIds[] = {
/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
bool RISCVTargetLowering::lowerInterleavedLoad(
- LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
+ Instruction *Load, Value *Mask, ArrayRef<ShuffleVectorInst *> Shuffles,
ArrayRef<unsigned> Indices, unsigned Factor) const {
assert(Indices.size() == Shuffles.size());
- IRBuilder<> Builder(LI);
-
- const DataLayout &DL = LI->getDataLayout();
+ IRBuilder<> Builder(Load);
+ const DataLayout &DL = Load->getDataLayout();
auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType());
- if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(),
- LI->getPointerAddressSpace(), DL))
+ auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen());
+
+ Value *Ptr, *VL;
+ Align Alignment;
+ if (!getMemOperands(Factor, VTy, XLenTy, Load, Ptr, Mask, VL, Alignment))
return false;
- auto *PtrTy = LI->getPointerOperandType();
- auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
+ Type *PtrTy = Ptr->getType();
+ unsigned AS = PtrTy->getPointerAddressSpace();
+ if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL))
+ return false;
// If the segment load is going to be performed segment at a time anyways
// and there's only one element used, use a strided load instead. This
@@ -104,25 +192,23 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes);
Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes);
- Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset);
- Value *Mask = Builder.getAllOnesMask(VTy->getElementCount());
- Value *VL = Builder.getInt32(VTy->getNumElements());
-
+ Value *BasePtr = Builder.CreatePtrAdd(Ptr, Offset);
+ // Note: Same VL as above, but i32 not xlen due to signature of
+ // vp.strided.load
+ VL = Builder.CreateElementCount(Builder.getInt32Ty(),
+ VTy->getElementCount());
CallInst *CI =
Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load,
{VTy, BasePtr->getType(), Stride->getType()},
{BasePtr, Stride, Mask, VL});
- CI->addParamAttr(
- 0, Attribute::getWithAlignment(CI->getContext(), LI->getAlign()));
+ CI->addParamAttr(0,
+ Attribute::getWithAlignment(CI->getContext(), Alignment));
Shuffles[0]->replaceAllUsesWith(CI);
return true;
};
- Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
- Value *Mask = Builder.getAllOnesMask(VTy->getElementCount());
CallInst *VlsegN = Builder.CreateIntrinsic(
- FixedVlsegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy},
- {LI->getPointerOperand(), Mask, VL});
+ FixedVlsegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy}, {Ptr, Mask, VL});
for (unsigned i = 0; i < Shuffles.size(); i++) {
Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]);
@@ -132,18 +218,6 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
return true;
}
-static const Intrinsic::ID FixedVssegIntrIds[] = {
- Intrinsic::riscv_seg2_store_mask, Intrinsic::riscv_seg3_store_mask,
- Intrinsic::riscv_seg4_store_mask, Intrinsic::riscv_seg5_store_mask,
- Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask,
- Intrinsic::riscv_seg8_store_mask};
-
-static const Intrinsic::ID ScalableVssegIntrIds[] = {
- Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
- Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
- Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
- Intrinsic::riscv_vsseg8_mask};
-
/// Lower an interleaved store into a vssegN intrinsic.
///
/// E.g. Lower an interleaved store (Factor = 3):
@@ -191,7 +265,8 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes);
Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(), Offset);
Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount());
- Value *VL = Builder.getInt32(VTy->getNumElements());
+ Value *VL = Builder.CreateElementCount(Builder.getInt32Ty(),
+ VTy->getElementCount());
CallInst *CI = Builder.CreateIntrinsic(
Intrinsic::experimental_vp_strided_store,
@@ -223,7 +298,7 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
// This VL should be OK (should be executable in one vsseg instruction,
// potentially under larger LMULs) because we checked that the fixed vector
// type fits in isLegalInterleavedAccessType
- Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements());
+ Value *VL = Builder.CreateElementCount(XLenTy, VTy->getElementCount());
Value *StoreMask = Builder.getAllOnesMask(VTy->getElementCount());
Ops.append({SI->getPointerOperand(), StoreMask, VL});
@@ -233,58 +308,57 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI,
}
bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
- LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const {
- const unsigned Factor = DeinterleaveValues.size();
+ Instruction *Load, Value *Mask, IntrinsicInst *DI) const {
+ const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID());
if (Factor > 8)
return false;
- assert(LI->isSimple());
- IRBuilder<> Builder(LI);
+ IRBuilder<> Builder(Load);
- Value *FirstActive =
- *llvm::find_if(DeinterleaveValues, [](Value *V) { return V != nullptr; });
- VectorType *ResVTy = cast<VectorType>(FirstActive->getType());
+ VectorType *ResVTy = getDeinterleavedVectorType(DI);
- const DataLayout &DL = LI->getDataLayout();
+ const DataLayout &DL = Load->getDataLayout();
+ auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen());
- if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(),
- LI->getPointerAddressSpace(), DL))
+ Value *Ptr, *VL;
+ Align Alignment;
+ if (!getMemOperands(Factor, ResVTy, XLenTy, Load, Ptr, Mask, VL, Alignment))
return false;
- Value *Return;
- Type *PtrTy = LI->getPointerOperandType();
- Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen());
+ Type *PtrTy = Ptr->getType();
+ unsigned AS = PtrTy->getPointerAddressSpace();
+ if (!isLegalInterleavedAccessType(ResVTy, Factor, Alignment, AS, DL))
+ return false;
- if (auto *FVTy = dyn_cast<FixedVectorType>(ResVTy)) {
- Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
- Value *Mask = Builder.getAllOnesMask(FVTy->getElementCount());
+ Value *Return;
+ if (isa<FixedVectorType>(ResVTy)) {
Return = Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2],
- {ResVTy, PtrTy, XLenTy},
- {LI->getPointerOperand(), Mask, VL});
+ {ResVTy, PtrTy, XLenTy}, {Ptr, Mask, VL});
} else {
- static const Intrinsic::ID IntrIds[] = {
- Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3,
- Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5,
- Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7,
- Intrinsic::riscv_vlseg8};
-
unsigned SEW = DL.getTypeSizeInBits(ResVTy->getElementType());
unsigned NumElts = ResVTy->getElementCount().getKnownMinValue();
Type *VecTupTy = TargetExtType::get(
- LI->getContext(), "riscv.vector.tuple",
- ScalableVectorType::get(Type::getInt8Ty(LI->getContext()),
+ Load->getContext(), "riscv.vector.tuple",
+ ScalableVectorType::get(Type::getInt8Ty(Load->getContext()),
NumElts * SEW / 8),
Factor);
+ Function *VlsegNFunc = Intrinsic::getOrInsertDeclaration(
+ Load->getModule(), ScalableVlsegIntrIds[Factor - 2],
+ {VecTupTy, PtrTy, Mask->getType(), VL->getType()});
- Value *VL = Constant::getAllOnesValue(XLenTy);
+ Value *Operands[] = {
+ PoisonValue::get(VecTupTy),
+ Ptr,
+ Mask,
+ VL,
+ ConstantInt::get(XLenTy,
+ RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC),
+ ConstantInt::get(XLenTy, Log2_64(SEW))};
- Value *Vlseg = Builder.CreateIntrinsic(
- IntrIds[Factor - 2], {VecTupTy, PtrTy, XLenTy},
- {PoisonValue::get(VecTupTy), LI->getPointerOperand(), VL,
- ConstantInt::get(XLenTy, Log2_64(SEW))});
+ CallInst *Vlseg = Builder.CreateCall(VlsegNFunc, Operands);
SmallVector<Type *, 2> AggrTypes{Factor, ResVTy};
- Return = PoisonValue::get(StructType::get(LI->getContext(), AggrTypes));
+ Return = PoisonValue::get(StructType::get(Load->getContext(), AggrTypes));
for (unsigned i = 0; i < Factor; ++i) {
Value *VecExtract = Builder.CreateIntrinsic(
Intrinsic::riscv_tuple_extract, {ResVTy, VecTupTy},
@@ -293,217 +367,61 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
}
}
- for (auto [Idx, DIV] : enumerate(DeinterleaveValues)) {
- if (!DIV)
- continue;
- // We have to create a brand new ExtractValue to replace each
- // of these old ExtractValue instructions.
- Value *NewEV =
- Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)});
- DIV->replaceAllUsesWith(NewEV);
- }
-
+ DI->replaceAllUsesWith(Return);
return true;
}
bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
- StoreInst *SI, ArrayRef<Value *> InterleaveValues) const {
+ Instruction *Store, Value *Mask, ArrayRef<Value *> InterleaveValues) const {
unsigned Factor = InterleaveValues.size();
if (Factor > 8)
return false;
- assert(SI->isSimple());
- IRBuilder<> Builder(SI);
+ IRBuilder<> Builder(Store);
auto *InVTy = cast<VectorType>(InterleaveValues[0]->getType());
- auto *PtrTy = SI->getPointerOperandType();
- const DataLayout &DL = SI->getDataLayout();
+ const DataLayout &DL = Store->getDataLayout();
+ Type *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen());
- if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(),
- SI->getPointerAddressSpace(), DL))
+ Value *Ptr, *VL;
+ Align Alignment;
+ if (!getMemOperands(Factor, InVTy, XLenTy, Store, Ptr, Mask, VL, Alignment))
+ return false;
+ Type *PtrTy = Ptr->getType();
+ unsigned AS = Ptr->getType()->getPointerAddressSpace();
+ if (!isLegalInterleavedAccessType(InVTy, Factor, Alignment, AS, DL))
return false;
- Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen());
-
- if (auto *FVTy = dyn_cast<FixedVectorType>(InVTy)) {
+ if (isa<FixedVectorType>(InVTy)) {
Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
- SI->getModule(), FixedVssegIntrIds[Factor - 2], {InVTy, PtrTy, XLenTy});
-
+ Store->getModule(), FixedVssegIntrIds[Factor - 2],
+ {InVTy, PtrTy, XLenTy});
SmallVector<Value *, 10> Ops(InterleaveValues);
- Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements());
- Value *Mask = Builder.getAllOnesMask(FVTy->getElementCount());
- Ops.append({SI->getPointerOperand(), Mask, VL});
-
+ Ops.append({Ptr, Mask, VL});
Builder.CreateCall(VssegNFunc, Ops);
- } else {
- static const Intrinsic::ID IntrIds[] = {
- Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3,
- Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5,
- Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7,
- Intrinsic::riscv_vsseg8};
-
- unsigned SEW = DL.getTypeSizeInBits(InVTy->getElementType());
- unsigned NumElts = InVTy->getElementCount().getKnownMinValue();
- Type *VecTupTy = TargetExtType::get(
- SI->getContext(), "riscv.vector.tuple",
- ScalableVectorType::get(Type::getInt8Ty(SI->getContext()),
- NumElts * SEW / 8),
- Factor);
-
- Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
- SI->getModule(), IntrIds[Factor - 2], {VecTupTy, PtrTy, XLenTy});
-
- Value *VL = Constant::getAllOnesValue(XLenTy);
-
- Value *StoredVal = PoisonValue::get(VecTupTy);
- for (unsigned i = 0; i < Factor; ++i)
- StoredVal = Builder.CreateIntrinsic(
- Intrinsic::riscv_tuple_insert, {VecTupTy, InVTy},
- {StoredVal, InterleaveValues[i], Builder.getInt32(i)});
-
- Builder.CreateCall(VssegNFunc, {StoredVal, SI->getPointerOperand(), VL,
- ConstantInt::get(XLenTy, Log2_64(SEW))});
- }
-
- return true;
-}
-
-static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
- assert(N);
- if (N == 1)
- return true;
-
- using namespace PatternMatch;
- // Right now we're only recognizing the simplest pattern.
- uint64_t C;
- if (match(V, m_CombineOr(m_ConstantInt(C),
- m_c_Mul(m_Value(), m_ConstantInt(C)))) &&
- C && C % N == 0)
return true;
-
- if (isPowerOf2_32(N)) {
- KnownBits KB = llvm::computeKnownBits(V, DL);
- return KB.countMinTrailingZeros() >= Log2_32(N);
}
+ unsigned SEW = DL.getTypeSizeInBits(InVTy->getElementType());
+ unsigned NumElts = InVTy->getElementCount().getKnownMinValue();
+ Type *VecTupTy = TargetExtType::get(
+ Store->getContext(), "riscv.vector.tuple",
+ ScalableVectorType::get(Type::getInt8Ty(Store->getContext()),
+ NumElts * SEW / 8),
+ Factor);
- return false;
-}
-
-/// Lower an interleaved vp.load into a vlsegN intrinsic.
-///
-/// E.g. Lower an interleaved vp.load (Factor = 2):
-/// %l = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr %ptr,
-/// %mask,
-/// i32 %wide.rvl)
-/// %dl = tail call { <vscale x 32 x i8>, <vscale x 32 x i8> }
-/// @llvm.vector.deinterleave2.nxv64i8(
-/// <vscale x 64 x i8> %l)
-/// %r0 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %dl, 0
-/// %r1 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %dl, 1
-///
-/// Into:
-/// %rvl = udiv %wide.rvl, 2
-/// %sl = call { <vscale x 32 x i8>, <vscale x 32 x i8> }
-/// @llvm.riscv.vlseg2.mask.nxv32i8.i64(<vscale x 32 x i8> undef,
-/// <vscale x 32 x i8> undef,
-/// ptr %ptr,
-/// %mask,
-/// i64 %rvl,
-/// i64 1)
-/// %r0 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %sl, 0
-/// %r1 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %sl, 1
-///
-/// NOTE: the deinterleave2 intrinsic won't be touched and is expected to be
-/// removed by the caller
-/// TODO: We probably can loosen the dependency on matching extractvalue when
-/// dealing with factor of 2 (extractvalue is still required for most of other
-/// factors though).
-bool RISCVTargetLowering::lowerInterleavedVPLoad(
- VPIntrinsic *Load, Value *Mask,
- ArrayRef<Value *> DeinterleaveResults) const {
- const unsigned Factor = DeinterleaveResults.size();
- assert(Mask && "Expect a valid mask");
- assert(Load->getIntrinsicID() == Intrinsic::vp_load &&
- "Unexpected intrinsic");
-
- Value *FirstActive = *llvm::find_if(DeinterleaveResults,
- [](Value *V) { return V != nullptr; });
- VectorType *VTy = cast<VectorType>(FirstActive->getType());
-
- auto &DL = Load->getModule()->getDataLayout();
- Align Alignment = Load->getParamAlign(0).value_or(
- DL.getABITypeAlign(VTy->getElementType()));
- if (!isLegalInterleavedAccessType(
- VTy, Factor, Alignment,
- Load->getArgOperand(0)->getType()->getPointerAddressSpace(), DL))
- return false;
-
- IRBuilder<> Builder(Load);
-
- Value *WideEVL = Load->getVectorLengthParam();
- // Conservatively check if EVL is a multiple of factor, otherwise some
- // (trailing) elements might be lost after the transformation.
- if (!isMultipleOfN(WideEVL, Load->getDataLayout(), Factor))
- return false;
-
- auto *PtrTy = Load->getArgOperand(0)->getType();
- auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen());
- Value *EVL = Builder.CreateZExt(
- Builder.CreateUDiv(WideEVL, ConstantInt::get(WideEVL->getType(), Factor)),
- XLenTy);
-
- Value *Return = nullptr;
- if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
- Return = Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2],
- {FVTy, PtrTy, XLenTy},
- {Load->getArgOperand(0), Mask, EVL});
- } else {
- unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType());
- unsigned NumElts = VTy->getElementCount().getKnownMinValue();
- Type *VecTupTy = TargetExtType::get(
- Load->getContext(), "riscv.vector.tuple",
- ScalableVectorType::get(Type::getInt8Ty(Load->getContext()),
- NumElts * SEW / 8),
- Factor);
-
- Value *PoisonVal = PoisonValue::get(VecTupTy);
-
- Function *VlsegNFunc = Intrinsic::getOrInsertDeclaration(
- Load->getModule(), ScalableVlsegIntrIds[Factor - 2],
- {VecTupTy, PtrTy, Mask->getType(), EVL->getType()});
-
- Value *Operands[] = {
- PoisonVal,
- Load->getArgOperand(0),
- Mask,
- EVL,
- ConstantInt::get(XLenTy,
- RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC),
- ConstantInt::get(XLenTy, Log2_64(SEW))};
-
- CallInst *VlsegN = Builder.CreateCall(VlsegNFunc, Operands);
-
- SmallVector<Type *, 8> AggrTypes{Factor, VTy};
- Return = PoisonValue::get(StructType::get(Load->getContext(), AggrTypes));
- Function *VecExtractFunc = Intrinsic::getOrInsertDeclaration(
- Load->getModule(), Intrinsic::riscv_tuple_extract, {VTy, VecTupTy});
- for (unsigned i = 0; i < Factor; ++i) {
- Value *VecExtract =
- Builder.CreateCall(VecExtractFunc, {VlsegN, Builder.getInt32(i)});
- Return = Builder.CreateInsertValue(Return, VecExtract, i);
- }
- }
+ Value *StoredVal = PoisonValue::get(VecTupTy);
+ for (unsigned i = 0; i < Factor; ++i)
+ StoredVal = Builder.CreateIntrinsic(
+ Intrinsic::riscv_tuple_insert, {VecTupTy, InVTy},
+ {StoredVal, InterleaveValues[i], Builder.getInt32(i)});
- for (auto [Idx, DIO] : enumerate(DeinterleaveResults)) {
- if (!DIO)
- continue;
- // We have to create a brand new ExtractValue to replace each
- // of these old ExtractValue instructions.
- Value *NewEV =
- Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)});
- DIO->replaceAllUsesWith(NewEV);
- }
+ Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
+ Store->getModule(), ScalableVssegIntrIds[Factor - 2],
+ {VecTupTy, PtrTy, Mask->getType(), VL->getType()});
+ Value *Operands[] = {StoredVal, Ptr, Mask, VL,
+ ConstantInt::get(XLenTy, Log2_64(SEW))};
+ Builder.CreateCall(VssegNFunc, Operands);
return true;
}
@@ -557,15 +475,15 @@ bool RISCVTargetLowering::lowerInterleavedVPStore(
auto *PtrTy = Store->getArgOperand(1)->getType();
auto *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen());
- Value *EVL = Builder.CreateZExt(
- Builder.CreateUDiv(WideEVL, ConstantInt::get(WideEVL->getType(), Factor)),
- XLenTy);
+ auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
+ Value *EVL =
+ Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
- if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) {
+ if (isa<FixedVectorType>(VTy)) {
SmallVector<Value *, 8> Operands(InterleaveOperands);
Operands.append({Store->getArgOperand(1), Mask, EVL});
Builder.CreateIntrinsic(FixedVssegIntrIds[Factor - 2],
- {FVTy, PtrTy, XLenTy}, Operands);
+ {VTy, PtrTy, XLenTy}, Operands);
return true;
}