diff options
Diffstat (limited to 'llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp')
| -rw-r--r-- | llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp | 436 |
1 files changed, 177 insertions, 259 deletions
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp index a6ff22c4b391..dd68a5556cdb 100644 --- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp +++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp @@ -14,6 +14,7 @@ #include "RISCVISelLowering.h" #include "RISCVSubtarget.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/VectorUtils.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" @@ -68,6 +69,89 @@ static const Intrinsic::ID ScalableVlsegIntrIds[] = { Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask, Intrinsic::riscv_vlseg8_mask}; +static const Intrinsic::ID FixedVssegIntrIds[] = { + Intrinsic::riscv_seg2_store_mask, Intrinsic::riscv_seg3_store_mask, + Intrinsic::riscv_seg4_store_mask, Intrinsic::riscv_seg5_store_mask, + Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask, + Intrinsic::riscv_seg8_store_mask}; + +static const Intrinsic::ID ScalableVssegIntrIds[] = { + Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask, + Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask, + Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask, + Intrinsic::riscv_vsseg8_mask}; + +static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) { + assert(N); + if (N == 1) + return true; + + using namespace PatternMatch; + // Right now we're only recognizing the simplest pattern. + uint64_t C; + if (match(V, m_CombineOr(m_ConstantInt(C), + m_NUWMul(m_Value(), m_ConstantInt(C)))) && + C && C % N == 0) + return true; + + if (isPowerOf2_32(N)) { + KnownBits KB = llvm::computeKnownBits(V, DL); + return KB.countMinTrailingZeros() >= Log2_32(N); + } + + return false; +} + +/// Do the common operand retrieval and validition required by the +/// routines below. +static bool getMemOperands(unsigned Factor, VectorType *VTy, Type *XLenTy, + Instruction *I, Value *&Ptr, Value *&Mask, + Value *&VL, Align &Alignment) { + + IRBuilder<> Builder(I); + const DataLayout &DL = I->getDataLayout(); + ElementCount EC = VTy->getElementCount(); + if (auto *LI = dyn_cast<LoadInst>(I)) { + assert(LI->isSimple()); + Ptr = LI->getPointerOperand(); + Alignment = LI->getAlign(); + assert(!Mask && "Unexpected mask on a load"); + Mask = Builder.getAllOnesMask(EC); + VL = isa<FixedVectorType>(VTy) ? Builder.CreateElementCount(XLenTy, EC) + : Constant::getAllOnesValue(XLenTy); + return true; + } + if (auto *SI = dyn_cast<StoreInst>(I)) { + assert(SI->isSimple()); + Ptr = SI->getPointerOperand(); + Alignment = SI->getAlign(); + assert(!Mask && "Unexpected mask on a store"); + Mask = Builder.getAllOnesMask(EC); + VL = isa<FixedVectorType>(VTy) ? Builder.CreateElementCount(XLenTy, EC) + : Constant::getAllOnesValue(XLenTy); + return true; + } + auto *VPLdSt = cast<VPIntrinsic>(I); + assert((VPLdSt->getIntrinsicID() == Intrinsic::vp_load || + VPLdSt->getIntrinsicID() == Intrinsic::vp_store) && + "Unexpected intrinsic"); + Ptr = VPLdSt->getMemoryPointerParam(); + Alignment = VPLdSt->getPointerAlignment().value_or( + DL.getABITypeAlign(VTy->getElementType())); + + assert(Mask && "vp.load and vp.store needs a mask!"); + + Value *WideEVL = VPLdSt->getVectorLengthParam(); + // Conservatively check if EVL is a multiple of factor, otherwise some + // (trailing) elements might be lost after the transformation. + if (!isMultipleOfN(WideEVL, I->getDataLayout(), Factor)) + return false; + + auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor); + VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy); + return true; +} + /// Lower an interleaved load into a vlsegN intrinsic. /// /// E.g. Lower an interleaved load (Factor = 2): @@ -81,21 +165,25 @@ static const Intrinsic::ID ScalableVlsegIntrIds[] = { /// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0 /// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1 bool RISCVTargetLowering::lowerInterleavedLoad( - LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles, + Instruction *Load, Value *Mask, ArrayRef<ShuffleVectorInst *> Shuffles, ArrayRef<unsigned> Indices, unsigned Factor) const { assert(Indices.size() == Shuffles.size()); - IRBuilder<> Builder(LI); - - const DataLayout &DL = LI->getDataLayout(); + IRBuilder<> Builder(Load); + const DataLayout &DL = Load->getDataLayout(); auto *VTy = cast<FixedVectorType>(Shuffles[0]->getType()); - if (!isLegalInterleavedAccessType(VTy, Factor, LI->getAlign(), - LI->getPointerAddressSpace(), DL)) + auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen()); + + Value *Ptr, *VL; + Align Alignment; + if (!getMemOperands(Factor, VTy, XLenTy, Load, Ptr, Mask, VL, Alignment)) return false; - auto *PtrTy = LI->getPointerOperandType(); - auto *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen()); + Type *PtrTy = Ptr->getType(); + unsigned AS = PtrTy->getPointerAddressSpace(); + if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL)) + return false; // If the segment load is going to be performed segment at a time anyways // and there's only one element used, use a strided load instead. This @@ -104,25 +192,23 @@ bool RISCVTargetLowering::lowerInterleavedLoad( unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType()); Value *Stride = ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes); Value *Offset = ConstantInt::get(XLenTy, Indices[0] * ScalarSizeInBytes); - Value *BasePtr = Builder.CreatePtrAdd(LI->getPointerOperand(), Offset); - Value *Mask = Builder.getAllOnesMask(VTy->getElementCount()); - Value *VL = Builder.getInt32(VTy->getNumElements()); - + Value *BasePtr = Builder.CreatePtrAdd(Ptr, Offset); + // Note: Same VL as above, but i32 not xlen due to signature of + // vp.strided.load + VL = Builder.CreateElementCount(Builder.getInt32Ty(), + VTy->getElementCount()); CallInst *CI = Builder.CreateIntrinsic(Intrinsic::experimental_vp_strided_load, {VTy, BasePtr->getType(), Stride->getType()}, {BasePtr, Stride, Mask, VL}); - CI->addParamAttr( - 0, Attribute::getWithAlignment(CI->getContext(), LI->getAlign())); + CI->addParamAttr(0, + Attribute::getWithAlignment(CI->getContext(), Alignment)); Shuffles[0]->replaceAllUsesWith(CI); return true; }; - Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements()); - Value *Mask = Builder.getAllOnesMask(VTy->getElementCount()); CallInst *VlsegN = Builder.CreateIntrinsic( - FixedVlsegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy}, - {LI->getPointerOperand(), Mask, VL}); + FixedVlsegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy}, {Ptr, Mask, VL}); for (unsigned i = 0; i < Shuffles.size(); i++) { Value *SubVec = Builder.CreateExtractValue(VlsegN, Indices[i]); @@ -132,18 +218,6 @@ bool RISCVTargetLowering::lowerInterleavedLoad( return true; } -static const Intrinsic::ID FixedVssegIntrIds[] = { - Intrinsic::riscv_seg2_store_mask, Intrinsic::riscv_seg3_store_mask, - Intrinsic::riscv_seg4_store_mask, Intrinsic::riscv_seg5_store_mask, - Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask, - Intrinsic::riscv_seg8_store_mask}; - -static const Intrinsic::ID ScalableVssegIntrIds[] = { - Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask, - Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask, - Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask, - Intrinsic::riscv_vsseg8_mask}; - /// Lower an interleaved store into a vssegN intrinsic. /// /// E.g. Lower an interleaved store (Factor = 3): @@ -191,7 +265,8 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI, Value *Offset = ConstantInt::get(XLenTy, Index * ScalarSizeInBytes); Value *BasePtr = Builder.CreatePtrAdd(SI->getPointerOperand(), Offset); Value *Mask = Builder.getAllOnesMask(DataVTy->getElementCount()); - Value *VL = Builder.getInt32(VTy->getNumElements()); + Value *VL = Builder.CreateElementCount(Builder.getInt32Ty(), + VTy->getElementCount()); CallInst *CI = Builder.CreateIntrinsic( Intrinsic::experimental_vp_strided_store, @@ -223,7 +298,7 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI, // This VL should be OK (should be executable in one vsseg instruction, // potentially under larger LMULs) because we checked that the fixed vector // type fits in isLegalInterleavedAccessType - Value *VL = ConstantInt::get(XLenTy, VTy->getNumElements()); + Value *VL = Builder.CreateElementCount(XLenTy, VTy->getElementCount()); Value *StoreMask = Builder.getAllOnesMask(VTy->getElementCount()); Ops.append({SI->getPointerOperand(), StoreMask, VL}); @@ -233,58 +308,57 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI, } bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad( - LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const { - const unsigned Factor = DeinterleaveValues.size(); + Instruction *Load, Value *Mask, IntrinsicInst *DI) const { + const unsigned Factor = getDeinterleaveIntrinsicFactor(DI->getIntrinsicID()); if (Factor > 8) return false; - assert(LI->isSimple()); - IRBuilder<> Builder(LI); + IRBuilder<> Builder(Load); - Value *FirstActive = - *llvm::find_if(DeinterleaveValues, [](Value *V) { return V != nullptr; }); - VectorType *ResVTy = cast<VectorType>(FirstActive->getType()); + VectorType *ResVTy = getDeinterleavedVectorType(DI); - const DataLayout &DL = LI->getDataLayout(); + const DataLayout &DL = Load->getDataLayout(); + auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen()); - if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(), - LI->getPointerAddressSpace(), DL)) + Value *Ptr, *VL; + Align Alignment; + if (!getMemOperands(Factor, ResVTy, XLenTy, Load, Ptr, Mask, VL, Alignment)) return false; - Value *Return; - Type *PtrTy = LI->getPointerOperandType(); - Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen()); + Type *PtrTy = Ptr->getType(); + unsigned AS = PtrTy->getPointerAddressSpace(); + if (!isLegalInterleavedAccessType(ResVTy, Factor, Alignment, AS, DL)) + return false; - if (auto *FVTy = dyn_cast<FixedVectorType>(ResVTy)) { - Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements()); - Value *Mask = Builder.getAllOnesMask(FVTy->getElementCount()); + Value *Return; + if (isa<FixedVectorType>(ResVTy)) { Return = Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2], - {ResVTy, PtrTy, XLenTy}, - {LI->getPointerOperand(), Mask, VL}); + {ResVTy, PtrTy, XLenTy}, {Ptr, Mask, VL}); } else { - static const Intrinsic::ID IntrIds[] = { - Intrinsic::riscv_vlseg2, Intrinsic::riscv_vlseg3, - Intrinsic::riscv_vlseg4, Intrinsic::riscv_vlseg5, - Intrinsic::riscv_vlseg6, Intrinsic::riscv_vlseg7, - Intrinsic::riscv_vlseg8}; - unsigned SEW = DL.getTypeSizeInBits(ResVTy->getElementType()); unsigned NumElts = ResVTy->getElementCount().getKnownMinValue(); Type *VecTupTy = TargetExtType::get( - LI->getContext(), "riscv.vector.tuple", - ScalableVectorType::get(Type::getInt8Ty(LI->getContext()), + Load->getContext(), "riscv.vector.tuple", + ScalableVectorType::get(Type::getInt8Ty(Load->getContext()), NumElts * SEW / 8), Factor); + Function *VlsegNFunc = Intrinsic::getOrInsertDeclaration( + Load->getModule(), ScalableVlsegIntrIds[Factor - 2], + {VecTupTy, PtrTy, Mask->getType(), VL->getType()}); - Value *VL = Constant::getAllOnesValue(XLenTy); + Value *Operands[] = { + PoisonValue::get(VecTupTy), + Ptr, + Mask, + VL, + ConstantInt::get(XLenTy, + RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC), + ConstantInt::get(XLenTy, Log2_64(SEW))}; - Value *Vlseg = Builder.CreateIntrinsic( - IntrIds[Factor - 2], {VecTupTy, PtrTy, XLenTy}, - {PoisonValue::get(VecTupTy), LI->getPointerOperand(), VL, - ConstantInt::get(XLenTy, Log2_64(SEW))}); + CallInst *Vlseg = Builder.CreateCall(VlsegNFunc, Operands); SmallVector<Type *, 2> AggrTypes{Factor, ResVTy}; - Return = PoisonValue::get(StructType::get(LI->getContext(), AggrTypes)); + Return = PoisonValue::get(StructType::get(Load->getContext(), AggrTypes)); for (unsigned i = 0; i < Factor; ++i) { Value *VecExtract = Builder.CreateIntrinsic( Intrinsic::riscv_tuple_extract, {ResVTy, VecTupTy}, @@ -293,217 +367,61 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad( } } - for (auto [Idx, DIV] : enumerate(DeinterleaveValues)) { - if (!DIV) - continue; - // We have to create a brand new ExtractValue to replace each - // of these old ExtractValue instructions. - Value *NewEV = - Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)}); - DIV->replaceAllUsesWith(NewEV); - } - + DI->replaceAllUsesWith(Return); return true; } bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore( - StoreInst *SI, ArrayRef<Value *> InterleaveValues) const { + Instruction *Store, Value *Mask, ArrayRef<Value *> InterleaveValues) const { unsigned Factor = InterleaveValues.size(); if (Factor > 8) return false; - assert(SI->isSimple()); - IRBuilder<> Builder(SI); + IRBuilder<> Builder(Store); auto *InVTy = cast<VectorType>(InterleaveValues[0]->getType()); - auto *PtrTy = SI->getPointerOperandType(); - const DataLayout &DL = SI->getDataLayout(); + const DataLayout &DL = Store->getDataLayout(); + Type *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen()); - if (!isLegalInterleavedAccessType(InVTy, Factor, SI->getAlign(), - SI->getPointerAddressSpace(), DL)) + Value *Ptr, *VL; + Align Alignment; + if (!getMemOperands(Factor, InVTy, XLenTy, Store, Ptr, Mask, VL, Alignment)) + return false; + Type *PtrTy = Ptr->getType(); + unsigned AS = Ptr->getType()->getPointerAddressSpace(); + if (!isLegalInterleavedAccessType(InVTy, Factor, Alignment, AS, DL)) return false; - Type *XLenTy = Type::getIntNTy(SI->getContext(), Subtarget.getXLen()); - - if (auto *FVTy = dyn_cast<FixedVectorType>(InVTy)) { + if (isa<FixedVectorType>(InVTy)) { Function *VssegNFunc = Intrinsic::getOrInsertDeclaration( - SI->getModule(), FixedVssegIntrIds[Factor - 2], {InVTy, PtrTy, XLenTy}); - + Store->getModule(), FixedVssegIntrIds[Factor - 2], + {InVTy, PtrTy, XLenTy}); SmallVector<Value *, 10> Ops(InterleaveValues); - Value *VL = ConstantInt::get(XLenTy, FVTy->getNumElements()); - Value *Mask = Builder.getAllOnesMask(FVTy->getElementCount()); - Ops.append({SI->getPointerOperand(), Mask, VL}); - + Ops.append({Ptr, Mask, VL}); Builder.CreateCall(VssegNFunc, Ops); - } else { - static const Intrinsic::ID IntrIds[] = { - Intrinsic::riscv_vsseg2, Intrinsic::riscv_vsseg3, - Intrinsic::riscv_vsseg4, Intrinsic::riscv_vsseg5, - Intrinsic::riscv_vsseg6, Intrinsic::riscv_vsseg7, - Intrinsic::riscv_vsseg8}; - - unsigned SEW = DL.getTypeSizeInBits(InVTy->getElementType()); - unsigned NumElts = InVTy->getElementCount().getKnownMinValue(); - Type *VecTupTy = TargetExtType::get( - SI->getContext(), "riscv.vector.tuple", - ScalableVectorType::get(Type::getInt8Ty(SI->getContext()), - NumElts * SEW / 8), - Factor); - - Function *VssegNFunc = Intrinsic::getOrInsertDeclaration( - SI->getModule(), IntrIds[Factor - 2], {VecTupTy, PtrTy, XLenTy}); - - Value *VL = Constant::getAllOnesValue(XLenTy); - - Value *StoredVal = PoisonValue::get(VecTupTy); - for (unsigned i = 0; i < Factor; ++i) - StoredVal = Builder.CreateIntrinsic( - Intrinsic::riscv_tuple_insert, {VecTupTy, InVTy}, - {StoredVal, InterleaveValues[i], Builder.getInt32(i)}); - - Builder.CreateCall(VssegNFunc, {StoredVal, SI->getPointerOperand(), VL, - ConstantInt::get(XLenTy, Log2_64(SEW))}); - } - - return true; -} - -static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) { - assert(N); - if (N == 1) - return true; - - using namespace PatternMatch; - // Right now we're only recognizing the simplest pattern. - uint64_t C; - if (match(V, m_CombineOr(m_ConstantInt(C), - m_c_Mul(m_Value(), m_ConstantInt(C)))) && - C && C % N == 0) return true; - - if (isPowerOf2_32(N)) { - KnownBits KB = llvm::computeKnownBits(V, DL); - return KB.countMinTrailingZeros() >= Log2_32(N); } + unsigned SEW = DL.getTypeSizeInBits(InVTy->getElementType()); + unsigned NumElts = InVTy->getElementCount().getKnownMinValue(); + Type *VecTupTy = TargetExtType::get( + Store->getContext(), "riscv.vector.tuple", + ScalableVectorType::get(Type::getInt8Ty(Store->getContext()), + NumElts * SEW / 8), + Factor); - return false; -} - -/// Lower an interleaved vp.load into a vlsegN intrinsic. -/// -/// E.g. Lower an interleaved vp.load (Factor = 2): -/// %l = call <vscale x 64 x i8> @llvm.vp.load.nxv64i8.p0(ptr %ptr, -/// %mask, -/// i32 %wide.rvl) -/// %dl = tail call { <vscale x 32 x i8>, <vscale x 32 x i8> } -/// @llvm.vector.deinterleave2.nxv64i8( -/// <vscale x 64 x i8> %l) -/// %r0 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %dl, 0 -/// %r1 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %dl, 1 -/// -/// Into: -/// %rvl = udiv %wide.rvl, 2 -/// %sl = call { <vscale x 32 x i8>, <vscale x 32 x i8> } -/// @llvm.riscv.vlseg2.mask.nxv32i8.i64(<vscale x 32 x i8> undef, -/// <vscale x 32 x i8> undef, -/// ptr %ptr, -/// %mask, -/// i64 %rvl, -/// i64 1) -/// %r0 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %sl, 0 -/// %r1 = extractvalue { <vscale x 32 x i8>, <vscale x 32 x i8> } %sl, 1 -/// -/// NOTE: the deinterleave2 intrinsic won't be touched and is expected to be -/// removed by the caller -/// TODO: We probably can loosen the dependency on matching extractvalue when -/// dealing with factor of 2 (extractvalue is still required for most of other -/// factors though). -bool RISCVTargetLowering::lowerInterleavedVPLoad( - VPIntrinsic *Load, Value *Mask, - ArrayRef<Value *> DeinterleaveResults) const { - const unsigned Factor = DeinterleaveResults.size(); - assert(Mask && "Expect a valid mask"); - assert(Load->getIntrinsicID() == Intrinsic::vp_load && - "Unexpected intrinsic"); - - Value *FirstActive = *llvm::find_if(DeinterleaveResults, - [](Value *V) { return V != nullptr; }); - VectorType *VTy = cast<VectorType>(FirstActive->getType()); - - auto &DL = Load->getModule()->getDataLayout(); - Align Alignment = Load->getParamAlign(0).value_or( - DL.getABITypeAlign(VTy->getElementType())); - if (!isLegalInterleavedAccessType( - VTy, Factor, Alignment, - Load->getArgOperand(0)->getType()->getPointerAddressSpace(), DL)) - return false; - - IRBuilder<> Builder(Load); - - Value *WideEVL = Load->getVectorLengthParam(); - // Conservatively check if EVL is a multiple of factor, otherwise some - // (trailing) elements might be lost after the transformation. - if (!isMultipleOfN(WideEVL, Load->getDataLayout(), Factor)) - return false; - - auto *PtrTy = Load->getArgOperand(0)->getType(); - auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen()); - Value *EVL = Builder.CreateZExt( - Builder.CreateUDiv(WideEVL, ConstantInt::get(WideEVL->getType(), Factor)), - XLenTy); - - Value *Return = nullptr; - if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) { - Return = Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2], - {FVTy, PtrTy, XLenTy}, - {Load->getArgOperand(0), Mask, EVL}); - } else { - unsigned SEW = DL.getTypeSizeInBits(VTy->getElementType()); - unsigned NumElts = VTy->getElementCount().getKnownMinValue(); - Type *VecTupTy = TargetExtType::get( - Load->getContext(), "riscv.vector.tuple", - ScalableVectorType::get(Type::getInt8Ty(Load->getContext()), - NumElts * SEW / 8), - Factor); - - Value *PoisonVal = PoisonValue::get(VecTupTy); - - Function *VlsegNFunc = Intrinsic::getOrInsertDeclaration( - Load->getModule(), ScalableVlsegIntrIds[Factor - 2], - {VecTupTy, PtrTy, Mask->getType(), EVL->getType()}); - - Value *Operands[] = { - PoisonVal, - Load->getArgOperand(0), - Mask, - EVL, - ConstantInt::get(XLenTy, - RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC), - ConstantInt::get(XLenTy, Log2_64(SEW))}; - - CallInst *VlsegN = Builder.CreateCall(VlsegNFunc, Operands); - - SmallVector<Type *, 8> AggrTypes{Factor, VTy}; - Return = PoisonValue::get(StructType::get(Load->getContext(), AggrTypes)); - Function *VecExtractFunc = Intrinsic::getOrInsertDeclaration( - Load->getModule(), Intrinsic::riscv_tuple_extract, {VTy, VecTupTy}); - for (unsigned i = 0; i < Factor; ++i) { - Value *VecExtract = - Builder.CreateCall(VecExtractFunc, {VlsegN, Builder.getInt32(i)}); - Return = Builder.CreateInsertValue(Return, VecExtract, i); - } - } + Value *StoredVal = PoisonValue::get(VecTupTy); + for (unsigned i = 0; i < Factor; ++i) + StoredVal = Builder.CreateIntrinsic( + Intrinsic::riscv_tuple_insert, {VecTupTy, InVTy}, + {StoredVal, InterleaveValues[i], Builder.getInt32(i)}); - for (auto [Idx, DIO] : enumerate(DeinterleaveResults)) { - if (!DIO) - continue; - // We have to create a brand new ExtractValue to replace each - // of these old ExtractValue instructions. - Value *NewEV = - Builder.CreateExtractValue(Return, {static_cast<unsigned>(Idx)}); - DIO->replaceAllUsesWith(NewEV); - } + Function *VssegNFunc = Intrinsic::getOrInsertDeclaration( + Store->getModule(), ScalableVssegIntrIds[Factor - 2], + {VecTupTy, PtrTy, Mask->getType(), VL->getType()}); + Value *Operands[] = {StoredVal, Ptr, Mask, VL, + ConstantInt::get(XLenTy, Log2_64(SEW))}; + Builder.CreateCall(VssegNFunc, Operands); return true; } @@ -557,15 +475,15 @@ bool RISCVTargetLowering::lowerInterleavedVPStore( auto *PtrTy = Store->getArgOperand(1)->getType(); auto *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen()); - Value *EVL = Builder.CreateZExt( - Builder.CreateUDiv(WideEVL, ConstantInt::get(WideEVL->getType(), Factor)), - XLenTy); + auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor); + Value *EVL = + Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy); - if (auto *FVTy = dyn_cast<FixedVectorType>(VTy)) { + if (isa<FixedVectorType>(VTy)) { SmallVector<Value *, 8> Operands(InterleaveOperands); Operands.append({Store->getArgOperand(1), Mask, EVL}); Builder.CreateIntrinsic(FixedVssegIntrIds[Factor - 2], - {FVTy, PtrTy, XLenTy}, Operands); + {VTy, PtrTy, XLenTy}, Operands); return true; } |
