summaryrefslogtreecommitdiff
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Analysis/TargetTransformInfo.cpp9
-rw-r--r--llvm/lib/CodeGen/Analysis.cpp52
-rw-r--r--llvm/lib/CodeGen/BranchFolding.cpp12
-rw-r--r--llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp17
-rw-r--r--llvm/lib/CodeGen/PHIElimination.cpp22
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp29
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h4
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp36
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h1
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp123
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp60
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp2
-rw-r--r--llvm/lib/CodeGen/TargetLoweringBase.cpp12
-rw-r--r--llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp93
-rw-r--r--llvm/lib/IR/Core.cpp10
-rw-r--r--llvm/lib/IR/DebugInfo.cpp19
-rw-r--r--llvm/lib/IR/RuntimeLibcalls.cpp66
-rw-r--r--llvm/lib/MCA/Instruction.cpp7
-rw-r--r--llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp2
-rw-r--r--llvm/lib/ProfileData/InstrProf.cpp4
-rw-r--r--llvm/lib/Support/APInt.cpp16
-rw-r--r--llvm/lib/Support/MemoryBuffer.cpp10
-rw-r--r--llvm/lib/Target/AArch64/AArch64FrameLowering.cpp16
-rw-r--r--llvm/lib/Target/AArch64/AArch64ISelLowering.cpp47
-rw-r--r--llvm/lib/Target/AArch64/AArch64InstrInfo.cpp2
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp21
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h5
-rw-r--r--llvm/lib/Target/AArch64/SMEABIPass.cpp31
-rw-r--r--llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp41
-rw-r--r--llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h18
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPU.h4
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def1
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp119
-rw-r--r--llvm/lib/Target/AMDGPU/CMakeLists.txt1
-rw-r--r--llvm/lib/Target/AMDGPU/SIDefines.h6
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td1
-rw-r--r--llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp56
-rw-r--r--llvm/lib/Target/AMDGPU/SOPInstructions.td4
-rw-r--r--llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp4
-rw-r--r--llvm/lib/Target/ARM/ARMISelLowering.cpp169
-rw-r--r--llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp42
-rw-r--r--llvm/lib/Target/ARM/ARMTargetTransformInfo.h5
-rw-r--r--llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp4
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp7
-rw-r--r--llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h5
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp8
-rw-r--r--llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td18
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp7
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h1
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp33
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXISelLowering.h4
-rw-r--r--llvm/lib/Target/NVPTX/NVPTXInstrInfo.td5
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp47
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h1
-rw-r--r--llvm/lib/Target/RISCV/RISCVISelLowering.cpp4
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfo.td14
-rw-r--r--llvm/lib/Target/RISCV/RISCVInstrInfoP.td92
-rw-r--r--llvm/lib/Target/RISCV/RISCVSchedAndes45.td2
-rw-r--r--llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp22
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp12
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp9
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.h5
-rw-r--r--llvm/lib/Transforms/Coroutines/CoroSplit.cpp4
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp32
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineInternal.h1
-rw-r--r--llvm/lib/Transforms/Scalar/LICM.cpp2
-rw-r--r--llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp4
-rw-r--r--llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp85
-rw-r--r--llvm/lib/Transforms/Vectorize/LoopVectorize.cpp106
-rw-r--r--llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp33
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h207
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp3
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp12
-rw-r--r--llvm/lib/Transforms/Vectorize/VPlanTransforms.h10
-rw-r--r--llvm/lib/Transforms/Vectorize/VectorCombine.cpp4
78 files changed, 1052 insertions, 965 deletions
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 4f04209cf4cf..3141060a710c 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1230,10 +1230,11 @@ unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
return TTIImpl->getNumberOfParts(Tp);
}
-InstructionCost
-TargetTransformInfo::getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
- const SCEV *Ptr) const {
- InstructionCost Cost = TTIImpl->getAddressComputationCost(PtrTy, SE, Ptr);
+InstructionCost TargetTransformInfo::getAddressComputationCost(
+ Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr,
+ TTI::TargetCostKind CostKind) const {
+ InstructionCost Cost =
+ TTIImpl->getAddressComputationCost(PtrTy, SE, Ptr, CostKind);
assert(Cost >= 0 && "TTI should not produce negative costs!");
return Cost;
}
diff --git a/llvm/lib/CodeGen/Analysis.cpp b/llvm/lib/CodeGen/Analysis.cpp
index e7b9417de8c9..2ef96cc4400f 100644
--- a/llvm/lib/CodeGen/Analysis.cpp
+++ b/llvm/lib/CodeGen/Analysis.cpp
@@ -69,18 +69,10 @@ unsigned llvm::ComputeLinearIndex(Type *Ty,
return CurIndex + 1;
}
-/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
-/// EVTs that represent all the individual underlying
-/// non-aggregate types that comprise it.
-///
-/// If Offsets is non-null, it points to a vector to be filled in
-/// with the in-memory offsets of each of the individual values.
-///
-void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
- Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
- SmallVectorImpl<EVT> *MemVTs,
- SmallVectorImpl<TypeSize> *Offsets,
- TypeSize StartingOffset) {
+void llvm::ComputeValueTypes(const DataLayout &DL, Type *Ty,
+ SmallVectorImpl<Type *> &Types,
+ SmallVectorImpl<TypeSize> *Offsets,
+ TypeSize StartingOffset) {
assert((Ty->isScalableTy() == StartingOffset.isScalable() ||
StartingOffset.isZero()) &&
"Offset/TypeSize mismatch!");
@@ -90,15 +82,13 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
// us to support structs with scalable vectors for operations that don't
// need offsets.
const StructLayout *SL = Offsets ? DL.getStructLayout(STy) : nullptr;
- for (StructType::element_iterator EB = STy->element_begin(),
- EI = EB,
+ for (StructType::element_iterator EB = STy->element_begin(), EI = EB,
EE = STy->element_end();
EI != EE; ++EI) {
// Don't compute the element offset if we didn't get a StructLayout above.
TypeSize EltOffset =
SL ? SL->getElementOffset(EI - EB) : TypeSize::getZero();
- ComputeValueVTs(TLI, DL, *EI, ValueVTs, MemVTs, Offsets,
- StartingOffset + EltOffset);
+ ComputeValueTypes(DL, *EI, Types, Offsets, StartingOffset + EltOffset);
}
return;
}
@@ -107,21 +97,39 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
Type *EltTy = ATy->getElementType();
TypeSize EltSize = DL.getTypeAllocSize(EltTy);
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
- ComputeValueVTs(TLI, DL, EltTy, ValueVTs, MemVTs, Offsets,
- StartingOffset + i * EltSize);
+ ComputeValueTypes(DL, EltTy, Types, Offsets,
+ StartingOffset + i * EltSize);
return;
}
// Interpret void as zero return values.
if (Ty->isVoidTy())
return;
- // Base case: we can get an EVT for this LLVM IR type.
- ValueVTs.push_back(TLI.getValueType(DL, Ty));
- if (MemVTs)
- MemVTs->push_back(TLI.getMemValueType(DL, Ty));
+ Types.push_back(Ty);
if (Offsets)
Offsets->push_back(StartingOffset);
}
+/// ComputeValueVTs - Given an LLVM IR type, compute a sequence of
+/// EVTs that represent all the individual underlying
+/// non-aggregate types that comprise it.
+///
+/// If Offsets is non-null, it points to a vector to be filled in
+/// with the in-memory offsets of each of the individual values.
+///
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<EVT> *MemVTs,
+ SmallVectorImpl<TypeSize> *Offsets,
+ TypeSize StartingOffset) {
+ SmallVector<Type *> Types;
+ ComputeValueTypes(DL, Ty, Types, Offsets, StartingOffset);
+ for (Type *Ty : Types) {
+ ValueVTs.push_back(TLI.getValueType(DL, Ty));
+ if (MemVTs)
+ MemVTs->push_back(TLI.getMemValueType(DL, Ty));
+ }
+}
+
void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
SmallVectorImpl<EVT> *MemVTs,
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index dcfd9aad70fc..7292bc2be0df 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -1787,10 +1787,18 @@ ReoptimizeBlock:
// below were performed for EH "FallThrough" blocks. Therefore, even if
// that appears not to be happening anymore, we should assume that it is
// possible and not remove the "!FallThrough()->isEHPad" condition below.
+ //
+ // Similarly, the analyzeBranch call does not consider callbr, which also
+ // introduces the possibility of infinite rotation, as there may be
+ // multiple successors of PrevBB. Thus we check such case by
+ // FallThrough->isInlineAsmBrIndirectTarget().
+ // NOTE: Checking if PrevBB contains callbr is more precise, but much
+ // more expensive.
MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr;
SmallVector<MachineOperand, 4> PrevCond;
- if (FallThrough != MF.end() &&
- !FallThrough->isEHPad() &&
+
+ if (FallThrough != MF.end() && !FallThrough->isEHPad() &&
+ !FallThrough->isInlineAsmBrIndirectTarget() &&
!TII->analyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
PrevBB.isSuccessor(&*FallThrough)) {
MBB->moveAfter(&MF.back());
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index d9d3569affa3..008c18837a52 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5574,12 +5574,19 @@ LegalizerHelper::fewerElementsBitcast(MachineInstr &MI, unsigned int TypeIdx,
unsigned NewElemCount =
NarrowTy.getSizeInBits() / SrcTy.getScalarSizeInBits();
- LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
-
- // Split the Src and Dst Reg into smaller registers
SmallVector<Register> SrcVRegs, BitcastVRegs;
- if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
- return UnableToLegalize;
+ if (NewElemCount == 1) {
+ LLT SrcNarrowTy = SrcTy.getElementType();
+
+ auto Unmerge = MIRBuilder.buildUnmerge(SrcNarrowTy, SrcReg);
+ getUnmergeResults(SrcVRegs, *Unmerge);
+ } else {
+ LLT SrcNarrowTy = LLT::fixed_vector(NewElemCount, SrcTy.getElementType());
+
+ // Split the Src and Dst Reg into smaller registers
+ if (extractGCDType(SrcVRegs, DstTy, SrcNarrowTy, SrcReg) != SrcNarrowTy)
+ return UnableToLegalize;
+ }
// Build new smaller bitcast instructions
// Not supporting Leftover types for now but will have to
diff --git a/llvm/lib/CodeGen/PHIElimination.cpp b/llvm/lib/CodeGen/PHIElimination.cpp
index a93a89ecaa96..34a9d5d0e401 100644
--- a/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/llvm/lib/CodeGen/PHIElimination.cpp
@@ -30,6 +30,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
@@ -72,6 +73,7 @@ class PHIEliminationImpl {
LiveIntervals *LIS = nullptr;
MachineLoopInfo *MLI = nullptr;
MachineDominatorTree *MDT = nullptr;
+ MachinePostDominatorTree *PDT = nullptr;
/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
/// in predecessor basic blocks.
@@ -123,17 +125,22 @@ public:
auto *MLIWrapper = P->getAnalysisIfAvailable<MachineLoopInfoWrapperPass>();
auto *MDTWrapper =
P->getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
+ auto *PDTWrapper =
+ P->getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
LV = LVWrapper ? &LVWrapper->getLV() : nullptr;
LIS = LISWrapper ? &LISWrapper->getLIS() : nullptr;
MLI = MLIWrapper ? &MLIWrapper->getLI() : nullptr;
MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
+ PDT = PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr;
}
PHIEliminationImpl(MachineFunction &MF, MachineFunctionAnalysisManager &AM)
: LV(AM.getCachedResult<LiveVariablesAnalysis>(MF)),
LIS(AM.getCachedResult<LiveIntervalsAnalysis>(MF)),
MLI(AM.getCachedResult<MachineLoopAnalysis>(MF)),
- MDT(AM.getCachedResult<MachineDominatorTreeAnalysis>(MF)), MFAM(&AM) {}
+ MDT(AM.getCachedResult<MachineDominatorTreeAnalysis>(MF)),
+ PDT(AM.getCachedResult<MachinePostDominatorTreeAnalysis>(MF)),
+ MFAM(&AM) {}
bool run(MachineFunction &MF);
};
@@ -172,6 +179,7 @@ PHIEliminationPass::run(MachineFunction &MF,
PA.preserve<LiveVariablesAnalysis>();
PA.preserve<SlotIndexesAnalysis>();
PA.preserve<MachineDominatorTreeAnalysis>();
+ PA.preserve<MachinePostDominatorTreeAnalysis>();
PA.preserve<MachineLoopAnalysis>();
return PA;
}
@@ -197,6 +205,7 @@ void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<SlotIndexesWrapperPass>();
AU.addPreserved<LiveIntervalsWrapperPass>();
AU.addPreserved<MachineDominatorTreeWrapperPass>();
+ AU.addPreserved<MachinePostDominatorTreeWrapperPass>();
AU.addPreserved<MachineLoopInfoWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -204,15 +213,8 @@ void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
bool PHIEliminationImpl::run(MachineFunction &MF) {
MRI = &MF.getRegInfo();
- MachineDominatorTree *MDT = nullptr;
- if (P) {
- auto *MDTWrapper =
- P->getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
- MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
- } else {
- MDT = MFAM->getCachedResult<MachineDominatorTreeAnalysis>(MF);
- }
- MachineDomTreeUpdater MDTU(MDT, MachineDomTreeUpdater::UpdateStrategy::Lazy);
+ MachineDomTreeUpdater MDTU(MDT, PDT,
+ MachineDomTreeUpdater::UpdateStrategy::Lazy);
bool Changed = false;
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 763b3868a99c..1a63518ab37a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -81,12 +81,11 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
/// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an
/// implicit physical register output.
-void InstrEmitter::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
- Register SrcReg, VRBaseMapType &VRBaseMap) {
+void InstrEmitter::EmitCopyFromReg(SDValue Op, bool IsClone, Register SrcReg,
+ VRBaseMapType &VRBaseMap) {
Register VRBase;
if (SrcReg.isVirtual()) {
// Just use the input register directly!
- SDValue Op(Node, ResNo);
if (IsClone)
VRBaseMap.erase(Op);
bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
@@ -99,17 +98,15 @@ void InstrEmitter::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
// the CopyToReg'd destination register instead of creating a new vreg.
bool MatchReg = true;
const TargetRegisterClass *UseRC = nullptr;
- MVT VT = Node->getSimpleValueType(ResNo);
+ MVT VT = Op.getSimpleValueType();
// Stick to the preferred register classes for legal types.
if (TLI->isTypeLegal(VT))
- UseRC = TLI->getRegClassFor(VT, Node->isDivergent());
+ UseRC = TLI->getRegClassFor(VT, Op->isDivergent());
- for (SDNode *User : Node->users()) {
+ for (SDNode *User : Op->users()) {
bool Match = true;
- if (User->getOpcode() == ISD::CopyToReg &&
- User->getOperand(2).getNode() == Node &&
- User->getOperand(2).getResNo() == ResNo) {
+ if (User->getOpcode() == ISD::CopyToReg && User->getOperand(2) == Op) {
Register DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
if (DestReg.isVirtual()) {
VRBase = DestReg;
@@ -118,10 +115,8 @@ void InstrEmitter::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
Match = false;
} else {
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
- SDValue Op = User->getOperand(i);
- if (Op.getNode() != Node || Op.getResNo() != ResNo)
+ if (User->getOperand(i) != Op)
continue;
- MVT VT = Node->getSimpleValueType(Op.getResNo());
if (VT == MVT::Other || VT == MVT::Glue)
continue;
Match = false;
@@ -170,11 +165,11 @@ void InstrEmitter::EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
} else {
// Create the reg, emit the copy.
VRBase = MRI->createVirtualRegister(DstRC);
- BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
- VRBase).addReg(SrcReg);
+ BuildMI(*MBB, InsertPos, Op.getDebugLoc(), TII->get(TargetOpcode::COPY),
+ VRBase)
+ .addReg(SrcReg);
}
- SDValue Op(Node, ResNo);
if (IsClone)
VRBaseMap.erase(Op);
bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
@@ -1170,7 +1165,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
continue;
// This implicitly defined physreg has a use.
UsedRegs.push_back(Reg);
- EmitCopyFromReg(Node, i, IsClone, Reg, VRBaseMap);
+ EmitCopyFromReg(SDValue(Node, i), IsClone, Reg, VRBaseMap);
}
}
@@ -1283,7 +1278,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
}
case ISD::CopyFromReg: {
Register SrcReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
- EmitCopyFromReg(Node, 0, IsClone, SrcReg, VRBaseMap);
+ EmitCopyFromReg(SDValue(Node, 0), IsClone, SrcReg, VRBaseMap);
break;
}
case ISD::EH_LABEL:
diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 16d754cdc233..b465de8397c7 100644
--- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -48,8 +48,8 @@ private:
/// EmitCopyFromReg - Generate machine code for an CopyFromReg node or an
/// implicit physical register output.
- void EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone,
- Register SrcReg, VRBaseMapType &VRBaseMap);
+ void EmitCopyFromReg(SDValue Op, bool IsClone, Register SrcReg,
+ VRBaseMapType &VRBaseMap);
void CreateVirtualRegisters(SDNode *Node,
MachineInstrBuilder &MIB,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 2cad36eff9c8..83bb1dfe86c6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -197,7 +197,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_Unary(SDNode *N, RTLIB::Libcall LC) {
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
TargetLowering::MakeLibCallOptions CallOptions;
EVT OpVT = N->getOperand(0 + Offset).getValueType();
- CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+ CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
CallOptions, SDLoc(N),
Chain);
@@ -218,7 +218,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_Binary(SDNode *N, RTLIB::Libcall LC) {
TargetLowering::MakeLibCallOptions CallOptions;
EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(),
N->getOperand(1 + Offset).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0));
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Ops,
CallOptions, SDLoc(N),
Chain);
@@ -558,7 +558,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
EVT OpsVT[3] = { N->getOperand(0 + Offset).getValueType(),
N->getOperand(1 + Offset).getValueType(),
N->getOperand(2 + Offset).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0));
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG,
GetFPLibCall(N->getValueType(0),
RTLIB::FMA_F32,
@@ -642,7 +642,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
TargetLowering::MakeLibCallOptions CallOptions;
EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType();
- CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+ CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
CallOptions, SDLoc(N),
Chain);
@@ -658,7 +658,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
SDValue Op = N->getOperand(0);
TargetLowering::MakeLibCallOptions CallOptions;
EVT OpsVT[1] = { N->getOperand(0).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0));
SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op,
CallOptions, SDLoc(N)).first;
if (N->getValueType(0) == MVT::f32)
@@ -694,7 +694,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
TargetLowering::MakeLibCallOptions CallOptions;
EVT OpVT = N->getOperand(IsStrict ? 1 : 0).getValueType();
- CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+ CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
CallOptions, SDLoc(N),
Chain);
@@ -742,7 +742,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ExpOp(SDNode *N) {
TargetLowering::MakeLibCallOptions CallOptions;
EVT OpsVT[2] = { N->getOperand(0 + Offset).getValueType(),
N->getOperand(1 + Offset).getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0));
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Ops,
CallOptions, SDLoc(N),
Chain);
@@ -779,7 +779,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFREXP(SDNode *N) {
// TODO: setTypeListBeforeSoften can't properly express multiple return types,
// but we only really need to handle the 0th one for softening anyway.
- CallOptions.setTypeListBeforeSoften({OpsVT}, VT0, true)
+ CallOptions.setTypeListBeforeSoften({OpsVT}, VT0)
.setOpsTypeOverrides(CallOpsTypeOverrides);
auto [ReturnVal, Chain] = TLI.makeLibCall(DAG, LC, NVT0, Ops, CallOptions, DL,
@@ -828,7 +828,7 @@ bool DAGTypeLegalizer::SoftenFloatRes_UnaryWithTwoFPResults(
TargetLowering::MakeLibCallOptions CallOptions;
// TODO: setTypeListBeforeSoften can't properly express multiple return types,
// but since both returns have the same type it should be okay.
- CallOptions.setTypeListBeforeSoften({OpsVT}, VT, true)
+ CallOptions.setTypeListBeforeSoften({OpsVT}, VT)
.setOpsTypeOverrides(CallOpsTypeOverrides);
auto [ReturnVal, Chain] = TLI.makeLibCall(DAG, LC, NVT, Ops, CallOptions, DL,
@@ -1100,7 +1100,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
NVT, N->getOperand(IsStrict ? 1 : 0));
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setIsSigned(Signed);
- CallOptions.setTypeListBeforeSoften(SVT, RVT, true);
+ CallOptions.setTypeListBeforeSoften(SVT, RVT);
std::pair<SDValue, SDValue> Tmp =
TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
Op, CallOptions, dl, Chain);
@@ -1222,7 +1222,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
Op = GetSoftenedFloat(Op);
TargetLowering::MakeLibCallOptions CallOptions;
- CallOptions.setTypeListBeforeSoften(SVT, RVT, true);
+ CallOptions.setTypeListBeforeSoften(SVT, RVT);
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, RVT, Op,
CallOptions, SDLoc(N),
Chain);
@@ -1298,7 +1298,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
Op = GetSoftenedFloat(Op);
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
TargetLowering::MakeLibCallOptions CallOptions;
- CallOptions.setTypeListBeforeSoften(SVT, RVT, true);
+ CallOptions.setTypeListBeforeSoften(SVT, RVT);
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
CallOptions, dl, Chain);
@@ -1453,7 +1453,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_Unary(SDNode *N, RTLIB::Libcall LC) {
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
TargetLowering::MakeLibCallOptions CallOptions;
EVT OpVT = N->getOperand(0 + Offset).getValueType();
- CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+ CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
std::pair<SDValue, SDValue> Tmp = TLI.makeLibCall(DAG, LC, NVT, Op,
CallOptions, SDLoc(N),
Chain);
@@ -1551,6 +1551,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::VAARG: ExpandRes_VAARG(N, Lo, Hi); break;
case ISD::ConstantFP: ExpandFloatRes_ConstantFP(N, Lo, Hi); break;
+ case ISD::AssertNoFPClass: ExpandFloatRes_AssertNoFPClass(N, Lo, Hi); break;
case ISD::FABS: ExpandFloatRes_FABS(N, Lo, Hi); break;
case ISD::STRICT_FMINNUM:
case ISD::FMINNUM: ExpandFloatRes_FMINNUM(N, Lo, Hi); break;
@@ -1966,6 +1967,13 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi);
}
+void DAGTypeLegalizer::ExpandFloatRes_AssertNoFPClass(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ // TODO: Handle ppcf128 by preserving AssertNoFPClass for one of the halves.
+ SDLoc dl(N);
+ GetExpandedFloat(N->getOperand(0), Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
@@ -3559,7 +3567,7 @@ SDValue DAGTypeLegalizer::SoftPromoteHalfRes_FP_ROUND(SDNode *N) {
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
Op = GetSoftenedFloat(Op);
TargetLowering::MakeLibCallOptions CallOptions;
- CallOptions.setTypeListBeforeSoften(SVT, RVT, true);
+ CallOptions.setTypeListBeforeSoften(SVT, RVT);
std::pair<SDValue, SDValue> Tmp =
TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N), Chain);
if (IsStrict)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 63544e63e1da..33fa3012618b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -681,6 +681,7 @@ private:
SDNode *N, RTLIB::Libcall LC, std::optional<unsigned> CallRetResNo = {});
// clang-format off
+ void ExpandFloatRes_AssertNoFPClass(SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FABS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FACOS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandFloatRes_FASIN (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 68ea72c732e1..4b7fc4590811 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5460,6 +5460,83 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
}
return true;
+ case ISD::EXTRACT_SUBVECTOR: {
+ SDValue Src = Op.getOperand(0);
+ if (Src.getValueType().isScalableVector())
+ break;
+ uint64_t Idx = Op.getConstantOperandVal(1);
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrcElts = DemandedElts.zext(NumSrcElts).shl(Idx);
+ return isGuaranteedNotToBeUndefOrPoison(Src, DemandedSrcElts, PoisonOnly,
+ Depth + 1);
+ }
+
+ case ISD::INSERT_SUBVECTOR: {
+ if (Op.getValueType().isScalableVector())
+ break;
+ SDValue Src = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ uint64_t Idx = Op.getConstantOperandVal(2);
+ unsigned NumSubElts = Sub.getValueType().getVectorNumElements();
+ APInt DemandedSubElts = DemandedElts.extractBits(NumSubElts, Idx);
+ APInt DemandedSrcElts = DemandedElts;
+ DemandedSrcElts.clearBits(Idx, Idx + NumSubElts);
+
+ if (!!DemandedSubElts && !isGuaranteedNotToBeUndefOrPoison(
+ Sub, DemandedSubElts, PoisonOnly, Depth + 1))
+ return false;
+ if (!!DemandedSrcElts && !isGuaranteedNotToBeUndefOrPoison(
+ Src, DemandedSrcElts, PoisonOnly, Depth + 1))
+ return false;
+ return true;
+ }
+
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue Src = Op.getOperand(0);
+ auto *IndexC = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ EVT SrcVT = Src.getValueType();
+ if (SrcVT.isFixedLengthVector() && IndexC &&
+ IndexC->getAPIntValue().ult(SrcVT.getVectorNumElements())) {
+ APInt DemandedSrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
+ IndexC->getZExtValue());
+ return isGuaranteedNotToBeUndefOrPoison(Src, DemandedSrcElts, PoisonOnly,
+ Depth + 1);
+ }
+ break;
+ }
+
+ case ISD::INSERT_VECTOR_ELT: {
+ SDValue InVec = Op.getOperand(0);
+ SDValue InVal = Op.getOperand(1);
+ SDValue EltNo = Op.getOperand(2);
+ EVT VT = InVec.getValueType();
+ auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
+ if (IndexC && VT.isFixedLengthVector() &&
+ IndexC->getAPIntValue().ult(VT.getVectorNumElements())) {
+ if (DemandedElts[IndexC->getZExtValue()] &&
+ !isGuaranteedNotToBeUndefOrPoison(InVal, PoisonOnly, Depth + 1))
+ return false;
+ APInt InVecDemandedElts = DemandedElts;
+ InVecDemandedElts.clearBit(IndexC->getZExtValue());
+ if (!!InVecDemandedElts &&
+ !isGuaranteedNotToBeUndefOrPoison(InVec, InVecDemandedElts,
+ PoisonOnly, Depth + 1))
+ return false;
+ return true;
+ }
+ break;
+ }
+
+ case ISD::SCALAR_TO_VECTOR:
+ // Check upper (known undef) elements.
+ if (DemandedElts.ugt(1) && !PoisonOnly)
+ return false;
+ // Check element zero.
+ if (DemandedElts[0] && !isGuaranteedNotToBeUndefOrPoison(
+ Op.getOperand(0), PoisonOnly, Depth + 1))
+ return false;
+ return true;
+
case ISD::SPLAT_VECTOR:
return isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), PoisonOnly,
Depth + 1);
@@ -5482,6 +5559,52 @@ bool SelectionDAG::isGuaranteedNotToBeUndefOrPoison(SDValue Op,
return true;
}
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ // Shift amount operand is checked by canCreateUndefOrPoison. So it is
+ // enough to check operand 0 if Op can't create undef/poison.
+ return !canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly,
+ /*ConsiderFlags*/ true, Depth) &&
+ isGuaranteedNotToBeUndefOrPoison(Op.getOperand(0), DemandedElts,
+ PoisonOnly, Depth + 1);
+
+ case ISD::BSWAP:
+ case ISD::CTPOP:
+ case ISD::BITREVERSE:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ case ISD::SADDSAT:
+ case ISD::UADDSAT:
+ case ISD::SSUBSAT:
+ case ISD::USUBSAT:
+ case ISD::SSHLSAT:
+ case ISD::USHLSAT:
+ case ISD::SMIN:
+ case ISD::SMAX:
+ case ISD::UMIN:
+ case ISD::UMAX:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ANY_EXTEND:
+ case ISD::TRUNCATE:
+ case ISD::VSELECT: {
+ // If Op can't create undef/poison and none of its operands are undef/poison
+ // then Op is never undef/poison. A difference from the more common check
+ // below, outside the switch, is that we handle elementwise operations for
+ // which the DemandedElts mask is valid for all operands here.
+ return !canCreateUndefOrPoison(Op, DemandedElts, PoisonOnly,
+ /*ConsiderFlags*/ true, Depth) &&
+ all_of(Op->ops(), [&](SDValue V) {
+ return isGuaranteedNotToBeUndefOrPoison(V, DemandedElts,
+ PoisonOnly, Depth + 1);
+ });
+ }
+
// TODO: Search for noundef attributes from library functions.
// TODO: Pointers dereferenced by ISD::LOAD/STORE ops are noundef.
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index a71b4409a6b2..366a230eef95 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -2211,9 +2211,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
MVT::Other, Chains);
} else if (I.getNumOperands() != 0) {
- SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
- unsigned NumValues = ValueVTs.size();
+ SmallVector<Type *, 4> Types;
+ ComputeValueTypes(DL, I.getOperand(0)->getType(), Types);
+ unsigned NumValues = Types.size();
if (NumValues) {
SDValue RetOp = getValue(I.getOperand(0));
@@ -2233,7 +2233,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
bool RetInReg = F->getAttributes().hasRetAttr(Attribute::InReg);
for (unsigned j = 0; j != NumValues; ++j) {
- EVT VT = ValueVTs[j];
+ EVT VT = TLI.getValueType(DL, Types[j]);
if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
@@ -2275,7 +2275,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
for (unsigned i = 0; i < NumParts; ++i) {
Outs.push_back(ISD::OutputArg(Flags,
Parts[i].getValueType().getSimpleVT(),
- VT, I.getOperand(0)->getType(), 0, 0));
+ VT, Types[j], 0, 0));
OutVals.push_back(Parts[i]);
}
}
@@ -10983,15 +10983,21 @@ std::pair<SDValue, SDValue>
TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// Handle the incoming return values from the call.
CLI.Ins.clear();
- SmallVector<EVT, 4> RetTys;
+ SmallVector<Type *, 4> RetOrigTys;
SmallVector<TypeSize, 4> Offsets;
auto &DL = CLI.DAG.getDataLayout();
- ComputeValueVTs(*this, DL, CLI.RetTy, RetTys, &Offsets);
+ ComputeValueTypes(DL, CLI.RetTy, RetOrigTys, &Offsets);
+
+ SmallVector<EVT, 4> RetTys;
+ for (Type *Ty : RetOrigTys)
+ RetTys.push_back(getValueType(DL, Ty));
if (CLI.IsPostTypeLegalization) {
// If we are lowering a libcall after legalization, split the return type.
+ SmallVector<Type *, 4> OldRetOrigTys;
SmallVector<EVT, 4> OldRetTys;
SmallVector<TypeSize, 4> OldOffsets;
+ RetOrigTys.swap(OldRetOrigTys);
RetTys.swap(OldRetTys);
Offsets.swap(OldOffsets);
@@ -11001,6 +11007,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), RetVT);
unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), RetVT);
unsigned RegisterVTByteSZ = RegisterVT.getSizeInBits() / 8;
+ RetOrigTys.append(NumRegs, OldRetOrigTys[i]);
RetTys.append(NumRegs, RegisterVT);
for (unsigned j = 0; j != NumRegs; ++j)
Offsets.push_back(TypeSize::getFixed(Offset + j * RegisterVTByteSZ));
@@ -11069,7 +11076,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
unsigned NumRegs = getNumRegistersForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
for (unsigned i = 0; i != NumRegs; ++i) {
- ISD::InputArg Ret(Flags, RegisterVT, VT, CLI.RetTy,
+ ISD::InputArg Ret(Flags, RegisterVT, VT, RetOrigTys[I],
CLI.IsReturnValueUsed, ISD::InputArg::NoArgIndex, 0);
if (CLI.RetTy->isPointerTy()) {
Ret.Flags.setPointer();
@@ -11106,18 +11113,18 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
CLI.Outs.clear();
CLI.OutVals.clear();
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
- SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*this, DL, Args[i].Ty, ValueVTs);
+ SmallVector<Type *, 4> ArgTys;
+ ComputeValueTypes(DL, Args[i].Ty, ArgTys);
// FIXME: Split arguments if CLI.IsPostTypeLegalization
Type *FinalType = Args[i].Ty;
if (Args[i].IsByVal)
FinalType = Args[i].IndirectType;
bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
FinalType, CLI.CallConv, CLI.IsVarArg, DL);
- for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues;
+ for (unsigned Value = 0, NumValues = ArgTys.size(); Value != NumValues;
++Value) {
- EVT VT = ValueVTs[Value];
- Type *ArgTy = VT.getTypeForEVT(CLI.RetTy->getContext());
+ Type *ArgTy = ArgTys[Value];
+ EVT VT = getValueType(DL, ArgTy);
SDValue Op = SDValue(Args[i].Node.getNode(),
Args[i].Node.getResNo() + Value);
ISD::ArgFlagsTy Flags;
@@ -11130,10 +11137,9 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
if (i >= CLI.NumFixedArgs)
Flags.setVarArg();
- if (Args[i].Ty->isPointerTy()) {
+ if (ArgTy->isPointerTy()) {
Flags.setPointer();
- Flags.setPointerAddrSpace(
- cast<PointerType>(Args[i].Ty)->getAddressSpace());
+ Flags.setPointerAddrSpace(cast<PointerType>(ArgTy)->getAddressSpace());
}
if (Args[i].IsZExt)
Flags.setZExt();
@@ -11252,7 +11258,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// For scalable vectors the scalable part is currently handled
// by individual targets, so we just use the known minimum size here.
ISD::OutputArg MyFlags(
- Flags, Parts[j].getValueType().getSimpleVT(), VT, Args[i].Ty, i,
+ Flags, Parts[j].getValueType().getSimpleVT(), VT, ArgTy, i,
j * Parts[j].getValueType().getStoreSize().getKnownMinValue());
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
@@ -11645,8 +11651,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Set up the incoming argument description vector.
for (const Argument &Arg : F.args()) {
unsigned ArgNo = Arg.getArgNo();
- SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(*TLI, DAG.getDataLayout(), Arg.getType(), ValueVTs);
+ SmallVector<Type *, 4> Types;
+ ComputeValueTypes(DAG.getDataLayout(), Arg.getType(), Types);
bool isArgValueUsed = !Arg.use_empty();
unsigned PartBase = 0;
Type *FinalType = Arg.getType();
@@ -11654,17 +11660,15 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
FinalType = Arg.getParamByValType();
bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
FinalType, F.getCallingConv(), F.isVarArg(), DL);
- for (unsigned Value = 0, NumValues = ValueVTs.size();
- Value != NumValues; ++Value) {
- EVT VT = ValueVTs[Value];
- Type *ArgTy = VT.getTypeForEVT(*DAG.getContext());
+ for (unsigned Value = 0, NumValues = Types.size(); Value != NumValues;
+ ++Value) {
+ Type *ArgTy = Types[Value];
+ EVT VT = TLI->getValueType(DL, ArgTy);
ISD::ArgFlagsTy Flags;
-
- if (Arg.getType()->isPointerTy()) {
+ if (ArgTy->isPointerTy()) {
Flags.setPointer();
- Flags.setPointerAddrSpace(
- cast<PointerType>(Arg.getType())->getAddressSpace());
+ Flags.setPointerAddrSpace(cast<PointerType>(ArgTy)->getAddressSpace());
}
if (Arg.hasAttribute(Attribute::ZExt))
Flags.setZExt();
@@ -11768,7 +11772,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// are responsible for handling scalable vector arguments and
// return values.
ISD::InputArg MyFlags(
- Flags, RegisterVT, VT, Arg.getType(), isArgValueUsed, ArgNo,
+ Flags, RegisterVT, VT, ArgTy, isArgValueUsed, ArgNo,
PartBase + i * RegisterVT.getStoreSize().getKnownMinValue());
if (NumRegs > 1 && i == 0)
MyFlags.Flags.setSplit();
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8fbabfa55e93..911bbabc42aa 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -420,7 +420,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
TargetLowering::MakeLibCallOptions CallOptions;
EVT OpsVT[2] = { OldLHS.getValueType(),
OldRHS.getValueType() };
- CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
+ CallOptions.setTypeListBeforeSoften(OpsVT, RetVT);
auto Call = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl, Chain);
NewLHS = Call.first;
NewRHS = DAG.getConstant(0, dl, RetVT);
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 61ff2dfe5be2..350948a92a3a 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -1738,13 +1738,13 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
AttributeList attr,
SmallVectorImpl<ISD::OutputArg> &Outs,
const TargetLowering &TLI, const DataLayout &DL) {
- SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, DL, ReturnType, ValueVTs);
- unsigned NumValues = ValueVTs.size();
+ SmallVector<Type *, 4> Types;
+ ComputeValueTypes(DL, ReturnType, Types);
+ unsigned NumValues = Types.size();
if (NumValues == 0) return;
- for (unsigned j = 0, f = NumValues; j != f; ++j) {
- EVT VT = ValueVTs[j];
+ for (Type *Ty : Types) {
+ EVT VT = TLI.getValueType(DL, Ty);
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
if (attr.hasRetAttr(Attribute::SExt))
@@ -1772,7 +1772,7 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
Flags.setZExt();
for (unsigned i = 0; i < NumParts; ++i)
- Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, ReturnType, 0, 0));
+ Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, Ty, 0, 0));
}
}
diff --git a/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp b/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
index 19c000e2472a..d460cf6de8a4 100644
--- a/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
+++ b/llvm/lib/ExecutionEngine/Orc/ThreadSafeModule.cpp
@@ -14,40 +14,39 @@
namespace llvm {
namespace orc {
-ThreadSafeModule cloneToContext(const ThreadSafeModule &TSM,
- ThreadSafeContext TSCtx,
- GVPredicate ShouldCloneDef,
- GVModifier UpdateClonedDefSource) {
- assert(TSM && "Can not clone null module");
-
- if (!ShouldCloneDef)
- ShouldCloneDef = [](const GlobalValue &) { return true; };
-
- // First copy the source module into a buffer.
+static std::pair<std::string, SmallVector<char, 1>>
+serializeModule(const Module &M, GVPredicate ShouldCloneDef,
+ GVModifier UpdateClonedDefSource) {
std::string ModuleName;
SmallVector<char, 1> ClonedModuleBuffer;
- TSM.withModuleDo([&](Module &M) {
- ModuleName = M.getModuleIdentifier();
- std::set<GlobalValue *> ClonedDefsInSrc;
- ValueToValueMapTy VMap;
- auto Tmp = CloneModule(M, VMap, [&](const GlobalValue *GV) {
- if (ShouldCloneDef(*GV)) {
- ClonedDefsInSrc.insert(const_cast<GlobalValue *>(GV));
- return true;
- }
- return false;
- });
-
- if (UpdateClonedDefSource)
- for (auto *GV : ClonedDefsInSrc)
- UpdateClonedDefSource(*GV);
-
- BitcodeWriter BCWriter(ClonedModuleBuffer);
- BCWriter.writeModule(*Tmp);
- BCWriter.writeSymtab();
- BCWriter.writeStrtab();
+
+ ModuleName = M.getModuleIdentifier();
+ std::set<GlobalValue *> ClonedDefsInSrc;
+ ValueToValueMapTy VMap;
+ auto Tmp = CloneModule(M, VMap, [&](const GlobalValue *GV) {
+ if (ShouldCloneDef(*GV)) {
+ ClonedDefsInSrc.insert(const_cast<GlobalValue *>(GV));
+ return true;
+ }
+ return false;
});
+ if (UpdateClonedDefSource)
+ for (auto *GV : ClonedDefsInSrc)
+ UpdateClonedDefSource(*GV);
+
+ BitcodeWriter BCWriter(ClonedModuleBuffer);
+ BCWriter.writeModule(*Tmp);
+ BCWriter.writeSymtab();
+ BCWriter.writeStrtab();
+
+ return {std::move(ModuleName), std::move(ClonedModuleBuffer)};
+}
+
+ThreadSafeModule
+deserializeModule(std::string ModuleName,
+ const SmallVector<char, 1> &ClonedModuleBuffer,
+ ThreadSafeContext TSCtx) {
MemoryBufferRef ClonedModuleBufferRef(
StringRef(ClonedModuleBuffer.data(), ClonedModuleBuffer.size()),
"cloned module buffer");
@@ -63,6 +62,40 @@ ThreadSafeModule cloneToContext(const ThreadSafeModule &TSM,
return ThreadSafeModule(std::move(M), std::move(TSCtx));
}
+ThreadSafeModule
+cloneExternalModuleToContext(const Module &M, ThreadSafeContext TSCtx,
+ GVPredicate ShouldCloneDef,
+ GVModifier UpdateClonedDefSource) {
+
+ if (!ShouldCloneDef)
+ ShouldCloneDef = [](const GlobalValue &) { return true; };
+
+ auto [ModuleName, ClonedModuleBuffer] = serializeModule(
+ M, std::move(ShouldCloneDef), std::move(UpdateClonedDefSource));
+
+ return deserializeModule(std::move(ModuleName), ClonedModuleBuffer,
+ std::move(TSCtx));
+}
+
+ThreadSafeModule cloneToContext(const ThreadSafeModule &TSM,
+ ThreadSafeContext TSCtx,
+ GVPredicate ShouldCloneDef,
+ GVModifier UpdateClonedDefSource) {
+ assert(TSM && "Can not clone null module");
+
+ if (!ShouldCloneDef)
+ ShouldCloneDef = [](const GlobalValue &) { return true; };
+
+ // First copy the source module into a buffer.
+ auto [ModuleName, ClonedModuleBuffer] = TSM.withModuleDo([&](Module &M) {
+ return serializeModule(M, std::move(ShouldCloneDef),
+ std::move(UpdateClonedDefSource));
+ });
+
+ return deserializeModule(std::move(ModuleName), ClonedModuleBuffer,
+ std::move(TSCtx));
+}
+
ThreadSafeModule cloneToNewContext(const ThreadSafeModule &TSM,
GVPredicate ShouldCloneDef,
GVModifier UpdateClonedDefSource) {
diff --git a/llvm/lib/IR/Core.cpp b/llvm/lib/IR/Core.cpp
index f7ef4aa473ef..8b5965ba45a6 100644
--- a/llvm/lib/IR/Core.cpp
+++ b/llvm/lib/IR/Core.cpp
@@ -2186,6 +2186,11 @@ void LLVMGlobalSetMetadata(LLVMValueRef Global, unsigned Kind,
unwrap<GlobalObject>(Global)->setMetadata(Kind, unwrap<MDNode>(MD));
}
+void LLVMGlobalAddMetadata(LLVMValueRef Global, unsigned Kind,
+ LLVMMetadataRef MD) {
+ unwrap<GlobalObject>(Global)->addMetadata(Kind, *unwrap<MDNode>(MD));
+}
+
void LLVMGlobalEraseMetadata(LLVMValueRef Global, unsigned Kind) {
unwrap<GlobalObject>(Global)->eraseMetadata(Kind);
}
@@ -2194,6 +2199,11 @@ void LLVMGlobalClearMetadata(LLVMValueRef Global) {
unwrap<GlobalObject>(Global)->clearMetadata();
}
+void LLVMGlobalAddDebugInfo(LLVMValueRef Global, LLVMMetadataRef GVE) {
+ unwrap<GlobalVariable>(Global)->addDebugInfo(
+ unwrap<DIGlobalVariableExpression>(GVE));
+}
+
/*--.. Operations on global variables ......................................--*/
LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name) {
diff --git a/llvm/lib/IR/DebugInfo.cpp b/llvm/lib/IR/DebugInfo.cpp
index 65d48408438b..e9425e1fabd5 100644
--- a/llvm/lib/IR/DebugInfo.cpp
+++ b/llvm/lib/IR/DebugInfo.cpp
@@ -1915,23 +1915,16 @@ void at::RAUW(DIAssignID *Old, DIAssignID *New) {
}
void at::deleteAll(Function *F) {
- SmallVector<DbgAssignIntrinsic *, 12> ToDelete;
- SmallVector<DbgVariableRecord *, 12> DPToDelete;
for (BasicBlock &BB : *F) {
for (Instruction &I : BB) {
- for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange()))
+ for (DbgVariableRecord &DVR :
+ make_early_inc_range(filterDbgVars(I.getDbgRecordRange())))
if (DVR.isDbgAssign())
- DPToDelete.push_back(&DVR);
- if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(&I))
- ToDelete.push_back(DAI);
- else
- I.setMetadata(LLVMContext::MD_DIAssignID, nullptr);
+ DVR.eraseFromParent();
+
+ I.setMetadata(LLVMContext::MD_DIAssignID, nullptr);
}
}
- for (auto *DAI : ToDelete)
- DAI->eraseFromParent();
- for (auto *DVR : DPToDelete)
- DVR->eraseFromParent();
}
/// FIXME: Remove this wrapper function and call
@@ -2008,8 +2001,6 @@ void at::remapAssignID(DenseMap<DIAssignID *, DIAssignID *> &Map,
}
if (auto *ID = I.getMetadata(LLVMContext::MD_DIAssignID))
I.setMetadata(LLVMContext::MD_DIAssignID, GetNewID(ID));
- else if (auto *DAI = dyn_cast<DbgAssignIntrinsic>(&I))
- DAI->setAssignId(GetNewID(DAI->getAssignID()));
}
/// Collect constant properies (base, size, offset) of \p StoreDest.
diff --git a/llvm/lib/IR/RuntimeLibcalls.cpp b/llvm/lib/IR/RuntimeLibcalls.cpp
index a8e6c7938cf5..ac845c499878 100644
--- a/llvm/lib/IR/RuntimeLibcalls.cpp
+++ b/llvm/lib/IR/RuntimeLibcalls.cpp
@@ -9,6 +9,7 @@
#include "llvm/IR/RuntimeLibcalls.h"
#include "llvm/ADT/StringTable.h"
#include "llvm/Support/Debug.h"
+#include "llvm/TargetParser/ARMTargetParser.h"
#define DEBUG_TYPE "runtime-libcalls-info"
@@ -21,61 +22,33 @@ using namespace RTLIB;
#undef GET_INIT_RUNTIME_LIBCALL_NAMES
#undef GET_SET_TARGET_RUNTIME_LIBCALL_SETS
-static void setARMLibcallNames(RuntimeLibcallsInfo &Info, const Triple &TT,
- FloatABI::ABIType FloatABIType,
- EABI EABIVersion) {
- static const RTLIB::LibcallImpl AAPCS_Libcalls[] = {
- RTLIB::__aeabi_dadd, RTLIB::__aeabi_ddiv,
- RTLIB::__aeabi_dmul, RTLIB::__aeabi_dsub,
- RTLIB::__aeabi_dcmpeq__oeq, RTLIB::__aeabi_dcmpeq__une,
- RTLIB::__aeabi_dcmplt, RTLIB::__aeabi_dcmple,
- RTLIB::__aeabi_dcmpge, RTLIB::__aeabi_dcmpgt,
- RTLIB::__aeabi_dcmpun, RTLIB::__aeabi_fadd,
- RTLIB::__aeabi_fdiv, RTLIB::__aeabi_fmul,
- RTLIB::__aeabi_fsub, RTLIB::__aeabi_fcmpeq__oeq,
- RTLIB::__aeabi_fcmpeq__une, RTLIB::__aeabi_fcmplt,
- RTLIB::__aeabi_fcmple, RTLIB::__aeabi_fcmpge,
- RTLIB::__aeabi_fcmpgt, RTLIB::__aeabi_fcmpun,
- RTLIB::__aeabi_d2iz, RTLIB::__aeabi_d2uiz,
- RTLIB::__aeabi_d2lz, RTLIB::__aeabi_d2ulz,
- RTLIB::__aeabi_f2iz, RTLIB::__aeabi_f2uiz,
- RTLIB::__aeabi_f2lz, RTLIB::__aeabi_f2ulz,
- RTLIB::__aeabi_d2f, RTLIB::__aeabi_d2h,
- RTLIB::__aeabi_f2d, RTLIB::__aeabi_i2d,
- RTLIB::__aeabi_ui2d, RTLIB::__aeabi_l2d,
- RTLIB::__aeabi_ul2d, RTLIB::__aeabi_i2f,
- RTLIB::__aeabi_ui2f, RTLIB::__aeabi_l2f,
- RTLIB::__aeabi_ul2f, RTLIB::__aeabi_lmul,
- RTLIB::__aeabi_llsl, RTLIB::__aeabi_llsr,
- RTLIB::__aeabi_lasr, RTLIB::__aeabi_idiv,
- RTLIB::__aeabi_idivmod, RTLIB::__aeabi_uidivmod,
- RTLIB::__aeabi_ldivmod, RTLIB::__aeabi_uidiv,
- RTLIB::__aeabi_uldivmod, RTLIB::__aeabi_f2h,
- RTLIB::__aeabi_d2h, RTLIB::__aeabi_h2f,
- RTLIB::__aeabi_memcpy, RTLIB::__aeabi_memmove,
- RTLIB::__aeabi_memset, RTLIB::__aeabi_memcpy4,
- RTLIB::__aeabi_memcpy8, RTLIB::__aeabi_memmove4,
- RTLIB::__aeabi_memmove8, RTLIB::__aeabi_memset4,
- RTLIB::__aeabi_memset8, RTLIB::__aeabi_memclr,
- RTLIB::__aeabi_memclr4, RTLIB::__aeabi_memclr8};
-
- for (RTLIB::LibcallImpl Impl : AAPCS_Libcalls)
- Info.setLibcallImplCallingConv(Impl, CallingConv::ARM_AAPCS);
-}
-
/// Set default libcall names. If a target wants to opt-out of a libcall it
/// should be placed here.
void RuntimeLibcallsInfo::initLibcalls(const Triple &TT,
ExceptionHandling ExceptionModel,
FloatABI::ABIType FloatABI,
EABI EABIVersion, StringRef ABIName) {
- setTargetRuntimeLibcallSets(TT, FloatABI);
+ setTargetRuntimeLibcallSets(TT, FloatABI, EABIVersion, ABIName);
if (ExceptionModel == ExceptionHandling::SjLj)
setLibcallImpl(RTLIB::UNWIND_RESUME, RTLIB::_Unwind_SjLj_Resume);
if (TT.isARM() || TT.isThumb()) {
- setARMLibcallNames(*this, TT, FloatABI, EABIVersion);
+ // The half <-> float conversion functions are always soft-float on
+ // non-watchos platforms, but are needed for some targets which use a
+ // hard-float calling convention by default.
+ if (!TT.isWatchABI()) {
+ if (isAAPCS_ABI(TT, ABIName)) {
+ setLibcallImplCallingConv(RTLIB::__truncsfhf2, CallingConv::ARM_AAPCS);
+ setLibcallImplCallingConv(RTLIB::__truncdfhf2, CallingConv::ARM_AAPCS);
+ setLibcallImplCallingConv(RTLIB::__extendhfsf2, CallingConv::ARM_AAPCS);
+ } else {
+ setLibcallImplCallingConv(RTLIB::__truncsfhf2, CallingConv::ARM_APCS);
+ setLibcallImplCallingConv(RTLIB::__truncdfhf2, CallingConv::ARM_APCS);
+ setLibcallImplCallingConv(RTLIB::__extendhfsf2, CallingConv::ARM_APCS);
+ }
+ }
+
return;
}
@@ -130,6 +103,11 @@ RuntimeLibcallsInfo::getRecognizedLibcallImpls(StringRef FuncName) {
return make_range(EntriesBegin, EntriesEnd);
}
+bool RuntimeLibcallsInfo::isAAPCS_ABI(const Triple &TT, StringRef ABIName) {
+ const ARM::ARMABI TargetABI = ARM::computeTargetABI(TT, ABIName);
+ return TargetABI == ARM::ARM_ABI_AAPCS || TargetABI == ARM::ARM_ABI_AAPCS16;
+}
+
bool RuntimeLibcallsInfo::darwinHasExp10(const Triple &TT) {
switch (TT.getOS()) {
case Triple::MacOSX:
diff --git a/llvm/lib/MCA/Instruction.cpp b/llvm/lib/MCA/Instruction.cpp
index d4adfce59713..79667080ecbb 100644
--- a/llvm/lib/MCA/Instruction.cpp
+++ b/llvm/lib/MCA/Instruction.cpp
@@ -128,6 +128,13 @@ void WriteState::dump() const {
}
#endif
+#ifndef NDEBUG
+void ReadState::dump() const {
+ dbgs() << "{ OpIdx=" << RD->OpIndex << ", RegID " << getRegisterID()
+ << ", Cycles Left=" << CyclesLeft << " }";
+}
+#endif
+
const CriticalDependency &Instruction::computeCriticalRegDep() {
if (CriticalRegDep.Cycles)
return CriticalRegDep;
diff --git a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
index cdf4412c6477..fc2577e6ada5 100644
--- a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
+++ b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
@@ -519,7 +519,7 @@ Error InstrProfSymtab::create(SectionRef &Section) {
return Error::success();
}
-StringRef InstrProfSymtab::getFuncName(uint64_t Pointer, size_t Size) {
+StringRef InstrProfSymtab::getFuncName(uint64_t Pointer, size_t Size) const {
if (Pointer < Address)
return StringRef();
auto Offset = Pointer - Address;
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 542572975dc8..7885e127c341 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -684,13 +684,13 @@ Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName,
return Error::success();
}
-uint64_t InstrProfSymtab::getVTableHashFromAddress(uint64_t Address) {
+uint64_t InstrProfSymtab::getVTableHashFromAddress(uint64_t Address) const {
// Given a runtime address, look up the hash value in the interval map, and
// fallback to value 0 if a hash value is not found.
return VTableAddrMap.lookup(Address, 0);
}
-uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address) {
+uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address) const {
finalizeSymtab();
auto It = partition_point(AddrToMD5Map, [=](std::pair<uint64_t, uint64_t> A) {
return A.first < Address;
diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp
index 954af7fff92a..1547f48bc7ac 100644
--- a/llvm/lib/Support/APInt.cpp
+++ b/llvm/lib/Support/APInt.cpp
@@ -3136,6 +3136,22 @@ APInt APIntOps::mulhu(const APInt &C1, const APInt &C2) {
return (C1Ext * C2Ext).extractBits(C1.getBitWidth(), C1.getBitWidth());
}
+APInt APIntOps::mulsExtended(const APInt &C1, const APInt &C2) {
+ assert(C1.getBitWidth() == C2.getBitWidth() && "Unequal bitwidths");
+ unsigned FullWidth = C1.getBitWidth() * 2;
+ APInt C1Ext = C1.sext(FullWidth);
+ APInt C2Ext = C2.sext(FullWidth);
+ return C1Ext * C2Ext;
+}
+
+APInt APIntOps::muluExtended(const APInt &C1, const APInt &C2) {
+ assert(C1.getBitWidth() == C2.getBitWidth() && "Unequal bitwidths");
+ unsigned FullWidth = C1.getBitWidth() * 2;
+ APInt C1Ext = C1.zext(FullWidth);
+ APInt C2Ext = C2.zext(FullWidth);
+ return C1Ext * C2Ext;
+}
+
APInt APIntOps::pow(const APInt &X, int64_t N) {
assert(N >= 0 && "negative exponents not supported.");
APInt Acc = APInt(X.getBitWidth(), 1);
diff --git a/llvm/lib/Support/MemoryBuffer.cpp b/llvm/lib/Support/MemoryBuffer.cpp
index 601f11f6d23c..1c4645ad8364 100644
--- a/llvm/lib/Support/MemoryBuffer.cpp
+++ b/llvm/lib/Support/MemoryBuffer.cpp
@@ -501,8 +501,14 @@ getOpenFileImpl(sys::fs::file_t FD, const Twine &Filename, uint64_t FileSize,
std::unique_ptr<MB> Result(
new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile<MB>(
RequiresNullTerminator, FD, MapSize, Offset, EC));
- if (!EC)
- return std::move(Result);
+ if (!EC) {
+ // On at least Linux, and possibly on other systems, mmap may return pages
+ // from the page cache that are not properly filled with trailing zeroes,
+ // if some prior user of the page wrote non-zero bytes. Detect this and
+ // don't use mmap in that case.
+ if (!RequiresNullTerminator || *Result->getBufferEnd() == '\0')
+ return std::move(Result);
+ }
}
#ifdef __MVS__
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index ba02c82b25aa..885f2a94f85f 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1487,11 +1487,8 @@ bool isVGInstruction(MachineBasicBlock::iterator MBBI) {
if (Opc == AArch64::BL) {
auto Op1 = MBBI->getOperand(0);
- auto &TLI =
- *MBBI->getMF()->getSubtarget<AArch64Subtarget>().getTargetLowering();
- char const *GetCurrentVG =
- TLI.getLibcallName(RTLIB::SMEABI_GET_CURRENT_VG);
- return Op1.isSymbol() && StringRef(Op1.getSymbolName()) == GetCurrentVG;
+ return Op1.isSymbol() &&
+ (StringRef(Op1.getSymbolName()) == "__arm_get_current_vg");
}
}
@@ -3471,7 +3468,6 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
- auto &TLI = *MF.getSubtarget<AArch64Subtarget>().getTargetLowering();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
bool NeedsWinCFI = needsWinCFI(MF);
@@ -3585,11 +3581,11 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
.addReg(AArch64::X0, RegState::Implicit)
.setMIFlag(MachineInstr::FrameSetup);
- RTLIB::Libcall LC = RTLIB::SMEABI_GET_CURRENT_VG;
- const uint32_t *RegMask =
- TRI->getCallPreservedMask(MF, TLI.getLibcallCallingConv(LC));
+ const uint32_t *RegMask = TRI->getCallPreservedMask(
+ MF,
+ CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1);
BuildMI(MBB, MI, DL, TII.get(AArch64::BL))
- .addExternalSymbol(TLI.getLibcallName(LC))
+ .addExternalSymbol("__arm_get_current_vg")
.addRegMask(RegMask)
.addReg(AArch64::X0, RegState::ImplicitDefine)
.setMIFlag(MachineInstr::FrameSetup);
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 224bbe7e38a1..2072e48914ae 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3083,12 +3083,13 @@ AArch64TargetLowering::EmitGetSMESaveSize(MachineInstr &MI,
AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
if (FuncInfo->isSMESaveBufferUsed()) {
- RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE_SIZE;
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::BL))
- .addExternalSymbol(getLibcallName(LC))
+ .addExternalSymbol("__arm_sme_state_size")
.addReg(AArch64::X0, RegState::ImplicitDefine)
- .addRegMask(TRI->getCallPreservedMask(*MF, getLibcallCallingConv(LC)));
+ .addRegMask(TRI->getCallPreservedMask(
+ *MF, CallingConv::
+ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1));
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
MI.getOperand(0).getReg())
.addReg(AArch64::X0);
@@ -3108,12 +3109,13 @@ AArch64TargetLowering::EmitEntryPStateSM(MachineInstr &MI,
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
Register ResultReg = MI.getOperand(0).getReg();
if (FuncInfo->isPStateSMRegUsed()) {
- RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE;
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::BL))
- .addExternalSymbol(getLibcallName(LC))
+ .addExternalSymbol("__arm_sme_state")
.addReg(AArch64::X0, RegState::ImplicitDefine)
- .addRegMask(TRI->getCallPreservedMask(*MF, getLibcallCallingConv(LC)));
+ .addRegMask(TRI->getCallPreservedMask(
+ *MF, CallingConv::
+ AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2));
BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), ResultReg)
.addReg(AArch64::X0);
} else {
@@ -5737,15 +5739,15 @@ static SDValue getSVEPredicateBitCast(EVT VT, SDValue Op, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::getRuntimePStateSM(SelectionDAG &DAG,
SDValue Chain, SDLoc DL,
EVT VT) const {
- RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE;
- SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+ SDValue Callee = DAG.getExternalSymbol("__arm_sme_state",
getPointerTy(DAG.getDataLayout()));
Type *Int64Ty = Type::getInt64Ty(*DAG.getContext());
Type *RetTy = StructType::get(Int64Ty, Int64Ty);
TargetLowering::CallLoweringInfo CLI(DAG);
ArgListTy Args;
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
- getLibcallCallingConv(LC), RetTy, Callee, std::move(Args));
+ CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X2,
+ RetTy, Callee, std::move(Args));
std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
SDValue Mask = DAG.getConstant(/*PSTATE.SM*/ 1, DL, MVT::i64);
return DAG.getNode(ISD::AND, DL, MVT::i64, CallResult.first.getOperand(0),
@@ -8598,12 +8600,12 @@ static void analyzeCallOperands(const AArch64TargetLowering &TLI,
}
static SMECallAttrs
-getSMECallAttrs(const Function &Caller, const TargetLowering &TLI,
+getSMECallAttrs(const Function &Caller,
const TargetLowering::CallLoweringInfo &CLI) {
if (CLI.CB)
- return SMECallAttrs(*CLI.CB, &TLI);
+ return SMECallAttrs(*CLI.CB);
if (auto *ES = dyn_cast<ExternalSymbolSDNode>(CLI.Callee))
- return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(ES->getSymbol(), TLI));
+ return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(ES->getSymbol()));
return SMECallAttrs(SMEAttrs(Caller), SMEAttrs(SMEAttrs::Normal));
}
@@ -8625,7 +8627,7 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
// SME Streaming functions are not eligible for TCO as they may require
// the streaming mode or ZA to be restored after returning from the call.
- SMECallAttrs CallAttrs = getSMECallAttrs(CallerF, *this, CLI);
+ SMECallAttrs CallAttrs = getSMECallAttrs(CallerF, CLI);
if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() ||
CallAttrs.requiresPreservingAllZAState() ||
CallAttrs.caller().hasStreamingBody())
@@ -8919,14 +8921,14 @@ static SDValue emitSMEStateSaveRestore(const AArch64TargetLowering &TLI,
DAG.getCopyFromReg(Chain, DL, Info->getSMESaveBufferAddr(), MVT::i64);
Args.push_back(Entry);
- RTLIB::Libcall LC =
- IsSave ? RTLIB::SMEABI_SME_SAVE : RTLIB::SMEABI_SME_RESTORE;
- SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy(DAG.getDataLayout()));
+ SDValue Callee =
+ DAG.getExternalSymbol(IsSave ? "__arm_sme_save" : "__arm_sme_restore",
+ TLI.getPointerTy(DAG.getDataLayout()));
auto *RetTy = Type::getVoidTy(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(DL).setChain(Chain).setLibCallee(
- TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args));
+ CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X1, RetTy,
+ Callee, std::move(Args));
return TLI.LowerCallTo(CLI).second;
}
@@ -9114,7 +9116,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
// Determine whether we need any streaming mode changes.
- SMECallAttrs CallAttrs = getSMECallAttrs(MF.getFunction(), *this, CLI);
+ SMECallAttrs CallAttrs = getSMECallAttrs(MF.getFunction(), CLI);
auto DescribeCallsite =
[&](OptimizationRemarkAnalysis &R) -> OptimizationRemarkAnalysis & {
@@ -9691,12 +9693,11 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
if (RequiresLazySave) {
// Conditionally restore the lazy save using a pseudo node.
- RTLIB::Libcall LC = RTLIB::SMEABI_TPIDR2_RESTORE;
TPIDR2Object &TPIDR2 = FuncInfo->getTPIDR2Obj();
SDValue RegMask = DAG.getRegisterMask(
- TRI->getCallPreservedMask(MF, getLibcallCallingConv(LC)));
+ TRI->SMEABISupportRoutinesCallPreservedMaskFromX0());
SDValue RestoreRoutine = DAG.getTargetExternalSymbol(
- getLibcallName(LC), getPointerTy(DAG.getDataLayout()));
+ "__arm_tpidr2_restore", getPointerTy(DAG.getDataLayout()));
SDValue TPIDR2_EL0 = DAG.getNode(
ISD::INTRINSIC_W_CHAIN, DL, MVT::i64, Result,
DAG.getConstant(Intrinsic::aarch64_sme_get_tpidr2, DL, MVT::i32));
@@ -29035,7 +29036,7 @@ bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const {
// Checks to allow the use of SME instructions
if (auto *Base = dyn_cast<CallBase>(&Inst)) {
- auto CallAttrs = SMECallAttrs(*Base, this);
+ auto CallAttrs = SMECallAttrs(*Base);
if (CallAttrs.requiresSMChange() || CallAttrs.requiresLazySave() ||
CallAttrs.requiresPreservingZT0() ||
CallAttrs.requiresPreservingAllZAState())
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index fb59c9f131fb..a55f103bff38 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -5920,7 +5920,7 @@ static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
// Build up the expression (Reg + NumBytes + VG * NumVGScaledBytes)
SmallString<64> Expr;
unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
- assert(DwarfReg >= 0 && DwarfReg <= 31 && "DwarfReg out of bounds (0..31)");
+ assert(DwarfReg <= 31 && "DwarfReg out of bounds (0..31)");
// Reg + NumBytes
Expr.push_back(dwarf::DW_OP_breg0 + DwarfReg);
appendLEB128<LEB128Sign::Signed>(Expr, NumBytes);
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 4523c659dd39..3fba7e853eaf 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -220,16 +220,20 @@ static cl::opt<bool> EnableFixedwidthAutovecInStreamingMode(
static cl::opt<bool> EnableScalableAutovecInStreamingMode(
"enable-scalable-autovec-in-streaming-mode", cl::init(false), cl::Hidden);
-static bool isSMEABIRoutineCall(const CallInst &CI, const TargetLowering &TLI) {
+static bool isSMEABIRoutineCall(const CallInst &CI) {
const auto *F = CI.getCalledFunction();
- return F && SMEAttrs(F->getName(), TLI).isSMEABIRoutine();
+ return F && StringSwitch<bool>(F->getName())
+ .Case("__arm_sme_state", true)
+ .Case("__arm_tpidr2_save", true)
+ .Case("__arm_tpidr2_restore", true)
+ .Case("__arm_za_disable", true)
+ .Default(false);
}
/// Returns true if the function has explicit operations that can only be
/// lowered using incompatible instructions for the selected mode. This also
/// returns true if the function F may use or modify ZA state.
-static bool hasPossibleIncompatibleOps(const Function *F,
- const TargetLowering &TLI) {
+static bool hasPossibleIncompatibleOps(const Function *F) {
for (const BasicBlock &BB : *F) {
for (const Instruction &I : BB) {
// Be conservative for now and assume that any call to inline asm or to
@@ -238,7 +242,7 @@ static bool hasPossibleIncompatibleOps(const Function *F,
// all native LLVM instructions can be lowered to compatible instructions.
if (isa<CallInst>(I) && !I.isDebugOrPseudoInst() &&
(cast<CallInst>(I).isInlineAsm() || isa<IntrinsicInst>(I) ||
- isSMEABIRoutineCall(cast<CallInst>(I), TLI)))
+ isSMEABIRoutineCall(cast<CallInst>(I))))
return true;
}
}
@@ -286,7 +290,7 @@ bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
if (CallAttrs.requiresLazySave() || CallAttrs.requiresSMChange() ||
CallAttrs.requiresPreservingZT0() ||
CallAttrs.requiresPreservingAllZAState()) {
- if (hasPossibleIncompatibleOps(Callee, *getTLI()))
+ if (hasPossibleIncompatibleOps(Callee))
return false;
}
@@ -353,7 +357,7 @@ AArch64TTIImpl::getInlineCallPenalty(const Function *F, const CallBase &Call,
// change only once and avoid inlining of G into F.
SMEAttrs FAttrs(*F);
- SMECallAttrs CallAttrs(Call, getTLI());
+ SMECallAttrs CallAttrs(Call);
if (SMECallAttrs(FAttrs, CallAttrs.callee()).requiresSMChange()) {
if (F == Call.getCaller()) // (1)
@@ -4333,7 +4337,8 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
InstructionCost
AArch64TTIImpl::getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
- const SCEV *Ptr) const {
+ const SCEV *Ptr,
+ TTI::TargetCostKind CostKind) const {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 647b242d74fb..9c96fdd42781 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -238,8 +238,9 @@ public:
ArrayRef<const Value *> Args = {},
const Instruction *CxtI = nullptr) const override;
- InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
- const SCEV *Ptr) const override;
+ InstructionCost
+ getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr,
+ TTI::TargetCostKind CostKind) const override;
InstructionCost getCmpSelInstrCost(
unsigned Opcode, Type *ValTy, Type *CondTy, CmpInst::Predicate VecPred,
diff --git a/llvm/lib/Target/AArch64/SMEABIPass.cpp b/llvm/lib/Target/AArch64/SMEABIPass.cpp
index 2008516885c3..4af4d4930662 100644
--- a/llvm/lib/Target/AArch64/SMEABIPass.cpp
+++ b/llvm/lib/Target/AArch64/SMEABIPass.cpp
@@ -15,16 +15,11 @@
#include "AArch64.h"
#include "Utils/AArch64SMEAttributes.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/RuntimeLibcalls.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/Cloning.h"
using namespace llvm;
@@ -38,13 +33,9 @@ struct SMEABI : public FunctionPass {
bool runOnFunction(Function &F) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<TargetPassConfig>();
- }
-
private:
bool updateNewStateFunctions(Module *M, Function *F, IRBuilder<> &Builder,
- SMEAttrs FnAttrs, const TargetLowering &TLI);
+ SMEAttrs FnAttrs);
};
} // end anonymous namespace
@@ -60,16 +51,14 @@ FunctionPass *llvm::createSMEABIPass() { return new SMEABI(); }
//===----------------------------------------------------------------------===//
// Utility function to emit a call to __arm_tpidr2_save and clear TPIDR2_EL0.
-void emitTPIDR2Save(Module *M, IRBuilder<> &Builder, const TargetLowering &TLI,
- bool ZT0IsUndef = false) {
+void emitTPIDR2Save(Module *M, IRBuilder<> &Builder, bool ZT0IsUndef = false) {
auto &Ctx = M->getContext();
auto *TPIDR2SaveTy =
FunctionType::get(Builder.getVoidTy(), {}, /*IsVarArgs=*/false);
auto Attrs =
AttributeList().addFnAttribute(Ctx, "aarch64_pstate_sm_compatible");
- RTLIB::Libcall LC = RTLIB::SMEABI_TPIDR2_SAVE;
FunctionCallee Callee =
- M->getOrInsertFunction(TLI.getLibcallName(LC), TPIDR2SaveTy, Attrs);
+ M->getOrInsertFunction("__arm_tpidr2_save", TPIDR2SaveTy, Attrs);
CallInst *Call = Builder.CreateCall(Callee);
// If ZT0 is undefined (i.e. we're at the entry of a "new_zt0" function), mark
@@ -78,7 +67,8 @@ void emitTPIDR2Save(Module *M, IRBuilder<> &Builder, const TargetLowering &TLI,
if (ZT0IsUndef)
Call->addFnAttr(Attribute::get(Ctx, "aarch64_zt0_undef"));
- Call->setCallingConv(TLI.getLibcallCallingConv(LC));
+ Call->setCallingConv(
+ CallingConv::AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0);
// A save to TPIDR2 should be followed by clearing TPIDR2_EL0.
Function *WriteIntr =
@@ -108,8 +98,7 @@ void emitTPIDR2Save(Module *M, IRBuilder<> &Builder, const TargetLowering &TLI,
/// interface if it does not share ZA or ZT0.
///
bool SMEABI::updateNewStateFunctions(Module *M, Function *F,
- IRBuilder<> &Builder, SMEAttrs FnAttrs,
- const TargetLowering &TLI) {
+ IRBuilder<> &Builder, SMEAttrs FnAttrs) {
LLVMContext &Context = F->getContext();
BasicBlock *OrigBB = &F->getEntryBlock();
Builder.SetInsertPoint(&OrigBB->front());
@@ -135,7 +124,7 @@ bool SMEABI::updateNewStateFunctions(Module *M, Function *F,
// Create a call __arm_tpidr2_save, which commits the lazy save.
Builder.SetInsertPoint(&SaveBB->back());
- emitTPIDR2Save(M, Builder, TLI, /*ZT0IsUndef=*/FnAttrs.isNewZT0());
+ emitTPIDR2Save(M, Builder, /*ZT0IsUndef=*/FnAttrs.isNewZT0());
// Enable pstate.za at the start of the function.
Builder.SetInsertPoint(&OrigBB->front());
@@ -183,14 +172,10 @@ bool SMEABI::runOnFunction(Function &F) {
if (F.isDeclaration() || F.hasFnAttribute("aarch64_expanded_pstate_za"))
return false;
- const TargetMachine &TM =
- getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
- const TargetLowering &TLI = *TM.getSubtargetImpl(F)->getTargetLowering();
-
bool Changed = false;
SMEAttrs FnAttrs(F);
if (FnAttrs.isNewZA() || FnAttrs.isNewZT0())
- Changed |= updateNewStateFunctions(M, &F, Builder, FnAttrs, TLI);
+ Changed |= updateNewStateFunctions(M, &F, Builder, FnAttrs);
return Changed;
}
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
index 934f68b29922..271094f935e0 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.cpp
@@ -7,9 +7,7 @@
//===----------------------------------------------------------------------===//
#include "AArch64SMEAttributes.h"
-#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/RuntimeLibcalls.h"
#include <cassert>
using namespace llvm;
@@ -79,36 +77,19 @@ SMEAttrs::SMEAttrs(const AttributeList &Attrs) {
Bitmask |= encodeZT0State(StateValue::New);
}
-void SMEAttrs::addKnownFunctionAttrs(StringRef FuncName,
- const TargetLowering &TLI) {
- RTLIB::LibcallImpl Impl = TLI.getSupportedLibcallImpl(FuncName);
- if (Impl == RTLIB::Unsupported)
- return;
- RTLIB::Libcall LC = RTLIB::RuntimeLibcallsInfo::getLibcallFromImpl(Impl);
+void SMEAttrs::addKnownFunctionAttrs(StringRef FuncName) {
unsigned KnownAttrs = SMEAttrs::Normal;
- switch (LC) {
- case RTLIB::SMEABI_SME_STATE:
- case RTLIB::SMEABI_TPIDR2_SAVE:
- case RTLIB::SMEABI_GET_CURRENT_VG:
- case RTLIB::SMEABI_SME_STATE_SIZE:
- case RTLIB::SMEABI_SME_SAVE:
- case RTLIB::SMEABI_SME_RESTORE:
- KnownAttrs |= SMEAttrs::SM_Compatible | SMEAttrs::SME_ABI_Routine;
- break;
- case RTLIB::SMEABI_ZA_DISABLE:
- case RTLIB::SMEABI_TPIDR2_RESTORE:
+ if (FuncName == "__arm_tpidr2_save" || FuncName == "__arm_sme_state")
+ KnownAttrs |= (SMEAttrs::SM_Compatible | SMEAttrs::SME_ABI_Routine);
+ if (FuncName == "__arm_tpidr2_restore")
KnownAttrs |= SMEAttrs::SM_Compatible | encodeZAState(StateValue::In) |
SMEAttrs::SME_ABI_Routine;
- break;
- case RTLIB::SC_MEMCPY:
- case RTLIB::SC_MEMMOVE:
- case RTLIB::SC_MEMSET:
- case RTLIB::SC_MEMCHR:
+ if (FuncName == "__arm_sc_memcpy" || FuncName == "__arm_sc_memset" ||
+ FuncName == "__arm_sc_memmove" || FuncName == "__arm_sc_memchr")
KnownAttrs |= SMEAttrs::SM_Compatible;
- break;
- default:
- break;
- }
+ if (FuncName == "__arm_sme_save" || FuncName == "__arm_sme_restore" ||
+ FuncName == "__arm_sme_state_size")
+ KnownAttrs |= SMEAttrs::SM_Compatible | SMEAttrs::SME_ABI_Routine;
set(KnownAttrs);
}
@@ -129,11 +110,11 @@ bool SMECallAttrs::requiresSMChange() const {
return true;
}
-SMECallAttrs::SMECallAttrs(const CallBase &CB, const TargetLowering *TLI)
+SMECallAttrs::SMECallAttrs(const CallBase &CB)
: CallerFn(*CB.getFunction()), CalledFn(SMEAttrs::Normal),
Callsite(CB.getAttributes()), IsIndirect(CB.isIndirectCall()) {
if (auto *CalledFunction = CB.getCalledFunction())
- CalledFn = SMEAttrs(*CalledFunction, TLI);
+ CalledFn = SMEAttrs(*CalledFunction, SMEAttrs::InferAttrsFromName::Yes);
// FIXME: We probably should not allow SME attributes on direct calls but
// clang duplicates streaming mode attributes at each callsite.
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
index 06376c74025f..f1be0ecbee7e 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64SMEAttributes.h
@@ -13,8 +13,6 @@
namespace llvm {
-class TargetLowering;
-
class Function;
class CallBase;
class AttributeList;
@@ -50,17 +48,17 @@ public:
CallSiteFlags_Mask = ZT0_Undef
};
+ enum class InferAttrsFromName { No, Yes };
+
SMEAttrs() = default;
SMEAttrs(unsigned Mask) { set(Mask); }
- SMEAttrs(const Function &F, const TargetLowering *TLI = nullptr)
+ SMEAttrs(const Function &F, InferAttrsFromName Infer = InferAttrsFromName::No)
: SMEAttrs(F.getAttributes()) {
- if (TLI)
- addKnownFunctionAttrs(F.getName(), *TLI);
+ if (Infer == InferAttrsFromName::Yes)
+ addKnownFunctionAttrs(F.getName());
}
SMEAttrs(const AttributeList &L);
- SMEAttrs(StringRef FuncName, const TargetLowering &TLI) {
- addKnownFunctionAttrs(FuncName, TLI);
- };
+ SMEAttrs(StringRef FuncName) { addKnownFunctionAttrs(FuncName); };
void set(unsigned M, bool Enable = true);
@@ -148,7 +146,7 @@ public:
}
private:
- void addKnownFunctionAttrs(StringRef FuncName, const TargetLowering &TLI);
+ void addKnownFunctionAttrs(StringRef FuncName);
};
/// SMECallAttrs is a utility class to hold the SMEAttrs for a callsite. It has
@@ -165,7 +163,7 @@ public:
SMEAttrs Callsite = SMEAttrs::Normal)
: CallerFn(Caller), CalledFn(Callee), Callsite(Callsite) {}
- SMECallAttrs(const CallBase &CB, const TargetLowering *TLI);
+ SMECallAttrs(const CallBase &CB);
SMEAttrs &caller() { return CallerFn; }
SMEAttrs &callee() { return IsIndirect ? Callsite : CalledFn; }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.h b/llvm/lib/Target/AMDGPU/AMDGPU.h
index 007b481f8496..0059a862ba9b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -439,10 +439,6 @@ struct AMDGPUPrintfRuntimeBindingPass
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
-struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
- PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
-};
-
void initializeSIOptimizeExecMaskingPreRALegacyPass(PassRegistry &);
extern char &SIOptimizeExecMaskingPreRAID;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
index b6c6d927d0e8..6ddfa386e8ac 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPassRegistry.def
@@ -29,7 +29,6 @@ MODULE_PASS("amdgpu-preload-kernel-arguments", AMDGPUPreloadKernelArgumentsPass(
MODULE_PASS("amdgpu-printf-runtime-binding", AMDGPUPrintfRuntimeBindingPass())
MODULE_PASS("amdgpu-remove-incompatible-functions", AMDGPURemoveIncompatibleFunctionsPass(*this))
MODULE_PASS("amdgpu-sw-lower-lds", AMDGPUSwLowerLDSPass(*this))
-MODULE_PASS("amdgpu-unify-metadata", AMDGPUUnifyMetadataPass())
#undef MODULE_PASS
#ifndef MODULE_PASS_WITH_PARAMS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
index 5a6ad405a026..8c56c2162112 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegBankLegalizeRules.cpp
@@ -724,10 +724,10 @@ RegBankLegalizeRules::RegBankLegalizeRules(const GCNSubtarget &_ST,
.Any({{S32}, {{}, {Vgpr32, SgprV4S32, Vgpr32, Vgpr32, Sgpr32}}});
addRulesForGOpcs({G_PTR_ADD})
- .Any({{UniP1}, {{SgprP1}, {SgprP1, Sgpr64}}})
- .Any({{DivP1}, {{VgprP1}, {VgprP1, Vgpr64}}})
- .Any({{DivP0}, {{VgprP0}, {VgprP0, Vgpr64}}})
- .Any({{UniP4}, {{SgprP4}, {SgprP4, Sgpr64}}});
+ .Any({{UniPtr32}, {{SgprPtr32}, {SgprPtr32, Sgpr32}}})
+ .Any({{DivPtr32}, {{VgprPtr32}, {VgprPtr32, Vgpr32}}})
+ .Any({{UniPtr64}, {{SgprPtr64}, {SgprPtr64, Sgpr64}}})
+ .Any({{DivPtr64}, {{VgprPtr64}, {VgprPtr64, Vgpr64}}});
addRulesForGOpcs({G_INTTOPTR})
.Any({{UniPtr32}, {{SgprPtr32}, {Sgpr32}}})
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index c1f17033d04a..e393aa198774 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -848,8 +848,6 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
if (Level == OptimizationLevel::O0)
return;
- PM.addPass(AMDGPUUnifyMetadataPass());
-
// We don't want to run internalization at per-module stage.
if (InternalizeSymbols && !isLTOPreLink(Phase)) {
PM.addPass(InternalizePass(mustPreserveGV));
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
deleted file mode 100644
index e400491c3860..000000000000
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
+++ /dev/null
@@ -1,119 +0,0 @@
-//===- AMDGPUUnifyMetadata.cpp - Unify OpenCL metadata --------------------===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// \file
-// This pass that unifies multiple OpenCL metadata due to linking.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/Pass.h"
-
-using namespace llvm;
-
-namespace {
-
- namespace kOCLMD {
-
- const char SpirVer[] = "opencl.spir.version";
- const char OCLVer[] = "opencl.ocl.version";
- const char UsedExt[] = "opencl.used.extensions";
- const char UsedOptCoreFeat[] = "opencl.used.optional.core.features";
- const char CompilerOptions[] = "opencl.compiler.options";
- const char LLVMIdent[] = "llvm.ident";
-
- } // end namespace kOCLMD
-
- /// Unify version metadata.
- /// \return true if changes are made.
- /// Assume the named metadata has operands each of which is a pair of
- /// integer constant, e.g.
- /// !Name = {!n1, !n2}
- /// !n1 = {i32 1, i32 2}
- /// !n2 = {i32 2, i32 0}
- /// Keep the largest version as the sole operand if PickFirst is false.
- /// Otherwise pick it from the first value, representing kernel module.
- bool unifyVersionMD(Module &M, StringRef Name, bool PickFirst) {
- auto *NamedMD = M.getNamedMetadata(Name);
- if (!NamedMD || NamedMD->getNumOperands() <= 1)
- return false;
- MDNode *MaxMD = nullptr;
- auto MaxVer = 0U;
- for (auto *VersionMD : NamedMD->operands()) {
- assert(VersionMD->getNumOperands() == 2);
- auto *CMajor = mdconst::extract<ConstantInt>(VersionMD->getOperand(0));
- auto VersionMajor = CMajor->getZExtValue();
- auto *CMinor = mdconst::extract<ConstantInt>(VersionMD->getOperand(1));
- auto VersionMinor = CMinor->getZExtValue();
- auto Ver = (VersionMajor * 100) + (VersionMinor * 10);
- if (Ver > MaxVer) {
- MaxVer = Ver;
- MaxMD = VersionMD;
- }
- if (PickFirst)
- break;
- }
- NamedMD->eraseFromParent();
- NamedMD = M.getOrInsertNamedMetadata(Name);
- NamedMD->addOperand(MaxMD);
- return true;
- }
-
- /// Unify version metadata.
- /// \return true if changes are made.
- /// Assume the named metadata has operands each of which is a list e.g.
- /// !Name = {!n1, !n2}
- /// !n1 = !{!"cl_khr_fp16", {!"cl_khr_fp64"}}
- /// !n2 = !{!"cl_khr_image"}
- /// Combine it into a single list with unique operands.
- bool unifyExtensionMD(Module &M, StringRef Name) {
- auto *NamedMD = M.getNamedMetadata(Name);
- if (!NamedMD || NamedMD->getNumOperands() == 1)
- return false;
-
- SmallVector<Metadata *, 4> All;
- for (auto *MD : NamedMD->operands())
- for (const auto &Op : MD->operands())
- if (!llvm::is_contained(All, Op.get()))
- All.push_back(Op.get());
-
- NamedMD->eraseFromParent();
- NamedMD = M.getOrInsertNamedMetadata(Name);
- for (const auto &MD : All)
- NamedMD->addOperand(MDNode::get(M.getContext(), MD));
-
- return true;
- }
-
- /// Unify multiple OpenCL metadata due to linking.
- bool unifyMetadataImpl(Module &M) {
- const char *Vers[] = {kOCLMD::SpirVer, kOCLMD::OCLVer};
- const char *Exts[] = {kOCLMD::UsedExt, kOCLMD::UsedOptCoreFeat,
- kOCLMD::CompilerOptions, kOCLMD::LLVMIdent};
-
- bool Changed = false;
-
- for (auto &I : Vers)
- Changed |= unifyVersionMD(M, I, true);
-
- for (auto &I : Exts)
- Changed |= unifyExtensionMD(M, I);
-
- return Changed;
- }
-
- } // end anonymous namespace
-
- PreservedAnalyses AMDGPUUnifyMetadataPass::run(Module &M,
- ModuleAnalysisManager &AM) {
- return unifyMetadataImpl(M) ? PreservedAnalyses::none()
- : PreservedAnalyses::all();
- }
diff --git a/llvm/lib/Target/AMDGPU/CMakeLists.txt b/llvm/lib/Target/AMDGPU/CMakeLists.txt
index c466f9cf0f35..dc9dd220130e 100644
--- a/llvm/lib/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/CMakeLists.txt
@@ -114,7 +114,6 @@ add_llvm_target(AMDGPUCodeGen
AMDGPUTargetTransformInfo.cpp
AMDGPUWaitSGPRHazards.cpp
AMDGPUUnifyDivergentExitNodes.cpp
- AMDGPUUnifyMetadata.cpp
R600MachineCFGStructurizer.cpp
GCNCreateVOPD.cpp
GCNDPPCombine.cpp
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 2d0102fffe5e..7c019031ff24 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -197,7 +197,7 @@ enum ClassFlags : unsigned {
namespace AMDGPU {
enum OperandType : unsigned {
- /// Operands with register or 32-bit immediate
+ /// Operands with register, 32-bit, or 64-bit immediate
OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET,
OPERAND_REG_IMM_INT64,
OPERAND_REG_IMM_INT16,
@@ -407,7 +407,7 @@ enum CPol {
SCAL = 1 << 11, // Scale offset bit
- ALL = TH | SCOPE,
+ ALL = TH | SCOPE | NV,
// Helper bits
TH_TYPE_LOAD = 1 << 7, // TH_LOAD policy
@@ -440,6 +440,7 @@ enum Id { // Message ID, width(4) [3:0].
ID_EARLY_PRIM_DEALLOC = 8, // added in GFX9, removed in GFX10
ID_GS_ALLOC_REQ = 9, // added in GFX9
ID_GET_DOORBELL = 10, // added in GFX9, removed in GFX11
+ ID_SAVEWAVE_HAS_TDM = 10, // added in GFX1250
ID_GET_DDID = 11, // added in GFX10, removed in GFX11
ID_SYSMSG = 15,
@@ -513,6 +514,7 @@ enum Id { // HwRegCode, (6) [5:0]
ID_HW_ID2 = 24,
ID_POPS_PACKER = 25,
ID_PERF_SNAPSHOT_DATA_gfx11 = 27,
+ ID_IB_STS2 = 28,
ID_SHADER_CYCLES = 29,
ID_SHADER_CYCLES_HI = 30,
ID_DVGPR_ALLOC_LO = 31,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 2e76225bbc54..f58fde421f77 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16894,6 +16894,11 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
const TargetRegisterClass *RC = nullptr;
if (Constraint.size() == 1) {
+ // Check if we cannot determine the bit size of the given value type. This
+ // can happen, for example, in this situation where we have an empty struct
+ // (size 0): `call void asm "", "v"({} poison)`-
+ if (VT == MVT::Other)
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
const unsigned BitWidth = VT.getSizeInBits();
switch (Constraint[0]) {
default:
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 9278b859a806..c425d9753dd1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2708,7 +2708,6 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
isModifierType<Src2VT>.ret,
HasOMod);
field bit HasNeg = HasModifiers;
- field bit HasMatrixReuse = 0;
field bit HasMatrixFMT = 0;
field bit HasMatrixScale = 0;
field bit HasMatrixReuse = 0;
diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index f8878f32f829..f7a9a584a6b5 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -57,6 +57,7 @@
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;
@@ -76,6 +77,7 @@ private:
LiveIntervals *LIS = nullptr;
LiveVariables *LV = nullptr;
MachineDominatorTree *MDT = nullptr;
+ MachinePostDominatorTree *PDT = nullptr;
MachineRegisterInfo *MRI = nullptr;
SetVector<MachineInstr*> LoweredEndCf;
DenseSet<Register> LoweredIf;
@@ -138,8 +140,8 @@ private:
public:
SILowerControlFlow(LiveIntervals *LIS, LiveVariables *LV,
- MachineDominatorTree *MDT)
- : LIS(LIS), LV(LV), MDT(MDT) {}
+ MachineDominatorTree *MDT, MachinePostDominatorTree *PDT)
+ : LIS(LIS), LV(LV), MDT(MDT), PDT(PDT) {}
bool run(MachineFunction &MF);
};
@@ -159,6 +161,7 @@ public:
AU.addUsedIfAvailable<LiveIntervalsWrapperPass>();
// Should preserve the same set that TwoAddressInstructions does.
AU.addPreserved<MachineDominatorTreeWrapperPass>();
+ AU.addPreserved<MachinePostDominatorTreeWrapperPass>();
AU.addPreserved<SlotIndexesWrapperPass>();
AU.addPreserved<LiveIntervalsWrapperPass>();
AU.addPreserved<LiveVariablesWrapperPass>();
@@ -506,13 +509,18 @@ MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) {
MachineBasicBlock *SplitBB = &MBB;
if (NeedBlockSplit) {
SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/true, LIS);
- if (MDT && SplitBB != &MBB) {
- MachineDomTreeNode *MBBNode = (*MDT)[&MBB];
- SmallVector<MachineDomTreeNode *> Children(MBBNode->begin(),
- MBBNode->end());
- MachineDomTreeNode *SplitBBNode = MDT->addNewBlock(SplitBB, &MBB);
- for (MachineDomTreeNode *Child : Children)
- MDT->changeImmediateDominator(Child, SplitBBNode);
+ if (SplitBB != &MBB && (MDT || PDT)) {
+ using DomTreeT = DomTreeBase<MachineBasicBlock>;
+ SmallVector<DomTreeT::UpdateType, 16> DTUpdates;
+ for (MachineBasicBlock *Succ : SplitBB->successors()) {
+ DTUpdates.push_back({DomTreeT::Insert, SplitBB, Succ});
+ DTUpdates.push_back({DomTreeT::Delete, &MBB, Succ});
+ }
+ DTUpdates.push_back({DomTreeT::Insert, &MBB, SplitBB});
+ if (MDT)
+ MDT->applyUpdates(DTUpdates);
+ if (PDT)
+ PDT->applyUpdates(DTUpdates);
}
Opcode = OrTermrOpc;
InsPt = MI;
@@ -727,26 +735,27 @@ bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) {
MachineBasicBlock *Succ = *MBB.succ_begin();
MachineBasicBlock *FallThrough = nullptr;
+ using DomTreeT = DomTreeBase<MachineBasicBlock>;
+ SmallVector<DomTreeT::UpdateType, 8> DTUpdates;
+
while (!MBB.predecessors().empty()) {
MachineBasicBlock *P = *MBB.pred_begin();
if (P->getFallThrough(false) == &MBB)
FallThrough = P;
P->ReplaceUsesOfBlockWith(&MBB, Succ);
+ DTUpdates.push_back({DomTreeT::Insert, P, Succ});
+ DTUpdates.push_back({DomTreeT::Delete, P, &MBB});
}
MBB.removeSuccessor(Succ);
if (LIS) {
for (auto &I : MBB.instrs())
LIS->RemoveMachineInstrFromMaps(I);
}
- if (MDT) {
- // If Succ, the single successor of MBB, is dominated by MBB, MDT needs
- // updating by changing Succ's idom to the one of MBB; otherwise, MBB must
- // be a leaf node in MDT and could be erased directly.
- if (MDT->dominates(&MBB, Succ))
- MDT->changeImmediateDominator(MDT->getNode(Succ),
- MDT->getNode(&MBB)->getIDom());
- MDT->eraseNode(&MBB);
- }
+ if (MDT)
+ MDT->applyUpdates(DTUpdates);
+ if (PDT)
+ PDT->applyUpdates(DTUpdates);
+
MBB.clear();
MBB.eraseFromParent();
if (FallThrough && !FallThrough->isLayoutSuccessor(Succ)) {
@@ -875,7 +884,11 @@ bool SILowerControlFlowLegacy::runOnMachineFunction(MachineFunction &MF) {
LiveVariables *LV = LVWrapper ? &LVWrapper->getLV() : nullptr;
auto *MDTWrapper = getAnalysisIfAvailable<MachineDominatorTreeWrapperPass>();
MachineDominatorTree *MDT = MDTWrapper ? &MDTWrapper->getDomTree() : nullptr;
- return SILowerControlFlow(LIS, LV, MDT).run(MF);
+ auto *PDTWrapper =
+ getAnalysisIfAvailable<MachinePostDominatorTreeWrapperPass>();
+ MachinePostDominatorTree *PDT =
+ PDTWrapper ? &PDTWrapper->getPostDomTree() : nullptr;
+ return SILowerControlFlow(LIS, LV, MDT, PDT).run(MF);
}
PreservedAnalyses
@@ -885,13 +898,16 @@ SILowerControlFlowPass::run(MachineFunction &MF,
LiveVariables *LV = MFAM.getCachedResult<LiveVariablesAnalysis>(MF);
MachineDominatorTree *MDT =
MFAM.getCachedResult<MachineDominatorTreeAnalysis>(MF);
+ MachinePostDominatorTree *PDT =
+ MFAM.getCachedResult<MachinePostDominatorTreeAnalysis>(MF);
- bool Changed = SILowerControlFlow(LIS, LV, MDT).run(MF);
+ bool Changed = SILowerControlFlow(LIS, LV, MDT, PDT).run(MF);
if (!Changed)
return PreservedAnalyses::all();
auto PA = getMachineFunctionPassPreservedAnalyses();
PA.preserve<MachineDominatorTreeAnalysis>();
+ PA.preserve<MachinePostDominatorTreeAnalysis>();
PA.preserve<SlotIndexesAnalysis>();
PA.preserve<LiveIntervalsAnalysis>();
PA.preserve<LiveVariablesAnalysis>();
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index c2f4dbfa247d..a003a46191a8 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -1665,7 +1665,9 @@ def S_WAITCNT_lds_direct : SPseudoInstSI<(outs), (ins)> {
def S_SETHALT : SOPP_Pseudo <"s_sethalt" , (ins i32imm:$simm16), "$simm16",
[(int_amdgcn_s_sethalt timm:$simm16)]>;
-def S_SETKILL : SOPP_Pseudo <"s_setkill" , (ins i16imm:$simm16), "$simm16">;
+def S_SETKILL : SOPP_Pseudo <"s_setkill" , (ins i16imm:$simm16), "$simm16"> {
+ let SubtargetPredicate = isNotGFX1250Plus;
+}
// On SI the documentation says sleep for approximately 64 * low 2
// bits, consistent with the reported maximum of 448. On VI the
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index 3d9455fc51a3..c740b5e0f09d 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -106,7 +106,7 @@ static constexpr CustomOperand MsgOperands[] = {
{{"MSG_GET_DDID"}, ID_GET_DDID, isGFX10},
{{"MSG_HS_TESSFACTOR"}, ID_HS_TESSFACTOR_GFX11Plus, isGFX11Plus},
{{"MSG_DEALLOC_VGPRS"}, ID_DEALLOC_VGPRS_GFX11Plus, isGFX11Plus},
- {{""}},
+ {{"MSG_SAVEWAVE_HAS_TDM"}, ID_SAVEWAVE_HAS_TDM, isGFX1250},
{{"MSG_SYSMSG"}, ID_SYSMSG},
{{"MSG_RTN_GET_DOORBELL"}, ID_RTN_GET_DOORBELL, isGFX11Plus},
{{"MSG_RTN_GET_DDID"}, ID_RTN_GET_DDID, isGFX11Plus},
@@ -195,7 +195,7 @@ static constexpr CustomOperand Operands[] = {
{{"HW_REG_POPS_PACKER"}, ID_POPS_PACKER, isGFX10},
{{""}},
{{"HW_REG_PERF_SNAPSHOT_DATA"}, ID_PERF_SNAPSHOT_DATA_gfx11, isGFX11},
- {{""}},
+ {{"HW_REG_IB_STS2"}, ID_IB_STS2, isGFX1250},
{{"HW_REG_SHADER_CYCLES"}, ID_SHADER_CYCLES, isGFX10_3_GFX11},
{{"HW_REG_SHADER_CYCLES_HI"}, ID_SHADER_CYCLES_HI, isGFX12Plus},
{{"HW_REG_DVGPR_ALLOC_LO"}, ID_DVGPR_ALLOC_LO, isGFX12Plus},
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index d386c917a256..8ea567cfb9d3 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -587,167 +587,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
}
}
- // RTLIB
- if (TM.isAAPCS_ABI() && (TT.isTargetAEABI() || TT.isTargetGNUAEABI() ||
- TT.isTargetMuslAEABI() || TT.isAndroid())) {
- // FIXME: This does not depend on the subtarget and should go directly into
- // RuntimeLibcalls. This is only here because of missing support for setting
- // the calling convention of an implementation.
- // clang-format off
- static const struct {
- const RTLIB::Libcall Op;
- const RTLIB::LibcallImpl Impl;
- } LibraryCalls[] = {
- // Double-precision floating-point arithmetic helper functions
- // RTABI chapter 4.1.2, Table 2
- { RTLIB::ADD_F64, RTLIB::__aeabi_dadd },
- { RTLIB::DIV_F64, RTLIB::__aeabi_ddiv },
- { RTLIB::MUL_F64, RTLIB::__aeabi_dmul },
- { RTLIB::SUB_F64, RTLIB::__aeabi_dsub },
-
- // Double-precision floating-point comparison helper functions
- // RTABI chapter 4.1.2, Table 3
- { RTLIB::OEQ_F64, RTLIB::__aeabi_dcmpeq__oeq },
- { RTLIB::UNE_F64, RTLIB::__aeabi_dcmpeq__une },
- { RTLIB::OLT_F64, RTLIB::__aeabi_dcmplt },
- { RTLIB::OLE_F64, RTLIB::__aeabi_dcmple },
- { RTLIB::OGE_F64, RTLIB::__aeabi_dcmpge },
- { RTLIB::OGT_F64, RTLIB::__aeabi_dcmpgt },
- { RTLIB::UO_F64, RTLIB::__aeabi_dcmpun },
-
- // Single-precision floating-point arithmetic helper functions
- // RTABI chapter 4.1.2, Table 4
- { RTLIB::ADD_F32, RTLIB::__aeabi_fadd },
- { RTLIB::DIV_F32, RTLIB::__aeabi_fdiv },
- { RTLIB::MUL_F32, RTLIB::__aeabi_fmul },
- { RTLIB::SUB_F32, RTLIB::__aeabi_fsub },
-
- // Single-precision floating-point comparison helper functions
- // RTABI chapter 4.1.2, Table 5
- { RTLIB::OEQ_F32, RTLIB::__aeabi_fcmpeq__oeq },
- { RTLIB::UNE_F32, RTLIB::__aeabi_fcmpeq__une },
- { RTLIB::OLT_F32, RTLIB::__aeabi_fcmplt},
- { RTLIB::OLE_F32, RTLIB::__aeabi_fcmple },
- { RTLIB::OGE_F32, RTLIB::__aeabi_fcmpge },
- { RTLIB::OGT_F32, RTLIB::__aeabi_fcmpgt },
- { RTLIB::UO_F32, RTLIB::__aeabi_fcmpun },
-
- // Floating-point to integer conversions.
- // RTABI chapter 4.1.2, Table 6
- { RTLIB::FPTOSINT_F64_I32, RTLIB::__aeabi_d2iz },
- { RTLIB::FPTOUINT_F64_I32, RTLIB::__aeabi_d2uiz },
- { RTLIB::FPTOSINT_F64_I64, RTLIB::__aeabi_d2lz },
- { RTLIB::FPTOUINT_F64_I64, RTLIB::__aeabi_d2ulz },
- { RTLIB::FPTOSINT_F32_I32, RTLIB::__aeabi_f2iz },
- { RTLIB::FPTOUINT_F32_I32, RTLIB::__aeabi_f2uiz },
- { RTLIB::FPTOSINT_F32_I64, RTLIB::__aeabi_f2lz },
- { RTLIB::FPTOUINT_F32_I64, RTLIB::__aeabi_f2ulz },
-
- // Conversions between floating types.
- // RTABI chapter 4.1.2, Table 7
- { RTLIB::FPROUND_F64_F32, RTLIB::__aeabi_d2f },
- { RTLIB::FPROUND_F64_F16, RTLIB::__aeabi_d2h },
- { RTLIB::FPEXT_F32_F64, RTLIB::__aeabi_f2d },
-
- // Integer to floating-point conversions.
- // RTABI chapter 4.1.2, Table 8
- { RTLIB::SINTTOFP_I32_F64, RTLIB::__aeabi_i2d },
- { RTLIB::UINTTOFP_I32_F64, RTLIB::__aeabi_ui2d },
- { RTLIB::SINTTOFP_I64_F64, RTLIB::__aeabi_l2d },
- { RTLIB::UINTTOFP_I64_F64, RTLIB::__aeabi_ul2d },
- { RTLIB::SINTTOFP_I32_F32, RTLIB::__aeabi_i2f },
- { RTLIB::UINTTOFP_I32_F32, RTLIB::__aeabi_ui2f },
- { RTLIB::SINTTOFP_I64_F32, RTLIB::__aeabi_l2f },
- { RTLIB::UINTTOFP_I64_F32, RTLIB::__aeabi_ul2f },
-
- // Long long helper functions
- // RTABI chapter 4.2, Table 9
- { RTLIB::MUL_I64, RTLIB::__aeabi_lmul },
- { RTLIB::SHL_I64, RTLIB::__aeabi_llsl },
- { RTLIB::SRL_I64, RTLIB::__aeabi_llsr },
- { RTLIB::SRA_I64, RTLIB::__aeabi_lasr },
-
- // Integer division functions
- // RTABI chapter 4.3.1
- { RTLIB::SDIV_I32, RTLIB::__aeabi_idiv },
- { RTLIB::SDIV_I64, RTLIB::__aeabi_ldivmod },
- { RTLIB::UDIV_I32, RTLIB::__aeabi_uidiv },
- { RTLIB::UDIV_I64, RTLIB::__aeabi_uldivmod },
- };
- // clang-format on
-
- for (const auto &LC : LibraryCalls)
- setLibcallImpl(LC.Op, LC.Impl);
-
- // EABI dependent RTLIB
- if (TM.Options.EABIVersion == EABI::EABI4 ||
- TM.Options.EABIVersion == EABI::EABI5) {
- static const struct {
- const RTLIB::Libcall Op;
- const RTLIB::LibcallImpl Impl;
- } MemOpsLibraryCalls[] = {
- // Memory operations
- // RTABI chapter 4.3.4
- {RTLIB::MEMCPY, RTLIB::__aeabi_memcpy},
- {RTLIB::MEMMOVE, RTLIB::__aeabi_memmove},
- {RTLIB::MEMSET, RTLIB::__aeabi_memset},
- {RTLIB::AEABI_MEMCPY4, RTLIB::__aeabi_memcpy4},
- {RTLIB::AEABI_MEMCPY8, RTLIB::__aeabi_memcpy8},
- {RTLIB::AEABI_MEMMOVE4, RTLIB::__aeabi_memmove4},
- {RTLIB::AEABI_MEMMOVE8, RTLIB::__aeabi_memmove8},
- {RTLIB::AEABI_MEMSET4, RTLIB::__aeabi_memset4},
- {RTLIB::AEABI_MEMSET8, RTLIB::__aeabi_memset8},
- {RTLIB::AEABI_MEMCLR, RTLIB::__aeabi_memclr},
- {RTLIB::AEABI_MEMCLR4, RTLIB::__aeabi_memclr4},
- {RTLIB::AEABI_MEMCLR8, RTLIB::__aeabi_memclr8},
- };
-
- for (const auto &LC : MemOpsLibraryCalls)
- setLibcallImpl(LC.Op, LC.Impl);
- }
- }
-
- // The half <-> float conversion functions are always soft-float on
- // non-watchos platforms, but are needed for some targets which use a
- // hard-float calling convention by default.
- if (!TT.isWatchABI()) {
- if (TM.isAAPCS_ABI()) {
- setLibcallImplCallingConv(RTLIB::__truncsfhf2, CallingConv::ARM_AAPCS);
- setLibcallImplCallingConv(RTLIB::__truncdfhf2, CallingConv::ARM_AAPCS);
- setLibcallImplCallingConv(RTLIB::__extendhfsf2, CallingConv::ARM_AAPCS);
- setLibcallImplCallingConv(RTLIB::__gnu_h2f_ieee, CallingConv::ARM_AAPCS);
- setLibcallImplCallingConv(RTLIB::__gnu_f2h_ieee, CallingConv::ARM_AAPCS);
- } else {
- setLibcallImplCallingConv(RTLIB::__truncsfhf2, CallingConv::ARM_APCS);
- setLibcallImplCallingConv(RTLIB::__truncdfhf2, CallingConv::ARM_APCS);
- setLibcallImplCallingConv(RTLIB::__extendhfsf2, CallingConv::ARM_APCS);
- setLibcallImplCallingConv(RTLIB::__gnu_h2f_ieee, CallingConv::ARM_APCS);
- setLibcallImplCallingConv(RTLIB::__gnu_f2h_ieee, CallingConv::ARM_APCS);
- }
- }
-
- // In EABI, these functions have an __aeabi_ prefix, but in GNUEABI they have
- // a __gnu_ prefix (which is the default).
- if (TT.isTargetAEABI()) {
- // FIXME: This does not depend on the subtarget and should go directly into
- // RuntimeLibcalls. This is only here because of missing support for setting
- // the calling convention of an implementation.
- static const struct {
- const RTLIB::Libcall Op;
- const RTLIB::LibcallImpl Impl;
- } LibraryCalls[] = {
- {RTLIB::FPROUND_F32_F16, RTLIB::__aeabi_f2h},
- {RTLIB::FPEXT_F16_F32, RTLIB::__aeabi_h2f},
- };
-
- for (const auto &LC : LibraryCalls) {
- setLibcallImpl(LC.Op, LC.Impl);
- }
- } else if (!TT.isOSBinFormatMachO()) {
- setLibcallImpl(RTLIB::FPROUND_F32_F16, RTLIB::__gnu_f2h_ieee);
- setLibcallImpl(RTLIB::FPEXT_F16_F32, RTLIB::__gnu_h2f_ieee);
- }
-
if (Subtarget->isThumb1Only())
addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
else
@@ -7406,7 +7245,7 @@ static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
return false;
unsigned NumElts = VT.getVectorNumElements();
- if (M.size() != NumElts && M.size() != NumElts*2)
+ if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
return false;
// If the mask is twice as long as the input vector then we need to check the
@@ -7438,7 +7277,7 @@ static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return false;
unsigned NumElts = VT.getVectorNumElements();
- if (M.size() != NumElts && M.size() != NumElts*2)
+ if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
@@ -7541,7 +7380,7 @@ static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
return false;
unsigned NumElts = VT.getVectorNumElements();
- if (M.size() != NumElts && M.size() != NumElts*2)
+ if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
@@ -7574,7 +7413,7 @@ static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
return false;
unsigned NumElts = VT.getVectorNumElements();
- if (M.size() != NumElts && M.size() != NumElts*2)
+ if ((M.size() != NumElts && M.size() != NumElts * 2) || NumElts % 2 != 0)
return false;
for (unsigned i = 0; i < M.size(); i += NumElts) {
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 393cf2d97380..6b2854171c81 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -1084,9 +1084,10 @@ InstructionCost ARMTTIImpl::getCmpSelInstrCost(
CostKind, Op1Info, Op2Info, I);
}
-InstructionCost ARMTTIImpl::getAddressComputationCost(Type *PtrTy,
- ScalarEvolution *SE,
- const SCEV *Ptr) const {
+InstructionCost
+ARMTTIImpl::getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
+ const SCEV *Ptr,
+ TTI::TargetCostKind CostKind) const {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
@@ -1103,7 +1104,7 @@ InstructionCost ARMTTIImpl::getAddressComputationCost(Type *PtrTy,
// addressing mode.
return 1;
}
- return BaseT::getAddressComputationCost(PtrTy, SE, Ptr);
+ return BaseT::getAddressComputationCost(PtrTy, SE, Ptr, CostKind);
}
bool ARMTTIImpl::isProfitableLSRChainElement(Instruction *I) const {
@@ -1335,6 +1336,39 @@ InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
if (!Mask.empty()) {
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(SrcTy);
+ // Check for LD2/LD4 instructions, which are represented in llvm IR as
+ // deinterleaving-shuffle(load). The shuffle cost could potentially be
+ // free, but we model it with a cost of LT.first so that LD2/LD4 have a
+ // higher cost than just the load.
+ if (Args.size() >= 1 && isa<LoadInst>(Args[0]) &&
+ (LT.second.getScalarSizeInBits() == 8 ||
+ LT.second.getScalarSizeInBits() == 16 ||
+ LT.second.getScalarSizeInBits() == 32) &&
+ LT.second.getSizeInBits() == 128 &&
+ ((TLI->getMaxSupportedInterleaveFactor() >= 2 &&
+ ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 2)) ||
+ (TLI->getMaxSupportedInterleaveFactor() == 4 &&
+ ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, 4))))
+ return ST->getMVEVectorCostFactor(CostKind) *
+ std::max<InstructionCost>(1, LT.first / 4);
+
+ // Check for ST2/ST4 instructions, which are represented in llvm IR as
+ // store(interleaving-shuffle). The shuffle cost could potentially be
+ // free, but we model it with a cost of LT.first so that ST2/ST4 have a
+ // higher cost than just the store.
+ if (CxtI && CxtI->hasOneUse() && isa<StoreInst>(*CxtI->user_begin()) &&
+ (LT.second.getScalarSizeInBits() == 8 ||
+ LT.second.getScalarSizeInBits() == 16 ||
+ LT.second.getScalarSizeInBits() == 32) &&
+ LT.second.getSizeInBits() == 128 &&
+ ((TLI->getMaxSupportedInterleaveFactor() >= 2 &&
+ ShuffleVectorInst::isInterleaveMask(
+ Mask, 2, SrcTy->getElementCount().getKnownMinValue() * 2)) ||
+ (TLI->getMaxSupportedInterleaveFactor() == 4 &&
+ ShuffleVectorInst::isInterleaveMask(
+ Mask, 4, SrcTy->getElementCount().getKnownMinValue() * 2))))
+ return ST->getMVEVectorCostFactor(CostKind) * LT.first;
+
if (LT.second.isVector() &&
Mask.size() <= LT.second.getVectorNumElements() &&
(isVREVMask(Mask, LT.second, 16) || isVREVMask(Mask, LT.second, 32) ||
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 522c235a90a8..cdd8bcb9f741 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -257,8 +257,9 @@ public:
unsigned Index, const Value *Op0,
const Value *Op1) const override;
- InstructionCost getAddressComputationCost(Type *Val, ScalarEvolution *SE,
- const SCEV *Ptr) const override;
+ InstructionCost
+ getAddressComputationCost(Type *Val, ScalarEvolution *SE, const SCEV *Ptr,
+ TTI::TargetCostKind CostKind) const override;
InstructionCost getArithmeticInstrCost(
unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index ece6c10e828d..0e974838a7c6 100644
--- a/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -3373,12 +3373,12 @@ public:
void addMSRMaskOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::createImm(unsigned(getMSRMask())));
+ Inst.addOperand(MCOperand::createImm(getMSRMask()));
}
void addBankedRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
- Inst.addOperand(MCOperand::createImm(unsigned(getBankedReg())));
+ Inst.addOperand(MCOperand::createImm(getBankedReg()));
}
void addProcIFlagsOperands(MCInst &Inst, unsigned N) const {
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index 5c212816fbdb..171e2949366a 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -156,9 +156,10 @@ HexagonTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
-InstructionCost HexagonTTIImpl::getAddressComputationCost(Type *PtrTy,
- ScalarEvolution *SE,
- const SCEV *S) const {
+InstructionCost
+HexagonTTIImpl::getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
+ const SCEV *S,
+ TTI::TargetCostKind CostKind) const {
return 0;
}
diff --git a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index 0a5766d1dadf..dbf16c99c314 100644
--- a/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -111,8 +111,9 @@ public:
InstructionCost
getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) const override;
- InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
- const SCEV *S) const override;
+ InstructionCost
+ getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *S,
+ TTI::TargetCostKind CostKind) const override;
InstructionCost getMemoryOpCost(
unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 14472419a10f..a2a41d0062ff 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -2786,7 +2786,7 @@ SDValue LoongArchTargetLowering::lowerUINT_TO_FP(SDValue Op,
EVT RetVT = Op.getValueType();
RTLIB::Libcall LC = RTLIB::getUINTTOFP(OpVT, RetVT);
MakeLibCallOptions CallOptions;
- CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
+ CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
SDValue Chain = SDValue();
SDValue Result;
std::tie(Result, Chain) =
@@ -2811,7 +2811,7 @@ SDValue LoongArchTargetLowering::lowerSINT_TO_FP(SDValue Op,
EVT RetVT = Op.getValueType();
RTLIB::Libcall LC = RTLIB::getSINTTOFP(OpVT, RetVT);
MakeLibCallOptions CallOptions;
- CallOptions.setTypeListBeforeSoften(OpVT, RetVT, true);
+ CallOptions.setTypeListBeforeSoften(OpVT, RetVT);
SDValue Chain = SDValue();
SDValue Result;
std::tie(Result, Chain) =
@@ -4107,7 +4107,7 @@ void LoongArchTargetLowering::ReplaceNodeResults(
LC = RTLIB::getFPTOSINT(Src.getValueType(), VT);
MakeLibCallOptions CallOptions;
EVT OpVT = Src.getValueType();
- CallOptions.setTypeListBeforeSoften(OpVT, VT, true);
+ CallOptions.setTypeListBeforeSoften(OpVT, VT);
SDValue Chain = SDValue();
SDValue Result;
std::tie(Result, Chain) =
@@ -4360,7 +4360,7 @@ void LoongArchTargetLowering::ReplaceNodeResults(
RTLIB::Libcall LC =
OpVT == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
MakeLibCallOptions CallOptions;
- CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
+ CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
Results.push_back(Result);
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index d8bb16fe9b94..0696b11d62ac 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1640,6 +1640,24 @@ defm : PairInsertExtractPatV8<v8f32, f32>;
defm : PairInsertExtractPatV4<v4i64, GRLenVT>;
defm : PairInsertExtractPatV4<v4f64, f64>;
+def : Pat<(vector_insert v8i32:$xd, (GRLenVT(vector_extract v8i32:$xj, 0)),
+ uimm3:$imm),
+ (XVINSVE0_W v8i32:$xd, v8i32:$xj, uimm3:$imm)>;
+
+def : Pat<(vector_insert v4i64:$xd, (GRLenVT(vector_extract v4i64:$xj, 0)),
+ uimm2:$imm),
+ (XVINSVE0_D v4i64:$xd, v4i64:$xj, uimm2:$imm)>;
+
+def : Pat<(vector_insert v8i32:$xd,
+ (GRLenVT(vector_extract v8i32:$xj, uimm3:$imm1)), uimm3:$imm2),
+ (XVINSVE0_W v8i32:$xd, (XVPICKVE_W v8i32:$xj, uimm3:$imm1),
+ uimm3:$imm2)>;
+
+def : Pat<(vector_insert v4i64:$xd,
+ (GRLenVT(vector_extract v4i64:$xj, uimm2:$imm1)), uimm2:$imm2),
+ (XVINSVE0_D v4i64:$xd, (XVPICKVE_D v4i64:$xj, uimm2:$imm1),
+ uimm2:$imm2)>;
+
// PseudoXVINSGR2VR_{B/H}
def : Pat<(vector_insert v32i8:$xd, GRLenVT:$rj, uimm5:$imm),
(PseudoXVINSGR2VR_B v32i8:$xd, GRLenVT:$rj, uimm5:$imm)>;
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 18aeda6a7935..2445005bf98c 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -70,7 +70,7 @@ NVPTXDAGToDAGISel::getDivF32Level(const SDNode *N) const {
}
bool NVPTXDAGToDAGISel::usePrecSqrtF32(const SDNode *N) const {
- return Subtarget->getTargetLowering()->usePrecSqrtF32(*MF, N);
+ return Subtarget->getTargetLowering()->usePrecSqrtF32(N);
}
bool NVPTXDAGToDAGISel::useF32FTZ() const {
@@ -82,11 +82,6 @@ bool NVPTXDAGToDAGISel::allowFMA() const {
return TL->allowFMA(*MF, OptLevel);
}
-bool NVPTXDAGToDAGISel::allowUnsafeFPMath() const {
- const NVPTXTargetLowering *TL = Subtarget->getTargetLowering();
- return TL->allowUnsafeFPMath(*MF);
-}
-
bool NVPTXDAGToDAGISel::doRsqrtOpt() const { return EnableRsqrtOpt; }
/// Select - Select instructions not customized! Used for
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 357e915fd077..65731722f534 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -44,7 +44,6 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
bool usePrecSqrtF32(const SDNode *N) const;
bool useF32FTZ() const;
bool allowFMA() const;
- bool allowUnsafeFPMath() const;
bool doRsqrtOpt() const;
NVPTXScopes Scopes{};
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 3daf25d55152..b94cbd0bd9c1 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -125,10 +125,6 @@ NVPTXTargetLowering::getDivF32Level(const MachineFunction &MF,
if (UsePrecDivF32.getNumOccurrences() > 0)
return UsePrecDivF32;
- // Otherwise, use div.approx if fast math is enabled
- if (allowUnsafeFPMath(MF))
- return NVPTX::DivPrecisionLevel::Approx;
-
const SDNodeFlags Flags = N.getFlags();
if (Flags.hasApproximateFuncs())
return NVPTX::DivPrecisionLevel::Approx;
@@ -136,16 +132,11 @@ NVPTXTargetLowering::getDivF32Level(const MachineFunction &MF,
return NVPTX::DivPrecisionLevel::IEEE754;
}
-bool NVPTXTargetLowering::usePrecSqrtF32(const MachineFunction &MF,
- const SDNode *N) const {
+bool NVPTXTargetLowering::usePrecSqrtF32(const SDNode *N) const {
// If nvptx-prec-sqrtf32 is used on the command-line, always honor it
if (UsePrecSqrtF32.getNumOccurrences() > 0)
return UsePrecSqrtF32;
- // Otherwise, use sqrt.approx if fast math is enabled
- if (allowUnsafeFPMath(MF))
- return false;
-
if (N) {
const SDNodeFlags Flags = N->getFlags();
if (Flags.hasApproximateFuncs())
@@ -1193,8 +1184,7 @@ SDValue NVPTXTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
bool &UseOneConst,
bool Reciprocal) const {
if (!(Enabled == ReciprocalEstimate::Enabled ||
- (Enabled == ReciprocalEstimate::Unspecified &&
- !usePrecSqrtF32(DAG.getMachineFunction()))))
+ (Enabled == ReciprocalEstimate::Unspecified && !usePrecSqrtF32())))
return SDValue();
if (ExtraSteps == ReciprocalEstimate::Unspecified)
@@ -2851,8 +2841,7 @@ static SDValue lowerROT(SDValue Op, SelectionDAG &DAG) {
SDLoc(Op), Opcode, DAG);
}
-static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG,
- bool AllowUnsafeFPMath) {
+static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG) {
// Lower (frem x, y) into (sub x, (mul (ftrunc (div x, y)) y)),
// i.e. "poor man's fmod()". When y is infinite, x is returned. This matches
// the semantics of LLVM's frem.
@@ -2869,7 +2858,7 @@ static SDValue lowerFREM(SDValue Op, SelectionDAG &DAG,
SDValue Sub = DAG.getNode(ISD::FSUB, DL, Ty, X, Mul,
Flags | SDNodeFlags::AllowContract);
- if (AllowUnsafeFPMath || Flags.hasNoInfs())
+ if (Flags.hasNoInfs())
return Sub;
// If Y is infinite, return X
@@ -3014,7 +3003,7 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::CTLZ:
return lowerCTLZCTPOP(Op, DAG);
case ISD::FREM:
- return lowerFREM(Op, DAG, allowUnsafeFPMath(DAG.getMachineFunction()));
+ return lowerFREM(Op, DAG);
default:
llvm_unreachable("Custom lowering not defined for operation");
@@ -4868,17 +4857,7 @@ bool NVPTXTargetLowering::allowFMA(MachineFunction &MF,
if (MF.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast)
return true;
- return allowUnsafeFPMath(MF);
-}
-
-bool NVPTXTargetLowering::allowUnsafeFPMath(const MachineFunction &MF) const {
- // Honor TargetOptions flags that explicitly say unsafe math is okay.
- if (MF.getTarget().Options.UnsafeFPMath)
- return true;
-
- // Allow unsafe math if unsafe-fp-math attribute explicitly says so.
- const Function &F = MF.getFunction();
- return F.getFnAttribute("unsafe-fp-math").getValueAsBool();
+ return false;
}
static bool isConstZero(const SDValue &Operand) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 43e721a9c2a4..27f099e22097 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -206,8 +206,7 @@ public:
// Get whether we should use a precise or approximate 32-bit floating point
// sqrt instruction.
- bool usePrecSqrtF32(const MachineFunction &MF,
- const SDNode *N = nullptr) const;
+ bool usePrecSqrtF32(const SDNode *N = nullptr) const;
// Get whether we should use instructions that flush floating-point denormals
// to sign-preserving zero.
@@ -220,7 +219,6 @@ public:
unsigned combineRepeatedFPDivisors() const override { return 2; }
bool allowFMA(MachineFunction &MF, CodeGenOptLevel OptLevel) const;
- bool allowUnsafeFPMath(const MachineFunction &MF) const;
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
EVT) const override {
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index bd54d1db9156..ebb5e32f5e6f 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -1133,9 +1133,8 @@ defm FMA_F64 : FMA<F64RT, allow_ftz = false>;
// sin/cos/tanh
class UnaryOpAllowsApproxFn<SDPatternOperator operator>
- : PatFrag<(ops node:$A),
- (operator node:$A), [{
- return allowUnsafeFPMath() || N->getFlags().hasApproximateFuncs();
+ : PatFrag<(ops node:$A), (operator node:$A), [{
+ return N->getFlags().hasApproximateFuncs();
}]>;
def SIN_APPROX_f32 :
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 599865312920..9e1530a2d00f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -18,6 +18,7 @@
#include "RISCVInstrInfo.h"
#include "RISCVSelectionDAGInfo.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/SDPatternMatch.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Support/Debug.h"
@@ -772,6 +773,49 @@ bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) {
return false;
}
+// (xor X, (and (xor X, C1), C2))
+// -> (qc.insbi X, (C1 >> ShAmt), Width, ShAmt)
+// where C2 is a shifted mask with width=Width and shift=ShAmt
+bool RISCVDAGToDAGISel::tryBitfieldInsertOpFromXor(SDNode *Node) {
+
+ if (!Subtarget->hasVendorXqcibm())
+ return false;
+
+ using namespace SDPatternMatch;
+
+ SDValue X;
+ APInt CImm, CMask;
+ if (!sd_match(
+ Node,
+ m_Xor(m_Value(X),
+ m_OneUse(m_And(m_OneUse(m_Xor(m_Deferred(X), m_ConstInt(CImm))),
+ m_ConstInt(CMask))))))
+ return false;
+
+ unsigned Width, ShAmt;
+ if (!CMask.isShiftedMask(ShAmt, Width))
+ return false;
+
+ int64_t Imm = CImm.getSExtValue();
+ Imm >>= ShAmt;
+
+ SDLoc DL(Node);
+ SDValue ImmNode;
+ auto Opc = RISCV::QC_INSB;
+
+ if (isInt<5>(Imm)) {
+ Opc = RISCV::QC_INSBI;
+ ImmNode = CurDAG->getSignedTargetConstant(Imm, DL, MVT::i32);
+ } else {
+ ImmNode = selectImm(CurDAG, DL, MVT::i32, Imm, *Subtarget);
+ }
+ SDValue Ops[] = {X, ImmNode, CurDAG->getTargetConstant(Width, DL, MVT::i32),
+ CurDAG->getTargetConstant(ShAmt, DL, MVT::i32)};
+ ReplaceNode(Node, CurDAG->getMachineNode(Opc, DL, MVT::i32, Ops));
+
+ return true;
+}
+
bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node,
const SDLoc &DL, MVT VT,
SDValue X, unsigned Msb,
@@ -1349,6 +1393,9 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (tryShrinkShlLogicImm(Node))
return;
+ if (tryBitfieldInsertOpFromXor(Node))
+ return;
+
break;
case ISD::AND: {
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index ee3a86e25add..9d4cd0e6e339 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -75,6 +75,7 @@ public:
bool trySignedBitfieldExtract(SDNode *Node);
bool trySignedBitfieldInsertInSign(SDNode *Node);
bool trySignedBitfieldInsertInMask(SDNode *Node);
+ bool tryBitfieldInsertOpFromXor(SDNode *Node);
bool tryUnsignedBitfieldExtract(SDNode *Node, const SDLoc &DL, MVT VT,
SDValue X, unsigned Msb, unsigned Lsb);
bool tryUnsignedBitfieldInsertInZero(SDNode *Node, const SDLoc &DL, MVT VT,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 8bc42ad8758c..4f52f68d35aa 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -14333,7 +14333,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
MakeLibCallOptions CallOptions;
EVT OpVT = Op0.getValueType();
- CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
+ CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0));
SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
SDValue Result;
std::tie(Result, Chain) =
@@ -14368,7 +14368,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Op0.getValueType() == MVT::f64 ? RTLIB::LROUND_F64 : RTLIB::LROUND_F32;
MakeLibCallOptions CallOptions;
EVT OpVT = Op0.getValueType();
- CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64, true);
+ CallOptions.setTypeListBeforeSoften(OpVT, MVT::i64);
SDValue Result = makeLibCall(DAG, LC, MVT::i64, Op0, CallOptions, DL).first;
Result = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Result);
Results.push_back(Result);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 8bd383033f11..2a34a24a6ae2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1694,6 +1694,20 @@ multiclass SelectCC_GPR_riirr<DAGOperand valty, DAGOperand imm> {
valty:$truev, valty:$falsev), []>;
}
+let Predicates = [IsRV32] in {
+def : Pat<(i32 (seteq (i32 (and GPR:$rs1, 0xffffffff80000000)), 0)),
+ (XORI (i32 (SRLI GPR:$rs1, 31)), 1)>;
+def : Pat<(i32 (setlt (i32 GPR:$rs1), 0)), (SRLI GPR:$rs1, 31)>; // compressible
+}
+let Predicates = [IsRV64] in {
+def : Pat<(i64 (seteq (i64 (and GPR:$rs1, 0x8000000000000000)), 0)),
+ (XORI (i64 (SRLI GPR:$rs1, 63)), 1)>;
+def : Pat<(i64 (seteq (i64 (and GPR:$rs1, 0x0000000080000000)), 0)),
+ (XORI (i64 (SRLIW GPR:$rs1, 31)), 1)>;
+def : Pat<(i64 (setlt (i64 GPR:$rs1), 0)), (SRLI GPR:$rs1, 63)>; // compressible
+def : Pat<(i64 (setlt (sext_inreg GPR:$rs1, i32), 0)), (SRLIW GPR:$rs1, 31)>;
+}
+
/// Branches and jumps
// Match `riscv_brcc` and lower to the appropriate RISC-V branch instruction.
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 8297d5050ced..d17330f9da9f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -98,6 +98,14 @@ class RVPShift_ri<bits<3> f, bits<3> funct3, string opcodestr, Operand ImmType>
let Inst{27} = 0b0;
}
+class RVPShiftD_ri<bits<3> f, bits<3> funct3, string opcodestr>
+ : RVPShift_ri<f, funct3, opcodestr, uimm6> {
+ bits<6> shamt;
+
+ let Inst{26} = 0b1;
+ let Inst{25-20} = shamt;
+}
+
class RVPShiftW_ri<bits<3> f, bits<3> funct3, string opcodestr>
: RVPShift_ri<f, funct3, opcodestr, uimm5> {
bits<5> shamt;
@@ -136,34 +144,36 @@ class RVPUnary_ri<bits<2> w, bits<5> uf, string opcodestr>
//===----------------------------------------------------------------------===//
let Predicates = [HasStdExtP] in {
-let IsSignExtendingOpW = 1 in
-def CLS : Unary_r<0b011000000011, 0b001, "cls">;
-def ABS : Unary_r<0b011000000111, 0b001, "abs">;
+ let IsSignExtendingOpW = 1 in
+ def CLS : Unary_r<0b011000000011, 0b001, "cls">;
+ def ABS : Unary_r<0b011000000111, 0b001, "abs">;
} // Predicates = [HasStdExtP]
-let Predicates = [HasStdExtP, IsRV32] in
-def REV_RV32 : Unary_r<0b011010011111, 0b101, "rev">;
+
+let Predicates = [HasStdExtP, IsRV32] in {
+ def REV_RV32 : Unary_r<0b011010011111, 0b101, "rev">;
+} // Predicates = [HasStdExtP, IsRV32]
let Predicates = [HasStdExtP, IsRV64] in {
-def REV16 : Unary_r<0b011010110000, 0b101, "rev16">;
-def REV_RV64 : Unary_r<0b011010111111, 0b101, "rev">;
+ def REV16 : Unary_r<0b011010110000, 0b101, "rev16">;
+ def REV_RV64 : Unary_r<0b011010111111, 0b101, "rev">;
-let IsSignExtendingOpW = 1 in {
-def CLSW : UnaryW_r<0b011000000011, 0b001, "clsw">;
-def ABSW : UnaryW_r<0b011000000111, 0b001, "absw">;
-}
+ let IsSignExtendingOpW = 1 in {
+ def CLSW : UnaryW_r<0b011000000011, 0b001, "clsw">;
+ def ABSW : UnaryW_r<0b011000000111, 0b001, "absw">;
+ }
} // Predicates = [HasStdExtP, IsRV64]
let Predicates = [HasStdExtP] in {
-def PSLLI_B : RVPShiftB_ri<0b000, 0b010, "pslli.b">;
-def PSLLI_H : RVPShiftH_ri<0b000, 0b010, "pslli.h">;
-def PSSLAI_H : RVPShiftH_ri<0b101, 0b010, "psslai.h">;
+ def PSLLI_B : RVPShiftB_ri<0b000, 0b010, "pslli.b">;
+ def PSLLI_H : RVPShiftH_ri<0b000, 0b010, "pslli.h">;
+ def PSSLAI_H : RVPShiftH_ri<0b101, 0b010, "psslai.h">;
} // Predicates = [HasStdExtP]
-let DecoderNamespace = "RV32Only",
- Predicates = [HasStdExtP, IsRV32] in
-def SSLAI : RVPShiftW_ri<0b101, 0b010, "sslai">;
+let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+ def SSLAI : RVPShiftW_ri<0b101, 0b010, "sslai">;
+} // Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only"
let Predicates = [HasStdExtP, IsRV64] in {
-def PSLLI_W : RVPShiftW_ri<0b000, 0b010, "pslli.w">;
-def PSSLAI_W : RVPShiftW_ri<0b101, 0b010, "psslai.w">;
+ def PSLLI_W : RVPShiftW_ri<0b000, 0b010, "pslli.w">;
+ def PSSLAI_W : RVPShiftW_ri<0b101, 0b010, "psslai.w">;
} // Predicates = [HasStdExtP, IsRV64]
let Predicates = [HasStdExtP] in
@@ -174,16 +184,50 @@ let Predicates = [HasStdExtP] in
def PLI_B : PLI_B_i<0b10110100, "pli.b">;
let Predicates = [HasStdExtP] in {
-def PSEXT_H_B : RVPUnary_ri<0b00, 0b00100, "psext.h.b">;
-def PSABS_H : RVPUnary_ri<0b00, 0b00111, "psabs.h">;
-def PSABS_B : RVPUnary_ri<0b10, 0b00111, "psabs.b">;
+ def PSEXT_H_B : RVPUnary_ri<0b00, 0b00100, "psext.h.b">;
+ def PSABS_H : RVPUnary_ri<0b00, 0b00111, "psabs.h">;
+ def PSABS_B : RVPUnary_ri<0b10, 0b00111, "psabs.b">;
} // Predicates = [HasStdExtP]
let Predicates = [HasStdExtP, IsRV64] in {
-def PSEXT_W_B : RVPUnary_ri<0b01, 0b00100, "psext.w.b">;
-def PSEXT_W_H : RVPUnary_ri<0b01, 0b00101, "psext.w.h">;
+ def PSEXT_W_B : RVPUnary_ri<0b01, 0b00100, "psext.w.b">;
+ def PSEXT_W_H : RVPUnary_ri<0b01, 0b00101, "psext.w.h">;
} // Predicates = [HasStdExtP, IsRV64]
let Predicates = [HasStdExtP] in
def PLUI_H : PLUI_i<0b1111000, "plui.h">;
let Predicates = [HasStdExtP, IsRV64] in
def PLUI_W : PLUI_i<0b1111001, "plui.w">;
+
+let Predicates = [HasStdExtP] in {
+ def PSRLI_B : RVPShiftB_ri<0b000, 0b100, "psrli.b">;
+ def PSRLI_H : RVPShiftH_ri<0b000, 0b100, "psrli.h">;
+
+ def PUSATI_H : RVPShiftH_ri<0b010, 0b100, "pusati.h">;
+
+ def PSRAI_B : RVPShiftB_ri<0b100, 0b100, "psrai.b">;
+ def PSRAI_H : RVPShiftH_ri<0b100, 0b100, "psrai.h">;
+
+ def PSRARI_H : RVPShiftH_ri<0b101, 0b100, "psrari.h">;
+
+ def PSATI_H : RVPShiftH_ri<0b110, 0b100, "psati.h">;
+} // Predicates = [HasStdExtP]
+let Predicates = [HasStdExtP, IsRV32], DecoderNamespace = "RV32Only" in {
+ def USATI_RV32 : RVPShiftW_ri<0b010, 0b100, "usati">;
+
+ def SRARI_RV32 : RVPShiftW_ri<0b101, 0b100, "srari">;
+
+ def SATI_RV32 : RVPShiftW_ri<0b110, 0b100, "sati">;
+} // Predicates = [HasStdExtP, IsRV32]
+let Predicates = [HasStdExtP, IsRV64] in {
+ def PSRLI_W : RVPShiftW_ri<0b000, 0b100, "psrli.w">;
+ def PSRAI_W : RVPShiftW_ri<0b100, 0b100, "psrai.w">;
+
+ def PUSATI_W : RVPShiftW_ri<0b010, 0b100, "pusati.w">;
+ def USATI_RV64 : RVPShiftD_ri<0b010, 0b100, "usati">;
+
+ def PSRARI_W : RVPShiftW_ri<0b101, 0b100, "psrari.w">;
+ def SRARI_RV64 : RVPShiftD_ri<0b101, 0b100, "srari">;
+
+ def PSATI_W : RVPShiftW_ri<0b110, 0b100, "psati.w">;
+ def SATI_RV64 : RVPShiftD_ri<0b110, 0b100, "sati">;
+} // Predicates = [HasStdExtP, IsRV64]
diff --git a/llvm/lib/Target/RISCV/RISCVSchedAndes45.td b/llvm/lib/Target/RISCV/RISCVSchedAndes45.td
index 5ef858a787c7..8cf15fa26e22 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedAndes45.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedAndes45.td
@@ -24,7 +24,7 @@ let SchedModel = Andes45Model in {
//===----------------------------------------------------------------------===//
// Andes 45 series CPU
-// - 2 Interger Arithmetic and Logical Units (ALU)
+// - 2 Integer Arithmetic and Logical Units (ALU)
// - Multiply / Divide Unit (MDU)
// - Load Store Unit (LSU)
// - Control and Status Register Unit (CSR)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 28c8f401321f..f013898e8520 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -497,6 +497,10 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VANDN_VX:
// Vector Reverse Bits in Elements
case RISCV::VBREV_V:
+ // Vector Reverse Bits in Bytes
+ case RISCV::VBREV8_V:
+ // Vector Reverse Bytes
+ case RISCV::VREV8_V:
// Vector Count Leading Zeros
case RISCV::VCLZ_V:
// Vector Count Trailing Zeros
@@ -510,6 +514,13 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VROR_VI:
case RISCV::VROR_VV:
case RISCV::VROR_VX:
+ // Vector Carry-less Multiplication Instructions (Zvbc)
+ // Vector Carry-less Multiply
+ case RISCV::VCLMUL_VV:
+ case RISCV::VCLMUL_VX:
+ // Vector Carry-less Multiply Return High Half
+ case RISCV::VCLMULH_VV:
+ case RISCV::VCLMULH_VX:
return MILog2SEW;
// Vector Widening Shift Left Logical (Zvbb)
@@ -1046,6 +1057,10 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VANDN_VX:
// Vector Reverse Bits in Elements
case RISCV::VBREV_V:
+ // Vector Reverse Bits in Bytes
+ case RISCV::VBREV8_V:
+ // Vector Reverse Bytes
+ case RISCV::VREV8_V:
// Vector Count Leading Zeros
case RISCV::VCLZ_V:
// Vector Count Trailing Zeros
@@ -1063,6 +1078,13 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VWSLL_VI:
case RISCV::VWSLL_VX:
case RISCV::VWSLL_VV:
+ // Vector Carry-less Multiplication Instructions (Zvbc)
+ // Vector Carry-less Multiply
+ case RISCV::VCLMUL_VV:
+ case RISCV::VCLMUL_VX:
+ // Vector Carry-less Multiply Return High Half
+ case RISCV::VCLMULH_VV:
+ case RISCV::VCLMULH_VX:
// Vector Mask Instructions
// Vector Mask-Register Logical Instructions
// vmsbf.m set-before-first mask bit
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f366094c3195..97cdf5b784bc 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -15419,18 +15419,18 @@ static SDValue lowerShuffleAsLanePermuteAndPermute(
return SDValue();
}
- // Avoid returning the same shuffle operation. For example,
- // t7: v16i16 = vector_shuffle<8,9,10,11,4,5,6,7,0,1,2,3,12,13,14,15> t5,
- // undef:v16i16
- if (CrossLaneMask == Mask || InLaneMask == Mask)
- return SDValue();
-
// Simplify CrossLaneMask based on the actual demanded elements.
if (V1.hasOneUse())
for (int i = 0; i != NumElts; ++i)
if (!DemandedCrossLane[i])
CrossLaneMask[i] = SM_SentinelUndef;
+ // Avoid returning the same shuffle operation. For example,
+ // t7: v16i16 = vector_shuffle<8,9,10,11,4,5,6,7,0,1,2,3,12,13,14,15> t5,
+ // undef:v16i16
+ if (CrossLaneMask == Mask || InLaneMask == Mask)
+ return SDValue();
+
SDValue CrossLane = DAG.getVectorShuffle(VT, DL, V1, V2, CrossLaneMask);
return DAG.getVectorShuffle(VT, DL, CrossLane, DAG.getUNDEF(VT),
InLaneMask);
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 9ef21faea2b6..cae6bb99d963 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -5488,9 +5488,10 @@ InstructionCost X86TTIImpl::getPointersChainCost(
return BaseT::getPointersChainCost(Ptrs, Base, Info, AccessTy, CostKind);
}
-InstructionCost X86TTIImpl::getAddressComputationCost(Type *PtrTy,
- ScalarEvolution *SE,
- const SCEV *Ptr) const {
+InstructionCost
+X86TTIImpl::getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
+ const SCEV *Ptr,
+ TTI::TargetCostKind CostKind) const {
// Address computations in vectorized code with non-consecutive addresses will
// likely result in more instructions compared to scalar code where the
// computation can more often be merged into the index mode. The resulting
@@ -5513,7 +5514,7 @@ InstructionCost X86TTIImpl::getAddressComputationCost(Type *PtrTy,
return 1;
}
- return BaseT::getAddressComputationCost(PtrTy, SE, Ptr);
+ return BaseT::getAddressComputationCost(PtrTy, SE, Ptr, CostKind);
}
InstructionCost
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index bc06c4746c3c..5718c0c9535f 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -194,8 +194,9 @@ public:
getPointersChainCost(ArrayRef<const Value *> Ptrs, const Value *Base,
const TTI::PointersChainInfo &Info, Type *AccessTy,
TTI::TargetCostKind CostKind) const override;
- InstructionCost getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
- const SCEV *Ptr) const override;
+ InstructionCost
+ getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, const SCEV *Ptr,
+ TTI::TargetCostKind CostKind) const override;
std::optional<Instruction *>
instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const override;
diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index ab906f9fa03a..180ac9c61e7d 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -2252,6 +2252,10 @@ PreservedAnalyses CoroSplitPass::run(LazyCallGraph::SCC &C,
UR.CWorklist.insert(CurrentSCC);
for (Function *Clone : Clones)
UR.CWorklist.insert(CG.lookupSCC(CG.get(*Clone)));
+ } else if (Shape.ABI == coro::ABI::Async) {
+ // Reprocess the function to inline the tail called return function of
+ // coro.async.end.
+ UR.CWorklist.insert(&C);
}
}
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index cf94d2810048..a64f422c3eed 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1320,6 +1320,35 @@ Instruction *InstCombinerImpl::foldICmpWithZero(ICmpInst &Cmp) {
return nullptr;
}
+/// Fold icmp eq (num + mask) & ~mask, num
+/// to
+/// icmp eq (and num, mask), 0
+/// Where mask is a low bit mask.
+Instruction *InstCombinerImpl::foldIsMultipleOfAPowerOfTwo(ICmpInst &Cmp) {
+ Value *Num;
+ CmpPredicate Pred;
+ const APInt *Mask, *Neg;
+
+ if (!match(&Cmp,
+ m_c_ICmp(Pred, m_Value(Num),
+ m_OneUse(m_c_And(m_OneUse(m_c_Add(m_Deferred(Num),
+ m_LowBitMask(Mask))),
+ m_APInt(Neg))))))
+ return nullptr;
+
+ if (*Neg != ~*Mask)
+ return nullptr;
+
+ if (!ICmpInst::isEquality(Pred))
+ return nullptr;
+
+ // Create new icmp eq (num & mask), 0
+ auto *NewAnd = Builder.CreateAnd(Num, *Mask);
+ auto *Zero = Constant::getNullValue(Num->getType());
+
+ return new ICmpInst(Pred, NewAnd, Zero);
+}
+
/// Fold icmp Pred X, C.
/// TODO: This code structure does not make sense. The saturating add fold
/// should be moved to some other helper and extended as noted below (it is also
@@ -7644,6 +7673,9 @@ Instruction *InstCombinerImpl::visitICmpInst(ICmpInst &I) {
if (Instruction *Res = foldICmpUsingKnownBits(I))
return Res;
+ if (Instruction *Res = foldIsMultipleOfAPowerOfTwo(I))
+ return Res;
+
// Test if the ICmpInst instruction is used exclusively by a select as
// part of a minimum or maximum operation. If so, refrain from doing
// any other folding. This helps out other analyses which understand
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index c67e27e5b3e7..2340028ce93d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -721,6 +721,7 @@ public:
Instruction *foldICmpUsingKnownBits(ICmpInst &Cmp);
Instruction *foldICmpWithDominatingICmp(ICmpInst &Cmp);
Instruction *foldICmpWithConstant(ICmpInst &Cmp);
+ Instruction *foldIsMultipleOfAPowerOfTwo(ICmpInst &Cmp);
Instruction *foldICmpUsingBoolRange(ICmpInst &I);
Instruction *foldICmpInstWithConstant(ICmpInst &Cmp);
Instruction *foldICmpInstWithConstantNotInt(ICmpInst &Cmp);
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 1ef4fcc5fac4..4c035a2464c8 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -2867,7 +2867,7 @@ static bool hoistBOAssociation(Instruction &I, Loop &L,
bool LVInRHS = L.isLoopInvariant(BO->getOperand(0));
auto *BO0 = dyn_cast<BinaryOperator>(BO->getOperand(LVInRHS));
if (!BO0 || BO0->getOpcode() != Opcode || !BO0->isAssociative() ||
- BO0->hasNUsesOrMore(3))
+ BO0->hasNUsesOrMore(BO0->getType()->isIntegerTy() ? 2 : 3))
return false;
Value *LV = BO0->getOperand(0);
diff --git a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 844219a64d2f..8b15445ae92d 100644
--- a/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -2309,7 +2309,9 @@ chainToBasePointerCost(SmallVectorImpl<Instruction *> &Chain,
} else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
// Cost of the address calculation
- Cost += TTI.getAddressComputationCost(GEP->getType());
+ Cost += TTI.getAddressComputationCost(
+ GEP->getType(), nullptr, nullptr,
+ TargetTransformInfo::TCK_SizeAndLatency);
// And cost of the GEP itself
// TODO: Use TTI->getGEPCost here (it exists, but appears to be not
diff --git a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index 6ffe8413a507..fc96589f8323 100644
--- a/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -294,6 +294,10 @@ private:
bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
bool NonNegative);
+ /// Analyze XOR instruction to extract disjoint constant bits that behave
+ /// like addition operations for improved address mode folding.
+ APInt extractDisjointBitsFromXor(BinaryOperator *XorInst);
+
/// The path from the constant offset to the old GEP index. e.g., if the GEP
/// index is "a * b + (c + 5)". After running function find, UserChain[0] will
/// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
@@ -596,6 +600,9 @@ APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
// Trace into subexpressions for more hoisting opportunities.
if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative))
ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
+ // Handle XOR with disjoint bits that can be treated as addition.
+ else if (BO->getOpcode() == Instruction::Xor)
+ ConstantOffset = extractDisjointBitsFromXor(BO);
} else if (isa<TruncInst>(V)) {
ConstantOffset =
find(U->getOperand(0), SignExtended, ZeroExtended, NonNegative)
@@ -708,11 +715,20 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
Value *NextInChain = removeConstOffset(ChainIndex - 1);
Value *TheOther = BO->getOperand(1 - OpNo);
- // If NextInChain is 0 and not the LHS of a sub, we can simplify the
- // sub-expression to be just TheOther.
if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
- if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
- return TheOther;
+ if (CI->isZero()) {
+ // Custom XOR handling for disjoint bits - preserves original XOR
+ // with non-disjoint constant bits.
+ // TODO: The design should be updated to support partial constant
+ // extraction.
+ if (BO->getOpcode() == Instruction::Xor)
+ return BO;
+
+ // If NextInChain is 0 and not the LHS of a sub, we can simplify the
+ // sub-expression to be just TheOther.
+ if (!(BO->getOpcode() == Instruction::Sub && OpNo == 0))
+ return TheOther;
+ }
}
BinaryOperator::BinaryOps NewOp = BO->getOpcode();
@@ -743,6 +759,67 @@ Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
return NewBO;
}
+/// Analyze XOR instruction to extract disjoint constant bits for address
+/// folding
+///
+/// This function identifies bits in an XOR constant operand that are disjoint
+/// from the base operand's known set bits. For these disjoint bits, XOR behaves
+/// identically to addition, allowing us to extract them as constant offsets
+/// that can be folded into addressing modes.
+///
+/// Transformation: `Base ^ Const` becomes `(Base ^ NonDisjointBits) +
+/// DisjointBits` where DisjointBits = Const & KnownZeros(Base)
+///
+/// Example with ptr having known-zero low bit:
+/// Original: `xor %ptr, 3` ; 3 = 0b11
+/// Analysis: DisjointBits = 3 & KnownZeros(%ptr) = 0b11 & 0b01 = 0b01
+/// Result: `(xor %ptr, 2) + 1` where 1 can be folded into address mode
+///
+/// \param XorInst The XOR binary operator to analyze
+/// \return APInt containing the disjoint bits that can be extracted as offset,
+/// or zero if no disjoint bits exist
+APInt ConstantOffsetExtractor::extractDisjointBitsFromXor(
+ BinaryOperator *XorInst) {
+ assert(XorInst && XorInst->getOpcode() == Instruction::Xor &&
+ "Expected XOR instruction");
+
+ const unsigned BitWidth = XorInst->getType()->getScalarSizeInBits();
+ Value *BaseOperand;
+ ConstantInt *XorConstant;
+
+ // Match pattern: xor BaseOperand, Constant.
+ if (!match(XorInst, m_Xor(m_Value(BaseOperand), m_ConstantInt(XorConstant))))
+ return APInt::getZero(BitWidth);
+
+ // Compute known bits for the base operand.
+ const SimplifyQuery SQ(DL);
+ const KnownBits BaseKnownBits = computeKnownBits(BaseOperand, SQ);
+ const APInt &ConstantValue = XorConstant->getValue();
+
+ // Identify disjoint bits: constant bits that are known zero in base.
+ const APInt DisjointBits = ConstantValue & BaseKnownBits.Zero;
+
+ // Early exit if no disjoint bits found.
+ if (DisjointBits.isZero())
+ return APInt::getZero(BitWidth);
+
+ // Compute the remaining non-disjoint bits that stay in the XOR.
+ const APInt NonDisjointBits = ConstantValue & ~DisjointBits;
+
+ // FIXME: Enhance XOR constant extraction to handle nested binary operations.
+ // Currently we only extract disjoint bits from the immediate XOR constant,
+ // but we could recursively process cases like:
+ // xor (add %base, C1), C2 -> add %base, (C1 ^ disjoint_bits(C2))
+ // This requires careful analysis to ensure the transformation preserves
+ // semantics, particularly around sign extension and overflow behavior.
+
+ // Add the non-disjoint constant to the user chain for later transformation
+ // This will replace the original constant in the XOR with the new
+ // constant.
+ UserChain.push_back(ConstantInt::get(XorInst->getType(), NonDisjointBits));
+ return DisjointBits;
+}
+
/// A helper function to check if reassociating through an entry in the user
/// chain would invalidate the GEP's nuw flag.
static bool allowsPreservingNUW(const User *U) {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a7720b11a2b3..cb37ec3e9480 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -499,18 +499,16 @@ class InnerLoopVectorizer {
public:
InnerLoopVectorizer(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
LoopInfo *LI, DominatorTree *DT,
- const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE, ElementCount VecWidth,
+ ElementCount VecWidth,
ElementCount MinProfitableTripCount,
unsigned UnrollFactor, LoopVectorizationCostModel *CM,
BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
GeneratedRTChecks &RTChecks, VPlan &Plan)
- : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
- AC(AC), ORE(ORE), VF(VecWidth),
- MinProfitableTripCount(MinProfitableTripCount), UF(UnrollFactor),
- Builder(PSE.getSE()->getContext()), Cost(CM), BFI(BFI), PSI(PSI),
- RTChecks(RTChecks), Plan(Plan),
+ : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TTI(TTI), AC(AC),
+ VF(VecWidth), MinProfitableTripCount(MinProfitableTripCount),
+ UF(UnrollFactor), Builder(PSE.getSE()->getContext()), Cost(CM),
+ BFI(BFI), PSI(PSI), RTChecks(RTChecks), Plan(Plan),
VectorPHVPBB(cast<VPBasicBlock>(
Plan.getVectorLoopRegion()->getSinglePredecessor())) {}
@@ -584,18 +582,12 @@ protected:
/// Dominator Tree.
DominatorTree *DT;
- /// Target Library Info.
- const TargetLibraryInfo *TLI;
-
/// Target Transform Info.
const TargetTransformInfo *TTI;
/// Assumption Cache.
AssumptionCache *AC;
- /// Interface to emit optimization remarks.
- OptimizationRemarkEmitter *ORE;
-
/// The vectorization SIMD factor to use. Each vector will have this many
/// vector elements.
ElementCount VF;
@@ -617,9 +609,6 @@ protected:
/// The scalar-loop preheader.
BasicBlock *LoopScalarPreHeader = nullptr;
- /// Middle Block between the vector and the scalar.
- BasicBlock *LoopMiddleBlock = nullptr;
-
/// Trip count of the original loop.
Value *TripCount = nullptr;
@@ -684,14 +673,12 @@ class InnerLoopAndEpilogueVectorizer : public InnerLoopVectorizer {
public:
InnerLoopAndEpilogueVectorizer(
Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI,
- DominatorTree *DT, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
- LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
- ProfileSummaryInfo *PSI, GeneratedRTChecks &Checks, VPlan &Plan,
- ElementCount VecWidth, ElementCount MinProfitableTripCount,
- unsigned UnrollFactor)
- : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, VecWidth,
+ DominatorTree *DT, const TargetTransformInfo *TTI, AssumptionCache *AC,
+ EpilogueLoopVectorizationInfo &EPI, LoopVectorizationCostModel *CM,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+ GeneratedRTChecks &Checks, VPlan &Plan, ElementCount VecWidth,
+ ElementCount MinProfitableTripCount, unsigned UnrollFactor)
+ : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TTI, AC, VecWidth,
MinProfitableTripCount, UnrollFactor, CM, BFI, PSI,
Checks, Plan),
EPI(EPI) {}
@@ -721,16 +708,17 @@ public:
/// epilogues.
class EpilogueVectorizerMainLoop : public InnerLoopAndEpilogueVectorizer {
public:
- EpilogueVectorizerMainLoop(
- Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI,
- DominatorTree *DT, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
- LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
- ProfileSummaryInfo *PSI, GeneratedRTChecks &Check, VPlan &Plan)
- : InnerLoopAndEpilogueVectorizer(
- OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, EPI, CM, BFI, PSI, Check,
- Plan, EPI.MainLoopVF, EPI.MainLoopVF, EPI.MainLoopUF) {}
+ EpilogueVectorizerMainLoop(Loop *OrigLoop, PredicatedScalarEvolution &PSE,
+ LoopInfo *LI, DominatorTree *DT,
+ const TargetTransformInfo *TTI,
+ AssumptionCache *AC,
+ EpilogueLoopVectorizationInfo &EPI,
+ LoopVectorizationCostModel *CM,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+ GeneratedRTChecks &Check, VPlan &Plan)
+ : InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TTI, AC, EPI, CM,
+ BFI, PSI, Check, Plan, EPI.MainLoopVF,
+ EPI.MainLoopVF, EPI.MainLoopUF) {}
/// Implements the interface for creating a vectorized skeleton using the
/// *main loop* strategy (ie the first pass of vplan execution).
BasicBlock *createEpilogueVectorizedLoopSkeleton() final;
@@ -751,14 +739,13 @@ class EpilogueVectorizerEpilogueLoop : public InnerLoopAndEpilogueVectorizer {
public:
EpilogueVectorizerEpilogueLoop(
Loop *OrigLoop, PredicatedScalarEvolution &PSE, LoopInfo *LI,
- DominatorTree *DT, const TargetLibraryInfo *TLI,
- const TargetTransformInfo *TTI, AssumptionCache *AC,
- OptimizationRemarkEmitter *ORE, EpilogueLoopVectorizationInfo &EPI,
- LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI,
- ProfileSummaryInfo *PSI, GeneratedRTChecks &Checks, VPlan &Plan)
- : InnerLoopAndEpilogueVectorizer(
- OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, EPI, CM, BFI, PSI, Checks,
- Plan, EPI.EpilogueVF, EPI.EpilogueVF, EPI.EpilogueUF) {
+ DominatorTree *DT, const TargetTransformInfo *TTI, AssumptionCache *AC,
+ EpilogueLoopVectorizationInfo &EPI, LoopVectorizationCostModel *CM,
+ BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI,
+ GeneratedRTChecks &Checks, VPlan &Plan)
+ : InnerLoopAndEpilogueVectorizer(OrigLoop, PSE, LI, DT, TTI, AC, EPI, CM,
+ BFI, PSI, Checks, Plan, EPI.EpilogueVF,
+ EPI.EpilogueVF, EPI.EpilogueUF) {
TripCount = EPI.TripCount;
}
/// Implements the interface for creating a vectorized skeleton using the
@@ -5227,8 +5214,8 @@ LoopVectorizationCostModel::getMemInstScalarizationCost(Instruction *I,
const SCEV *PtrSCEV = getAddressAccessSCEV(Ptr, Legal, PSE, TheLoop);
// Get the cost of the scalar memory instruction and address computation.
- InstructionCost Cost =
- VF.getFixedValue() * TTI.getAddressComputationCost(PtrTy, SE, PtrSCEV);
+ InstructionCost Cost = VF.getFixedValue() * TTI.getAddressComputationCost(
+ PtrTy, SE, PtrSCEV, CostKind);
// Don't pass *I here, since it is scalar but will actually be part of a
// vectorized loop where the user of it is a vectorized instruction.
@@ -5304,7 +5291,7 @@ LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
const Align Alignment = getLoadStoreAlignment(I);
unsigned AS = getLoadStoreAddressSpace(I);
if (isa<LoadInst>(I)) {
- return TTI.getAddressComputationCost(PtrTy) +
+ return TTI.getAddressComputationCost(PtrTy, nullptr, nullptr, CostKind) +
TTI.getMemoryOpCost(Instruction::Load, ValTy, Alignment, AS,
CostKind) +
TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VectorTy,
@@ -5317,7 +5304,7 @@ LoopVectorizationCostModel::getUniformMemOpCost(Instruction *I,
// VF.getKnownMinValue() - 1 from a scalable vector. This does not represent
// the actual generated code, which involves extracting the last element of
// a scalable vector where the lane to extract is unknown at compile time.
- return TTI.getAddressComputationCost(PtrTy) +
+ return TTI.getAddressComputationCost(PtrTy, nullptr, nullptr, CostKind) +
TTI.getMemoryOpCost(Instruction::Store, ValTy, Alignment, AS,
CostKind) +
(IsLoopInvariantStoreValue
@@ -5335,7 +5322,7 @@ LoopVectorizationCostModel::getGatherScatterCost(Instruction *I,
const Value *Ptr = getLoadStorePointerOperand(I);
Type *PtrTy = toVectorTy(Ptr->getType(), VF);
- return TTI.getAddressComputationCost(PtrTy) +
+ return TTI.getAddressComputationCost(PtrTy, nullptr, nullptr, CostKind) +
TTI.getGatherScatterOpCost(I->getOpcode(), VectorTy, Ptr,
Legal->isMaskRequired(I), Alignment,
CostKind, I);
@@ -5575,7 +5562,7 @@ LoopVectorizationCostModel::getMemoryInstructionCost(Instruction *I,
unsigned AS = getLoadStoreAddressSpace(I);
TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(I->getOperand(0));
- return TTI.getAddressComputationCost(PtrTy) +
+ return TTI.getAddressComputationCost(PtrTy, nullptr, nullptr, CostKind) +
TTI.getMemoryOpCost(I->getOpcode(), ValTy, Alignment, AS, CostKind,
OpInfo, I);
}
@@ -7294,15 +7281,14 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
VPBasicBlock *VectorPH = cast<VPBasicBlock>(BestVPlan.getVectorPreheader());
VPlanTransforms::optimizeForVFAndUF(BestVPlan, BestVF, BestUF, PSE);
- VPlanTransforms::simplifyRecipes(BestVPlan, *Legal->getWidestInductionType());
+ VPlanTransforms::simplifyRecipes(BestVPlan);
VPlanTransforms::removeBranchOnConst(BestVPlan);
VPlanTransforms::narrowInterleaveGroups(
BestVPlan, BestVF,
TTI.getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector));
VPlanTransforms::removeDeadRecipes(BestVPlan);
- VPlanTransforms::convertToConcreteRecipes(BestVPlan,
- *Legal->getWidestInductionType());
+ VPlanTransforms::convertToConcreteRecipes(BestVPlan);
// Regions are dissolved after optimizing for VF and UF, which completely
// removes unneeded loop regions first.
VPlanTransforms::dissolveLoopRegions(BestVPlan);
@@ -9476,8 +9462,8 @@ static bool processLoopInVPlanNativePath(
{
GeneratedRTChecks Checks(PSE, DT, LI, TTI, F->getDataLayout(), CM.CostKind);
- InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
- VF.Width, 1, &CM, BFI, PSI, Checks, BestPlan);
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width, VF.Width, 1, &CM,
+ BFI, PSI, Checks, BestPlan);
LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \""
<< L->getHeader()->getParent()->getName() << "\"\n");
LVP.executePlan(VF.Width, 1, BestPlan, LB, DT, false);
@@ -10259,7 +10245,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// interleave it.
VPlan &BestPlan = LVP.getPlanFor(VF.Width);
InnerLoopVectorizer Unroller(
- L, PSE, LI, DT, TLI, TTI, AC, ORE, ElementCount::getFixed(1),
+ L, PSE, LI, DT, TTI, AC, ElementCount::getFixed(1),
ElementCount::getFixed(1), IC, &CM, BFI, PSI, Checks, BestPlan);
// TODO: Move to general VPlan pipeline once epilogue loops are also
@@ -10294,18 +10280,16 @@ bool LoopVectorizePass::processLoop(Loop *L) {
preparePlanForMainVectorLoop(*BestMainPlan, BestEpiPlan);
EpilogueLoopVectorizationInfo EPI(VF.Width, IC, EpilogueVF.Width, 1,
BestEpiPlan);
- EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TLI, TTI, AC, ORE,
- EPI, &CM, BFI, PSI, Checks,
- *BestMainPlan);
+ EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TTI, AC, EPI, &CM,
+ BFI, PSI, Checks, *BestMainPlan);
auto ExpandedSCEVs = LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF,
*BestMainPlan, MainILV, DT, false);
++LoopsVectorized;
// Second pass vectorizes the epilogue and adjusts the control flow
// edges from the first pass.
- EpilogueVectorizerEpilogueLoop EpilogILV(L, PSE, LI, DT, TLI, TTI, AC,
- ORE, EPI, &CM, BFI, PSI,
- Checks, BestEpiPlan);
+ EpilogueVectorizerEpilogueLoop EpilogILV(
+ L, PSE, LI, DT, TTI, AC, EPI, &CM, BFI, PSI, Checks, BestEpiPlan);
EpilogILV.setTripCount(MainILV.getTripCount());
preparePlanForEpilogueVectorLoop(BestEpiPlan, L, ExpandedSCEVs, EPI);
@@ -10330,7 +10314,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (!Checks.hasChecks())
DisableRuntimeUnroll = true;
} else {
- InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width,
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TTI, AC, VF.Width,
VF.MinProfitableTripCount, IC, &CM, BFI, PSI,
Checks, BestPlan);
// TODO: Move to general VPlan pipeline once epilogue loops are also
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 98011174bd70..b9b33140e09f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5689,7 +5689,8 @@ private:
/// Updates the dependency information of a bundle and of all instructions/
/// bundles which depend on the original bundle.
void calculateDependencies(ScheduleBundle &Bundle, bool InsertInReadyList,
- BoUpSLP *SLP);
+ BoUpSLP *SLP,
+ ArrayRef<ScheduleData *> ControlDeps = {});
/// Sets all instruction in the scheduling region to un-scheduled.
void resetSchedule();
@@ -20727,15 +20728,21 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
LLVM_DEBUG(dbgs() << "SLP: bundle: " << *S.getMainOp() << "\n");
auto TryScheduleBundleImpl = [=](bool ReSchedule, ScheduleBundle &Bundle) {
- // Clear deps or reculate the region, if the memory instruction is a
- // copyable. It may have memory deps, which must be reaculated.
+ // Clear deps or recalculate the region, if the memory instruction is a
+ // copyable. It may have memory deps, which must be recalculated.
+ SmallVector<ScheduleData *> ControlDependentMembers;
auto CheckIfNeedToClearDeps = [&](ScheduleBundle &Bundle) {
SmallDenseMap<std::pair<Instruction *, Value *>, unsigned> UserOpToNumOps;
for (ScheduleEntity *SE : Bundle.getBundle()) {
if (ScheduleCopyableData *SD = dyn_cast<ScheduleCopyableData>(SE)) {
if (ScheduleData *BundleMember = getScheduleData(SD->getInst());
- BundleMember && BundleMember->hasValidDependencies())
+ BundleMember && BundleMember->hasValidDependencies()) {
BundleMember->clearDirectDependencies();
+ if (RegionHasStackSave ||
+ !isGuaranteedToTransferExecutionToSuccessor(
+ BundleMember->getInst()))
+ ControlDependentMembers.push_back(BundleMember);
+ }
continue;
}
auto *SD = cast<ScheduleData>(SE);
@@ -20748,8 +20755,12 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
if (auto *Op = dyn_cast<Instruction>(U.get());
Op && areAllOperandsReplacedByCopyableData(SD->getInst(), Op,
*SLP, NumOps)) {
- if (ScheduleData *OpSD = getScheduleData(Op))
+ if (ScheduleData *OpSD = getScheduleData(Op)) {
OpSD->clearDirectDependencies();
+ if (RegionHasStackSave ||
+ !isGuaranteedToTransferExecutionToSuccessor(OpSD->getInst()))
+ ControlDependentMembers.push_back(OpSD);
+ }
}
}
}
@@ -20783,7 +20794,8 @@ BoUpSLP::BlockScheduling::tryScheduleBundle(ArrayRef<Value *> VL, BoUpSLP *SLP,
CheckIfNeedToClearDeps(Bundle);
LLVM_DEBUG(dbgs() << "SLP: try schedule bundle " << Bundle << " in block "
<< BB->getName() << "\n");
- calculateDependencies(Bundle, /*InsertInReadyList=*/!ReSchedule, SLP);
+ calculateDependencies(Bundle, /*InsertInReadyList=*/!ReSchedule, SLP,
+ ControlDependentMembers);
}
if (ReSchedule) {
@@ -21048,9 +21060,9 @@ void BoUpSLP::BlockScheduling::initScheduleData(Instruction *FromI,
}
}
-void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleBundle &Bundle,
- bool InsertInReadyList,
- BoUpSLP *SLP) {
+void BoUpSLP::BlockScheduling::calculateDependencies(
+ ScheduleBundle &Bundle, bool InsertInReadyList, BoUpSLP *SLP,
+ ArrayRef<ScheduleData *> ControlDeps) {
SmallVector<ScheduleEntity *> WorkList;
auto ProcessNode = [&](ScheduleEntity *SE) {
if (auto *CD = dyn_cast<ScheduleCopyableData>(SE)) {
@@ -21293,6 +21305,7 @@ void BoUpSLP::BlockScheduling::calculateDependencies(ScheduleBundle &Bundle,
};
WorkList.push_back(Bundle.getBundle().front());
+ WorkList.append(ControlDeps.begin(), ControlDeps.end());
SmallPtrSet<ScheduleBundle *, 16> Visited;
while (!WorkList.empty()) {
ScheduleEntity *SD = WorkList.pop_back_val();
@@ -21362,7 +21375,7 @@ void BoUpSLP::BlockScheduling::resetSchedule() {
});
// Reset schedule data for copyable elements.
for (auto &P : ScheduleCopyableDataMap) {
- if (isInSchedulingRegion(*P.second.get())) {
+ if (isInSchedulingRegion(*P.second)) {
P.second->setScheduled(/*Scheduled=*/false);
P.second->resetUnscheduledDeps();
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
index 8818843a3062..9f036fbd569b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h
@@ -200,15 +200,11 @@ template <typename Ops_t, unsigned Opcode, bool Commutative,
struct Recipe_match {
Ops_t Ops;
- Recipe_match() : Ops() {
- static_assert(std::tuple_size<Ops_t>::value == 0 &&
- "constructor can only be used with zero operands");
- }
- Recipe_match(Ops_t Ops) : Ops(Ops) {}
- template <typename A_t, typename B_t>
- Recipe_match(A_t A, B_t B) : Ops({A, B}) {
- static_assert(std::tuple_size<Ops_t>::value == 2 &&
- "constructor can only be used for binary matcher");
+ template <typename... OpTy> Recipe_match(OpTy... Ops) : Ops(Ops...) {
+ static_assert(std::tuple_size<Ops_t>::value == sizeof...(Ops) &&
+ "number of operands in constructor doesn't match Ops_t");
+ static_assert((!Commutative || std::tuple_size<Ops_t>::value == 2) &&
+ "only binary ops can be commutative");
}
bool match(const VPValue *V) const {
@@ -254,7 +250,6 @@ private:
// Check for recipes that do not have opcodes.
if constexpr (std::is_same<RecipeTy, VPScalarIVStepsRecipe>::value ||
std::is_same<RecipeTy, VPCanonicalIVPHIRecipe>::value ||
- std::is_same<RecipeTy, VPWidenSelectRecipe>::value ||
std::is_same<RecipeTy, VPDerivedIVRecipe>::value ||
std::is_same<RecipeTy, VPWidenGEPRecipe>::value)
return DefR;
@@ -270,195 +265,128 @@ private:
}
};
-template <unsigned Opcode, typename... RecipeTys>
-using ZeroOpRecipe_match =
- Recipe_match<std::tuple<>, Opcode, false, RecipeTys...>;
-
-template <typename Op0_t, unsigned Opcode, typename... RecipeTys>
-using UnaryRecipe_match =
- Recipe_match<std::tuple<Op0_t>, Opcode, false, RecipeTys...>;
-
-template <typename Op0_t, unsigned Opcode>
-using UnaryVPInstruction_match =
- UnaryRecipe_match<Op0_t, Opcode, VPInstruction>;
+template <unsigned Opcode, typename... OpTys>
+using AllRecipe_match =
+ Recipe_match<std::tuple<OpTys...>, Opcode, /*Commutative*/ false,
+ VPWidenRecipe, VPReplicateRecipe, VPWidenCastRecipe,
+ VPInstruction, VPWidenSelectRecipe>;
-template <unsigned Opcode>
-using ZeroOpVPInstruction_match = ZeroOpRecipe_match<Opcode, VPInstruction>;
+template <unsigned Opcode, typename... OpTys>
+using AllRecipe_commutative_match =
+ Recipe_match<std::tuple<OpTys...>, Opcode, /*Commutative*/ true,
+ VPWidenRecipe, VPReplicateRecipe, VPInstruction>;
-template <typename Op0_t, unsigned Opcode>
-using AllUnaryRecipe_match =
- UnaryRecipe_match<Op0_t, Opcode, VPWidenRecipe, VPReplicateRecipe,
- VPWidenCastRecipe, VPInstruction>;
+template <unsigned Opcode, typename... OpTys>
+using VPInstruction_match = Recipe_match<std::tuple<OpTys...>, Opcode,
+ /*Commutative*/ false, VPInstruction>;
-template <typename Op0_t, typename Op1_t, unsigned Opcode, bool Commutative,
- typename... RecipeTys>
-using BinaryRecipe_match =
- Recipe_match<std::tuple<Op0_t, Op1_t>, Opcode, Commutative, RecipeTys...>;
-
-template <typename Op0_t, typename Op1_t, unsigned Opcode>
-using BinaryVPInstruction_match =
- BinaryRecipe_match<Op0_t, Op1_t, Opcode, /*Commutative*/ false,
- VPInstruction>;
-
-template <typename Op0_t, typename Op1_t, typename Op2_t, unsigned Opcode,
- bool Commutative, typename... RecipeTys>
-using TernaryRecipe_match = Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t>,
- Opcode, Commutative, RecipeTys...>;
-
-template <typename Op0_t, typename Op1_t, typename Op2_t, unsigned Opcode>
-using TernaryVPInstruction_match =
- TernaryRecipe_match<Op0_t, Op1_t, Op2_t, Opcode, /*Commutative*/ false,
- VPInstruction>;
-
-template <typename Op0_t, typename Op1_t, unsigned Opcode,
- bool Commutative = false>
-using AllBinaryRecipe_match =
- BinaryRecipe_match<Op0_t, Op1_t, Opcode, Commutative, VPWidenRecipe,
- VPReplicateRecipe, VPWidenCastRecipe, VPInstruction>;
+template <unsigned Opcode, typename... OpTys>
+inline VPInstruction_match<Opcode, OpTys...>
+m_VPInstruction(const OpTys &...Ops) {
+ return VPInstruction_match<Opcode, OpTys...>(Ops...);
+}
/// BuildVector is matches only its opcode, w/o matching its operands as the
/// number of operands is not fixed.
-inline ZeroOpVPInstruction_match<VPInstruction::BuildVector> m_BuildVector() {
- return ZeroOpVPInstruction_match<VPInstruction::BuildVector>();
-}
-
-template <unsigned Opcode, typename Op0_t>
-inline UnaryVPInstruction_match<Op0_t, Opcode>
-m_VPInstruction(const Op0_t &Op0) {
- return UnaryVPInstruction_match<Op0_t, Opcode>(Op0);
-}
-
-template <unsigned Opcode, typename Op0_t, typename Op1_t>
-inline BinaryVPInstruction_match<Op0_t, Op1_t, Opcode>
-m_VPInstruction(const Op0_t &Op0, const Op1_t &Op1) {
- return BinaryVPInstruction_match<Op0_t, Op1_t, Opcode>(Op0, Op1);
+inline VPInstruction_match<VPInstruction::BuildVector> m_BuildVector() {
+ return m_VPInstruction<VPInstruction::BuildVector>();
}
-template <unsigned Opcode, typename Op0_t, typename Op1_t, typename Op2_t>
-inline TernaryVPInstruction_match<Op0_t, Op1_t, Op2_t, Opcode>
-m_VPInstruction(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
- return TernaryVPInstruction_match<Op0_t, Op1_t, Op2_t, Opcode>(
- {Op0, Op1, Op2});
-}
-
-template <typename Op0_t, typename Op1_t, typename Op2_t, typename Op3_t,
- unsigned Opcode, bool Commutative, typename... RecipeTys>
-using Recipe4Op_match = Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t, Op3_t>,
- Opcode, Commutative, RecipeTys...>;
-
-template <typename Op0_t, typename Op1_t, typename Op2_t, typename Op3_t,
- unsigned Opcode>
-using VPInstruction4Op_match =
- Recipe4Op_match<Op0_t, Op1_t, Op2_t, Op3_t, Opcode, /*Commutative*/ false,
- VPInstruction>;
-
-template <unsigned Opcode, typename Op0_t, typename Op1_t, typename Op2_t,
- typename Op3_t>
-inline VPInstruction4Op_match<Op0_t, Op1_t, Op2_t, Op3_t, Opcode>
-m_VPInstruction(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2,
- const Op3_t &Op3) {
- return VPInstruction4Op_match<Op0_t, Op1_t, Op2_t, Op3_t, Opcode>(
- {Op0, Op1, Op2, Op3});
-}
template <typename Op0_t>
-inline UnaryVPInstruction_match<Op0_t, Instruction::Freeze>
+inline VPInstruction_match<Instruction::Freeze, Op0_t>
m_Freeze(const Op0_t &Op0) {
return m_VPInstruction<Instruction::Freeze>(Op0);
}
template <typename Op0_t>
-inline UnaryVPInstruction_match<Op0_t, VPInstruction::BranchOnCond>
+inline VPInstruction_match<VPInstruction::BranchOnCond, Op0_t>
m_BranchOnCond(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::BranchOnCond>(Op0);
}
template <typename Op0_t>
-inline UnaryVPInstruction_match<Op0_t, VPInstruction::Broadcast>
+inline VPInstruction_match<VPInstruction::Broadcast, Op0_t>
m_Broadcast(const Op0_t &Op0) {
return m_VPInstruction<VPInstruction::Broadcast>(Op0);
}
template <typename Op0_t, typename Op1_t>
-inline BinaryVPInstruction_match<Op0_t, Op1_t, VPInstruction::ActiveLaneMask>
+inline VPInstruction_match<VPInstruction::ActiveLaneMask, Op0_t, Op1_t>
m_ActiveLaneMask(const Op0_t &Op0, const Op1_t &Op1) {
return m_VPInstruction<VPInstruction::ActiveLaneMask>(Op0, Op1);
}
template <typename Op0_t, typename Op1_t>
-inline BinaryVPInstruction_match<Op0_t, Op1_t, VPInstruction::BranchOnCount>
+inline VPInstruction_match<VPInstruction::BranchOnCount, Op0_t, Op1_t>
m_BranchOnCount(const Op0_t &Op0, const Op1_t &Op1) {
return m_VPInstruction<VPInstruction::BranchOnCount>(Op0, Op1);
}
template <unsigned Opcode, typename Op0_t>
-inline AllUnaryRecipe_match<Op0_t, Opcode> m_Unary(const Op0_t &Op0) {
- return AllUnaryRecipe_match<Op0_t, Opcode>(Op0);
+inline AllRecipe_match<Opcode, Op0_t> m_Unary(const Op0_t &Op0) {
+ return AllRecipe_match<Opcode, Op0_t>(Op0);
}
template <typename Op0_t>
-inline AllUnaryRecipe_match<Op0_t, Instruction::Trunc>
-m_Trunc(const Op0_t &Op0) {
+inline AllRecipe_match<Instruction::Trunc, Op0_t> m_Trunc(const Op0_t &Op0) {
return m_Unary<Instruction::Trunc, Op0_t>(Op0);
}
template <typename Op0_t>
-inline AllUnaryRecipe_match<Op0_t, Instruction::ZExt> m_ZExt(const Op0_t &Op0) {
+inline AllRecipe_match<Instruction::ZExt, Op0_t> m_ZExt(const Op0_t &Op0) {
return m_Unary<Instruction::ZExt, Op0_t>(Op0);
}
template <typename Op0_t>
-inline AllUnaryRecipe_match<Op0_t, Instruction::SExt> m_SExt(const Op0_t &Op0) {
+inline AllRecipe_match<Instruction::SExt, Op0_t> m_SExt(const Op0_t &Op0) {
return m_Unary<Instruction::SExt, Op0_t>(Op0);
}
template <typename Op0_t>
-inline match_combine_or<AllUnaryRecipe_match<Op0_t, Instruction::ZExt>,
- AllUnaryRecipe_match<Op0_t, Instruction::SExt>>
+inline match_combine_or<AllRecipe_match<Instruction::ZExt, Op0_t>,
+ AllRecipe_match<Instruction::SExt, Op0_t>>
m_ZExtOrSExt(const Op0_t &Op0) {
return m_CombineOr(m_ZExt(Op0), m_SExt(Op0));
}
-template <unsigned Opcode, typename Op0_t, typename Op1_t,
- bool Commutative = false>
-inline AllBinaryRecipe_match<Op0_t, Op1_t, Opcode, Commutative>
-m_Binary(const Op0_t &Op0, const Op1_t &Op1) {
- return AllBinaryRecipe_match<Op0_t, Op1_t, Opcode, Commutative>(Op0, Op1);
+template <unsigned Opcode, typename Op0_t, typename Op1_t>
+inline AllRecipe_match<Opcode, Op0_t, Op1_t> m_Binary(const Op0_t &Op0,
+ const Op1_t &Op1) {
+ return AllRecipe_match<Opcode, Op0_t, Op1_t>(Op0, Op1);
}
template <unsigned Opcode, typename Op0_t, typename Op1_t>
-inline AllBinaryRecipe_match<Op0_t, Op1_t, Opcode, true>
+inline AllRecipe_commutative_match<Opcode, Op0_t, Op1_t>
m_c_Binary(const Op0_t &Op0, const Op1_t &Op1) {
- return AllBinaryRecipe_match<Op0_t, Op1_t, Opcode, true>(Op0, Op1);
+ return AllRecipe_commutative_match<Opcode, Op0_t, Op1_t>(Op0, Op1);
}
template <typename Op0_t, typename Op1_t>
-inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Mul>
-m_Mul(const Op0_t &Op0, const Op1_t &Op1) {
+inline AllRecipe_match<Instruction::Mul, Op0_t, Op1_t> m_Mul(const Op0_t &Op0,
+ const Op1_t &Op1) {
return m_Binary<Instruction::Mul, Op0_t, Op1_t>(Op0, Op1);
}
template <typename Op0_t, typename Op1_t>
-inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Mul,
- /* Commutative =*/true>
+inline AllRecipe_commutative_match<Instruction::Mul, Op0_t, Op1_t>
m_c_Mul(const Op0_t &Op0, const Op1_t &Op1) {
- return m_Binary<Instruction::Mul, Op0_t, Op1_t, true>(Op0, Op1);
+ return m_c_Binary<Instruction::Mul, Op0_t, Op1_t>(Op0, Op1);
}
/// Match a binary OR operation. Note that while conceptually the operands can
/// be matched commutatively, \p Commutative defaults to false in line with the
/// IR-based pattern matching infrastructure. Use m_c_BinaryOr for a commutative
/// version of the matcher.
-template <typename Op0_t, typename Op1_t, bool Commutative = false>
-inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Or, Commutative>
+template <typename Op0_t, typename Op1_t>
+inline AllRecipe_match<Instruction::Or, Op0_t, Op1_t>
m_BinaryOr(const Op0_t &Op0, const Op1_t &Op1) {
- return m_Binary<Instruction::Or, Op0_t, Op1_t, Commutative>(Op0, Op1);
+ return m_Binary<Instruction::Or, Op0_t, Op1_t>(Op0, Op1);
}
template <typename Op0_t, typename Op1_t>
-inline AllBinaryRecipe_match<Op0_t, Op1_t, Instruction::Or,
- /*Commutative*/ true>
+inline AllRecipe_commutative_match<Instruction::Or, Op0_t, Op1_t>
m_c_BinaryOr(const Op0_t &Op0, const Op1_t &Op1) {
- return m_BinaryOr<Op0_t, Op1_t, /*Commutative*/ true>(Op0, Op1);
+ return m_c_Binary<Instruction::Or, Op0_t, Op1_t>(Op0, Op1);
}
/// ICmp_match is a variant of BinaryRecipe_match that also binds the comparison
@@ -523,9 +451,9 @@ m_SpecificICmp(CmpPredicate MatchPred, const Op0_t &Op0, const Op1_t &Op1) {
template <typename Op0_t, typename Op1_t>
using GEPLikeRecipe_match =
- BinaryRecipe_match<Op0_t, Op1_t, Instruction::GetElementPtr, false,
- VPWidenRecipe, VPReplicateRecipe, VPWidenGEPRecipe,
- VPInstruction>;
+ Recipe_match<std::tuple<Op0_t, Op1_t>, Instruction::GetElementPtr,
+ /*Commutative*/ false, VPWidenRecipe, VPReplicateRecipe,
+ VPWidenGEPRecipe, VPInstruction>;
template <typename Op0_t, typename Op1_t>
inline GEPLikeRecipe_match<Op0_t, Op1_t> m_GetElementPtr(const Op0_t &Op0,
@@ -533,22 +461,17 @@ inline GEPLikeRecipe_match<Op0_t, Op1_t> m_GetElementPtr(const Op0_t &Op0,
return GEPLikeRecipe_match<Op0_t, Op1_t>(Op0, Op1);
}
-template <typename Op0_t, typename Op1_t, typename Op2_t, unsigned Opcode>
-using AllTernaryRecipe_match =
- Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t>, Opcode, false,
- VPReplicateRecipe, VPInstruction, VPWidenSelectRecipe>;
-
template <typename Op0_t, typename Op1_t, typename Op2_t>
-inline AllTernaryRecipe_match<Op0_t, Op1_t, Op2_t, Instruction::Select>
+inline AllRecipe_match<Instruction::Select, Op0_t, Op1_t, Op2_t>
m_Select(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) {
- return AllTernaryRecipe_match<Op0_t, Op1_t, Op2_t, Instruction::Select>(
+ return AllRecipe_match<Instruction::Select, Op0_t, Op1_t, Op2_t>(
{Op0, Op1, Op2});
}
template <typename Op0_t>
-inline match_combine_or<UnaryVPInstruction_match<Op0_t, VPInstruction::Not>,
- AllBinaryRecipe_match<int_pred_ty<is_all_ones>, Op0_t,
- Instruction::Xor, true>>
+inline match_combine_or<VPInstruction_match<VPInstruction::Not, Op0_t>,
+ AllRecipe_commutative_match<
+ Instruction::Xor, int_pred_ty<is_all_ones>, Op0_t>>
m_Not(const Op0_t &Op0) {
return m_CombineOr(m_VPInstruction<VPInstruction::Not>(Op0),
m_c_Binary<Instruction::Xor>(m_AllOnes(), Op0));
@@ -556,9 +479,8 @@ m_Not(const Op0_t &Op0) {
template <typename Op0_t, typename Op1_t>
inline match_combine_or<
- BinaryVPInstruction_match<Op0_t, Op1_t, VPInstruction::LogicalAnd>,
- AllTernaryRecipe_match<Op0_t, Op1_t, specific_intval<1>,
- Instruction::Select>>
+ VPInstruction_match<VPInstruction::LogicalAnd, Op0_t, Op1_t>,
+ AllRecipe_match<Instruction::Select, Op0_t, Op1_t, specific_intval<1>>>
m_LogicalAnd(const Op0_t &Op0, const Op1_t &Op1) {
return m_CombineOr(
m_VPInstruction<VPInstruction::LogicalAnd, Op0_t, Op1_t>(Op0, Op1),
@@ -566,15 +488,14 @@ m_LogicalAnd(const Op0_t &Op0, const Op1_t &Op1) {
}
template <typename Op0_t, typename Op1_t>
-inline AllTernaryRecipe_match<Op0_t, specific_intval<1>, Op1_t,
- Instruction::Select>
+inline AllRecipe_match<Instruction::Select, Op0_t, specific_intval<1>, Op1_t>
m_LogicalOr(const Op0_t &Op0, const Op1_t &Op1) {
return m_Select(Op0, m_True(), Op1);
}
template <typename Op0_t, typename Op1_t, typename Op2_t>
-using VPScalarIVSteps_match =
- TernaryRecipe_match<Op0_t, Op1_t, Op2_t, 0, false, VPScalarIVStepsRecipe>;
+using VPScalarIVSteps_match = Recipe_match<std::tuple<Op0_t, Op1_t, Op2_t>, 0,
+ false, VPScalarIVStepsRecipe>;
template <typename Op0_t, typename Op1_t, typename Op2_t>
inline VPScalarIVSteps_match<Op0_t, Op1_t, Op2_t>
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 23c10d2b2526..7bbd0dc325a9 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -3130,7 +3130,8 @@ InstructionCost VPWidenMemoryRecipe::computeCost(ElementCount VF,
Type *PtrTy = toVectorTy(Ptr->getType(), VF);
assert(!Reverse &&
"Inconsecutive memory access should not have the order.");
- return Ctx.TTI.getAddressComputationCost(PtrTy) +
+ return Ctx.TTI.getAddressComputationCost(PtrTy, nullptr, nullptr,
+ Ctx.CostKind) +
Ctx.TTI.getGatherScatterOpCost(Opcode, Ty, Ptr, IsMasked, Alignment,
Ctx.CostKind, &Ingredient);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index f75b2f21b6f1..c999ef2d666b 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1231,7 +1231,7 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
}
}
-void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
+void VPlanTransforms::simplifyRecipes(VPlan &Plan) {
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
Plan.getEntry());
VPTypeAnalysis TypeInfo(Plan);
@@ -1498,7 +1498,6 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
// the region, otherwise replace the terminator controlling the latch with
// (BranchOnCond true).
auto *Header = cast<VPBasicBlock>(VectorRegion->getEntry());
- auto *CanIVTy = Plan.getCanonicalIV()->getScalarType();
if (all_of(Header->phis(),
IsaPred<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
VPFirstOrderRecurrencePHIRecipe, VPPhi>)) {
@@ -1518,7 +1517,7 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
VPBlockUtils::connectBlocks(Preheader, Header);
VPBlockUtils::connectBlocks(ExitingVPBB, Exit);
- VPlanTransforms::simplifyRecipes(Plan, *CanIVTy);
+ VPlanTransforms::simplifyRecipes(Plan);
} else {
// The vector region contains header phis for which we cannot remove the
// loop region yet.
@@ -1932,13 +1931,13 @@ void VPlanTransforms::optimize(VPlan &Plan) {
runPass(removeRedundantCanonicalIVs, Plan);
runPass(removeRedundantInductionCasts, Plan);
- runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
+ runPass(simplifyRecipes, Plan);
runPass(simplifyBlends, Plan);
runPass(removeDeadRecipes, Plan);
runPass(narrowToSingleScalarRecipes, Plan);
runPass(legalizeAndOptimizeInductions, Plan);
runPass(removeRedundantExpandSCEVRecipes, Plan);
- runPass(simplifyRecipes, Plan, *Plan.getCanonicalIV()->getScalarType());
+ runPass(simplifyRecipes, Plan);
runPass(removeBranchOnConst, Plan);
runPass(removeDeadRecipes, Plan);
@@ -2853,8 +2852,7 @@ void VPlanTransforms::dissolveLoopRegions(VPlan &Plan) {
R->dissolveToCFGLoop();
}
-void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan,
- Type &CanonicalIVTy) {
+void VPlanTransforms::convertToConcreteRecipes(VPlan &Plan) {
VPTypeAnalysis TypeInfo(Plan);
SmallVector<VPRecipeBase *> ToRemove;
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
index 5de1483783a4..35fa45ced53e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h
@@ -229,9 +229,8 @@ struct VPlanTransforms {
/// EVLIVInc, TripCount).
static void canonicalizeEVLLoops(VPlan &Plan);
- /// Lower abstract recipes to concrete ones, that can be codegen'd. Use \p
- /// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
- static void convertToConcreteRecipes(VPlan &Plan, Type &CanonicalIVTy);
+ /// Lower abstract recipes to concrete ones, that can be codegen'd.
+ static void convertToConcreteRecipes(VPlan &Plan);
/// This function converts initial recipes to the abstract recipes and clamps
/// \p Range based on cost model for following optimizations and cost
@@ -240,9 +239,8 @@ struct VPlanTransforms {
static void convertToAbstractRecipes(VPlan &Plan, VPCostContext &Ctx,
VFRange &Range);
- /// Perform instcombine-like simplifications on recipes in \p Plan. Use \p
- /// CanonicalIVTy as type for all un-typed live-ins in VPTypeAnalysis.
- static void simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy);
+ /// Perform instcombine-like simplifications on recipes in \p Plan.
+ static void simplifyRecipes(VPlan &Plan);
/// Remove BranchOnCond recipes with true or false conditions together with
/// removing dead edges to their successors.
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index c45005db6016..4a681cbdab8c 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -1796,8 +1796,8 @@ bool VectorCombine::scalarizeLoadExtract(Instruction &I) {
ScalarizedCost +=
TTI.getMemoryOpCost(Instruction::Load, VecTy->getElementType(),
Align(1), LI->getPointerAddressSpace(), CostKind);
- ScalarizedCost +=
- TTI.getAddressComputationCost(LI->getPointerOperandType());
+ ScalarizedCost += TTI.getAddressComputationCost(LI->getPointerOperandType(),
+ nullptr, nullptr, CostKind);
}
LLVM_DEBUG(dbgs() << "Found all extractions of a vector load: " << I