summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/RISCV
diff options
context:
space:
mode:
author: Mingming Liu <mingmingl@google.com> 2025-09-10 15:25:31 -0700
committer: GitHub <noreply@github.com> 2025-09-10 15:25:31 -0700
commit: 1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch)
tree: 57f4b1f313c8cf74eed8819870f39c36ea263c68 /llvm/lib/Target/RISCV
parent: 898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff)
parent: b8cefcb601ddaa18482555c4ff363c01a270c2fe (diff)
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-format (branch: users/mingmingl-llvm/samplefdo-profile-format)
Diffstat (limited to 'llvm/lib/Target/RISCV')
-rw-r--r-- llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp 27
-rw-r--r-- llvm/lib/Target/RISCV/CMakeLists.txt 4
-rw-r--r-- llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp 58
-rw-r--r-- llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp 4
-rw-r--r-- llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp 22
-rw-r--r-- llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp 6
-rw-r--r-- llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h 3
-rw-r--r-- llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp 7
-rw-r--r-- llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp 42
-rw-r--r-- llvm/lib/Target/RISCV/RISCVFeatures.td 20
-rw-r--r-- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp 19
-rw-r--r-- llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp 5
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp 137
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h 3
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelLowering.cpp 424
-rw-r--r-- llvm/lib/Target/RISCV/RISCVISelLowering.h 4
-rw-r--r-- llvm/lib/Target/RISCV/RISCVIndirectBranchTracking.cpp 2
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrFormatsC.td 1
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfo.cpp 29
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfo.h 2
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfo.td 6
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoC.td 200
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoP.td 310
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td 216
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td 155
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td 23
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td 34
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoXwch.td 4
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoZb.td 12
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td 2
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td 15
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td 31
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrInfoZvqdotq.td 100
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInstrPredicates.td 8
-rw-r--r-- llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp 47
-rw-r--r-- llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp 26
-rw-r--r-- llvm/lib/Target/RISCV/RISCVProcessors.td 3
-rw-r--r-- llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp 8
-rw-r--r-- llvm/lib/Target/RISCV/RISCVSubtarget.h 6
-rw-r--r-- llvm/lib/Target/RISCV/RISCVTargetMachine.cpp 3
-rw-r--r-- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp 16
-rw-r--r-- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h 6
-rw-r--r-- llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp 167
-rw-r--r-- llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp 24
44 files changed, 1522 insertions, 719 deletions
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 9ce44d0ff7fd..cd8392849ac4 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -121,7 +121,7 @@ class RISCVAsmParser : public MCTargetAsmParser {
bool parseVTypeToken(const AsmToken &Tok, VTypeState &State, unsigned &Sew,
unsigned &Lmul, bool &Fractional, bool &TailAgnostic,
- bool &MaskAgnostic);
+ bool &MaskAgnostic, bool &AltFmt);
bool generateVTypeError(SMLoc ErrorLoc);
bool generateXSfmmVTypeError(SMLoc ErrorLoc);
@@ -2261,14 +2261,23 @@ ParseStatus RISCVAsmParser::parseJALOffset(OperandVector &Operands) {
bool RISCVAsmParser::parseVTypeToken(const AsmToken &Tok, VTypeState &State,
unsigned &Sew, unsigned &Lmul,
bool &Fractional, bool &TailAgnostic,
- bool &MaskAgnostic) {
+ bool &MaskAgnostic, bool &AltFmt) {
if (Tok.isNot(AsmToken::Identifier))
return true;
StringRef Identifier = Tok.getIdentifier();
if (State < VTypeState::SeenSew && Identifier.consume_front("e")) {
- if (Identifier.getAsInteger(10, Sew))
- return true;
+ if (Identifier.getAsInteger(10, Sew)) {
+ if (Identifier == "16alt") {
+ AltFmt = true;
+ Sew = 16;
+ } else if (Identifier == "8alt") {
+ AltFmt = true;
+ Sew = 8;
+ } else {
+ return true;
+ }
+ }
if (!RISCVVType::isValidSEW(Sew))
return true;
@@ -2340,11 +2349,12 @@ ParseStatus RISCVAsmParser::parseVTypeI(OperandVector &Operands) {
bool Fractional = false;
bool TailAgnostic = false;
bool MaskAgnostic = false;
+ bool AltFmt = false;
VTypeState State = VTypeState::SeenNothingYet;
do {
if (parseVTypeToken(getTok(), State, Sew, Lmul, Fractional, TailAgnostic,
- MaskAgnostic)) {
+ MaskAgnostic, AltFmt)) {
// The first time, errors return NoMatch rather than Failure
if (State == VTypeState::SeenNothingYet)
return ParseStatus::NoMatch;
@@ -2370,12 +2380,17 @@ ParseStatus RISCVAsmParser::parseVTypeI(OperandVector &Operands) {
}
unsigned VTypeI =
- RISCVVType::encodeVTYPE(VLMUL, Sew, TailAgnostic, MaskAgnostic);
+ RISCVVType::encodeVTYPE(VLMUL, Sew, TailAgnostic, MaskAgnostic, AltFmt);
Operands.push_back(RISCVOperand::createVType(VTypeI, S));
return ParseStatus::Success;
}
bool RISCVAsmParser::generateVTypeError(SMLoc ErrorLoc) {
+ if (STI->hasFeature(RISCV::FeatureStdExtZvfbfa))
+ return Error(
+ ErrorLoc,
+ "operand must be "
+ "e[8|8alt|16|16alt|32|64],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu]");
return Error(
ErrorLoc,
"operand must be "
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 47329b2c2f4d..0ff178e1f195 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -7,7 +7,8 @@ tablegen(LLVM RISCVGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM RISCVGenCompressInstEmitter.inc -gen-compress-inst-emitter)
tablegen(LLVM RISCVGenMacroFusion.inc -gen-macro-fusion-pred)
tablegen(LLVM RISCVGenDAGISel.inc -gen-dag-isel)
-tablegen(LLVM RISCVGenDisassemblerTables.inc -gen-disassembler)
+tablegen(LLVM RISCVGenDisassemblerTables.inc -gen-disassembler
+ --specialize-decoders-per-bitwidth)
tablegen(LLVM RISCVGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM RISCVGenMCCodeEmitter.inc -gen-emitter)
tablegen(LLVM RISCVGenMCPseudoLowering.inc -gen-pseudo-lowering)
@@ -87,6 +88,7 @@ add_llvm_target(RISCVCodeGen
GlobalISel
IPO
MC
+ Passes
RISCVDesc
RISCVInfo
Scalar
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index dbb16fce8390..89df9d82f878 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -46,8 +46,6 @@ public:
raw_ostream &CStream) const override;
private:
- void addSPOperands(MCInst &MI) const;
-
DecodeStatus getInstruction48(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const;
@@ -196,6 +194,12 @@ static DecodeStatus DecodeFPR128RegisterClass(MCInst &Inst, uint32_t RegNo,
return MCDisassembler::Success;
}
+static DecodeStatus DecodeSPRegisterClass(MCInst &Inst,
+ const MCDisassembler *Decoder) {
+ Inst.addOperand(MCOperand::createReg(RISCV::X2));
+ return MCDisassembler::Success;
+}
+
static DecodeStatus DecodeGPRNoX0RegisterClass(MCInst &Inst, uint32_t RegNo,
uint64_t Address,
const MCDisassembler *Decoder) {
@@ -558,7 +562,7 @@ static DecodeStatus decodeXqccmpRlistS0(MCInst &Inst, uint32_t Imm,
return decodeZcmpRlist(Inst, Imm, Address, Decoder);
}
-static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn,
+static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint16_t Insn,
uint64_t Address,
const MCDisassembler *Decoder) {
uint32_t Rs1 = fieldFromInstruction(Insn, 7, 5);
@@ -600,15 +604,6 @@ static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn,
#include "RISCVGenDisassemblerTables.inc"
-// Add implied SP operand for C.*SP compressed instructions. The SP operand
-// isn't explicitly encoded in the instruction.
-void RISCVDisassembler::addSPOperands(MCInst &MI) const {
- const MCInstrDesc &MCID = MCII->get(MI.getOpcode());
- for (unsigned i = 0; i < MCID.getNumOperands(); i++)
- if (MCID.operands()[i].RegClass == RISCV::SPRegClassID)
- MI.insert(MI.begin() + i, MCOperand::createReg(RISCV::X2));
-}
-
namespace {
struct DecoderListEntry {
@@ -656,6 +651,13 @@ static constexpr FeatureBitset XSfSystemGroup = {
RISCV::FeatureVendorXSiFivecflushdlone,
};
+static constexpr FeatureBitset XMIPSGroup = {
+ RISCV::FeatureVendorXMIPSLSP,
+ RISCV::FeatureVendorXMIPSCMov,
+ RISCV::FeatureVendorXMIPSCBOP,
+ RISCV::FeatureVendorXMIPSEXECTL,
+};
+
static constexpr FeatureBitset XTHeadGroup = {
RISCV::FeatureVendorXTHeadBa, RISCV::FeatureVendorXTHeadBb,
RISCV::FeatureVendorXTHeadBs, RISCV::FeatureVendorXTHeadCondMov,
@@ -684,13 +686,7 @@ static constexpr DecoderListEntry DecoderList32[]{
{DecoderTableXSfvector32, XSfVectorGroup, "SiFive vector extensions"},
{DecoderTableXSfsystem32, XSfSystemGroup, "SiFive system extensions"},
{DecoderTableXSfcease32, {RISCV::FeatureVendorXSfcease}, "SiFive sf.cease"},
- {DecoderTableXmipslsp32, {RISCV::FeatureVendorXMIPSLSP}, "MIPS mips.lsp"},
- {DecoderTableXmipscmov32,
- {RISCV::FeatureVendorXMIPSCMov},
- "MIPS mips.ccmov"},
- {DecoderTableXmipscbop32,
- {RISCV::FeatureVendorXMIPSCBOP},
- "MIPS mips.pref"},
+ {DecoderTableXMIPS32, XMIPSGroup, "Mips extensions"},
{DecoderTableXAndes32, XAndesGroup, "Andes extensions"},
{DecoderTableXSMT32, XSMTGroup, "SpacemiT extensions"},
// Standard Extensions
@@ -700,6 +696,14 @@ static constexpr DecoderListEntry DecoderList32[]{
{DecoderTableZdinxRV32Only32, {}, "RV32-only Zdinx (Double in Integer)"},
};
+namespace {
+// Define bitwidths for various types used to instantiate the decoder.
+template <> constexpr uint32_t InsnBitWidth<uint16_t> = 16;
+template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32;
+// Use uint64_t to represent 48 bit instructions.
+template <> constexpr uint32_t InsnBitWidth<uint64_t> = 48;
+} // namespace
+
DecodeStatus RISCVDisassembler::getInstruction32(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
@@ -710,9 +714,7 @@ DecodeStatus RISCVDisassembler::getInstruction32(MCInst &MI, uint64_t &Size,
}
Size = 4;
- // Use uint64_t to match getInstruction48. decodeInstruction is templated
- // on the Insn type.
- uint64_t Insn = support::endian::read32le(Bytes.data());
+ uint32_t Insn = support::endian::read32le(Bytes.data());
for (const DecoderListEntry &Entry : DecoderList32) {
if (!Entry.haveContainedFeatures(STI.getFeatureBits()))
@@ -758,9 +760,7 @@ DecodeStatus RISCVDisassembler::getInstruction16(MCInst &MI, uint64_t &Size,
}
Size = 2;
- // Use uint64_t to match getInstruction48. decodeInstruction is templated
- // on the Insn type.
- uint64_t Insn = support::endian::read16le(Bytes.data());
+ uint16_t Insn = support::endian::read16le(Bytes.data());
for (const DecoderListEntry &Entry : DecoderList16) {
if (!Entry.haveContainedFeatures(STI.getFeatureBits()))
@@ -769,12 +769,8 @@ DecodeStatus RISCVDisassembler::getInstruction16(MCInst &MI, uint64_t &Size,
LLVM_DEBUG(dbgs() << "Trying " << Entry.Desc << " table:\n");
DecodeStatus Result =
decodeInstruction(Entry.Table, MI, Insn, Address, this, STI);
- if (Result == MCDisassembler::Fail)
- continue;
-
- addSPOperands(MI);
-
- return Result;
+ if (Result != MCDisassembler::Fail)
+ return Result;
}
return MCDisassembler::Fail;
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
index 51ea3fc5f677..7df1b7e58000 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp
@@ -1158,8 +1158,8 @@ bool RISCVInstructionSelector::selectAddr(MachineInstr &MI,
switch (TM.getCodeModel()) {
default: {
- reportGISelFailure(const_cast<MachineFunction &>(*MF), *TPC, *MORE,
- getName(), "Unsupported code model for lowering", MI);
+ reportGISelFailure(*MF, *TPC, *MORE, getName(),
+ "Unsupported code model for lowering", MI);
return false;
}
case CodeModel::Small: {
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index e88f33d6859e..564657ac65fd 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -26,6 +26,8 @@
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/Type.h"
using namespace llvm;
@@ -152,7 +154,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
getActionDefinitionsBuilder({G_SADDO, G_SSUBO}).minScalar(0, sXLen).lower();
// TODO: Use Vector Single-Width Saturating Instructions for vector types.
- getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
+ getActionDefinitionsBuilder(
+ {G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT, G_SSHLSAT, G_USHLSAT})
.lower();
getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR})
@@ -485,6 +488,10 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.minScalar(ST.hasStdExtZbb(), 0, sXLen)
.lower();
+ getActionDefinitionsBuilder({G_ABDS, G_ABDU})
+ .minScalar(ST.hasStdExtZbb(), 0, sXLen)
+ .lower();
+
getActionDefinitionsBuilder({G_UMAX, G_UMIN, G_SMAX, G_SMIN})
.legalFor(ST.hasStdExtZbb(), {sXLen})
.minScalar(ST.hasStdExtZbb(), 0, sXLen)
@@ -692,6 +699,16 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
.customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
+ getActionDefinitionsBuilder(G_ATOMICRMW_ADD)
+ .legalFor(ST.hasStdExtA(), {{sXLen, p0}})
+ .libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
+ .clampScalar(0, sXLen, sXLen);
+
+ getActionDefinitionsBuilder(G_ATOMICRMW_SUB)
+ .libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}})
+ .clampScalar(0, sXLen, sXLen)
+ .lower();
+
getLegacyLegalizerInfo().computeTables();
verify(*ST.getInstrInfo());
}
@@ -729,6 +746,9 @@ bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
MI.eraseFromParent();
return true;
}
+ case Intrinsic::riscv_masked_atomicrmw_add:
+ case Intrinsic::riscv_masked_atomicrmw_sub:
+ return true;
}
}
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
index 543c4c5ddfc9..37fe32531800 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp
@@ -36,6 +36,12 @@ RISCVTargetELFStreamer::RISCVTargetELFStreamer(MCStreamer &S,
setFlagsFromFeatures(STI);
}
+RISCVELFStreamer::RISCVELFStreamer(MCContext &C,
+ std::unique_ptr<MCAsmBackend> MAB,
+ std::unique_ptr<MCObjectWriter> MOW,
+ std::unique_ptr<MCCodeEmitter> MCE)
+ : MCELFStreamer(C, std::move(MAB), std::move(MOW), std::move(MCE)) {}
+
RISCVELFStreamer &RISCVTargetELFStreamer::getStreamer() {
return static_cast<RISCVELFStreamer &>(Streamer);
}
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
index 98948cd3e949..26da2441d4ae 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h
@@ -28,8 +28,7 @@ class RISCVELFStreamer : public MCELFStreamer {
public:
RISCVELFStreamer(MCContext &C, std::unique_ptr<MCAsmBackend> MAB,
std::unique_ptr<MCObjectWriter> MOW,
- std::unique_ptr<MCCodeEmitter> MCE)
- : MCELFStreamer(C, std::move(MAB), std::move(MOW), std::move(MCE)) {}
+ std::unique_ptr<MCCodeEmitter> MCE);
void changeSection(MCSection *Section, uint32_t Subsection) override;
void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
index b0c27ce6010f..50f5a5d09a69 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp
@@ -216,9 +216,12 @@ void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNo).getImm();
// Print the raw immediate for reserved values: vlmul[2:0]=4, vsew[2:0]=0b1xx,
- // or non-zero in bits 8 and above.
+ // altfmt=1 without zvfbfa extension, or non-zero in bits 9 and above.
if (RISCVVType::getVLMUL(Imm) == RISCVVType::VLMUL::LMUL_RESERVED ||
- RISCVVType::getSEW(Imm) > 64 || (Imm >> 8) != 0) {
+ RISCVVType::getSEW(Imm) > 64 ||
+ (RISCVVType::isAltFmt(Imm) &&
+ !STI.hasFeature(RISCV::FeatureStdExtZvfbfa)) ||
+ (Imm >> 9) != 0) {
O << formatImm(Imm);
return;
}
diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
index 83566b1c5778..66ca43604670 100644
--- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
+++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp
@@ -126,7 +126,7 @@ private:
void LowerPATCHABLE_TAIL_CALL(const MachineInstr *MI);
void emitSled(const MachineInstr *MI, SledKind Kind);
- bool lowerToMCInst(const MachineInstr *MI, MCInst &OutMI);
+ void lowerToMCInst(const MachineInstr *MI, MCInst &OutMI);
};
}
@@ -329,12 +329,17 @@ void RISCVAsmPrinter::emitInstruction(const MachineInstr *MI) {
case TargetOpcode::STATEPOINT:
return LowerSTATEPOINT(*OutStreamer, SM, *MI);
case TargetOpcode::PATCHABLE_FUNCTION_ENTER: {
- // patchable-function-entry is handled in lowerToMCInst
- // Therefore, we break out of the switch statement if we encounter it here.
const Function &F = MI->getParent()->getParent()->getFunction();
- if (F.hasFnAttribute("patchable-function-entry"))
- break;
-
+ if (F.hasFnAttribute("patchable-function-entry")) {
+ unsigned Num;
+ [[maybe_unused]] bool Result =
+ F.getFnAttribute("patchable-function-entry")
+ .getValueAsString()
+ .getAsInteger(10, Num);
+ assert(!Result && "Enforced by the verifier");
+ emitNops(Num);
+ return;
+ }
LowerPATCHABLE_FUNCTION_ENTER(MI);
return;
}
@@ -347,8 +352,8 @@ void RISCVAsmPrinter::emitInstruction(const MachineInstr *MI) {
}
MCInst OutInst;
- if (!lowerToMCInst(MI, OutInst))
- EmitToStreamer(*OutStreamer, OutInst);
+ lowerToMCInst(MI, OutInst);
+ EmitToStreamer(*OutStreamer, OutInst);
}
bool RISCVAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -1174,9 +1179,9 @@ static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI,
return true;
}
-bool RISCVAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
+void RISCVAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
if (lowerRISCVVMachineInstrToMCInst(MI, OutMI, STI))
- return false;
+ return;
OutMI.setOpcode(MI->getOpcode());
@@ -1185,23 +1190,6 @@ bool RISCVAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
if (lowerOperand(MO, MCOp))
OutMI.addOperand(MCOp);
}
-
- switch (OutMI.getOpcode()) {
- case TargetOpcode::PATCHABLE_FUNCTION_ENTER: {
- const Function &F = MI->getParent()->getParent()->getFunction();
- if (F.hasFnAttribute("patchable-function-entry")) {
- unsigned Num;
- if (F.getFnAttribute("patchable-function-entry")
- .getValueAsString()
- .getAsInteger(10, Num))
- return false;
- emitNops(Num);
- return true;
- }
- break;
- }
- }
- return false;
}
void RISCVAsmPrinter::emitMachineConstantPoolValue(
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 3b738e4cc11a..063963d4ec36 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -680,6 +680,13 @@ def FeatureStdExtV
[FeatureStdExtZvl128b, FeatureStdExtZve64d]>,
RISCVExtensionBitmask<0, 21>;
+def FeatureStdExtZvfbfa
+ : RISCVExperimentalExtension<0, 1, "Additional BF16 vector compute support",
+ [FeatureStdExtZve32f, FeatureStdExtZfbfmin]>;
+def HasStdExtZvfbfa : Predicate<"Subtarget->hasStdExtZvfbfa()">,
+ AssemblerPredicate<(all_of FeatureStdExtZvfbfa),
+ "'Zvfbfa' (Additional BF16 vector compute support)">;
+
def FeatureStdExtZvfbfmin
: RISCVExtension<1, 0, "Vector BF16 Converts", [FeatureStdExtZve32f]>;
def HasStdExtZvfbfmin : Predicate<"Subtarget->hasStdExtZvfbfmin()">,
@@ -1396,20 +1403,27 @@ def HasVendorXMIPSCMov
AssemblerPredicate<(all_of FeatureVendorXMIPSCMov),
"'Xmipscmov' ('mips.ccmov' instruction)">;
def UseCCMovInsn : Predicate<"Subtarget->useCCMovInsn()">;
+
def FeatureVendorXMIPSLSP
: RISCVExtension<1, 0, "MIPS optimization for hardware load-store bonding">;
def HasVendorXMIPSLSP
: Predicate<"Subtarget->hasVendorXMIPSLSP()">,
AssemblerPredicate<(all_of FeatureVendorXMIPSLSP),
"'Xmipslsp' (load and store pair instructions)">;
-def FeatureVendorXMIPSCBOP
- : RISCVExtension<1, 0, "MIPS Software Prefetch">;
+
+def FeatureVendorXMIPSCBOP : RISCVExtension<1, 0, "MIPS Software Prefetch">;
def HasVendorXMIPSCBOP
: Predicate<"Subtarget->hasVendorXMIPSCBOP()">,
AssemblerPredicate<(all_of FeatureVendorXMIPSCBOP),
"'Xmipscbop' (MIPS hardware prefetch)">;
def NoVendorXMIPSCBOP : Predicate<"!Subtarget->hasVendorXMIPSCBOP()">;
+def FeatureVendorXMIPSEXECTL : RISCVExtension<1, 0, "MIPS execution control">;
+def HasVendorXMIPSEXECTL
+ : Predicate<"Subtarget->hasVendorXMIPSEXT()">,
+ AssemblerPredicate<(all_of FeatureVendorXMIPSEXECTL),
+ "'Xmipsexectl' (MIPS execution control)">;
+
// WCH / Nanjing Qinheng Microelectronics Extension(s)
def FeatureVendorXwchc
@@ -1668,7 +1682,7 @@ def IsRV32 : Predicate<"!Subtarget->is64Bit()">,
"RV32I Base Instruction Set">;
defvar RV32 = DefaultMode;
-def RV64 : HwMode<"+64bit", [IsRV64]>;
+def RV64 : HwMode<[IsRV64]>;
def FeatureRelax
: SubtargetFeature<"relax", "EnableLinkerRelax", "true",
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 9fc0d815ceee..06ce91771c9e 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -106,8 +106,14 @@ static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL) {
const auto &STI = MF.getSubtarget<RISCVSubtarget>();
+ // We check Zimop instead of (Zimop || Zcmop) to determine whether HW shadow
+ // stack is available despite the fact that sspush/sspopchk both have a
+ // compressed form, because if only Zcmop is available, we would need to
+ // reserve X5 due to c.sspopchk only takes X5 and we currently do not support
+ // using X5 as the return address register.
+ // However, we can still aggressively use c.sspush x1 if zcmop is available.
bool HasHWShadowStack = MF.getFunction().hasFnAttribute("hw-shadow-stack") &&
- STI.hasStdExtZicfiss();
+ STI.hasStdExtZimop();
bool HasSWShadowStack =
MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack);
if (!HasHWShadowStack && !HasSWShadowStack)
@@ -124,7 +130,12 @@ static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB,
const RISCVInstrInfo *TII = STI.getInstrInfo();
if (HasHWShadowStack) {
- BuildMI(MBB, MI, DL, TII->get(RISCV::SSPUSH)).addReg(RAReg);
+ if (STI.hasStdExtZcmop()) {
+ static_assert(RAReg == RISCV::X1, "C.SSPUSH only accepts X1");
+ BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoMOP_C_SSPUSH));
+ } else {
+ BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoMOP_SSPUSH)).addReg(RAReg);
+ }
return;
}
@@ -172,7 +183,7 @@ static void emitSCSEpilogue(MachineFunction &MF, MachineBasicBlock &MBB,
const DebugLoc &DL) {
const auto &STI = MF.getSubtarget<RISCVSubtarget>();
bool HasHWShadowStack = MF.getFunction().hasFnAttribute("hw-shadow-stack") &&
- STI.hasStdExtZicfiss();
+ STI.hasStdExtZimop();
bool HasSWShadowStack =
MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack);
if (!HasHWShadowStack && !HasSWShadowStack)
@@ -186,7 +197,7 @@ static void emitSCSEpilogue(MachineFunction &MF, MachineBasicBlock &MBB,
const RISCVInstrInfo *TII = STI.getInstrInfo();
if (HasHWShadowStack) {
- BuildMI(MBB, MI, DL, TII->get(RISCV::SSPOPCHK)).addReg(RAReg);
+ BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoMOP_SSPOPCHK)).addReg(RAReg);
return;
}
diff --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
index 80a48c5ec11f..52dc53e4545e 100644
--- a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp
@@ -561,7 +561,7 @@ bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II) {
EVL = Builder.CreateElementCount(
Builder.getInt32Ty(), cast<VectorType>(DataType)->getElementCount());
- CallInst *Call;
+ Value *Call;
if (!StoreVal) {
Call = Builder.CreateIntrinsic(
@@ -571,8 +571,7 @@ bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II) {
// Merge llvm.masked.gather's passthru
if (II->getIntrinsicID() == Intrinsic::masked_gather)
- Call = Builder.CreateIntrinsic(Intrinsic::vp_select, {DataType},
- {Mask, Call, II->getArgOperand(3), EVL});
+ Call = Builder.CreateSelect(Mask, Call, II->getArgOperand(3));
} else
Call = Builder.CreateIntrinsic(
Intrinsic::experimental_vp_strided_store,
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index f9f35f66319b..c7f15415ebb9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -819,49 +819,6 @@ bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) {
return false;
}
-// (xor X, (and (xor X, C1), C2))
-// -> (qc.insbi X, (C1 >> ShAmt), Width, ShAmt)
-// where C2 is a shifted mask with width=Width and shift=ShAmt
-bool RISCVDAGToDAGISel::tryBitfieldInsertOpFromXor(SDNode *Node) {
-
- if (!Subtarget->hasVendorXqcibm())
- return false;
-
- using namespace SDPatternMatch;
-
- SDValue X;
- APInt CImm, CMask;
- if (!sd_match(
- Node,
- m_Xor(m_Value(X),
- m_OneUse(m_And(m_OneUse(m_Xor(m_Deferred(X), m_ConstInt(CImm))),
- m_ConstInt(CMask))))))
- return false;
-
- unsigned Width, ShAmt;
- if (!CMask.isShiftedMask(ShAmt, Width))
- return false;
-
- int64_t Imm = CImm.getSExtValue();
- Imm >>= ShAmt;
-
- SDLoc DL(Node);
- SDValue ImmNode;
- auto Opc = RISCV::QC_INSB;
-
- if (isInt<5>(Imm)) {
- Opc = RISCV::QC_INSBI;
- ImmNode = CurDAG->getSignedTargetConstant(Imm, DL, MVT::i32);
- } else {
- ImmNode = selectImm(CurDAG, DL, MVT::i32, Imm, *Subtarget);
- }
- SDValue Ops[] = {X, ImmNode, CurDAG->getTargetConstant(Width, DL, MVT::i32),
- CurDAG->getTargetConstant(ShAmt, DL, MVT::i32)};
- ReplaceNode(Node, CurDAG->getMachineNode(Opc, DL, MVT::i32, Ops));
-
- return true;
-}
-
bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node,
const SDLoc &DL, MVT VT,
SDValue X, unsigned Msb,
@@ -1095,7 +1052,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
SDLoc DL(Node);
MVT VT = Node->getSimpleValueType(0);
- bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs();
+ bool HasBitTest = Subtarget->hasBEXTILike();
switch (Opcode) {
case ISD::Constant: {
@@ -1442,9 +1399,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (tryShrinkShlLogicImm(Node))
return;
- if (tryBitfieldInsertOpFromXor(Node))
- return;
-
break;
case ISD::AND: {
auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1));
@@ -2951,6 +2905,65 @@ static bool isWorthFoldingAdd(SDValue Add) {
return true;
}
+bool isRegImmLoadOrStore(SDNode *User, SDValue Add) {
+ switch (User->getOpcode()) {
+ default:
+ return false;
+ case ISD::LOAD:
+ case RISCVISD::LD_RV32:
+ case ISD::ATOMIC_LOAD:
+ break;
+ case ISD::STORE:
+ // Don't allow stores of Add. It must only be used as the address.
+ if (cast<StoreSDNode>(User)->getValue() == Add)
+ return false;
+ break;
+ case RISCVISD::SD_RV32:
+ // Don't allow stores of Add. It must only be used as the address.
+ if (User->getOperand(0) == Add || User->getOperand(1) == Add)
+ return false;
+ break;
+ case ISD::ATOMIC_STORE:
+ // Don't allow stores of Add. It must only be used as the address.
+ if (cast<AtomicSDNode>(User)->getVal() == Add)
+ return false;
+ break;
+ }
+
+ return true;
+}
+
+// To prevent SelectAddrRegImm from folding offsets that conflict with the
+// fusion of PseudoMovAddr, check if the offset of every use of a given address
+// is within the alignment.
+bool RISCVDAGToDAGISel::areOffsetsWithinAlignment(SDValue Addr,
+ Align Alignment) {
+ assert(Addr->getOpcode() == RISCVISD::ADD_LO);
+ for (auto *User : Addr->users()) {
+ // If the user is a load or store, then the offset is 0 which is always
+ // within alignment.
+ if (isRegImmLoadOrStore(User, Addr))
+ continue;
+
+ if (CurDAG->isBaseWithConstantOffset(SDValue(User, 0))) {
+ int64_t CVal = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue();
+ if (!isInt<12>(CVal) || Alignment <= CVal)
+ return false;
+
+ // Make sure all uses are foldable load/stores.
+ for (auto *AddUser : User->users())
+ if (!isRegImmLoadOrStore(AddUser, SDValue(User, 0)))
+ return false;
+
+ continue;
+ }
+
+ return false;
+ }
+
+ return true;
+}
+
bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
SDValue &Offset) {
if (SelectAddrFrameIndex(Addr, Base, Offset))
@@ -2960,9 +2973,21 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
MVT VT = Addr.getSimpleValueType();
if (Addr.getOpcode() == RISCVISD::ADD_LO) {
- Base = Addr.getOperand(0);
- Offset = Addr.getOperand(1);
- return true;
+ bool CanFold = true;
+ // Unconditionally fold if operand 1 is not a global address (e.g.
+ // externsymbol)
+ if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) {
+ const DataLayout &DL = CurDAG->getDataLayout();
+ Align Alignment = commonAlignment(
+ GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
+ if (!areOffsetsWithinAlignment(Addr, Alignment))
+ CanFold = false;
+ }
+ if (CanFold) {
+ Base = Addr.getOperand(0);
+ Offset = Addr.getOperand(1);
+ return true;
+ }
}
if (CurDAG->isBaseWithConstantOffset(Addr)) {
@@ -2980,7 +3005,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
const DataLayout &DL = CurDAG->getDataLayout();
Align Alignment = commonAlignment(
GA->getGlobal()->getPointerAlignment(DL), GA->getOffset());
- if ((CVal == 0 || Alignment > CVal)) {
+ if ((CVal == 0 || Alignment > CVal) &&
+ areOffsetsWithinAlignment(Base, Alignment)) {
int64_t CombinedOffset = CVal + GA->getOffset();
Base = Base.getOperand(0);
Offset = CurDAG->getTargetGlobalAddress(
@@ -3983,6 +4009,15 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
if (Use.getOperandNo() == 0 && Bits >= 32)
break;
return false;
+ case RISCV::TH_EXT:
+ case RISCV::TH_EXTU: {
+ unsigned Msb = User->getConstantOperandVal(1);
+ unsigned Lsb = User->getConstantOperandVal(2);
+ // Behavior of Msb < Lsb is not well documented.
+ if (Msb >= Lsb && Bits > Msb)
+ break;
+ return false;
+ }
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index c329a4c6ec62..cf2f763abc06 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -45,6 +45,8 @@ public:
InlineAsm::ConstraintCode ConstraintID,
std::vector<SDValue> &OutOps) override;
+ bool areOffsetsWithinAlignment(SDValue Addr, Align Alignment);
+
bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset);
bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset);
bool SelectAddrRegImm9(SDValue Addr, SDValue &Base, SDValue &Offset);
@@ -75,7 +77,6 @@ public:
bool trySignedBitfieldExtract(SDNode *Node);
bool trySignedBitfieldInsertInSign(SDNode *Node);
bool trySignedBitfieldInsertInMask(SDNode *Node);
- bool tryBitfieldInsertOpFromXor(SDNode *Node);
bool tryBitfieldInsertOpFromOrAndImm(SDNode *Node);
bool tryUnsignedBitfieldExtract(SDNode *Node, const SDLoc &DL, MVT VT,
SDValue X, unsigned Msb, unsigned Lsb);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a33224845e2b..a68a3c14dc41 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2173,7 +2173,7 @@ bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial(
// on the basis that it's possible the sinking+duplication of the AND in
// CodeGenPrepare triggered by this hook wouldn't decrease the instruction
// count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ).
- if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs())
+ if (!Subtarget.hasBEXTILike())
return false;
ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1));
if (!Mask)
@@ -3744,9 +3744,11 @@ static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL,
// different
// FIXME: Support i1 vectors, maybe by promoting to i8?
MVT EltTy = VT.getVectorElementType();
+ if (EltTy == MVT::i1 ||
+ !DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType()))
+ return SDValue();
MVT SrcVT = Src.getSimpleValueType();
- if (EltTy == MVT::i1 || EltTy != SrcVT.getVectorElementType() ||
- !DAG.getTargetLoweringInfo().isTypeLegal(SrcVT))
+ if (EltTy != SrcVT.getVectorElementType())
return SDValue();
SDValue Idx = SplatVal.getOperand(1);
// The index must be a legal type.
@@ -4518,41 +4520,104 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC;
+ // General case: splat the first operand and slide other operands down one
+ // by one to form a vector. Alternatively, if every operand is an
+ // extraction from element 0 of a vector, we use that vector from the last
+ // extraction as the start value and slide up instead of slide down, so that
+ // (1) we can avoid the initial splat (2) we can turn those vslide1up into
+ // vslideup of 1 later and eliminate the vector to scalar movement, which is
+ // something we cannot do with vslide1down/vslidedown.
+ // Of course, using vslide1up/vslideup might increase the register pressure,
+ // and that's why we conservatively limit to cases where every operand is an
+ // extraction from the first element.
+ SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
+ SDValue EVec;
+ bool SlideUp = false;
+ auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec,
+ SDValue Offset, SDValue Mask, SDValue VL) -> SDValue {
+ if (SlideUp)
+ return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
+ Mask, VL, Policy);
+ return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset,
+ Mask, VL, Policy);
+ };
+
+ // The reason we don't use all_of here is because we're also capturing EVec
+ // from the last non-undef operand. If the execution policy of the
+ // underlying std::all_of is anything but std::execution::sequenced_policy
+ // we might capture the wrong EVec.
+ for (SDValue V : Operands) {
+ using namespace SDPatternMatch;
+ SlideUp = V.isUndef() || sd_match(V, m_ExtractElt(m_Value(EVec), m_Zero()));
+ if (!SlideUp)
+ break;
+ }
+
+ if (SlideUp) {
+ MVT EVecContainerVT = EVec.getSimpleValueType();
+ // Make sure the original vector has scalable vector type.
+ if (EVecContainerVT.isFixedLengthVector()) {
+ EVecContainerVT =
+ getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget);
+ EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget);
+ }
+
+ // Adapt EVec's type into ContainerVT.
+ if (EVecContainerVT.getVectorMinNumElements() <
+ ContainerVT.getVectorMinNumElements())
+ EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0);
+ else
+ EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0);
+
+ // Reverse the elements as we're going to slide up from the last element.
+ std::reverse(Operands.begin(), Operands.end());
+ }
+
SDValue Vec;
UndefCount = 0;
- for (SDValue V : Op->ops()) {
+ for (SDValue V : Operands) {
if (V.isUndef()) {
UndefCount++;
continue;
}
- // Start our sequence with a TA splat in the hopes that hardware is able to
- // recognize there's no dependency on the prior value of our temporary
- // register.
+ // Start our sequence with either a TA splat or extract source in the
+ // hopes that hardware is able to recognize there's no dependency on the
+ // prior value of our temporary register.
if (!Vec) {
- Vec = DAG.getSplatVector(VT, DL, V);
- Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+ if (SlideUp) {
+ Vec = EVec;
+ } else {
+ Vec = DAG.getSplatVector(VT, DL, V);
+ Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+ }
+
UndefCount = 0;
continue;
}
if (UndefCount) {
const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
- Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
- Vec, Offset, Mask, VL, Policy);
+ Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
+ VL);
UndefCount = 0;
}
- auto OpCode =
- VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL;
+
+ unsigned Opcode;
+ if (VT.isFloatingPoint())
+ Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL;
+ else
+ Opcode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL;
+
if (!VT.isFloatingPoint())
V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V);
- Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
+ Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec,
V, Mask, VL);
}
if (UndefCount) {
const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
- Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
- Vec, Offset, Mask, VL, Policy);
+ Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask,
+ VL);
}
return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}
@@ -8193,6 +8258,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal);
return DAG.getLogicalNOT(DL, SetCC, VT);
}
+ // Lower (setugt X, 2047) as (setne (srl X, 11), 0).
+ if (CCVal == ISD::SETUGT && Imm == 2047) {
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, OpVT, LHS,
+ DAG.getShiftAmountConstant(11, OpVT, DL));
+ return DAG.getSetCC(DL, VT, Shift, DAG.getConstant(0, DL, OpVT),
+ ISD::SETNE);
+ }
}
// Not a constant we could handle, swap the operands and condition code to
@@ -8815,7 +8887,15 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
reportFatalUsageError("Unsupported code model for lowering");
case CodeModel::Small: {
// Generate a sequence for accessing addresses within the first 2 GiB of
- // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
+ // address space.
+ if (Subtarget.hasVendorXqcili()) {
+ // Use QC.E.LI to generate the address, as this is easier to relax than
+ // LUI/ADDI.
+ SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
+ return DAG.getNode(RISCVISD::QC_E_LI, DL, Ty, Addr);
+ }
+
+ // This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi);
@@ -9036,8 +9116,12 @@ static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS,
return std::nullopt;
}
-static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
+static bool isSimm12Constant(SDValue V) {
+ return isa<ConstantSDNode>(V) && V->getAsAPIntVal().isSignedIntN(12);
+}
+
+static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
SDValue CondV = N->getOperand(0);
SDValue TrueV = N->getOperand(1);
SDValue FalseV = N->getOperand(2);
@@ -9057,14 +9141,16 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV));
}
+ const bool HasCZero = VT.isScalarInteger() && Subtarget.hasCZEROLike();
+
// (select c, 0, y) -> (c-1) & y
- if (isNullConstant(TrueV)) {
- SDValue Neg = DAG.getNode(ISD::ADD, DL, VT, CondV,
- DAG.getAllOnesConstant(DL, VT));
+ if (isNullConstant(TrueV) && (!HasCZero || isSimm12Constant(FalseV))) {
+ SDValue Neg =
+ DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT));
return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV));
}
// (select c, y, 0) -> -c & y
- if (isNullConstant(FalseV)) {
+ if (isNullConstant(FalseV) && (!HasCZero || isSimm12Constant(TrueV))) {
SDValue Neg = DAG.getNegative(CondV, DL, VT);
return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV));
}
@@ -9185,12 +9271,16 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV);
}
+ // Try some other optimizations before falling back to generic lowering.
+ if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget))
+ return V;
+
// When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ
// nodes to implement the SELECT. Performing the lowering here allows for
// greater control over when CZERO_{EQZ/NEZ} are used vs another branchless
// sequence or RISCVISD::SELECT_CC node (branch-based select).
- if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) &&
- VT.isScalarInteger()) {
+ if (Subtarget.hasCZEROLike() && VT.isScalarInteger()) {
+
// (select c, t, 0) -> (czero_eqz t, c)
if (isNullConstant(FalseV))
return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV);
@@ -9244,10 +9334,6 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV));
}
- // Try some other optimizations before falling back to generic lowering.
- if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
- return V;
-
// (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1)
// (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2)
if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) {
@@ -9280,19 +9366,38 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
- const int TrueValCost = RISCVMatInt::getIntMatCost(
- TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
- const int FalseValCost = RISCVMatInt::getIntMatCost(
- FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
- bool IsCZERO_NEZ = TrueValCost <= FalseValCost;
+ // Use SHL/ADDI (and possibly XORI) to avoid having to materialize
+ // a constant in a register.
+ if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) {
+ SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT);
+ SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
+ return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff);
+ }
+ if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) {
+ SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT);
+ CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0));
+ SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2);
+ return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff);
+ }
+
+ auto getCost = [&](const APInt &Delta, const APInt &Addend) {
+ const int DeltaCost = RISCVMatInt::getIntMatCost(
+ Delta, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
+ // Does the addend fold into an ADDI?
+ if (Addend.isSignedIntN(12))
+ return DeltaCost;
+ const int AddendCost = RISCVMatInt::getIntMatCost(
+ Addend, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true);
+ return AddendCost + DeltaCost;
+ };
+ bool IsCZERO_NEZ = getCost(FalseVal - TrueVal, TrueVal) <=
+ getCost(TrueVal - FalseVal, FalseVal);
SDValue LHSVal = DAG.getConstant(
IsCZERO_NEZ ? FalseVal - TrueVal : TrueVal - FalseVal, DL, VT);
- SDValue RHSVal =
- DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT);
SDValue CMOV =
DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ,
DL, VT, LHSVal, CondV);
- return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal);
+ return DAG.getNode(ISD::ADD, DL, VT, CMOV, IsCZERO_NEZ ? TrueV : FalseV);
}
// (select c, c1, t) -> (add (czero_nez t - c1, c), c1)
@@ -9327,12 +9432,10 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(
ISD::OR, DL, VT,
DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV),
- DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV));
+ DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV),
+ SDNodeFlags::Disjoint);
}
- if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget))
- return V;
-
if (Op.hasOneUse()) {
unsigned UseOpc = Op->user_begin()->getOpcode();
if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) {
@@ -10738,11 +10841,11 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1));
}
case Intrinsic::riscv_mopr:
- return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1),
+ return DAG.getNode(RISCVISD::MOP_R, DL, XLenVT, Op.getOperand(1),
Op.getOperand(2));
case Intrinsic::riscv_moprr: {
- return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1),
+ return DAG.getNode(RISCVISD::MOP_RR, DL, XLenVT, Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
}
case Intrinsic::riscv_clmul:
@@ -14877,7 +14980,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
SDValue NewOp =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
SDValue Res = DAG.getNode(
- RISCVISD::MOPR, DL, MVT::i64, NewOp,
+ RISCVISD::MOP_R, DL, MVT::i64, NewOp,
DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
@@ -14890,7 +14993,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
SDValue NewOp1 =
DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
SDValue Res = DAG.getNode(
- RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1,
+ RISCVISD::MOP_RR, DL, MVT::i64, NewOp0, NewOp1,
DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64));
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
@@ -15381,9 +15484,7 @@ static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
if (!Subtarget.hasConditionalMoveFusion()) {
// (select cond, x, (and x, c)) has custom lowering with Zicond.
- if ((!Subtarget.hasStdExtZicond() &&
- !Subtarget.hasVendorXVentanaCondOps()) ||
- N->getOpcode() != ISD::AND)
+ if (!Subtarget.hasCZEROLike() || N->getOpcode() != ISD::AND)
return SDValue();
// Maybe harmful when condition code has multiple use.
@@ -16059,12 +16160,55 @@ static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1,
SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0),
Cond);
- SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0),
- Cond);
- SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1);
+ SDValue NewN1 =
+ DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), Cond);
+ SDValue NewOr =
+ DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1, SDNodeFlags::Disjoint);
return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1));
}
+// (xor X, (xor (and X, C2), Y))
+// ->(qc_insb X, (sra Y, ShAmt), Width, ShAmt)
+// where C2 is a shifted mask with width = Width and shift = ShAmt
+// qc_insb might become qc.insb or qc.insbi depending on the operands.
+static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ if (!Subtarget.hasVendorXqcibm())
+ return SDValue();
+
+ using namespace SDPatternMatch;
+
+ SDValue Base, Inserted;
+ APInt CMask;
+ if (!sd_match(N, m_Xor(m_Value(Base),
+ m_OneUse(m_Xor(m_OneUse(m_And(m_Deferred(Base),
+ m_ConstInt(CMask))),
+ m_Value(Inserted))))))
+ return SDValue();
+
+ if (N->getValueType(0) != MVT::i32)
+ return SDValue();
+
+ unsigned Width, ShAmt;
+ if (!CMask.isShiftedMask(ShAmt, Width))
+ return SDValue();
+
+ // Check if all zero bits in CMask are also zero in Inserted
+ if (!DAG.MaskedValueIsZero(Inserted, ~CMask))
+ return SDValue();
+
+ SDLoc DL(N);
+
+ // `Inserted` needs to be right shifted before it is put into the
+ // instruction.
+ Inserted = DAG.getNode(ISD::SRA, DL, MVT::i32, Inserted,
+ DAG.getShiftAmountConstant(ShAmt, MVT::i32, DL));
+
+ SDValue Ops[] = {Base, Inserted, DAG.getConstant(Width, DL, MVT::i32),
+ DAG.getConstant(ShAmt, DL, MVT::i32)};
+ return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops);
+}
+
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
SelectionDAG &DAG = DCI.DAG;
@@ -16108,8 +16252,8 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0));
SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1));
SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1);
- SDValue And = DAG.getNOT(DL, Shl, MVT::i64);
- return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And);
+ SDValue Not = DAG.getNOT(DL, Shl, MVT::i64);
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Not);
}
// fold (xor (sllw 1, x), -1) -> (rolw ~1, x)
@@ -16137,6 +16281,9 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
}
}
+ if (SDValue V = combineXorToBitfieldInsert(N, DAG, Subtarget))
+ return V;
+
if (SDValue V = combineBinOpToReduce(N, DAG, Subtarget))
return V;
if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget))
@@ -16590,10 +16737,6 @@ combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC,
DAG.getConstant(0, DL, XLenVT), CC);
}
-// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
-// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
-// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg
-// can become a sext.w instead of a shift pair.
static SDValue performSETCCCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const RISCVSubtarget &Subtarget) {
@@ -16613,20 +16756,44 @@ static SDValue performSETCCCombine(SDNode *N,
combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget))
return V;
- // (X & -4096) == 0 -> (X >> 12) == 0 if the AND constant can't use ANDI.
- if (DCI.isAfterLegalizeDAG() && isNullConstant(N1) &&
+ if (DCI.isAfterLegalizeDAG() && isa<ConstantSDNode>(N1) &&
N0.getOpcode() == ISD::AND && N0.hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1))) {
- const APInt &AndRHSC =
- cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- if (!isInt<12>(AndRHSC.getSExtValue()) && AndRHSC.isNegatedPowerOf2()) {
+ const APInt &AndRHSC = N0.getConstantOperandAPInt(1);
+ // (X & -(1 << C)) == 0 -> (X >> C) == 0 if the AND constant can't use ANDI.
+ if (isNullConstant(N1) && !isInt<12>(AndRHSC.getSExtValue()) &&
+ AndRHSC.isNegatedPowerOf2()) {
unsigned ShiftBits = AndRHSC.countr_zero();
- SDValue Shift = DAG.getNode(ISD::SRL, dl, VT, N0.getOperand(0),
- DAG.getConstant(ShiftBits, dl, VT));
+ SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, N0.getOperand(0),
+ DAG.getConstant(ShiftBits, dl, OpVT));
return DAG.getSetCC(dl, VT, Shift, N1, Cond);
}
+
+ // Similar to above but handling the lower 32 bits by using sraiw. Allow
+ // comparing with constants other than 0 if the constant can be folded into
+ // addi or xori after shifting.
+ uint64_t N1Int = cast<ConstantSDNode>(N1)->getZExtValue();
+ uint64_t AndRHSInt = AndRHSC.getZExtValue();
+ if (OpVT == MVT::i64 && AndRHSInt <= 0xffffffff &&
+ isPowerOf2_32(-uint32_t(AndRHSInt)) && (N1Int & AndRHSInt) == N1Int) {
+ unsigned ShiftBits = llvm::countr_zero(AndRHSInt);
+ int64_t NewC = SignExtend64<32>(N1Int) >> ShiftBits;
+ if (NewC >= -2048 && NewC <= 2048) {
+ SDValue SExt =
+ DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT, N0.getOperand(0),
+ DAG.getValueType(MVT::i32));
+ SDValue Shift = DAG.getNode(ISD::SRA, dl, OpVT, SExt,
+ DAG.getConstant(ShiftBits, dl, OpVT));
+ return DAG.getSetCC(dl, VT, Shift,
+ DAG.getSignedConstant(NewC, dl, OpVT), Cond);
+ }
+ }
}
+ // Replace (seteq (i64 (and X, 0xffffffff)), C1) with
+ // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from
+ // bit 31. Same for setne. C1' may be cheaper to materialize and the
+ // sext_inreg can become a sext.w instead of a shift pair.
if (OpVT != MVT::i64 || !Subtarget.is64Bit())
return SDValue();
@@ -18674,7 +18841,7 @@ static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG,
break;
}
- if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal))
+ if (!TrueVal.hasOneUse())
return SDValue();
unsigned OpToFold;
@@ -18746,6 +18913,10 @@ static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
if (Cond->getOperand(0) != CountZeroesArgument)
return SDValue();
+ unsigned BitWidth = CountZeroes.getValueSizeInBits();
+ if (!isPowerOf2_32(BitWidth))
+ return SDValue();
+
if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) {
CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes),
CountZeroes.getValueType(), CountZeroesArgument);
@@ -18754,7 +18925,6 @@ static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) {
CountZeroes.getValueType(), CountZeroesArgument);
}
- unsigned BitWidth = CountZeroes.getValueSizeInBits();
SDValue BitWidthMinusOne =
DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType());
@@ -18778,7 +18948,7 @@ static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG,
// Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate
// BEXTI, where C is power of 2.
if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() &&
- (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) {
+ (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())) {
SDValue LHS = Cond.getOperand(0);
SDValue RHS = Cond.getOperand(1);
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
@@ -18953,6 +19123,7 @@ static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL,
SelectionDAG &DAG,
const RISCVSubtarget &Subtarget,
const RISCVTargetLowering &TLI) {
+ using namespace SDPatternMatch;
// Note: We intentionally do not check the legality of the reduction type.
// We want to handle the m4/m8 *src* types, and thus need to let illegal
// intermediate types flow through here.
@@ -18960,11 +19131,10 @@ static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL,
!InVec.getValueType().getVectorElementCount().isKnownMultipleOf(4))
return SDValue();
- // Recurse through adds (since generic dag canonicalizes to that
- // form). TODO: Handle disjoint or here.
- if (InVec->getOpcode() == ISD::ADD) {
- SDValue A = InVec.getOperand(0);
- SDValue B = InVec.getOperand(1);
+ // Recurse through adds/disjoint ors (since generic dag canonicalizes to that
+ // form).
+ SDValue A, B;
+ if (sd_match(InVec, m_AddLike(m_Value(A), m_Value(B)))) {
SDValue AOpt = foldReduceOperandViaVQDOT(A, DL, DAG, Subtarget, TLI);
SDValue BOpt = foldReduceOperandViaVQDOT(B, DL, DAG, Subtarget, TLI);
if (AOpt || BOpt) {
@@ -19001,12 +19171,9 @@ static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL,
// mul (zext a, zext b) -> partial_reduce_umla 0, a, b
// mul (sext a, zext b) -> partial_reduce_ssmla 0, a, b
// mul (zext a, sext b) -> partial_reduce_smla 0, b, a (swapped)
- if (InVec.getOpcode() != ISD::MUL)
+ if (!sd_match(InVec, m_Mul(m_Value(A), m_Value(B))))
return SDValue();
- SDValue A = InVec.getOperand(0);
- SDValue B = InVec.getOperand(1);
-
if (!ISD::isExtOpcode(A.getOpcode()))
return SDValue();
@@ -20081,6 +20248,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return V;
break;
case ISD::FMUL: {
+ using namespace SDPatternMatch;
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue X, Y;
+ // InstCombine canonicalizes fneg (fmul x, y) -> fmul x, (fneg y), see
+ // hoistFNegAboveFMulFDiv.
+ // Undo this and sink the fneg so we match more fmsub/fnmadd patterns.
+ if (sd_match(N, m_FMul(m_Value(X), m_OneUse(m_FNeg(m_Value(Y))))))
+ return DAG.getNode(ISD::FNEG, DL, VT,
+ DAG.getNode(ISD::FMUL, DL, VT, X, Y));
+
// fmul X, (copysign 1.0, Y) -> fsgnjx X, Y
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -20091,13 +20269,12 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0));
if (!C || !C->getValueAPF().isExactlyValue(+1.0))
return SDValue();
- EVT VT = N->getValueType(0);
if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT))
return SDValue();
SDValue Sign = N0->getOperand(1);
if (Sign.getValueType() != VT)
return SDValue();
- return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1));
+ return DAG.getNode(RISCVISD::FSGNJX, DL, VT, N1, N0->getOperand(1));
}
case ISD::FADD:
case ISD::UMAX:
@@ -20381,9 +20558,9 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
VT, DL, MGN->getChain(), BasePtr,
DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(),
EVL, MGN->getMemOperand());
- SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(),
- StridedLoad, MGN->getPassThru(), EVL);
- return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)},
+ SDValue Select = DAG.getSelect(DL, VT, MGN->getMask(), StridedLoad,
+ MGN->getPassThru());
+ return DAG.getMergeValues({Select, SDValue(StridedLoad.getNode(), 1)},
DL);
}
}
@@ -21060,6 +21237,38 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return N->getOperand(0);
break;
}
+ case RISCVISD::VSLIDE1UP_VL:
+ case RISCVISD::VFSLIDE1UP_VL: {
+ using namespace SDPatternMatch;
+ SDValue SrcVec;
+ SDLoc DL(N);
+ MVT VT = N->getSimpleValueType(0);
+ // If the scalar we're sliding in was extracted from the first element of a
+ // vector, we can use that vector as the passthru in a normal slideup of 1.
+ // This saves us an extract_element instruction (i.e. vfmv.f.s, vmv.x.s).
+ if (!N->getOperand(0).isUndef() ||
+ !sd_match(N->getOperand(2),
+ m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()),
+ m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec)))))
+ break;
+
+ MVT SrcVecVT = SrcVec.getSimpleValueType();
+ if (SrcVecVT.getVectorElementType() != VT.getVectorElementType())
+ break;
+ // Adapt the value type of source vector.
+ if (SrcVecVT.isFixedLengthVector()) {
+ SrcVecVT = getContainerForFixedLengthVector(SrcVecVT);
+ SrcVec = convertToScalableVector(SrcVecVT, SrcVec, DAG, Subtarget);
+ }
+ if (SrcVecVT.getVectorMinNumElements() < VT.getVectorMinNumElements())
+ SrcVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), SrcVec, 0);
+ else
+ SrcVec = DAG.getExtractSubvector(DL, VT, SrcVec, 0);
+
+ return getVSlideup(DAG, Subtarget, DL, VT, SrcVec, N->getOperand(1),
+ DAG.getConstant(1, DL, XLenVT), N->getOperand(3),
+ N->getOperand(4));
+ }
}
return SDValue();
@@ -21120,9 +21329,14 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift(
auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
- // Bail if we might break a sh{1,2,3}add pattern.
- if ((Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 &&
- C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3 && N->hasOneUse() &&
+ bool IsShXAdd =
+ (Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 &&
+ C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3;
+ bool IsQCShlAdd = Subtarget.hasVendorXqciac() && C2 &&
+ C2->getZExtValue() >= 4 && C2->getZExtValue() <= 31;
+
+ // Bail if we might break a sh{1,2,3}add/qc.shladd pattern.
+ if ((IsShXAdd || IsQCShlAdd) && N->hasOneUse() &&
N->user_begin()->getOpcode() == ISD::ADD &&
!isUsedByLdSt(*N->user_begin(), nullptr) &&
!isa<ConstantSDNode>(N->user_begin()->getOperand(1)))
@@ -21346,6 +21560,24 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known = Known.sext(BitWidth);
break;
}
+ case RISCVISD::SRLW: {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::lshr(Known.trunc(32), Known2.trunc(5).zext(32));
+ // Restore the original width by sign extending.
+ Known = Known.sext(BitWidth);
+ break;
+ }
+ case RISCVISD::SRAW: {
+ KnownBits Known2;
+ Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+ Known = KnownBits::ashr(Known.trunc(32), Known2.trunc(5).zext(32));
+ // Restore the original width by sign extending.
+ Known = Known.sext(BitWidth);
+ break;
+ }
case RISCVISD::CTZW: {
KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
@@ -21451,8 +21683,16 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
if (Tmp < 33) return 1;
return 33;
}
+ case RISCVISD::SRAW: {
+ unsigned Tmp =
+ DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ // sraw produces at least 33 sign bits. If the input already has more than
+ // 33 sign bits, sraw will preserve them.
+ // TODO: A more precise answer could be calculated depending on known bits
+ // in the shift amount.
+ return std::max(Tmp, 33U);
+ }
case RISCVISD::SLLW:
- case RISCVISD::SRAW:
case RISCVISD::SRLW:
case RISCVISD::DIVW:
case RISCVISD::DIVUW:
@@ -21463,9 +21703,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
case RISCVISD::FCVT_WU_RV64:
case RISCVISD::STRICT_FCVT_W_RV64:
case RISCVISD::STRICT_FCVT_WU_RV64:
- // TODO: As the result is sign-extended, this is conservatively correct. A
- // more precise answer could be calculated for SRAW depending on known
- // bits in the shift amount.
+ // TODO: As the result is sign-extended, this is conservatively correct.
return 33;
case RISCVISD::VMV_X_S: {
// The number of sign bits of the scalar result is computed by obtaining the
@@ -21548,6 +21786,14 @@ bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode(
// TODO: Add more target nodes.
switch (Op.getOpcode()) {
+ case RISCVISD::SLLW:
+ case RISCVISD::SRAW:
+ case RISCVISD::SRLW:
+ case RISCVISD::RORW:
+ case RISCVISD::ROLW:
+ // Only the lower 5 bits of RHS are read, guaranteeing the rotate/shift
+ // amount is in bounds.
+ return false;
case RISCVISD::SELECT_CC:
// Integer comparisons cannot create poison.
assert(Op.getOperand(0).getValueType().isInteger() &&
@@ -24683,7 +24929,7 @@ RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest(
EVT VT, const APInt &AndMask) const {
- if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())
+ if (Subtarget.hasCZEROLike())
return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024);
return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask);
}
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index fb63ebcfaace..4581c11356af 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -435,8 +435,8 @@ public:
const APInt &GapMask) const override;
bool lowerInterleavedStore(Instruction *Store, Value *Mask,
- ShuffleVectorInst *SVI,
- unsigned Factor) const override;
+ ShuffleVectorInst *SVI, unsigned Factor,
+ const APInt &GapMask) const override;
bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask,
IntrinsicInst *DI) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVIndirectBranchTracking.cpp b/llvm/lib/Target/RISCV/RISCVIndirectBranchTracking.cpp
index 43621b8f0f33..9664ab345dcb 100644
--- a/llvm/lib/Target/RISCV/RISCVIndirectBranchTracking.cpp
+++ b/llvm/lib/Target/RISCV/RISCVIndirectBranchTracking.cpp
@@ -6,7 +6,7 @@
//
//===----------------------------------------------------------------------===//
//
-// The pass adds LPAD (AUIPC with rs1 = X0) machine instructions at the
+// The pass adds LPAD (AUIPC with rd = X0) machine instructions at the
// beginning of each basic block or function that is referenced by an indirect
// jump/call instruction.
//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td b/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
index 209c3fae63f4..4c7cd05723ac 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td
@@ -54,7 +54,6 @@ class RVInst16CSS<bits<3> funct3, bits<2> opcode, dag outs, dag ins,
: RVInst16<outs, ins, opcodestr, argstr, [], InstFormatCSS> {
bits<10> imm;
bits<5> rs2;
- bits<5> rs1;
let Inst{15-13} = funct3;
let Inst{12-7} = imm{5-0};
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 7b4a1de16769..d0bb57a3eaa1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -80,8 +80,8 @@ namespace llvm::RISCV {
} // end namespace llvm::RISCV
-RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI)
- : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),
+RISCVInstrInfo::RISCVInstrInfo(const RISCVSubtarget &STI)
+ : RISCVGenInstrInfo(STI, RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP),
STI(STI) {}
#define GET_INSTRINFO_HELPERS
@@ -3511,6 +3511,9 @@ RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
return outliner::InstrType::Illegal;
}
+ if (isLPAD(MI))
+ return outliner::InstrType::Illegal;
+
return outliner::InstrType::Legal;
}
@@ -4796,8 +4799,22 @@ unsigned RISCV::getDestLog2EEW(const MCInstrDesc &Desc, unsigned Log2SEW) {
return Scaled;
}
-/// Given two VL operands, do we know that LHS <= RHS?
+static std::optional<int64_t> getEffectiveImm(const MachineOperand &MO) {
+ assert(MO.isImm() || MO.getReg().isVirtual());
+ if (MO.isImm())
+ return MO.getImm();
+ const MachineInstr *Def =
+ MO.getParent()->getMF()->getRegInfo().getVRegDef(MO.getReg());
+ int64_t Imm;
+ if (isLoadImm(Def, Imm))
+ return Imm;
+ return std::nullopt;
+}
+
+/// Given two VL operands, do we know that LHS <= RHS? Must be used in SSA form.
bool RISCV::isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS) {
+ assert((LHS.isImm() || LHS.getParent()->getMF()->getRegInfo().isSSA()) &&
+ (RHS.isImm() || RHS.getParent()->getMF()->getRegInfo().isSSA()));
if (LHS.isReg() && RHS.isReg() && LHS.getReg().isVirtual() &&
LHS.getReg() == RHS.getReg())
return true;
@@ -4807,9 +4824,11 @@ bool RISCV::isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS) {
return true;
if (LHS.isImm() && LHS.getImm() == RISCV::VLMaxSentinel)
return false;
- if (!LHS.isImm() || !RHS.isImm())
+ std::optional<int64_t> LHSImm = getEffectiveImm(LHS),
+ RHSImm = getEffectiveImm(RHS);
+ if (!LHSImm || !RHSImm)
return false;
- return LHS.getImm() <= RHS.getImm();
+ return LHSImm <= RHSImm;
}
namespace {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 785c8352d4a5..57ec431749eb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -62,7 +62,7 @@ enum RISCVMachineCombinerPattern : unsigned {
class RISCVInstrInfo : public RISCVGenInstrInfo {
public:
- explicit RISCVInstrInfo(RISCVSubtarget &STI);
+ explicit RISCVInstrInfo(const RISCVSubtarget &STI);
MCInst getNop() const override;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 23f5a848137c..92552b36aa0b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1698,8 +1698,6 @@ let Predicates = [IsRV32] in {
def : Pat<(i32 (setlt (i32 GPR:$rs1), 0)), (SRLI GPR:$rs1, 31)>; // compressible
}
let Predicates = [IsRV64] in {
-def : Pat<(i64 (seteq (i64 (and GPR:$rs1, 0x0000000080000000)), 0)),
- (XORI (i64 (SRLIW GPR:$rs1, 31)), 1)>;
def : Pat<(i64 (setlt (i64 GPR:$rs1), 0)), (SRLI GPR:$rs1, 63)>; // compressible
def : Pat<(i64 (setlt (sext_inreg GPR:$rs1, i32), 0)), (SRLIW GPR:$rs1, 31)>;
}
@@ -2330,7 +2328,6 @@ include "RISCVInstrInfoZalasr.td"
include "RISCVInstrInfoZimop.td"
include "RISCVInstrInfoZicbo.td"
include "RISCVInstrInfoZicond.td"
-include "RISCVInstrInfoZicfiss.td"
include "RISCVInstrInfoZilsd.td"
// Scalar FP
@@ -2359,6 +2356,9 @@ include "RISCVInstrInfoZc.td"
include "RISCVInstrInfoZcmop.td"
include "RISCVInstrInfoZclsd.td"
+// Control Flow Integrity, this requires Zimop/Zcmop
+include "RISCVInstrInfoZicfiss.td"
+
// Short Forward Branch
include "RISCVInstrInfoSFB.td"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
index c5551fbdec28..9fc73662d970 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td
@@ -230,13 +230,17 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
class CStackLoad<bits<3> funct3, string OpcodeStr,
DAGOperand cls, DAGOperand opnd>
: RVInst16CI<funct3, 0b10, (outs cls:$rd), (ins SPMem:$rs1, opnd:$imm),
- OpcodeStr, "$rd, ${imm}(${rs1})">;
+ OpcodeStr, "$rd, ${imm}(${rs1})"> {
+ bits<0> rs1;
+}
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
class CStackStore<bits<3> funct3, string OpcodeStr,
DAGOperand cls, DAGOperand opnd>
: RVInst16CSS<funct3, 0b10, (outs), (ins cls:$rs2, SPMem:$rs1, opnd:$imm),
- OpcodeStr, "$rs2, ${imm}(${rs1})">;
+ OpcodeStr, "$rs2, ${imm}(${rs1})"> {
+ bits<0> rs1;
+}
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in
class CLoad_ri<bits<3> funct3, string OpcodeStr,
@@ -301,14 +305,6 @@ def C_ADDI4SPN : RVInst16CIW<0b000, 0b00, (outs GPRC:$rd),
let Inst{5} = imm{3};
}
-let Predicates = [HasStdExtCOrZcd, HasStdExtD] in
-def C_FLD : CLoad_ri<0b001, "c.fld", FPR64C, uimm8_lsb000>,
- Sched<[WriteFLD64, ReadFMemBase]> {
- bits<8> imm;
- let Inst{12-10} = imm{5-3};
- let Inst{6-5} = imm{7-6};
-}
-
def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00>,
Sched<[WriteLDW, ReadMemBase]> {
bits<7> imm;
@@ -326,16 +322,6 @@ def C_LW_INX : CLoad_ri<0b010, "c.lw", GPRF32C, uimm7_lsb00>,
let Inst{5} = imm{6};
}
-let DecoderNamespace = "RV32Only",
- Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
-def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00>,
- Sched<[WriteFLD32, ReadFMemBase]> {
- bits<7> imm;
- let Inst{12-10} = imm{5-3};
- let Inst{6} = imm{2};
- let Inst{5} = imm{6};
-}
-
let Predicates = [HasStdExtZca, IsRV64] in
def C_LD : CLoad_ri<0b011, "c.ld", GPRC, uimm8_lsb000>,
Sched<[WriteLDD, ReadMemBase]> {
@@ -344,14 +330,6 @@ def C_LD : CLoad_ri<0b011, "c.ld", GPRC, uimm8_lsb000>,
let Inst{6-5} = imm{7-6};
}
-let Predicates = [HasStdExtCOrZcd, HasStdExtD] in
-def C_FSD : CStore_rri<0b101, "c.fsd", FPR64C, uimm8_lsb000>,
- Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]> {
- bits<8> imm;
- let Inst{12-10} = imm{5-3};
- let Inst{6-5} = imm{7-6};
-}
-
def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00>,
Sched<[WriteSTW, ReadStoreData, ReadMemBase]> {
bits<7> imm;
@@ -369,16 +347,6 @@ def C_SW_INX : CStore_rri<0b110, "c.sw", GPRF32C, uimm7_lsb00>,
let Inst{5} = imm{6};
}
-let DecoderNamespace = "RV32Only",
- Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
-def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00>,
- Sched<[WriteFST32, ReadFStoreData, ReadFMemBase]> {
- bits<7> imm;
- let Inst{12-10} = imm{5-3};
- let Inst{6} = imm{2};
- let Inst{5} = imm{6};
-}
-
let Predicates = [HasStdExtZca, IsRV64] in
def C_SD : CStore_rri<0b111, "c.sd", GPRC, uimm8_lsb000>,
Sched<[WriteSTD, ReadStoreData, ReadMemBase]> {
@@ -500,12 +468,6 @@ def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb),
let Constraints = "$rd = $rd_wb";
}
-let Predicates = [HasStdExtCOrZcd, HasStdExtD] in
-def C_FLDSP : CStackLoad<0b001, "c.fldsp", FPR64, uimm9_lsb000>,
- Sched<[WriteFLD64, ReadFMemBase]> {
- let Inst{4-2} = imm{8-6};
-}
-
def C_LWSP : CStackLoad<0b010, "c.lwsp", GPRNoX0, uimm8_lsb00>,
Sched<[WriteLDW, ReadMemBase]> {
let Inst{3-2} = imm{7-6};
@@ -517,13 +479,6 @@ def C_LWSP_INX : CStackLoad<0b010, "c.lwsp", GPRF32NoX0, uimm8_lsb00>,
let Inst{3-2} = imm{7-6};
}
-let DecoderNamespace = "RV32Only",
- Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
-def C_FLWSP : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00>,
- Sched<[WriteFLD32, ReadFMemBase]> {
- let Inst{3-2} = imm{7-6};
-}
-
let Predicates = [HasStdExtZca, IsRV64] in
def C_LDSP : CStackLoad<0b011, "c.ldsp", GPRNoX0, uimm9_lsb000>,
Sched<[WriteLDD, ReadMemBase]> {
@@ -560,12 +515,6 @@ def C_ADD : RVInst16CR<0b1001, 0b10, (outs GPR:$rd),
let Constraints = "$rs1 = $rd";
}
-let Predicates = [HasStdExtCOrZcd, HasStdExtD] in
-def C_FSDSP : CStackStore<0b101, "c.fsdsp", FPR64, uimm9_lsb000>,
- Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]> {
- let Inst{9-7} = imm{8-6};
-}
-
def C_SWSP : CStackStore<0b110, "c.swsp", GPR, uimm8_lsb00>,
Sched<[WriteSTW, ReadStoreData, ReadMemBase]> {
let Inst{8-7} = imm{7-6};
@@ -577,13 +526,6 @@ def C_SWSP_INX : CStackStore<0b110, "c.swsp", GPRF32, uimm8_lsb00>,
let Inst{8-7} = imm{7-6};
}
-let DecoderNamespace = "RV32Only",
- Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in
-def C_FSWSP : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00>,
- Sched<[WriteFST32, ReadFStoreData, ReadFMemBase]> {
- let Inst{8-7} = imm{7-6};
-}
-
let Predicates = [HasStdExtZca, IsRV64] in
def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000>,
Sched<[WriteSTD, ReadStoreData, ReadMemBase]> {
@@ -600,6 +542,61 @@ def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther>,
} // Predicates = [HasStdExtZca]
+let DecoderNamespace = "RV32Only",
+ Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
+ def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00>,
+ Sched<[WriteFLD32, ReadFMemBase]> {
+ bits<7> imm;
+ let Inst{12-10} = imm{5-3};
+ let Inst{6} = imm{2};
+ let Inst{5} = imm{6};
+ }
+
+ def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00>,
+ Sched<[WriteFST32, ReadFStoreData, ReadFMemBase]> {
+ bits<7> imm;
+ let Inst{12-10} = imm{5-3};
+ let Inst{6} = imm{2};
+ let Inst{5} = imm{6};
+ }
+
+ def C_FLWSP : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00>,
+ Sched<[WriteFLD32, ReadFMemBase]> {
+ let Inst{3-2} = imm{7-6};
+ }
+
+ def C_FSWSP : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00>,
+ Sched<[WriteFST32, ReadFStoreData, ReadFMemBase]> {
+ let Inst{8-7} = imm{7-6};
+ }
+} // DecoderNamespace = "RV32Only", Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32]
+
+let Predicates = [HasStdExtCOrZcd, HasStdExtD] in {
+ def C_FLD : CLoad_ri<0b001, "c.fld", FPR64C, uimm8_lsb000>,
+ Sched<[WriteFLD64, ReadFMemBase]> {
+ bits<8> imm;
+ let Inst{12-10} = imm{5-3};
+ let Inst{6-5} = imm{7-6};
+ }
+
+ def C_FSD : CStore_rri<0b101, "c.fsd", FPR64C, uimm8_lsb000>,
+ Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]> {
+ bits<8> imm;
+ let Inst{12-10} = imm{5-3};
+ let Inst{6-5} = imm{7-6};
+ }
+
+ def C_FLDSP : CStackLoad<0b001, "c.fldsp", FPR64, uimm9_lsb000>,
+ Sched<[WriteFLD64, ReadFMemBase]> {
+ let Inst{4-2} = imm{8-6};
+ }
+
+ def C_FSDSP : CStackStore<0b101, "c.fsdsp", FPR64, uimm9_lsb000>,
+ Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]> {
+ let Inst{9-7} = imm{8-6};
+ }
+} // Predicates = [HasStdExtCOrZcd, HasStdExtD]
+
//===----------------------------------------------------------------------===//
// HINT Instructions
//===----------------------------------------------------------------------===//
@@ -767,20 +764,17 @@ def : InstAlias<".insn_cj $opcode, $funct3, $imm11",
// Compress Instruction tablegen backend.
//===----------------------------------------------------------------------===//
-// Patterns are defined in the same order the compressed instructions appear
+// Zca patterns are defined in the same order the compressed instructions appear
// under the "RVC Instruction Set Listings" section of the ISA manual.
+// Zca Instructions
+
// Quadrant 0
let Predicates = [HasStdExtZca] in {
def : CompressPat<(ADDI GPRC:$rd, SP:$rs1, uimm10_lsb00nonzero:$imm),
(C_ADDI4SPN GPRC:$rd, SP:$rs1, uimm10_lsb00nonzero:$imm)>;
} // Predicates = [HasStdExtZca]
-let Predicates = [HasStdExtCOrZcd, HasStdExtD] in {
-def : CompressPat<(FLD FPR64C:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm),
- (C_FLD FPR64C:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm)>;
-} // Predicates = [HasStdExtCOrZcd, HasStdExtD]
-
let Predicates = [HasStdExtZca] in {
def : CompressPat<(LW GPRC:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm),
(C_LW GPRC:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
@@ -790,21 +784,11 @@ def : CompressPat<(LW_INX GPRF32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm),
(C_LW_INX GPRF32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
} // Predicates = [HasStdExtZca]
-let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
-def : CompressPat<(FLW FPR32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm),
- (C_FLW FPR32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
-} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32]
-
let Predicates = [HasStdExtZca, IsRV64] in {
def : CompressPat<(LD GPRC:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm),
(C_LD GPRC:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm)>;
} // Predicates = [HasStdExtZca, IsRV64]
-let Predicates = [HasStdExtCOrZcd, HasStdExtD] in {
-def : CompressPat<(FSD FPR64C:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm),
- (C_FSD FPR64C:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm)>;
-} // Predicates = [HasStdExtCOrZcd, HasStdExtD]
-
let Predicates = [HasStdExtZca] in {
def : CompressPat<(SW GPRC:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm),
(C_SW GPRC:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
@@ -814,11 +798,6 @@ def : CompressPat<(SW_INX GPRF32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm),
(C_SW_INX GPRF32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
} // Predicates = [HasStdExtZca]
-let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
-def : CompressPat<(FSW FPR32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm),
- (C_FSW FPR32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
-} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32]
-
let Predicates = [HasStdExtZca, IsRV64] in {
def : CompressPat<(SD GPRC:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm),
(C_SD GPRC:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm)>;
@@ -907,11 +886,6 @@ def : CompressPat<(SLLI GPRNoX0:$rs1, GPRNoX0:$rs1, uimmlog2xlennonzero:$imm),
(C_SLLI GPRNoX0:$rs1, uimmlog2xlennonzero:$imm)>;
} // Predicates = [HasStdExtZca]
-let Predicates = [HasStdExtCOrZcd, HasStdExtD] in {
-def : CompressPat<(FLD FPR64:$rd, SPMem:$rs1, uimm9_lsb000:$imm),
- (C_FLDSP FPR64:$rd, SPMem:$rs1, uimm9_lsb000:$imm)>;
-} // Predicates = [HasStdExtCOrZcd, HasStdExtD]
-
let Predicates = [HasStdExtZca] in {
def : CompressPat<(LW GPRNoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm),
(C_LWSP GPRNoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>;
@@ -921,11 +895,6 @@ def : CompressPat<(LW_INX GPRF32NoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm),
(C_LWSP_INX GPRF32NoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>;
} // Predicates = [HasStdExtZca]
-let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
-def : CompressPat<(FLW FPR32:$rd, SPMem:$rs1, uimm8_lsb00:$imm),
- (C_FLWSP FPR32:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>;
-} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32]
-
let Predicates = [HasStdExtZca, IsRV64] in {
def : CompressPat<(LD GPRNoX0:$rd, SPMem:$rs1, uimm9_lsb000:$imm),
(C_LDSP GPRNoX0:$rd, SPMem:$rs1, uimm9_lsb000:$imm)>;
@@ -953,11 +922,6 @@ def : CompressPat<(ADD GPRNoX0:$rs1, GPRNoX0:$rs2, GPRNoX0:$rs1),
(C_ADD GPRNoX0:$rs1, GPRNoX0:$rs2)>;
} // Predicates = [HasStdExtZca]
-let Predicates = [HasStdExtCOrZcd, HasStdExtD] in {
-def : CompressPat<(FSD FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm),
- (C_FSDSP FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm)>;
-} // Predicates = [HasStdExtCOrZcd, HasStdExtD]
-
let Predicates = [HasStdExtZca] in {
def : CompressPat<(SW GPR:$rs2, SPMem:$rs1, uimm8_lsb00:$imm),
(C_SWSP GPR:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>;
@@ -967,12 +931,38 @@ def : CompressPat<(SW_INX GPRF32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm),
(C_SWSP_INX GPRF32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>;
} // Predicates = [HasStdExtZca]
-let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
-def : CompressPat<(FSW FPR32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm),
- (C_FSWSP FPR32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>;
-} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32]
-
let Predicates = [HasStdExtZca, IsRV64] in {
def : CompressPat<(SD GPR:$rs2, SPMem:$rs1, uimm9_lsb000:$imm),
(C_SDSP GPR:$rs2, SPMem:$rs1, uimm9_lsb000:$imm)>;
} // Predicates = [HasStdExtZca, IsRV64]
+
+// Zcf Instructions
+let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in {
+ // Quadrant 0
+ def : CompressPat<(FLW FPR32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm),
+ (C_FLW FPR32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
+ def : CompressPat<(FSW FPR32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm),
+ (C_FSW FPR32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>;
+
+ // Quadrant 2
+ def : CompressPat<(FLW FPR32:$rd, SPMem:$rs1, uimm8_lsb00:$imm),
+ (C_FLWSP FPR32:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>;
+ def : CompressPat<(FSW FPR32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm),
+ (C_FSWSP FPR32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>;
+} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32]
+
+// Zcd Instructions
+let Predicates = [HasStdExtCOrZcd, HasStdExtD] in {
+ // Quadrant 0
+ def : CompressPat<(FLD FPR64C:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm),
+ (C_FLD FPR64C:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm)>;
+ def : CompressPat<(FSD FPR64C:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm),
+ (C_FSD FPR64C:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm)>;
+
+ // Quadrant 2
+ def : CompressPat<(FLD FPR64:$rd, SPMem:$rs1, uimm9_lsb000:$imm),
+ (C_FLDSP FPR64:$rd, SPMem:$rs1, uimm9_lsb000:$imm)>;
+ def : CompressPat<(FSD FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm),
+ (C_FSDSP FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm)>;
+} // Predicates = [HasStdExtCOrZcd, HasStdExtD]
+
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index c342b41e41d0..6840dacaea54 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -25,7 +25,7 @@ def SImm8UnsignedAsmOperand : SImmAsmOperand<8, "Unsigned"> {
}
// A 8-bit signed immediate allowing range [-128, 255]
-// but represented as [-128, 255].
+// but represented as [-128, 127].
def simm8_unsigned : RISCVOp {
let ParserMatchClass = SImm8UnsignedAsmOperand;
let EncoderMethod = "getImmOpValue";
@@ -98,6 +98,40 @@ class PLUI_i<bits<7> funct7, string opcodestr>
let Inst{23-15} = imm10{9-1};
}
+// Common base for widening Binary/Ternary ops
+class RVPWideningBase<bits<2> w, bit arith_shift, dag outs, dag ins,
+ string opcodestr>
+ : RVInst<outs, ins, opcodestr, "$rd, $rs1, $rs2", [], InstFormatOther> {
+ bits<5> rs2;
+ bits<5> rs1;
+ bits<5> rd;
+
+ let Inst{31} = 0b0;
+ let Inst{26-25} = w;
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = 0b010;
+ let Inst{11-8} = rd{4-1};
+ let Inst{7} = arith_shift;
+ let Inst{6-0} = OPC_OP_IMM_32.Value;
+}
+
+// Common base for narrowing ops
+class RVPNarrowingBase<bits<3> f, bit r, bits<4> funct4, dag outs, dag ins,
+ string opcodestr, string argstr>
+ : RVInst<outs, ins, opcodestr, argstr, [], InstFormatOther> {
+ bits<5> rs1;
+ bits<5> rd;
+
+ let Inst{31} = 0b0;
+ let Inst{30-28} = f;
+ let Inst{27} = r;
+ let Inst{19-16} = rs1{4-1};
+ let Inst{15-12} = funct4;
+ let Inst{11-7} = rd;
+ let Inst{6-0} = OPC_OP_IMM_32.Value;
+}
+
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class RVPShift_ri<bits<3> f, bits<3> funct3, string opcodestr, Operand ImmType>
: RVInstIBase<funct3, OPC_OP_IMM_32, (outs GPR:$rd),
@@ -141,6 +175,100 @@ class RVPShiftB_ri<bits<3> f, bits<3> funct3, string opcodestr>
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVPWideningShift_ri<bits<3> f, string opcodestr, Operand ImmType>
+ : RVInst<(outs GPRPairRV32:$rd), (ins GPR:$rs1, ImmType:$shamt), opcodestr,
+ "$rd, $rs1, $shamt", [], InstFormatOther> {
+ bits<5> rs1;
+ bits<5> rd;
+
+ let Inst{31} = 0b0;
+ let Inst{30-28} = f;
+ let Inst{27} = 0b0;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = 0b010;
+ let Inst{11-8} = rd{4-1};
+ let Inst{7} = 0b0;
+ let Inst{6-0} = OPC_OP_IMM_32.Value;
+
+ let hasSideEffects = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+}
+
+class RVPWideningShiftW_ri<bits<3> f, string opcodestr>
+ : RVPWideningShift_ri<f, opcodestr, uimm6> {
+ bits<6> shamt;
+
+ let Inst{26} = 0b1;
+ let Inst{25-20} = shamt;
+}
+
+class RVPWideningShiftH_ri<bits<3> f, string opcodestr>
+ : RVPWideningShift_ri<f, opcodestr, uimm5> {
+ bits<5> shamt;
+
+ let Inst{26-25} = 0b01;
+ let Inst{24-20} = shamt;
+}
+
+class RVPWideningShiftB_ri<bits<3> f, string opcodestr>
+ : RVPWideningShift_ri<f, opcodestr, uimm4> {
+ bits<4> shamt;
+
+ let Inst{26-24} = 0b001;
+ let Inst{23-20} = shamt;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVPNarrowingShift_ri<bits<3> f, string opcodestr, Operand ImmType>
+ : RVPNarrowingBase<f, 0b0, 0b1100, (outs GPR:$rd),
+ (ins GPRPairRV32:$rs1, ImmType:$shamt), opcodestr,
+ "$rd, $rs1, $shamt">;
+
+class RVPNarrowingShiftW_ri<bits<3> f, string opcodestr>
+ : RVPNarrowingShift_ri<f, opcodestr, uimm6> {
+ bits<6> shamt;
+
+ let Inst{26} = 0b1;
+ let Inst{25-20} = shamt;
+}
+
+class RVPNarrowingShiftH_ri<bits<3> f, string opcodestr>
+ : RVPNarrowingShift_ri<f, opcodestr, uimm5> {
+ bits<5> shamt;
+
+ let Inst{26-25} = 0b01;
+ let Inst{24-20} = shamt;
+}
+
+class RVPNarrowingShiftB_ri<bits<3> f, string opcodestr>
+ : RVPNarrowingShift_ri<f, opcodestr, uimm4> {
+ bits<4> shamt;
+
+ let Inst{26-24} = 0b001;
+ let Inst{23-20} = shamt;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVPNarrowingShift_rr<bits<3> f, bits<2> w, string opcodestr>
+ : RVPNarrowingBase<f, 0b1, 0b1100, (outs GPR:$rd),
+ (ins GPRPairRV32:$rs1, GPR:$rs2), opcodestr,
+ "$rd, $rs1, $rs2"> {
+ bits<5> rs2;
+
+ let Inst{26-25} = w;
+ let Inst{24-20} = rs2;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVPWideningShift_rr<bits<3> f, bits<2> w, string opcodestr>
+ : RVPWideningBase<w, 0b0, (outs GPRPairRV32:$rd), (ins GPR:$rs1, GPR:$rs2),
+ opcodestr> {
+ let Inst{30-28} = f;
+ let Inst{27} = 0b1;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class RVPUnary_ri<bits<2> w, bits<5> uf, string opcodestr>
: RVInstIBase<0b010, OPC_OP_IMM_32, (outs GPR:$rd), (ins GPR:$rs1),
opcodestr, "$rd, $rs1"> {
@@ -169,6 +297,24 @@ class RVPBinary_rr<bits<4> f, bits<2> w, bits<3> funct3, string opcodestr>
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVPWideningBinary_rr<bits<4> f, bits<2> w, string opcodestr>
+ : RVPWideningBase<w, 0b1, (outs GPRPairRV32:$rd), (ins GPR:$rs1, GPR:$rs2),
+ opcodestr> {
+ let Inst{30-27} = f;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVPNarrowingBinary_rr<bits<3> f, bits<2> w, string opcodestr>
+ : RVPNarrowingBase<f, 0b1, 0b0100, (outs GPR:$rd),
+ (ins GPRPairRV32:$rs1, GPR:$rs2), opcodestr,
+ "$rd, $rs1, $rs2"> {
+ bits<5> rs2;
+
+ let Inst{26-25} = w;
+ let Inst{24-20} = rs2;
+}
+
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class RVPTernary_rrr<bits<4> f, bits<2> w, bits<3> funct3, string opcodestr>
: RVInstRBase<funct3, OPC_OP_32, (outs GPR:$rd_wb),
(ins GPR:$rd, GPR:$rs1, GPR:$rs2), opcodestr,
@@ -180,6 +326,15 @@ class RVPTernary_rrr<bits<4> f, bits<2> w, bits<3> funct3, string opcodestr>
let Constraints = "$rd = $rd_wb";
}
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
+class RVPWideningTernary_rrr<bits<4> f, bits<2> w, string opcodestr>
+ : RVPWideningBase<w, 0b1, (outs GPRPairRV32:$rd_wb),
+ (ins GPR:$rd, GPR:$rs1, GPR:$rs2), opcodestr> {
+ let Inst{30-27} = f;
+
+ let Constraints = "$rd = $rd_wb";
+}
+
// Common base for pli.db/h/w and plui.dh/w
class RVPPairLoadImm_i<bits<7> funct7, dag ins, string opcodestr,
string argstr>
@@ -889,3 +1044,156 @@ let Predicates = [HasStdExtP, IsRV32] in {
let Inst{23-15} = imm10{9-1};
}
}
+
+let Predicates = [HasStdExtP, IsRV32] in {
+ def PWSLLI_B : RVPWideningShiftB_ri<0b000, "pwslli.b">;
+ def PWSLLI_H : RVPWideningShiftH_ri<0b000, "pwslli.h">;
+ def WSLLI : RVPWideningShiftW_ri<0b000, "wslli">;
+
+ def PWSLAI_B : RVPWideningShiftB_ri<0b100, "pwslai.b">;
+ def PWSLAI_H : RVPWideningShiftH_ri<0b100, "pwslai.h">;
+ def WSLAI : RVPWideningShiftW_ri<0b100, "wslai">;
+
+ def PWSLL_BS : RVPWideningShift_rr<0b000, 0b00, "pwsll.bs">;
+ def PWSLL_HS : RVPWideningShift_rr<0b000, 0b01, "pwsll.hs">;
+ def WSLL : RVPWideningShift_rr<0b000, 0b11, "wsll">;
+
+ def PWSLA_BS : RVPWideningShift_rr<0b100, 0b00, "pwsla.bs">;
+ def PWSLA_HS : RVPWideningShift_rr<0b100, 0b01, "pwsla.hs">;
+ def WSLA : RVPWideningShift_rr<0b100, 0b11, "wsla">;
+
+ def WZIP8P : RVPWideningShift_rr<0b111, 0b00, "wzip8p">;
+ def WZIP16P : RVPWideningShift_rr<0b111, 0b01, "wzip16p">;
+
+ def PWADD_H : RVPWideningBinary_rr<0b0000, 0b00, "pwadd.h">;
+ def WADD : RVPWideningBinary_rr<0b0000, 0b01, "wadd">;
+ def PWADD_B : RVPWideningBinary_rr<0b0000, 0b10, "pwadd.b">;
+ def PM2WADD_H : RVPWideningBinary_rr<0b0000, 0b11, "pm2wadd.h">;
+
+ def PWADDA_H : RVPWideningTernary_rrr<0b0001, 0b00, "pwadda.h">;
+ def WADDA : RVPWideningTernary_rrr<0b0001, 0b01, "wadda">;
+ def PWADDA_B : RVPWideningTernary_rrr<0b0001, 0b10, "pwadda.b">;
+ def PM2WADDA_H : RVPWideningTernary_rrr<0b0001, 0b11, "pm2wadda.h">;
+
+ def PWADDU_H : RVPWideningBinary_rr<0b0010, 0b00, "pwaddu.h">;
+ def WADDU : RVPWideningBinary_rr<0b0010, 0b01, "waddu">;
+ def PWADDU_B : RVPWideningBinary_rr<0b0010, 0b10, "pwaddu.b">;
+ def PM2WADD_HX : RVPWideningBinary_rr<0b0010, 0b11, "pm2wadd.hx">;
+
+ def PWADDAU_H : RVPWideningTernary_rrr<0b0011, 0b00, "pwaddau.h">;
+ def WADDAU : RVPWideningTernary_rrr<0b0011, 0b01, "waddau">;
+ def PWADDAU_B : RVPWideningTernary_rrr<0b0011, 0b10, "pwaddau.b">;
+ def PM2WADDA_HX : RVPWideningTernary_rrr<0b0011, 0b11, "pm2wadda.hx">;
+
+ def PWMUL_H : RVPWideningBinary_rr<0b0100, 0b00, "pwmul.h">;
+ def WMUL : RVPWideningBinary_rr<0b0100, 0b01, "wmul">;
+ def PWMUL_B : RVPWideningBinary_rr<0b0100, 0b10, "pwmul.b">;
+ def PM2WADDU_H : RVPWideningBinary_rr<0b0100, 0b11, "pm2waddu.h">;
+
+ def PWMACC_H : RVPWideningTernary_rrr<0b0101, 0b00, "pwmacc.h">;
+ def WMACC : RVPWideningTernary_rrr<0b0101, 0b01, "wmacc">;
+ def PM2WADDAU_H : RVPWideningTernary_rrr<0b0101, 0b11, "pm2waddau.h">;
+
+ def PWMULU_H : RVPWideningBinary_rr<0b0110, 0b00, "pwmulu.h">;
+ def WMULU : RVPWideningBinary_rr<0b0110, 0b01, "wmulu">;
+ def PWMULU_B : RVPWideningBinary_rr<0b0110, 0b10, "pwmulu.b">;
+
+ def PWMACCU_H : RVPWideningTernary_rrr<0b0111, 0b00, "pwmaccu.h">;
+ def WMACCU : RVPWideningTernary_rrr<0b0111, 0b01, "wmaccu">;
+
+ def PWSUB_H : RVPWideningBinary_rr<0b1000, 0b00, "pwsub.h">;
+ def WSUB : RVPWideningBinary_rr<0b1000, 0b01, "wsub">;
+ def PWSUB_B : RVPWideningBinary_rr<0b1000, 0b10, "pwsub.b">;
+ def PM2WSUB_H : RVPWideningBinary_rr<0b1000, 0b11, "pm2wsub.h">;
+
+ def PWSUBA_H : RVPWideningTernary_rrr<0b1001, 0b00, "pwsuba.h">;
+ def WSUBA : RVPWideningTernary_rrr<0b1001, 0b01, "wsuba">;
+ def PWSUBA_B : RVPWideningTernary_rrr<0b1001, 0b10, "pwsuba.b">;
+ def PM2WSUBA_H : RVPWideningTernary_rrr<0b1001, 0b11, "pm2wsuba.h">;
+
+ def PWSUBU_H : RVPWideningBinary_rr<0b1010, 0b00, "pwsubu.h">;
+ def WSUBU : RVPWideningBinary_rr<0b1010, 0b01, "wsubu">;
+ def PWSUBU_B : RVPWideningBinary_rr<0b1010, 0b10, "pwsubu.b">;
+ def PM2WSUB_HX : RVPWideningBinary_rr<0b1010, 0b11, "pm2wsub.hx">;
+
+ def PWSUBAU_H : RVPWideningTernary_rrr<0b1011, 0b00, "pwsubau.h">;
+ def WSUBAU : RVPWideningTernary_rrr<0b1011, 0b01, "wsubau">;
+ def PWSUBAU_B : RVPWideningTernary_rrr<0b1011, 0b10, "pwsubau.b">;
+ def PM2WSUBA_HX : RVPWideningTernary_rrr<0b1011, 0b11, "pm2wsuba.hx">;
+
+ def PWMULSU_H : RVPWideningBinary_rr<0b1100, 0b00, "pwmulsu.h">;
+ def WMULSU : RVPWideningBinary_rr<0b1100, 0b01, "wmulsu">;
+ def PWMULSU_B : RVPWideningBinary_rr<0b1100, 0b10, "pwmulsu.b">;
+ def PM2WADDSU_H : RVPWideningBinary_rr<0b1100, 0b11, "pm2waddsu.h">;
+
+ def PWMACCSU_H : RVPWideningTernary_rrr<0b1101, 0b00, "pwmaccsu.h">;
+ def WMACCSU : RVPWideningTernary_rrr<0b1101, 0b01, "wmaccsu">;
+ def PM2WADDASU_H : RVPWideningTernary_rrr<0b1101, 0b11, "pm2waddasu.h">;
+
+ def PMQWACC_H : RVPWideningTernary_rrr<0b1111, 0b00, "pmqwacc.h">;
+ def PMQWACC : RVPWideningTernary_rrr<0b1111, 0b01, "pmqwacc">;
+ def PMQRWACC_H : RVPWideningTernary_rrr<0b1111, 0b10, "pmqrwacc.h">;
+ def PMQRWACC : RVPWideningTernary_rrr<0b1111, 0b11, "pmqrwacc">;
+
+ def PREDSUM_DHS : RVPNarrowingBinary_rr<0b001, 0b00, "predsum.dhs">;
+ def PREDSUM_DBS : RVPNarrowingBinary_rr<0b001, 0b10, "predsum.dbs">;
+
+ def PREDSUMU_DHS : RVPNarrowingBinary_rr<0b011, 0b00, "predsumu.dhs">;
+ def PREDSUMU_DBS : RVPNarrowingBinary_rr<0b011, 0b10, "predsumu.dbs">;
+
+ def PNSRLI_B : RVPNarrowingShiftB_ri<0b000, "pnsrli.b">;
+ def PNSRLI_H : RVPNarrowingShiftH_ri<0b000, "pnsrli.h">;
+ def NSRLI : RVPNarrowingShiftW_ri<0b000, "nsrli">;
+
+ def PNCLIPIU_B : RVPNarrowingShiftB_ri<0b010, "pnclipiu.b">;
+ def PNCLIPIU_H : RVPNarrowingShiftH_ri<0b010, "pnclipiu.h">;
+ def NCLIPIU : RVPNarrowingShiftW_ri<0b010, "nclipiu">;
+
+ def PNCLIPRIU_B : RVPNarrowingShiftB_ri<0b011, "pnclipriu.b">;
+ def PNCLIPRIU_H : RVPNarrowingShiftH_ri<0b011, "pnclipriu.h">;
+ def NCLIPRIU : RVPNarrowingShiftW_ri<0b011, "nclipriu">;
+
+ def PNSRAI_B : RVPNarrowingShiftB_ri<0b100, "pnsrai.b">;
+ def PNSRAI_H : RVPNarrowingShiftH_ri<0b100, "pnsrai.h">;
+ def NSRAI : RVPNarrowingShiftW_ri<0b100, "nsrai">;
+
+ def PNSARI_B : RVPNarrowingShiftB_ri<0b101, "pnsari.b">;
+ def PNSARI_H : RVPNarrowingShiftH_ri<0b101, "pnsari.h">;
+ def NSARI : RVPNarrowingShiftW_ri<0b101, "nsari">;
+
+ def PNCLIPI_B : RVPNarrowingShiftB_ri<0b110, "pnclipi.b">;
+ def PNCLIPI_H : RVPNarrowingShiftH_ri<0b110, "pnclipi.h">;
+ def NCLIPI : RVPNarrowingShiftW_ri<0b110, "nclipi">;
+
+ def PNCLIPRI_B : RVPNarrowingShiftB_ri<0b111, "pnclipri.b">;
+ def PNCLIPRI_H : RVPNarrowingShiftH_ri<0b111, "pnclipri.h">;
+ def NCLIPRI : RVPNarrowingShiftW_ri<0b111, "nclipri">;
+
+ def PNSRL_BS : RVPNarrowingShift_rr<0b000, 0b00, "pnsrl.bs">;
+ def PNSRL_HS : RVPNarrowingShift_rr<0b000, 0b01, "pnsrl.hs">;
+ def NSRL : RVPNarrowingShift_rr<0b000, 0b11, "nsrl">;
+
+ def PNCLIPU_BS : RVPNarrowingShift_rr<0b010, 0b00, "pnclipu.bs">;
+ def PNCLIPU_HS : RVPNarrowingShift_rr<0b010, 0b01, "pnclipu.hs">;
+ def NCLIPU : RVPNarrowingShift_rr<0b010, 0b11, "nclipu">;
+
+ def PNCLIPRU_BS : RVPNarrowingShift_rr<0b011, 0b00, "pnclipru.bs">;
+ def PNCLIPRU_HS : RVPNarrowingShift_rr<0b011, 0b01, "pnclipru.hs">;
+ def NCLIPRU : RVPNarrowingShift_rr<0b011, 0b11, "nclipru">;
+
+ def PNSRA_BS : RVPNarrowingShift_rr<0b100, 0b00, "pnsra.bs">;
+ def PNSRA_HS : RVPNarrowingShift_rr<0b100, 0b01, "pnsra.hs">;
+ def NSRA : RVPNarrowingShift_rr<0b100, 0b11, "nsra">;
+
+ def PNSRAR_BS : RVPNarrowingShift_rr<0b101, 0b00, "pnsrar.bs">;
+ def PNSRAR_HS : RVPNarrowingShift_rr<0b101, 0b01, "pnsrar.hs">;
+ def NSRAR : RVPNarrowingShift_rr<0b101, 0b11, "nsrar">;
+
+ def PNCLIP_BS : RVPNarrowingShift_rr<0b110, 0b00, "pnclip.bs">;
+ def PNCLIP_HS : RVPNarrowingShift_rr<0b110, 0b01, "pnclip.hs">;
+ def NCLIP : RVPNarrowingShift_rr<0b110, 0b11, "nclip">;
+
+ def PNCLIPR_BS : RVPNarrowingShift_rr<0b111, 0b00, "pnclipr.bs">;
+ def PNCLIPR_HS : RVPNarrowingShift_rr<0b111, 0b01, "pnclipr.hs">;
+ def NCLIPR : RVPNarrowingShift_rr<0b111, 0b11, "nclipr">;
+} // Predicates = [HasStdExtP, IsRV32]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
index 32f533b8f114..f732ab13e5f8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td
@@ -44,153 +44,95 @@ def PseudoCCMOVGPRNoX0 : Pseudo<(outs GPRNoX0:$dst),
Sched<[]>;
}
+class SFBALU_rr // Shared pseudo shape for the conditional register-register ops.
+ : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1,
+ GPR:$rs2), []>, // presumably $lhs/$rhs/$cc form the condition; $rs1/$rs2 feed the op
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU,
+ ReadSFBALU]> {
+ let hasSideEffects = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let Size = 8; // presumably two 4-byte instructions after expansion -- TODO confirm
+ let Constraints = "$dst = $falsev"; // result register is tied to the $falsev input
+}
+
+class SFBALU_ri // Same shape as SFBALU_rr, but the last input is a simm12 immediate.
+ : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1,
+ simm12:$imm), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]> { // one fewer ReadSFBALU than SFBALU_rr
+ let hasSideEffects = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let Size = 8; // presumably two 4-byte instructions after expansion -- TODO confirm
+ let Constraints = "$dst = $falsev"; // result register is tied to the $falsev input
+}
+
+class SFBShift_ri // Conditional shift-by-immediate pseudo; the amount is a uimmlog2xlen operand.
+ : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1,
+ uimmlog2xlen:$imm), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]> {
+ let hasSideEffects = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let Size = 8; // presumably two 4-byte instructions after expansion -- TODO confirm
+ let Constraints = "$dst = $falsev"; // result register is tied to the $falsev input
+}
+
+class SFBShiftW_ri // Variant of SFBShift_ri whose shift amount is a uimm5 operand.
+ : Pseudo<(outs GPR:$dst),
+ (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1,
+ uimm5:$imm), []>,
+ Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]> {
+ let hasSideEffects = 0;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let Size = 8; // presumably two 4-byte instructions after expansion -- TODO confirm
+ let Constraints = "$dst = $falsev"; // result register is tied to the $falsev input
+}
+
// Conditional binops that update $dst to (op rs1, rs2) when the condition
// is true. Returns $falsev otherwise. Selected by optimizeSelect.
// TODO: Can we use DefaultOperands on the regular binop to accomplish this more
// like how ARM does predication?
-let Predicates = [HasShortForwardBranchOpt], hasSideEffects = 0,
- mayLoad = 0, mayStore = 0, Size = 8, Constraints = "$dst = $falsev" in {
-def PseudoCCADD : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
- ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
-def PseudoCCSUB : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
- ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
-def PseudoCCSLL : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
- ReadSFBALU, ReadSFBALU]>;
-def PseudoCCSRL : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
- ReadSFBALU, ReadSFBALU]>;
-def PseudoCCSRA : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
- ReadSFBALU, ReadSFBALU]>;
-def PseudoCCAND : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
- ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
-def PseudoCCOR : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
- ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
-def PseudoCCXOR : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
- ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
+let Predicates = [HasShortForwardBranchOpt] in { // flags, Size and the tie constraint come from the SFB* classes
+def PseudoCCADD : SFBALU_rr; // register-register forms
+def PseudoCCSUB : SFBALU_rr;
+def PseudoCCSLL : SFBALU_rr;
+def PseudoCCSRL : SFBALU_rr;
+def PseudoCCSRA : SFBALU_rr;
+def PseudoCCAND : SFBALU_rr;
+def PseudoCCOR : SFBALU_rr;
+def PseudoCCXOR : SFBALU_rr;
-def PseudoCCADDI : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
- ReadSFBALU]>;
-def PseudoCCSLLI : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, uimmlog2xlen:$shamt), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
- ReadSFBALU]>;
-def PseudoCCSRLI : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, uimmlog2xlen:$shamt), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
- ReadSFBALU]>;
-def PseudoCCSRAI : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, uimmlog2xlen:$shamt), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
- ReadSFBALU]>;
-def PseudoCCANDI : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
- ReadSFBALU]>;
-def PseudoCCORI : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
- ReadSFBALU]>;
-def PseudoCCXORI : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
- ReadSFBALU]>;
+def PseudoCCADDI : SFBALU_ri; // simm12-immediate forms
+def PseudoCCANDI : SFBALU_ri;
+def PseudoCCORI : SFBALU_ri;
+def PseudoCCXORI : SFBALU_ri;
+
+def PseudoCCSLLI : SFBShift_ri; // shift forms with a uimmlog2xlen amount
+def PseudoCCSRLI : SFBShift_ri;
+def PseudoCCSRAI : SFBShift_ri;
// RV64I instructions
-def PseudoCCADDW : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
- ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
-def PseudoCCSUBW : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
- ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
-def PseudoCCSLLW : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
- ReadSFBALU, ReadSFBALU]>;
-def PseudoCCSRLW : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
- ReadSFBALU, ReadSFBALU]>;
-def PseudoCCSRAW : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
- ReadSFBALU, ReadSFBALU]>;
+def PseudoCCADDW : SFBALU_rr; // W-suffixed register-register forms
+def PseudoCCSUBW : SFBALU_rr;
+def PseudoCCSLLW : SFBALU_rr;
+def PseudoCCSRLW : SFBALU_rr;
+def PseudoCCSRAW : SFBALU_rr;
+
+def PseudoCCADDIW : SFBALU_ri; // W-suffixed simm12-immediate form
-def PseudoCCADDIW : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, simm12:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
- ReadSFBALU]>;
-def PseudoCCSLLIW : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, uimm5:$shamt), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
- ReadSFBALU]>;
-def PseudoCCSRLIW : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, uimm5:$shamt), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
- ReadSFBALU]>;
-def PseudoCCSRAIW : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, uimm5:$shamt), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU,
- ReadSFBALU]>;
+def PseudoCCSLLIW : SFBShiftW_ri; // W-suffixed shifts take a uimm5 amount
+def PseudoCCSRLIW : SFBShiftW_ri;
+def PseudoCCSRAIW : SFBShiftW_ri;
// Zbb/Zbkb instructions
-def PseudoCCANDN : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
- ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
-def PseudoCCORN : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
- ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
-def PseudoCCXNOR : Pseudo<(outs GPR:$dst),
- (ins GPR:$lhs, GPR:$rhs, cond_code:$cc,
- GPR:$falsev, GPR:$rs1, GPR:$rs2), []>,
- Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp,
- ReadSFBALU, ReadSFBALU, ReadSFBALU]>;
+def PseudoCCANDN : SFBALU_rr; // register-register forms, same shape as PseudoCCADD
+def PseudoCCORN : SFBALU_rr;
+def PseudoCCXNOR : SFBALU_rr;
}
let Predicates = [HasShortForwardBranchOpt] in
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index acbccddce2b5..063ee5c5e8b9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -830,19 +830,6 @@ multiclass VPatTiedBinaryNoMaskVL_V<SDNode vop,
result_reg_class:$rs1,
op2_reg_class:$rs2,
GPR:$vl, sew, TAIL_AGNOSTIC)>;
- // Tail undisturbed
- def : Pat<(riscv_vmerge_vl true_mask,
- (result_type (vop
- result_reg_class:$rs1,
- (op2_type op2_reg_class:$rs2),
- srcvalue,
- true_mask,
- VLOpFrag)),
- result_reg_class:$rs1, result_reg_class:$rs1, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_"#suffix#"_"# vlmul.MX#"_TIED")
- result_reg_class:$rs1,
- op2_reg_class:$rs2,
- GPR:$vl, sew, TU_MU)>;
}
class VPatTiedBinaryMaskVL_V<SDNode vop,
@@ -892,22 +879,6 @@ multiclass VPatTiedBinaryNoMaskVL_V_RM<SDNode vop,
// RISCVInsertReadWriteCSR
FRM_DYN,
GPR:$vl, log2sew, TAIL_AGNOSTIC)>;
- // Tail undisturbed
- def : Pat<(riscv_vmerge_vl true_mask,
- (result_type (vop
- result_reg_class:$rs1,
- (op2_type op2_reg_class:$rs2),
- srcvalue,
- true_mask,
- VLOpFrag)),
- result_reg_class:$rs1, result_reg_class:$rs1, VLOpFrag),
- (!cast<Instruction>(name)
- result_reg_class:$rs1,
- op2_reg_class:$rs2,
- // Value to indicate no rounding mode change in
- // RISCVInsertReadWriteCSR
- FRM_DYN,
- GPR:$vl, log2sew, TU_MU)>;
}
class VPatBinaryVL_XI<SDPatternOperator vop,
@@ -1755,50 +1726,6 @@ multiclass VPatMultiplyAddVL_VV_VX<SDNode op, string instruction_name> {
}
}
-multiclass VPatMultiplyAccVL_VV_VX<PatFrag op, string instruction_name> {
- foreach vti = AllIntegerVectors in {
- defvar suffix = vti.LMul.MX;
- let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm),
- (vti.Vector (op vti.RegClass:$rd,
- (riscv_mul_vl_oneuse vti.RegClass:$rs1, vti.RegClass:$rs2,
- srcvalue, (vti.Mask true_mask), VLOpFrag),
- srcvalue, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TU_MU)>;
- def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm),
- (vti.Vector (op vti.RegClass:$rd,
- (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rs2,
- srcvalue, (vti.Mask true_mask), VLOpFrag),
- srcvalue, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_VX_"# suffix #"_MASK")
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TU_MU)>;
- def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm),
- (vti.Vector (op vti.RegClass:$rd,
- (riscv_mul_vl_oneuse vti.RegClass:$rs1, vti.RegClass:$rs2,
- srcvalue, (vti.Mask true_mask), VLOpFrag),
- srcvalue, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, undef, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm),
- (vti.Vector (op vti.RegClass:$rd,
- (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rs2,
- srcvalue, (vti.Mask true_mask), VLOpFrag),
- srcvalue, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, undef, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_VX_"# suffix #"_MASK")
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- }
- }
-}
-
multiclass VPatWidenMultiplyAddVL_VV_VX<SDNode vwmacc_op, string instr_name> {
foreach vtiTowti = AllWidenableIntVectors in {
defvar vti = vtiTowti.Vti;
@@ -1898,82 +1825,6 @@ multiclass VPatFPMulAddVL_VV_VF_RM<SDPatternOperator vop, string instruction_nam
}
}
-multiclass VPatFPMulAccVL_VV_VF_RM<PatFrag vop, string instruction_name> {
- foreach vti = AllFloatVectors in {
- defvar suffix = vti.LMul.MX # "_E" # vti.SEW;
- let Predicates = GetVTypePredicates<vti>.Predicates in {
- def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm),
- (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2,
- vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask VMV0:$vm),
- // Value to indicate no rounding mode change in
- // RISCVInsertReadWriteCSR
- FRM_DYN,
- GPR:$vl, vti.Log2SEW, TU_MU)>;
- def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm),
- (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2,
- vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask VMV0:$vm),
- // Value to indicate no rounding mode change in
- // RISCVInsertReadWriteCSR
- FRM_DYN,
- GPR:$vl, vti.Log2SEW, TU_MU)>;
- def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm),
- (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2,
- vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, undef, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK")
- vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask VMV0:$vm),
- // Value to indicate no rounding mode change in
- // RISCVInsertReadWriteCSR
- FRM_DYN,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm),
- (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2,
- vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)),
- vti.RegClass:$rd, undef, VLOpFrag),
- (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK")
- vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask VMV0:$vm),
- // Value to indicate no rounding mode change in
- // RISCVInsertReadWriteCSR
- FRM_DYN,
- GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>;
- }
- }
-}
-
-multiclass VPatWidenFPMulAccVL_VV_VF<SDNode vop, string instruction_name> {
- foreach vtiToWti = AllWidenableFloatVectors in {
- defvar vti = vtiToWti.Vti;
- defvar wti = vtiToWti.Wti;
- let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates,
- GetVTypePredicates<wti>.Predicates) in {
- def : Pat<(vop (vti.Vector vti.RegClass:$rs1),
- (vti.Vector vti.RegClass:$rs2),
- (wti.Vector wti.RegClass:$rd), (vti.Mask VMV0:$vm),
- VLOpFrag),
- (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX #"_MASK")
- wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>;
- def : Pat<(vop (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)),
- (vti.Vector vti.RegClass:$rs2),
- (wti.Vector wti.RegClass:$rd), (vti.Mask VMV0:$vm),
- VLOpFrag),
- (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX #"_MASK")
- wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2,
- (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>;
- }
- }
-}
-
multiclass VPatWidenFPMulAccVL_VV_VF_RM<SDNode vop, string instruction_name,
list<VTypeInfoToWide> vtiToWtis =
AllWidenableFloatVectors> {
@@ -2331,8 +2182,6 @@ defm : VPatBinaryWVL_VV_VX<riscv_vwmulsu_vl, "PseudoVWMULSU">;
// 11.13 Vector Single-Width Integer Multiply-Add Instructions
defm : VPatMultiplyAddVL_VV_VX<riscv_add_vl, "PseudoVMADD">;
defm : VPatMultiplyAddVL_VV_VX<riscv_sub_vl, "PseudoVNMSUB">;
-defm : VPatMultiplyAccVL_VV_VX<riscv_add_vl_oneuse, "PseudoVMACC">;
-defm : VPatMultiplyAccVL_VV_VX<riscv_sub_vl_oneuse, "PseudoVNMSAC">;
// 11.14. Vector Widening Integer Multiply-Add Instructions
defm : VPatWidenMultiplyAddVL_VV_VX<riscv_vwmacc_vl, "PseudoVWMACC">;
@@ -2470,10 +2319,6 @@ defm : VPatFPMulAddVL_VV_VF_RM<any_riscv_vfmadd_vl, "PseudoVFMADD">;
defm : VPatFPMulAddVL_VV_VF_RM<any_riscv_vfmsub_vl, "PseudoVFMSUB">;
defm : VPatFPMulAddVL_VV_VF_RM<any_riscv_vfnmadd_vl, "PseudoVFNMADD">;
defm : VPatFPMulAddVL_VV_VF_RM<any_riscv_vfnmsub_vl, "PseudoVFNMSUB">;
-defm : VPatFPMulAccVL_VV_VF_RM<riscv_vfmadd_vl_oneuse, "PseudoVFMACC">;
-defm : VPatFPMulAccVL_VV_VF_RM<riscv_vfmsub_vl_oneuse, "PseudoVFMSAC">;
-defm : VPatFPMulAccVL_VV_VF_RM<riscv_vfnmadd_vl_oneuse, "PseudoVFNMACC">;
-defm : VPatFPMulAccVL_VV_VF_RM<riscv_vfnmsub_vl_oneuse, "PseudoVFNMSAC">;
// 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions
defm : VPatWidenFPMulAccVL_VV_VF_RM<riscv_vfwmadd_vl, "PseudoVFWMACC">;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td
index 889ea9802257..d615094329b2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td
@@ -125,10 +125,25 @@ class Mips_prefetch_ri<dag outs, dag ins, string opcodestr, string argstr>
let Inst{6-0} = OPC_CUSTOM_0.Value;
}
+// MIPS custom barrier instruction format (no operands; OP-IMM shift encoding).
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in
+class MIPSExtInst_ri<bits<6> shimm5, string opcodestr>
+ : RVInstIShift<0b00000, 0b001, OPC_OP_IMM, (outs), (ins), opcodestr, ""> { // empty outs/ins and empty operand string
+ let shamt = shimm5; // per-instruction constant; NOTE(review): named shimm5 but declared bits<6>
+ let rd = 0; // rd field is fixed to 0
+ let rs1 = 0; // rs1 field is fixed to 0
+}
+
//===----------------------------------------------------------------------===//
// MIPS extensions
//===----------------------------------------------------------------------===//
-let Predicates = [HasVendorXMIPSCBOP] ,DecoderNamespace = "Xmipscbop" in {
+let Predicates = [HasVendorXMIPSEXECTL], DecoderNamespace = "XMIPS" in {
+ def MIPS_EHB : MIPSExtInst_ri<0b000011, "mips.ehb">; // shamt field = 3
+ def MIPS_IHB : MIPSExtInst_ri<0b000001, "mips.ihb">; // shamt field = 1
+ def MIPS_PAUSE : MIPSExtInst_ri<0b000101, "mips.pause">; // shamt field = 5
+}
+
+let Predicates = [HasVendorXMIPSCBOP], DecoderNamespace = "XMIPS" in {
def MIPS_PREF : Mips_prefetch_ri<(outs), (ins GPR:$rs1, uimm9:$imm9, uimm5:$hint),
"mips.pref", "$hint, ${imm9}(${rs1})">,
Sched<[]>;
@@ -146,7 +161,7 @@ let Predicates = [HasVendorXMIPSCBOP] in {
}
let Predicates = [HasVendorXMIPSCMov], hasSideEffects = 0, mayLoad = 0, mayStore = 0,
- DecoderNamespace = "Xmipscmov" in {
+ DecoderNamespace = "XMIPS" in {
def MIPS_CCMOV : RVInstR4<0b11, 0b011, OPC_CUSTOM_0, (outs GPR:$rd),
(ins GPR:$rs1, GPR:$rs2, GPR:$rs3),
"mips.ccmov", "$rd, $rs2, $rs1, $rs3">,
@@ -166,7 +181,7 @@ def : Pat<(select (XLenVT GPR:$rs2), (XLenVT GPR:$rs1), (XLenVT GPR:$rs3)),
}
let Predicates = [HasVendorXMIPSLSP], hasSideEffects = 0,
- DecoderNamespace = "Xmipslsp" in {
+ DecoderNamespace = "XMIPS" in {
let mayLoad = 1, mayStore = 0 in {
def MIPS_LWP : LWPFormat<(outs GPR:$rd1, GPR:$rd2), (ins GPR:$rs1, uimm7_lsb00:$imm7),
"mips.lwp", "$rd1, $rd2, ${imm7}(${rs1})">,
@@ -184,4 +199,4 @@ def MIPS_SDP : SDPFormat<(outs), (ins GPR:$rs2, GPR:$rs3, GPR:$rs1, uimm7_lsb000
"mips.sdp", "$rs2, $rs3, ${imm7}(${rs1})">,
Sched<[WriteSTD, ReadStoreData, ReadStoreData, ReadMemBase]>;
} // mayLoad = 0, mayStore = 1
-} // Predicates = [HasVendorXMIPSLSP], hasSideEffects = 0, DecoderNamespace = "Xmipslsp"
+} // Predicates = [HasVendorXMIPSLSP], hasSideEffects = 0, DecoderNamespace = "XMIPS"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
index 2c64b0c220fb..69796a68ecd6 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td
@@ -22,6 +22,15 @@ def SDT_SetMultiple : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>,
def qc_setwmi : RVSDNode<"QC_SETWMI", SDT_SetMultiple,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def qc_insb : RVSDNode<"QC_INSB", SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, // 1 result, 4 operands
+ SDTCisSameAs<0, 2>, // operands 1 and 2 share the result type
+ SDTCisVT<0, i32>, // the result type is i32
+ SDTCisInt<3>, // operands 3 and 4 are integers
+ SDTCisInt<4>]>,
+ []>; // no node properties
+
+def qc_e_li : RVSDNode<"QC_E_LI", SDTIntUnaryOp>; // selected to QC_E_LI by the HasVendorXqcili patterns
+
def uimm5nonzero : RISCVOp<XLenVT>,
ImmLeaf<XLenVT, [{return (Imm != 0) && isUInt<5>(Imm);}]> {
let ParserMatchClass = UImmAsmOperand<5, "NonZero">;
@@ -1508,6 +1517,11 @@ def : Pat<(i32 (and GPRNoX0:$rs, 1023)), (QC_EXTU GPRNoX0:$rs, 10, 0)>;
def : Pat<(i32 (and GPRNoX0:$rs, 2047)), (QC_EXTU GPRNoX0:$rs, 11, 0)>;
def : Pat<(i32 (bitreverse GPRNoX0:$rs1)), (QC_BREV32 GPRNoX0:$rs1)>;
+
+def : Pat<(qc_insb GPRNoX0:$rd, simm5:$imm5, uimm5_plus1:$width, uimm5:$shamt), // simm5 source operand
+ (QC_INSBI GPRNoX0:$rd, simm5:$imm5, uimm5_plus1:$width, uimm5:$shamt)>; // -> immediate form QC_INSBI
+def : Pat<(qc_insb GPRNoX0:$rd, GPR:$rs1, uimm5_plus1:$width, uimm5:$shamt), // register source operand
+ (QC_INSB GPRNoX0:$rd, GPR:$rs1, uimm5_plus1:$width, uimm5:$shamt)>; // -> register form QC_INSB
} // Predicates = [HasVendorXqcibm, IsRV32]
// If Zbb is enabled sext.b/h is preferred since they are compressible
@@ -1605,6 +1619,13 @@ def : Pat<(qc_setwmi GPR:$rs3, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uim
(QC_SETWMI GPR:$rs3, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7)>;
} // Predicates = [HasVendorXqcilsm, IsRV32]
+let Predicates = [HasVendorXqcili, IsRV32] in { // every qc_e_li operand kind below selects to QC_E_LI
+def: Pat<(qc_e_li tglobaladdr:$A), (QC_E_LI bare_simm32:$A)>; // tglobaladdr operand
+def: Pat<(qc_e_li tblockaddress:$A), (QC_E_LI bare_simm32:$A)>; // tblockaddress operand
+def: Pat<(qc_e_li tjumptable:$A), (QC_E_LI bare_simm32:$A)>; // tjumptable operand
+def: Pat<(qc_e_li tconstpool:$A), (QC_E_LI bare_simm32:$A)>; // tconstpool operand
+} // Predicates = [HasVendorXqcili, IsRV32]
+
//===----------------------------------------------------------------------===//
// Compress Instruction tablegen backend.
//===----------------------------------------------------------------------===//
@@ -1738,10 +1759,19 @@ def : CompressPat<(QC_E_XORAI GPRNoX0:$rd, simm12:$imm),
(XORI GPRNoX0:$rd, GPRNoX0:$rd, simm12:$imm)>;
} // let isCompressOnly = true, Predicates = [HasVendorXqcilia, IsRV32]
-let Predicates = [HasVendorXqciac, IsRV32] in {
+let isCompressOnly = true, Predicates = [HasVendorXqciac, IsRV32] in {
def : CompressPat<(QC_MULIADD GPRC:$rd, GPRC:$rs1, uimm5:$imm5),
(QC_C_MULIADD GPRC:$rd, GPRC:$rs1, uimm5:$imm5)>;
-}
+} // isCompressOnly = true, Predicates = [HasVendorXqciac, IsRV32]
+
+let isCompressOnly = true, Predicates = [HasVendorXqciac, HasStdExtZba, IsRV32] in { // needs both Xqciac and Zba
+def : CompressPat<(SH1ADD GPRC:$rd, GPRC:$rs1, GPRC:$rd), // matches only when the last source is also $rd
+ (QC_C_MULIADD GPRC:$rd, GPRC:$rs1, 2)>; // SH1ADD -> immediate 2
+def : CompressPat<(SH2ADD GPRC:$rd, GPRC:$rs1, GPRC:$rd),
+ (QC_C_MULIADD GPRC:$rd, GPRC:$rs1, 4)>; // SH2ADD -> immediate 4
+def : CompressPat<(SH3ADD GPRC:$rd, GPRC:$rs1, GPRC:$rd),
+ (QC_C_MULIADD GPRC:$rd, GPRC:$rs1, 8)>; // SH3ADD -> immediate 8
+} // isCompressOnly = true, Predicates = [HasVendorXqciac, HasStdExtZba, IsRV32]
let isCompressOnly = true, Predicates = [HasVendorXqcibi, IsRV32] in {
def : CompressPat<(QC_E_BEQI GPRNoX0:$rs1, simm5nonzero:$imm5, bare_simm13_lsb0:$imm12),
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXwch.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXwch.td
index a43cbadf6f30..bb1862cc88d6 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXwch.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXwch.td
@@ -106,6 +106,7 @@ def QK_C_LBUSP : QKStackInst<0b00, (outs GPRC:$rd_rs2),
(ins SPMem:$rs1, uimm4:$imm),
"qk.c.lbusp", "$rd_rs2, ${imm}(${rs1})">,
Sched<[WriteLDB, ReadMemBase]> {
+ bits<0> rs1;
bits<4> imm;
let Inst{10-7} = imm;
}
@@ -115,6 +116,7 @@ def QK_C_SBSP : QKStackInst<0b10, (outs),
uimm4:$imm),
"qk.c.sbsp", "$rd_rs2, ${imm}(${rs1})">,
Sched<[WriteSTB, ReadStoreData, ReadMemBase]> {
+ bits<0> rs1;
bits<4> imm;
let Inst{10-7} = imm;
}
@@ -124,6 +126,7 @@ def QK_C_LHUSP : QKStackInst<0b01, (outs GPRC:$rd_rs2),
(ins SPMem:$rs1, uimm5_lsb0:$imm),
"qk.c.lhusp", "$rd_rs2, ${imm}(${rs1})">,
Sched<[WriteLDH, ReadMemBase]> {
+ bits<0> rs1;
bits<5> imm;
let Inst{10-8} = imm{3-1};
let Inst{7} = imm{4};
@@ -133,6 +136,7 @@ def QK_C_SHSP : QKStackInst<0b11, (outs),
(ins GPRC:$rd_rs2, SPMem:$rs1, uimm5_lsb0:$imm),
"qk.c.shsp", "$rd_rs2, ${imm}(${rs1})">,
Sched<[WriteSTH, ReadStoreData, ReadMemBase]> {
+ bits<0> rs1;
bits<5> imm;
let Inst{10-8} = imm{3-1};
let Inst{7} = imm{4};
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
index 2abd3e613a03..a2b4302e19ed 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
@@ -459,15 +459,15 @@ let Predicates = [HasStdExtZba, IsRV64] in {
def : InstAlias<"zext.w $rd, $rs", (ADD_UW GPR:$rd, GPR:$rs, X0)>;
} // Predicates = [HasStdExtZba, IsRV64]
-let Predicates = [HasStdExtZbb] in {
+let Predicates = [HasStdExtZbbOrZbkb] in {
def : InstAlias<"ror $rd, $rs1, $shamt",
- (RORI GPR:$rd, GPR:$rs1, uimmlog2xlen:$shamt), 0>;
-} // Predicates = [HasStdExtZbb]
+ (RORI GPR:$rd, GPR:$rs1, uimmlog2xlen:$shamt), 0>;
+} // Predicates = [HasStdExtZbbOrZbkb]
-let Predicates = [HasStdExtZbb, IsRV64] in {
+let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in {
def : InstAlias<"rorw $rd, $rs1, $shamt",
- (RORIW GPR:$rd, GPR:$rs1, uimm5:$shamt), 0>;
-} // Predicates = [HasStdExtZbb, IsRV64]
+ (RORIW GPR:$rd, GPR:$rs1, uimm5:$shamt), 0>;
+} // Predicates = [HasStdExtZbbOrZbkb, IsRV64]
let Predicates = [HasStdExtZbs] in {
def : InstAlias<"bset $rd, $rs1, $shamt",
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td
index 32e7f962aa2a..76dc027ffd1d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td
@@ -22,5 +22,5 @@ class CMOPInst<bits<3> imm3, string opcodestr>
foreach n = [1, 3, 5, 7, 9, 11, 13, 15] in {
let Predicates = [HasStdExtZcmop] in
- def C_MOP # n : CMOPInst<!srl(n, 1), "c.mop." # n>, Sched<[]>;
+ def C_MOP_ # n : CMOPInst<!srl(n, 1), "c.mop." # n>, Sched<[]>;
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td
index 49a57f86cccd..50ebaa995197 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td
@@ -62,6 +62,21 @@ defm SSAMOSWAP_W : AMO_rr_aq_rl<0b01001, 0b010, "ssamoswap.w">;
let Predicates = [HasStdExtZicfiss, IsRV64] in
defm SSAMOSWAP_D : AMO_rr_aq_rl<0b01001, 0b011, "ssamoswap.d">;
+let Predicates = [HasStdExtZimop] in { // these two pseudos expand to Zimop MOP_* instructions
+let hasSideEffects = 1, mayLoad = 0, mayStore = 1 in // flagged as a store with side effects
+def PseudoMOP_SSPUSH : Pseudo<(outs), (ins GPRX1X5:$rs2), []>, // single input, no outputs
+ PseudoInstExpansion<(MOP_RR_7 X0, X0, GPR:$rs2)>; // expands to MOP_RR_7 with X0 for $rd and $rs1
+let hasSideEffects = 1, mayLoad = 1, mayStore = 0 in // flagged as a load with side effects
+def PseudoMOP_SSPOPCHK : Pseudo<(outs), (ins GPRX1X5:$rs1), []>, // single input, no outputs
+ PseudoInstExpansion<(MOP_R_28 X0, GPR:$rs1)>; // expands to MOP_R_28 with X0 as the first operand
+} // Predicates = [HasStdExtZimop]
+
+let Predicates = [HasStdExtZcmop] in { // this pseudo expands to a Zcmop C_MOP_* instruction
+let Uses = [X1], hasSideEffects = 1, mayLoad = 0, mayStore = 1 in // X1 is an implicit use; there are no explicit operands
+def PseudoMOP_C_SSPUSH : Pseudo<(outs), (ins), []>,
+ PseudoInstExpansion<(C_MOP_1)>; // expands to C_MOP_1
+} // Predicates = [HasStdExtZcmop]
+
//===----------------------------------------------------------------------===//
// Compress Instruction tablegen backend.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td
index 960f5669b488..0d08176f9799 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td
@@ -33,13 +33,13 @@ class RVInstRMoprr<bits<4> imm4, bits<3> imm3, bits<3> funct3, RISCVOpcode opcod
}
// May-Be-Operations
-def riscv_mopr : RVSDNode<"MOPR",
- SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>]>>;
-def riscv_moprr : RVSDNode<"MOPRR",
- SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>,
- SDTCisSameAs<0, 3>]>>;
+def riscv_mop_r : RVSDNode<"MOP_R", // matched by the MOP_R_<i> patterns in this file
+ SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, // 1 integer result, 2 operands of the same type
+ SDTCisSameAs<0, 2>]>>;
+def riscv_mop_rr : RVSDNode<"MOP_RR", // matched by the MOP_RR_<i> patterns in this file
+ SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, // 1 integer result, 3 operands of the same type
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>>;
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class RVMopr<bits<7> imm7, bits<5> imm5, bits<3> funct3,
@@ -50,31 +50,32 @@ class RVMopr<bits<7> imm7, bits<5> imm5, bits<3> funct3,
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
class RVMoprr<bits<4> imm4, bits<3> imm3, bits<3> funct3,
RISCVOpcode opcode, string opcodestr>
- : RVInstRMoprr<imm4, imm3, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2),
+ : RVInstRMoprr<imm4, imm3, funct3, opcode, (outs GPR:$rd),
+ (ins GPR:$rs1, GPR:$rs2),
opcodestr, "$rd, $rs1, $rs2">;
foreach i = 0...31 in {
let Predicates = [HasStdExtZimop] in
- def MOPR#i : RVMopr<0b1000111, i, 0b100, OPC_SYSTEM, "mop.r."#i>,
- Sched<[]>;
+ def MOP_R_#i : RVMopr<0b1000111, i, 0b100, OPC_SYSTEM, "mop.r."#i>,
+ Sched<[]>;
}
foreach i = 0...7 in {
let Predicates = [HasStdExtZimop] in
- def MOPRR#i : RVMoprr<0b1001, i, 0b100, OPC_SYSTEM, "mop.rr."#i>,
+ def MOP_RR_#i : RVMoprr<0b1001, i, 0b100, OPC_SYSTEM, "mop.rr."#i>,
Sched<[]>;
}
let Predicates = [HasStdExtZimop] in {
// Zimop instructions
foreach i = 0...31 in {
- def : Pat<(XLenVT (riscv_mopr GPR:$rs1, (XLenVT i))),
- (!cast<Instruction>("MOPR"#i) GPR:$rs1)>;
+ def : Pat<(XLenVT (riscv_mop_r GPR:$rs1, (XLenVT i))),
+ (!cast<Instruction>("MOP_R_"#i) GPR:$rs1)>;
}
foreach i = 0...7 in {
- def : Pat<(XLenVT (riscv_moprr GPR:$rs1, GPR:$rs2, (XLenVT i))),
- (!cast<Instruction>("MOPRR"#i) GPR:$rs1, GPR:$rs2)>;
+ def : Pat<(XLenVT (riscv_mop_rr GPR:$rs1, GPR:$rs2, (XLenVT i))),
+ (!cast<Instruction>("MOP_RR_"#i) GPR:$rs1, GPR:$rs2)>;
}
} // Predicates = [HasStdExtZimop]
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvqdotq.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvqdotq.td
index 27959eaccd90..00c4e83e18a0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvqdotq.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvqdotq.td
@@ -17,16 +17,39 @@
// Instructions
//===----------------------------------------------------------------------===//
+class VQDOTVV<bits<6> funct6, RISCVVFormat opv, string opcodestr> // vector-vector vqdot* instruction shape
+ : RVInstVV<funct6, opv, (outs VR:$vd_wb),
+ (ins VR:$vd, VR:$vs2, VR:$vs1, VMaskOp:$vm), // $vd is an input as well as the destination
+ opcodestr, "$vd, $vs2, $vs1$vm"> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = "$vd = $vd_wb"; // ties the $vd input to the written-back result
+}
+
+class VQDOTVX<bits<6> funct6, RISCVVFormat opv, string opcodestr> // vector-scalar vqdot* instruction shape
+ : RVInstVX<funct6, opv, (outs VR:$vd_wb),
+ (ins VR:$vd, VR:$vs2, GPR:$rs1, VMaskOp:$vm), // $vd is an input as well as the destination; $rs1 is a GPR
+ opcodestr, "$vd, $vs2, $rs1$vm"> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = "$vd = $vd_wb"; // ties the $vd input to the written-back result
+}
+
let Predicates = [HasStdExtZvqdotq] in {
- def VQDOT_VV : VALUVV<0b101100, OPMVV, "vqdot.vv">;
- def VQDOT_VX : VALUVX<0b101100, OPMVX, "vqdot.vx">;
- def VQDOTU_VV : VALUVV<0b101000, OPMVV, "vqdotu.vv">;
- def VQDOTU_VX : VALUVX<0b101000, OPMVX, "vqdotu.vx">;
- def VQDOTSU_VV : VALUVV<0b101010, OPMVV, "vqdotsu.vv">;
- def VQDOTSU_VX : VALUVX<0b101010, OPMVX, "vqdotsu.vx">;
- def VQDOTUS_VX : VALUVX<0b101110, OPMVX, "vqdotus.vx">;
+ def VQDOT_VV : VQDOTVV<0b101100, OPMVV, "vqdot.vv">;
+ def VQDOT_VX : VQDOTVX<0b101100, OPMVX, "vqdot.vx">;
+ def VQDOTU_VV : VQDOTVV<0b101000, OPMVV, "vqdotu.vv">;
+ def VQDOTU_VX : VQDOTVX<0b101000, OPMVX, "vqdotu.vx">;
+ def VQDOTSU_VV : VQDOTVV<0b101010, OPMVV, "vqdotsu.vv">;
+ def VQDOTSU_VX : VQDOTVX<0b101010, OPMVX, "vqdotsu.vx">;
+ def VQDOTUS_VX : VQDOTVX<0b101110, OPMVX, "vqdotus.vx">;
} // Predicates = [HasStdExtZvqdotq]
+//===----------------------------------------------------------------------===//
+// Helpers to define the VL patterns.
+//===----------------------------------------------------------------------===//
let HasPassthruOp = true, HasMaskOp = true in {
def riscv_vqdot_vl : RVSDNode<"VQDOT_VL", SDT_RISCVIntBinOp_VL>;
@@ -34,6 +57,10 @@ let HasPassthruOp = true, HasMaskOp = true in {
def riscv_vqdotsu_vl : RVSDNode<"VQDOTSU_VL", SDT_RISCVIntBinOp_VL>;
} // let HasPassthruOp = true, HasMaskOp = true
+//===----------------------------------------------------------------------===//
+// Pseudo Instructions for CodeGen
+//===----------------------------------------------------------------------===//
+
multiclass VPseudoVQDOT_VV_VX {
foreach m = MxSet<32>.m in {
defm "" : VPseudoBinaryV_VV<m>,
@@ -52,10 +79,69 @@ let Predicates = [HasStdExtZvqdotq], mayLoad = 0, mayStore = 0,
defm PseudoVQDOT : VPseudoVQDOT_VV_VX;
defm PseudoVQDOTU : VPseudoVQDOT_VV_VX;
defm PseudoVQDOTSU : VPseudoVQDOT_VV_VX;
+ // VQDOTUS does not have a VV variant
+ foreach m = MxListVF4 in {
+ defm "PseudoVQDOTUS_VX" : VPseudoTernaryWithPolicy<m.vrclass, m.vrclass, GPR, m>;
+ }
}
+//===----------------------------------------------------------------------===//
+// Patterns.
+//===----------------------------------------------------------------------===//
+
defvar AllE32Vectors = [VI32MF2, VI32M1, VI32M2, VI32M4, VI32M8];
defm : VPatBinaryVL_VV_VX<riscv_vqdot_vl, "PseudoVQDOT", AllE32Vectors>;
defm : VPatBinaryVL_VV_VX<riscv_vqdotu_vl, "PseudoVQDOTU", AllE32Vectors>;
defm : VPatBinaryVL_VV_VX<riscv_vqdotsu_vl, "PseudoVQDOTSU", AllE32Vectors>;
+// These VPat definitions are for vqdot because it has a different operand
+// order from other ternary instructions (i.e. vop.vx vd, vs2, rs1).
+multiclass VPatTernaryV_VX_AABX<string intrinsic, string instruction,
+ list<VTypeInfoToWide> info_pairs> {
+ foreach pair = info_pairs in {
+ defvar VdInfo = pair.Wti;
+ defvar Vs2Info = pair.Vti;
+ let Predicates = GetVTypePredicates<VdInfo>.Predicates in
+ defm : VPatTernaryWithPolicy<intrinsic, instruction,
+ "V"#VdInfo.ScalarSuffix,
+ VdInfo.Vector, Vs2Info.Vector, Vs2Info.Scalar,
+ VdInfo.Mask, VdInfo.Log2SEW, VdInfo.LMul,
+ VdInfo.RegClass, Vs2Info.RegClass,
+ Vs2Info.ScalarRegClass>;
+ }
+}
+
+multiclass VPatTernaryV_VV_AABX<string intrinsic, string instruction,
+ list<VTypeInfoToWide> info_pairs> {
+ foreach pair = info_pairs in {
+ defvar VdInfo = pair.Wti;
+ defvar Vs2Info = pair.Vti;
+ let Predicates = GetVTypePredicates<VdInfo>.Predicates in
+ defm : VPatTernaryWithPolicy<intrinsic, instruction,
+ "VV",
+ VdInfo.Vector, Vs2Info.Vector, Vs2Info.Vector,
+ VdInfo.Mask, VdInfo.Log2SEW, VdInfo.LMul,
+ VdInfo.RegClass, Vs2Info.RegClass,
+ Vs2Info.RegClass>;
+ }
+}
+
+multiclass VPatTernaryV_VV_VX_AABX<string intrinsic, string instruction,
+ list<VTypeInfoToWide> info_pairs>
+ : VPatTernaryV_VV_AABX<intrinsic, instruction, info_pairs>,
+ VPatTernaryV_VX_AABX<intrinsic, instruction, info_pairs>;
+
+defset list<VTypeInfoToWide> VQDOTInfoPairs = {
+ def : VTypeInfoToWide<VI8MF2, VI32MF2>;
+ def : VTypeInfoToWide<VI8M1, VI32M1>;
+ def : VTypeInfoToWide<VI8M2, VI32M2>;
+ def : VTypeInfoToWide<VI8M4, VI32M4>;
+ def : VTypeInfoToWide<VI8M8, VI32M8>;
+}
+
+let Predicates = [HasStdExtZvqdotq] in {
+ defm : VPatTernaryV_VV_VX_AABX<"int_riscv_vqdot", "PseudoVQDOT", VQDOTInfoPairs>;
+ defm : VPatTernaryV_VV_VX_AABX<"int_riscv_vqdotu", "PseudoVQDOTU", VQDOTInfoPairs>;
+ defm : VPatTernaryV_VV_VX_AABX<"int_riscv_vqdotsu", "PseudoVQDOTSU", VQDOTInfoPairs>;
+ defm : VPatTernaryV_VX_AABX<"int_riscv_vqdotus", "PseudoVQDOTUS", VQDOTInfoPairs>;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
index 4abe62f4e874..06309262f1b0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td
@@ -148,6 +148,14 @@ def isNonZeroLoadImmediate
CheckNot<CheckImmOperand<2, 0>>
]>>>;
+def isLPAD
+ : TIIPredicate<"isLPAD",
+ MCReturnStatement<CheckAll<[
+ CheckOpcode<[AUIPC]>,
+ CheckIsRegOperand<0>,
+ CheckRegOperand<0, X0>,
+ ]>>>;
+
def ignoresVXRM
: TIIPredicate<"ignoresVXRM",
MCOpcodeSwitchStatement<
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index c7b96f5c3d0c..5e1063155ba0 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -81,6 +81,12 @@ static const Intrinsic::ID FixedVssegIntrIds[] = {
Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask,
Intrinsic::riscv_seg8_store_mask};
+static const Intrinsic::ID FixedVsssegIntrIds[] = {
+ Intrinsic::riscv_sseg2_store_mask, Intrinsic::riscv_sseg3_store_mask,
+ Intrinsic::riscv_sseg4_store_mask, Intrinsic::riscv_sseg5_store_mask,
+ Intrinsic::riscv_sseg6_store_mask, Intrinsic::riscv_sseg7_store_mask,
+ Intrinsic::riscv_sseg8_store_mask};
+
static const Intrinsic::ID ScalableVssegIntrIds[] = {
Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
@@ -275,7 +281,16 @@ bool RISCVTargetLowering::lowerInterleavedLoad(
bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
Value *LaneMask,
ShuffleVectorInst *SVI,
- unsigned Factor) const {
+ unsigned Factor,
+ const APInt &GapMask) const {
+ assert(GapMask.getBitWidth() == Factor);
+
+ // We only support cases where the skipped fields are the trailing ones.
+ // TODO: Lower to strided store if there is only a single active field.
+ unsigned MaskFactor = GapMask.popcount();
+ if (MaskFactor < 2 || !GapMask.isMask())
+ return false;
+
IRBuilder<> Builder(Store);
const DataLayout &DL = Store->getDataLayout();
auto Mask = SVI->getShuffleMask();
@@ -287,21 +302,31 @@ bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
Value *Ptr, *VL;
Align Alignment;
- if (!getMemOperands(Factor, VTy, XLenTy, Store, Ptr, LaneMask, VL, Alignment))
+ if (!getMemOperands(MaskFactor, VTy, XLenTy, Store, Ptr, LaneMask, VL,
+ Alignment))
return false;
Type *PtrTy = Ptr->getType();
unsigned AS = PtrTy->getPointerAddressSpace();
- if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL))
+ if (!isLegalInterleavedAccessType(VTy, MaskFactor, Alignment, AS, DL))
return false;
- Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
- Store->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy});
+ Function *SegStoreFunc;
+ if (MaskFactor < Factor)
+ // Strided segmented store.
+ SegStoreFunc = Intrinsic::getOrInsertDeclaration(
+ Store->getModule(), FixedVsssegIntrIds[MaskFactor - 2],
+ {VTy, PtrTy, XLenTy, XLenTy});
+ else
+ // Normal segmented store.
+ SegStoreFunc = Intrinsic::getOrInsertDeclaration(
+ Store->getModule(), FixedVssegIntrIds[Factor - 2],
+ {VTy, PtrTy, XLenTy});
SmallVector<Value *, 10> Ops;
SmallVector<int, 16> NewShuffleMask;
- for (unsigned i = 0; i < Factor; i++) {
+ for (unsigned i = 0; i < MaskFactor; i++) {
// Collect shuffle mask for this lane.
for (unsigned j = 0; j < VTy->getNumElements(); j++)
NewShuffleMask.push_back(Mask[i + Factor * j]);
@@ -312,8 +337,14 @@ bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
NewShuffleMask.clear();
}
- Ops.append({Ptr, LaneMask, VL});
- Builder.CreateCall(VssegNFunc, Ops);
+ Ops.push_back(Ptr);
+ if (MaskFactor < Factor) {
+ // Insert the stride argument.
+ unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType());
+ Ops.push_back(ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes));
+ }
+ Ops.append({LaneMask, VL});
+ Builder.CreateCall(SegStoreFunc, Ops);
return true;
}
diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
index 3b19c3456ad6..d08115b72977 100644
--- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
+++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -356,6 +356,14 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI,
return false;
Worklist.emplace_back(UserMI, Bits);
break;
+ case RISCV::TH_EXT:
+ case RISCV::TH_EXTU:
+ unsigned Msb = UserMI->getOperand(2).getImm();
+ unsigned Lsb = UserMI->getOperand(3).getImm();
+ // Behavior of Msb < Lsb is not well documented.
+ if (Msb >= Lsb && Bits > Msb)
+ break;
+ return false;
}
}
}
@@ -409,6 +417,16 @@ static bool isSignExtendingOpW(const MachineInstr &MI, unsigned OpNo) {
assert(Log2SEW >= 3 && Log2SEW <= 6 && "Unexpected Log2SEW");
return Log2SEW <= 5;
}
+ case RISCV::TH_EXT: {
+ unsigned Msb = MI.getOperand(2).getImm();
+ unsigned Lsb = MI.getOperand(3).getImm();
+ return Msb >= Lsb && (Msb - Lsb + 1) <= 32;
+ }
+ case RISCV::TH_EXTU: {
+ unsigned Msb = MI.getOperand(2).getImm();
+ unsigned Lsb = MI.getOperand(3).getImm();
+ return Msb >= Lsb && (Msb - Lsb + 1) < 32;
+ }
}
return false;
@@ -519,9 +537,11 @@ static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST,
case RISCV::ANDI:
case RISCV::ORI:
case RISCV::XORI:
+ case RISCV::SRAI:
// |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R.
// DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1
// Logical operations use a sign extended 12-bit immediate.
+ // Arithmetic shift right can only increase the number of sign bits.
if (!AddRegToWorkList(MI->getOperand(1).getReg()))
return false;
@@ -556,6 +576,9 @@ static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST,
case RISCV::PseudoCCAND:
case RISCV::PseudoCCOR:
case RISCV::PseudoCCXOR:
+ case RISCV::PseudoCCANDN:
+ case RISCV::PseudoCCORN:
+ case RISCV::PseudoCCXNOR:
case RISCV::PHI: {
// If all incoming values are sign-extended, the output of AND, OR, XOR,
// MIN, MAX, or PHI is also sign-extended.
@@ -578,6 +601,9 @@ static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST,
case RISCV::PseudoCCAND:
case RISCV::PseudoCCOR:
case RISCV::PseudoCCXOR:
+ case RISCV::PseudoCCANDN:
+ case RISCV::PseudoCCORN:
+ case RISCV::PseudoCCXNOR:
B = 4;
E = 7;
break;
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index f89d94f41b69..36d63ed23b92 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -121,7 +121,8 @@ def MIPS_P8700 : RISCVProcessorModel<"mips-p8700",
FeatureStdExtZicsr,
FeatureVendorXMIPSCMov,
FeatureVendorXMIPSLSP,
- FeatureVendorXMIPSCBOP],
+ FeatureVendorXMIPSCBOP,
+ FeatureVendorXMIPSEXECTL],
[TuneMIPSP8700]>;
def ROCKET_RV32 : RISCVProcessorModel<"rocket-rv32",
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index f3966a55ce7d..40b641680b2c 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -966,7 +966,9 @@ bool RISCVRegisterInfo::getRegAllocationHints(
}
}
- // Add a hint if it would allow auipc/lui+addi(w) fusion.
+ // Add a hint if it would allow auipc/lui+addi(w) fusion. We do this even
+ // without the fusions explicitly enabled as the impact is rarely negative
+ // and some cores do implement this fusion.
if ((MI.getOpcode() == RISCV::ADDIW || MI.getOpcode() == RISCV::ADDI) &&
MI.getOperand(1).isReg()) {
const MachineBasicBlock &MBB = *MI.getParent();
@@ -974,9 +976,7 @@ bool RISCVRegisterInfo::getRegAllocationHints(
// Is the previous instruction a LUI or AUIPC that can be fused?
if (I != MBB.begin()) {
I = skipDebugInstructionsBackward(std::prev(I), MBB.begin());
- if (((I->getOpcode() == RISCV::LUI && Subtarget.hasLUIADDIFusion()) ||
- (I->getOpcode() == RISCV::AUIPC &&
- Subtarget.hasAUIPCADDIFusion())) &&
+ if ((I->getOpcode() == RISCV::LUI || I->getOpcode() == RISCV::AUIPC) &&
I->getOperand(0).getReg() == MI.getOperand(1).getReg()) {
if (OpIdx == 0)
tryAddHint(MO, MI.getOperand(1), /*NeedGPRC=*/false);
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index fd57e02c25d0..50e76df56e57 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -186,6 +186,12 @@ public:
return HasStdExtZfhmin || HasStdExtZfbfmin;
}
+ bool hasBEXTILike() const { return HasStdExtZbs || HasVendorXTHeadBs; }
+
+ bool hasCZEROLike() const {
+ return HasStdExtZicond || HasVendorXVentanaCondOps;
+ }
+
bool hasConditionalMoveFusion() const {
// Do we support fusing a branch+mv or branch+c.mv as a conditional move.
return (hasConditionalCompressedMoveFusion() && hasStdExtZca()) ||
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index d70b1d0dc8d5..460bb33f2553 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -652,7 +652,8 @@ void RISCVPassConfig::addPostRegAlloc() {
void RISCVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
PB.registerLateLoopOptimizationsEPCallback([=](LoopPassManager &LPM,
OptimizationLevel Level) {
- LPM.addPass(LoopIdiomVectorizePass(LoopIdiomVectorizeStyle::Predicated));
+ if (Level != OptimizationLevel::O0)
+ LPM.addPass(LoopIdiomVectorizePass(LoopIdiomVectorizeStyle::Predicated));
});
}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index c707fb110b10..1ca513214f67 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1566,6 +1566,18 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}
+InstructionCost
+RISCVTTIImpl::getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE,
+ const SCEV *Ptr,
+ TTI::TargetCostKind CostKind) const {
+ // Address computations for vector indexed load/store likely require an offset
+ // and/or scaling.
+ if (ST->hasVInstructions() && PtrTy->isVectorTy())
+ return getArithmeticInstrCost(Instruction::Add, PtrTy, CostKind);
+
+ return BaseT::getAddressComputationCost(PtrTy, SE, Ptr, CostKind);
+}
+
InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
Type *Src,
TTI::CastContextHint CCH,
@@ -2731,6 +2743,10 @@ unsigned RISCVTTIImpl::getMinTripCountTailFoldingThreshold() const {
return RVVMinTripCount;
}
+bool RISCVTTIImpl::preferAlternateOpcodeVectorization() const {
+ return ST->enableUnalignedVectorMem();
+}
+
TTI::AddressingModeKind
RISCVTTIImpl::getPreferredAddressingMode(const Loop *L,
ScalarEvolution *SE) const {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 3236b2a35c85..6bd7d51daff6 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -132,7 +132,7 @@ public:
unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override;
- bool preferAlternateOpcodeVectorization() const override { return false; }
+ bool preferAlternateOpcodeVectorization() const override;
bool preferEpilogueVectorization() const override {
// Epilogue vectorization is usually unprofitable - tail folding or
@@ -177,6 +177,10 @@ public:
getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
TTI::TargetCostKind CostKind) const override;
+ InstructionCost
+ getAddressComputationCost(Type *PTy, ScalarEvolution *SE, const SCEV *Ptr,
+ TTI::TargetCostKind CostKind) const override;
+
InstructionCost getInterleavedMemoryOpCost(
unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 53557049ea33..29526cf5a527 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -178,8 +178,20 @@ static unsigned getIntegerExtensionOperandEEW(unsigned Factor,
return Log2EEW;
}
-static std::optional<unsigned>
-getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
+#define VSEG_CASES(Prefix, EEW) \
+ RISCV::Prefix##SEG2E##EEW##_V: \
+ case RISCV::Prefix##SEG3E##EEW##_V: \
+ case RISCV::Prefix##SEG4E##EEW##_V: \
+ case RISCV::Prefix##SEG5E##EEW##_V: \
+ case RISCV::Prefix##SEG6E##EEW##_V: \
+ case RISCV::Prefix##SEG7E##EEW##_V: \
+ case RISCV::Prefix##SEG8E##EEW##_V
+#define VSSEG_CASES(EEW) VSEG_CASES(VS, EEW)
+#define VSSSEG_CASES(EEW) VSEG_CASES(VSS, EEW)
+#define VSUXSEG_CASES(EEW) VSEG_CASES(VSUX, I##EEW)
+#define VSOXSEG_CASES(EEW) VSEG_CASES(VSOX, I##EEW)
+
+static std::optional<unsigned> getOperandLog2EEW(const MachineOperand &MO) {
const MachineInstr &MI = *MO.getParent();
const MCInstrDesc &Desc = MI.getDesc();
const RISCVVPseudosTable::PseudoInfo *RVV =
@@ -225,21 +237,29 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VSE8_V:
case RISCV::VLSE8_V:
case RISCV::VSSE8_V:
+ case VSSEG_CASES(8):
+ case VSSSEG_CASES(8):
return 3;
case RISCV::VLE16_V:
case RISCV::VSE16_V:
case RISCV::VLSE16_V:
case RISCV::VSSE16_V:
+ case VSSEG_CASES(16):
+ case VSSSEG_CASES(16):
return 4;
case RISCV::VLE32_V:
case RISCV::VSE32_V:
case RISCV::VLSE32_V:
case RISCV::VSSE32_V:
+ case VSSEG_CASES(32):
+ case VSSSEG_CASES(32):
return 5;
case RISCV::VLE64_V:
case RISCV::VSE64_V:
case RISCV::VLSE64_V:
case RISCV::VSSE64_V:
+ case VSSEG_CASES(64):
+ case VSSSEG_CASES(64):
return 6;
// Vector Indexed Instructions
@@ -248,7 +268,9 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VLUXEI8_V:
case RISCV::VLOXEI8_V:
case RISCV::VSUXEI8_V:
- case RISCV::VSOXEI8_V: {
+ case RISCV::VSOXEI8_V:
+ case VSUXSEG_CASES(8):
+ case VSOXSEG_CASES(8): {
if (MO.getOperandNo() == 0)
return MILog2SEW;
return 3;
@@ -256,7 +278,9 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VLUXEI16_V:
case RISCV::VLOXEI16_V:
case RISCV::VSUXEI16_V:
- case RISCV::VSOXEI16_V: {
+ case RISCV::VSOXEI16_V:
+ case VSUXSEG_CASES(16):
+ case VSOXSEG_CASES(16): {
if (MO.getOperandNo() == 0)
return MILog2SEW;
return 4;
@@ -264,7 +288,9 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VLUXEI32_V:
case RISCV::VLOXEI32_V:
case RISCV::VSUXEI32_V:
- case RISCV::VSOXEI32_V: {
+ case RISCV::VSOXEI32_V:
+ case VSUXSEG_CASES(32):
+ case VSOXSEG_CASES(32): {
if (MO.getOperandNo() == 0)
return MILog2SEW;
return 5;
@@ -272,7 +298,9 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VLUXEI64_V:
case RISCV::VLOXEI64_V:
case RISCV::VSUXEI64_V:
- case RISCV::VSOXEI64_V: {
+ case RISCV::VSOXEI64_V:
+ case VSUXSEG_CASES(64):
+ case VSOXSEG_CASES(64): {
if (MO.getOperandNo() == 0)
return MILog2SEW;
return 6;
@@ -422,9 +450,6 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
case RISCV::VRGATHER_VI:
case RISCV::VRGATHER_VV:
case RISCV::VRGATHER_VX:
- // Vector Compress Instruction
- // EEW=SEW.
- case RISCV::VCOMPRESS_VM:
// Vector Element Index Instruction
case RISCV::VID_V:
// Vector Single-Width Floating-Point Add/Subtract Instructions
@@ -674,6 +699,12 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
return MILog2SEW;
}
+ // Vector Compress Instruction
+ // EEW=SEW, except the mask operand has EEW=1. Mask operand is not handled
+ // before this switch.
+ case RISCV::VCOMPRESS_VM:
+ return MO.getOperandNo() == 3 ? 0 : MILog2SEW;
+
// Vector Iota Instruction
// EEW=SEW, except the mask operand has EEW=1. Mask operand is not handled
// before this switch.
@@ -778,14 +809,13 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
}
}
-static std::optional<OperandInfo>
-getOperandInfo(const MachineOperand &MO, const MachineRegisterInfo *MRI) {
+static std::optional<OperandInfo> getOperandInfo(const MachineOperand &MO) {
const MachineInstr &MI = *MO.getParent();
const RISCVVPseudosTable::PseudoInfo *RVV =
RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
assert(RVV && "Could not find MI in PseudoTable");
- std::optional<unsigned> Log2EEW = getOperandLog2EEW(MO, MRI);
+ std::optional<unsigned> Log2EEW = getOperandLog2EEW(MO);
if (!Log2EEW)
return std::nullopt;
@@ -900,13 +930,6 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VSEXT_VF4:
case RISCV::VZEXT_VF8:
case RISCV::VSEXT_VF8:
- // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
- // FIXME: Add support
- case RISCV::VMADC_VV:
- case RISCV::VMADC_VI:
- case RISCV::VMADC_VX:
- case RISCV::VMSBC_VV:
- case RISCV::VMSBC_VX:
// Vector Narrowing Integer Right Shift Instructions
case RISCV::VNSRL_WX:
case RISCV::VNSRL_WI:
@@ -993,6 +1016,11 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VSBC_VXM:
case RISCV::VMSBC_VVM:
case RISCV::VMSBC_VXM:
+ case RISCV::VMADC_VV:
+ case RISCV::VMADC_VI:
+ case RISCV::VMADC_VX:
+ case RISCV::VMSBC_VV:
+ case RISCV::VMSBC_VX:
// Vector Widening Integer Multiply-Add Instructions
case RISCV::VWMACCU_VV:
case RISCV::VWMACCU_VX:
@@ -1001,10 +1029,7 @@ static bool isSupportedInstr(const MachineInstr &MI) {
case RISCV::VWMACCSU_VV:
case RISCV::VWMACCSU_VX:
case RISCV::VWMACCUS_VX:
- // Vector Integer Merge Instructions
- // FIXME: Add support
// Vector Integer Move Instructions
- // FIXME: Add support
case RISCV::VMV_V_I:
case RISCV::VMV_V_X:
case RISCV::VMV_V_V:
@@ -1306,7 +1331,8 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const {
// TODO: Use a better approach than a white-list, such as adding
// properties to instructions using something like TSFlags.
if (!isSupportedInstr(MI)) {
- LLVM_DEBUG(dbgs() << "Not a candidate due to unsupported instruction\n");
+ LLVM_DEBUG(dbgs() << "Not a candidate due to unsupported instruction: "
+ << MI);
return false;
}
@@ -1328,14 +1354,14 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
const MCInstrDesc &Desc = UserMI.getDesc();
if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) {
- LLVM_DEBUG(dbgs() << " Abort due to lack of VL, assume that"
+ LLVM_DEBUG(dbgs() << " Abort due to lack of VL, assume that"
" use VLMAX\n");
return std::nullopt;
}
if (RISCVII::readsPastVL(
TII->get(RISCV::getRVVMCOpcode(UserMI.getOpcode())).TSFlags)) {
- LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n");
+ LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n");
return std::nullopt;
}
@@ -1352,7 +1378,7 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
RISCVII::isFirstDefTiedToFirstUse(UserMI.getDesc()));
auto DemandedVL = DemandedVLs.lookup(&UserMI);
if (!DemandedVL || !RISCV::isVLKnownLE(*DemandedVL, VLOp)) {
- LLVM_DEBUG(dbgs() << " Abort because user is passthru in "
+ LLVM_DEBUG(dbgs() << " Abort because user is passthru in "
"instruction with demanded tail\n");
return std::nullopt;
}
@@ -1376,6 +1402,54 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const {
return VLOp;
}
+/// Return true if MI is an instruction used for assembling registers
+/// for segmented store instructions, namely, RISCVISD::TUPLE_INSERT.
+/// Currently it's lowered to INSERT_SUBREG.
+static bool isTupleInsertInstr(const MachineInstr &MI) {
+ if (!MI.isInsertSubreg())
+ return false;
+
+ const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
+ const TargetRegisterClass *DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
+ const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
+ if (!RISCVRI::isVRegClass(DstRC->TSFlags))
+ return false;
+ unsigned NF = RISCVRI::getNF(DstRC->TSFlags);
+ if (NF < 2)
+ return false;
+
+ // Check whether INSERT_SUBREG has the correct subreg index for tuple inserts.
+ auto VLMul = RISCVRI::getLMul(DstRC->TSFlags);
+ unsigned SubRegIdx = MI.getOperand(3).getImm();
+ [[maybe_unused]] auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul);
+ assert(!IsFractional && "unexpected LMUL for tuple register classes");
+ return TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * LMul;
+}
+
+static bool isSegmentedStoreInstr(const MachineInstr &MI) {
+ switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
+ case VSSEG_CASES(8):
+ case VSSSEG_CASES(8):
+ case VSUXSEG_CASES(8):
+ case VSOXSEG_CASES(8):
+ case VSSEG_CASES(16):
+ case VSSSEG_CASES(16):
+ case VSUXSEG_CASES(16):
+ case VSOXSEG_CASES(16):
+ case VSSEG_CASES(32):
+ case VSSSEG_CASES(32):
+ case VSUXSEG_CASES(32):
+ case VSOXSEG_CASES(32):
+ case VSSEG_CASES(64):
+ case VSSSEG_CASES(64):
+ case VSUXSEG_CASES(64):
+ case VSOXSEG_CASES(64):
+ return true;
+ default:
+ return false;
+ }
+}
+
std::optional<MachineOperand>
RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
std::optional<MachineOperand> CommonVL;
@@ -1396,6 +1470,23 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
continue;
}
+ if (isTupleInsertInstr(UserMI)) {
+ LLVM_DEBUG(dbgs().indent(4) << "Peeking through uses of INSERT_SUBREG\n");
+ for (MachineOperand &UseOp :
+ MRI->use_operands(UserMI.getOperand(0).getReg())) {
+ const MachineInstr &CandidateMI = *UseOp.getParent();
+ // We should not propagate the VL if the user is not a segmented store
+ // or another INSERT_SUBREG, since VL just works differently
+ // between segmented operations (per-field) vs. other RVV ops (on the
+ // whole register group).
+ if (!isTupleInsertInstr(CandidateMI) &&
+ !isSegmentedStoreInstr(CandidateMI))
+ return std::nullopt;
+ Worklist.insert(&UseOp);
+ }
+ continue;
+ }
+
if (UserMI.isPHI()) {
// Don't follow PHI cycles
if (!PHISeen.insert(&UserMI).second)
@@ -1425,9 +1516,8 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
return std::nullopt;
}
- std::optional<OperandInfo> ConsumerInfo = getOperandInfo(UserOp, MRI);
- std::optional<OperandInfo> ProducerInfo =
- getOperandInfo(MI.getOperand(0), MRI);
+ std::optional<OperandInfo> ConsumerInfo = getOperandInfo(UserOp);
+ std::optional<OperandInfo> ProducerInfo = getOperandInfo(MI.getOperand(0));
if (!ConsumerInfo || !ProducerInfo) {
LLVM_DEBUG(dbgs() << " Abort due to unknown operand information.\n");
LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n");
@@ -1449,7 +1539,7 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
}
bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
- LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI << "\n");
+ LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI);
unsigned VLOpNum = RISCVII::getVLOpNum(MI.getDesc());
MachineOperand &VLOp = MI.getOperand(VLOpNum);
@@ -1468,14 +1558,23 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
assert((CommonVL->isImm() || CommonVL->getReg().isVirtual()) &&
"Expected VL to be an Imm or virtual Reg");
+ // If the VL is defined by a vleff that doesn't dominate MI, try using the
+ // vleff's AVL. It will be greater than or equal to the output VL.
+ if (CommonVL->isReg()) {
+ const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->getReg());
+ if (RISCVInstrInfo::isFaultOnlyFirstLoad(*VLMI) &&
+ !MDT->dominates(VLMI, &MI))
+ CommonVL = VLMI->getOperand(RISCVII::getVLOpNum(VLMI->getDesc()));
+ }
+
if (!RISCV::isVLKnownLE(*CommonVL, VLOp)) {
- LLVM_DEBUG(dbgs() << " Abort due to CommonVL not <= VLOp.\n");
+ LLVM_DEBUG(dbgs() << " Abort due to CommonVL not <= VLOp.\n");
return false;
}
if (CommonVL->isIdenticalTo(VLOp)) {
LLVM_DEBUG(
- dbgs() << " Abort due to CommonVL == VLOp, no point in reducing.\n");
+ dbgs() << " Abort due to CommonVL == VLOp, no point in reducing.\n");
return false;
}
@@ -1486,8 +1585,10 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
return true;
}
const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->getReg());
- if (!MDT->dominates(VLMI, &MI))
+ if (!MDT->dominates(VLMI, &MI)) {
+ LLVM_DEBUG(dbgs() << " Abort due to VL not dominating.\n");
return false;
+ }
LLVM_DEBUG(
dbgs() << " Reduce VL from " << VLOp << " to "
<< printReg(CommonVL->getReg(), MRI->getTargetRegisterInfo())
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index 050de3d58a2f..62651185137c 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -745,12 +745,24 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const {
if (PassthruReg && !isKnownSameDefs(PassthruReg, FalseReg))
return false;
+ std::optional<std::pair<unsigned, unsigned>> NeedsCommute;
+
// If True has a passthru operand then it needs to be the same as vmerge's
// False, since False will be used for the result's passthru operand.
Register TruePassthru = True.getOperand(True.getNumExplicitDefs()).getReg();
if (RISCVII::isFirstDefTiedToFirstUse(True.getDesc()) && TruePassthru &&
- !isKnownSameDefs(TruePassthru, FalseReg))
- return false;
+ !isKnownSameDefs(TruePassthru, FalseReg)) {
+ // If True's passthru != False, check if it uses False in another operand
+ // and try to commute it.
+ int OtherIdx = True.findRegisterUseOperandIdx(FalseReg, TRI);
+ if (OtherIdx == -1)
+ return false;
+ unsigned OpIdx1 = OtherIdx;
+ unsigned OpIdx2 = True.getNumExplicitDefs();
+ if (!TII->findCommutedOpIndices(True, OpIdx1, OpIdx2))
+ return false;
+ NeedsCommute = {OpIdx1, OpIdx2};
+ }
// Make sure it doesn't raise any observable fp exceptions, since changing the
// active elements will affect how fflags is set.
@@ -796,6 +808,14 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const {
if (!ensureDominates(MaskOp, True))
return false;
+ if (NeedsCommute) {
+ auto [OpIdx1, OpIdx2] = *NeedsCommute;
+ [[maybe_unused]] bool Commuted =
+ TII->commuteInstruction(True, /*NewMI=*/false, OpIdx1, OpIdx2);
+ assert(Commuted && "Failed to commute True?");
+ Info = RISCV::lookupMaskedIntrinsicByUnmasked(True.getOpcode());
+ }
+
True.setDesc(TII->get(Info->MaskedPseudo));
// Insert the mask operand.