diff options
| author | Mingming Liu <mingmingl@google.com> | 2025-09-10 15:25:31 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-10 15:25:31 -0700 |
| commit | 1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch) | |
| tree | 57f4b1f313c8cf74eed8819870f39c36ea263c68 /llvm/lib/Target/RISCV | |
| parent | 898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff) | |
| parent | b8cefcb601ddaa18482555c4ff363c01a270c2fe (diff) | |
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-format (branch: users/mingmingl-llvm/samplefdo-profile-format)
Diffstat (limited to 'llvm/lib/Target/RISCV')
44 files changed, 1522 insertions, 719 deletions
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 9ce44d0ff7fd..cd8392849ac4 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -121,7 +121,7 @@ class RISCVAsmParser : public MCTargetAsmParser { bool parseVTypeToken(const AsmToken &Tok, VTypeState &State, unsigned &Sew, unsigned &Lmul, bool &Fractional, bool &TailAgnostic, - bool &MaskAgnostic); + bool &MaskAgnostic, bool &AltFmt); bool generateVTypeError(SMLoc ErrorLoc); bool generateXSfmmVTypeError(SMLoc ErrorLoc); @@ -2261,14 +2261,23 @@ ParseStatus RISCVAsmParser::parseJALOffset(OperandVector &Operands) { bool RISCVAsmParser::parseVTypeToken(const AsmToken &Tok, VTypeState &State, unsigned &Sew, unsigned &Lmul, bool &Fractional, bool &TailAgnostic, - bool &MaskAgnostic) { + bool &MaskAgnostic, bool &AltFmt) { if (Tok.isNot(AsmToken::Identifier)) return true; StringRef Identifier = Tok.getIdentifier(); if (State < VTypeState::SeenSew && Identifier.consume_front("e")) { - if (Identifier.getAsInteger(10, Sew)) - return true; + if (Identifier.getAsInteger(10, Sew)) { + if (Identifier == "16alt") { + AltFmt = true; + Sew = 16; + } else if (Identifier == "8alt") { + AltFmt = true; + Sew = 8; + } else { + return true; + } + } if (!RISCVVType::isValidSEW(Sew)) return true; @@ -2340,11 +2349,12 @@ ParseStatus RISCVAsmParser::parseVTypeI(OperandVector &Operands) { bool Fractional = false; bool TailAgnostic = false; bool MaskAgnostic = false; + bool AltFmt = false; VTypeState State = VTypeState::SeenNothingYet; do { if (parseVTypeToken(getTok(), State, Sew, Lmul, Fractional, TailAgnostic, - MaskAgnostic)) { + MaskAgnostic, AltFmt)) { // The first time, errors return NoMatch rather than Failure if (State == VTypeState::SeenNothingYet) return ParseStatus::NoMatch; @@ -2370,12 +2380,17 @@ ParseStatus RISCVAsmParser::parseVTypeI(OperandVector &Operands) { } unsigned VTypeI = - 
RISCVVType::encodeVTYPE(VLMUL, Sew, TailAgnostic, MaskAgnostic); + RISCVVType::encodeVTYPE(VLMUL, Sew, TailAgnostic, MaskAgnostic, AltFmt); Operands.push_back(RISCVOperand::createVType(VTypeI, S)); return ParseStatus::Success; } bool RISCVAsmParser::generateVTypeError(SMLoc ErrorLoc) { + if (STI->hasFeature(RISCV::FeatureStdExtZvfbfa)) + return Error( + ErrorLoc, + "operand must be " + "e[8|8alt|16|16alt|32|64],m[1|2|4|8|f2|f4|f8],[ta|tu],[ma|mu]"); return Error( ErrorLoc, "operand must be " diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index 47329b2c2f4d..0ff178e1f195 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -7,7 +7,8 @@ tablegen(LLVM RISCVGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM RISCVGenCompressInstEmitter.inc -gen-compress-inst-emitter) tablegen(LLVM RISCVGenMacroFusion.inc -gen-macro-fusion-pred) tablegen(LLVM RISCVGenDAGISel.inc -gen-dag-isel) -tablegen(LLVM RISCVGenDisassemblerTables.inc -gen-disassembler) +tablegen(LLVM RISCVGenDisassemblerTables.inc -gen-disassembler + --specialize-decoders-per-bitwidth) tablegen(LLVM RISCVGenInstrInfo.inc -gen-instr-info) tablegen(LLVM RISCVGenMCCodeEmitter.inc -gen-emitter) tablegen(LLVM RISCVGenMCPseudoLowering.inc -gen-pseudo-lowering) @@ -87,6 +88,7 @@ add_llvm_target(RISCVCodeGen GlobalISel IPO MC + Passes RISCVDesc RISCVInfo Scalar diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp index dbb16fce8390..89df9d82f878 100644 --- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp +++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp @@ -46,8 +46,6 @@ public: raw_ostream &CStream) const override; private: - void addSPOperands(MCInst &MI) const; - DecodeStatus getInstruction48(MCInst &Instr, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, raw_ostream &CStream) const; @@ -196,6 +194,12 @@ static DecodeStatus 
DecodeFPR128RegisterClass(MCInst &Inst, uint32_t RegNo, return MCDisassembler::Success; } +static DecodeStatus DecodeSPRegisterClass(MCInst &Inst, + const MCDisassembler *Decoder) { + Inst.addOperand(MCOperand::createReg(RISCV::X2)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeGPRNoX0RegisterClass(MCInst &Inst, uint32_t RegNo, uint64_t Address, const MCDisassembler *Decoder) { @@ -558,7 +562,7 @@ static DecodeStatus decodeXqccmpRlistS0(MCInst &Inst, uint32_t Imm, return decodeZcmpRlist(Inst, Imm, Address, Decoder); } -static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint32_t Insn, +static DecodeStatus decodeCSSPushPopchk(MCInst &Inst, uint16_t Insn, uint64_t Address, const MCDisassembler *Decoder) { uint32_t Rs1 = fieldFromInstruction(Insn, 7, 5); @@ -600,15 +604,6 @@ static DecodeStatus decodeXTHeadMemPair(MCInst &Inst, uint32_t Insn, #include "RISCVGenDisassemblerTables.inc" -// Add implied SP operand for C.*SP compressed instructions. The SP operand -// isn't explicitly encoded in the instruction. 
-void RISCVDisassembler::addSPOperands(MCInst &MI) const { - const MCInstrDesc &MCID = MCII->get(MI.getOpcode()); - for (unsigned i = 0; i < MCID.getNumOperands(); i++) - if (MCID.operands()[i].RegClass == RISCV::SPRegClassID) - MI.insert(MI.begin() + i, MCOperand::createReg(RISCV::X2)); -} - namespace { struct DecoderListEntry { @@ -656,6 +651,13 @@ static constexpr FeatureBitset XSfSystemGroup = { RISCV::FeatureVendorXSiFivecflushdlone, }; +static constexpr FeatureBitset XMIPSGroup = { + RISCV::FeatureVendorXMIPSLSP, + RISCV::FeatureVendorXMIPSCMov, + RISCV::FeatureVendorXMIPSCBOP, + RISCV::FeatureVendorXMIPSEXECTL, +}; + static constexpr FeatureBitset XTHeadGroup = { RISCV::FeatureVendorXTHeadBa, RISCV::FeatureVendorXTHeadBb, RISCV::FeatureVendorXTHeadBs, RISCV::FeatureVendorXTHeadCondMov, @@ -684,13 +686,7 @@ static constexpr DecoderListEntry DecoderList32[]{ {DecoderTableXSfvector32, XSfVectorGroup, "SiFive vector extensions"}, {DecoderTableXSfsystem32, XSfSystemGroup, "SiFive system extensions"}, {DecoderTableXSfcease32, {RISCV::FeatureVendorXSfcease}, "SiFive sf.cease"}, - {DecoderTableXmipslsp32, {RISCV::FeatureVendorXMIPSLSP}, "MIPS mips.lsp"}, - {DecoderTableXmipscmov32, - {RISCV::FeatureVendorXMIPSCMov}, - "MIPS mips.ccmov"}, - {DecoderTableXmipscbop32, - {RISCV::FeatureVendorXMIPSCBOP}, - "MIPS mips.pref"}, + {DecoderTableXMIPS32, XMIPSGroup, "Mips extensions"}, {DecoderTableXAndes32, XAndesGroup, "Andes extensions"}, {DecoderTableXSMT32, XSMTGroup, "SpacemiT extensions"}, // Standard Extensions @@ -700,6 +696,14 @@ static constexpr DecoderListEntry DecoderList32[]{ {DecoderTableZdinxRV32Only32, {}, "RV32-only Zdinx (Double in Integer)"}, }; +namespace { +// Define bitwidths for various types used to instantiate the decoder. +template <> constexpr uint32_t InsnBitWidth<uint16_t> = 16; +template <> constexpr uint32_t InsnBitWidth<uint32_t> = 32; +// Use uint64_t to represent 48 bit instructions. 
+template <> constexpr uint32_t InsnBitWidth<uint64_t> = 48; +} // namespace + DecodeStatus RISCVDisassembler::getInstruction32(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, @@ -710,9 +714,7 @@ DecodeStatus RISCVDisassembler::getInstruction32(MCInst &MI, uint64_t &Size, } Size = 4; - // Use uint64_t to match getInstruction48. decodeInstruction is templated - // on the Insn type. - uint64_t Insn = support::endian::read32le(Bytes.data()); + uint32_t Insn = support::endian::read32le(Bytes.data()); for (const DecoderListEntry &Entry : DecoderList32) { if (!Entry.haveContainedFeatures(STI.getFeatureBits())) @@ -758,9 +760,7 @@ DecodeStatus RISCVDisassembler::getInstruction16(MCInst &MI, uint64_t &Size, } Size = 2; - // Use uint64_t to match getInstruction48. decodeInstruction is templated - // on the Insn type. - uint64_t Insn = support::endian::read16le(Bytes.data()); + uint16_t Insn = support::endian::read16le(Bytes.data()); for (const DecoderListEntry &Entry : DecoderList16) { if (!Entry.haveContainedFeatures(STI.getFeatureBits())) @@ -769,12 +769,8 @@ DecodeStatus RISCVDisassembler::getInstruction16(MCInst &MI, uint64_t &Size, LLVM_DEBUG(dbgs() << "Trying " << Entry.Desc << " table:\n"); DecodeStatus Result = decodeInstruction(Entry.Table, MI, Insn, Address, this, STI); - if (Result == MCDisassembler::Fail) - continue; - - addSPOperands(MI); - - return Result; + if (Result != MCDisassembler::Fail) + return Result; } return MCDisassembler::Fail; diff --git a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp index 51ea3fc5f677..7df1b7e58000 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVInstructionSelector.cpp @@ -1158,8 +1158,8 @@ bool RISCVInstructionSelector::selectAddr(MachineInstr &MI, switch (TM.getCodeModel()) { default: { - reportGISelFailure(const_cast<MachineFunction &>(*MF), *TPC, *MORE, - getName(), 
"Unsupported code model for lowering", MI); + reportGISelFailure(*MF, *TPC, *MORE, getName(), + "Unsupported code model for lowering", MI); return false; } case CodeModel::Small: { diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index e88f33d6859e..564657ac65fd 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -26,6 +26,8 @@ #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/IR/Type.h" using namespace llvm; @@ -152,7 +154,8 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) getActionDefinitionsBuilder({G_SADDO, G_SSUBO}).minScalar(0, sXLen).lower(); // TODO: Use Vector Single-Width Saturating Instructions for vector types. - getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT}) + getActionDefinitionsBuilder( + {G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT, G_SSHLSAT, G_USHLSAT}) .lower(); getActionDefinitionsBuilder({G_SHL, G_ASHR, G_LSHR}) @@ -485,6 +488,10 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) .minScalar(ST.hasStdExtZbb(), 0, sXLen) .lower(); + getActionDefinitionsBuilder({G_ABDS, G_ABDU}) + .minScalar(ST.hasStdExtZbb(), 0, sXLen) + .lower(); + getActionDefinitionsBuilder({G_UMAX, G_UMIN, G_SMAX, G_SMIN}) .legalFor(ST.hasStdExtZbb(), {sXLen}) .minScalar(ST.hasStdExtZbb(), 0, sXLen) @@ -692,6 +699,16 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) .customIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST), typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST))); + getActionDefinitionsBuilder(G_ATOMICRMW_ADD) + .legalFor(ST.hasStdExtA(), {{sXLen, p0}}) + .libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}}) + .clampScalar(0, sXLen, sXLen); + + getActionDefinitionsBuilder(G_ATOMICRMW_SUB) 
+ .libcallFor(!ST.hasStdExtA(), {{s8, p0}, {s16, p0}, {s32, p0}, {s64, p0}}) + .clampScalar(0, sXLen, sXLen) + .lower(); + getLegacyLegalizerInfo().computeTables(); verify(*ST.getInstrInfo()); } @@ -729,6 +746,9 @@ bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MI.eraseFromParent(); return true; } + case Intrinsic::riscv_masked_atomicrmw_add: + case Intrinsic::riscv_masked_atomicrmw_sub: + return true; } } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp index 543c4c5ddfc9..37fe32531800 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp @@ -36,6 +36,12 @@ RISCVTargetELFStreamer::RISCVTargetELFStreamer(MCStreamer &S, setFlagsFromFeatures(STI); } +RISCVELFStreamer::RISCVELFStreamer(MCContext &C, + std::unique_ptr<MCAsmBackend> MAB, + std::unique_ptr<MCObjectWriter> MOW, + std::unique_ptr<MCCodeEmitter> MCE) + : MCELFStreamer(C, std::move(MAB), std::move(MOW), std::move(MCE)) {} + RISCVELFStreamer &RISCVTargetELFStreamer::getStreamer() { return static_cast<RISCVELFStreamer &>(Streamer); } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h index 98948cd3e949..26da2441d4ae 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h @@ -28,8 +28,7 @@ class RISCVELFStreamer : public MCELFStreamer { public: RISCVELFStreamer(MCContext &C, std::unique_ptr<MCAsmBackend> MAB, std::unique_ptr<MCObjectWriter> MOW, - std::unique_ptr<MCCodeEmitter> MCE) - : MCELFStreamer(C, std::move(MAB), std::move(MOW), std::move(MCE)) {} + std::unique_ptr<MCCodeEmitter> MCE); void changeSection(MCSection *Section, uint32_t Subsection) override; void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; diff --git 
a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp index b0c27ce6010f..50f5a5d09a69 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVInstPrinter.cpp @@ -216,9 +216,12 @@ void RISCVInstPrinter::printVTypeI(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI, raw_ostream &O) { unsigned Imm = MI->getOperand(OpNo).getImm(); // Print the raw immediate for reserved values: vlmul[2:0]=4, vsew[2:0]=0b1xx, - // or non-zero in bits 8 and above. + // altfmt=1 without zvfbfa extension, or non-zero in bits 9 and above. if (RISCVVType::getVLMUL(Imm) == RISCVVType::VLMUL::LMUL_RESERVED || - RISCVVType::getSEW(Imm) > 64 || (Imm >> 8) != 0) { + RISCVVType::getSEW(Imm) > 64 || + (RISCVVType::isAltFmt(Imm) && + !STI.hasFeature(RISCV::FeatureStdExtZvfbfa)) || + (Imm >> 9) != 0) { O << formatImm(Imm); return; } diff --git a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp index 83566b1c5778..66ca43604670 100644 --- a/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp +++ b/llvm/lib/Target/RISCV/RISCVAsmPrinter.cpp @@ -126,7 +126,7 @@ private: void LowerPATCHABLE_TAIL_CALL(const MachineInstr *MI); void emitSled(const MachineInstr *MI, SledKind Kind); - bool lowerToMCInst(const MachineInstr *MI, MCInst &OutMI); + void lowerToMCInst(const MachineInstr *MI, MCInst &OutMI); }; } @@ -329,12 +329,17 @@ void RISCVAsmPrinter::emitInstruction(const MachineInstr *MI) { case TargetOpcode::STATEPOINT: return LowerSTATEPOINT(*OutStreamer, SM, *MI); case TargetOpcode::PATCHABLE_FUNCTION_ENTER: { - // patchable-function-entry is handled in lowerToMCInst - // Therefore, we break out of the switch statement if we encounter it here. 
const Function &F = MI->getParent()->getParent()->getFunction(); - if (F.hasFnAttribute("patchable-function-entry")) - break; - + if (F.hasFnAttribute("patchable-function-entry")) { + unsigned Num; + [[maybe_unused]] bool Result = + F.getFnAttribute("patchable-function-entry") + .getValueAsString() + .getAsInteger(10, Num); + assert(!Result && "Enforced by the verifier"); + emitNops(Num); + return; + } LowerPATCHABLE_FUNCTION_ENTER(MI); return; } @@ -347,8 +352,8 @@ void RISCVAsmPrinter::emitInstruction(const MachineInstr *MI) { } MCInst OutInst; - if (!lowerToMCInst(MI, OutInst)) - EmitToStreamer(*OutStreamer, OutInst); + lowerToMCInst(MI, OutInst); + EmitToStreamer(*OutStreamer, OutInst); } bool RISCVAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, @@ -1174,9 +1179,9 @@ static bool lowerRISCVVMachineInstrToMCInst(const MachineInstr *MI, return true; } -bool RISCVAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { +void RISCVAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { if (lowerRISCVVMachineInstrToMCInst(MI, OutMI, STI)) - return false; + return; OutMI.setOpcode(MI->getOpcode()); @@ -1185,23 +1190,6 @@ bool RISCVAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { if (lowerOperand(MO, MCOp)) OutMI.addOperand(MCOp); } - - switch (OutMI.getOpcode()) { - case TargetOpcode::PATCHABLE_FUNCTION_ENTER: { - const Function &F = MI->getParent()->getParent()->getFunction(); - if (F.hasFnAttribute("patchable-function-entry")) { - unsigned Num; - if (F.getFnAttribute("patchable-function-entry") - .getValueAsString() - .getAsInteger(10, Num)) - return false; - emitNops(Num); - return true; - } - break; - } - } - return false; } void RISCVAsmPrinter::emitMachineConstantPoolValue( diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 3b738e4cc11a..063963d4ec36 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -680,6 
+680,13 @@ def FeatureStdExtV [FeatureStdExtZvl128b, FeatureStdExtZve64d]>, RISCVExtensionBitmask<0, 21>; +def FeatureStdExtZvfbfa + : RISCVExperimentalExtension<0, 1, "Additional BF16 vector compute support", + [FeatureStdExtZve32f, FeatureStdExtZfbfmin]>; +def HasStdExtZvfbfa : Predicate<"Subtarget->hasStdExtZvfbfa()">, + AssemblerPredicate<(all_of FeatureStdExtZvfbfa), + "'Zvfbfa' (Additional BF16 vector compute support)">; + def FeatureStdExtZvfbfmin : RISCVExtension<1, 0, "Vector BF16 Converts", [FeatureStdExtZve32f]>; def HasStdExtZvfbfmin : Predicate<"Subtarget->hasStdExtZvfbfmin()">, @@ -1396,20 +1403,27 @@ def HasVendorXMIPSCMov AssemblerPredicate<(all_of FeatureVendorXMIPSCMov), "'Xmipscmov' ('mips.ccmov' instruction)">; def UseCCMovInsn : Predicate<"Subtarget->useCCMovInsn()">; + def FeatureVendorXMIPSLSP : RISCVExtension<1, 0, "MIPS optimization for hardware load-store bonding">; def HasVendorXMIPSLSP : Predicate<"Subtarget->hasVendorXMIPSLSP()">, AssemblerPredicate<(all_of FeatureVendorXMIPSLSP), "'Xmipslsp' (load and store pair instructions)">; -def FeatureVendorXMIPSCBOP - : RISCVExtension<1, 0, "MIPS Software Prefetch">; + +def FeatureVendorXMIPSCBOP : RISCVExtension<1, 0, "MIPS Software Prefetch">; def HasVendorXMIPSCBOP : Predicate<"Subtarget->hasVendorXMIPSCBOP()">, AssemblerPredicate<(all_of FeatureVendorXMIPSCBOP), "'Xmipscbop' (MIPS hardware prefetch)">; def NoVendorXMIPSCBOP : Predicate<"!Subtarget->hasVendorXMIPSCBOP()">; +def FeatureVendorXMIPSEXECTL : RISCVExtension<1, 0, "MIPS execution control">; +def HasVendorXMIPSEXECTL + : Predicate<"Subtarget->hasVendorXMIPSEXT()">, + AssemblerPredicate<(all_of FeatureVendorXMIPSEXECTL), + "'Xmipsexectl' (MIPS execution control)">; + // WCH / Nanjing Qinheng Microelectronics Extension(s) def FeatureVendorXwchc @@ -1668,7 +1682,7 @@ def IsRV32 : Predicate<"!Subtarget->is64Bit()">, "RV32I Base Instruction Set">; defvar RV32 = DefaultMode; -def RV64 : HwMode<"+64bit", [IsRV64]>; +def RV64 : 
HwMode<[IsRV64]>; def FeatureRelax : SubtargetFeature<"relax", "EnableLinkerRelax", "true", diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index 9fc0d815ceee..06ce91771c9e 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -106,8 +106,14 @@ static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL) { const auto &STI = MF.getSubtarget<RISCVSubtarget>(); + // We check Zimop instead of (Zimop || Zcmop) to determine whether HW shadow + // stack is available despite the fact that sspush/sspopchk both have a + // compressed form, because if only Zcmop is available, we would need to + // reserve X5 due to c.sspopchk only takes X5 and we currently do not support + // using X5 as the return address register. + // However, we can still aggressively use c.sspush x1 if zcmop is available. bool HasHWShadowStack = MF.getFunction().hasFnAttribute("hw-shadow-stack") && - STI.hasStdExtZicfiss(); + STI.hasStdExtZimop(); bool HasSWShadowStack = MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack); if (!HasHWShadowStack && !HasSWShadowStack) @@ -124,7 +130,12 @@ static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB, const RISCVInstrInfo *TII = STI.getInstrInfo(); if (HasHWShadowStack) { - BuildMI(MBB, MI, DL, TII->get(RISCV::SSPUSH)).addReg(RAReg); + if (STI.hasStdExtZcmop()) { + static_assert(RAReg == RISCV::X1, "C.SSPUSH only accepts X1"); + BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoMOP_C_SSPUSH)); + } else { + BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoMOP_SSPUSH)).addReg(RAReg); + } return; } @@ -172,7 +183,7 @@ static void emitSCSEpilogue(MachineFunction &MF, MachineBasicBlock &MBB, const DebugLoc &DL) { const auto &STI = MF.getSubtarget<RISCVSubtarget>(); bool HasHWShadowStack = MF.getFunction().hasFnAttribute("hw-shadow-stack") && - STI.hasStdExtZicfiss(); + 
STI.hasStdExtZimop(); bool HasSWShadowStack = MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack); if (!HasHWShadowStack && !HasSWShadowStack) @@ -186,7 +197,7 @@ static void emitSCSEpilogue(MachineFunction &MF, MachineBasicBlock &MBB, const RISCVInstrInfo *TII = STI.getInstrInfo(); if (HasHWShadowStack) { - BuildMI(MBB, MI, DL, TII->get(RISCV::SSPOPCHK)).addReg(RAReg); + BuildMI(MBB, MI, DL, TII->get(RISCV::PseudoMOP_SSPOPCHK)).addReg(RAReg); return; } diff --git a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp index 80a48c5ec11f..52dc53e4545e 100644 --- a/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp @@ -561,7 +561,7 @@ bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II) { EVL = Builder.CreateElementCount( Builder.getInt32Ty(), cast<VectorType>(DataType)->getElementCount()); - CallInst *Call; + Value *Call; if (!StoreVal) { Call = Builder.CreateIntrinsic( @@ -571,8 +571,7 @@ bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II) { // Merge llvm.masked.gather's passthru if (II->getIntrinsicID() == Intrinsic::masked_gather) - Call = Builder.CreateIntrinsic(Intrinsic::vp_select, {DataType}, - {Mask, Call, II->getArgOperand(3), EVL}); + Call = Builder.CreateSelect(Mask, Call, II->getArgOperand(3)); } else Call = Builder.CreateIntrinsic( Intrinsic::experimental_vp_strided_store, diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index f9f35f66319b..c7f15415ebb9 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -819,49 +819,6 @@ bool RISCVDAGToDAGISel::trySignedBitfieldInsertInSign(SDNode *Node) { return false; } -// (xor X, (and (xor X, C1), C2)) -// -> (qc.insbi X, (C1 >> ShAmt), Width, ShAmt) -// where C2 is a shifted mask with width=Width and shift=ShAmt -bool 
RISCVDAGToDAGISel::tryBitfieldInsertOpFromXor(SDNode *Node) { - - if (!Subtarget->hasVendorXqcibm()) - return false; - - using namespace SDPatternMatch; - - SDValue X; - APInt CImm, CMask; - if (!sd_match( - Node, - m_Xor(m_Value(X), - m_OneUse(m_And(m_OneUse(m_Xor(m_Deferred(X), m_ConstInt(CImm))), - m_ConstInt(CMask)))))) - return false; - - unsigned Width, ShAmt; - if (!CMask.isShiftedMask(ShAmt, Width)) - return false; - - int64_t Imm = CImm.getSExtValue(); - Imm >>= ShAmt; - - SDLoc DL(Node); - SDValue ImmNode; - auto Opc = RISCV::QC_INSB; - - if (isInt<5>(Imm)) { - Opc = RISCV::QC_INSBI; - ImmNode = CurDAG->getSignedTargetConstant(Imm, DL, MVT::i32); - } else { - ImmNode = selectImm(CurDAG, DL, MVT::i32, Imm, *Subtarget); - } - SDValue Ops[] = {X, ImmNode, CurDAG->getTargetConstant(Width, DL, MVT::i32), - CurDAG->getTargetConstant(ShAmt, DL, MVT::i32)}; - ReplaceNode(Node, CurDAG->getMachineNode(Opc, DL, MVT::i32, Ops)); - - return true; -} - bool RISCVDAGToDAGISel::tryUnsignedBitfieldExtract(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, @@ -1095,7 +1052,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { SDLoc DL(Node); MVT VT = Node->getSimpleValueType(0); - bool HasBitTest = Subtarget->hasStdExtZbs() || Subtarget->hasVendorXTHeadBs(); + bool HasBitTest = Subtarget->hasBEXTILike(); switch (Opcode) { case ISD::Constant: { @@ -1442,9 +1399,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) { if (tryShrinkShlLogicImm(Node)) return; - if (tryBitfieldInsertOpFromXor(Node)) - return; - break; case ISD::AND: { auto *N1C = dyn_cast<ConstantSDNode>(Node->getOperand(1)); @@ -2951,6 +2905,65 @@ static bool isWorthFoldingAdd(SDValue Add) { return true; } +bool isRegImmLoadOrStore(SDNode *User, SDValue Add) { + switch (User->getOpcode()) { + default: + return false; + case ISD::LOAD: + case RISCVISD::LD_RV32: + case ISD::ATOMIC_LOAD: + break; + case ISD::STORE: + // Don't allow stores of Add. It must only be used as the address. 
+ if (cast<StoreSDNode>(User)->getValue() == Add) + return false; + break; + case RISCVISD::SD_RV32: + // Don't allow stores of Add. It must only be used as the address. + if (User->getOperand(0) == Add || User->getOperand(1) == Add) + return false; + break; + case ISD::ATOMIC_STORE: + // Don't allow stores of Add. It must only be used as the address. + if (cast<AtomicSDNode>(User)->getVal() == Add) + return false; + break; + } + + return true; +} + +// To prevent SelectAddrRegImm from folding offsets that conflict with the +// fusion of PseudoMovAddr, check if the offset of every use of a given address +// is within the alignment. +bool RISCVDAGToDAGISel::areOffsetsWithinAlignment(SDValue Addr, + Align Alignment) { + assert(Addr->getOpcode() == RISCVISD::ADD_LO); + for (auto *User : Addr->users()) { + // If the user is a load or store, then the offset is 0 which is always + // within alignment. + if (isRegImmLoadOrStore(User, Addr)) + continue; + + if (CurDAG->isBaseWithConstantOffset(SDValue(User, 0))) { + int64_t CVal = cast<ConstantSDNode>(User->getOperand(1))->getSExtValue(); + if (!isInt<12>(CVal) || Alignment <= CVal) + return false; + + // Make sure all uses are foldable load/stores. + for (auto *AddUser : User->users()) + if (!isRegImmLoadOrStore(AddUser, SDValue(User, 0))) + return false; + + continue; + } + + return false; + } + + return true; +} + bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset) { if (SelectAddrFrameIndex(Addr, Base, Offset)) @@ -2960,9 +2973,21 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, MVT VT = Addr.getSimpleValueType(); if (Addr.getOpcode() == RISCVISD::ADD_LO) { - Base = Addr.getOperand(0); - Offset = Addr.getOperand(1); - return true; + bool CanFold = true; + // Unconditionally fold if operand 1 is not a global address (e.g. 
+ // externsymbol) + if (auto *GA = dyn_cast<GlobalAddressSDNode>(Addr.getOperand(1))) { + const DataLayout &DL = CurDAG->getDataLayout(); + Align Alignment = commonAlignment( + GA->getGlobal()->getPointerAlignment(DL), GA->getOffset()); + if (!areOffsetsWithinAlignment(Addr, Alignment)) + CanFold = false; + } + if (CanFold) { + Base = Addr.getOperand(0); + Offset = Addr.getOperand(1); + return true; + } } if (CurDAG->isBaseWithConstantOffset(Addr)) { @@ -2980,7 +3005,8 @@ bool RISCVDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base, const DataLayout &DL = CurDAG->getDataLayout(); Align Alignment = commonAlignment( GA->getGlobal()->getPointerAlignment(DL), GA->getOffset()); - if ((CVal == 0 || Alignment > CVal)) { + if ((CVal == 0 || Alignment > CVal) && + areOffsetsWithinAlignment(Base, Alignment)) { int64_t CombinedOffset = CVal + GA->getOffset(); Base = Base.getOperand(0); Offset = CurDAG->getTargetGlobalAddress( @@ -3983,6 +4009,15 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits, if (Use.getOperandNo() == 0 && Bits >= 32) break; return false; + case RISCV::TH_EXT: + case RISCV::TH_EXTU: { + unsigned Msb = User->getConstantOperandVal(1); + unsigned Lsb = User->getConstantOperandVal(2); + // Behavior of Msb < Lsb is not well documented. 
+ if (Msb >= Lsb && Bits > Msb) + break; + return false; + } } } diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h index c329a4c6ec62..cf2f763abc06 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h @@ -45,6 +45,8 @@ public: InlineAsm::ConstraintCode ConstraintID, std::vector<SDValue> &OutOps) override; + bool areOffsetsWithinAlignment(SDValue Addr, Align Alignment); + bool SelectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset); bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset); bool SelectAddrRegImm9(SDValue Addr, SDValue &Base, SDValue &Offset); @@ -75,7 +77,6 @@ public: bool trySignedBitfieldExtract(SDNode *Node); bool trySignedBitfieldInsertInSign(SDNode *Node); bool trySignedBitfieldInsertInMask(SDNode *Node); - bool tryBitfieldInsertOpFromXor(SDNode *Node); bool tryBitfieldInsertOpFromOrAndImm(SDNode *Node); bool tryUnsignedBitfieldExtract(SDNode *Node, const SDLoc &DL, MVT VT, SDValue X, unsigned Msb, unsigned Lsb); diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index a33224845e2b..a68a3c14dc41 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -2173,7 +2173,7 @@ bool RISCVTargetLowering::isMaskAndCmp0FoldingBeneficial( // on the basis that it's possible the sinking+duplication of the AND in // CodeGenPrepare triggered by this hook wouldn't decrease the instruction // count and would increase code size (e.g. ANDI+BNEZ => BEXTI+BNEZ). - if (!Subtarget.hasStdExtZbs() && !Subtarget.hasVendorXTHeadBs()) + if (!Subtarget.hasBEXTILike()) return false; ConstantInt *Mask = dyn_cast<ConstantInt>(AndI.getOperand(1)); if (!Mask) @@ -3744,9 +3744,11 @@ static SDValue matchSplatAsGather(SDValue SplatVal, MVT VT, const SDLoc &DL, // different // FIXME: Support i1 vectors, maybe by promoting to i8? 
MVT EltTy = VT.getVectorElementType(); + if (EltTy == MVT::i1 || + !DAG.getTargetLoweringInfo().isTypeLegal(Src.getValueType())) + return SDValue(); MVT SrcVT = Src.getSimpleValueType(); - if (EltTy == MVT::i1 || EltTy != SrcVT.getVectorElementType() || - !DAG.getTargetLoweringInfo().isTypeLegal(SrcVT)) + if (EltTy != SrcVT.getVectorElementType()) return SDValue(); SDValue Idx = SplatVal.getOperand(1); // The index must be a legal type. @@ -4518,41 +4520,104 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC; + // General case: splat the first operand and slide other operands down one + // by one to form a vector. Alternatively, if every operand is an + // extraction from element 0 of a vector, we use that vector from the last + // extraction as the start value and slide up instead of slide down. Such that + // (1) we can avoid the initial splat (2) we can turn those vslide1up into + // vslideup of 1 later and eliminate the vector to scalar movement, which is + // something we cannot do with vslide1down/vslidedown. + // Of course, using vslide1up/vslideup might increase the register pressure, + // and that's why we conservatively limit to cases where every operand is an + // extraction from the first element. + SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end()); + SDValue EVec; + bool SlideUp = false; + auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec, + SDValue Offset, SDValue Mask, SDValue VL) -> SDValue { + if (SlideUp) + return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset, + Mask, VL, Policy); + return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset, + Mask, VL, Policy); + }; + + // The reason we don't use all_of here is because we're also capturing EVec + // from the last non-undef operand. 
If the std::execution_policy of the + // underlying std::all_of is anything but std::sequenced_policy we might + // capture the wrong EVec. + for (SDValue V : Operands) { + using namespace SDPatternMatch; + SlideUp = V.isUndef() || sd_match(V, m_ExtractElt(m_Value(EVec), m_Zero())); + if (!SlideUp) + break; + } + + if (SlideUp) { + MVT EVecContainerVT = EVec.getSimpleValueType(); + // Make sure the original vector has scalable vector type. + if (EVecContainerVT.isFixedLengthVector()) { + EVecContainerVT = + getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget); + EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget); + } + + // Adapt EVec's type into ContainerVT. + if (EVecContainerVT.getVectorMinNumElements() < + ContainerVT.getVectorMinNumElements()) + EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0); + else + EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0); + + // Reverse the elements as we're going to slide up from the last element. + std::reverse(Operands.begin(), Operands.end()); + } + SDValue Vec; UndefCount = 0; - for (SDValue V : Op->ops()) { + for (SDValue V : Operands) { if (V.isUndef()) { UndefCount++; continue; } - // Start our sequence with a TA splat in the hopes that hardware is able to - // recognize there's no dependency on the prior value of our temporary - // register. + // Start our sequence with either a TA splat or extract source in the + // hopes that hardware is able to recognize there's no dependency on the + // prior value of our temporary register. 
if (!Vec) { - Vec = DAG.getSplatVector(VT, DL, V); - Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); + if (SlideUp) { + Vec = EVec; + } else { + Vec = DAG.getSplatVector(VT, DL, V); + Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); + } + UndefCount = 0; continue; } if (UndefCount) { const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); - Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), - Vec, Offset, Mask, VL, Policy); + Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask, + VL); UndefCount = 0; } - auto OpCode = - VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL; + + unsigned Opcode; + if (VT.isFloatingPoint()) + Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL; + else + Opcode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL; + if (!VT.isFloatingPoint()) V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V); - Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, + Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, V, Mask, VL); } if (UndefCount) { const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); - Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), - Vec, Offset, Mask, VL, Policy); + Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask, + VL); } return convertFromScalableVector(VT, Vec, DAG, Subtarget); } @@ -8193,6 +8258,13 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op, DL, VT, LHS, DAG.getSignedConstant(Imm + 1, DL, OpVT), CCVal); return DAG.getLogicalNOT(DL, SetCC, VT); } + // Lower (setugt X, 2047) as (setne (srl X, 11), 0). 
+ if (CCVal == ISD::SETUGT && Imm == 2047) { + SDValue Shift = DAG.getNode(ISD::SRL, DL, OpVT, LHS, + DAG.getShiftAmountConstant(11, OpVT, DL)); + return DAG.getSetCC(DL, VT, Shift, DAG.getConstant(0, DL, OpVT), + ISD::SETNE); + } } // Not a constant we could handle, swap the operands and condition code to @@ -8815,7 +8887,15 @@ SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG, reportFatalUsageError("Unsupported code model for lowering"); case CodeModel::Small: { // Generate a sequence for accessing addresses within the first 2 GiB of - // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)). + // address space. + if (Subtarget.hasVendorXqcili()) { + // Use QC.E.LI to generate the address, as this is easier to relax than + // LUI/ADDI. + SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0); + return DAG.getNode(RISCVISD::QC_E_LI, DL, Ty, Addr); + } + + // This generates the pattern (addi (lui %hi(sym)) %lo(sym)). SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI); SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO); SDValue MNHi = DAG.getNode(RISCVISD::HI, DL, Ty, AddrHi); @@ -9036,8 +9116,12 @@ static std::optional<bool> matchSetCC(SDValue LHS, SDValue RHS, return std::nullopt; } -static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, - const RISCVSubtarget &Subtarget) { +static bool isSimm12Constant(SDValue V) { + return isa<ConstantSDNode>(V) && V->getAsAPIntVal().isSignedIntN(12); +} + +static SDValue lowerSelectToBinOp(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { SDValue CondV = N->getOperand(0); SDValue TrueV = N->getOperand(1); SDValue FalseV = N->getOperand(2); @@ -9057,14 +9141,16 @@ static SDValue combineSelectToBinOp(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::OR, DL, VT, Neg, DAG.getFreeze(TrueV)); } + const bool HasCZero = VT.isScalarInteger() && Subtarget.hasCZEROLike(); + // (select c, 0, y) -> (c-1) & y - if (isNullConstant(TrueV)) { - SDValue Neg = 
DAG.getNode(ISD::ADD, DL, VT, CondV, - DAG.getAllOnesConstant(DL, VT)); + if (isNullConstant(TrueV) && (!HasCZero || isSimm12Constant(FalseV))) { + SDValue Neg = + DAG.getNode(ISD::ADD, DL, VT, CondV, DAG.getAllOnesConstant(DL, VT)); return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(FalseV)); } // (select c, y, 0) -> -c & y - if (isNullConstant(FalseV)) { + if (isNullConstant(FalseV) && (!HasCZero || isSimm12Constant(TrueV))) { SDValue Neg = DAG.getNegative(CondV, DL, VT); return DAG.getNode(ISD::AND, DL, VT, Neg, DAG.getFreeze(TrueV)); } @@ -9185,12 +9271,16 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::VSELECT, DL, VT, CondSplat, TrueV, FalseV); } + // Try some other optimizations before falling back to generic lowering. + if (SDValue V = lowerSelectToBinOp(Op.getNode(), DAG, Subtarget)) + return V; + // When Zicond or XVentanaCondOps is present, emit CZERO_EQZ and CZERO_NEZ // nodes to implement the SELECT. Performing the lowering here allows for // greater control over when CZERO_{EQZ/NEZ} are used vs another branchless // sequence or RISCVISD::SELECT_CC node (branch-based select). - if ((Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) && - VT.isScalarInteger()) { + if (Subtarget.hasCZEROLike() && VT.isScalarInteger()) { + // (select c, t, 0) -> (czero_eqz t, c) if (isNullConstant(FalseV)) return DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV); @@ -9244,10 +9334,6 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV)); } - // Try some other optimizations before falling back to generic lowering. 
- if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget)) - return V; - // (select c, c1, c2) -> (add (czero_nez c2 - c1, c), c1) // (select c, c1, c2) -> (add (czero_eqz c1 - c2, c), c2) if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV)) { @@ -9280,19 +9366,38 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { } } - const int TrueValCost = RISCVMatInt::getIntMatCost( - TrueVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true); - const int FalseValCost = RISCVMatInt::getIntMatCost( - FalseVal, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true); - bool IsCZERO_NEZ = TrueValCost <= FalseValCost; + // Use SHL/ADDI (and possible XORI) to avoid having to materialize + // a constant in register + if ((TrueVal - FalseVal).isPowerOf2() && FalseVal.isSignedIntN(12)) { + SDValue Log2 = DAG.getConstant((TrueVal - FalseVal).logBase2(), DL, VT); + SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2); + return DAG.getNode(ISD::ADD, DL, VT, FalseV, BitDiff); + } + if ((FalseVal - TrueVal).isPowerOf2() && TrueVal.isSignedIntN(12)) { + SDValue Log2 = DAG.getConstant((FalseVal - TrueVal).logBase2(), DL, VT); + CondV = DAG.getLogicalNOT(DL, CondV, CondV->getValueType(0)); + SDValue BitDiff = DAG.getNode(ISD::SHL, DL, VT, CondV, Log2); + return DAG.getNode(ISD::ADD, DL, VT, TrueV, BitDiff); + } + + auto getCost = [&](const APInt &Delta, const APInt &Addend) { + const int DeltaCost = RISCVMatInt::getIntMatCost( + Delta, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true); + // Does the addend fold into an ADDI + if (Addend.isSignedIntN(12)) + return DeltaCost; + const int AddendCost = RISCVMatInt::getIntMatCost( + Addend, Subtarget.getXLen(), Subtarget, /*CompressionCost=*/true); + return AddendCost + DeltaCost; + }; + bool IsCZERO_NEZ = getCost(FalseVal - TrueVal, TrueVal) <= + getCost(TrueVal - FalseVal, FalseVal); SDValue LHSVal = DAG.getConstant( IsCZERO_NEZ ? 
FalseVal - TrueVal : TrueVal - FalseVal, DL, VT); - SDValue RHSVal = - DAG.getConstant(IsCZERO_NEZ ? TrueVal : FalseVal, DL, VT); SDValue CMOV = DAG.getNode(IsCZERO_NEZ ? RISCVISD::CZERO_NEZ : RISCVISD::CZERO_EQZ, DL, VT, LHSVal, CondV); - return DAG.getNode(ISD::ADD, DL, VT, CMOV, RHSVal); + return DAG.getNode(ISD::ADD, DL, VT, CMOV, IsCZERO_NEZ ? TrueV : FalseV); } // (select c, c1, t) -> (add (czero_nez t - c1, c), c1) @@ -9327,12 +9432,10 @@ SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode( ISD::OR, DL, VT, DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV, CondV), - DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV)); + DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV, CondV), + SDNodeFlags::Disjoint); } - if (SDValue V = combineSelectToBinOp(Op.getNode(), DAG, Subtarget)) - return V; - if (Op.hasOneUse()) { unsigned UseOpc = Op->user_begin()->getOpcode(); if (isBinOp(UseOpc) && DAG.isSafeToSpeculativelyExecute(UseOpc)) { @@ -10738,11 +10841,11 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1)); } case Intrinsic::riscv_mopr: - return DAG.getNode(RISCVISD::MOPR, DL, XLenVT, Op.getOperand(1), + return DAG.getNode(RISCVISD::MOP_R, DL, XLenVT, Op.getOperand(1), Op.getOperand(2)); case Intrinsic::riscv_moprr: { - return DAG.getNode(RISCVISD::MOPRR, DL, XLenVT, Op.getOperand(1), + return DAG.getNode(RISCVISD::MOP_RR, DL, XLenVT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); } case Intrinsic::riscv_clmul: @@ -14877,7 +14980,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N, SDValue NewOp = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1)); SDValue Res = DAG.getNode( - RISCVISD::MOPR, DL, MVT::i64, NewOp, + RISCVISD::MOP_R, DL, MVT::i64, NewOp, DAG.getTargetConstant(N->getConstantOperandVal(2), DL, MVT::i64)); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); return; @@ -14890,7 +14993,7 @@ void 
RISCVTargetLowering::ReplaceNodeResults(SDNode *N, SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2)); SDValue Res = DAG.getNode( - RISCVISD::MOPRR, DL, MVT::i64, NewOp0, NewOp1, + RISCVISD::MOP_RR, DL, MVT::i64, NewOp0, NewOp1, DAG.getTargetConstant(N->getConstantOperandVal(3), DL, MVT::i64)); Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res)); return; @@ -15381,9 +15484,7 @@ static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, if (!Subtarget.hasConditionalMoveFusion()) { // (select cond, x, (and x, c)) has custom lowering with Zicond. - if ((!Subtarget.hasStdExtZicond() && - !Subtarget.hasVendorXVentanaCondOps()) || - N->getOpcode() != ISD::AND) + if (!Subtarget.hasCZEROLike() || N->getOpcode() != ISD::AND) return SDValue(); // Maybe harmful when condition code has multiple use. @@ -16059,12 +16160,55 @@ static SDValue combineOrOfCZERO(SDNode *N, SDValue N0, SDValue N1, SDValue NewN0 = DAG.getNode(RISCVISD::CZERO_EQZ, DL, VT, TrueV.getOperand(0), Cond); - SDValue NewN1 = DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), - Cond); - SDValue NewOr = DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1); + SDValue NewN1 = + DAG.getNode(RISCVISD::CZERO_NEZ, DL, VT, FalseV.getOperand(0), Cond); + SDValue NewOr = + DAG.getNode(ISD::OR, DL, VT, NewN0, NewN1, SDNodeFlags::Disjoint); return DAG.getNode(ISD::XOR, DL, VT, NewOr, TrueV.getOperand(1)); } +// (xor X, (xor (and X, C2), Y)) +// ->(qc_insb X, (sra Y, ShAmt), Width, ShAmt) +// where C2 is a shifted mask with width = Width and shift = ShAmt +// qc_insb might become qc.insb or qc.insbi depending on the operands. 
+static SDValue combineXorToBitfieldInsert(SDNode *N, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + if (!Subtarget.hasVendorXqcibm()) + return SDValue(); + + using namespace SDPatternMatch; + + SDValue Base, Inserted; + APInt CMask; + if (!sd_match(N, m_Xor(m_Value(Base), + m_OneUse(m_Xor(m_OneUse(m_And(m_Deferred(Base), + m_ConstInt(CMask))), + m_Value(Inserted)))))) + return SDValue(); + + if (N->getValueType(0) != MVT::i32) + return SDValue(); + + unsigned Width, ShAmt; + if (!CMask.isShiftedMask(ShAmt, Width)) + return SDValue(); + + // Check if all zero bits in CMask are also zero in Inserted + if (!DAG.MaskedValueIsZero(Inserted, ~CMask)) + return SDValue(); + + SDLoc DL(N); + + // `Inserted` needs to be right shifted before it is put into the + // instruction. + Inserted = DAG.getNode(ISD::SRA, DL, MVT::i32, Inserted, + DAG.getShiftAmountConstant(ShAmt, MVT::i32, DL)); + + SDValue Ops[] = {Base, Inserted, DAG.getConstant(Width, DL, MVT::i32), + DAG.getConstant(ShAmt, DL, MVT::i32)}; + return DAG.getNode(RISCVISD::QC_INSB, DL, MVT::i32, Ops); +} + static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget) { SelectionDAG &DAG = DCI.DAG; @@ -16108,8 +16252,8 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N0.getOperand(0)); SDValue Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N0.getOperand(1)); SDValue Shl = DAG.getNode(ISD::SHL, DL, MVT::i64, Op0, Op1); - SDValue And = DAG.getNOT(DL, Shl, MVT::i64); - return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, And); + SDValue Not = DAG.getNOT(DL, Shl, MVT::i64); + return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Not); } // fold (xor (sllw 1, x), -1) -> (rolw ~1, x) @@ -16137,6 +16281,9 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG, } } + if (SDValue V = combineXorToBitfieldInsert(N, DAG, Subtarget)) + return V; + if (SDValue V = combineBinOpToReduce(N, DAG, 
Subtarget)) return V; if (SDValue V = combineBinOpOfExtractToReduceTree(N, DAG, Subtarget)) @@ -16590,10 +16737,6 @@ combineVectorSizedSetCCEquality(EVT VT, SDValue X, SDValue Y, ISD::CondCode CC, DAG.getConstant(0, DL, XLenVT), CC); } -// Replace (seteq (i64 (and X, 0xffffffff)), C1) with -// (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from -// bit 31. Same for setne. C1' may be cheaper to materialize and the sext_inreg -// can become a sext.w instead of a shift pair. static SDValue performSETCCCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const RISCVSubtarget &Subtarget) { @@ -16613,20 +16756,44 @@ static SDValue performSETCCCombine(SDNode *N, combineVectorSizedSetCCEquality(VT, N0, N1, Cond, dl, DAG, Subtarget)) return V; - // (X & -4096) == 0 -> (X >> 12) == 0 if the AND constant can't use ANDI. - if (DCI.isAfterLegalizeDAG() && isNullConstant(N1) && + if (DCI.isAfterLegalizeDAG() && isa<ConstantSDNode>(N1) && N0.getOpcode() == ISD::AND && N0.hasOneUse() && isa<ConstantSDNode>(N0.getOperand(1))) { - const APInt &AndRHSC = - cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); - if (!isInt<12>(AndRHSC.getSExtValue()) && AndRHSC.isNegatedPowerOf2()) { + const APInt &AndRHSC = N0.getConstantOperandAPInt(1); + // (X & -(1 << C)) == 0 -> (X >> C) == 0 if the AND constant can't use ANDI. + if (isNullConstant(N1) && !isInt<12>(AndRHSC.getSExtValue()) && + AndRHSC.isNegatedPowerOf2()) { unsigned ShiftBits = AndRHSC.countr_zero(); - SDValue Shift = DAG.getNode(ISD::SRL, dl, VT, N0.getOperand(0), - DAG.getConstant(ShiftBits, dl, VT)); + SDValue Shift = DAG.getNode(ISD::SRL, dl, OpVT, N0.getOperand(0), + DAG.getConstant(ShiftBits, dl, OpVT)); return DAG.getSetCC(dl, VT, Shift, N1, Cond); } + + // Similar to above but handling the lower 32 bits by using sraiw. Allow + // comparing with constants other than 0 if the constant can be folded into + // addi or xori after shifting. 
+ uint64_t N1Int = cast<ConstantSDNode>(N1)->getZExtValue(); + uint64_t AndRHSInt = AndRHSC.getZExtValue(); + if (OpVT == MVT::i64 && AndRHSInt <= 0xffffffff && + isPowerOf2_32(-uint32_t(AndRHSInt)) && (N1Int & AndRHSInt) == N1Int) { + unsigned ShiftBits = llvm::countr_zero(AndRHSInt); + int64_t NewC = SignExtend64<32>(N1Int) >> ShiftBits; + if (NewC >= -2048 && NewC <= 2048) { + SDValue SExt = + DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, OpVT, N0.getOperand(0), + DAG.getValueType(MVT::i32)); + SDValue Shift = DAG.getNode(ISD::SRA, dl, OpVT, SExt, + DAG.getConstant(ShiftBits, dl, OpVT)); + return DAG.getSetCC(dl, VT, Shift, + DAG.getSignedConstant(NewC, dl, OpVT), Cond); + } + } } + // Replace (seteq (i64 (and X, 0xffffffff)), C1) with + // (seteq (i64 (sext_inreg (X, i32)), C1')) where C1' is C1 sign extended from + // bit 31. Same for setne. C1' may be cheaper to materialize and the + // sext_inreg can become a sext.w instead of a shift pair. if (OpVT != MVT::i64 || !Subtarget.is64Bit()) return SDValue(); @@ -18674,7 +18841,7 @@ static SDValue tryFoldSelectIntoOp(SDNode *N, SelectionDAG &DAG, break; } - if (!TrueVal.hasOneUse() || isa<ConstantSDNode>(FalseVal)) + if (!TrueVal.hasOneUse()) return SDValue(); unsigned OpToFold; @@ -18746,6 +18913,10 @@ static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) { if (Cond->getOperand(0) != CountZeroesArgument) return SDValue(); + unsigned BitWidth = CountZeroes.getValueSizeInBits(); + if (!isPowerOf2_32(BitWidth)) + return SDValue(); + if (CountZeroes.getOpcode() == ISD::CTTZ_ZERO_UNDEF) { CountZeroes = DAG.getNode(ISD::CTTZ, SDLoc(CountZeroes), CountZeroes.getValueType(), CountZeroesArgument); @@ -18754,7 +18925,6 @@ static SDValue foldSelectOfCTTZOrCTLZ(SDNode *N, SelectionDAG &DAG) { CountZeroes.getValueType(), CountZeroesArgument); } - unsigned BitWidth = CountZeroes.getValueSizeInBits(); SDValue BitWidthMinusOne = DAG.getConstant(BitWidth - 1, SDLoc(N), CountZeroes.getValueType()); @@ -18778,7 +18948,7 @@ 
static SDValue useInversedSetcc(SDNode *N, SelectionDAG &DAG, // Replace (setcc eq (and x, C)) with (setcc ne (and x, C))) to generate // BEXTI, where C is power of 2. if (Subtarget.hasStdExtZbs() && VT.isScalarInteger() && - (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps())) { + (Subtarget.hasCZEROLike() || Subtarget.hasVendorXTHeadCondMov())) { SDValue LHS = Cond.getOperand(0); SDValue RHS = Cond.getOperand(1); ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); @@ -18953,6 +19123,7 @@ static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL, SelectionDAG &DAG, const RISCVSubtarget &Subtarget, const RISCVTargetLowering &TLI) { + using namespace SDPatternMatch; // Note: We intentionally do not check the legality of the reduction type. // We want to handle the m4/m8 *src* types, and thus need to let illegal // intermediate types flow through here. @@ -18960,11 +19131,10 @@ static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL, !InVec.getValueType().getVectorElementCount().isKnownMultipleOf(4)) return SDValue(); - // Recurse through adds (since generic dag canonicalizes to that - // form). TODO: Handle disjoint or here. - if (InVec->getOpcode() == ISD::ADD) { - SDValue A = InVec.getOperand(0); - SDValue B = InVec.getOperand(1); + // Recurse through adds/disjoint ors (since generic dag canonicalizes to that + // form). 
+ SDValue A, B; + if (sd_match(InVec, m_AddLike(m_Value(A), m_Value(B)))) { SDValue AOpt = foldReduceOperandViaVQDOT(A, DL, DAG, Subtarget, TLI); SDValue BOpt = foldReduceOperandViaVQDOT(B, DL, DAG, Subtarget, TLI); if (AOpt || BOpt) { @@ -19001,12 +19171,9 @@ static SDValue foldReduceOperandViaVQDOT(SDValue InVec, const SDLoc &DL, // mul (zext a, zext b) -> partial_reduce_umla 0, a, b // mul (sext a, zext b) -> partial_reduce_ssmla 0, a, b // mul (zext a, sext b) -> partial_reduce_smla 0, b, a (swapped) - if (InVec.getOpcode() != ISD::MUL) + if (!sd_match(InVec, m_Mul(m_Value(A), m_Value(B)))) return SDValue(); - SDValue A = InVec.getOperand(0); - SDValue B = InVec.getOperand(1); - if (!ISD::isExtOpcode(A.getOpcode())) return SDValue(); @@ -20081,6 +20248,17 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return V; break; case ISD::FMUL: { + using namespace SDPatternMatch; + SDLoc DL(N); + EVT VT = N->getValueType(0); + SDValue X, Y; + // InstCombine canonicalizes fneg (fmul x, y) -> fmul x, (fneg y), see + // hoistFNegAboveFMulFDiv. + // Undo this and sink the fneg so we match more fmsub/fnmadd patterns. 
+ if (sd_match(N, m_FMul(m_Value(X), m_OneUse(m_FNeg(m_Value(Y)))))) + return DAG.getNode(ISD::FNEG, DL, VT, + DAG.getNode(ISD::FMUL, DL, VT, X, Y)); + // fmul X, (copysign 1.0, Y) -> fsgnjx X, Y SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -20091,13 +20269,12 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N0->getOperand(0)); if (!C || !C->getValueAPF().isExactlyValue(+1.0)) return SDValue(); - EVT VT = N->getValueType(0); if (VT.isVector() || !isOperationLegal(ISD::FCOPYSIGN, VT)) return SDValue(); SDValue Sign = N0->getOperand(1); if (Sign.getValueType() != VT) return SDValue(); - return DAG.getNode(RISCVISD::FSGNJX, SDLoc(N), VT, N1, N0->getOperand(1)); + return DAG.getNode(RISCVISD::FSGNJX, DL, VT, N1, N0->getOperand(1)); } case ISD::FADD: case ISD::UMAX: @@ -20381,9 +20558,9 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, VT, DL, MGN->getChain(), BasePtr, DAG.getSignedConstant(StepNumerator, DL, XLenVT), MGN->getMask(), EVL, MGN->getMemOperand()); - SDValue VPSelect = DAG.getNode(ISD::VP_SELECT, DL, VT, MGN->getMask(), - StridedLoad, MGN->getPassThru(), EVL); - return DAG.getMergeValues({VPSelect, SDValue(StridedLoad.getNode(), 1)}, + SDValue Select = DAG.getSelect(DL, VT, MGN->getMask(), StridedLoad, + MGN->getPassThru()); + return DAG.getMergeValues({Select, SDValue(StridedLoad.getNode(), 1)}, DL); } } @@ -21060,6 +21237,38 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N, return N->getOperand(0); break; } + case RISCVISD::VSLIDE1UP_VL: + case RISCVISD::VFSLIDE1UP_VL: { + using namespace SDPatternMatch; + SDValue SrcVec; + SDLoc DL(N); + MVT VT = N->getSimpleValueType(0); + // If the scalar we're sliding in was extracted from the first element of a + // vector, we can use that vector as the passthru in a normal slideup of 1. + // This saves us an extract_element instruction (i.e. vfmv.f.s, vmv.x.s). 
+ if (!N->getOperand(0).isUndef() || + !sd_match(N->getOperand(2), + m_AnyOf(m_ExtractElt(m_Value(SrcVec), m_Zero()), + m_Node(RISCVISD::VMV_X_S, m_Value(SrcVec))))) + break; + + MVT SrcVecVT = SrcVec.getSimpleValueType(); + if (SrcVecVT.getVectorElementType() != VT.getVectorElementType()) + break; + // Adapt the value type of source vector. + if (SrcVecVT.isFixedLengthVector()) { + SrcVecVT = getContainerForFixedLengthVector(SrcVecVT); + SrcVec = convertToScalableVector(SrcVecVT, SrcVec, DAG, Subtarget); + } + if (SrcVecVT.getVectorMinNumElements() < VT.getVectorMinNumElements()) + SrcVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(VT), SrcVec, 0); + else + SrcVec = DAG.getExtractSubvector(DL, VT, SrcVec, 0); + + return getVSlideup(DAG, Subtarget, DL, VT, SrcVec, N->getOperand(1), + DAG.getConstant(1, DL, XLenVT), N->getOperand(3), + N->getOperand(4)); + } } return SDValue(); @@ -21120,9 +21329,14 @@ bool RISCVTargetLowering::isDesirableToCommuteWithShift( auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1)); auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)); - // Bail if we might break a sh{1,2,3}add pattern. - if ((Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 && - C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3 && N->hasOneUse() && + bool IsShXAdd = + (Subtarget.hasStdExtZba() || Subtarget.hasVendorXAndesPerf()) && C2 && + C2->getZExtValue() >= 1 && C2->getZExtValue() <= 3; + bool IsQCShlAdd = Subtarget.hasVendorXqciac() && C2 && + C2->getZExtValue() >= 4 && C2->getZExtValue() <= 31; + + // Bail if we might break a sh{1,2,3}add/qc.shladd pattern. 
+ if ((IsShXAdd || IsQCShlAdd) && N->hasOneUse() && N->user_begin()->getOpcode() == ISD::ADD && !isUsedByLdSt(*N->user_begin(), nullptr) && !isa<ConstantSDNode>(N->user_begin()->getOperand(1))) @@ -21346,6 +21560,24 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op, Known = Known.sext(BitWidth); break; } + case RISCVISD::SRLW: { + KnownBits Known2; + Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = KnownBits::lshr(Known.trunc(32), Known2.trunc(5).zext(32)); + // Restore the original width by sign extending. + Known = Known.sext(BitWidth); + break; + } + case RISCVISD::SRAW: { + KnownBits Known2; + Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); + Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); + Known = KnownBits::ashr(Known.trunc(32), Known2.trunc(5).zext(32)); + // Restore the original width by sign extending. + Known = Known.sext(BitWidth); + break; + } case RISCVISD::CTZW: { KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros(); @@ -21451,8 +21683,16 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( if (Tmp < 33) return 1; return 33; } + case RISCVISD::SRAW: { + unsigned Tmp = + DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); + // sraw produces at least 33 sign bits. If the input already has more than + // 33 sign bits sraw, will preserve them. + // TODO: A more precise answer could be calculated depending on known bits + // in the shift amount. 
+ return std::max(Tmp, 33U); + } case RISCVISD::SLLW: - case RISCVISD::SRAW: case RISCVISD::SRLW: case RISCVISD::DIVW: case RISCVISD::DIVUW: @@ -21463,9 +21703,7 @@ unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode( case RISCVISD::FCVT_WU_RV64: case RISCVISD::STRICT_FCVT_W_RV64: case RISCVISD::STRICT_FCVT_WU_RV64: - // TODO: As the result is sign-extended, this is conservatively correct. A - // more precise answer could be calculated for SRAW depending on known - // bits in the shift amount. + // TODO: As the result is sign-extended, this is conservatively correct. return 33; case RISCVISD::VMV_X_S: { // The number of sign bits of the scalar result is computed by obtaining the @@ -21548,6 +21786,14 @@ bool RISCVTargetLowering::canCreateUndefOrPoisonForTargetNode( // TODO: Add more target nodes. switch (Op.getOpcode()) { + case RISCVISD::SLLW: + case RISCVISD::SRAW: + case RISCVISD::SRLW: + case RISCVISD::RORW: + case RISCVISD::ROLW: + // Only the lower 5 bits of RHS are read, guaranteeing the rotate/shift + // amount is bounds. + return false; case RISCVISD::SELECT_CC: // Integer comparisons cannot create poison. 
assert(Op.getOperand(0).getValueType().isInteger() && @@ -24683,7 +24929,7 @@ RISCVTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, bool RISCVTargetLowering::shouldFoldSelectWithSingleBitTest( EVT VT, const APInt &AndMask) const { - if (Subtarget.hasStdExtZicond() || Subtarget.hasVendorXVentanaCondOps()) + if (Subtarget.hasCZEROLike()) return !Subtarget.hasStdExtZbs() && AndMask.ugt(1024); return TargetLowering::shouldFoldSelectWithSingleBitTest(VT, AndMask); } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h index fb63ebcfaace..4581c11356af 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.h +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h @@ -435,8 +435,8 @@ public: const APInt &GapMask) const override; bool lowerInterleavedStore(Instruction *Store, Value *Mask, - ShuffleVectorInst *SVI, - unsigned Factor) const override; + ShuffleVectorInst *SVI, unsigned Factor, + const APInt &GapMask) const override; bool lowerDeinterleaveIntrinsicToLoad(Instruction *Load, Value *Mask, IntrinsicInst *DI) const override; diff --git a/llvm/lib/Target/RISCV/RISCVIndirectBranchTracking.cpp b/llvm/lib/Target/RISCV/RISCVIndirectBranchTracking.cpp index 43621b8f0f33..9664ab345dcb 100644 --- a/llvm/lib/Target/RISCV/RISCVIndirectBranchTracking.cpp +++ b/llvm/lib/Target/RISCV/RISCVIndirectBranchTracking.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// The pass adds LPAD (AUIPC with rs1 = X0) machine instructions at the +// The pass adds LPAD (AUIPC with rd = X0) machine instructions at the // beginning of each basic block or function that is referenced by an indirect // jump/call instruction. 
// diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td b/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td index 209c3fae63f4..4c7cd05723ac 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormatsC.td @@ -54,7 +54,6 @@ class RVInst16CSS<bits<3> funct3, bits<2> opcode, dag outs, dag ins, : RVInst16<outs, ins, opcodestr, argstr, [], InstFormatCSS> { bits<10> imm; bits<5> rs2; - bits<5> rs1; let Inst{15-13} = funct3; let Inst{12-7} = imm{5-0}; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 7b4a1de16769..d0bb57a3eaa1 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -80,8 +80,8 @@ namespace llvm::RISCV { } // end namespace llvm::RISCV -RISCVInstrInfo::RISCVInstrInfo(RISCVSubtarget &STI) - : RISCVGenInstrInfo(RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP), +RISCVInstrInfo::RISCVInstrInfo(const RISCVSubtarget &STI) + : RISCVGenInstrInfo(STI, RISCV::ADJCALLSTACKDOWN, RISCV::ADJCALLSTACKUP), STI(STI) {} #define GET_INSTRINFO_HELPERS @@ -3511,6 +3511,9 @@ RISCVInstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI, return outliner::InstrType::Illegal; } + if (isLPAD(MI)) + return outliner::InstrType::Illegal; + return outliner::InstrType::Legal; } @@ -4796,8 +4799,22 @@ unsigned RISCV::getDestLog2EEW(const MCInstrDesc &Desc, unsigned Log2SEW) { return Scaled; } -/// Given two VL operands, do we know that LHS <= RHS? +static std::optional<int64_t> getEffectiveImm(const MachineOperand &MO) { + assert(MO.isImm() || MO.getReg().isVirtual()); + if (MO.isImm()) + return MO.getImm(); + const MachineInstr *Def = + MO.getParent()->getMF()->getRegInfo().getVRegDef(MO.getReg()); + int64_t Imm; + if (isLoadImm(Def, Imm)) + return Imm; + return std::nullopt; +} + +/// Given two VL operands, do we know that LHS <= RHS? Must be used in SSA form. 
bool RISCV::isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS) { + assert((LHS.isImm() || LHS.getParent()->getMF()->getRegInfo().isSSA()) && + (RHS.isImm() || RHS.getParent()->getMF()->getRegInfo().isSSA())); if (LHS.isReg() && RHS.isReg() && LHS.getReg().isVirtual() && LHS.getReg() == RHS.getReg()) return true; @@ -4807,9 +4824,11 @@ bool RISCV::isVLKnownLE(const MachineOperand &LHS, const MachineOperand &RHS) { return true; if (LHS.isImm() && LHS.getImm() == RISCV::VLMaxSentinel) return false; - if (!LHS.isImm() || !RHS.isImm()) + std::optional<int64_t> LHSImm = getEffectiveImm(LHS), + RHSImm = getEffectiveImm(RHS); + if (!LHSImm || !RHSImm) return false; - return LHS.getImm() <= RHS.getImm(); + return LHSImm <= RHSImm; } namespace { diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 785c8352d4a5..57ec431749eb 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -62,7 +62,7 @@ enum RISCVMachineCombinerPattern : unsigned { class RISCVInstrInfo : public RISCVGenInstrInfo { public: - explicit RISCVInstrInfo(RISCVSubtarget &STI); + explicit RISCVInstrInfo(const RISCVSubtarget &STI); MCInst getNop() const override; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 23f5a848137c..92552b36aa0b 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -1698,8 +1698,6 @@ let Predicates = [IsRV32] in { def : Pat<(i32 (setlt (i32 GPR:$rs1), 0)), (SRLI GPR:$rs1, 31)>; // compressible } let Predicates = [IsRV64] in { -def : Pat<(i64 (seteq (i64 (and GPR:$rs1, 0x0000000080000000)), 0)), - (XORI (i64 (SRLIW GPR:$rs1, 31)), 1)>; def : Pat<(i64 (setlt (i64 GPR:$rs1), 0)), (SRLI GPR:$rs1, 63)>; // compressible def : Pat<(i64 (setlt (sext_inreg GPR:$rs1, i32), 0)), (SRLIW GPR:$rs1, 31)>; } @@ -2330,7 +2328,6 @@ include "RISCVInstrInfoZalasr.td" include "RISCVInstrInfoZimop.td" 
include "RISCVInstrInfoZicbo.td" include "RISCVInstrInfoZicond.td" -include "RISCVInstrInfoZicfiss.td" include "RISCVInstrInfoZilsd.td" // Scalar FP @@ -2359,6 +2356,9 @@ include "RISCVInstrInfoZc.td" include "RISCVInstrInfoZcmop.td" include "RISCVInstrInfoZclsd.td" +// Control Flow Integrity, this requires Zimop/Zcmop +include "RISCVInstrInfoZicfiss.td" + // Short Forward Branch include "RISCVInstrInfoSFB.td" diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td index c5551fbdec28..9fc73662d970 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td @@ -230,13 +230,17 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in class CStackLoad<bits<3> funct3, string OpcodeStr, DAGOperand cls, DAGOperand opnd> : RVInst16CI<funct3, 0b10, (outs cls:$rd), (ins SPMem:$rs1, opnd:$imm), - OpcodeStr, "$rd, ${imm}(${rs1})">; + OpcodeStr, "$rd, ${imm}(${rs1})"> { + bits<0> rs1; +} let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in class CStackStore<bits<3> funct3, string OpcodeStr, DAGOperand cls, DAGOperand opnd> : RVInst16CSS<funct3, 0b10, (outs), (ins cls:$rs2, SPMem:$rs1, opnd:$imm), - OpcodeStr, "$rs2, ${imm}(${rs1})">; + OpcodeStr, "$rs2, ${imm}(${rs1})"> { + bits<0> rs1; +} let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in class CLoad_ri<bits<3> funct3, string OpcodeStr, @@ -301,14 +305,6 @@ def C_ADDI4SPN : RVInst16CIW<0b000, 0b00, (outs GPRC:$rd), let Inst{5} = imm{3}; } -let Predicates = [HasStdExtCOrZcd, HasStdExtD] in -def C_FLD : CLoad_ri<0b001, "c.fld", FPR64C, uimm8_lsb000>, - Sched<[WriteFLD64, ReadFMemBase]> { - bits<8> imm; - let Inst{12-10} = imm{5-3}; - let Inst{6-5} = imm{7-6}; -} - def C_LW : CLoad_ri<0b010, "c.lw", GPRC, uimm7_lsb00>, Sched<[WriteLDW, ReadMemBase]> { bits<7> imm; @@ -326,16 +322,6 @@ def C_LW_INX : CLoad_ri<0b010, "c.lw", GPRF32C, uimm7_lsb00>, let Inst{5} = imm{6}; } -let DecoderNamespace = "RV32Only", - Predicates = [HasStdExtCOrZcfOrZce,
HasStdExtF, IsRV32] in -def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00>, - Sched<[WriteFLD32, ReadFMemBase]> { - bits<7> imm; - let Inst{12-10} = imm{5-3}; - let Inst{6} = imm{2}; - let Inst{5} = imm{6}; -} - let Predicates = [HasStdExtZca, IsRV64] in def C_LD : CLoad_ri<0b011, "c.ld", GPRC, uimm8_lsb000>, Sched<[WriteLDD, ReadMemBase]> { @@ -344,14 +330,6 @@ def C_LD : CLoad_ri<0b011, "c.ld", GPRC, uimm8_lsb000>, let Inst{6-5} = imm{7-6}; } -let Predicates = [HasStdExtCOrZcd, HasStdExtD] in -def C_FSD : CStore_rri<0b101, "c.fsd", FPR64C, uimm8_lsb000>, - Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]> { - bits<8> imm; - let Inst{12-10} = imm{5-3}; - let Inst{6-5} = imm{7-6}; -} - def C_SW : CStore_rri<0b110, "c.sw", GPRC, uimm7_lsb00>, Sched<[WriteSTW, ReadStoreData, ReadMemBase]> { bits<7> imm; @@ -369,16 +347,6 @@ def C_SW_INX : CStore_rri<0b110, "c.sw", GPRF32C, uimm7_lsb00>, let Inst{5} = imm{6}; } -let DecoderNamespace = "RV32Only", - Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in -def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00>, - Sched<[WriteFST32, ReadFStoreData, ReadFMemBase]> { - bits<7> imm; - let Inst{12-10} = imm{5-3}; - let Inst{6} = imm{2}; - let Inst{5} = imm{6}; -} - let Predicates = [HasStdExtZca, IsRV64] in def C_SD : CStore_rri<0b111, "c.sd", GPRC, uimm8_lsb000>, Sched<[WriteSTD, ReadStoreData, ReadMemBase]> { @@ -500,12 +468,6 @@ def C_SLLI : RVInst16CI<0b000, 0b10, (outs GPR:$rd_wb), let Constraints = "$rd = $rd_wb"; } -let Predicates = [HasStdExtCOrZcd, HasStdExtD] in -def C_FLDSP : CStackLoad<0b001, "c.fldsp", FPR64, uimm9_lsb000>, - Sched<[WriteFLD64, ReadFMemBase]> { - let Inst{4-2} = imm{8-6}; -} - def C_LWSP : CStackLoad<0b010, "c.lwsp", GPRNoX0, uimm8_lsb00>, Sched<[WriteLDW, ReadMemBase]> { let Inst{3-2} = imm{7-6}; @@ -517,13 +479,6 @@ def C_LWSP_INX : CStackLoad<0b010, "c.lwsp", GPRF32NoX0, uimm8_lsb00>, let Inst{3-2} = imm{7-6}; } -let DecoderNamespace = "RV32Only", - Predicates = 
[HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in -def C_FLWSP : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00>, - Sched<[WriteFLD32, ReadFMemBase]> { - let Inst{3-2} = imm{7-6}; -} - let Predicates = [HasStdExtZca, IsRV64] in def C_LDSP : CStackLoad<0b011, "c.ldsp", GPRNoX0, uimm9_lsb000>, Sched<[WriteLDD, ReadMemBase]> { @@ -560,12 +515,6 @@ def C_ADD : RVInst16CR<0b1001, 0b10, (outs GPR:$rd), let Constraints = "$rs1 = $rd"; } -let Predicates = [HasStdExtCOrZcd, HasStdExtD] in -def C_FSDSP : CStackStore<0b101, "c.fsdsp", FPR64, uimm9_lsb000>, - Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]> { - let Inst{9-7} = imm{8-6}; -} - def C_SWSP : CStackStore<0b110, "c.swsp", GPR, uimm8_lsb00>, Sched<[WriteSTW, ReadStoreData, ReadMemBase]> { let Inst{8-7} = imm{7-6}; @@ -577,13 +526,6 @@ def C_SWSP_INX : CStackStore<0b110, "c.swsp", GPRF32, uimm8_lsb00>, let Inst{8-7} = imm{7-6}; } -let DecoderNamespace = "RV32Only", - Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in -def C_FSWSP : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00>, - Sched<[WriteFST32, ReadFStoreData, ReadFMemBase]> { - let Inst{8-7} = imm{7-6}; -} - let Predicates = [HasStdExtZca, IsRV64] in def C_SDSP : CStackStore<0b111, "c.sdsp", GPR, uimm9_lsb000>, Sched<[WriteSTD, ReadStoreData, ReadMemBase]> { @@ -600,6 +542,61 @@ def C_UNIMP : RVInst16<(outs), (ins), "c.unimp", "", [], InstFormatOther>, } // Predicates = [HasStdExtZca] +let DecoderNamespace = "RV32Only", + Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in { + def C_FLW : CLoad_ri<0b011, "c.flw", FPR32C, uimm7_lsb00>, + Sched<[WriteFLD32, ReadFMemBase]> { + bits<7> imm; + let Inst{12-10} = imm{5-3}; + let Inst{6} = imm{2}; + let Inst{5} = imm{6}; + } + + def C_FSW : CStore_rri<0b111, "c.fsw", FPR32C, uimm7_lsb00>, + Sched<[WriteFST32, ReadFStoreData, ReadFMemBase]> { + bits<7> imm; + let Inst{12-10} = imm{5-3}; + let Inst{6} = imm{2}; + let Inst{5} = imm{6}; + } + + def C_FLWSP : CStackLoad<0b011, "c.flwsp", FPR32, uimm8_lsb00>, 
+ Sched<[WriteFLD32, ReadFMemBase]> { + let Inst{3-2} = imm{7-6}; + } + + def C_FSWSP : CStackStore<0b111, "c.fswsp", FPR32, uimm8_lsb00>, + Sched<[WriteFST32, ReadFStoreData, ReadFMemBase]> { + let Inst{8-7} = imm{7-6}; + } +} // DecoderNamespace = "RV32Only", Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] + +let Predicates = [HasStdExtCOrZcd, HasStdExtD] in { + def C_FLD : CLoad_ri<0b001, "c.fld", FPR64C, uimm8_lsb000>, + Sched<[WriteFLD64, ReadFMemBase]> { + bits<8> imm; + let Inst{12-10} = imm{5-3}; + let Inst{6-5} = imm{7-6}; + } + + def C_FSD : CStore_rri<0b101, "c.fsd", FPR64C, uimm8_lsb000>, + Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]> { + bits<8> imm; + let Inst{12-10} = imm{5-3}; + let Inst{6-5} = imm{7-6}; + } + + def C_FLDSP : CStackLoad<0b001, "c.fldsp", FPR64, uimm9_lsb000>, + Sched<[WriteFLD64, ReadFMemBase]> { + let Inst{4-2} = imm{8-6}; + } + + def C_FSDSP : CStackStore<0b101, "c.fsdsp", FPR64, uimm9_lsb000>, + Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]> { + let Inst{9-7} = imm{8-6}; + } +} // Predicates = [HasStdExtCOrZcd, HasStdExtD] + //===----------------------------------------------------------------------===// // HINT Instructions //===----------------------------------------------------------------------===// @@ -767,20 +764,17 @@ def : InstAlias<".insn_cj $opcode, $funct3, $imm11", // Compress Instruction tablegen backend. //===----------------------------------------------------------------------===// -// Patterns are defined in the same order the compressed instructions appear +// Zca patterns are defined in the same order the compressed instructions appear // under the "RVC Instruction Set Listings" section of the ISA manual.
+// Zca Instructions + // Quadrant 0 let Predicates = [HasStdExtZca] in { def : CompressPat<(ADDI GPRC:$rd, SP:$rs1, uimm10_lsb00nonzero:$imm), (C_ADDI4SPN GPRC:$rd, SP:$rs1, uimm10_lsb00nonzero:$imm)>; } // Predicates = [HasStdExtZca] -let Predicates = [HasStdExtCOrZcd, HasStdExtD] in { -def : CompressPat<(FLD FPR64C:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm), - (C_FLD FPR64C:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm)>; -} // Predicates = [HasStdExtCOrZcd, HasStdExtD] - let Predicates = [HasStdExtZca] in { def : CompressPat<(LW GPRC:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm), (C_LW GPRC:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>; @@ -790,21 +784,11 @@ def : CompressPat<(LW_INX GPRF32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm), (C_LW_INX GPRF32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>; } // Predicates = [HasStdExtZca] -let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in { -def : CompressPat<(FLW FPR32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm), - (C_FLW FPR32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>; -} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] - let Predicates = [HasStdExtZca, IsRV64] in { def : CompressPat<(LD GPRC:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm), (C_LD GPRC:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm)>; } // Predicates = [HasStdExtZca, IsRV64] -let Predicates = [HasStdExtCOrZcd, HasStdExtD] in { -def : CompressPat<(FSD FPR64C:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm), - (C_FSD FPR64C:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm)>; -} // Predicates = [HasStdExtCOrZcd, HasStdExtD] - let Predicates = [HasStdExtZca] in { def : CompressPat<(SW GPRC:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm), (C_SW GPRC:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>; @@ -814,11 +798,6 @@ def : CompressPat<(SW_INX GPRF32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm), (C_SW_INX GPRF32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>; } // Predicates = [HasStdExtZca] -let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in { -def : CompressPat<(FSW FPR32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm), - (C_FSW FPR32C:$rs2, 
GPRCMem:$rs1, uimm7_lsb00:$imm)>; -} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] - let Predicates = [HasStdExtZca, IsRV64] in { def : CompressPat<(SD GPRC:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm), (C_SD GPRC:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm)>; @@ -907,11 +886,6 @@ def : CompressPat<(SLLI GPRNoX0:$rs1, GPRNoX0:$rs1, uimmlog2xlennonzero:$imm), (C_SLLI GPRNoX0:$rs1, uimmlog2xlennonzero:$imm)>; } // Predicates = [HasStdExtZca] -let Predicates = [HasStdExtCOrZcd, HasStdExtD] in { -def : CompressPat<(FLD FPR64:$rd, SPMem:$rs1, uimm9_lsb000:$imm), - (C_FLDSP FPR64:$rd, SPMem:$rs1, uimm9_lsb000:$imm)>; -} // Predicates = [HasStdExtCOrZcd, HasStdExtD] - let Predicates = [HasStdExtZca] in { def : CompressPat<(LW GPRNoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm), (C_LWSP GPRNoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>; @@ -921,11 +895,6 @@ def : CompressPat<(LW_INX GPRF32NoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm), (C_LWSP_INX GPRF32NoX0:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>; } // Predicates = [HasStdExtZca] -let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in { -def : CompressPat<(FLW FPR32:$rd, SPMem:$rs1, uimm8_lsb00:$imm), - (C_FLWSP FPR32:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>; -} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] - let Predicates = [HasStdExtZca, IsRV64] in { def : CompressPat<(LD GPRNoX0:$rd, SPMem:$rs1, uimm9_lsb000:$imm), (C_LDSP GPRNoX0:$rd, SPMem:$rs1, uimm9_lsb000:$imm)>; @@ -953,11 +922,6 @@ def : CompressPat<(ADD GPRNoX0:$rs1, GPRNoX0:$rs2, GPRNoX0:$rs1), (C_ADD GPRNoX0:$rs1, GPRNoX0:$rs2)>; } // Predicates = [HasStdExtZca] -let Predicates = [HasStdExtCOrZcd, HasStdExtD] in { -def : CompressPat<(FSD FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm), - (C_FSDSP FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm)>; -} // Predicates = [HasStdExtCOrZcd, HasStdExtD] - let Predicates = [HasStdExtZca] in { def : CompressPat<(SW GPR:$rs2, SPMem:$rs1, uimm8_lsb00:$imm), (C_SWSP GPR:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>; @@ -967,12 +931,38 @@ def : 
CompressPat<(SW_INX GPRF32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm), (C_SWSP_INX GPRF32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>; } // Predicates = [HasStdExtZca] -let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in { -def : CompressPat<(FSW FPR32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm), - (C_FSWSP FPR32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>; -} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] - let Predicates = [HasStdExtZca, IsRV64] in { def : CompressPat<(SD GPR:$rs2, SPMem:$rs1, uimm9_lsb000:$imm), (C_SDSP GPR:$rs2, SPMem:$rs1, uimm9_lsb000:$imm)>; } // Predicates = [HasStdExtZca, IsRV64] + +// Zcf Instructions +let Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] in { + // Quadrant 0 + def : CompressPat<(FLW FPR32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm), + (C_FLW FPR32C:$rd, GPRCMem:$rs1, uimm7_lsb00:$imm)>; + def : CompressPat<(FSW FPR32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm), + (C_FSW FPR32C:$rs2, GPRCMem:$rs1, uimm7_lsb00:$imm)>; + + // Quadrant 2 + def : CompressPat<(FLW FPR32:$rd, SPMem:$rs1, uimm8_lsb00:$imm), + (C_FLWSP FPR32:$rd, SPMem:$rs1, uimm8_lsb00:$imm)>; + def : CompressPat<(FSW FPR32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm), + (C_FSWSP FPR32:$rs2, SPMem:$rs1, uimm8_lsb00:$imm)>; +} // Predicates = [HasStdExtCOrZcfOrZce, HasStdExtF, IsRV32] + +// Zcd Instructions +let Predicates = [HasStdExtCOrZcd, HasStdExtD] in { + // Quadrant 0 + def : CompressPat<(FLD FPR64C:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm), + (C_FLD FPR64C:$rd, GPRCMem:$rs1, uimm8_lsb000:$imm)>; + def : CompressPat<(FSD FPR64C:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm), + (C_FSD FPR64C:$rs2, GPRCMem:$rs1, uimm8_lsb000:$imm)>; + + // Quadrant 2 + def : CompressPat<(FLD FPR64:$rd, SPMem:$rs1, uimm9_lsb000:$imm), + (C_FLDSP FPR64:$rd, SPMem:$rs1, uimm9_lsb000:$imm)>; + def : CompressPat<(FSD FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm), + (C_FSDSP FPR64:$rs2, SPMem:$rs1, uimm9_lsb000:$imm)>; +} // Predicates = [HasStdExtCOrZcd, HasStdExtD] + diff --git 
a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td index c342b41e41d0..6840dacaea54 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td @@ -25,7 +25,7 @@ def SImm8UnsignedAsmOperand : SImmAsmOperand<8, "Unsigned"> { } // A 8-bit signed immediate allowing range [-128, 255] -// but represented as [-128, 255]. +// but represented as [-128, 127]. def simm8_unsigned : RISCVOp { let ParserMatchClass = SImm8UnsignedAsmOperand; let EncoderMethod = "getImmOpValue"; @@ -98,6 +98,40 @@ class PLUI_i<bits<7> funct7, string opcodestr> let Inst{23-15} = imm10{9-1}; } +// Common base for widening Binary/Ternary ops +class RVPWideningBase<bits<2> w, bit arith_shift, dag outs, dag ins, + string opcodestr> + : RVInst<outs, ins, opcodestr, "$rd, $rs1, $rs2", [], InstFormatOther> { + bits<5> rs2; + bits<5> rs1; + bits<5> rd; + + let Inst{31} = 0b0; + let Inst{26-25} = w; + let Inst{24-20} = rs2; + let Inst{19-15} = rs1; + let Inst{14-12} = 0b010; + let Inst{11-8} = rd{4-1}; + let Inst{7} = arith_shift; + let Inst{6-0} = OPC_OP_IMM_32.Value; +} + +// Common base for narrowing ops +class RVPNarrowingBase<bits<3> f, bit r, bits<4> funct4, dag outs, dag ins, + string opcodestr, string argstr> + : RVInst<outs, ins, opcodestr, argstr, [], InstFormatOther> { + bits<5> rs1; + bits<5> rd; + + let Inst{31} = 0b0; + let Inst{30-28} = f; + let Inst{27} = r; + let Inst{19-16} = rs1{4-1}; + let Inst{15-12} = funct4; + let Inst{11-7} = rd; + let Inst{6-0} = OPC_OP_IMM_32.Value; +} + let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class RVPShift_ri<bits<3> f, bits<3> funct3, string opcodestr, Operand ImmType> : RVInstIBase<funct3, OPC_OP_IMM_32, (outs GPR:$rd), @@ -141,6 +175,100 @@ class RVPShiftB_ri<bits<3> f, bits<3> funct3, string opcodestr> } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class RVPWideningShift_ri<bits<3> f, string opcodestr, Operand ImmType> + : RVInst<(outs GPRPairRV32:$rd), (ins 
GPR:$rs1, ImmType:$shamt), opcodestr, + "$rd, $rs1, $shamt", [], InstFormatOther> { + bits<5> rs1; + bits<5> rd; + + let Inst{31} = 0b0; + let Inst{30-28} = f; + let Inst{27} = 0b0; + let Inst{19-15} = rs1; + let Inst{14-12} = 0b010; + let Inst{11-8} = rd{4-1}; + let Inst{7} = 0b0; + let Inst{6-0} = OPC_OP_IMM_32.Value; + + let hasSideEffects = 0; + let mayLoad = 0; + let mayStore = 0; +} + +class RVPWideningShiftW_ri<bits<3> f, string opcodestr> + : RVPWideningShift_ri<f, opcodestr, uimm6> { + bits<6> shamt; + + let Inst{26} = 0b1; + let Inst{25-20} = shamt; +} + +class RVPWideningShiftH_ri<bits<3> f, string opcodestr> + : RVPWideningShift_ri<f, opcodestr, uimm5> { + bits<5> shamt; + + let Inst{26-25} = 0b01; + let Inst{24-20} = shamt; +} + +class RVPWideningShiftB_ri<bits<3> f, string opcodestr> + : RVPWideningShift_ri<f, opcodestr, uimm4> { + bits<4> shamt; + + let Inst{26-24} = 0b001; + let Inst{23-20} = shamt; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class RVPNarrowingShift_ri<bits<3> f, string opcodestr, Operand ImmType> + : RVPNarrowingBase<f, 0b0, 0b1100, (outs GPR:$rd), + (ins GPRPairRV32:$rs1, ImmType:$shamt), opcodestr, + "$rd, $rs1, $shamt">; + +class RVPNarrowingShiftW_ri<bits<3> f, string opcodestr> + : RVPNarrowingShift_ri<f, opcodestr, uimm6> { + bits<6> shamt; + + let Inst{26} = 0b1; + let Inst{25-20} = shamt; +} + +class RVPNarrowingShiftH_ri<bits<3> f, string opcodestr> + : RVPNarrowingShift_ri<f, opcodestr, uimm5> { + bits<5> shamt; + + let Inst{26-25} = 0b01; + let Inst{24-20} = shamt; +} + +class RVPNarrowingShiftB_ri<bits<3> f, string opcodestr> + : RVPNarrowingShift_ri<f, opcodestr, uimm4> { + bits<4> shamt; + + let Inst{26-24} = 0b001; + let Inst{23-20} = shamt; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class RVPNarrowingShift_rr<bits<3> f, bits<2> w, string opcodestr> + : RVPNarrowingBase<f, 0b1, 0b1100, (outs GPR:$rd), + (ins GPRPairRV32:$rs1, GPR:$rs2), opcodestr, + "$rd, $rs1, $rs2"> { + bits<5> rs2; 
+ + let Inst{26-25} = w; + let Inst{24-20} = rs2; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class RVPWideningShift_rr<bits<3> f, bits<2> w, string opcodestr> + : RVPWideningBase<w, 0b0, (outs GPRPairRV32:$rd), (ins GPR:$rs1, GPR:$rs2), + opcodestr> { + let Inst{30-28} = f; + let Inst{27} = 0b1; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class RVPUnary_ri<bits<2> w, bits<5> uf, string opcodestr> : RVInstIBase<0b010, OPC_OP_IMM_32, (outs GPR:$rd), (ins GPR:$rs1), opcodestr, "$rd, $rs1"> { @@ -169,6 +297,24 @@ class RVPBinary_rr<bits<4> f, bits<2> w, bits<3> funct3, string opcodestr> } let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class RVPWideningBinary_rr<bits<4> f, bits<2> w, string opcodestr> + : RVPWideningBase<w, 0b1, (outs GPRPairRV32:$rd), (ins GPR:$rs1, GPR:$rs2), + opcodestr> { + let Inst{30-27} = f; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class RVPNarrowingBinary_rr<bits<3> f, bits<2> w, string opcodestr> + : RVPNarrowingBase<f, 0b1, 0b0100, (outs GPR:$rd), + (ins GPRPairRV32:$rs1, GPR:$rs2), opcodestr, + "$rd, $rs1, $rs2"> { + bits<5> rs2; + + let Inst{26-25} = w; + let Inst{24-20} = rs2; +} + +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class RVPTernary_rrr<bits<4> f, bits<2> w, bits<3> funct3, string opcodestr> : RVInstRBase<funct3, OPC_OP_32, (outs GPR:$rd_wb), (ins GPR:$rd, GPR:$rs1, GPR:$rs2), opcodestr, @@ -180,6 +326,15 @@ class RVPTernary_rrr<bits<4> f, bits<2> w, bits<3> funct3, string opcodestr> let Constraints = "$rd = $rd_wb"; } +let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in +class RVPWideningTernary_rrr<bits<4> f, bits<2> w, string opcodestr> + : RVPWideningBase<w, 0b1, (outs GPRPairRV32:$rd_wb), + (ins GPR:$rd, GPR:$rs1, GPR:$rs2), opcodestr> { + let Inst{30-27} = f; + + let Constraints = "$rd = $rd_wb"; +} + // Common base for pli.db/h/w and plui.dh/w class RVPPairLoadImm_i<bits<7> funct7, dag ins, string opcodestr, string argstr> @@ -889,3 +1044,156 @@ let 
Predicates = [HasStdExtP, IsRV32] in { let Inst{23-15} = imm10{9-1}; } } + +let Predicates = [HasStdExtP, IsRV32] in { + def PWSLLI_B : RVPWideningShiftB_ri<0b000, "pwslli.b">; + def PWSLLI_H : RVPWideningShiftH_ri<0b000, "pwslli.h">; + def WSLLI : RVPWideningShiftW_ri<0b000, "wslli">; + + def PWSLAI_B : RVPWideningShiftB_ri<0b100, "pwslai.b">; + def PWSLAI_H : RVPWideningShiftH_ri<0b100, "pwslai.h">; + def WSLAI : RVPWideningShiftW_ri<0b100, "wslai">; + + def PWSLL_BS : RVPWideningShift_rr<0b000, 0b00, "pwsll.bs">; + def PWSLL_HS : RVPWideningShift_rr<0b000, 0b01, "pwsll.hs">; + def WSLL : RVPWideningShift_rr<0b000, 0b11, "wsll">; + + def PWSLA_BS : RVPWideningShift_rr<0b100, 0b00, "pwsla.bs">; + def PWSLA_HS : RVPWideningShift_rr<0b100, 0b01, "pwsla.hs">; + def WSLA : RVPWideningShift_rr<0b100, 0b11, "wsla">; + + def WZIP8P : RVPWideningShift_rr<0b111, 0b00, "wzip8p">; + def WZIP16P : RVPWideningShift_rr<0b111, 0b01, "wzip16p">; + + def PWADD_H : RVPWideningBinary_rr<0b0000, 0b00, "pwadd.h">; + def WADD : RVPWideningBinary_rr<0b0000, 0b01, "wadd">; + def PWADD_B : RVPWideningBinary_rr<0b0000, 0b10, "pwadd.b">; + def PM2WADD_H : RVPWideningBinary_rr<0b0000, 0b11, "pm2wadd.h">; + + def PWADDA_H : RVPWideningTernary_rrr<0b0001, 0b00, "pwadda.h">; + def WADDA : RVPWideningTernary_rrr<0b0001, 0b01, "wadda">; + def PWADDA_B : RVPWideningTernary_rrr<0b0001, 0b10, "pwadda.b">; + def PM2WADDA_H : RVPWideningTernary_rrr<0b0001, 0b11, "pm2wadda.h">; + + def PWADDU_H : RVPWideningBinary_rr<0b0010, 0b00, "pwaddu.h">; + def WADDU : RVPWideningBinary_rr<0b0010, 0b01, "waddu">; + def PWADDU_B : RVPWideningBinary_rr<0b0010, 0b10, "pwaddu.b">; + def PM2WADD_HX : RVPWideningBinary_rr<0b0010, 0b11, "pm2wadd.hx">; + + def PWADDAU_H : RVPWideningTernary_rrr<0b0011, 0b00, "pwaddau.h">; + def WADDAU : RVPWideningTernary_rrr<0b0011, 0b01, "waddau">; + def PWADDAU_B : RVPWideningTernary_rrr<0b0011, 0b10, "pwaddau.b">; + def PM2WADDA_HX : RVPWideningTernary_rrr<0b0011, 0b11, 
"pm2wadda.hx">; + + def PWMUL_H : RVPWideningBinary_rr<0b0100, 0b00, "pwmul.h">; + def WMUL : RVPWideningBinary_rr<0b0100, 0b01, "wmul">; + def PWMUL_B : RVPWideningBinary_rr<0b0100, 0b10, "pwmul.b">; + def PM2WADDU_H : RVPWideningBinary_rr<0b0100, 0b11, "pm2waddu.h">; + + def PWMACC_H : RVPWideningTernary_rrr<0b0101, 0b00, "pwmacc.h">; + def WMACC : RVPWideningTernary_rrr<0b0101, 0b01, "wmacc">; + def PM2WADDAU_H : RVPWideningTernary_rrr<0b0101, 0b11, "pm2waddau.h">; + + def PWMULU_H : RVPWideningBinary_rr<0b0110, 0b00, "pwmulu.h">; + def WMULU : RVPWideningBinary_rr<0b0110, 0b01, "wmulu">; + def PWMULU_B : RVPWideningBinary_rr<0b0110, 0b10, "pwmulu.b">; + + def PWMACCU_H : RVPWideningTernary_rrr<0b0111, 0b00, "pwmaccu.h">; + def WMACCU : RVPWideningTernary_rrr<0b0111, 0b01, "wmaccu">; + + def PWSUB_H : RVPWideningBinary_rr<0b1000, 0b00, "pwsub.h">; + def WSUB : RVPWideningBinary_rr<0b1000, 0b01, "wsub">; + def PWSUB_B : RVPWideningBinary_rr<0b1000, 0b10, "pwsub.b">; + def PM2WSUB_H : RVPWideningBinary_rr<0b1000, 0b11, "pm2wsub.h">; + + def PWSUBA_H : RVPWideningTernary_rrr<0b1001, 0b00, "pwsuba.h">; + def WSUBA : RVPWideningTernary_rrr<0b1001, 0b01, "wsuba">; + def PWSUBA_B : RVPWideningTernary_rrr<0b1001, 0b10, "pwsuba.b">; + def PM2WSUBA_H : RVPWideningTernary_rrr<0b1001, 0b11, "pm2wsuba.h">; + + def PWSUBU_H : RVPWideningBinary_rr<0b1010, 0b00, "pwsubu.h">; + def WSUBU : RVPWideningBinary_rr<0b1010, 0b01, "wsubu">; + def PWSUBU_B : RVPWideningBinary_rr<0b1010, 0b10, "pwsubu.b">; + def PM2WSUB_HX : RVPWideningBinary_rr<0b1010, 0b11, "pm2wsub.hx">; + + def PWSUBAU_H : RVPWideningTernary_rrr<0b1011, 0b00, "pwsubau.h">; + def WSUBAU : RVPWideningTernary_rrr<0b1011, 0b01, "wsubau">; + def PWSUBAU_B : RVPWideningTernary_rrr<0b1011, 0b10, "pwsubau.b">; + def PM2WSUBA_HX : RVPWideningTernary_rrr<0b1011, 0b11, "pm2wsuba.hx">; + + def PWMULSU_H : RVPWideningBinary_rr<0b1100, 0b00, "pwmulsu.h">; + def WMULSU : RVPWideningBinary_rr<0b1100, 0b01, "wmulsu">; + def PWMULSU_B 
: RVPWideningBinary_rr<0b1100, 0b10, "pwmulsu.b">; + def PM2WADDSU_H : RVPWideningBinary_rr<0b1100, 0b11, "pm2waddsu.h">; + + def PWMACCSU_H : RVPWideningTernary_rrr<0b1101, 0b00, "pwmaccsu.h">; + def WMACCSU : RVPWideningTernary_rrr<0b1101, 0b01, "wmaccsu">; + def PM2WADDASU_H : RVPWideningTernary_rrr<0b1101, 0b11, "pm2waddasu.h">; + + def PMQWACC_H : RVPWideningTernary_rrr<0b1111, 0b00, "pmqwacc.h">; + def PMQWACC : RVPWideningTernary_rrr<0b1111, 0b01, "pmqwacc">; + def PMQRWACC_H : RVPWideningTernary_rrr<0b1111, 0b10, "pmqrwacc.h">; + def PMQRWACC : RVPWideningTernary_rrr<0b1111, 0b11, "pmqrwacc">; + + def PREDSUM_DHS : RVPNarrowingBinary_rr<0b001, 0b00, "predsum.dhs">; + def PREDSUM_DBS : RVPNarrowingBinary_rr<0b001, 0b10, "predsum.dbs">; + + def PREDSUMU_DHS : RVPNarrowingBinary_rr<0b011, 0b00, "predsumu.dhs">; + def PREDSUMU_DBS : RVPNarrowingBinary_rr<0b011, 0b10, "predsumu.dbs">; + + def PNSRLI_B : RVPNarrowingShiftB_ri<0b000, "pnsrli.b">; + def PNSRLI_H : RVPNarrowingShiftH_ri<0b000, "pnsrli.h">; + def NSRLI : RVPNarrowingShiftW_ri<0b000, "nsrli">; + + def PNCLIPIU_B : RVPNarrowingShiftB_ri<0b010, "pnclipiu.b">; + def PNCLIPIU_H : RVPNarrowingShiftH_ri<0b010, "pnclipiu.h">; + def NCLIPIU : RVPNarrowingShiftW_ri<0b010, "nclipiu">; + + def PNCLIPRIU_B : RVPNarrowingShiftB_ri<0b011, "pnclipriu.b">; + def PNCLIPRIU_H : RVPNarrowingShiftH_ri<0b011, "pnclipriu.h">; + def NCLIPRIU : RVPNarrowingShiftW_ri<0b011, "nclipriu">; + + def PNSRAI_B : RVPNarrowingShiftB_ri<0b100, "pnsrai.b">; + def PNSRAI_H : RVPNarrowingShiftH_ri<0b100, "pnsrai.h">; + def NSRAI : RVPNarrowingShiftW_ri<0b100, "nsrai">; + + def PNSARI_B : RVPNarrowingShiftB_ri<0b101, "pnsari.b">; + def PNSARI_H : RVPNarrowingShiftH_ri<0b101, "pnsari.h">; + def NSARI : RVPNarrowingShiftW_ri<0b101, "nsari">; + + def PNCLIPI_B : RVPNarrowingShiftB_ri<0b110, "pnclipi.b">; + def PNCLIPI_H : RVPNarrowingShiftH_ri<0b110, "pnclipi.h">; + def NCLIPI : RVPNarrowingShiftW_ri<0b110, "nclipi">; + + def PNCLIPRI_B : 
RVPNarrowingShiftB_ri<0b111, "pnclipri.b">; + def PNCLIPRI_H : RVPNarrowingShiftH_ri<0b111, "pnclipri.h">; + def NCLIPRI : RVPNarrowingShiftW_ri<0b111, "nclipri">; + + def PNSRL_BS : RVPNarrowingShift_rr<0b000, 0b00, "pnsrl.bs">; + def PNSRL_HS : RVPNarrowingShift_rr<0b000, 0b01, "pnsrl.hs">; + def NSRL : RVPNarrowingShift_rr<0b000, 0b11, "nsrl">; + + def PNCLIPU_BS : RVPNarrowingShift_rr<0b010, 0b00, "pnclipu.bs">; + def PNCLIPU_HS : RVPNarrowingShift_rr<0b010, 0b01, "pnclipu.hs">; + def NCLIPU : RVPNarrowingShift_rr<0b010, 0b11, "nclipu">; + + def PNCLIPRU_BS : RVPNarrowingShift_rr<0b011, 0b00, "pnclipru.bs">; + def PNCLIPRU_HS : RVPNarrowingShift_rr<0b011, 0b01, "pnclipru.hs">; + def NCLIPRU : RVPNarrowingShift_rr<0b011, 0b11, "nclipru">; + + def PNSRA_BS : RVPNarrowingShift_rr<0b100, 0b00, "pnsra.bs">; + def PNSRA_HS : RVPNarrowingShift_rr<0b100, 0b01, "pnsra.hs">; + def NSRA : RVPNarrowingShift_rr<0b100, 0b11, "nsra">; + + def PNSRAR_BS : RVPNarrowingShift_rr<0b101, 0b00, "pnsrar.bs">; + def PNSRAR_HS : RVPNarrowingShift_rr<0b101, 0b01, "pnsrar.hs">; + def NSRAR : RVPNarrowingShift_rr<0b101, 0b11, "nsrar">; + + def PNCLIP_BS : RVPNarrowingShift_rr<0b110, 0b00, "pnclip.bs">; + def PNCLIP_HS : RVPNarrowingShift_rr<0b110, 0b01, "pnclip.hs">; + def NCLIP : RVPNarrowingShift_rr<0b110, 0b11, "nclip">; + + def PNCLIPR_BS : RVPNarrowingShift_rr<0b111, 0b00, "pnclipr.bs">; + def PNCLIPR_HS : RVPNarrowingShift_rr<0b111, 0b01, "pnclipr.hs">; + def NCLIPR : RVPNarrowingShift_rr<0b111, 0b11, "nclipr">; +} // Predicates = [HasStdExtP, IsRV32] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td index 32f533b8f114..f732ab13e5f8 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoSFB.td @@ -44,153 +44,95 @@ def PseudoCCMOVGPRNoX0 : Pseudo<(outs GPRNoX0:$dst), Sched<[]>; } +class SFBALU_rr + : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, + 
GPR:$rs2), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU, + ReadSFBALU]> { + let hasSideEffects = 0; + let mayLoad = 0; + let mayStore = 0; + let Size = 8; + let Constraints = "$dst = $falsev"; +} + +class SFBALU_ri + : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, + simm12:$imm), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]> { + let hasSideEffects = 0; + let mayLoad = 0; + let mayStore = 0; + let Size = 8; + let Constraints = "$dst = $falsev"; +} + +class SFBShift_ri + : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, + uimmlog2xlen:$imm), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]> { + let hasSideEffects = 0; + let mayLoad = 0; + let mayStore = 0; + let Size = 8; + let Constraints = "$dst = $falsev"; +} + +class SFBShiftW_ri + : Pseudo<(outs GPR:$dst), + (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, GPR:$falsev, GPR:$rs1, + uimm5:$imm), []>, + Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, ReadSFBALU]> { + let hasSideEffects = 0; + let mayLoad = 0; + let mayStore = 0; + let Size = 8; + let Constraints = "$dst = $falsev"; +} + // Conditional binops that update $dst to (op rs1, rs2) when condition // is true. Returns $falsev otherwise. Selected by optimizeSelect. // TODO: Can we use DefaultOperands on the regular binop to accomplish this more // like how ARM does predication?
-let Predicates = [HasShortForwardBranchOpt], hasSideEffects = 0, - mayLoad = 0, mayStore = 0, Size = 8, Constraints = "$dst = $falsev" in { -def PseudoCCADD : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, - ReadSFBALU, ReadSFBALU, ReadSFBALU]>; -def PseudoCCSUB : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, - ReadSFBALU, ReadSFBALU, ReadSFBALU]>; -def PseudoCCSLL : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, - ReadSFBALU, ReadSFBALU]>; -def PseudoCCSRL : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, - ReadSFBALU, ReadSFBALU]>; -def PseudoCCSRA : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, - ReadSFBALU, ReadSFBALU]>; -def PseudoCCAND : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, - ReadSFBALU, ReadSFBALU, ReadSFBALU]>; -def PseudoCCOR : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, - ReadSFBALU, ReadSFBALU, ReadSFBALU]>; -def PseudoCCXOR : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, - ReadSFBALU, ReadSFBALU, ReadSFBALU]>; +let Predicates = [HasShortForwardBranchOpt] in { +def PseudoCCADD : SFBALU_rr; +def PseudoCCSUB : SFBALU_rr; +def PseudoCCSLL : SFBALU_rr; +def PseudoCCSRL : SFBALU_rr; +def PseudoCCSRA 
: SFBALU_rr; +def PseudoCCAND : SFBALU_rr; +def PseudoCCOR : SFBALU_rr; +def PseudoCCXOR : SFBALU_rr; -def PseudoCCADDI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, - ReadSFBALU]>; -def PseudoCCSLLI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, uimmlog2xlen:$shamt), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, - ReadSFBALU]>; -def PseudoCCSRLI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, uimmlog2xlen:$shamt), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, - ReadSFBALU]>; -def PseudoCCSRAI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, uimmlog2xlen:$shamt), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, - ReadSFBALU]>; -def PseudoCCANDI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, - ReadSFBALU]>; -def PseudoCCORI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, - ReadSFBALU]>; -def PseudoCCXORI : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, - ReadSFBALU]>; +def PseudoCCADDI : SFBALU_ri; +def PseudoCCANDI : SFBALU_ri; +def PseudoCCORI : SFBALU_ri; +def PseudoCCXORI : SFBALU_ri; + +def PseudoCCSLLI : SFBShift_ri; +def PseudoCCSRLI : SFBShift_ri; +def PseudoCCSRAI : SFBShift_ri; // RV64I instructions -def PseudoCCADDW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, - ReadSFBALU, ReadSFBALU, ReadSFBALU]>; -def 
PseudoCCSUBW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, - ReadSFBALU, ReadSFBALU, ReadSFBALU]>; -def PseudoCCSLLW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, - ReadSFBALU, ReadSFBALU]>; -def PseudoCCSRLW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, - ReadSFBALU, ReadSFBALU]>; -def PseudoCCSRAW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, - ReadSFBALU, ReadSFBALU]>; +def PseudoCCADDW : SFBALU_rr; +def PseudoCCSUBW : SFBALU_rr; +def PseudoCCSLLW : SFBALU_rr; +def PseudoCCSRLW : SFBALU_rr; +def PseudoCCSRAW : SFBALU_rr; + +def PseudoCCADDIW : SFBALU_ri; -def PseudoCCADDIW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, simm12:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, - ReadSFBALU]>; -def PseudoCCSLLIW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, uimm5:$shamt), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, - ReadSFBALU]>; -def PseudoCCSRLIW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, uimm5:$shamt), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, - ReadSFBALU]>; -def PseudoCCSRAIW : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, uimm5:$shamt), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, ReadSFBALU, - ReadSFBALU]>; +def PseudoCCSLLIW : SFBShiftW_ri; +def PseudoCCSRLIW : SFBShiftW_ri; +def PseudoCCSRAIW : SFBShiftW_ri; // Zbb/Zbkb instructions -def PseudoCCANDN : Pseudo<(outs 
GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, - ReadSFBALU, ReadSFBALU, ReadSFBALU]>; -def PseudoCCORN : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, - ReadSFBALU, ReadSFBALU, ReadSFBALU]>; -def PseudoCCXNOR : Pseudo<(outs GPR:$dst), - (ins GPR:$lhs, GPR:$rhs, cond_code:$cc, - GPR:$falsev, GPR:$rs1, GPR:$rs2), []>, - Sched<[WriteSFB, ReadSFBJmp, ReadSFBJmp, - ReadSFBALU, ReadSFBALU, ReadSFBALU]>; +def PseudoCCANDN : SFBALU_rr; +def PseudoCCORN : SFBALU_rr; +def PseudoCCXNOR : SFBALU_rr; } let Predicates = [HasShortForwardBranchOpt] in diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index acbccddce2b5..063ee5c5e8b9 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -830,19 +830,6 @@ multiclass VPatTiedBinaryNoMaskVL_V<SDNode vop, result_reg_class:$rs1, op2_reg_class:$rs2, GPR:$vl, sew, TAIL_AGNOSTIC)>; - // Tail undisturbed - def : Pat<(riscv_vmerge_vl true_mask, - (result_type (vop - result_reg_class:$rs1, - (op2_type op2_reg_class:$rs2), - srcvalue, - true_mask, - VLOpFrag)), - result_reg_class:$rs1, result_reg_class:$rs1, VLOpFrag), - (!cast<Instruction>(instruction_name#"_"#suffix#"_"# vlmul.MX#"_TIED") - result_reg_class:$rs1, - op2_reg_class:$rs2, - GPR:$vl, sew, TU_MU)>; } class VPatTiedBinaryMaskVL_V<SDNode vop, @@ -892,22 +879,6 @@ multiclass VPatTiedBinaryNoMaskVL_V_RM<SDNode vop, // RISCVInsertReadWriteCSR FRM_DYN, GPR:$vl, log2sew, TAIL_AGNOSTIC)>; - // Tail undisturbed - def : Pat<(riscv_vmerge_vl true_mask, - (result_type (vop - result_reg_class:$rs1, - (op2_type op2_reg_class:$rs2), - srcvalue, - true_mask, - VLOpFrag)), - result_reg_class:$rs1, result_reg_class:$rs1, VLOpFrag), - (!cast<Instruction>(name) - 
result_reg_class:$rs1, - op2_reg_class:$rs2, - // Value to indicate no rounding mode change in - // RISCVInsertReadWriteCSR - FRM_DYN, - GPR:$vl, log2sew, TU_MU)>; } class VPatBinaryVL_XI<SDPatternOperator vop, @@ -1755,50 +1726,6 @@ multiclass VPatMultiplyAddVL_VV_VX<SDNode op, string instruction_name> { } } -multiclass VPatMultiplyAccVL_VV_VX<PatFrag op, string instruction_name> { - foreach vti = AllIntegerVectors in { - defvar suffix = vti.LMul.MX; - let Predicates = GetVTypePredicates<vti>.Predicates in { - def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), - (vti.Vector (op vti.RegClass:$rd, - (riscv_mul_vl_oneuse vti.RegClass:$rs1, vti.RegClass:$rs2, - srcvalue, (vti.Mask true_mask), VLOpFrag), - srcvalue, (vti.Mask true_mask), VLOpFrag)), - vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), - (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK") - vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), - (vti.Vector (op vti.RegClass:$rd, - (riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rs2, - srcvalue, (vti.Mask true_mask), VLOpFrag), - srcvalue, (vti.Mask true_mask), VLOpFrag)), - vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), - (!cast<Instruction>(instruction_name#"_VX_"# suffix #"_MASK") - vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), - (vti.Vector (op vti.RegClass:$rd, - (riscv_mul_vl_oneuse vti.RegClass:$rs1, vti.RegClass:$rs2, - srcvalue, (vti.Mask true_mask), VLOpFrag), - srcvalue, (vti.Mask true_mask), VLOpFrag)), - vti.RegClass:$rd, undef, VLOpFrag), - (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK") - vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), - (vti.Vector (op vti.RegClass:$rd, - 
(riscv_mul_vl_oneuse (SplatPat XLenVT:$rs1), vti.RegClass:$rs2, - srcvalue, (vti.Mask true_mask), VLOpFrag), - srcvalue, (vti.Mask true_mask), VLOpFrag)), - vti.RegClass:$rd, undef, VLOpFrag), - (!cast<Instruction>(instruction_name#"_VX_"# suffix #"_MASK") - vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - } - } -} - multiclass VPatWidenMultiplyAddVL_VV_VX<SDNode vwmacc_op, string instr_name> { foreach vtiTowti = AllWidenableIntVectors in { defvar vti = vtiTowti.Vti; @@ -1898,82 +1825,6 @@ multiclass VPatFPMulAddVL_VV_VF_RM<SDPatternOperator vop, string instruction_nam } } -multiclass VPatFPMulAccVL_VV_VF_RM<PatFrag vop, string instruction_name> { - foreach vti = AllFloatVectors in { - defvar suffix = vti.LMul.MX # "_E" # vti.SEW; - let Predicates = GetVTypePredicates<vti>.Predicates in { - def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), - (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2, - vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), - vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), - (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK") - vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask VMV0:$vm), - // Value to indicate no rounding mode change in - // RISCVInsertReadWriteCSR - FRM_DYN, - GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), - (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2, - vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), - vti.RegClass:$rd, vti.RegClass:$rd, VLOpFrag), - (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") - vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask VMV0:$vm), - // Value to indicate no rounding mode change in - // RISCVInsertReadWriteCSR - FRM_DYN, - GPR:$vl, vti.Log2SEW, TU_MU)>; - def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), - (vti.Vector (vop vti.RegClass:$rs1, vti.RegClass:$rs2, - 
vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), - vti.RegClass:$rd, undef, VLOpFrag), - (!cast<Instruction>(instruction_name#"_VV_"# suffix #"_MASK") - vti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask VMV0:$vm), - // Value to indicate no rounding mode change in - // RISCVInsertReadWriteCSR - FRM_DYN, - GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - def : Pat<(riscv_vmerge_vl (vti.Mask VMV0:$vm), - (vti.Vector (vop (SplatFPOp vti.ScalarRegClass:$rs1), vti.RegClass:$rs2, - vti.RegClass:$rd, (vti.Mask true_mask), VLOpFrag)), - vti.RegClass:$rd, undef, VLOpFrag), - (!cast<Instruction>(instruction_name#"_V" # vti.ScalarSuffix # "_" # suffix # "_MASK") - vti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask VMV0:$vm), - // Value to indicate no rounding mode change in - // RISCVInsertReadWriteCSR - FRM_DYN, - GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; - } - } -} - -multiclass VPatWidenFPMulAccVL_VV_VF<SDNode vop, string instruction_name> { - foreach vtiToWti = AllWidenableFloatVectors in { - defvar vti = vtiToWti.Vti; - defvar wti = vtiToWti.Wti; - let Predicates = !listconcat(GetVTypePredicates<vti>.Predicates, - GetVTypePredicates<wti>.Predicates) in { - def : Pat<(vop (vti.Vector vti.RegClass:$rs1), - (vti.Vector vti.RegClass:$rs2), - (wti.Vector wti.RegClass:$rd), (vti.Mask VMV0:$vm), - VLOpFrag), - (!cast<Instruction>(instruction_name#"_VV_"#vti.LMul.MX #"_MASK") - wti.RegClass:$rd, vti.RegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; - def : Pat<(vop (vti.Vector (SplatFPOp vti.ScalarRegClass:$rs1)), - (vti.Vector vti.RegClass:$rs2), - (wti.Vector wti.RegClass:$rd), (vti.Mask VMV0:$vm), - VLOpFrag), - (!cast<Instruction>(instruction_name#"_V"#vti.ScalarSuffix#"_"#vti.LMul.MX #"_MASK") - wti.RegClass:$rd, vti.ScalarRegClass:$rs1, vti.RegClass:$rs2, - (vti.Mask VMV0:$vm), GPR:$vl, vti.Log2SEW, TA_MA)>; - } - } -} - multiclass VPatWidenFPMulAccVL_VV_VF_RM<SDNode vop, string instruction_name, 
list<VTypeInfoToWide> vtiToWtis = AllWidenableFloatVectors> { @@ -2331,8 +2182,6 @@ defm : VPatBinaryWVL_VV_VX<riscv_vwmulsu_vl, "PseudoVWMULSU">; // 11.13 Vector Single-Width Integer Multiply-Add Instructions defm : VPatMultiplyAddVL_VV_VX<riscv_add_vl, "PseudoVMADD">; defm : VPatMultiplyAddVL_VV_VX<riscv_sub_vl, "PseudoVNMSUB">; -defm : VPatMultiplyAccVL_VV_VX<riscv_add_vl_oneuse, "PseudoVMACC">; -defm : VPatMultiplyAccVL_VV_VX<riscv_sub_vl_oneuse, "PseudoVNMSAC">; // 11.14. Vector Widening Integer Multiply-Add Instructions defm : VPatWidenMultiplyAddVL_VV_VX<riscv_vwmacc_vl, "PseudoVWMACC">; @@ -2470,10 +2319,6 @@ defm : VPatFPMulAddVL_VV_VF_RM<any_riscv_vfmadd_vl, "PseudoVFMADD">; defm : VPatFPMulAddVL_VV_VF_RM<any_riscv_vfmsub_vl, "PseudoVFMSUB">; defm : VPatFPMulAddVL_VV_VF_RM<any_riscv_vfnmadd_vl, "PseudoVFNMADD">; defm : VPatFPMulAddVL_VV_VF_RM<any_riscv_vfnmsub_vl, "PseudoVFNMSUB">; -defm : VPatFPMulAccVL_VV_VF_RM<riscv_vfmadd_vl_oneuse, "PseudoVFMACC">; -defm : VPatFPMulAccVL_VV_VF_RM<riscv_vfmsub_vl_oneuse, "PseudoVFMSAC">; -defm : VPatFPMulAccVL_VV_VF_RM<riscv_vfnmadd_vl_oneuse, "PseudoVFNMACC">; -defm : VPatFPMulAccVL_VV_VF_RM<riscv_vfnmsub_vl_oneuse, "PseudoVFNMSAC">; // 13.7. Vector Widening Floating-Point Fused Multiply-Add Instructions defm : VPatWidenFPMulAccVL_VV_VF_RM<riscv_vfwmadd_vl, "PseudoVFWMACC">; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td index 889ea9802257..d615094329b2 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXMips.td @@ -125,10 +125,25 @@ class Mips_prefetch_ri<dag outs, dag ins, string opcodestr, string argstr> let Inst{6-0} = OPC_CUSTOM_0.Value; } +// MIPS Custom Barrier Insns Format. 
+let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in +class MIPSExtInst_ri<bits<6> shimm5, string opcodestr> + : RVInstIShift<0b00000, 0b001, OPC_OP_IMM, (outs), (ins), opcodestr, ""> { + let shamt = shimm5; + let rd = 0; + let rs1 = 0; +} + //===----------------------------------------------------------------------===// // MIPS extensions //===----------------------------------------------------------------------===// -let Predicates = [HasVendorXMIPSCBOP] ,DecoderNamespace = "Xmipscbop" in { +let Predicates = [HasVendorXMIPSEXECTL], DecoderNamespace = "XMIPS" in { + def MIPS_EHB : MIPSExtInst_ri<0b000011, "mips.ehb">; + def MIPS_IHB : MIPSExtInst_ri<0b000001, "mips.ihb">; + def MIPS_PAUSE : MIPSExtInst_ri<0b000101, "mips.pause">; +} + +let Predicates = [HasVendorXMIPSCBOP], DecoderNamespace = "XMIPS" in { def MIPS_PREF : Mips_prefetch_ri<(outs), (ins GPR:$rs1, uimm9:$imm9, uimm5:$hint), "mips.pref", "$hint, ${imm9}(${rs1})">, Sched<[]>; @@ -146,7 +161,7 @@ let Predicates = [HasVendorXMIPSCBOP] in { } let Predicates = [HasVendorXMIPSCMov], hasSideEffects = 0, mayLoad = 0, mayStore = 0, - DecoderNamespace = "Xmipscmov" in { + DecoderNamespace = "XMIPS" in { def MIPS_CCMOV : RVInstR4<0b11, 0b011, OPC_CUSTOM_0, (outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2, GPR:$rs3), "mips.ccmov", "$rd, $rs2, $rs1, $rs3">, @@ -166,7 +181,7 @@ def : Pat<(select (XLenVT GPR:$rs2), (XLenVT GPR:$rs1), (XLenVT GPR:$rs3)), } let Predicates = [HasVendorXMIPSLSP], hasSideEffects = 0, - DecoderNamespace = "Xmipslsp" in { + DecoderNamespace = "XMIPS" in { let mayLoad = 1, mayStore = 0 in { def MIPS_LWP : LWPFormat<(outs GPR:$rd1, GPR:$rd2), (ins GPR:$rs1, uimm7_lsb00:$imm7), "mips.lwp", "$rd1, $rd2, ${imm7}(${rs1})">, @@ -184,4 +199,4 @@ def MIPS_SDP : SDPFormat<(outs), (ins GPR:$rs2, GPR:$rs3, GPR:$rs1, uimm7_lsb000 "mips.sdp", "$rs2, $rs3, ${imm7}(${rs1})">, Sched<[WriteSTD, ReadStoreData, ReadStoreData, ReadMemBase]>; } // mayLoad = 0, mayStore = 1 -} // Predicates = [HasVendorXMIPSLSP], 
hasSideEffects = 0, DecoderNamespace = "Xmipslsp" +} // Predicates = [HasVendorXMIPSLSP], hasSideEffects = 0, DecoderNamespace = "XMIPS" diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td index 2c64b0c220fb..69796a68ecd6 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXqci.td @@ -22,6 +22,15 @@ def SDT_SetMultiple : SDTypeProfile<0, 4, [SDTCisSameAs<0, 1>, def qc_setwmi : RVSDNode<"QC_SETWMI", SDT_SetMultiple, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>; +def qc_insb : RVSDNode<"QC_INSB", SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisVT<0, i32>, + SDTCisInt<3>, + SDTCisInt<4>]>, + []>; + +def qc_e_li : RVSDNode<"QC_E_LI", SDTIntUnaryOp>; + def uimm5nonzero : RISCVOp<XLenVT>, ImmLeaf<XLenVT, [{return (Imm != 0) && isUInt<5>(Imm);}]> { let ParserMatchClass = UImmAsmOperand<5, "NonZero">; @@ -1508,6 +1517,11 @@ def : Pat<(i32 (and GPRNoX0:$rs, 1023)), (QC_EXTU GPRNoX0:$rs, 10, 0)>; def : Pat<(i32 (and GPRNoX0:$rs, 2047)), (QC_EXTU GPRNoX0:$rs, 11, 0)>; def : Pat<(i32 (bitreverse GPRNoX0:$rs1)), (QC_BREV32 GPRNoX0:$rs1)>; + +def : Pat<(qc_insb GPRNoX0:$rd, simm5:$imm5, uimm5_plus1:$width, uimm5:$shamt), + (QC_INSBI GPRNoX0:$rd, simm5:$imm5, uimm5_plus1:$width, uimm5:$shamt)>; +def : Pat<(qc_insb GPRNoX0:$rd, GPR:$rs1, uimm5_plus1:$width, uimm5:$shamt), + (QC_INSB GPRNoX0:$rd, GPR:$rs1, uimm5_plus1:$width, uimm5:$shamt)>; } // Predicates = [HasVendorXqcibm, IsRV32] // If Zbb is enabled sext.b/h is preferred since they are compressible @@ -1605,6 +1619,13 @@ def : Pat<(qc_setwmi GPR:$rs3, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uim (QC_SETWMI GPR:$rs3, GPR:$rs1, tuimm5nonzero:$uimm5, tuimm7_lsb00:$uimm7)>; } // Predicates = [HasVendorXqcilsm, IsRV32] +let Predicates = [HasVendorXqcili, IsRV32] in { +def: Pat<(qc_e_li tglobaladdr:$A), (QC_E_LI bare_simm32:$A)>; +def: Pat<(qc_e_li tblockaddress:$A), (QC_E_LI bare_simm32:$A)>; +def: 
Pat<(qc_e_li tjumptable:$A), (QC_E_LI bare_simm32:$A)>; +def: Pat<(qc_e_li tconstpool:$A), (QC_E_LI bare_simm32:$A)>; +} // Predicates = [HasVendorXqcili, IsRV32] + //===----------------------------------------------------------------------===/i // Compress Instruction tablegen backend. //===----------------------------------------------------------------------===// @@ -1738,10 +1759,19 @@ def : CompressPat<(QC_E_XORAI GPRNoX0:$rd, simm12:$imm), (XORI GPRNoX0:$rd, GPRNoX0:$rd, simm12:$imm)>; } // let isCompressOnly = true, Predicates = [HasVendorXqcilia, IsRV32] -let Predicates = [HasVendorXqciac, IsRV32] in { +let isCompressOnly = true, Predicates = [HasVendorXqciac, IsRV32] in { def : CompressPat<(QC_MULIADD GPRC:$rd, GPRC:$rs1, uimm5:$imm5), (QC_C_MULIADD GPRC:$rd, GPRC:$rs1, uimm5:$imm5)>; -} +} // isCompressOnly = true, Predicates = [HasVendorXqciac, IsRV32] + +let isCompressOnly = true, Predicates = [HasVendorXqciac, HasStdExtZba, IsRV32] in { +def : CompressPat<(SH1ADD GPRC:$rd, GPRC:$rs1, GPRC:$rd), + (QC_C_MULIADD GPRC:$rd, GPRC:$rs1, 2)>; +def : CompressPat<(SH2ADD GPRC:$rd, GPRC:$rs1, GPRC:$rd), + (QC_C_MULIADD GPRC:$rd, GPRC:$rs1, 4)>; +def : CompressPat<(SH3ADD GPRC:$rd, GPRC:$rs1, GPRC:$rd), + (QC_C_MULIADD GPRC:$rd, GPRC:$rs1, 8)>; +} // isCompressOnly = true, Predicates = [HasVendorXqciac, HasStdExtZba, IsRV32] let isCompressOnly = true, Predicates = [HasVendorXqcibi, IsRV32] in { def : CompressPat<(QC_E_BEQI GPRNoX0:$rs1, simm5nonzero:$imm5, bare_simm13_lsb0:$imm12), diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXwch.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXwch.td index a43cbadf6f30..bb1862cc88d6 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoXwch.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXwch.td @@ -106,6 +106,7 @@ def QK_C_LBUSP : QKStackInst<0b00, (outs GPRC:$rd_rs2), (ins SPMem:$rs1, uimm4:$imm), "qk.c.lbusp", "$rd_rs2, ${imm}(${rs1})">, Sched<[WriteLDB, ReadMemBase]> { + bits<0> rs1; bits<4> imm; let Inst{10-7} = imm; } @@ -115,6 
+116,7 @@ def QK_C_SBSP : QKStackInst<0b10, (outs), uimm4:$imm), "qk.c.sbsp", "$rd_rs2, ${imm}(${rs1})">, Sched<[WriteSTB, ReadStoreData, ReadMemBase]> { + bits<0> rs1; bits<4> imm; let Inst{10-7} = imm; } @@ -124,6 +126,7 @@ def QK_C_LHUSP : QKStackInst<0b01, (outs GPRC:$rd_rs2), (ins SPMem:$rs1, uimm5_lsb0:$imm), "qk.c.lhusp", "$rd_rs2, ${imm}(${rs1})">, Sched<[WriteLDH, ReadMemBase]> { + bits<0> rs1; bits<5> imm; let Inst{10-8} = imm{3-1}; let Inst{7} = imm{4}; @@ -133,6 +136,7 @@ def QK_C_SHSP : QKStackInst<0b11, (outs), (ins GPRC:$rd_rs2, SPMem:$rs1, uimm5_lsb0:$imm), "qk.c.shsp", "$rd_rs2, ${imm}(${rs1})">, Sched<[WriteSTH, ReadStoreData, ReadMemBase]> { + bits<0> rs1; bits<5> imm; let Inst{10-8} = imm{3-1}; let Inst{7} = imm{4}; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index 2abd3e613a03..a2b4302e19ed 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -459,15 +459,15 @@ let Predicates = [HasStdExtZba, IsRV64] in { def : InstAlias<"zext.w $rd, $rs", (ADD_UW GPR:$rd, GPR:$rs, X0)>; } // Predicates = [HasStdExtZba, IsRV64] -let Predicates = [HasStdExtZbb] in { +let Predicates = [HasStdExtZbbOrZbkb] in { def : InstAlias<"ror $rd, $rs1, $shamt", - (RORI GPR:$rd, GPR:$rs1, uimmlog2xlen:$shamt), 0>; -} // Predicates = [HasStdExtZbb] + (RORI GPR:$rd, GPR:$rs1, uimmlog2xlen:$shamt), 0>; +} // Predicates = [HasStdExtZbbOrZbkb] -let Predicates = [HasStdExtZbb, IsRV64] in { +let Predicates = [HasStdExtZbbOrZbkb, IsRV64] in { def : InstAlias<"rorw $rd, $rs1, $shamt", - (RORIW GPR:$rd, GPR:$rs1, uimm5:$shamt), 0>; -} // Predicates = [HasStdExtZbb, IsRV64] + (RORIW GPR:$rd, GPR:$rs1, uimm5:$shamt), 0>; +} // Predicates = [HasStdExtZbbOrZbkb, IsRV64] let Predicates = [HasStdExtZbs] in { def : InstAlias<"bset $rd, $rs1, $shamt", diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td index 32e7f962aa2a..76dc027ffd1d 
100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZcmop.td @@ -22,5 +22,5 @@ class CMOPInst<bits<3> imm3, string opcodestr> foreach n = [1, 3, 5, 7, 9, 11, 13, 15] in { let Predicates = [HasStdExtZcmop] in - def C_MOP # n : CMOPInst<!srl(n, 1), "c.mop." # n>, Sched<[]>; + def C_MOP_ # n : CMOPInst<!srl(n, 1), "c.mop." # n>, Sched<[]>; } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td index 49a57f86cccd..50ebaa995197 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZicfiss.td @@ -62,6 +62,21 @@ defm SSAMOSWAP_W : AMO_rr_aq_rl<0b01001, 0b010, "ssamoswap.w">; let Predicates = [HasStdExtZicfiss, IsRV64] in defm SSAMOSWAP_D : AMO_rr_aq_rl<0b01001, 0b011, "ssamoswap.d">; +let Predicates = [HasStdExtZimop] in { +let hasSideEffects = 1, mayLoad = 0, mayStore = 1 in +def PseudoMOP_SSPUSH : Pseudo<(outs), (ins GPRX1X5:$rs2), []>, + PseudoInstExpansion<(MOP_RR_7 X0, X0, GPR:$rs2)>; +let hasSideEffects = 1, mayLoad = 1, mayStore = 0 in +def PseudoMOP_SSPOPCHK : Pseudo<(outs), (ins GPRX1X5:$rs1), []>, + PseudoInstExpansion<(MOP_R_28 X0, GPR:$rs1)>; +} // Predicates = [HasStdExtZimop] + +let Predicates = [HasStdExtZcmop] in { +let Uses = [X1], hasSideEffects = 1, mayLoad = 0, mayStore = 1 in +def PseudoMOP_C_SSPUSH : Pseudo<(outs), (ins), []>, + PseudoInstExpansion<(C_MOP_1)>; +} // Predicates = [HasStdExtZcmop] + //===----------------------------------------------------------------------===/ // Compress Instruction tablegen backend. 
//===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td index 960f5669b488..0d08176f9799 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZimop.td @@ -33,13 +33,13 @@ class RVInstRMoprr<bits<4> imm4, bits<3> imm3, bits<3> funct3, RISCVOpcode opcod } // May-Be-Operations -def riscv_mopr : RVSDNode<"MOPR", - SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>]>>; -def riscv_moprr : RVSDNode<"MOPRR", - SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, - SDTCisSameAs<0, 2>, - SDTCisSameAs<0, 3>]>>; +def riscv_mop_r : RVSDNode<"MOP_R", + SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>]>>; +def riscv_mop_rr : RVSDNode<"MOP_RR", + SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>>; let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class RVMopr<bits<7> imm7, bits<5> imm5, bits<3> funct3, @@ -50,31 +50,32 @@ class RVMopr<bits<7> imm7, bits<5> imm5, bits<3> funct3, let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in class RVMoprr<bits<4> imm4, bits<3> imm3, bits<3> funct3, RISCVOpcode opcode, string opcodestr> - : RVInstRMoprr<imm4, imm3, funct3, opcode, (outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2), + : RVInstRMoprr<imm4, imm3, funct3, opcode, (outs GPR:$rd), + (ins GPR:$rs1, GPR:$rs2), opcodestr, "$rd, $rs1, $rs2">; foreach i = 0...31 in { let Predicates = [HasStdExtZimop] in - def MOPR#i : RVMopr<0b1000111, i, 0b100, OPC_SYSTEM, "mop.r."#i>, - Sched<[]>; + def MOP_R_#i : RVMopr<0b1000111, i, 0b100, OPC_SYSTEM, "mop.r."#i>, + Sched<[]>; } foreach i = 0...7 in { let Predicates = [HasStdExtZimop] in - def MOPRR#i : RVMoprr<0b1001, i, 0b100, OPC_SYSTEM, "mop.rr."#i>, + def MOP_RR_#i : RVMoprr<0b1001, i, 0b100, OPC_SYSTEM, "mop.rr."#i>, Sched<[]>; } let Predicates = [HasStdExtZimop] in { // 
Zimop instructions foreach i = 0...31 in { - def : Pat<(XLenVT (riscv_mopr GPR:$rs1, (XLenVT i))), - (!cast<Instruction>("MOPR"#i) GPR:$rs1)>; + def : Pat<(XLenVT (riscv_mop_r GPR:$rs1, (XLenVT i))), + (!cast<Instruction>("MOP_R_"#i) GPR:$rs1)>; } foreach i = 0...7 in { - def : Pat<(XLenVT (riscv_moprr GPR:$rs1, GPR:$rs2, (XLenVT i))), - (!cast<Instruction>("MOPRR"#i) GPR:$rs1, GPR:$rs2)>; + def : Pat<(XLenVT (riscv_mop_rr GPR:$rs1, GPR:$rs2, (XLenVT i))), + (!cast<Instruction>("MOP_RR_"#i) GPR:$rs1, GPR:$rs2)>; } } // Predicates = [HasStdExtZimop] diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvqdotq.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvqdotq.td index 27959eaccd90..00c4e83e18a0 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvqdotq.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvqdotq.td @@ -17,16 +17,39 @@ // Instructions //===----------------------------------------------------------------------===// +class VQDOTVV<bits<6> funct6, RISCVVFormat opv, string opcodestr> + : RVInstVV<funct6, opv, (outs VR:$vd_wb), + (ins VR:$vd, VR:$vs2, VR:$vs1, VMaskOp:$vm), + opcodestr, "$vd, $vs2, $vs1$vm"> { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = "$vd = $vd_wb"; +} + +class VQDOTVX<bits<6> funct6, RISCVVFormat opv, string opcodestr> + : RVInstVX<funct6, opv, (outs VR:$vd_wb), + (ins VR:$vd, VR:$vs2, GPR:$rs1, VMaskOp:$vm), + opcodestr, "$vd, $vs2, $rs1$vm"> { + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let Constraints = "$vd = $vd_wb"; +} + let Predicates = [HasStdExtZvqdotq] in { - def VQDOT_VV : VALUVV<0b101100, OPMVV, "vqdot.vv">; - def VQDOT_VX : VALUVX<0b101100, OPMVX, "vqdot.vx">; - def VQDOTU_VV : VALUVV<0b101000, OPMVV, "vqdotu.vv">; - def VQDOTU_VX : VALUVX<0b101000, OPMVX, "vqdotu.vx">; - def VQDOTSU_VV : VALUVV<0b101010, OPMVV, "vqdotsu.vv">; - def VQDOTSU_VX : VALUVX<0b101010, OPMVX, "vqdotsu.vx">; - def VQDOTUS_VX : VALUVX<0b101110, OPMVX, "vqdotus.vx">; + def VQDOT_VV : VQDOTVV<0b101100, 
OPMVV, "vqdot.vv">; + def VQDOT_VX : VQDOTVX<0b101100, OPMVX, "vqdot.vx">; + def VQDOTU_VV : VQDOTVV<0b101000, OPMVV, "vqdotu.vv">; + def VQDOTU_VX : VQDOTVX<0b101000, OPMVX, "vqdotu.vx">; + def VQDOTSU_VV : VQDOTVV<0b101010, OPMVV, "vqdotsu.vv">; + def VQDOTSU_VX : VQDOTVX<0b101010, OPMVX, "vqdotsu.vx">; + def VQDOTUS_VX : VQDOTVX<0b101110, OPMVX, "vqdotus.vx">; } // Predicates = [HasStdExtZvqdotq] +//===----------------------------------------------------------------------===// +// Helpers to define the VL patterns. +//===----------------------------------------------------------------------===// let HasPassthruOp = true, HasMaskOp = true in { def riscv_vqdot_vl : RVSDNode<"VQDOT_VL", SDT_RISCVIntBinOp_VL>; @@ -34,6 +57,10 @@ let HasPassthruOp = true, HasMaskOp = true in { def riscv_vqdotsu_vl : RVSDNode<"VQDOTSU_VL", SDT_RISCVIntBinOp_VL>; } // let HasPassthruOp = true, HasMaskOp = true +//===----------------------------------------------------------------------===// +// Pseudo Instructions for CodeGen +//===----------------------------------------------------------------------===// + multiclass VPseudoVQDOT_VV_VX { foreach m = MxSet<32>.m in { defm "" : VPseudoBinaryV_VV<m>, @@ -52,10 +79,69 @@ let Predicates = [HasStdExtZvqdotq], mayLoad = 0, mayStore = 0, defm PseudoVQDOT : VPseudoVQDOT_VV_VX; defm PseudoVQDOTU : VPseudoVQDOT_VV_VX; defm PseudoVQDOTSU : VPseudoVQDOT_VV_VX; + // VQDOTUS does not have a VV variant + foreach m = MxListVF4 in { + defm "PseudoVQDOTUS_VX" : VPseudoTernaryWithPolicy<m.vrclass, m.vrclass, GPR, m>; + } } +//===----------------------------------------------------------------------===// +// Patterns. 
+//===----------------------------------------------------------------------===// + defvar AllE32Vectors = [VI32MF2, VI32M1, VI32M2, VI32M4, VI32M8]; defm : VPatBinaryVL_VV_VX<riscv_vqdot_vl, "PseudoVQDOT", AllE32Vectors>; defm : VPatBinaryVL_VV_VX<riscv_vqdotu_vl, "PseudoVQDOTU", AllE32Vectors>; defm : VPatBinaryVL_VV_VX<riscv_vqdotsu_vl, "PseudoVQDOTSU", AllE32Vectors>; +// These VPat definitions are for vqdot because they have a different operand +// order with other ternary instructions (i.e. vop.vx vd, vs2, rs1) +multiclass VPatTernaryV_VX_AABX<string intrinsic, string instruction, + list<VTypeInfoToWide> info_pairs> { + foreach pair = info_pairs in { + defvar VdInfo = pair.Wti; + defvar Vs2Info = pair.Vti; + let Predicates = GetVTypePredicates<VdInfo>.Predicates in + defm : VPatTernaryWithPolicy<intrinsic, instruction, + "V"#VdInfo.ScalarSuffix, + VdInfo.Vector, Vs2Info.Vector, Vs2Info.Scalar, + VdInfo.Mask, VdInfo.Log2SEW, VdInfo.LMul, + VdInfo.RegClass, Vs2Info.RegClass, + Vs2Info.ScalarRegClass>; + } +} + +multiclass VPatTernaryV_VV_AABX<string intrinsic, string instruction, + list<VTypeInfoToWide> info_pairs> { + foreach pair = info_pairs in { + defvar VdInfo = pair.Wti; + defvar Vs2Info = pair.Vti; + let Predicates = GetVTypePredicates<VdInfo>.Predicates in + defm : VPatTernaryWithPolicy<intrinsic, instruction, + "VV", + VdInfo.Vector, Vs2Info.Vector, Vs2Info.Vector, + VdInfo.Mask, VdInfo.Log2SEW, VdInfo.LMul, + VdInfo.RegClass, Vs2Info.RegClass, + Vs2Info.RegClass>; + } +} + +multiclass VPatTernaryV_VV_VX_AABX<string intrinsic, string instruction, + list<VTypeInfoToWide> info_pairs> + : VPatTernaryV_VV_AABX<intrinsic, instruction, info_pairs>, + VPatTernaryV_VX_AABX<intrinsic, instruction, info_pairs>; + +defset list<VTypeInfoToWide> VQDOTInfoPairs = { + def : VTypeInfoToWide<VI8MF2, VI32MF2>; + def : VTypeInfoToWide<VI8M1, VI32M1>; + def : VTypeInfoToWide<VI8M2, VI32M2>; + def : VTypeInfoToWide<VI8M4, VI32M4>; + def : VTypeInfoToWide<VI8M8, VI32M8>; +} 
+ +let Predicates = [HasStdExtZvqdotq] in { + defm : VPatTernaryV_VV_VX_AABX<"int_riscv_vqdot", "PseudoVQDOT", VQDOTInfoPairs>; + defm : VPatTernaryV_VV_VX_AABX<"int_riscv_vqdotu", "PseudoVQDOTU", VQDOTInfoPairs>; + defm : VPatTernaryV_VV_VX_AABX<"int_riscv_vqdotsu", "PseudoVQDOTSU", VQDOTInfoPairs>; + defm : VPatTernaryV_VX_AABX<"int_riscv_vqdotus", "PseudoVQDOTUS", VQDOTInfoPairs>; +} diff --git a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td index 4abe62f4e874..06309262f1b0 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrPredicates.td +++ b/llvm/lib/Target/RISCV/RISCVInstrPredicates.td @@ -148,6 +148,14 @@ def isNonZeroLoadImmediate CheckNot<CheckImmOperand<2, 0>> ]>>>; +def isLPAD + : TIIPredicate<"isLPAD", + MCReturnStatement<CheckAll<[ + CheckOpcode<[AUIPC]>, + CheckIsRegOperand<0>, + CheckRegOperand<0, X0>, + ]>>>; + def ignoresVXRM : TIIPredicate<"ignoresVXRM", MCOpcodeSwitchStatement< diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp index c7b96f5c3d0c..5e1063155ba0 100644 --- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp +++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp @@ -81,6 +81,12 @@ static const Intrinsic::ID FixedVssegIntrIds[] = { Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask, Intrinsic::riscv_seg8_store_mask}; +static const Intrinsic::ID FixedVsssegIntrIds[] = { + Intrinsic::riscv_sseg2_store_mask, Intrinsic::riscv_sseg3_store_mask, + Intrinsic::riscv_sseg4_store_mask, Intrinsic::riscv_sseg5_store_mask, + Intrinsic::riscv_sseg6_store_mask, Intrinsic::riscv_sseg7_store_mask, + Intrinsic::riscv_sseg8_store_mask}; + static const Intrinsic::ID ScalableVssegIntrIds[] = { Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask, Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask, @@ -275,7 +281,16 @@ bool RISCVTargetLowering::lowerInterleavedLoad( bool RISCVTargetLowering::lowerInterleavedStore(Instruction 
*Store, Value *LaneMask, ShuffleVectorInst *SVI, - unsigned Factor) const { + unsigned Factor, + const APInt &GapMask) const { + assert(GapMask.getBitWidth() == Factor); + + // We only support cases where the skipped fields are the trailing ones. + // TODO: Lower to strided store if there is only a single active field. + unsigned MaskFactor = GapMask.popcount(); + if (MaskFactor < 2 || !GapMask.isMask()) + return false; + IRBuilder<> Builder(Store); const DataLayout &DL = Store->getDataLayout(); auto Mask = SVI->getShuffleMask(); @@ -287,21 +302,31 @@ bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store, Value *Ptr, *VL; Align Alignment; - if (!getMemOperands(Factor, VTy, XLenTy, Store, Ptr, LaneMask, VL, Alignment)) + if (!getMemOperands(MaskFactor, VTy, XLenTy, Store, Ptr, LaneMask, VL, + Alignment)) return false; Type *PtrTy = Ptr->getType(); unsigned AS = PtrTy->getPointerAddressSpace(); - if (!isLegalInterleavedAccessType(VTy, Factor, Alignment, AS, DL)) + if (!isLegalInterleavedAccessType(VTy, MaskFactor, Alignment, AS, DL)) return false; - Function *VssegNFunc = Intrinsic::getOrInsertDeclaration( - Store->getModule(), FixedVssegIntrIds[Factor - 2], {VTy, PtrTy, XLenTy}); + Function *SegStoreFunc; + if (MaskFactor < Factor) + // Strided segmented store. + SegStoreFunc = Intrinsic::getOrInsertDeclaration( + Store->getModule(), FixedVsssegIntrIds[MaskFactor - 2], + {VTy, PtrTy, XLenTy, XLenTy}); + else + // Normal segmented store. + SegStoreFunc = Intrinsic::getOrInsertDeclaration( + Store->getModule(), FixedVssegIntrIds[Factor - 2], + {VTy, PtrTy, XLenTy}); SmallVector<Value *, 10> Ops; SmallVector<int, 16> NewShuffleMask; - for (unsigned i = 0; i < Factor; i++) { + for (unsigned i = 0; i < MaskFactor; i++) { // Collect shuffle mask for this lane. 
for (unsigned j = 0; j < VTy->getNumElements(); j++) NewShuffleMask.push_back(Mask[i + Factor * j]); @@ -312,8 +337,14 @@ bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store, NewShuffleMask.clear(); } - Ops.append({Ptr, LaneMask, VL}); - Builder.CreateCall(VssegNFunc, Ops); + Ops.push_back(Ptr); + if (MaskFactor < Factor) { + // Insert the stride argument. + unsigned ScalarSizeInBytes = DL.getTypeStoreSize(VTy->getElementType()); + Ops.push_back(ConstantInt::get(XLenTy, Factor * ScalarSizeInBytes)); + } + Ops.append({LaneMask, VL}); + Builder.CreateCall(SegStoreFunc, Ops); return true; } diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp index 3b19c3456ad6..d08115b72977 100644 --- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp +++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp @@ -356,6 +356,14 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI, return false; Worklist.emplace_back(UserMI, Bits); break; + case RISCV::TH_EXT: + case RISCV::TH_EXTU: + unsigned Msb = UserMI->getOperand(2).getImm(); + unsigned Lsb = UserMI->getOperand(3).getImm(); + // Behavior of Msb < Lsb is not well documented. + if (Msb >= Lsb && Bits > Msb) + break; + return false; } } } @@ -409,6 +417,16 @@ static bool isSignExtendingOpW(const MachineInstr &MI, unsigned OpNo) { assert(Log2SEW >= 3 && Log2SEW <= 6 && "Unexpected Log2SEW"); return Log2SEW <= 5; } + case RISCV::TH_EXT: { + unsigned Msb = MI.getOperand(2).getImm(); + unsigned Lsb = MI.getOperand(3).getImm(); + return Msb >= Lsb && (Msb - Lsb + 1) <= 32; + } + case RISCV::TH_EXTU: { + unsigned Msb = MI.getOperand(2).getImm(); + unsigned Lsb = MI.getOperand(3).getImm(); + return Msb >= Lsb && (Msb - Lsb + 1) < 32; + } } return false; @@ -519,9 +537,11 @@ static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST, case RISCV::ANDI: case RISCV::ORI: case RISCV::XORI: + case RISCV::SRAI: // |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R. 
// DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1 // Logical operations use a sign extended 12-bit immediate. + // Arithmetic shift right can only increase the number of sign bits. if (!AddRegToWorkList(MI->getOperand(1).getReg())) return false; @@ -556,6 +576,9 @@ static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST, case RISCV::PseudoCCAND: case RISCV::PseudoCCOR: case RISCV::PseudoCCXOR: + case RISCV::PseudoCCANDN: + case RISCV::PseudoCCORN: + case RISCV::PseudoCCXNOR: case RISCV::PHI: { // If all incoming values are sign-extended, the output of AND, OR, XOR, // MIN, MAX, or PHI is also sign-extended. @@ -578,6 +601,9 @@ static bool isSignExtendedW(Register SrcReg, const RISCVSubtarget &ST, case RISCV::PseudoCCAND: case RISCV::PseudoCCOR: case RISCV::PseudoCCXOR: + case RISCV::PseudoCCANDN: + case RISCV::PseudoCCORN: + case RISCV::PseudoCCXNOR: B = 4; E = 7; break; diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index f89d94f41b69..36d63ed23b92 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -121,7 +121,8 @@ def MIPS_P8700 : RISCVProcessorModel<"mips-p8700", FeatureStdExtZicsr, FeatureVendorXMIPSCMov, FeatureVendorXMIPSLSP, - FeatureVendorXMIPSCBOP], + FeatureVendorXMIPSCBOP, + FeatureVendorXMIPSEXECTL], [TuneMIPSP8700]>; def ROCKET_RV32 : RISCVProcessorModel<"rocket-rv32", diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp index f3966a55ce7d..40b641680b2c 100644 --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -966,7 +966,9 @@ bool RISCVRegisterInfo::getRegAllocationHints( } } - // Add a hint if it would allow auipc/lui+addi(w) fusion. + // Add a hint if it would allow auipc/lui+addi(w) fusion. 
We do this even + // without the fusions explicitly enabled as the impact is rarely negative + // and some cores do implement this fusion. if ((MI.getOpcode() == RISCV::ADDIW || MI.getOpcode() == RISCV::ADDI) && MI.getOperand(1).isReg()) { const MachineBasicBlock &MBB = *MI.getParent(); @@ -974,9 +976,7 @@ bool RISCVRegisterInfo::getRegAllocationHints( // Is the previous instruction a LUI or AUIPC that can be fused? if (I != MBB.begin()) { I = skipDebugInstructionsBackward(std::prev(I), MBB.begin()); - if (((I->getOpcode() == RISCV::LUI && Subtarget.hasLUIADDIFusion()) || - (I->getOpcode() == RISCV::AUIPC && - Subtarget.hasAUIPCADDIFusion())) && + if ((I->getOpcode() == RISCV::LUI || I->getOpcode() == RISCV::AUIPC) && I->getOperand(0).getReg() == MI.getOperand(1).getReg()) { if (OpIdx == 0) tryAddHint(MO, MI.getOperand(1), /*NeedGPRC=*/false); diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index fd57e02c25d0..50e76df56e57 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -186,6 +186,12 @@ public: return HasStdExtZfhmin || HasStdExtZfbfmin; } + bool hasBEXTILike() const { return HasStdExtZbs || HasVendorXTHeadBs; } + + bool hasCZEROLike() const { + return HasStdExtZicond || HasVendorXVentanaCondOps; + } + bool hasConditionalMoveFusion() const { // Do we support fusing a branch+mv or branch+c.mv as a conditional move. 
return (hasConditionalCompressedMoveFusion() && hasStdExtZca()) || diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index d70b1d0dc8d5..460bb33f2553 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -652,7 +652,8 @@ void RISCVPassConfig::addPostRegAlloc() { void RISCVTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { PB.registerLateLoopOptimizationsEPCallback([=](LoopPassManager &LPM, OptimizationLevel Level) { - LPM.addPass(LoopIdiomVectorizePass(LoopIdiomVectorizeStyle::Predicated)); + if (Level != OptimizationLevel::O0) + LPM.addPass(LoopIdiomVectorizePass(LoopIdiomVectorizeStyle::Predicated)); }); } diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index c707fb110b10..1ca513214f67 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -1566,6 +1566,18 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, return BaseT::getIntrinsicInstrCost(ICA, CostKind); } +InstructionCost +RISCVTTIImpl::getAddressComputationCost(Type *PtrTy, ScalarEvolution *SE, + const SCEV *Ptr, + TTI::TargetCostKind CostKind) const { + // Address computations for vector indexed load/store likely require an offset + // and/or scaling. 
+ if (ST->hasVInstructions() && PtrTy->isVectorTy()) + return getArithmeticInstrCost(Instruction::Add, PtrTy, CostKind); + + return BaseT::getAddressComputationCost(PtrTy, SE, Ptr, CostKind); +} + InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, @@ -2731,6 +2743,10 @@ unsigned RISCVTTIImpl::getMinTripCountTailFoldingThreshold() const { return RVVMinTripCount; } +bool RISCVTTIImpl::preferAlternateOpcodeVectorization() const { + return ST->enableUnalignedVectorMem(); +} + TTI::AddressingModeKind RISCVTTIImpl::getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const { diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index 3236b2a35c85..6bd7d51daff6 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -132,7 +132,7 @@ public: unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override; - bool preferAlternateOpcodeVectorization() const override { return false; } + bool preferAlternateOpcodeVectorization() const override; bool preferEpilogueVectorization() const override { // Epilogue vectorization is usually unprofitable - tail folding or @@ -177,6 +177,10 @@ public: getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const override; + InstructionCost + getAddressComputationCost(Type *PTy, ScalarEvolution *SE, const SCEV *Ptr, + TTI::TargetCostKind CostKind) const override; + InstructionCost getInterleavedMemoryOpCost( unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp index 53557049ea33..29526cf5a527 100644 --- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp +++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp @@ -178,8 +178,20 @@ 
static unsigned getIntegerExtensionOperandEEW(unsigned Factor, return Log2EEW; } -static std::optional<unsigned> -getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) { +#define VSEG_CASES(Prefix, EEW) \ + RISCV::Prefix##SEG2E##EEW##_V: \ + case RISCV::Prefix##SEG3E##EEW##_V: \ + case RISCV::Prefix##SEG4E##EEW##_V: \ + case RISCV::Prefix##SEG5E##EEW##_V: \ + case RISCV::Prefix##SEG6E##EEW##_V: \ + case RISCV::Prefix##SEG7E##EEW##_V: \ + case RISCV::Prefix##SEG8E##EEW##_V +#define VSSEG_CASES(EEW) VSEG_CASES(VS, EEW) +#define VSSSEG_CASES(EEW) VSEG_CASES(VSS, EEW) +#define VSUXSEG_CASES(EEW) VSEG_CASES(VSUX, I##EEW) +#define VSOXSEG_CASES(EEW) VSEG_CASES(VSOX, I##EEW) + +static std::optional<unsigned> getOperandLog2EEW(const MachineOperand &MO) { const MachineInstr &MI = *MO.getParent(); const MCInstrDesc &Desc = MI.getDesc(); const RISCVVPseudosTable::PseudoInfo *RVV = @@ -225,21 +237,29 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) { case RISCV::VSE8_V: case RISCV::VLSE8_V: case RISCV::VSSE8_V: + case VSSEG_CASES(8): + case VSSSEG_CASES(8): return 3; case RISCV::VLE16_V: case RISCV::VSE16_V: case RISCV::VLSE16_V: case RISCV::VSSE16_V: + case VSSEG_CASES(16): + case VSSSEG_CASES(16): return 4; case RISCV::VLE32_V: case RISCV::VSE32_V: case RISCV::VLSE32_V: case RISCV::VSSE32_V: + case VSSEG_CASES(32): + case VSSSEG_CASES(32): return 5; case RISCV::VLE64_V: case RISCV::VSE64_V: case RISCV::VLSE64_V: case RISCV::VSSE64_V: + case VSSEG_CASES(64): + case VSSSEG_CASES(64): return 6; // Vector Indexed Instructions @@ -248,7 +268,9 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) { case RISCV::VLUXEI8_V: case RISCV::VLOXEI8_V: case RISCV::VSUXEI8_V: - case RISCV::VSOXEI8_V: { + case RISCV::VSOXEI8_V: + case VSUXSEG_CASES(8): + case VSOXSEG_CASES(8): { if (MO.getOperandNo() == 0) return MILog2SEW; return 3; @@ -256,7 +278,9 @@ getOperandLog2EEW(const MachineOperand &MO, const 
MachineRegisterInfo *MRI) { case RISCV::VLUXEI16_V: case RISCV::VLOXEI16_V: case RISCV::VSUXEI16_V: - case RISCV::VSOXEI16_V: { + case RISCV::VSOXEI16_V: + case VSUXSEG_CASES(16): + case VSOXSEG_CASES(16): { if (MO.getOperandNo() == 0) return MILog2SEW; return 4; @@ -264,7 +288,9 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) { case RISCV::VLUXEI32_V: case RISCV::VLOXEI32_V: case RISCV::VSUXEI32_V: - case RISCV::VSOXEI32_V: { + case RISCV::VSOXEI32_V: + case VSUXSEG_CASES(32): + case VSOXSEG_CASES(32): { if (MO.getOperandNo() == 0) return MILog2SEW; return 5; @@ -272,7 +298,9 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) { case RISCV::VLUXEI64_V: case RISCV::VLOXEI64_V: case RISCV::VSUXEI64_V: - case RISCV::VSOXEI64_V: { + case RISCV::VSOXEI64_V: + case VSUXSEG_CASES(64): + case VSOXSEG_CASES(64): { if (MO.getOperandNo() == 0) return MILog2SEW; return 6; @@ -422,9 +450,6 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) { case RISCV::VRGATHER_VI: case RISCV::VRGATHER_VV: case RISCV::VRGATHER_VX: - // Vector Compress Instruction - // EEW=SEW. - case RISCV::VCOMPRESS_VM: // Vector Element Index Instruction case RISCV::VID_V: // Vector Single-Width Floating-Point Add/Subtract Instructions @@ -674,6 +699,12 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) { return MILog2SEW; } + // Vector Compress Instruction + // EEW=SEW, except the mask operand has EEW=1. Mask operand is not handled + // before this switch. + case RISCV::VCOMPRESS_VM: + return MO.getOperandNo() == 3 ? 0 : MILog2SEW; + // Vector Iota Instruction // EEW=SEW, except the mask operand has EEW=1. Mask operand is not handled // before this switch. 
@@ -778,14 +809,13 @@ getOperandLog2EEW(const MachineOperand &MO, const MachineRegisterInfo *MRI) { } } -static std::optional<OperandInfo> -getOperandInfo(const MachineOperand &MO, const MachineRegisterInfo *MRI) { +static std::optional<OperandInfo> getOperandInfo(const MachineOperand &MO) { const MachineInstr &MI = *MO.getParent(); const RISCVVPseudosTable::PseudoInfo *RVV = RISCVVPseudosTable::getPseudoInfo(MI.getOpcode()); assert(RVV && "Could not find MI in PseudoTable"); - std::optional<unsigned> Log2EEW = getOperandLog2EEW(MO, MRI); + std::optional<unsigned> Log2EEW = getOperandLog2EEW(MO); if (!Log2EEW) return std::nullopt; @@ -900,13 +930,6 @@ static bool isSupportedInstr(const MachineInstr &MI) { case RISCV::VSEXT_VF4: case RISCV::VZEXT_VF8: case RISCV::VSEXT_VF8: - // Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions - // FIXME: Add support - case RISCV::VMADC_VV: - case RISCV::VMADC_VI: - case RISCV::VMADC_VX: - case RISCV::VMSBC_VV: - case RISCV::VMSBC_VX: // Vector Narrowing Integer Right Shift Instructions case RISCV::VNSRL_WX: case RISCV::VNSRL_WI: @@ -993,6 +1016,11 @@ static bool isSupportedInstr(const MachineInstr &MI) { case RISCV::VSBC_VXM: case RISCV::VMSBC_VVM: case RISCV::VMSBC_VXM: + case RISCV::VMADC_VV: + case RISCV::VMADC_VI: + case RISCV::VMADC_VX: + case RISCV::VMSBC_VV: + case RISCV::VMSBC_VX: // Vector Widening Integer Multiply-Add Instructions case RISCV::VWMACCU_VV: case RISCV::VWMACCU_VX: @@ -1001,10 +1029,7 @@ static bool isSupportedInstr(const MachineInstr &MI) { case RISCV::VWMACCSU_VV: case RISCV::VWMACCSU_VX: case RISCV::VWMACCUS_VX: - // Vector Integer Merge Instructions - // FIXME: Add support // Vector Integer Move Instructions - // FIXME: Add support case RISCV::VMV_V_I: case RISCV::VMV_V_X: case RISCV::VMV_V_V: @@ -1306,7 +1331,8 @@ bool RISCVVLOptimizer::isCandidate(const MachineInstr &MI) const { // TODO: Use a better approach than a white-list, such as adding // properties to instructions using 
something like TSFlags. if (!isSupportedInstr(MI)) { - LLVM_DEBUG(dbgs() << "Not a candidate due to unsupported instruction\n"); + LLVM_DEBUG(dbgs() << "Not a candidate due to unsupported instruction: " + << MI); return false; } @@ -1328,14 +1354,14 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const { const MCInstrDesc &Desc = UserMI.getDesc(); if (!RISCVII::hasVLOp(Desc.TSFlags) || !RISCVII::hasSEWOp(Desc.TSFlags)) { - LLVM_DEBUG(dbgs() << " Abort due to lack of VL, assume that" + LLVM_DEBUG(dbgs() << " Abort due to lack of VL, assume that" " use VLMAX\n"); return std::nullopt; } if (RISCVII::readsPastVL( TII->get(RISCV::getRVVMCOpcode(UserMI.getOpcode())).TSFlags)) { - LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n"); + LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n"); return std::nullopt; } @@ -1352,7 +1378,7 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const { RISCVII::isFirstDefTiedToFirstUse(UserMI.getDesc())); auto DemandedVL = DemandedVLs.lookup(&UserMI); if (!DemandedVL || !RISCV::isVLKnownLE(*DemandedVL, VLOp)) { - LLVM_DEBUG(dbgs() << " Abort because user is passthru in " + LLVM_DEBUG(dbgs() << " Abort because user is passthru in " "instruction with demanded tail\n"); return std::nullopt; } @@ -1376,6 +1402,54 @@ RISCVVLOptimizer::getMinimumVLForUser(const MachineOperand &UserOp) const { return VLOp; } +/// Return true if MI is an instruction used for assembling registers +/// for segmented store instructions, namely, RISCVISD::TUPLE_INSERT. +/// Currently it's lowered to INSERT_SUBREG. 
+static bool isTupleInsertInstr(const MachineInstr &MI) { + if (!MI.isInsertSubreg()) + return false; + + const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo(); + const TargetRegisterClass *DstRC = MRI.getRegClass(MI.getOperand(0).getReg()); + const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); + if (!RISCVRI::isVRegClass(DstRC->TSFlags)) + return false; + unsigned NF = RISCVRI::getNF(DstRC->TSFlags); + if (NF < 2) + return false; + + // Check whether INSERT_SUBREG has the correct subreg index for tuple inserts. + auto VLMul = RISCVRI::getLMul(DstRC->TSFlags); + unsigned SubRegIdx = MI.getOperand(3).getImm(); + [[maybe_unused]] auto [LMul, IsFractional] = RISCVVType::decodeVLMUL(VLMul); + assert(!IsFractional && "unexpected LMUL for tuple register classes"); + return TRI->getSubRegIdxSize(SubRegIdx) == RISCV::RVVBitsPerBlock * LMul; +} + +static bool isSegmentedStoreInstr(const MachineInstr &MI) { + switch (RISCV::getRVVMCOpcode(MI.getOpcode())) { + case VSSEG_CASES(8): + case VSSSEG_CASES(8): + case VSUXSEG_CASES(8): + case VSOXSEG_CASES(8): + case VSSEG_CASES(16): + case VSSSEG_CASES(16): + case VSUXSEG_CASES(16): + case VSOXSEG_CASES(16): + case VSSEG_CASES(32): + case VSSSEG_CASES(32): + case VSUXSEG_CASES(32): + case VSOXSEG_CASES(32): + case VSSEG_CASES(64): + case VSSSEG_CASES(64): + case VSUXSEG_CASES(64): + case VSOXSEG_CASES(64): + return true; + default: + return false; + } +} + std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const { std::optional<MachineOperand> CommonVL; @@ -1396,6 +1470,23 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const { continue; } + if (isTupleInsertInstr(UserMI)) { + LLVM_DEBUG(dbgs().indent(4) << "Peeking through uses of INSERT_SUBREG\n"); + for (MachineOperand &UseOp : + MRI->use_operands(UserMI.getOperand(0).getReg())) { + const MachineInstr &CandidateMI = *UseOp.getParent(); + // We should not propagate the VL if the user is not a segmented store + // or another 
INSERT_SUBREG, since VL just works differently + // between segmented operations (per-field) v.s. other RVV ops (on the + // whole register group). + if (!isTupleInsertInstr(CandidateMI) && + !isSegmentedStoreInstr(CandidateMI)) + return std::nullopt; + Worklist.insert(&UseOp); + } + continue; + } + if (UserMI.isPHI()) { // Don't follow PHI cycles if (!PHISeen.insert(&UserMI).second) @@ -1425,9 +1516,8 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const { return std::nullopt; } - std::optional<OperandInfo> ConsumerInfo = getOperandInfo(UserOp, MRI); - std::optional<OperandInfo> ProducerInfo = - getOperandInfo(MI.getOperand(0), MRI); + std::optional<OperandInfo> ConsumerInfo = getOperandInfo(UserOp); + std::optional<OperandInfo> ProducerInfo = getOperandInfo(MI.getOperand(0)); if (!ConsumerInfo || !ProducerInfo) { LLVM_DEBUG(dbgs() << " Abort due to unknown operand information.\n"); LLVM_DEBUG(dbgs() << " ConsumerInfo is: " << ConsumerInfo << "\n"); @@ -1449,7 +1539,7 @@ RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const { } bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const { - LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI << "\n"); + LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI); unsigned VLOpNum = RISCVII::getVLOpNum(MI.getDesc()); MachineOperand &VLOp = MI.getOperand(VLOpNum); @@ -1468,14 +1558,23 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const { assert((CommonVL->isImm() || CommonVL->getReg().isVirtual()) && "Expected VL to be an Imm or virtual Reg"); + // If the VL is defined by a vleff that doesn't dominate MI, try using the + // vleff's AVL. It will be greater than or equal to the output VL. 
+ if (CommonVL->isReg()) { + const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->getReg()); + if (RISCVInstrInfo::isFaultOnlyFirstLoad(*VLMI) && + !MDT->dominates(VLMI, &MI)) + CommonVL = VLMI->getOperand(RISCVII::getVLOpNum(VLMI->getDesc())); + } + if (!RISCV::isVLKnownLE(*CommonVL, VLOp)) { - LLVM_DEBUG(dbgs() << " Abort due to CommonVL not <= VLOp.\n"); + LLVM_DEBUG(dbgs() << " Abort due to CommonVL not <= VLOp.\n"); return false; } if (CommonVL->isIdenticalTo(VLOp)) { LLVM_DEBUG( - dbgs() << " Abort due to CommonVL == VLOp, no point in reducing.\n"); + dbgs() << " Abort due to CommonVL == VLOp, no point in reducing.\n"); return false; } @@ -1486,8 +1585,10 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const { return true; } const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->getReg()); - if (!MDT->dominates(VLMI, &MI)) + if (!MDT->dominates(VLMI, &MI)) { + LLVM_DEBUG(dbgs() << " Abort due to VL not dominating.\n"); return false; + } LLVM_DEBUG( dbgs() << " Reduce VL from " << VLOp << " to " << printReg(CommonVL->getReg(), MRI->getTargetRegisterInfo()) diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp index 050de3d58a2f..62651185137c 100644 --- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp +++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp @@ -745,12 +745,24 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const { if (PassthruReg && !isKnownSameDefs(PassthruReg, FalseReg)) return false; + std::optional<std::pair<unsigned, unsigned>> NeedsCommute; + // If True has a passthru operand then it needs to be the same as vmerge's // False, since False will be used for the result's passthru operand. 
Register TruePassthru = True.getOperand(True.getNumExplicitDefs()).getReg(); if (RISCVII::isFirstDefTiedToFirstUse(True.getDesc()) && TruePassthru && - !isKnownSameDefs(TruePassthru, FalseReg)) - return false; + !isKnownSameDefs(TruePassthru, FalseReg)) { + // If True's passthru != False, check if it uses False in another operand + // and try to commute it. + int OtherIdx = True.findRegisterUseOperandIdx(FalseReg, TRI); + if (OtherIdx == -1) + return false; + unsigned OpIdx1 = OtherIdx; + unsigned OpIdx2 = True.getNumExplicitDefs(); + if (!TII->findCommutedOpIndices(True, OpIdx1, OpIdx2)) + return false; + NeedsCommute = {OpIdx1, OpIdx2}; + } // Make sure it doesn't raise any observable fp exceptions, since changing the // active elements will affect how fflags is set. @@ -796,6 +808,14 @@ bool RISCVVectorPeephole::foldVMergeToMask(MachineInstr &MI) const { if (!ensureDominates(MaskOp, True)) return false; + if (NeedsCommute) { + auto [OpIdx1, OpIdx2] = *NeedsCommute; + [[maybe_unused]] bool Commuted = + TII->commuteInstruction(True, /*NewMI=*/false, OpIdx1, OpIdx2); + assert(Commuted && "Failed to commute True?"); + Info = RISCV::lookupMaskedIntrinsicByUnmasked(True.getOpcode()); + } + True.setDesc(TII->get(Info->MaskedPseudo)); // Insert the mask operand. |
