summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCombine.td14
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp71
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h13
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp3
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp2
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp10
-rw-r--r--llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp8
-rw-r--r--llvm/lib/Target/AMDGPU/BUFInstructions.td8
-rw-r--r--llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp25
-rw-r--r--llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h1
-rw-r--r--llvm/lib/Target/AMDGPU/MIMGInstructions.td66
-rw-r--r--llvm/lib/Target/AMDGPU/SIFoldOperands.cpp15
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.cpp114
-rw-r--r--llvm/lib/Target/AMDGPU/SIISelLowering.h4
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.cpp44
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstructions.td21
-rw-r--r--llvm/lib/Target/AMDGPU/SIRegisterInfo.td47
-rw-r--r--llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp5
-rw-r--r--llvm/lib/Target/AMDGPU/SMInstructions.td84
-rw-r--r--llvm/lib/Target/AMDGPU/VOP1Instructions.td20
-rw-r--r--llvm/lib/Target/AMDGPU/VOP3Instructions.td20
-rw-r--r--llvm/lib/Target/AMDGPU/VOPCInstructions.td132
-rw-r--r--llvm/lib/Target/AMDGPU/VOPInstructions.td4
26 files changed, 448 insertions, 290 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index e844904ebd1e..0f9798838d7f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -1523,7 +1523,8 @@ Value *AMDGPUCodeGenPrepareImpl::shrinkDivRem64(IRBuilder<> &Builder,
bool IsDiv = Opc == Instruction::SDiv || Opc == Instruction::UDiv;
bool IsSigned = Opc == Instruction::SDiv || Opc == Instruction::SRem;
- int NumDivBits = getDivNumBits(I, Num, Den, 32, IsSigned);
+ unsigned BitWidth = Num->getType()->getScalarSizeInBits();
+ int NumDivBits = getDivNumBits(I, Num, Den, BitWidth - 32, IsSigned);
if (NumDivBits == -1)
return nullptr;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
index 985fa8f1deff..da47aaf8a3b5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombine.td
@@ -124,6 +124,16 @@ def sign_extension_in_reg : GICombineRule<
[{ return matchCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }]),
(apply [{ applyCombineSignExtendInReg(*${sign_inreg}, ${matchinfo}); }])>;
+// Do the following combines :
+// fmul x, select(y, A, B) -> fldexp (x, select i32 (y, a, b))
+// fmul x, select(y, -A, -B) -> fldexp ((fneg x), select i32 (y, a, b))
+def combine_fmul_with_select_to_fldexp : GICombineRule<
+ (defs root:$root, build_fn_matchinfo:$matchinfo),
+ (match (G_FMUL $dst, $x, $select):$root,
+ (G_SELECT $select, $y, $A, $B):$sel,
+ [{ return Helper.matchCombineFmulWithSelectToFldexp(*${root}, *${sel}, ${matchinfo}); }]),
+ (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
let Predicates = [Has16BitInsts, NotHasMed3_16] in {
// For gfx8, expand f16-fmed3-as-f32 into a min/max f16 sequence. This
@@ -153,13 +163,13 @@ def gfx8_combines : GICombineGroup<[expand_promoted_fmed3]>;
def AMDGPUPreLegalizerCombiner: GICombiner<
"AMDGPUPreLegalizerCombinerImpl",
- [all_combines, clamp_i64_to_i16, foldable_fneg]> {
+ [all_combines, combine_fmul_with_select_to_fldexp, clamp_i64_to_i16, foldable_fneg]> {
let CombineAllMethodName = "tryCombineAllImpl";
}
def AMDGPUPostLegalizerCombiner: GICombiner<
"AMDGPUPostLegalizerCombinerImpl",
- [all_combines, gfx6gfx7_combines, gfx8_combines,
+ [all_combines, gfx6gfx7_combines, gfx8_combines, combine_fmul_with_select_to_fldexp,
uchar_to_float, cvt_f32_ubyteN, remove_fcanonicalize, foldable_fneg,
rcp_sqrt_to_rsq, fdiv_by_sqrt_to_rsq_f16, sign_extension_in_reg, smulu64]> {
let CombineAllMethodName = "tryCombineAllImpl";
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
index e5a376ab7357..f6f9f4bc0fb1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.cpp
@@ -17,6 +17,13 @@
using namespace llvm;
using namespace MIPatternMatch;
+AMDGPUCombinerHelper::AMDGPUCombinerHelper(
+ GISelChangeObserver &Observer, MachineIRBuilder &B, bool IsPreLegalize,
+ GISelKnownBits *KB, MachineDominatorTree *MDT, const LegalizerInfo *LI,
+ const GCNSubtarget &STI)
+ : CombinerHelper(Observer, B, IsPreLegalize, KB, MDT, LI), STI(STI),
+ TII(*STI.getInstrInfo()) {}
+
LLVM_READNONE
static bool fnegFoldsIntoMI(const MachineInstr &MI) {
switch (MI.getOpcode()) {
@@ -445,3 +452,67 @@ void AMDGPUCombinerHelper::applyExpandPromotedF16FMed3(MachineInstr &MI,
Builder.buildFMinNumIEEE(MI.getOperand(0), B1, C1);
MI.eraseFromParent();
}
+
+bool AMDGPUCombinerHelper::matchCombineFmulWithSelectToFldexp(
+ MachineInstr &MI, MachineInstr &Sel,
+ std::function<void(MachineIRBuilder &)> &MatchInfo) {
+ assert(MI.getOpcode() == TargetOpcode::G_FMUL);
+ assert(Sel.getOpcode() == TargetOpcode::G_SELECT);
+ assert(MI.getOperand(2).getReg() == Sel.getOperand(0).getReg());
+
+ Register Dst = MI.getOperand(0).getReg();
+ LLT DestTy = MRI.getType(Dst);
+ LLT ScalarDestTy = DestTy.getScalarType();
+
+ if ((ScalarDestTy != LLT::float64() && ScalarDestTy != LLT::float32() &&
+ ScalarDestTy != LLT::float16()) ||
+ !MRI.hasOneNonDBGUse(Sel.getOperand(0).getReg()))
+ return false;
+
+ Register SelectCondReg = Sel.getOperand(1).getReg();
+ MachineInstr *SelectTrue = MRI.getVRegDef(Sel.getOperand(2).getReg());
+ MachineInstr *SelectFalse = MRI.getVRegDef(Sel.getOperand(3).getReg());
+
+ const auto SelectTrueVal =
+ isConstantOrConstantSplatVectorFP(*SelectTrue, MRI);
+ if (!SelectTrueVal)
+ return false;
+ const auto SelectFalseVal =
+ isConstantOrConstantSplatVectorFP(*SelectFalse, MRI);
+ if (!SelectFalseVal)
+ return false;
+
+ if (SelectTrueVal->isNegative() != SelectFalseVal->isNegative())
+ return false;
+
+ // For f32, only non-inline constants should be transformed.
+ if (ScalarDestTy == LLT::float32() && TII.isInlineConstant(*SelectTrueVal) &&
+ TII.isInlineConstant(*SelectFalseVal))
+ return false;
+
+ int SelectTrueLog2Val = SelectTrueVal->getExactLog2Abs();
+ if (SelectTrueLog2Val == INT_MIN)
+ return false;
+ int SelectFalseLog2Val = SelectFalseVal->getExactLog2Abs();
+ if (SelectFalseLog2Val == INT_MIN)
+ return false;
+
+ MatchInfo = [=, &MI](MachineIRBuilder &Builder) {
+ LLT IntDestTy = DestTy.changeElementType(LLT::scalar(32));
+ auto NewSel = Builder.buildSelect(
+ IntDestTy, SelectCondReg,
+ Builder.buildConstant(IntDestTy, SelectTrueLog2Val),
+ Builder.buildConstant(IntDestTy, SelectFalseLog2Val));
+
+ Register XReg = MI.getOperand(1).getReg();
+ if (SelectTrueVal->isNegative()) {
+ auto NegX =
+ Builder.buildFNeg(DestTy, XReg, MRI.getVRegDef(XReg)->getFlags());
+ Builder.buildFLdexp(Dst, NegX, NewSel, MI.getFlags());
+ } else {
+ Builder.buildFLdexp(Dst, XReg, NewSel, MI.getFlags());
+ }
+ };
+
+ return true;
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
index 6510abe9d232..893b3f5415f8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUCombinerHelper.h
@@ -15,13 +15,22 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUCOMBINERHELPER_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUCOMBINERHELPER_H
+#include "GCNSubtarget.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
namespace llvm {
class AMDGPUCombinerHelper : public CombinerHelper {
+protected:
+ const GCNSubtarget &STI;
+ const SIInstrInfo &TII;
+
public:
using CombinerHelper::CombinerHelper;
+ AMDGPUCombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B,
+ bool IsPreLegalize, GISelKnownBits *KB,
+ MachineDominatorTree *MDT, const LegalizerInfo *LI,
+ const GCNSubtarget &STI);
bool matchFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo);
void applyFoldableFneg(MachineInstr &MI, MachineInstr *&MatchInfo);
@@ -30,6 +39,10 @@ public:
Register Src1, Register Src2);
void applyExpandPromotedF16FMed3(MachineInstr &MI, Register Src0,
Register Src1, Register Src2);
+
+ bool matchCombineFmulWithSelectToFldexp(
+ MachineInstr &MI, MachineInstr &Sel,
+ std::function<void(MachineIRBuilder &)> &MatchInfo);
};
} // namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index d9eaf82c5214..27e9018d68a0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -1997,7 +1997,7 @@ bool AMDGPUDAGToDAGISel::SelectScratchSVAddr(SDNode *N, SDValue Addr,
if (checkFlatScratchSVSSwizzleBug(VAddr, SAddr, ImmOffset))
return false;
SAddr = SelectSAddrFI(CurDAG, SAddr);
- Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i32);
+ Offset = CurDAG->getSignedTargetConstant(ImmOffset, SDLoc(), MVT::i32);
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 3be865f03df1..041b9b4d66f6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1125,8 +1125,9 @@ static int getV_CMPOpcode(CmpInst::Predicate P, unsigned Size,
unsigned FakeS16Opc, unsigned S32Opc,
unsigned S64Opc) {
if (Size == 16)
+ // FIXME-TRUE16 use TrueS16Opc when realtrue16 is supported for CMP code
return ST.hasTrue16BitInsts()
- ? ST.useRealTrue16Insts() ? TrueS16Opc : FakeS16Opc
+ ? ST.useRealTrue16Insts() ? FakeS16Opc : FakeS16Opc
: S16Opc;
if (Size == 32)
return S32Opc;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
index 54d927c33fc5..888817e52e35 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp
@@ -134,7 +134,7 @@ AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
: Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
TII(*STI.getInstrInfo()),
- Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
+ Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI, STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
index ff8189ce31f7..e1564d5de415 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUPreLegalizerCombiner.cpp
@@ -94,7 +94,7 @@ AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl(
const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig,
const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
: Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
- Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI),
+ Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI, STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index d94c400ad142..08e23cbf34e4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -1190,9 +1190,13 @@ bool AMDGPURegisterBankInfo::applyMappingDynStackAlloc(
const RegisterBank *SizeBank = getRegBank(AllocSize, MRI, *TRI);
- // TODO: Need to emit a wave reduction to get the maximum size.
- if (SizeBank != &AMDGPU::SGPRRegBank)
- return false;
+ if (SizeBank != &AMDGPU::SGPRRegBank) {
+ auto WaveReduction =
+ B.buildIntrinsic(Intrinsic::amdgcn_wave_reduce_umax, {LLT::scalar(32)})
+ .addUse(AllocSize)
+ .addImm(0);
+ AllocSize = WaveReduction.getReg(0);
+ }
LLT PtrTy = MRI.getType(Dst);
LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index ed956a1f755c..d8f441d1ccfe 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -9760,10 +9760,14 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
case MCK_SReg_64:
case MCK_SReg_64_XEXEC:
// Null is defined as a 32-bit register but
- // it should also be enabled with 64-bit operands.
- // The following code enables it for SReg_64 operands
+ // it should also be enabled with 64-bit operands or larger.
+ // The following code enables it for SReg_64 and larger operands
// used as source and destination. Remaining source
// operands are handled in isInlinableImm.
+ case MCK_SReg_96:
+ case MCK_SReg_128:
+ case MCK_SReg_256:
+ case MCK_SReg_512:
return Operand.isNull() ? Match_Success : Match_InvalidOperand;
default:
return Match_InvalidOperand;
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index a351f451584f..f2686bdf56b4 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -168,7 +168,7 @@ class getMTBUFInsDA<list<RegisterClass> vdataList,
dag SOffset = !if(hasRestrictedSOffset, (ins SReg_32:$soffset),
(ins SCSrc_b32:$soffset));
- dag NonVaddrInputs = !con((ins SReg_128:$srsrc), SOffset,
+ dag NonVaddrInputs = !con((ins SReg_128_XNULL:$srsrc), SOffset,
(ins Offset:$offset, FORMAT:$format, CPol_0:$cpol, i1imm_0:$swz));
dag Inputs = !if(!empty(vaddrList),
@@ -418,7 +418,7 @@ class getMUBUFInsDA<list<RegisterClass> vdataList,
RegisterOperand vdata_op = getLdStVDataRegisterOperand<vdataClass, isTFE>.ret;
dag SOffset = !if(hasRestrictedSOffset, (ins SReg_32:$soffset), (ins SCSrc_b32:$soffset));
- dag NonVaddrInputs = !con((ins SReg_128:$srsrc), SOffset, (ins Offset:$offset, CPol_0:$cpol, i1imm_0:$swz));
+ dag NonVaddrInputs = !con((ins SReg_128_XNULL:$srsrc), SOffset, (ins Offset:$offset, CPol_0:$cpol, i1imm_0:$swz));
dag Inputs = !if(!empty(vaddrList), NonVaddrInputs, !con((ins vaddrClass:$vaddr), NonVaddrInputs));
dag ret = !if(!empty(vdataList), Inputs, !con((ins vdata_op:$vdata), Inputs));
@@ -680,7 +680,7 @@ multiclass MUBUF_Pseudo_Stores<string opName, ValueType store_vt = i32> {
class MUBUF_Pseudo_Store_Lds<string opName>
: MUBUF_Pseudo<opName,
(outs),
- (ins SReg_128:$srsrc, SCSrc_b32:$soffset, Offset:$offset, CPol:$cpol, i1imm:$swz),
+ (ins SReg_128_XNULL:$srsrc, SCSrc_b32:$soffset, Offset:$offset, CPol:$cpol, i1imm:$swz),
" $srsrc, $soffset$offset lds$cpol"> {
let LGKM_CNT = 1;
let mayLoad = 1;
@@ -703,7 +703,7 @@ class getMUBUFAtomicInsDA<RegisterClass vdataClass, bit vdata_in, bit hasRestric
dag VData = !if(vdata_in, (ins vdata_op:$vdata_in), (ins vdata_op:$vdata));
dag Data = !if(!empty(vaddrList), VData, !con(VData, (ins vaddrClass:$vaddr)));
dag SOffset = !if(hasRestrictedSOffset, (ins SReg_32:$soffset), (ins SCSrc_b32:$soffset));
- dag MainInputs = !con((ins SReg_128:$srsrc), SOffset, (ins Offset:$offset));
+ dag MainInputs = !con((ins SReg_128_XNULL:$srsrc), SOffset, (ins Offset:$offset));
dag CPol = !if(vdata_in, (ins CPol_GLC_WithDefault:$cpol),
(ins CPol_NonGLC_WithDefault:$cpol));
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 590835180572..d2363274965a 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -279,7 +279,9 @@ DECODE_OPERAND_REG_7(SReg_64_XEXEC, OPW64)
DECODE_OPERAND_REG_7(SReg_64_XEXEC_XNULL, OPW64)
DECODE_OPERAND_REG_7(SReg_96, OPW96)
DECODE_OPERAND_REG_7(SReg_128, OPW128)
+DECODE_OPERAND_REG_7(SReg_128_XNULL, OPW128)
DECODE_OPERAND_REG_7(SReg_256, OPW256)
+DECODE_OPERAND_REG_7(SReg_256_XNULL, OPW256)
DECODE_OPERAND_REG_7(SReg_512, OPW512)
DECODE_OPERAND_REG_8(AGPR_32)
@@ -1692,6 +1694,11 @@ AMDGPUDisassembler::decodeNonVGPRSrcOp(const OpWidthTy Width, unsigned Val,
case OPW64:
case OPWV232:
return decodeSpecialReg64(Val);
+ case OPW96:
+ case OPW128:
+ case OPW256:
+ case OPW512:
+ return decodeSpecialReg96Plus(Val);
default:
llvm_unreachable("unexpected immediate type");
}
@@ -1778,6 +1785,24 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
+MCOperand AMDGPUDisassembler::decodeSpecialReg96Plus(unsigned Val) const {
+ using namespace AMDGPU;
+
+ switch (Val) {
+ case 124:
+ if (isGFX11Plus())
+ return createRegOperand(SGPR_NULL);
+ break;
+ case 125:
+ if (!isGFX11Plus())
+ return createRegOperand(SGPR_NULL);
+ break;
+ default:
+ break;
+ }
+ return errOperand(Val, "unknown operand encoding " + Twine(Val));
+}
+
MCOperand
AMDGPUDisassembler::decodeSDWASrc(const OpWidthTy Width, const unsigned Val,
unsigned ImmWidth,
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index b19e4b74a394..9a06cc3dc8c7 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -259,6 +259,7 @@ public:
MCOperand decodeVOPDDstYOp(MCInst &Inst, unsigned Val) const;
MCOperand decodeSpecialReg32(unsigned Val) const;
MCOperand decodeSpecialReg64(unsigned Val) const;
+ MCOperand decodeSpecialReg96Plus(unsigned Val) const;
MCOperand decodeSDWASrc(const OpWidthTy Width, unsigned Val,
unsigned ImmWidth,
diff --git a/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
index 4722d3362f6a..1b94d6c43392 100644
--- a/llvm/lib/Target/AMDGPU/MIMGInstructions.td
+++ b/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -422,7 +422,7 @@ class MIMG_NoSampler_Helper <mimgopc op, string asm,
RegisterClass addr_rc,
string dns="">
: MIMG_gfx6789 <op.GFX10M, (outs dst_rc:$vdata), dns> {
- let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
+ let InOperandList = !con((ins addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -435,7 +435,7 @@ class MIMG_NoSampler_Helper_gfx90a <mimgopc op, string asm,
RegisterClass addr_rc,
string dns="">
: MIMG_gfx90a <op.GFX10M, (outs getLdStRegisterOperand<dst_rc>.ret:$vdata), dns> {
- let InOperandList = !con((ins addr_rc:$vaddr, SReg_256:$srsrc,
+ let InOperandList = !con((ins addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -447,7 +447,7 @@ class MIMG_NoSampler_gfx10<mimgopc op, string opcode,
RegisterClass DataRC, RegisterClass AddrRC,
string dns="">
: MIMG_gfx10<op.GFX10M, (outs DataRC:$vdata), dns> {
- let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask,
+ let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256_XNULL:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -460,7 +460,7 @@ class MIMG_NoSampler_nsa_gfx10<mimgopc op, string opcode,
string dns="">
: MIMG_nsa_gfx10<op.GFX10M, (outs DataRC:$vdata), num_addrs, dns> {
let InOperandList = !con(AddrIns,
- (ins SReg_256:$srsrc, DMask:$dmask,
+ (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -472,7 +472,7 @@ class MIMG_NoSampler_gfx11<mimgopc op, string opcode,
RegisterClass DataRC, RegisterClass AddrRC,
string dns="">
: MIMG_gfx11<op.GFX11, (outs DataRC:$vdata), dns> {
- let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256:$srsrc, DMask:$dmask,
+ let InOperandList = !con((ins AddrRC:$vaddr0, SReg_256_XNULL:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -485,7 +485,7 @@ class MIMG_NoSampler_nsa_gfx11<mimgopc op, string opcode,
string dns="">
: MIMG_nsa_gfx11<op.GFX11, (outs DataRC:$vdata), num_addrs, dns> {
let InOperandList = !con(AddrIns,
- (ins SReg_256:$srsrc, DMask:$dmask,
+ (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -498,7 +498,7 @@ class VIMAGE_NoSampler_gfx12<mimgopc op, string opcode,
string dns="">
: VIMAGE_gfx12<op.GFX12, (outs DataRC:$vdata), num_addrs, dns> {
let InOperandList = !con(AddrIns,
- (ins SReg_256:$rsrc, DMask:$dmask, Dim:$dim,
+ (ins SReg_256_XNULL:$rsrc, DMask:$dmask, Dim:$dim,
CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, "#AddrAsm#", $rsrc$dmask$dim$cpol$r128$a16$tfe"
@@ -510,8 +510,8 @@ class VSAMPLE_Sampler_gfx12<mimgopc op, string opcode, RegisterClass DataRC,
string dns="">
: VSAMPLE_gfx12<op.GFX12, (outs DataRC:$vdata), num_addrs, dns, Addr3RC> {
let InOperandList = !con(AddrIns,
- (ins SReg_256:$rsrc),
- !if(BaseOpcode.Sampler, (ins SReg_128:$samp), (ins)),
+ (ins SReg_256_XNULL:$rsrc),
+ !if(BaseOpcode.Sampler, (ins SReg_128_XNULL:$samp), (ins)),
(ins DMask:$dmask, Dim:$dim, UNorm:$unorm,
CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe,
LWE:$lwe),
@@ -527,8 +527,8 @@ class VSAMPLE_Sampler_nortn_gfx12<mimgopc op, string opcode,
string dns="">
: VSAMPLE_gfx12<op.GFX12, (outs), num_addrs, dns, Addr3RC> {
let InOperandList = !con(AddrIns,
- (ins SReg_256:$rsrc),
- !if(BaseOpcode.Sampler, (ins SReg_128:$samp), (ins)),
+ (ins SReg_256_XNULL:$rsrc),
+ !if(BaseOpcode.Sampler, (ins SReg_128_XNULL:$samp), (ins)),
(ins DMask:$dmask, Dim:$dim, UNorm:$unorm,
CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe,
LWE:$lwe),
@@ -679,7 +679,7 @@ class MIMG_Store_Helper <mimgopc op, string asm,
RegisterClass addr_rc,
string dns = "">
: MIMG_gfx6789<op.GFX10M, (outs), dns> {
- let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
+ let InOperandList = !con((ins data_rc:$vdata, addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -693,7 +693,7 @@ class MIMG_Store_Helper_gfx90a <mimgopc op, string asm,
string dns = "">
: MIMG_gfx90a<op.GFX10M, (outs), dns> {
let InOperandList = !con((ins getLdStRegisterOperand<data_rc>.ret:$vdata,
- addr_rc:$vaddr, SReg_256:$srsrc,
+ addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -705,7 +705,7 @@ class MIMG_Store_gfx10<mimgopc op, string opcode,
RegisterClass DataRC, RegisterClass AddrRC,
string dns="">
: MIMG_gfx10<op.GFX10M, (outs), dns> {
- let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
+ let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -719,7 +719,7 @@ class MIMG_Store_nsa_gfx10<mimgopc op, string opcode,
: MIMG_nsa_gfx10<op.GFX10M, (outs), num_addrs, dns> {
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256:$srsrc, DMask:$dmask,
+ (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -731,7 +731,7 @@ class MIMG_Store_gfx11<mimgopc op, string opcode,
RegisterClass DataRC, RegisterClass AddrRC,
string dns="">
: MIMG_gfx11<op.GFX11, (outs), dns> {
- let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
+ let InOperandList = !con((ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -745,7 +745,7 @@ class MIMG_Store_nsa_gfx11<mimgopc op, string opcode,
: MIMG_nsa_gfx11<op.GFX11, (outs), num_addrs, dns> {
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256:$srsrc, DMask:$dmask,
+ (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -759,7 +759,7 @@ class VIMAGE_Store_gfx12<mimgopc op, string opcode,
: VIMAGE_gfx12<op.GFX12, (outs), num_addrs, dns> {
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256:$rsrc, DMask:$dmask, Dim:$dim,
+ (ins SReg_256_XNULL:$rsrc, DMask:$dmask, Dim:$dim,
CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
let AsmString = opcode#" $vdata, "#AddrAsm#", $rsrc$dmask$dim$cpol$r128$a16$tfe"
@@ -875,7 +875,7 @@ class MIMG_Atomic_gfx6789_base <bits<8> op, string asm, RegisterClass data_rc,
: MIMG_gfx6789 <op, (outs data_rc:$vdst), dns> {
let Constraints = "$vdst = $vdata";
- let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
+ let InOperandList = (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da);
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$tfe$lwe$da";
@@ -887,7 +887,7 @@ class MIMG_Atomic_gfx90a_base <bits<8> op, string asm, RegisterClass data_rc,
let Constraints = "$vdst = $vdata";
let InOperandList = (ins getLdStRegisterOperand<data_rc>.ret:$vdata,
- addr_rc:$vaddr, SReg_256:$srsrc,
+ addr_rc:$vaddr, SReg_256_XNULL:$srsrc,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da);
let AsmString = asm#" $vdst, $vaddr, $srsrc$dmask$unorm$cpol$r128$lwe$da";
@@ -921,7 +921,7 @@ class MIMG_Atomic_gfx10<mimgopc op, string opcode,
!if(enableDisasm, "GFX10", "")> {
let Constraints = "$vdst = $vdata";
- let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
+ let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe);
let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
@@ -936,7 +936,7 @@ class MIMG_Atomic_nsa_gfx10<mimgopc op, string opcode,
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256:$srsrc, DMask:$dmask,
+ (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
@@ -949,7 +949,7 @@ class MIMG_Atomic_gfx11<mimgopc op, string opcode,
!if(enableDisasm, "GFX11", "")> {
let Constraints = "$vdst = $vdata";
- let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256:$srsrc,
+ let InOperandList = (ins DataRC:$vdata, AddrRC:$vaddr0, SReg_256_XNULL:$srsrc,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe);
let AsmString = opcode#" $vdst, $vaddr0, $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
@@ -964,7 +964,7 @@ class MIMG_Atomic_nsa_gfx11<mimgopc op, string opcode,
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256:$srsrc, DMask:$dmask,
+ (ins SReg_256_XNULL:$srsrc, DMask:$dmask,
Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe));
let AsmString = opcode#" $vdata, "#AddrAsm#", $srsrc$dmask$dim$unorm$cpol$r128$a16$tfe$lwe";
@@ -978,7 +978,7 @@ class VIMAGE_Atomic_gfx12<mimgopc op, string opcode, RegisterClass DataRC,
let InOperandList = !con((ins DataRC:$vdata),
AddrIns,
- (ins SReg_256:$rsrc, DMask:$dmask, Dim:$dim,
+ (ins SReg_256_XNULL:$rsrc, DMask:$dmask, Dim:$dim,
CPol:$cpol, R128A16:$r128, A16:$a16, TFE:$tfe));
let AsmString = !if(!empty(renamed), opcode, renamed)#" $vdata, "#AddrAsm#
", $rsrc$dmask$dim$cpol$r128$a16$tfe";
@@ -1128,7 +1128,7 @@ multiclass MIMG_Atomic_Renamed <mimgopc op, string asm, string renamed,
class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,
RegisterClass src_rc, string dns="">
: MIMG_gfx6789 <op.VI, (outs dst_rc:$vdata), dns> {
- let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
+ let InOperandList = !con((ins src_rc:$vaddr, SReg_256_XNULL:$srsrc, SReg_128_XNULL:$ssamp,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, TFE:$tfe, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -1139,7 +1139,7 @@ class MIMG_Sampler_Helper <mimgopc op, string asm, RegisterClass dst_rc,
class MIMG_Sampler_gfx90a<mimgopc op, string asm, RegisterClass dst_rc,
RegisterClass src_rc, string dns="">
: MIMG_gfx90a<op.GFX10M, (outs getLdStRegisterOperand<dst_rc>.ret:$vdata), dns> {
- let InOperandList = !con((ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
+ let InOperandList = !con((ins src_rc:$vaddr, SReg_256_XNULL:$srsrc, SReg_128_XNULL:$ssamp,
DMask:$dmask, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, LWE:$lwe, DA:$da),
!if(BaseOpcode.HasD16, (ins D16:$d16), (ins)));
@@ -1149,7 +1149,7 @@ class MIMG_Sampler_gfx90a<mimgopc op, string asm, RegisterClass dst_rc,
class MIMG_Sampler_OpList_gfx10p<dag OpPrefix, bit HasD16> {
dag ret = !con(OpPrefix,
- (ins SReg_256:$srsrc, SReg_128:$ssamp,
+ (ins SReg_256_XNULL:$srsrc, SReg_128_XNULL:$ssamp,
DMask:$dmask, Dim:$dim, UNorm:$unorm, CPol:$cpol,
R128A16:$r128, A16:$a16, TFE:$tfe, LWE:$lwe),
!if(HasD16, (ins D16:$d16), (ins)));
@@ -1524,7 +1524,7 @@ class MIMG_IntersectRay_Helper<bit Is64, bit IsA16> {
class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC>
: MIMG_gfx10<op.GFX10M, (outs VReg_128:$vdata), "GFX10"> {
- let InOperandList = (ins AddrRC:$vaddr0, SReg_128:$srsrc, A16:$a16);
+ let InOperandList = (ins AddrRC:$vaddr0, SReg_128_XNULL:$srsrc, A16:$a16);
let AsmString = opcode#" $vdata, $vaddr0, $srsrc$a16";
let nsa = 0;
@@ -1532,13 +1532,13 @@ class MIMG_IntersectRay_gfx10<mimgopc op, string opcode, RegisterClass AddrRC>
class MIMG_IntersectRay_nsa_gfx10<mimgopc op, string opcode, int num_addrs>
: MIMG_nsa_gfx10<op.GFX10M, (outs VReg_128:$vdata), num_addrs, "GFX10"> {
- let InOperandList = !con(nsah.AddrIns, (ins SReg_128:$srsrc, A16:$a16));
+ let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$srsrc, A16:$a16));
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc$a16";
}
class MIMG_IntersectRay_gfx11<mimgopc op, string opcode, RegisterClass AddrRC>
: MIMG_gfx11<op.GFX11, (outs VReg_128:$vdata), "GFX11"> {
- let InOperandList = (ins AddrRC:$vaddr0, SReg_128:$srsrc, A16:$a16);
+ let InOperandList = (ins AddrRC:$vaddr0, SReg_128_XNULL:$srsrc, A16:$a16);
let AsmString = opcode#" $vdata, $vaddr0, $srsrc$a16";
let nsa = 0;
@@ -1548,7 +1548,7 @@ class MIMG_IntersectRay_nsa_gfx11<mimgopc op, string opcode, int num_addrs,
list<RegisterClass> addr_types>
: MIMG_nsa_gfx11<op.GFX11, (outs VReg_128:$vdata), num_addrs, "GFX11",
addr_types> {
- let InOperandList = !con(nsah.AddrIns, (ins SReg_128:$srsrc, A16:$a16));
+ let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$srsrc, A16:$a16));
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $srsrc$a16";
}
@@ -1556,7 +1556,7 @@ class VIMAGE_IntersectRay_gfx12<mimgopc op, string opcode, int num_addrs,
list<RegisterClass> addr_types>
: VIMAGE_gfx12<op.GFX12, (outs VReg_128:$vdata),
num_addrs, "GFX12", addr_types> {
- let InOperandList = !con(nsah.AddrIns, (ins SReg_128:$rsrc, A16:$a16));
+ let InOperandList = !con(nsah.AddrIns, (ins SReg_128_XNULL:$rsrc, A16:$a16));
let AsmString = opcode#" $vdata, "#nsah.AddrAsm#", $rsrc$a16";
}
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 4fb5cb066be7..2bc19137b1ca 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -199,7 +199,7 @@ static unsigned macToMad(unsigned Opc) {
case AMDGPU::V_FMAC_F16_e64:
return AMDGPU::V_FMA_F16_gfx9_e64;
case AMDGPU::V_FMAC_F16_fake16_e64:
- return AMDGPU::V_FMA_F16_gfx9_e64;
+ return AMDGPU::V_FMA_F16_gfx9_fake16_e64;
case AMDGPU::V_FMAC_LEGACY_F32_e64:
return AMDGPU::V_FMA_LEGACY_F32_e64;
case AMDGPU::V_FMAC_F64_e64:
@@ -1096,21 +1096,8 @@ void SIFoldOperandsImpl::foldOperand(
B.addImm(Defs[I].second);
}
LLVM_DEBUG(dbgs() << "Folded " << *UseMI);
- return;
}
- if (Size != 4)
- return;
-
- Register Reg0 = UseMI->getOperand(0).getReg();
- Register Reg1 = UseMI->getOperand(1).getReg();
- if (TRI->isAGPR(*MRI, Reg0) && TRI->isVGPR(*MRI, Reg1))
- UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64));
- else if (TRI->isVGPR(*MRI, Reg0) && TRI->isAGPR(*MRI, Reg1))
- UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64));
- else if (ST->hasGFX90AInsts() && TRI->isAGPR(*MRI, Reg0) &&
- TRI->isAGPR(*MRI, Reg1))
- UseMI->setDesc(TII->get(AMDGPU::V_ACCVGPR_MOV_B32));
return;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 58b061f5c1af..0ac84f4e1f02 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4017,29 +4017,26 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
}
// This is similar to the default implementation in ExpandDYNAMIC_STACKALLOC,
-// except for stack growth direction(default: downwards, AMDGPU: upwards) and
-// applying the wave size scale to the increment amount.
-SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(SDValue Op,
- SelectionDAG &DAG) const {
+// except for:
+// 1. Stack growth direction(default: downwards, AMDGPU: upwards), and
+// 2. Scale size where, scale = wave-reduction(alloca-size) * wave-size
+SDValue SITargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
+ SelectionDAG &DAG) const {
const MachineFunction &MF = DAG.getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
SDLoc dl(Op);
EVT VT = Op.getValueType();
- SDValue Tmp1 = Op;
- SDValue Tmp2 = Op.getValue(1);
- SDValue Tmp3 = Op.getOperand(2);
- SDValue Chain = Tmp1.getOperand(0);
-
+ SDValue Chain = Op.getOperand(0);
Register SPReg = Info->getStackPtrOffsetReg();
// Chain the dynamic stack allocation so that it doesn't modify the stack
// pointer when other instructions are using the stack.
Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
- SDValue Size = Tmp2.getOperand(1);
+ SDValue Size = Op.getOperand(1);
SDValue BaseAddr = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
- Align Alignment = cast<ConstantSDNode>(Tmp3)->getAlignValue();
+ Align Alignment = cast<ConstantSDNode>(Op.getOperand(2))->getAlignValue();
const TargetFrameLowering *TFL = Subtarget->getFrameLowering();
assert(TFL->getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp &&
@@ -4057,30 +4054,36 @@ SDValue SITargetLowering::lowerDYNAMIC_STACKALLOCImpl(SDValue Op,
DAG.getSignedConstant(-ScaledAlignment, dl, VT));
}
- SDValue ScaledSize = DAG.getNode(
- ISD::SHL, dl, VT, Size,
- DAG.getConstant(Subtarget->getWavefrontSizeLog2(), dl, MVT::i32));
-
- SDValue NewSP = DAG.getNode(ISD::ADD, dl, VT, BaseAddr, ScaledSize); // Value
+ assert(Size.getValueType() == MVT::i32 && "Size must be 32-bit");
+ SDValue NewSP;
+ if (isa<ConstantSDNode>(Size)) {
+ // For constant sized alloca, scale alloca size by wave-size
+ SDValue ScaledSize = DAG.getNode(
+ ISD::SHL, dl, VT, Size,
+ DAG.getConstant(Subtarget->getWavefrontSizeLog2(), dl, MVT::i32));
+ NewSP = DAG.getNode(ISD::ADD, dl, VT, BaseAddr, ScaledSize); // Value
+ } else {
+ // For dynamic sized alloca, perform wave-wide reduction to get max of
+ // alloca size(divergent) and then scale it by wave-size
+ SDValue WaveReduction =
+ DAG.getTargetConstant(Intrinsic::amdgcn_wave_reduce_umax, dl, MVT::i32);
+ Size = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, WaveReduction,
+ Size, DAG.getConstant(0, dl, MVT::i32));
+ SDValue ScaledSize = DAG.getNode(
+ ISD::SHL, dl, VT, Size,
+ DAG.getConstant(Subtarget->getWavefrontSizeLog2(), dl, MVT::i32));
+ NewSP =
+ DAG.getNode(ISD::ADD, dl, VT, BaseAddr, ScaledSize); // Value in vgpr.
+ SDValue ReadFirstLaneID =
+ DAG.getTargetConstant(Intrinsic::amdgcn_readfirstlane, dl, MVT::i32);
+ NewSP = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, ReadFirstLaneID,
+ NewSP);
+ }
Chain = DAG.getCopyToReg(Chain, dl, SPReg, NewSP); // Output chain
- Tmp2 = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);
+ SDValue CallSeqEnd = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);
- return DAG.getMergeValues({BaseAddr, Tmp2}, dl);
-}
-
-SDValue SITargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
- SelectionDAG &DAG) const {
- // We only handle constant sizes here to allow non-entry block, static sized
- // allocas. A truly dynamic value is more difficult to support because we
- // don't know if the size value is uniform or not. If the size isn't uniform,
- // we would need to do a wave reduction to get the maximum size to know how
- // much to increment the uniform stack pointer.
- SDValue Size = Op.getOperand(1);
- if (isa<ConstantSDNode>(Size))
- return lowerDYNAMIC_STACKALLOCImpl(Op, DAG); // Use "generic" expansion.
-
- return AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(Op, DAG);
+ return DAG.getMergeValues({BaseAddr, CallSeqEnd}, dl);
}
SDValue SITargetLowering::LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const {
@@ -13982,6 +13985,43 @@ SDValue SITargetLowering::tryFoldToMad64_32(SDNode *N,
return Accum;
}
+SDValue
+SITargetLowering::foldAddSub64WithZeroLowBitsTo32(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SDValue RHS = N->getOperand(1);
+ auto *CRHS = dyn_cast<ConstantSDNode>(RHS);
+ if (!CRHS)
+ return SDValue();
+
+ // TODO: Worth using computeKnownBits? Maybe expensive since it's so
+ // common.
+ uint64_t Val = CRHS->getZExtValue();
+ if (countr_zero(Val) >= 32) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+ SDValue LHS = N->getOperand(0);
+
+ // Avoid carry machinery if we know the low half of the add does not
+ // contribute to the final result.
+ //
+ // add i64:x, K if computeTrailingZeros(K) >= 32
+ // => build_pair (add x.hi, K.hi), x.lo
+
+ // Breaking the 64-bit add here with this strange constant is unlikely
+ // to interfere with addressing mode patterns.
+
+ SDValue Hi = getHiHalf64(LHS, DAG);
+ SDValue ConstHi32 = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+ SDValue AddHi =
+ DAG.getNode(N->getOpcode(), SL, MVT::i32, Hi, ConstHi32, N->getFlags());
+
+ SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS);
+ return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, Lo, AddHi);
+ }
+
+ return SDValue();
+}
+
// Collect the ultimate src of each of the mul node's operands, and confirm
// each operand is 8 bytes.
static std::optional<ByteProvider<SDValue>>
@@ -14258,6 +14298,11 @@ SDValue SITargetLowering::performAddCombine(SDNode *N,
return V;
}
+ if (VT == MVT::i64) {
+ if (SDValue Folded = foldAddSub64WithZeroLowBitsTo32(N, DCI))
+ return Folded;
+ }
+
if ((isMul(LHS) || isMul(RHS)) && Subtarget->hasDot7Insts() &&
(Subtarget->hasDot1Insts() || Subtarget->hasDot8Insts())) {
SDValue TempNode(N, 0);
@@ -14443,6 +14488,11 @@ SDValue SITargetLowering::performSubCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
EVT VT = N->getValueType(0);
+ if (VT == MVT::i64) {
+ if (SDValue Folded = foldAddSub64WithZeroLowBitsTo32(N, DCI))
+ return Folded;
+ }
+
if (VT != MVT::i32)
return SDValue();
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.h b/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 631f26542bbe..299c8f5f7392 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -212,6 +212,9 @@ private:
unsigned getFusedOpcode(const SelectionDAG &DAG,
const SDNode *N0, const SDNode *N1) const;
SDValue tryFoldToMad64_32(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue foldAddSub64WithZeroLowBitsTo32(SDNode *N,
+ DAGCombinerInfo &DCI) const;
+
SDValue performAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performAddCarrySubCarryCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
@@ -421,7 +424,6 @@ public:
SDValue LowerCall(CallLoweringInfo &CLI,
SmallVectorImpl<SDValue> &InVals) const override;
- SDValue lowerDYNAMIC_STACKALLOCImpl(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index f97ea40caa67..e6f333fbb878 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3805,6 +3805,36 @@ static void updateLiveVariables(LiveVariables *LV, MachineInstr &MI,
}
}
+static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc) {
+ switch (Opc) {
+ case AMDGPU::V_MAC_F16_e32:
+ case AMDGPU::V_MAC_F16_e64:
+ return AMDGPU::V_MAD_F16_e64;
+ case AMDGPU::V_MAC_F32_e32:
+ case AMDGPU::V_MAC_F32_e64:
+ return AMDGPU::V_MAD_F32_e64;
+ case AMDGPU::V_MAC_LEGACY_F32_e32:
+ case AMDGPU::V_MAC_LEGACY_F32_e64:
+ return AMDGPU::V_MAD_LEGACY_F32_e64;
+ case AMDGPU::V_FMAC_LEGACY_F32_e32:
+ case AMDGPU::V_FMAC_LEGACY_F32_e64:
+ return AMDGPU::V_FMA_LEGACY_F32_e64;
+ case AMDGPU::V_FMAC_F16_e32:
+ case AMDGPU::V_FMAC_F16_e64:
+ case AMDGPU::V_FMAC_F16_fake16_e64:
+ return ST.hasTrue16BitInsts() ? AMDGPU::V_FMA_F16_gfx9_fake16_e64
+ : AMDGPU::V_FMA_F16_gfx9_e64;
+ case AMDGPU::V_FMAC_F32_e32:
+ case AMDGPU::V_FMAC_F32_e64:
+ return AMDGPU::V_FMA_F32_e64;
+ case AMDGPU::V_FMAC_F64_e32:
+ case AMDGPU::V_FMAC_F64_e64:
+ return AMDGPU::V_FMA_F64_e64;
+ default:
+ llvm_unreachable("invalid instruction");
+ }
+}
+
MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
LiveVariables *LV,
LiveIntervals *LIS) const {
@@ -4040,14 +4070,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
if (Src0Literal && !ST.hasVOP3Literal())
return nullptr;
- unsigned NewOpc = IsFMA ? IsF16 ? AMDGPU::V_FMA_F16_gfx9_e64
- : IsF64 ? AMDGPU::V_FMA_F64_e64
- : IsLegacy
- ? AMDGPU::V_FMA_LEGACY_F32_e64
- : AMDGPU::V_FMA_F32_e64
- : IsF16 ? AMDGPU::V_MAD_F16_e64
- : IsLegacy ? AMDGPU::V_MAD_LEGACY_F32_e64
- : AMDGPU::V_MAD_F32_e64;
+ unsigned NewOpc = getNewFMAInst(ST, Opc);
+
if (pseudoToMCOpcode(NewOpc) == -1)
return nullptr;
@@ -6866,9 +6890,8 @@ SIInstrInfo::legalizeOperands(MachineInstr &MI,
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
if (RsrcIdx != -1) {
MachineOperand *Rsrc = &MI.getOperand(RsrcIdx);
- if (Rsrc->isReg() && !RI.isSGPRClass(MRI.getRegClass(Rsrc->getReg()))) {
+ if (Rsrc->isReg() && !RI.isSGPRReg(MRI, Rsrc->getReg()))
isRsrcLegal = false;
- }
}
// The operands are legal.
@@ -9294,6 +9317,7 @@ static bool isRenamedInGFX9(int Opcode) {
case AMDGPU::V_DIV_FIXUP_F16_gfx9_e64:
case AMDGPU::V_DIV_FIXUP_F16_gfx9_fake16_e64:
case AMDGPU::V_FMA_F16_gfx9_e64:
+ case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
case AMDGPU::V_INTERP_P2_F16:
case AMDGPU::V_MAD_F16_e64:
case AMDGPU::V_MAD_U16_e64:
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 789ce8815cf8..ee83dff227a8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2674,8 +2674,8 @@ let OtherPredicates = [NotHasTrue16BitInsts] in {
} // end OtherPredicates = [NotHasTrue16BitInsts]
let OtherPredicates = [HasTrue16BitInsts] in {
- def : FPToI1Pat<V_CMP_EQ_F16_t16_e64, CONST.FP16_ONE, i16, f16, fp_to_uint>;
- def : FPToI1Pat<V_CMP_EQ_F16_t16_e64, CONST.FP16_NEG_ONE, i16, f16, fp_to_sint>;
+ def : FPToI1Pat<V_CMP_EQ_F16_fake16_e64, CONST.FP16_ONE, i16, f16, fp_to_uint>;
+ def : FPToI1Pat<V_CMP_EQ_F16_fake16_e64, CONST.FP16_NEG_ONE, i16, f16, fp_to_sint>;
} // end OtherPredicates = [HasTrue16BitInsts]
def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, i32, f32, fp_to_uint>;
@@ -3055,7 +3055,7 @@ def : GCNPat<
(V_BFREV_B32_e64 (i32 (EXTRACT_SUBREG VReg_64:$a, sub0))), sub1)>;
// If fcanonicalize's operand is implicitly canonicalized, we only need a copy.
-let AddedComplexity = 1000 in {
+let AddedComplexity = 8 in {
foreach vt = [f16, v2f16, f32, v2f32, f64] in {
def : GCNPat<
(fcanonicalize (vt is_canonicalized:$src)),
@@ -3710,12 +3710,15 @@ def : IntMinMaxPat<V_MAXMIN_U32_e64, umin, umax_oneuse>;
def : IntMinMaxPat<V_MINMAX_U32_e64, umax, umin_oneuse>;
def : FPMinMaxPat<V_MINMAX_F32_e64, f32, fmaxnum_like, fminnum_like_oneuse>;
def : FPMinMaxPat<V_MAXMIN_F32_e64, f32, fminnum_like, fmaxnum_like_oneuse>;
-def : FPMinMaxPat<V_MINMAX_F16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
-def : FPMinMaxPat<V_MAXMIN_F16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
def : FPMinCanonMaxPat<V_MINMAX_F32_e64, f32, fmaxnum_like, fminnum_like_oneuse>;
def : FPMinCanonMaxPat<V_MAXMIN_F32_e64, f32, fminnum_like, fmaxnum_like_oneuse>;
-def : FPMinCanonMaxPat<V_MINMAX_F16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
-def : FPMinCanonMaxPat<V_MAXMIN_F16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
+}
+
+let True16Predicate = UseFakeTrue16Insts in {
+def : FPMinMaxPat<V_MINMAX_F16_fake16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
+def : FPMinMaxPat<V_MAXMIN_F16_fake16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
+def : FPMinCanonMaxPat<V_MINMAX_F16_fake16_e64, f16, fmaxnum_like, fminnum_like_oneuse>;
+def : FPMinCanonMaxPat<V_MAXMIN_F16_fake16_e64, f16, fminnum_like, fmaxnum_like_oneuse>;
}
let SubtargetPredicate = isGFX9Plus in {
@@ -3723,6 +3726,10 @@ let True16Predicate = NotHasTrue16BitInsts in {
defm : Int16Med3Pat<V_MED3_I16_e64, smin, smax, VSrc_b16>;
defm : Int16Med3Pat<V_MED3_U16_e64, umin, umax, VSrc_b16>;
}
+let True16Predicate = UseRealTrue16Insts in {
+ defm : Int16Med3Pat<V_MED3_I16_t16_e64, smin, smax, VSrcT_b16>;
+ defm : Int16Med3Pat<V_MED3_U16_t16_e64, umin, umax, VSrcT_b16>;
+}
let True16Predicate = UseFakeTrue16Insts in {
defm : Int16Med3Pat<V_MED3_I16_fake16_e64, smin, smax, VSrc_b16>;
defm : Int16Med3Pat<V_MED3_U16_fake16_e64, umin, umax, VSrc_b16>;
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index 16a7a9cfbc49..7c98ccddb5dd 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -153,14 +153,16 @@ class SIRegisterClass <string n, list<ValueType> rTypes, int Align, dag rList>
}
multiclass SIRegLoHi16 <string n, bits<8> regIdx, bit ArtificialHigh = 1,
- bit isVGPR = 0, bit isAGPR = 0> {
+ bit isVGPR = 0, bit isAGPR = 0,
+ list<int> DwarfEncodings = [-1, -1]> {
def _LO16 : SIReg<n#".l", regIdx, isVGPR, isAGPR>;
def _HI16 : SIReg<!if(ArtificialHigh, "", n#".h"), regIdx, isVGPR, isAGPR,
/* isHi16 */ 1> {
let isArtificial = ArtificialHigh;
}
def "" : RegisterWithSubRegs<n, [!cast<Register>(NAME#"_LO16"),
- !cast<Register>(NAME#"_HI16")]> {
+ !cast<Register>(NAME#"_HI16")]>,
+ DwarfRegNum<DwarfEncodings> {
let Namespace = "AMDGPU";
let SubRegIndices = [lo16, hi16];
let CoveredBySubRegs = !not(ArtificialHigh);
@@ -197,7 +199,8 @@ def VCC : RegisterWithSubRegs<"vcc", [VCC_LO, VCC_HI]> {
let HWEncoding = VCC_LO.HWEncoding;
}
-defm EXEC_LO : SIRegLoHi16<"exec_lo", 126>, DwarfRegNum<[1, 1]>;
+defm EXEC_LO : SIRegLoHi16<"exec_lo", 126, /*ArtificialHigh=*/1, /*isVGPR=*/0,
+ /*isAGPR=*/0, /*DwarfEncodings=*/[1, 1]>;
defm EXEC_HI : SIRegLoHi16<"exec_hi", 127>;
def EXEC : RegisterWithSubRegs<"exec", [EXEC_LO, EXEC_HI]>, DwarfRegNum<[17, 1]> {
@@ -337,25 +340,26 @@ def FLAT_SCR : FlatReg<FLAT_SCR_LO, FLAT_SCR_HI, 0>;
// SGPR registers
foreach Index = 0...105 in {
defm SGPR#Index :
- SIRegLoHi16 <"s"#Index, Index>,
- DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)),
- !if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]>;
+ SIRegLoHi16 <"s"#Index, Index, /*ArtificialHigh=*/1,
+ /*isVGPR=*/0, /*isAGPR=*/0, /*DwarfEncodings=*/
+ [!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)),
+ !if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]>;
}
// VGPR registers
foreach Index = 0...255 in {
defm VGPR#Index :
- SIRegLoHi16 <"v"#Index, Index, /* ArtificialHigh= */ 0,
- /* isVGPR= */ 1, /* isAGPR= */ 0>,
- DwarfRegNum<[!add(Index, 2560), !add(Index, 1536)]>;
+ SIRegLoHi16 <"v"#Index, Index, /*ArtificialHigh=*/ 0,
+ /*isVGPR=*/ 1, /*isAGPR=*/ 0, /*DwarfEncodings=*/
+ [!add(Index, 2560), !add(Index, 1536)]>;
}
// AccVGPR registers
foreach Index = 0...255 in {
defm AGPR#Index :
- SIRegLoHi16 <"a"#Index, Index, /* ArtificialHigh= */ 1,
- /* isVGPR= */ 0, /* isAGPR= */ 1>,
- DwarfRegNum<[!add(Index, 3072), !add(Index, 2048)]>;
+ SIRegLoHi16 <"a"#Index, Index, /*ArtificialHigh=*/ 1,
+ /*isVGPR=*/ 0, /*isAGPR=*/ 1, /*DwarfEncodings=*/
+ [!add(Index, 3072), !add(Index, 2048)]>;
}
//===----------------------------------------------------------------------===//
@@ -809,6 +813,9 @@ def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16,
let BaseClassOrder = 32;
}
+def SGPR_NULL128 : SIReg<"null">;
+def SGPR_NULL256 : SIReg<"null">;
+
let GeneratePressureSet = 0 in {
def SRegOrLds_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, bf16, v2i16, v2f16, v2bf16], 32,
(add SReg_32, LDS_DIRECT_CLASS)> {
@@ -885,6 +892,7 @@ multiclass SRegClass<int numRegs,
list<ValueType> regTypes,
SIRegisterTuples regList,
SIRegisterTuples ttmpList = regList,
+ bit hasNull = 0,
int copyCost = !sra(!add(numRegs, 1), 1)> {
defvar hasTTMP = !ne(regList, ttmpList);
defvar suffix = !cast<string>(!mul(numRegs, 32));
@@ -901,7 +909,7 @@ multiclass SRegClass<int numRegs,
}
}
- def SReg_ # suffix :
+ def SReg_ # suffix # !if(hasNull, "_XNULL", ""):
SIRegisterClass<"AMDGPU", regTypes, 32,
!con((add !cast<RegisterClass>(sgprName)),
!if(hasTTMP,
@@ -910,15 +918,24 @@ multiclass SRegClass<int numRegs,
let isAllocatable = 0;
let BaseClassOrder = !mul(numRegs, 32);
}
+
+ if hasNull then {
+ def SReg_ # suffix :
+ SIRegisterClass<"AMDGPU", regTypes, 32,
+ (add !cast<RegisterClass>("SReg_" # suffix # "_XNULL"), !cast<Register>("SGPR_NULL" # suffix))> {
+ let isAllocatable = 0;
+ let BaseClassOrder = !mul(numRegs, 32);
+ }
+ }
}
}
defm "" : SRegClass<3, Reg96Types.types, SGPR_96Regs, TTMP_96Regs>;
-defm "" : SRegClass<4, Reg128Types.types, SGPR_128Regs, TTMP_128Regs>;
+defm "" : SRegClass<4, Reg128Types.types, SGPR_128Regs, TTMP_128Regs, /*hasNull*/ true>;
defm "" : SRegClass<5, [v5i32, v5f32], SGPR_160Regs, TTMP_160Regs>;
defm "" : SRegClass<6, [v6i32, v6f32, v3i64, v3f64], SGPR_192Regs, TTMP_192Regs>;
defm "" : SRegClass<7, [v7i32, v7f32], SGPR_224Regs, TTMP_224Regs>;
-defm "" : SRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16, v16bf16], SGPR_256Regs, TTMP_256Regs>;
+defm "" : SRegClass<8, [v8i32, v8f32, v4i64, v4f64, v16i16, v16f16, v16bf16], SGPR_256Regs, TTMP_256Regs, /*hasNull*/ true>;
defm "" : SRegClass<9, [v9i32, v9f32], SGPR_288Regs, TTMP_288Regs>;
defm "" : SRegClass<10, [v10i32, v10f32], SGPR_320Regs, TTMP_320Regs>;
defm "" : SRegClass<11, [v11i32, v11f32], SGPR_352Regs, TTMP_352Regs>;
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 42df4576a774..979812e07fc3 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -455,6 +455,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
break;
case AMDGPU::V_FMA_F16_e64:
case AMDGPU::V_FMA_F16_gfx9_e64:
+ case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_fake16
: AMDGPU::V_FMAAK_F16;
break;
@@ -484,6 +485,7 @@ void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
break;
case AMDGPU::V_FMA_F16_e64:
case AMDGPU::V_FMA_F16_gfx9_e64:
+ case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_fake16
: AMDGPU::V_FMAMK_F16;
break;
@@ -956,7 +958,8 @@ bool SIShrinkInstructions::run(MachineFunction &MF) {
MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
- MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64) {
+ MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
+ MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64) {
shrinkMadFma(MI);
continue;
}
diff --git a/llvm/lib/Target/AMDGPU/SMInstructions.td b/llvm/lib/Target/AMDGPU/SMInstructions.td
index 1aeb4e8b20e8..37dcc1008625 100644
--- a/llvm/lib/Target/AMDGPU/SMInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -332,19 +332,19 @@ defm S_LOAD_I16 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
defm S_LOAD_U16 : SM_Pseudo_Loads <SReg_64, SReg_32_XM0_XEXEC>;
let is_buffer = 1 in {
-defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
// FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on
// SI/CI, bit disallowed for SMEM on VI.
-defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_128, SReg_64_XEXEC>;
+defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads <SReg_128_XNULL, SReg_64_XEXEC>;
let SubtargetPredicate = HasScalarDwordx3Loads in
- defm S_BUFFER_LOAD_DWORDX3 : SM_Pseudo_Loads <SReg_128, SReg_96>;
-defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_128, SReg_128>;
-defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_128, SReg_256>;
-defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_128, SReg_512>;
-defm S_BUFFER_LOAD_I8 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_LOAD_U8 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_LOAD_I16 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_LOAD_U16 : SM_Pseudo_Loads <SReg_128, SReg_32_XM0_XEXEC>;
+ defm S_BUFFER_LOAD_DWORDX3 : SM_Pseudo_Loads <SReg_128_XNULL, SReg_96>;
+defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <SReg_128_XNULL, SReg_128>;
+defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <SReg_128_XNULL, SReg_256>;
+defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <SReg_128_XNULL, SReg_512>;
+defm S_BUFFER_LOAD_I8 : SM_Pseudo_Loads <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_LOAD_U8 : SM_Pseudo_Loads <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_LOAD_I16 : SM_Pseudo_Loads <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_LOAD_U16 : SM_Pseudo_Loads <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
}
let SubtargetPredicate = HasScalarStores in {
@@ -353,9 +353,9 @@ defm S_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_64, SReg_64_XEXEC>;
defm S_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_64, SReg_128>;
let is_buffer = 1 in {
-defm S_BUFFER_STORE_DWORD : SM_Pseudo_Stores <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_128, SReg_128>;
+defm S_BUFFER_STORE_DWORD : SM_Pseudo_Stores <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_128_XNULL, SReg_128>;
}
} // End SubtargetPredicate = HasScalarStores
@@ -375,7 +375,7 @@ def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb
defm S_ATC_PROBE : SM_Pseudo_Probe <SReg_64>;
let is_buffer = 1 in {
-defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <SReg_128>;
+defm S_ATC_PROBE_BUFFER : SM_Pseudo_Probe <SReg_128_XNULL>;
}
} // SubtargetPredicate = isGFX8Plus
@@ -401,33 +401,33 @@ defm S_SCRATCH_STORE_DWORDX4 : SM_Pseudo_Stores <SReg_64, SReg_128>;
let SubtargetPredicate = HasScalarAtomics in {
let is_buffer = 1 in {
-defm S_BUFFER_ATOMIC_SWAP : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_ADD : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_SUB : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_SMIN : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_UMIN : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_SMAX : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_UMAX : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_AND : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_OR : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_XOR : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_INC : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-defm S_BUFFER_ATOMIC_DEC : SM_Pseudo_Atomics <SReg_128, SReg_32_XM0_XEXEC>;
-
-defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <SReg_128, SReg_128>;
-defm S_BUFFER_ATOMIC_ADD_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_SUB_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_AND_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_OR_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_XOR_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_INC_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
-defm S_BUFFER_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <SReg_128, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_SWAP : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_CMPSWAP : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_ADD : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_SUB : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_SMIN : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_UMIN : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_SMAX : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_UMAX : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_AND : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_OR : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_XOR : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_INC : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+defm S_BUFFER_ATOMIC_DEC : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_32_XM0_XEXEC>;
+
+defm S_BUFFER_ATOMIC_SWAP_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_CMPSWAP_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_128>;
+defm S_BUFFER_ATOMIC_ADD_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_SUB_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_SMIN_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_UMIN_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_SMAX_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_UMAX_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_AND_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_OR_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_XOR_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_INC_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
+defm S_BUFFER_ATOMIC_DEC_X2 : SM_Pseudo_Atomics <SReg_128_XNULL, SReg_64_XEXEC>;
}
defm S_ATOMIC_SWAP : SM_Pseudo_Atomics <SReg_64, SReg_32_XM0_XEXEC>;
@@ -470,7 +470,7 @@ def S_PREFETCH_INST : SM_Prefetch_Pseudo <"s_prefetch_inst", SReg_64, 1>;
def S_PREFETCH_INST_PC_REL : SM_Prefetch_Pseudo <"s_prefetch_inst_pc_rel", SReg_64, 0>;
def S_PREFETCH_DATA : SM_Prefetch_Pseudo <"s_prefetch_data", SReg_64, 1>;
def S_PREFETCH_DATA_PC_REL : SM_Prefetch_Pseudo <"s_prefetch_data_pc_rel", SReg_64, 0>;
-def S_BUFFER_PREFETCH_DATA : SM_Prefetch_Pseudo <"s_buffer_prefetch_data", SReg_128, 1> {
+def S_BUFFER_PREFETCH_DATA : SM_Prefetch_Pseudo <"s_buffer_prefetch_data", SReg_128_XNULL, 1> {
let is_buffer = 1;
}
} // end let SubtargetPredicate = isGFX12Plus
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 1dd39be9e8d9..b9c73e6ce8ef 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -1018,9 +1018,9 @@ defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03b,
defm V_SWAP_B16 : VOP1Only_Real_gfx11_gfx12<0x066>;
defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11_gfx12<0x067>;
defm V_MOV_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x01c, "v_mov_b16">;
-defm V_NOT_B16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x069, "v_not_b16">;
-defm V_CVT_I32_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
-defm V_CVT_U32_U16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
+defm V_NOT_B16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x069, "v_not_b16">;
+defm V_CVT_I32_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x06a, "v_cvt_i32_i16">;
+defm V_CVT_U32_U16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x06b, "v_cvt_u32_u16">;
defm V_CVT_F16_U16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x050, "v_cvt_f16_u16">;
defm V_CVT_F16_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x051, "v_cvt_f16_i16">;
@@ -1036,18 +1036,18 @@ defm V_LOG_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16"
defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">;
defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
-defm V_FREXP_MANT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
+defm V_FREXP_MANT_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x059, "v_frexp_mant_f16">;
defm V_FREXP_EXP_I16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05a, "v_frexp_exp_i16_f16">;
defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
defm V_CEIL_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
-defm V_TRUNC_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05d, "v_trunc_f16">;
-defm V_RNDNE_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05e, "v_rndne_f16">;
-defm V_FRACT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05f, "v_fract_f16">;
-defm V_SIN_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x060, "v_sin_f16">;
-defm V_COS_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x061, "v_cos_f16">;
-defm V_SAT_PK_U8_I16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
+defm V_TRUNC_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05d, "v_trunc_f16">;
+defm V_RNDNE_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05e, "v_rndne_f16">;
+defm V_FRACT_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05f, "v_fract_f16">;
+defm V_SIN_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x060, "v_sin_f16">;
+defm V_COS_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x061, "v_cos_f16">;
+defm V_SAT_PK_U8_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x062, "v_sat_pk_u8_i16">;
defm V_CVT_NORM_I16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x063, "v_cvt_norm_i16_f16">;
defm V_CVT_NORM_U16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x064, "v_cvt_norm_u16_f16">;
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 22e457674c07..24a2eede9ca3 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -340,7 +340,7 @@ let FPDPRounding = 1 in {
let SubtargetPredicate = isGFX9Plus in {
defm V_DIV_FIXUP_F16_gfx9 : VOP3Inst_t16 <"v_div_fixup_f16_gfx9", VOP_F16_F16_F16_F16, AMDGPUdiv_fixup>;
- defm V_FMA_F16_gfx9 : VOP3Inst <"v_fma_f16_gfx9", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, any_fma>;
+ defm V_FMA_F16_gfx9 : VOP3Inst_t16 <"v_fma_f16_gfx9", VOP_F16_F16_F16_F16, any_fma>;
} // End SubtargetPredicate = isGFX9Plus
} // End FPDPRounding = 1
@@ -1374,8 +1374,8 @@ class VOP3_DOT_Profile_fake16<VOPProfile P, VOP3Features Features = VOP3_REGULAR
let SubtargetPredicate = isGFX11Plus in {
defm V_MAXMIN_F32 : VOP3Inst<"v_maxmin_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
defm V_MINMAX_F32 : VOP3Inst<"v_minmax_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
- defm V_MAXMIN_F16 : VOP3Inst<"v_maxmin_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
- defm V_MINMAX_F16 : VOP3Inst<"v_minmax_f16", VOP3_Profile<VOP_F16_F16_F16_F16>>;
+ defm V_MAXMIN_F16 : VOP3Inst_t16<"v_maxmin_f16", VOP_F16_F16_F16_F16>;
+ defm V_MINMAX_F16 : VOP3Inst_t16<"v_minmax_f16", VOP_F16_F16_F16_F16>;
defm V_MAXMIN_U32 : VOP3Inst<"v_maxmin_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
defm V_MINMAX_U32 : VOP3Inst<"v_minmax_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
defm V_MAXMIN_I32 : VOP3Inst<"v_maxmin_i32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
@@ -1578,8 +1578,8 @@ def : MinimumMaximumByMinimum3Maximum3<fmaximum, f32, V_MAXIMUM3_F32_e64>;
defm V_MIN3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x229, "V_MIN3_F32", "v_min3_num_f32">;
defm V_MAX3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x22a, "V_MAX3_F32", "v_max3_num_f32">;
-defm V_MIN3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x22b, "V_MIN3_F16", "v_min3_num_f16">;
-defm V_MAX3_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x22c, "V_MAX3_F16", "v_max3_num_f16">;
+defm V_MIN3_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x22b, "v_min3_num_f16", "V_MIN3_F16", "v_min3_f16">;
+defm V_MAX3_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x22c, "v_max3_num_f16", "V_MAX3_F16", "v_max3_f16">;
defm V_MINIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22d>;
defm V_MAXIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22e>;
defm V_MINIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x22f>;
@@ -1588,8 +1588,8 @@ defm V_MED3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x231, "V_MED3_F32",
defm V_MED3_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x232, "v_med3_num_f16", "V_MED3_F16", "v_med3_f16">;
defm V_MINMAX_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x268, "V_MINMAX_F32", "v_minmax_num_f32">;
defm V_MAXMIN_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x269, "V_MAXMIN_F32", "v_maxmin_num_f32">;
-defm V_MINMAX_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26a, "V_MINMAX_F16", "v_minmax_num_f16">;
-defm V_MAXMIN_NUM_F16 : VOP3_Realtriple_with_name_gfx12<0x26b, "V_MAXMIN_F16", "v_maxmin_num_f16">;
+defm V_MINMAX_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x26a, "v_minmax_num_f16", "V_MINMAX_F16", "v_minmax_f16">;
+defm V_MAXMIN_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x26b, "v_maxmin_num_f16", "V_MAXMIN_F16", "v_maxmin_f16">;
defm V_MINIMUMMAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26c>;
defm V_MAXIMUMMINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26d>;
defm V_MINIMUMMAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x26e>;
@@ -1708,7 +1708,7 @@ defm V_PERM_B32 : VOP3_Realtriple_gfx11_gfx12<0x244>;
defm V_XAD_U32 : VOP3_Realtriple_gfx11_gfx12<0x245>;
defm V_LSHL_ADD_U32 : VOP3_Realtriple_gfx11_gfx12<0x246>;
defm V_ADD_LSHL_U32 : VOP3_Realtriple_gfx11_gfx12<0x247>;
-defm V_FMA_F16 : VOP3_Realtriple_with_name_gfx11_gfx12<0x248, "V_FMA_F16_gfx9", "v_fma_f16">;
+defm V_FMA_F16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x248, "v_fma_f16", "V_FMA_F16_gfx9">;
defm V_MIN3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx11<0x249, "v_min3_f16">;
defm V_MIN3_I16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x24a, "v_min3_i16">;
defm V_MIN3_U16 : VOP3_Realtriple_t16_and_fake16_gfx11_gfx12<0x24b, "v_min3_u16">;
@@ -1730,8 +1730,8 @@ defm V_PERMLANE16_B32 : VOP3_Real_Base_gfx11_gfx12<0x25b>;
defm V_PERMLANEX16_B32 : VOP3_Real_Base_gfx11_gfx12<0x25c>;
defm V_MAXMIN_F32 : VOP3_Realtriple_gfx11<0x25e>;
defm V_MINMAX_F32 : VOP3_Realtriple_gfx11<0x25f>;
-defm V_MAXMIN_F16 : VOP3_Realtriple_gfx11<0x260>;
-defm V_MINMAX_F16 : VOP3_Realtriple_gfx11<0x261>;
+defm V_MAXMIN_F16 : VOP3_Realtriple_t16_and_fake16_gfx11<0x260, "v_maxmin_f16">;
+defm V_MINMAX_F16 : VOP3_Realtriple_t16_and_fake16_gfx11<0x261, "v_minmax_f16">;
defm V_MAXMIN_U32 : VOP3_Realtriple_gfx11_gfx12<0x262>;
defm V_MINMAX_U32 : VOP3_Realtriple_gfx11_gfx12<0x263>;
defm V_MAXMIN_I32 : VOP3_Realtriple_gfx11_gfx12<0x264>;
diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
index 9bf043ea334f..8589d598f587 100644
--- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -1130,20 +1130,20 @@ defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;
defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;
-let OtherPredicates = [HasTrue16BitInsts] in {
-defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_t16_e64, i16>;
-defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_t16_e64, i16>;
-} // End OtherPredicates = [HasTrue16BitInsts]
-
-let OtherPredicates = [NotHasTrue16BitInsts] in {
+let True16Predicate = UseFakeTrue16Insts in {
+defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_UGE, V_CMP_GE_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_ULT, V_CMP_LT_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_ULE, V_CMP_LE_U16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_fake16_e64, i16>;
+defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_fake16_e64, i16>;
+} // End True16Predicate = UseFakeTrue16Insts
+
+let True16Predicate = NotHasTrue16BitInsts in {
defm : ICMP_Pattern <COND_EQ, V_CMP_EQ_U16_e64, i16>;
defm : ICMP_Pattern <COND_NE, V_CMP_NE_U16_e64, i16>;
defm : ICMP_Pattern <COND_UGT, V_CMP_GT_U16_e64, i16>;
@@ -1154,7 +1154,7 @@ defm : ICMP_Pattern <COND_SGT, V_CMP_GT_I16_e64, i16>;
defm : ICMP_Pattern <COND_SGE, V_CMP_GE_I16_e64, i16>;
defm : ICMP_Pattern <COND_SLT, V_CMP_LT_I16_e64, i16>;
defm : ICMP_Pattern <COND_SLE, V_CMP_LE_I16_e64, i16>;
-} // End OtherPredicates = [NotHasTrue16BitInsts]
+} // End True16Predicate = NotHasTrue16BitInsts
multiclass FCMP_Pattern <PatFrags cond, Instruction inst, ValueType vt> {
let WaveSizePredicate = isWave64 in
@@ -1215,25 +1215,25 @@ defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;
defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
-let OtherPredicates = [HasTrue16BitInsts] in {
-defm : FCMP_Pattern <COND_O, V_CMP_O_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_t16_e64, f16>;
-
-defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_t16_e64, f16>;
-defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_t16_e64, f16>;
-} // End OtherPredicates = [HasTrue16BitInsts]
-
-let OtherPredicates = [NotHasTrue16BitInsts] in {
+let True16Predicate = UseFakeTrue16Insts in {
+defm : FCMP_Pattern <COND_O, V_CMP_O_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OGT, V_CMP_GT_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OGE, V_CMP_GE_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OLT, V_CMP_LT_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_OLE, V_CMP_LE_F16_fake16_e64, f16>;
+
+defm : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_fake16_e64, f16>;
+defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_fake16_e64, f16>;
+} // End True16Predicate = UseFakeTrue16Insts
+
+let True16Predicate = NotHasTrue16BitInsts in {
defm : FCMP_Pattern <COND_O, V_CMP_O_F16_e64, f16>;
defm : FCMP_Pattern <COND_UO, V_CMP_U_F16_e64, f16>;
defm : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F16_e64, f16>;
@@ -1249,7 +1249,7 @@ defm : FCMP_Pattern <COND_UGT, V_CMP_NLE_F16_e64, f16>;
defm : FCMP_Pattern <COND_UGE, V_CMP_NLT_F16_e64, f16>;
defm : FCMP_Pattern <COND_ULT, V_CMP_NGE_F16_e64, f16>;
defm : FCMP_Pattern <COND_ULE, V_CMP_NGT_F16_e64, f16>;
-} // End OtherPredicates = [NotHasTrue16BitInsts]
+} // End True16Predicate = NotHasTrue16BitInsts
//===----------------------------------------------------------------------===//
// DPP Encodings
@@ -1707,23 +1707,6 @@ multiclass VOPCX_Real_t16_gfx11_gfx12<bits<9> op, string asm_name,
VOPCX_Real_t16<GFX11Gen, op, asm_name, OpName, pseudo_mnemonic>,
VOPCX_Real_t16<GFX12Gen, op, asm_name, OpName, pseudo_mnemonic>;
-defm V_CMP_F_F16_t16 : VOPC_Real_t16_gfx11<0x000, "v_cmp_f_f16">;
-defm V_CMP_LT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x001, "v_cmp_lt_f16">;
-defm V_CMP_EQ_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x002, "v_cmp_eq_f16">;
-defm V_CMP_LE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x003, "v_cmp_le_f16">;
-defm V_CMP_GT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x004, "v_cmp_gt_f16">;
-defm V_CMP_LG_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x005, "v_cmp_lg_f16">;
-defm V_CMP_GE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x006, "v_cmp_ge_f16">;
-defm V_CMP_O_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x007, "v_cmp_o_f16">;
-defm V_CMP_U_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x008, "v_cmp_u_f16">;
-defm V_CMP_NGE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x009, "v_cmp_nge_f16">;
-defm V_CMP_NLG_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00a, "v_cmp_nlg_f16">;
-defm V_CMP_NGT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00b, "v_cmp_ngt_f16">;
-defm V_CMP_NLE_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00c, "v_cmp_nle_f16">;
-defm V_CMP_NEQ_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00d, "v_cmp_neq_f16">;
-defm V_CMP_NLT_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x00e, "v_cmp_nlt_f16">;
-defm V_CMP_T_F16_t16 : VOPC_Real_t16_gfx11<0x00f, "v_cmp_t_f16", "V_CMP_TRU_F16_t16", "v_cmp_tru_f16">;
-
defm V_CMP_F_F16_fake16 : VOPC_Real_t16_gfx11<0x000, "v_cmp_f_f16">;
defm V_CMP_LT_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x001, "v_cmp_lt_f16">;
defm V_CMP_EQ_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x002, "v_cmp_eq_f16">;
@@ -1759,19 +1742,6 @@ defm V_CMP_NLT_F32 : VOPC_Real_gfx11_gfx12<0x01e>;
defm V_CMP_T_F32 : VOPC_Real_with_name_gfx11<0x01f, "V_CMP_TRU_F32", "v_cmp_t_f32">;
defm V_CMP_T_F64 : VOPC_Real_with_name_gfx11<0x02f, "V_CMP_TRU_F64", "v_cmp_t_f64">;
-defm V_CMP_LT_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x031, "v_cmp_lt_i16">;
-defm V_CMP_EQ_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x032, "v_cmp_eq_i16">;
-defm V_CMP_LE_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x033, "v_cmp_le_i16">;
-defm V_CMP_GT_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x034, "v_cmp_gt_i16">;
-defm V_CMP_NE_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x035, "v_cmp_ne_i16">;
-defm V_CMP_GE_I16_t16 : VOPC_Real_t16_gfx11_gfx12<0x036, "v_cmp_ge_i16">;
-defm V_CMP_LT_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x039, "v_cmp_lt_u16">;
-defm V_CMP_EQ_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03a, "v_cmp_eq_u16">;
-defm V_CMP_LE_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03b, "v_cmp_le_u16">;
-defm V_CMP_GT_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03c, "v_cmp_gt_u16">;
-defm V_CMP_NE_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03d, "v_cmp_ne_u16">;
-defm V_CMP_GE_U16_t16 : VOPC_Real_t16_gfx11_gfx12<0x03e, "v_cmp_ge_u16">;
-
defm V_CMP_LT_I16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x031, "v_cmp_lt_i16">;
defm V_CMP_EQ_I16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x032, "v_cmp_eq_i16">;
defm V_CMP_LE_I16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x033, "v_cmp_le_i16">;
@@ -1819,28 +1789,10 @@ defm V_CMP_NE_U64 : VOPC_Real_gfx11_gfx12<0x05d>;
defm V_CMP_GE_U64 : VOPC_Real_gfx11_gfx12<0x05e>;
defm V_CMP_T_U64 : VOPC_Real_gfx11<0x05f>;
-defm V_CMP_CLASS_F16_t16 : VOPC_Real_t16_gfx11_gfx12<0x07d, "v_cmp_class_f16">;
defm V_CMP_CLASS_F16_fake16 : VOPC_Real_t16_gfx11_gfx12<0x07d, "v_cmp_class_f16">;
defm V_CMP_CLASS_F32 : VOPC_Real_gfx11_gfx12<0x07e>;
defm V_CMP_CLASS_F64 : VOPC_Real_gfx11_gfx12<0x07f>;
-defm V_CMPX_F_F16_t16 : VOPCX_Real_t16_gfx11<0x080, "v_cmpx_f_f16">;
-defm V_CMPX_LT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x081, "v_cmpx_lt_f16">;
-defm V_CMPX_EQ_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x082, "v_cmpx_eq_f16">;
-defm V_CMPX_LE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x083, "v_cmpx_le_f16">;
-defm V_CMPX_GT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x084, "v_cmpx_gt_f16">;
-defm V_CMPX_LG_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x085, "v_cmpx_lg_f16">;
-defm V_CMPX_GE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x086, "v_cmpx_ge_f16">;
-defm V_CMPX_O_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x087, "v_cmpx_o_f16">;
-defm V_CMPX_U_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x088, "v_cmpx_u_f16">;
-defm V_CMPX_NGE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x089, "v_cmpx_nge_f16">;
-defm V_CMPX_NLG_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08a, "v_cmpx_nlg_f16">;
-defm V_CMPX_NGT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08b, "v_cmpx_ngt_f16">;
-defm V_CMPX_NLE_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08c, "v_cmpx_nle_f16">;
-defm V_CMPX_NEQ_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08d, "v_cmpx_neq_f16">;
-defm V_CMPX_NLT_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x08e, "v_cmpx_nlt_f16">;
-defm V_CMPX_T_F16_t16 : VOPCX_Real_with_name_gfx11<0x08f, "V_CMPX_TRU_F16_t16", "v_cmpx_t_f16", "v_cmpx_tru_f16">;
-
defm V_CMPX_F_F16_fake16 : VOPCX_Real_t16_gfx11<0x080, "v_cmpx_f_f16">;
defm V_CMPX_LT_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x081, "v_cmpx_lt_f16">;
defm V_CMPX_EQ_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x082, "v_cmpx_eq_f16">;
@@ -1892,19 +1844,6 @@ defm V_CMPX_NEQ_F64 : VOPCX_Real_gfx11_gfx12<0x0ad>;
defm V_CMPX_NLT_F64 : VOPCX_Real_gfx11_gfx12<0x0ae>;
defm V_CMPX_T_F64 : VOPCX_Real_with_name_gfx11<0x0af, "V_CMPX_TRU_F64", "v_cmpx_t_f64">;
-defm V_CMPX_LT_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b1, "v_cmpx_lt_i16">;
-defm V_CMPX_EQ_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b2, "v_cmpx_eq_i16">;
-defm V_CMPX_LE_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b3, "v_cmpx_le_i16">;
-defm V_CMPX_GT_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b4, "v_cmpx_gt_i16">;
-defm V_CMPX_NE_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b5, "v_cmpx_ne_i16">;
-defm V_CMPX_GE_I16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b6, "v_cmpx_ge_i16">;
-defm V_CMPX_LT_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0b9, "v_cmpx_lt_u16">;
-defm V_CMPX_EQ_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0ba, "v_cmpx_eq_u16">;
-defm V_CMPX_LE_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0bb, "v_cmpx_le_u16">;
-defm V_CMPX_GT_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0bc, "v_cmpx_gt_u16">;
-defm V_CMPX_NE_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0bd, "v_cmpx_ne_u16">;
-defm V_CMPX_GE_U16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0be, "v_cmpx_ge_u16">;
-
defm V_CMPX_LT_I16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x0b1, "v_cmpx_lt_i16">;
defm V_CMPX_EQ_I16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x0b2, "v_cmpx_eq_i16">;
defm V_CMPX_LE_I16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x0b3, "v_cmpx_le_i16">;
@@ -1951,7 +1890,6 @@ defm V_CMPX_GT_U64 : VOPCX_Real_gfx11_gfx12<0x0dc>;
defm V_CMPX_NE_U64 : VOPCX_Real_gfx11_gfx12<0x0dd>;
defm V_CMPX_GE_U64 : VOPCX_Real_gfx11_gfx12<0x0de>;
defm V_CMPX_T_U64 : VOPCX_Real_gfx11<0x0df>;
-defm V_CMPX_CLASS_F16_t16 : VOPCX_Real_t16_gfx11_gfx12<0x0fd, "v_cmpx_class_f16">;
defm V_CMPX_CLASS_F16_fake16 : VOPCX_Real_t16_gfx11_gfx12<0x0fd, "v_cmpx_class_f16">;
defm V_CMPX_CLASS_F32 : VOPCX_Real_gfx11_gfx12<0x0fe>;
defm V_CMPX_CLASS_F64 : VOPCX_Real_gfx11_gfx12<0x0ff>;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index d236907b0eec..930ed9a5e2d0 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1909,8 +1909,8 @@ multiclass VOP3_Realtriple_t16_gfx11<bits<10> op, string asmName, string opName
multiclass VOP3_Realtriple_t16_and_fake16_gfx11<bits<10> op, string asmName, string opName = NAME,
string pseudo_mnemonic = "", bit isSingle = 0> {
- defm _t16: VOP3_Realtriple_t16_gfx11<op, opName#"_t16", asmName, pseudo_mnemonic, isSingle>;
- defm _fake16: VOP3_Realtriple_t16_gfx11<op, opName#"_fake16", asmName, pseudo_mnemonic, isSingle>;
+ defm _t16: VOP3_Realtriple_t16_gfx11<op, asmName, opName#"_t16", pseudo_mnemonic, isSingle>;
+ defm _fake16: VOP3_Realtriple_t16_gfx11<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
}
multiclass VOP3Only_Realtriple_t16_gfx11<bits<10> op, string asmName,