summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp119
1 files changed, 111 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 202693b31612..25672a52345c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -447,6 +447,35 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
return;
}
+ bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
+ if (IsGCN && Subtarget->has64BitLiterals() && VT.getSizeInBits() == 64 &&
+ CurDAG->isConstantValueOfAnyType(SDValue(N, 0))) {
+ uint64_t C = 0;
+ bool AllConst = true;
+ unsigned EltSize = EltVT.getSizeInBits();
+ for (unsigned I = 0; I < NumVectorElts; ++I) {
+ SDValue Op = N->getOperand(I);
+ if (Op.isUndef()) {
+ AllConst = false;
+ break;
+ }
+ uint64_t Val;
+ if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Op)) {
+ Val = CF->getValueAPF().bitcastToAPInt().getZExtValue();
+ } else
+ Val = cast<ConstantSDNode>(Op)->getZExtValue();
+ C |= Val << (EltSize * I);
+ }
+ if (AllConst) {
+ SDValue CV = CurDAG->getTargetConstant(C, DL, MVT::i64);
+ MachineSDNode *Copy =
+ CurDAG->getMachineNode(AMDGPU::S_MOV_B64_IMM_PSEUDO, DL, VT, CV);
+ CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, VT, SDValue(Copy, 0),
+ RegClass);
+ return;
+ }
+ }
+
assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
"supported yet");
// 32 = Max Num Vector Elements
@@ -454,7 +483,6 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
// 1 = Vector Register Class
SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);
- bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN();
RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
bool IsRegSeq = true;
unsigned NOps = N->getNumOperands();
@@ -676,7 +704,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
case ISD::Constant:
case ISD::ConstantFP: {
- if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
+ if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N) ||
+ Subtarget->has64BitLiterals())
break;
uint64_t Imm;
@@ -1632,8 +1661,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
SDValue &SRsrc,
SDValue &SOffset,
SDValue &Offset) const {
- const SIRegisterInfo *TRI =
- static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
+ const SIRegisterInfo *TRI = Subtarget->getRegisterInfo();
const SIInstrInfo *TII = Subtarget->getInstrInfo();
MachineFunction &MF = CurDAG->getMachineFunction();
const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
@@ -3245,6 +3273,7 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src,
return SelectVOP3PMods(In, Src, SrcMods, true);
}
+// Select neg_lo from the i1 immediate operand.
bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const {
const ConstantSDNode *C = cast<ConstantSDNode>(In);
// Literal i1 value set in intrinsic, represents SrcMods for the next operand.
@@ -3260,6 +3289,47 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const {
return true;
}
+// Select both neg_lo and neg_hi from the i1 immediate operand. This is
+// specifically for F16/BF16 operands in WMMA instructions, where neg_lo applies
+// to matrix's even k elements, and neg_hi applies to matrix's odd k elements.
+bool AMDGPUDAGToDAGISel::SelectVOP3PModsNegs(SDValue In, SDValue &Src) const {
+ const ConstantSDNode *C = cast<ConstantSDNode>(In);
+ // Literal i1 value set in intrinsic, represents SrcMods for the next operand.
+ // 1 promotes packed values to signed, 0 treats them as unsigned.
+ assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value");
+
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ unsigned SrcSign = C->getZExtValue();
+ if (SrcSign == 1)
+ Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI);
+
+ Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true;
+}
+
+// Select neg, abs, or both neg and abs from the i16 immediate operans.
+bool AMDGPUDAGToDAGISel::SelectVOP3PModsNegAbs(SDValue In, SDValue &Src) const {
+ const ConstantSDNode *C = cast<ConstantSDNode>(In);
+ unsigned Mods = SISrcMods::OP_SEL_1;
+ unsigned SrcMod = C->getZExtValue();
+ switch (SrcMod) {
+ default: // Any other value will be silently ignored (considered as 0).
+ break;
+ case 1:
+ Mods ^= SISrcMods::NEG;
+ break;
+ case 2:
+ Mods ^= SISrcMods::ABS;
+ break;
+ case 3:
+ Mods ^= (SISrcMods::NEG | SISrcMods::ABS);
+ break;
+ }
+
+ Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
+ return true;
+}
+
bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In,
SDValue &Src) const {
const ConstantSDNode *C = cast<ConstantSDNode>(In);
@@ -3611,6 +3681,41 @@ bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(SDValue In, SDValue &Src,
return true;
}
+bool AMDGPUDAGToDAGISel::SelectSWMMACIndex32(SDValue In, SDValue &Src,
+ SDValue &IndexKey) const {
+ unsigned Key = 0;
+ Src = In;
+
+ SDValue InI32;
+
+ if (In.getOpcode() == ISD::ANY_EXTEND || In.getOpcode() == ISD::ZERO_EXTEND) {
+ const SDValue &ExtendSrc = In.getOperand(0);
+ if (ExtendSrc.getValueSizeInBits() == 32)
+ InI32 = ExtendSrc;
+ } else if (In->getOpcode() == ISD::BITCAST) {
+ const SDValue &CastSrc = In.getOperand(0);
+ if (CastSrc.getOpcode() == ISD::BUILD_VECTOR &&
+ CastSrc.getOperand(0).getValueSizeInBits() == 32) {
+ ConstantSDNode *Zero = dyn_cast<ConstantSDNode>(CastSrc.getOperand(1));
+ if (Zero && Zero->getZExtValue() == 0)
+ InI32 = CastSrc.getOperand(0);
+ }
+ }
+
+ if (InI32 && InI32.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
+ const SDValue &ExtractVecEltSrc = InI32.getOperand(0);
+ ConstantSDNode *EltIdx = dyn_cast<ConstantSDNode>(InI32.getOperand(1));
+ if (ExtractVecEltSrc.getValueSizeInBits() == 64 && EltIdx &&
+ EltIdx->getZExtValue() == 1) {
+ Key = 1;
+ Src = ExtractVecEltSrc;
+ }
+ }
+
+ IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32);
+ return true;
+}
+
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
Src = In;
@@ -3885,10 +3990,8 @@ SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
assert(CurDAG->getTarget().getTargetTriple().isAMDGCN());
- const SIRegisterInfo *SIRI =
- static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
- const SIInstrInfo * SII =
- static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
+ const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo();
+ const SIInstrInfo *SII = Subtarget->getInstrInfo();
unsigned Limit = 0;
bool AllUsesAcceptSReg = true;