diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp | 119 |
1 files changed, 111 insertions, 8 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp index 202693b31612..25672a52345c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp @@ -447,6 +447,35 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { return; } + bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN(); + if (IsGCN && Subtarget->has64BitLiterals() && VT.getSizeInBits() == 64 && + CurDAG->isConstantValueOfAnyType(SDValue(N, 0))) { + uint64_t C = 0; + bool AllConst = true; + unsigned EltSize = EltVT.getSizeInBits(); + for (unsigned I = 0; I < NumVectorElts; ++I) { + SDValue Op = N->getOperand(I); + if (Op.isUndef()) { + AllConst = false; + break; + } + uint64_t Val; + if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Op)) { + Val = CF->getValueAPF().bitcastToAPInt().getZExtValue(); + } else + Val = cast<ConstantSDNode>(Op)->getZExtValue(); + C |= Val << (EltSize * I); + } + if (AllConst) { + SDValue CV = CurDAG->getTargetConstant(C, DL, MVT::i64); + MachineSDNode *Copy = + CurDAG->getMachineNode(AMDGPU::S_MOV_B64_IMM_PSEUDO, DL, VT, CV); + CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, VT, SDValue(Copy, 0), + RegClass); + return; + } + } + assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not " "supported yet"); // 32 = Max Num Vector Elements @@ -454,7 +483,6 @@ void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) { // 1 = Vector Register Class SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1); - bool IsGCN = CurDAG->getSubtarget().getTargetTriple().isAMDGCN(); RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32); bool IsRegSeq = true; unsigned NOps = N->getNumOperands(); @@ -676,7 +704,8 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) { case ISD::Constant: case ISD::ConstantFP: { - if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N)) + if 
(N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N) || + Subtarget->has64BitLiterals()) break; uint64_t Imm; @@ -1632,8 +1661,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent, SDValue &SRsrc, SDValue &SOffset, SDValue &Offset) const { - const SIRegisterInfo *TRI = - static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo()); + const SIRegisterInfo *TRI = Subtarget->getRegisterInfo(); const SIInstrInfo *TII = Subtarget->getInstrInfo(); MachineFunction &MF = CurDAG->getMachineFunction(); const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); @@ -3245,6 +3273,7 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsDOT(SDValue In, SDValue &Src, return SelectVOP3PMods(In, Src, SrcMods, true); } +// Select neg_lo from the i1 immediate operand. bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const { const ConstantSDNode *C = cast<ConstantSDNode>(In); // Literal i1 value set in intrinsic, represents SrcMods for the next operand. @@ -3260,6 +3289,47 @@ bool AMDGPUDAGToDAGISel::SelectVOP3PModsNeg(SDValue In, SDValue &Src) const { return true; } +// Select both neg_lo and neg_hi from the i1 immediate operand. This is +// specifically for F16/BF16 operands in WMMA instructions, where neg_lo applies +// to matrix's even k elements, and neg_hi applies to matrix's odd k elements. +bool AMDGPUDAGToDAGISel::SelectVOP3PModsNegs(SDValue In, SDValue &Src) const { + const ConstantSDNode *C = cast<ConstantSDNode>(In); + // Literal i1 value set in intrinsic, represents SrcMods for the next operand. + // 1 promotes packed values to signed, 0 treats them as unsigned. 
+ assert(C->getAPIntValue().getBitWidth() == 1 && "expected i1 value"); + + unsigned Mods = SISrcMods::OP_SEL_1; + unsigned SrcSign = C->getZExtValue(); + if (SrcSign == 1) + Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI); + + Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); + return true; +} + +// Select neg, abs, or both neg and abs from the i16 immediate operand. +bool AMDGPUDAGToDAGISel::SelectVOP3PModsNegAbs(SDValue In, SDValue &Src) const { + const ConstantSDNode *C = cast<ConstantSDNode>(In); + unsigned Mods = SISrcMods::OP_SEL_1; + unsigned SrcMod = C->getZExtValue(); + switch (SrcMod) { + default: // Any other value will be silently ignored (considered as 0). + break; + case 1: + Mods ^= SISrcMods::NEG; + break; + case 2: + Mods ^= SISrcMods::ABS; + break; + case 3: + Mods ^= (SISrcMods::NEG | SISrcMods::ABS); + break; + } + + Src = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); + return true; +} + bool AMDGPUDAGToDAGISel::SelectWMMAOpSelVOP3PMods(SDValue In, SDValue &Src) const { const ConstantSDNode *C = cast<ConstantSDNode>(In); @@ -3611,6 +3681,41 @@ bool AMDGPUDAGToDAGISel::SelectSWMMACIndex16(SDValue In, SDValue &Src, return true; } +bool AMDGPUDAGToDAGISel::SelectSWMMACIndex32(SDValue In, SDValue &Src, + SDValue &IndexKey) const { + unsigned Key = 0; + Src = In; + + SDValue InI32; + + if (In.getOpcode() == ISD::ANY_EXTEND || In.getOpcode() == ISD::ZERO_EXTEND) { + const SDValue &ExtendSrc = In.getOperand(0); + if (ExtendSrc.getValueSizeInBits() == 32) + InI32 = ExtendSrc; + } else if (In->getOpcode() == ISD::BITCAST) { + const SDValue &CastSrc = In.getOperand(0); + if (CastSrc.getOpcode() == ISD::BUILD_VECTOR && + CastSrc.getOperand(0).getValueSizeInBits() == 32) { + ConstantSDNode *Zero = dyn_cast<ConstantSDNode>(CastSrc.getOperand(1)); + if (Zero && Zero->getZExtValue() == 0) + InI32 = CastSrc.getOperand(0); + } + } + + if (InI32 && InI32.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { + const SDValue &ExtractVecEltSrc = 
InI32.getOperand(0); + ConstantSDNode *EltIdx = dyn_cast<ConstantSDNode>(InI32.getOperand(1)); + if (ExtractVecEltSrc.getValueSizeInBits() == 64 && EltIdx && + EltIdx->getZExtValue() == 1) { + Key = 1; + Src = ExtractVecEltSrc; + } + } + + IndexKey = CurDAG->getTargetConstant(Key, SDLoc(In), MVT::i32); + return true; +} + bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const { Src = In; @@ -3885,10 +3990,8 @@ SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const { bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const { assert(CurDAG->getTarget().getTargetTriple().isAMDGCN()); - const SIRegisterInfo *SIRI = - static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo()); - const SIInstrInfo * SII = - static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); + const SIRegisterInfo *SIRI = Subtarget->getRegisterInfo(); + const SIInstrInfo *SII = Subtarget->getInstrInfo(); unsigned Limit = 0; bool AllUsesAcceptSReg = true; |
