diff options
| author | Mingming Liu <mingmingl@google.com> | 2025-09-10 15:25:31 -0700 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-09-10 15:25:31 -0700 |
| commit | 1417dafa1db9cb1b2b09438aa9f53ea5ab6e36e2 (patch) | |
| tree | 57f4b1f313c8cf74eed8819870f39c36ea263c68 /llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | |
| parent | 898b813bc8a6d0276bf0f4769f5f2f64b34e632d (diff) | |
| parent | b8cefcb601ddaa18482555c4ff363c01a270c2fe (diff) | |
Merge branch 'main' into users/mingmingl-llvm/samplefdo-profile-format (users/mingmingl-llvm/samplefdo-profile-format)
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 35 |
1 files changed, 22 insertions, 13 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 600a13096f55..f18536cd4ab9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -2082,13 +2082,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
       .scalarize(0)
       .lower();
 
-  // TODO: Only Try to form v2s16 with legal packed instructions.
-  getActionDefinitionsBuilder(G_FSHR)
-    .legalFor({{S32, S32}})
-    .lowerFor({{V2S16, V2S16}})
-    .clampMaxNumElementsStrict(0, S16, 2)
-    .scalarize(0)
-    .lower();
+  auto &FSHRActionDefs = getActionDefinitionsBuilder(G_FSHR);
+  FSHRActionDefs.legalFor({{S32, S32}})
+      .clampMaxNumElementsStrict(0, S16, 2);
+  if (ST.hasVOP3PInsts())
+    FSHRActionDefs.lowerFor({{V2S16, V2S16}});
+  FSHRActionDefs.scalarize(0).lower();
 
   if (ST.hasVOP3PInsts()) {
     getActionDefinitionsBuilder(G_FSHL)
@@ -3414,10 +3413,7 @@ static bool valueIsKnownNeverF32Denorm(const MachineRegisterInfo &MRI,
 }
 
 static bool allowApproxFunc(const MachineFunction &MF, unsigned Flags) {
-  if (Flags & MachineInstr::FmAfn)
-    return true;
-  const auto &Options = MF.getTarget().Options;
-  return Options.ApproxFuncFPMath;
+  return Flags & MachineInstr::FmAfn;
 }
 
 static bool needsDenormHandlingF32(const MachineFunction &MF, Register Src,
@@ -3522,8 +3518,7 @@ bool AMDGPULegalizerInfo::legalizeFlogCommon(MachineInstr &MI,
   const AMDGPUTargetMachine &TM =
       static_cast<const AMDGPUTargetMachine &>(MF.getTarget());
 
-  if (Ty == F16 || MI.getFlag(MachineInstr::FmAfn) ||
-      TM.Options.ApproxFuncFPMath) {
+  if (Ty == F16 || MI.getFlag(MachineInstr::FmAfn)) {
     if (Ty == F16 && !ST.has16BitInsts()) {
       Register LogVal = MRI.createGenericVirtualRegister(F32);
       auto PromoteSrc = B.buildFPExt(F32, X);
@@ -7823,6 +7818,20 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
     MI.eraseFromParent();
     return true;
   }
+  case Intrinsic::amdgcn_cooperative_atomic_load_32x4B:
+  case Intrinsic::amdgcn_cooperative_atomic_load_16x8B:
+  case Intrinsic::amdgcn_cooperative_atomic_load_8x16B:
+    assert(MI.hasOneMemOperand() && "Expected IRTranslator to set MemOp!");
+    B.buildLoad(MI.getOperand(0), MI.getOperand(2), **MI.memoperands_begin());
+    MI.eraseFromParent();
+    return true;
+  case Intrinsic::amdgcn_cooperative_atomic_store_32x4B:
+  case Intrinsic::amdgcn_cooperative_atomic_store_16x8B:
+  case Intrinsic::amdgcn_cooperative_atomic_store_8x16B:
+    assert(MI.hasOneMemOperand() && "Expected IRTranslator to set MemOp!");
+    B.buildStore(MI.getOperand(2), MI.getOperand(1), **MI.memoperands_begin());
+    MI.eraseFromParent();
+    return true;
   default: {
     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
             AMDGPU::getImageDimIntrinsicInfo(IntrID))
```
