diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp | 33 |
1 files changed, 29 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp index 924b4034c2b9..6c8116abd8dc 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/InstIterator.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicsAMDGPU.h" +#include "llvm/IR/MDBuilder.h" #include "llvm/IR/PatternMatch.h" #include "llvm/Pass.h" @@ -82,6 +83,20 @@ Function *getBasePtrIntrinsic(Module &M, bool IsV5OrAbove) { } // end anonymous namespace +static void annotateGridSizeLoadWithRangeMD(LoadInst *Load, + uint32_t MaxNumGroups) { + if (MaxNumGroups == 0 || MaxNumGroups == std::numeric_limits<uint32_t>::max()) + return; + + if (!Load->getType()->isIntegerTy(32)) + return; + + // TODO: If there is existing range metadata, preserve it if it is stricter. + MDBuilder MDB(Load->getContext()); + MDNode *Range = MDB.createRange(APInt(32, 1), APInt(32, MaxNumGroups + 1)); + Load->setMetadata(LLVMContext::MD_range, Range); +} + static bool processUse(CallInst *CI, bool IsV5OrAbove) { Function *F = CI->getParent()->getParent(); @@ -91,7 +106,11 @@ static bool processUse(CallInst *CI, bool IsV5OrAbove) { const bool HasUniformWorkGroupSize = F->getFnAttribute("uniform-work-group-size").getValueAsBool(); - if (!HasReqdWorkGroupSize && !HasUniformWorkGroupSize) + SmallVector<unsigned> MaxNumWorkgroups = + AMDGPU::getIntegerVecAttribute(*F, "amdgpu-max-num-workgroups", 3); + + if (!HasReqdWorkGroupSize && !HasUniformWorkGroupSize && + none_of(MaxNumWorkgroups, [](unsigned X) { return X != 0; })) return false; Value *BlockCounts[3] = {nullptr, nullptr, nullptr}; @@ -132,16 +151,22 @@ static bool processUse(CallInst *CI, bool IsV5OrAbove) { if (IsV5OrAbove) { // Base is ImplicitArgPtr. switch (Offset) { case HIDDEN_BLOCK_COUNT_X: - if (LoadSize == 4) + if (LoadSize == 4) { BlockCounts[0] = Load; + annotateGridSizeLoadWithRangeMD(Load, MaxNumWorkgroups[0]); + } break; case HIDDEN_BLOCK_COUNT_Y: - if (LoadSize == 4) + if (LoadSize == 4) { BlockCounts[1] = Load; + annotateGridSizeLoadWithRangeMD(Load, MaxNumWorkgroups[1]); + } break; case HIDDEN_BLOCK_COUNT_Z: - if (LoadSize == 4) + if (LoadSize == 4) { BlockCounts[2] = Load; + annotateGridSizeLoadWithRangeMD(Load, MaxNumWorkgroups[2]); + } break; case HIDDEN_GROUP_SIZE_X: if (LoadSize == 2) |
