summary | refs | log | tree | commit | diff
path: root/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp')
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp | 33
1 files changed, 29 insertions, 4 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
index 924b4034c2b9..6c8116abd8dc 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelAttributes.cpp
@@ -22,6 +22,7 @@
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
@@ -82,6 +83,20 @@ Function *getBasePtrIntrinsic(Module &M, bool IsV5OrAbove) {
} // end anonymous namespace
+/// Tag a 32-bit grid-size load with !range metadata bounding the loaded value
+/// to the inclusive interval [1, MaxNumGroups].
+///
+/// Nothing is attached when \p MaxNumGroups is 0 or UINT32_MAX, or when
+/// \p Load does not produce an i32. Any !range metadata already present on
+/// \p Load is overwritten.
+static void annotateGridSizeLoadWithRangeMD(LoadInst *Load,
+                                            uint32_t MaxNumGroups) {
+  // UINT32_MAX is rejected together with 0; it would also make the exclusive
+  // upper bound (MaxNumGroups + 1) wrap around to 0.
+  const bool HasUsableLimit =
+      MaxNumGroups != 0 &&
+      MaxNumGroups != std::numeric_limits<uint32_t>::max();
+  if (!HasUsableLimit || !Load->getType()->isIntegerTy(32))
+    return;
+
+  // TODO: If there is existing range metadata, preserve it if it is stricter.
+  // The upper bound handed to createRange is exclusive, hence the + 1.
+  const APInt Lo(32, 1);
+  const APInt Hi(32, MaxNumGroups + 1);
+  MDNode *RangeMD = MDBuilder(Load->getContext()).createRange(Lo, Hi);
+  Load->setMetadata(LLVMContext::MD_range, RangeMD);
+}
+
static bool processUse(CallInst *CI, bool IsV5OrAbove) {
Function *F = CI->getParent()->getParent();
@@ -91,7 +106,11 @@ static bool processUse(CallInst *CI, bool IsV5OrAbove) {
const bool HasUniformWorkGroupSize =
F->getFnAttribute("uniform-work-group-size").getValueAsBool();
- if (!HasReqdWorkGroupSize && !HasUniformWorkGroupSize)
+ SmallVector<unsigned> MaxNumWorkgroups =
+ AMDGPU::getIntegerVecAttribute(*F, "amdgpu-max-num-workgroups", 3);
+
+ if (!HasReqdWorkGroupSize && !HasUniformWorkGroupSize &&
+ none_of(MaxNumWorkgroups, [](unsigned X) { return X != 0; }))
return false;
Value *BlockCounts[3] = {nullptr, nullptr, nullptr};
@@ -132,16 +151,22 @@ static bool processUse(CallInst *CI, bool IsV5OrAbove) {
if (IsV5OrAbove) { // Base is ImplicitArgPtr.
switch (Offset) {
case HIDDEN_BLOCK_COUNT_X:
- if (LoadSize == 4)
+ if (LoadSize == 4) {
BlockCounts[0] = Load;
+ annotateGridSizeLoadWithRangeMD(Load, MaxNumWorkgroups[0]);
+ }
break;
case HIDDEN_BLOCK_COUNT_Y:
- if (LoadSize == 4)
+ if (LoadSize == 4) {
BlockCounts[1] = Load;
+ annotateGridSizeLoadWithRangeMD(Load, MaxNumWorkgroups[1]);
+ }
break;
case HIDDEN_BLOCK_COUNT_Z:
- if (LoadSize == 4)
+ if (LoadSize == 4) {
BlockCounts[2] = Load;
+ annotateGridSizeLoadWithRangeMD(Load, MaxNumWorkgroups[2]);
+ }
break;
case HIDDEN_GROUP_SIZE_X:
if (LoadSize == 2)