summaryrefslogtreecommitdiff
path: root/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp15
1 files changed, 8 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index 9de4cf82d0fa..e9d009baa20a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -144,20 +144,20 @@ public:
// Returns the maximum number of user SGPRs that we have available to preload
// arguments.
void setInitialFreeUserSGPRsCount() {
- const unsigned MaxUserSGPRs = ST.getMaxNumUserSGPRs();
GCNUserSGPRUsageInfo UserSGPRInfo(F, ST);
-
- NumFreeUserSGPRs = MaxUserSGPRs - UserSGPRInfo.getNumUsedUserSGPRs();
+ NumFreeUserSGPRs = UserSGPRInfo.getNumFreeUserSGPRs();
}
bool tryAllocPreloadSGPRs(unsigned AllocSize, uint64_t ArgOffset,
uint64_t LastExplicitArgOffset) {
// Check if this argument may be loaded into the same register as the
// previous argument.
- if (!isAligned(Align(4), ArgOffset) && AllocSize < 4)
+ if (ArgOffset - LastExplicitArgOffset < 4 &&
+ !isAligned(Align(4), ArgOffset))
return true;
// Pad SGPRs for kernarg alignment.
+ ArgOffset = alignDown(ArgOffset, 4);
unsigned Padding = ArgOffset - LastExplicitArgOffset;
unsigned PaddingSGPRs = alignTo(Padding, 4) / 4;
unsigned NumPreloadSGPRs = alignTo(AllocSize, 4) / 4;
@@ -170,6 +170,7 @@ public:
// Try to allocate SGPRs to preload implicit kernel arguments.
void tryAllocImplicitArgPreloadSGPRs(uint64_t ImplicitArgsBaseOffset,
+ uint64_t LastExplicitArgOffset,
IRBuilder<> &Builder) {
Function *ImplicitArgPtr = Intrinsic::getDeclarationIfExists(
F.getParent(), Intrinsic::amdgcn_implicitarg_ptr);
@@ -215,7 +216,6 @@ public:
// argument can actually be preloaded.
std::sort(ImplicitArgLoads.begin(), ImplicitArgLoads.end(), less_second());
- uint64_t LastExplicitArgOffset = ImplicitArgsBaseOffset;
// If we fail to preload any implicit argument we know we don't have SGPRs
// to preload any subsequent ones with larger offsets. Find the first
// argument that we cannot preload.
@@ -229,7 +229,8 @@ public:
LastExplicitArgOffset))
return true;
- LastExplicitArgOffset = LoadOffset + LoadSize;
+ LastExplicitArgOffset =
+ ImplicitArgsBaseOffset + LoadOffset + LoadSize;
return false;
});
@@ -486,7 +487,7 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
alignTo(ExplicitArgOffset, ST.getAlignmentForImplicitArgPtr()) +
BaseOffset;
PreloadInfo.tryAllocImplicitArgPreloadSGPRs(ImplicitArgsBaseOffset,
- Builder);
+ ExplicitArgOffset, Builder);
}
return true;