diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp | 15 |
1 file changed, 8 insertions, 7 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
index 9de4cf82d0fa..e9d009baa20a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerKernelArguments.cpp
@@ -144,20 +144,20 @@ public:
   // Returns the maximum number of user SGPRs that we have available to preload
   // arguments.
   void setInitialFreeUserSGPRsCount() {
-    const unsigned MaxUserSGPRs = ST.getMaxNumUserSGPRs();
     GCNUserSGPRUsageInfo UserSGPRInfo(F, ST);
-
-    NumFreeUserSGPRs = MaxUserSGPRs - UserSGPRInfo.getNumUsedUserSGPRs();
+    NumFreeUserSGPRs = UserSGPRInfo.getNumFreeUserSGPRs();
   }
 
   bool tryAllocPreloadSGPRs(unsigned AllocSize, uint64_t ArgOffset,
                             uint64_t LastExplicitArgOffset) {
     // Check if this argument may be loaded into the same register as the
     // previous argument.
-    if (!isAligned(Align(4), ArgOffset) && AllocSize < 4)
+    if (ArgOffset - LastExplicitArgOffset < 4 &&
+        !isAligned(Align(4), ArgOffset))
       return true;
 
     // Pad SGPRs for kernarg alignment.
+    ArgOffset = alignDown(ArgOffset, 4);
     unsigned Padding = ArgOffset - LastExplicitArgOffset;
     unsigned PaddingSGPRs = alignTo(Padding, 4) / 4;
     unsigned NumPreloadSGPRs = alignTo(AllocSize, 4) / 4;
@@ -170,6 +170,7 @@ public:
 
   // Try to allocate SGPRs to preload implicit kernel arguments.
   void tryAllocImplicitArgPreloadSGPRs(uint64_t ImplicitArgsBaseOffset,
+                                       uint64_t LastExplicitArgOffset,
                                        IRBuilder<> &Builder) {
     Function *ImplicitArgPtr = Intrinsic::getDeclarationIfExists(
         F.getParent(), Intrinsic::amdgcn_implicitarg_ptr);
@@ -215,7 +216,6 @@ public:
     // argument can actually be preloaded.
     std::sort(ImplicitArgLoads.begin(), ImplicitArgLoads.end(), less_second());
 
-    uint64_t LastExplicitArgOffset = ImplicitArgsBaseOffset;
     // If we fail to preload any implicit argument we know we don't have SGPRs
     // to preload any subsequent ones with larger offsets. Find the first
     // argument that we cannot preload.
@@ -229,7 +229,8 @@ public:
                                     LastExplicitArgOffset))
             return true;
 
-          LastExplicitArgOffset = LoadOffset + LoadSize;
+          LastExplicitArgOffset =
+              ImplicitArgsBaseOffset + LoadOffset + LoadSize;
           return false;
         });
 
@@ -486,7 +487,7 @@ static bool lowerKernelArguments(Function &F, const TargetMachine &TM) {
         alignTo(ExplicitArgOffset, ST.getAlignmentForImplicitArgPtr()) +
         BaseOffset;
     PreloadInfo.tryAllocImplicitArgPreloadSGPRs(ImplicitArgsBaseOffset,
-                                                Builder);
+                                                ExplicitArgOffset, Builder);
   }
 
   return true;
