From 0ebd4334021e7579bfba7a92b692e0e4ece56cb9 Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Thu, 2 Oct 2025 21:15:48 +0100 Subject: [AMDGPU] Be less optimistic when allocating module scope lds (#161464) Make the test for when additional variables can be added to the struct allocated at address zero more stringent. Previously, variables could be added to it (for faster access) even when that increased the lds requested by a kernel. This corrects that oversight. Test case diff shows the change from all variables being allocated into the module lds to only some being, in particular the introduction of uses of the offset table and that some kernels now use less lds than before. Alternative to PR 160181 --- llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp') diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index f01d5f672682..6efa78ef902c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -608,6 +608,8 @@ public: ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot] : EmptySet; + const size_t HybridModuleRootKernelsSize = HybridModuleRootKernels.size(); + for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) { // Each iteration of this loop assigns exactly one global variable to // exactly one of the implementation strategies. 
@@ -647,7 +649,8 @@ public: ModuleScopeVariables.insert(GV); } else if (K.second.size() == 1) { KernelAccessVariables.insert(GV); - } else if (set_is_subset(K.second, HybridModuleRootKernels)) { + } else if (K.second.size() == HybridModuleRootKernelsSize && + set_is_subset(K.second, HybridModuleRootKernels)) { ModuleScopeVariables.insert(GV); } else { TableLookupVariables.insert(GV); -- cgit v1.2.3 From 35530f4b65d04c79d3c9fb68272b82772c665823 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Juan=20Manuel=20Martinez=20Caama=C3=B1o?= Date: Fri, 3 Oct 2025 12:44:51 +0200 Subject: [NFC][AMDGPU] Replace size & set_is_subset by operator== (#161813) --- llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp') diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp index 6efa78ef902c..a4ef524c4346 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerModuleLDSPass.cpp @@ -608,8 +608,6 @@ public: ? LDSToKernelsThatNeedToAccessItIndirectly[HybridModuleRoot] : EmptySet; - const size_t HybridModuleRootKernelsSize = HybridModuleRootKernels.size(); - for (auto &K : LDSToKernelsThatNeedToAccessItIndirectly) { // Each iteration of this loop assigns exactly one global variable to // exactly one of the implementation strategies. @@ -649,8 +647,7 @@ public: ModuleScopeVariables.insert(GV); } else if (K.second.size() == 1) { KernelAccessVariables.insert(GV); - } else if (K.second.size() == HybridModuleRootKernelsSize && - set_is_subset(K.second, HybridModuleRootKernels)) { + } else if (K.second == HybridModuleRootKernels) { ModuleScopeVariables.insert(GV); } else { TableLookupVariables.insert(GV); -- cgit v1.2.3