summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/IR/ReplaceConstant.cpp10
-rw-r--r--llvm/test/CodeGen/AMDGPU/same-lds-variable-multiple-use-in-one-phi-node.ll51
2 files changed, 60 insertions, 1 deletions
diff --git a/llvm/lib/IR/ReplaceConstant.cpp b/llvm/lib/IR/ReplaceConstant.cpp
index b3586b45a23f..f3d1914a8dc8 100644
--- a/llvm/lib/IR/ReplaceConstant.cpp
+++ b/llvm/lib/IR/ReplaceConstant.cpp
@@ -91,6 +91,11 @@ bool llvm::convertUsersOfConstantsToInstructions(ArrayRef<Constant *> Consts,
// Replace those expandable operands with instructions
bool Changed = false;
+ // We need to cache the instructions we've already expanded to avoid expanding
+ // the same constant multiple times in the same basic block, which is
+ // problematic when the same constant is used in a phi node multiple times.
+ DenseMap<std::pair<Constant *, BasicBlock *>, SmallVector<Instruction *, 4>>
+ ConstantToInstructionMap;
while (!InstructionWorklist.empty()) {
Instruction *I = InstructionWorklist.pop_back_val();
DebugLoc Loc = I->getDebugLoc();
@@ -105,7 +110,10 @@ bool llvm::convertUsersOfConstantsToInstructions(ArrayRef<Constant *> Consts,
if (auto *C = dyn_cast<Constant>(U.get())) {
if (ExpandableUsers.contains(C)) {
Changed = true;
- auto NewInsts = expandUser(BI, C);
+ SmallVector<Instruction *, 4> &NewInsts =
+ ConstantToInstructionMap[std::make_pair(C, BI->getParent())];
+ if (NewInsts.empty())
+ NewInsts = expandUser(BI, C);
for (auto *NI : NewInsts)
NI->setDebugLoc(Loc);
InstructionWorklist.insert_range(NewInsts);
diff --git a/llvm/test/CodeGen/AMDGPU/same-lds-variable-multiple-use-in-one-phi-node.ll b/llvm/test/CodeGen/AMDGPU/same-lds-variable-multiple-use-in-one-phi-node.ll
new file mode 100644
index 000000000000..35a9bee03411
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/same-lds-variable-multiple-use-in-one-phi-node.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -amdgpu-lower-module-lds %s -o - | FileCheck %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=amdgpu-lower-module-lds %s -o - | FileCheck %s
+
+@lds = internal unnamed_addr addrspace(3) global [6144 x half] poison, align 2
+
+define amdgpu_kernel void @test(ptr addrspace(1) %out) {
+; CHECK-LABEL: define amdgpu_kernel void @test(
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: switch i32 0, label %[[BB_3:.*]] [
+; CHECK-NEXT: i32 18, label %[[BB_2:.*]]
+; CHECK-NEXT: i32 1, label %[[BB_2]]
+; CHECK-NEXT: i32 0, label %[[BB_3]]
+; CHECK-NEXT: ]
+; CHECK: [[BB_1:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(3) @llvm.amdgcn.kernel.test.lds to ptr
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr [[TMP0]] to i64
+; CHECK-NEXT: switch i32 0, label %[[BB_3]] [
+; CHECK-NEXT: i32 18, label %[[BB_2]]
+; CHECK-NEXT: i32 1, label %[[BB_2]]
+; CHECK-NEXT: i32 0, label %[[BB_3]]
+; CHECK-NEXT: ]
+; CHECK: [[BB_2]]:
+; CHECK-NEXT: [[PHI:%.*]] = phi i64 [ [[TMP1]], %[[BB_1]] ], [ [[TMP1]], %[[BB_1]] ], [ 10, %[[ENTRY]] ], [ 10, %[[ENTRY]] ]
+; CHECK-NEXT: store i64 [[PHI]], ptr addrspace(1) [[OUT]], align 8
+; CHECK-NEXT: br label %[[BB_3]]
+; CHECK: [[BB_3]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ switch i32 0, label %bb.3 [
+ i32 18, label %bb.2
+ i32 1, label %bb.2
+ i32 0, label %bb.3
+ ]
+bb.1:
+ switch i32 0, label %bb.3 [
+ i32 18, label %bb.2
+ i32 1, label %bb.2
+ i32 0, label %bb.3
+ ]
+
+bb.2:
+ %phi = phi i64 [ ptrtoint (ptr addrspacecast (ptr addrspace(3) @lds to ptr) to i64), %bb.1 ], [ ptrtoint (ptr addrspacecast (ptr addrspace(3) @lds to ptr) to i64), %bb.1 ], [10, %entry], [10, %entry]
+ store i64 %phi, ptr addrspace(1) %out, align 8
+ br label %bb.3
+
+bb.3:
+ ret void
+}