summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Shashi Shankar <shashishankar1687@gmail.com> 2025-11-20 15:20:34 +0100
committer GitHub <noreply@github.com> 2025-11-20 22:20:34 +0800
commit 5d0bfd1bf8ac6b1ceb37c7f30058d0f62e636036 (patch)
tree 2a7dfdd485ae735d6b2bee77632da040f4ef76e9
parent 4bb4ad477d80f66a267311afe9b656330caf3893 (diff)
[MLIR][SCFToGPU] Guard operands before AffineApplyOp::create to avoid crash (#167959)
This fixes a crash in SCF→GPU when building the per‑dim index for mapped scf.parallel.

**Change**:
- Map step/lb through cloningMap, then run ensureLaunchIndependent.
- If either is still unavailable at launch scope, emit a match‑failure; otherwise build the affine.apply.

**Why this is correct:**
- Matches how the pass already handles launch bounds; avoids creating an op with invalid operands and replaces a segfault with a clear diagnostic.

**Tests**:
- Added two small regressions that lower to gpu.launch and exercise the affine.apply path.

Fixes: #167654

Signed-off-by: Shashi Shankar <shashishankar1687@gmail.com>
-rw-r--r-- mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp 18
-rw-r--r-- mlir/test/Conversion/SCFToGPU/parallel_loop.mlir 48
2 files changed, 64 insertions, 2 deletions
diff --git a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
index 76a822b05a65..309121f52081 100644
--- a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
+++ b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp
@@ -453,10 +453,24 @@ static LogicalResult processParallelLoop(
1, 2,
rewriter.getAffineDimExpr(0) * rewriter.getAffineSymbolExpr(0) +
rewriter.getAffineSymbolExpr(1));
+ // Map through cloningMap first so we use values valid at the launch
+ // scope, then ensure they are launch-independent (or cloned constants).
+ Value mappedStep = cloningMap.lookupOrDefault(step);
+ Value mappedLowerBound = cloningMap.lookupOrDefault(lowerBound);
+
+ mappedStep = ensureLaunchIndependent(mappedStep);
+ mappedLowerBound = ensureLaunchIndependent(mappedLowerBound);
+
+ // If either cannot be made available above the launch, fail gracefully.
+ if (!mappedStep || !mappedLowerBound) {
+ return rewriter.notifyMatchFailure(
+ parallelOp, "lower bound / step must be constant or defined above "
+ "the gpu.launch");
+ }
+
newIndex = AffineApplyOp::create(
rewriter, loc, annotation.getMap().compose(lowerAndStep),
- ValueRange{operand, ensureLaunchIndependent(step),
- ensureLaunchIndependent(lowerBound)});
+ ValueRange{operand, mappedStep, mappedLowerBound});
// If there was also a bound, insert that, too.
// TODO: Check that we do not assign bounds twice.
if (annotation.getBound()) {
diff --git a/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir b/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir
index 26f5a3e1f0ac..2f192df1dad2 100644
--- a/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir
+++ b/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir
@@ -673,3 +673,51 @@ func.func @nested_parallel_with_side_effect() {
// CHECK: gpu.launch
// CHECK-NOT: scf.parallel
+
+// -----
+
+func.func @scf2gpu_index_creation_2d() {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c32 = arith.constant 32 : index
+
+ // Single 2-D scf.parallel mapped to block_x and thread_x.
+ // Use both IVs so the conversion must compute indices.
+ scf.parallel (%bx, %tx) = (%c0, %c0) to (%c32, %c32) step (%c1, %c1) {
+ %u = arith.addi %bx, %c0 : index
+ %v = arith.addi %tx, %c0 : index
+ } {
+ mapping = [
+ #gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>,
+ #gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)>
+ ]
+ }
+ return
+}
+
+// CHECK-LABEL: func @scf2gpu_index_creation_2d
+// CHECK: gpu.launch
+// CHECK: %[[IDX:.*]] = affine.apply
+// CHECK: arith.addi %[[IDX]],
+
+// -----
+
+func.func @scf2gpu_index_creation_1d() {
+ %c0 = arith.constant 0 : index
+ %c1 = arith.constant 1 : index
+ %c64 = arith.constant 64 : index
+
+ scf.parallel (%t) = (%c0) to (%c64) step (%c1) {
+ %w = arith.addi %t, %c0 : index
+ } {
+ mapping = [
+ #gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)>
+ ]
+ }
+ return
+}
+
+// CHECK-LABEL: func @scf2gpu_index_creation_1d
+// CHECK: gpu.launch
+// CHECK: %[[IDX:.*]] = affine.apply
+// CHECK: arith.addi %[[IDX]],