diff options
| author | Shashi Shankar <shashishankar1687@gmail.com> | 2025-11-20 15:20:34 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-11-20 22:20:34 +0800 |
| commit | 5d0bfd1bf8ac6b1ceb37c7f30058d0f62e636036 (patch) | |
| tree | 2a7dfdd485ae735d6b2bee77632da040f4ef76e9 | |
| parent | 4bb4ad477d80f66a267311afe9b656330caf3893 (diff) | |
[MLIR][SCFToGPU] Guard operands before AffineApplyOp::create to avoid crash (#167959)
This fixes a crash in the SCF→GPU conversion when building the per-dimension
index for a mapped scf.parallel loop.
**Change**:
- Map the step and lower bound through cloningMap, then run ensureLaunchIndependent on the mapped values.
- If either is still unavailable at launch scope, emit a match‑failure;
otherwise build the affine.apply.
**Why this is correct:**
- Matches how the pass already handles launch bounds; avoids creating an
op with invalid operands and replaces a segfault with a clear
diagnostic.
**Tests**:
- Added two small regressions that lower to gpu.launch and exercise the
affine.apply path.
Fixes: #167654
Signed-off-by: Shashi Shankar <shashishankar1687@gmail.com>
| -rw-r--r-- | mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp | 18 | ||||
| -rw-r--r-- | mlir/test/Conversion/SCFToGPU/parallel_loop.mlir | 48 |
2 files changed, 64 insertions, 2 deletions
diff --git a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp index 76a822b05a65..309121f52081 100644 --- a/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp +++ b/mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp @@ -453,10 +453,24 @@ static LogicalResult processParallelLoop( 1, 2, rewriter.getAffineDimExpr(0) * rewriter.getAffineSymbolExpr(0) + rewriter.getAffineSymbolExpr(1)); + // Map through cloningMap first so we use values valid at the launch + // scope, then ensure they are launch-independent (or cloned constants). + Value mappedStep = cloningMap.lookupOrDefault(step); + Value mappedLowerBound = cloningMap.lookupOrDefault(lowerBound); + + mappedStep = ensureLaunchIndependent(mappedStep); + mappedLowerBound = ensureLaunchIndependent(mappedLowerBound); + + // If either cannot be made available above the launch, fail gracefully. + if (!mappedStep || !mappedLowerBound) { + return rewriter.notifyMatchFailure( + parallelOp, "lower bound / step must be constant or defined above " + "the gpu.launch"); + } + newIndex = AffineApplyOp::create( rewriter, loc, annotation.getMap().compose(lowerAndStep), - ValueRange{operand, ensureLaunchIndependent(step), - ensureLaunchIndependent(lowerBound)}); + ValueRange{operand, mappedStep, mappedLowerBound}); // If there was also a bound, insert that, too. // TODO: Check that we do not assign bounds twice. 
if (annotation.getBound()) { diff --git a/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir b/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir index 26f5a3e1f0ac..2f192df1dad2 100644 --- a/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir +++ b/mlir/test/Conversion/SCFToGPU/parallel_loop.mlir @@ -673,3 +673,51 @@ func.func @nested_parallel_with_side_effect() { // CHECK: gpu.launch // CHECK-NOT: scf.parallel + +// ----- + +func.func @scf2gpu_index_creation_2d() { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c32 = arith.constant 32 : index + + // Single 2-D scf.parallel mapped to block_x and thread_x. + // Use both IVs so the conversion must compute indices. + scf.parallel (%bx, %tx) = (%c0, %c0) to (%c32, %c32) step (%c1, %c1) { + %u = arith.addi %bx, %c0 : index + %v = arith.addi %tx, %c0 : index + } { + mapping = [ + #gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>, + #gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)> + ] + } + return +} + +// CHECK-LABEL: func @scf2gpu_index_creation_2d +// CHECK: gpu.launch +// CHECK: %[[IDX:.*]] = affine.apply +// CHECK: arith.addi %[[IDX]], + +// ----- + +func.func @scf2gpu_index_creation_1d() { + %c0 = arith.constant 0 : index + %c1 = arith.constant 1 : index + %c64 = arith.constant 64 : index + + scf.parallel (%t) = (%c0) to (%c64) step (%c1) { + %w = arith.addi %t, %c0 : index + } { + mapping = [ + #gpu.loop_dim_map<processor = thread_x, map = (d0) -> (d0), bound = (d0) -> (d0)> + ] + } + return +} + +// CHECK-LABEL: func @scf2gpu_index_creation_1d +// CHECK: gpu.launch +// CHECK: %[[IDX:.*]] = affine.apply +// CHECK: arith.addi %[[IDX]], |
