diff options
| author | Matthias Springer <mspringer@nvidia.com> | 2025-04-02 17:58:34 +0200 |
|---|---|---|
| committer | Matthias Springer <mspringer@nvidia.com> | 2025-04-02 17:58:34 +0200 |
| commit | bb87c21647ac6117c9f28a6627c897c8a37f3e28 (patch) | |
| tree | 87d957cfc7a76486365f96299084503789f2dd58 | |
| parent | f99072bd8c6b479badfda0a8affd4964b26af4f7 (diff) | |
| -rw-r--r-- | mlir/lib/Interfaces/SubsetOpInterface.cpp | 59 | ||||
| -rw-r--r-- | mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir | 289 |
2 files changed, 81 insertions, 267 deletions
diff --git a/mlir/lib/Interfaces/SubsetOpInterface.cpp b/mlir/lib/Interfaces/SubsetOpInterface.cpp index d0bdadf500f6..d11b367564ef 100644 --- a/mlir/lib/Interfaces/SubsetOpInterface.cpp +++ b/mlir/lib/Interfaces/SubsetOpInterface.cpp @@ -9,6 +9,9 @@ #include "mlir/Interfaces/SubsetOpInterface.h" #include "mlir/Interfaces/DestinationStyleOpInterface.h" #include "mlir/Interfaces/ValueBoundsOpInterface.h" +#include "mlir/IR/Matchers.h" + +#include "llvm/ADT/APSInt.h" #include "mlir/Interfaces/SubsetOpInterface.cpp.inc" @@ -30,14 +33,68 @@ OpResult detail::defaultGetUpdatedDestination(Operation *op) { return dstOp.getTiedOpResult(&insertionOp.getDestinationOperand()); } +// === Copied from DialectUtils === +/// If ofr is a constant integer or an IntegerAttr, return the integer. +static std::optional<int64_t> getConstantIntValue(OpFoldResult ofr) { + // Case 1: Check for Constant integer. + if (auto val = llvm::dyn_cast_if_present<Value>(ofr)) { + APSInt intVal; + if (matchPattern(val, m_ConstantInt(&intVal))) + return intVal.getSExtValue(); + return std::nullopt; + } + // Case 2: Check for IntegerAttr. + Attribute attr = llvm::dyn_cast_if_present<Attribute>(ofr); + if (auto intAttr = dyn_cast_or_null<IntegerAttr>(attr)) + return intAttr.getValue().getSExtValue(); + return std::nullopt; +} + +static bool isConstantIntValue(OpFoldResult ofr, int64_t value) { + auto val = getConstantIntValue(ofr); + return val && *val == value; +} + +static bool areAllConstantIntValue(ArrayRef<OpFoldResult> ofrs, int64_t value) { + return llvm::all_of( + ofrs, [&](OpFoldResult ofr) { return isConstantIntValue(ofr, value); }); +} +// === End Copied from DialectUtils === + bool detail::defaultIsEquivalentSubset( Operation *op, Value candidate, function_ref<bool(Value, Value)> equivalenceFn) { assert(isa<SubsetInsertionOpInterface>(op) && "expected SubsetInsertionOpInterface"); + auto subsetOp = cast<SubsetOpInterface>(op); + + // Check if the insertion subset matches the candidate directly. + FailureOr<HyperrectangularSlice> slice = subsetOp.getAccessedHyperrectangularSlice(); + if (succeeded(slice)) { + bool allStridesOne = + areAllConstantIntValue(slice->getMixedStrides(), 1); + bool allOffsetsZero = + areAllConstantIntValue(slice->getMixedOffsets(), 0); + if (equivalenceFn(subsetOp.getTensorContainer(), candidate) && allOffsetsZero && allStridesOne) { + bool isEquivalentSlice = true; + auto candidateTensorType = dyn_cast<RankedTensorType>(candidate.getType()); + assert(slice->getMixedSizes().size() == candidateTensorType.getRank() && "rank mismatch"); + for (int64_t i = 0, e = candidateTensorType.getRank(); i < e; ++i) { + ValueBoundsConstraintSet::Variable var1(candidate, i); + ValueBoundsConstraintSet::Variable var2(slice->getMixedSizes()[i]); + if (!ValueBoundsConstraintSet::compare(var1, ValueBoundsConstraintSet::ComparisonOperator::EQ, var2)) { + isEquivalentSlice = false; + break; + } + } + if (isEquivalentSlice) + return true; + } + } + if (!candidate.getDefiningOp<SubsetExtractionOpInterface>()) return false; - return cast<SubsetOpInterface>(op).operatesOnEquivalentSubset( + return subsetOp.operatesOnEquivalentSubset( candidate.getDefiningOp<SubsetOpInterface>(), equivalenceFn); } diff --git a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir index e65c5b92949f..4585f1112eef 100644 --- a/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir +++ b/mlir/test/Dialect/Bufferization/Transforms/one-shot-bufferize.mlir @@ -1,271 +1,28 @@ // RUN: mlir-opt %s -one-shot-bufferize="allow-unknown-ops" -verify-diagnostics -split-input-file | FileCheck %s -// Run fuzzer with different seeds. -// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=23" -verify-diagnostics -split-input-file -o /dev/null -// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=59" -verify-diagnostics -split-input-file -o /dev/null -// RUN: mlir-opt %s -one-shot-bufferize="test-analysis-only analysis-heuristic=fuzzer analysis-fuzzer-seed=91" -verify-diagnostics -split-input-file -o /dev/null - -// Run with top-down analysis. -// RUN: mlir-opt %s -one-shot-bufferize="allow-unknown-ops analysis-heuristic=top-down" -verify-diagnostics -split-input-file | FileCheck %s --check-prefix=CHECK-TOP-DOWN-ANALYSIS - -// Test without analysis: Insert a copy on every buffer write. -// RUN: mlir-opt %s -allow-unregistered-dialect -one-shot-bufferize="allow-unknown-ops copy-before-write" -split-input-file | FileCheck %s --check-prefix=CHECK-COPY-BEFORE-WRITE - -// CHECK-LABEL: func @no_conflict -// CHECK: memref.alloc -// CHECK: memref.store -// CHECK-NEXT: memref.store -// CHECK-NEXT: memref.store -// CHECK-NEXT: memref.store -// CHECK-COPY-BEFORE-WRITE-LABEL: func @no_conflict -// CHECK-COPY-BEFORE-WRITE: memref.alloc -// CHECK-COPY-BEFORE-WRITE: memref.store -// CHECK-COPY-BEFORE-WRITE: memref.store -// CHECK-COPY-BEFORE-WRITE: memref.store -// CHECK-COPY-BEFORE-WRITE: memref.alloc -// CHECK-COPY-BEFORE-WRITE: memref.copy -// CHECK-COPY-BEFORE-WRITE: memref.store -func.func @no_conflict(%fill: f32, %f: f32, %idx: index) -> tensor<3xf32> { - %t = tensor.from_elements %fill, %fill, %fill : tensor<3xf32> - %i = tensor.insert %f into %t[%idx] : tensor<3xf32> - return %i : tensor<3xf32> -} - -// ----- - -// CHECK-LABEL: func @use_tensor_func_arg( -// CHECK-SAME: %[[A:.*]]: tensor<?xf32> -func.func @use_tensor_func_arg(%A : tensor<?xf32>) -> (vector<4xf32>) { +func.func @test(%14: index, %0 : memref<8x16xf16>, %1 : memref<8xi32>, %2 : memref<?x16xf16>) { + %16 = bufferization.to_tensor %0 restrict : memref<8x16xf16> to tensor<8x16xf16> + %17 = bufferization.to_tensor %1 restrict : memref<8xi32> to tensor<8xi32> + %18 = bufferization.to_tensor %2 restrict : memref<?x16xf16> to tensor<?x16xf16> + %cst = arith.constant 123.4 : f32 %c0 = arith.constant 0 : index - %f0 = arith.constant 0.0 : f32 - - // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] - // CHECK: %[[res:.*]] = vector.transfer_read %[[A_memref]] - %0 = vector.transfer_read %A[%c0], %f0 : tensor<?xf32>, vector<4xf32> - - // CHECK: return %[[res]] - return %0 : vector<4xf32> -} - -// ----- - -// CHECK-LABEL: func @return_tensor( -// CHECK-SAME: %[[A:.*]]: tensor<?xf32> -func.func @return_tensor(%A : tensor<?xf32>, %v : vector<4xf32>) -> (tensor<?xf32>) { - %c0 = arith.constant 0 : index - - // CHECK: %[[A_memref:.*]] = bufferization.to_memref %[[A]] - // CHECK: %[[dim:.*]] = memref.dim %[[A_memref]] - // CHECK: %[[alloc:.*]] = memref.alloc(%[[dim]]) - // CHECK: memref.copy %[[A_memref]], %[[alloc]] - // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] - // CHECK: %[[res_tensor:.*]] = bufferization.to_tensor %[[alloc]] - %0 = vector.transfer_write %v, %A[%c0] : vector<4xf32>, tensor<?xf32> - - // CHECK: return %[[res_tensor]] - return %0 : tensor<?xf32> -} - -// ----- - -// CHECK-LABEL: func @func_without_tensor_args -func.func @func_without_tensor_args(%v : vector<10xf32>) -> () { - // CHECK: %[[alloc:.*]] = memref.alloc() - %0 = bufferization.alloc_tensor() : tensor<10xf32> - - %c0 = arith.constant 0 : index - // CHECK: vector.transfer_write %{{.*}}, %[[alloc]] - %1 = vector.transfer_write %v, %0[%c0] : vector<10xf32>, tensor<10xf32> - - %cst = arith.constant 0.0 : f32 - // CHECK: vector.transfer_read %[[alloc]] - %r = vector.transfer_read %1[%c0], %cst : tensor<10xf32>, vector<11xf32> - - vector.print %r : vector<11xf32> - return -} - -// ----- - -// CHECK-LABEL: func private @private_func -func.func private @private_func(tensor<?xf32>) -> () - -// CHECK-LABEL: func @empty_func() -func.func @empty_func() -> () { + %19 = scf.forall (%arg0) in (2) shared_outs(%arg1 = %18) -> (tensor<?x16xf16>) { + %20 = affine.apply affine_map<(d0) -> (d0 * 8)>(%arg0) + %extracted_slice = tensor.extract_slice %arg1[0, %20] [%14, 8] [1, 1] : tensor<?x16xf16> to tensor<?x8xf16> + %21 = scf.forall (%arg2, %arg3) in (8, 1) shared_outs(%arg4 = %extracted_slice) -> (tensor<?x8xf16>) { + %dim0 = tensor.dim %arg4, %c0 : tensor<?x8xf16> + %extracted_slice_0 = tensor.extract_slice %16[%arg2, %20] [1, 8] [1, 1] : tensor<8x16xf16> to tensor<1x8xf16> + %extracted_slice_1 = tensor.extract_slice %17[%arg2] [1] [1] : tensor<8xi32> to tensor<1xi32> + %22 = linalg.fill ins(%cst : f32) outs(%arg4 : tensor<?x8xf16>) -> tensor<?x8xf16> + scf.forall.in_parallel { + tensor.parallel_insert_slice %22 into %arg4[0, 0] [%dim0, 8] [1, 1] : tensor<?x8xf16> into tensor<?x8xf16> + } + } + scf.forall.in_parallel { + tensor.parallel_insert_slice %21 into %arg1[0, %20] [%14, 8] [1, 1] : tensor<?x8xf16> into tensor<?x16xf16> + } + } + bufferization.materialize_in_destination + %19 in restrict writable %2 : (tensor<?x16xf16>, memref<?x16xf16>) -> () return } - -// ----- - -// CHECK-LABEL: func @read_after_write_conflict( -func.func @read_after_write_conflict(%cst : f32, %idx : index, %idx2 : index) - -> (f32, f32) { - // CHECK-DAG: %[[alloc:.*]] = memref.alloc - // CHECK-DAG: %[[dummy:.*]] = "test.dummy_op" - // CHECK-DAG: %[[dummy_m:.*]] = bufferization.to_memref %[[dummy]] - %t = "test.dummy_op"() : () -> (tensor<10xf32>) - - // CHECK: memref.copy %[[dummy_m]], %[[alloc]] - // CHECK: memref.store %{{.*}}, %[[alloc]] - %write = tensor.insert %cst into %t[%idx2] : tensor<10xf32> - - // CHECK: %[[read:.*]] = "test.some_use"(%[[dummy]]) - %read = "test.some_use"(%t) : (tensor<10xf32>) -> (f32) - // CHECK: %[[read2:.*]] = memref.load %[[alloc]] - %read2 = tensor.extract %write[%idx] : tensor<10xf32> - - // CHECK: return %[[read]], %[[read2]] - return %read, %read2 : f32, f32 -} - -// ----- - -// CHECK-LABEL: func @copy_deallocated( -func.func @copy_deallocated() -> tensor<10xf32> { - // CHECK: %[[alloc:.*]] = memref.alloc() - %0 = bufferization.alloc_tensor() : tensor<10xf32> - // CHECK: %[[alloc_tensor:.*]] = bufferization.to_tensor %[[alloc]] - // CHECK: return %[[alloc_tensor]] - return %0 : tensor<10xf32> -} - -// ----- - -// CHECK-LABEL: func @select_different_tensors( -// CHECK-SAME: %[[t:.*]]: tensor<?xf32> -func.func @select_different_tensors(%t: tensor<?xf32>, %sz: index, %pos: index, %c: i1) -> f32 { - // CHECK-DAG: %[[m:.*]] = bufferization.to_memref %[[t]] : tensor<?xf32> to memref<?xf32, strided{{.*}}> - // CHECK-DAG: %[[alloc:.*]] = memref.alloc(%{{.*}}) {{.*}} : memref<?xf32> - %0 = bufferization.alloc_tensor(%sz) : tensor<?xf32> - - // A cast must be inserted because %t and %0 have different memref types. - // CHECK: %[[casted:.*]] = memref.cast %[[alloc]] : memref<?xf32> to memref<?xf32, strided{{.*}}> - // CHECK: arith.select %{{.*}}, %[[casted]], %[[m]] - %1 = arith.select %c, %0, %t : tensor<?xf32> - %2 = tensor.extract %1[%pos] : tensor<?xf32> - return %2 : f32 -} - -// ----- - -// CHECK-LABEL: func @alloc_tensor_with_copy( -// CHECK-SAME: %[[t:.*]]: tensor<5xf32>) -// TODO: Add a test case with dynamic dim size. This is not possible at the -// moment because this would create a tensor op during bufferization. That is -// currently forbidden. -func.func @alloc_tensor_with_copy(%t: tensor<5xf32>) -> tensor<5xf32> { - // CHECK: %[[m:.*]] = bufferization.to_memref %[[t]] - // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32> - // CHECK: memref.copy %[[m]], %[[alloc]] - %0 = bufferization.alloc_tensor() copy(%t) : tensor<5xf32> - // CHECK: %[[r:.*]] = bufferization.to_tensor %[[alloc]] - // CHECK: return %[[r]] - return %0 : tensor<5xf32> -} - -// ----- - -// CHECK-LABEL: func @alloc_tensor_with_memory_space() -func.func @alloc_tensor_with_memory_space() -> tensor<5xf32> { - // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32, 1> - %0 = bufferization.alloc_tensor() {memory_space = 1 : i64} : tensor<5xf32> - // CHECK: %[[r:.*]] = bufferization.to_tensor %[[alloc]] - // CHECK: return %[[r]] - return %0 : tensor<5xf32> -} - -// ----- - -// CHECK-LABEL: func @read_of_alias -// CHECK-TOP-DOWN-ANALYSIS-LABEL: func @read_of_alias -func.func @read_of_alias(%t: tensor<100xf32>, %pos1: index, %pos2: index, - %pos3: index, %pos4: index, %sz: index, %f: f32) - -> (f32, f32) -{ - // CHECK: %[[alloc:.*]] = memref.alloc - // CHECK: memref.copy - // CHECK: memref.store %{{.*}}, %[[alloc]] - // CHECK-TOP-DOWN-ANALYSIS: %[[alloc:.*]] = memref.alloc - // CHECK-TOP-DOWN-ANALYSIS: memref.copy - // CHECK-TOP-DOWN-ANALYSIS: memref.store %{{.*}}, %[[alloc]] - %0 = tensor.insert %f into %t[%pos1] : tensor<100xf32> - %1 = tensor.extract_slice %t[%pos2][%sz][1] : tensor<100xf32> to tensor<?xf32> - %2 = tensor.extract %1[%pos3] : tensor<?xf32> - %3 = tensor.extract %0[%pos3] : tensor<100xf32> - return %2, %3 : f32, f32 -} - -// ----- - -// CHECK-LABEL: func @from_unranked_to_unranked( -// CHECK-SAME: %[[arg0:.*]]: tensor<*xi32> -func.func @from_unranked_to_unranked(%arg0: tensor<*xi32>) -> tensor<*xi32> { - // CHECK: %[[m:.*]] = bufferization.to_memref %[[arg0]] : tensor<*xi32> to memref<*xi32> - // CHECK: %[[t:.*]] = bufferization.to_tensor %[[m]] - // CHECK: return %[[t]] : tensor<*xi32> - %0 = tensor.cast %arg0 : tensor<*xi32> to tensor<*xi32> - return %0 : tensor<*xi32> -} - -// ----- - -// CHECK-LABEL: func @tensor_copy( -// CHECK-SAME: %[[arg0:.*]]: tensor<5xf32>) -func.func @tensor_copy(%arg0: tensor<5xf32>) -> tensor<5xf32> { - // CHECK: %[[m:.*]] = bufferization.to_memref %[[arg0]] - // CHECK: %[[alloc:.*]] = memref.alloc() {{.*}} : memref<5xf32> - // CHECK: memref.copy %[[m]], %[[alloc]] - // CHECK: %[[r:.*]] = bufferization.to_tensor %[[alloc]] - // CHECK: return %[[r]] - %dest = bufferization.alloc_tensor() : tensor<5xf32> - %0 = bufferization.materialize_in_destination %arg0 in %dest - : (tensor<5xf32>, tensor<5xf32>) -> tensor<5xf32> - return %0 : tensor<5xf32> -} - -// ----- - -// CHECK-LABEL: func @materialize_in_destination_buffer( -// CHECK-SAME: %[[t:.*]]: tensor<5xf32>, %[[m:.*]]: memref<5xf32>) -// CHECK: %[[b:.*]] = bufferization.to_memref %[[t]] : tensor<5xf32> to memref<5xf32, strided<[?], offset: ?>> -// CHECK: memref.copy %[[b]], %[[m]] -func.func @materialize_in_destination_buffer(%t: tensor<5xf32>, %m: memref<5xf32>) { - bufferization.materialize_in_destination %t in restrict writable %m - : (tensor<5xf32>, memref<5xf32>) -> () - return -} - -// ----- - -func.func @materialize_in_func_bbarg(%t: tensor<?xf32>, %dest: tensor<?xf32>) - -> tensor<?xf32> { - // This op is not bufferizable because function block arguments are - // read-only in regular One-Shot Bufferize. (Run One-Shot Module - // Bufferization instead.) - // expected-error @below{{not bufferizable under the given constraints: would write to read-only buffer}} - %0 = bufferization.materialize_in_destination %t in %dest - : (tensor<?xf32>, tensor<?xf32>) -> tensor<?xf32> - return %0 : tensor<?xf32> -} - -// ----- - -func.func @materialize_in_dest_raw(%f: f32, %f2: f32, %idx: index) -> (tensor<5xf32>, f32) { - %dest = bufferization.alloc_tensor() : tensor<5xf32> - // Note: The location of the RaW conflict may not be accurate (such as in this - // example). This is because the analysis operates on "alias sets" and not - // single SSA values. The location may point to any SSA value in the alias set - // that participates in the conflict. - // expected-error @below{{not bufferizable under the given constraints: cannot avoid RaW conflict}} - %dest_filled = linalg.fill ins(%f : f32) outs(%dest : tensor<5xf32>) -> tensor<5xf32> - %src = bufferization.alloc_tensor() : tensor<5xf32> - %src_filled = linalg.fill ins(%f2 : f32) outs(%src : tensor<5xf32>) -> tensor<5xf32> - - %0 = bufferization.materialize_in_destination %src_filled in %dest_filled - : (tensor<5xf32>, tensor<5xf32>) -> tensor<5xf32> - // Read from %dest_filled, which makes it impossible to bufferize the - // materialize_in_destination op in-place. - %r = tensor.extract %dest_filled[%idx] : tensor<5xf32> - - return %0, %r : tensor<5xf32>, f32 -}
\ No newline at end of file |
