author     Alexander Belyaev <pifon@google.com>    2022-08-05 14:53:35 +0200
committer  Alexander Belyaev <pifon@google.com>    2022-08-05 14:53:35 +0200
commit     6b03bae346821e906cdca242d88a54b50bac87cc (patch)
tree       1ad13d63424c1d774a9a6ed93fee47b5b81c682d
parent     1bd31a689844a43c9cde41a067119bef11159539 (diff)
Revert "[mlir] Extract offsets-sizes-strides computation from `makeTiledShape(s)`."linaro-local/ci/tcwg_kernel/llvm-master-aarch64-norov-allnoconfig
This reverts commit 56d94b3b902e21ff79b1ce9a6fb606a3f7c1c4db.
-rw-r--r--  mlir/include/mlir/Dialect/Linalg/Utils/Utils.h        |  38
-rw-r--r--  mlir/lib/Dialect/Linalg/Utils/Utils.cpp               | 129
-rw-r--r--  mlir/test/Dialect/Linalg/tile-and-distribute.mlir     |  22
-rw-r--r--  mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir   |   2
-rw-r--r--  mlir/test/Dialect/Linalg/tile-conv.mlir               |   2
-rw-r--r--  mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir  |  16
-rw-r--r--  mlir/test/Dialect/Linalg/tile.mlir                    |  44
-rw-r--r--  mlir/test/Dialect/Linalg/transform-op-split.mlir      |   2
8 files changed, 87 insertions, 168 deletions
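The change being reverted had split Linalg tiling into a computation step (`computeSliceParameters` / `computeAllSliceParameters`, returning a `SliceParameters` struct of offsets-sizes-strides) and a separate materialization step (`materializeTiledShape`). This revert folds both back into the `makeTiledShape(s)` entry points, which compute the parameters and immediately emit the `memref.subview` / `tensor.extract_slice` for each operand. A minimal caller sketch against the restored signature (illustrative only, not part of the commit; `b`, `loc`, `linalgOp`, `ivs`, `tileSizes`, and `sizeBounds` are assumed to come from the surrounding tiling driver):

    // Signature as restored in Utils.cpp (see diff below):
    //   SmallVector<Value> makeTiledShapes(OpBuilder &b, Location loc,
    //                                      LinalgOp linalgOp, ValueRange valuesToTile,
    //                                      ArrayRef<OpFoldResult> ivs,
    //                                      ArrayRef<OpFoldResult> tileSizes,
    //                                      ArrayRef<OpFoldResult> sizeBounds,
    //                                      bool omitPartialTileCheck);
    SmallVector<Value> tiledOperands = makeTiledShapes(
        b, loc, linalgOp, /*valuesToTile=*/linalgOp->getOperands(), ivs,
        tileSizes, sizeBounds, /*omitPartialTileCheck=*/false);
    // Operands not affected by tiling are returned unchanged; tiled operands
    // come back as the subview/extract_slice of the current tile.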
diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
index 4339f40d049f..4b81ffecb43e 100644
--- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
+++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h
@@ -214,44 +214,6 @@ Value materializeOpFoldResult(ImplicitLocOpBuilder &builder,
 Value materializeOpFoldResult(OpBuilder &b, Location loc,
                               OpFoldResult opFoldResult);
 
-/// A struct containg offsets-sizes-strides arguments of the tiled shape.
-struct SliceParameters {
-  SmallVector<OpFoldResult, 3> offsets;
-  SmallVector<OpFoldResult, 3> sizes;
-  SmallVector<OpFoldResult, 3> strides;
-};
-
-/// Computes SliceParameters for a single `valueToTile`. `omitPartialTileCheck`
-/// controls whether to omit the partial/boundary tile condition check in cases
-/// where we statically know that it is unnecessary.
-SliceParameters
-computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile,
-                       ArrayRef<OpFoldResult> tileSizes, AffineMap map,
-                       ArrayRef<OpFoldResult> lbs, ArrayRef<OpFoldResult> ubs,
-                       ArrayRef<OpFoldResult> subShapeSizes,
-                       bool omitPartialTileCheck);
-
-/// Computes SliceParamaters for all `valuesToTile` of the given
-/// `linalgOp`, assuming `linalgOp` is being fused into a loop
-/// nest for tiling with the given induction variables `ivs` and tile sizes
-/// `tileSizes`. `sizeBounds` are the iteration space bounds for *all* the
-/// implicit loops in `linalgOp`. `omitPartialTileCheck` controls whether to
-/// omit the partial/boundary tile condition check in cases where we statically
-/// know that it is unnecessary.
-///
-/// Note that a constant zero in `tileSizes` means no tiling at that implicit
-/// loop. The number of non-zero values in `tileSizes` should be equal to the
-/// number of values in `ivs`.
-///
-/// Some of the `valuesToTile` won't be affected by tiling. For these values,
-/// llvm::None will be returned.
-SmallVector<Optional<SliceParameters>>
-computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp,
-                          ValueRange valuesToTile, ArrayRef<OpFoldResult> ivs,
-                          ArrayRef<OpFoldResult> tileSizes,
-                          ArrayRef<OpFoldResult> sizeBounds,
-                          bool omitPartialTileCheck);
-
 /// Creates an extract_slice/subview op for a single `valueToTile` with
 /// `builder`. This new operation extracts a tile of `valueToTile`, starting
 /// at offsets `lbs` and with sizes `subShapeSizes`. `omitPartialTileCheck`
diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
index 0259f9a542f6..4f14164bf26c 100644
--- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
+++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp
@@ -802,61 +802,28 @@ void GenerateLoopNest<scf::ParallelOp>::doit(
   assert(ivs.size() == iteratorTypes.size() && "did not generate enough loops");
 }
 
-static Value materializeTiledShape(OpBuilder &builder, Location loc,
-                                   Value valueToTile,
-                                   const SliceParameters &sliceParams) {
-  auto shapedType = valueToTile.getType().dyn_cast<ShapedType>();
-  auto *sliceOp = TypeSwitch<ShapedType, Operation *>(shapedType)
-                      .Case([&](MemRefType) {
-                        return builder.create<memref::SubViewOp>(
-                            loc, valueToTile, sliceParams.offsets,
-                            sliceParams.sizes, sliceParams.strides);
-                      })
-                      .Case([&](RankedTensorType) {
-                        return makeComposedExtractSliceOp(
-                            builder, loc, valueToTile, sliceParams.offsets,
-                            sliceParams.sizes, sliceParams.strides);
-                      })
-                      .Default([](ShapedType) -> Operation * {
-                        llvm_unreachable("Unexpected shaped type");
-                      });
-  return sliceOp->getResult(0);
-}
-
 Value makeTiledShape(OpBuilder &builder, Location loc, Value valueToTile,
                      ArrayRef<OpFoldResult> tileSizes, AffineMap map,
                      ArrayRef<OpFoldResult> lbs, ArrayRef<OpFoldResult> ubs,
                      ArrayRef<OpFoldResult> subShapeSizes,
                      bool omitPartialTileCheck) {
-  SliceParameters sliceParams =
-      computeSliceParameters(builder, loc, valueToTile, tileSizes, map, lbs,
-                             ubs, subShapeSizes, omitPartialTileCheck);
-  return materializeTiledShape(builder, loc, valueToTile, sliceParams);
-}
-
-SliceParameters
-computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile,
-                       ArrayRef<OpFoldResult> tileSizes, AffineMap map,
-                       ArrayRef<OpFoldResult> lbs, ArrayRef<OpFoldResult> ubs,
-                       ArrayRef<OpFoldResult> subShapeSizes,
-                       bool omitPartialTileCheck) {
   auto shapedType = valueToTile.getType().dyn_cast<ShapedType>();
   assert(shapedType && "only shaped types can be tiled");
   ArrayRef<int64_t> shape = shapedType.getShape();
   int64_t rank = shapedType.getRank();
 
   // Construct a new subview / extract_slice for the tile.
-  SliceParameters sliceParams;
-  sliceParams.offsets.reserve(rank);
-  sliceParams.sizes.reserve(rank);
-  sliceParams.strides.reserve(rank);
+  SmallVector<OpFoldResult, 4> offsets, sizes, strides;
+  offsets.reserve(rank);
+  sizes.reserve(rank);
+  strides.reserve(rank);
   for (unsigned r = 0; r < rank; ++r) {
-    LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: for dim#" << r);
+    LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: for dim#" << r);
     if (!isTiled(map.getSubMap({r}), tileSizes)) {
-      sliceParams.offsets.push_back(builder.getIndexAttr(0));
+      offsets.push_back(builder.getIndexAttr(0));
       OpFoldResult dim = createFoldedDimOp(builder, loc, valueToTile, r);
-      sliceParams.sizes.push_back(dim);
-      sliceParams.strides.push_back(builder.getIndexAttr(1));
+      sizes.push_back(dim);
+      strides.push_back(builder.getIndexAttr(1));
       LLVM_DEBUG(llvm::dbgs() << ": not tiled: use size: " << dim << "\n");
       continue;
     }
@@ -865,27 +832,26 @@ computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile,
     // Tiling creates a new slice at the proper index, the slice step is 1
     // (i.e. the op does not subsample, stepping occurs in the loop).
     auto m = map.getSubMap({r});
-    LLVM_DEBUG(llvm::dbgs() << "computeSliceParameters: submap: " << m << "\n");
+    LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: submap: " << m << "\n");
     IRRewriter rewriter(builder);
     OpFoldResult offset = makeComposedFoldedAffineApply(rewriter, loc, m, lbs);
-    sliceParams.offsets.push_back(offset);
+    offsets.push_back(offset);
     OpFoldResult closedIntSize =
         makeComposedFoldedAffineApply(rewriter, loc, m, subShapeSizes);
     // Resulting size needs to be made half open interval again.
     AffineExpr s0 = getAffineSymbolExpr(0, builder.getContext());
     OpFoldResult size =
         makeComposedFoldedAffineApply(rewriter, loc, s0 + 1, closedIntSize);
+    LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: raw size: " << size << "\n");
     LLVM_DEBUG(llvm::dbgs()
-               << "computeSliceParameters: raw size: " << size << "\n");
-    LLVM_DEBUG(llvm::dbgs()
-               << "computeSliceParameters: new offset: " << offset << "\n");
-    sliceParams.strides.push_back(builder.getIndexAttr(1));
+               << "makeTiledShape: new offset: " << offset << "\n");
+    strides.push_back(builder.getIndexAttr(1));
 
     if (omitPartialTileCheck) {
       // We statically know that the partial/boundary tile condition is
       // unnecessary.
       LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
-      sliceParams.sizes.push_back(size);
+      sizes.push_back(size);
       continue;
     }
@@ -937,9 +903,22 @@ computeSliceParameters(OpBuilder &builder, Location loc, Value valueToTile,
           makeComposedFoldedAffineMin(rewriter, loc, minMap, {size, d, offset});
     }
     LLVM_DEBUG(llvm::dbgs() << "makeTiledShape: new size: " << size << "\n");
-    sliceParams.sizes.push_back(size);
+    sizes.push_back(size);
   }
-  return sliceParams;
+
+  auto *sliceOp = TypeSwitch<ShapedType, Operation *>(shapedType)
+                      .Case([&](MemRefType) {
+                        return builder.create<memref::SubViewOp>(
+                            loc, valueToTile, offsets, sizes, strides);
+                      })
+                      .Case([&](RankedTensorType) {
+                        return makeComposedExtractSliceOp(
+                            builder, loc, valueToTile, offsets, sizes, strides);
+                      })
+                      .Default([](ShapedType) -> Operation * {
+                        llvm_unreachable("Unexpected shaped type");
+                      });
+  return sliceOp->getResult(0);
 }
 
 SmallVector<OpFoldResult> computeTileOffsets(OpBuilder &b, Location loc,
@@ -1024,12 +1003,12 @@ Value materializeOpFoldResult(OpBuilder &builder, Location loc,
   return materializeOpFoldResult(b, opFoldResult);
 }
 
-SmallVector<Optional<SliceParameters>>
-computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp,
-                          ValueRange valuesToTile, ArrayRef<OpFoldResult> ivs,
-                          ArrayRef<OpFoldResult> tileSizes,
-                          ArrayRef<OpFoldResult> sizeBounds,
-                          bool omitPartialTileCheck) {
+SmallVector<Value> makeTiledShapes(OpBuilder &b, Location loc,
+                                   LinalgOp linalgOp, ValueRange valuesToTile,
+                                   ArrayRef<OpFoldResult> ivs,
+                                   ArrayRef<OpFoldResult> tileSizes,
+                                   ArrayRef<OpFoldResult> sizeBounds,
+                                   bool omitPartialTileCheck) {
   assert(ivs.size() == static_cast<size_t>(llvm::count_if(
                            llvm::make_range(tileSizes.begin(), tileSizes.end()),
                            [](OpFoldResult v) { return !isZero(v); })) &&
@@ -1037,16 +1016,15 @@ computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp,
 
   // Construct (potentially temporary) mins and maxes on which to apply maps
   // that define tile subshapes.
-  SmallVector<OpFoldResult> lbs =
-      computeTileOffsets(builder, loc, ivs, tileSizes);
+  SmallVector<OpFoldResult> lbs = computeTileOffsets(b, loc, ivs, tileSizes);
   SmallVector<OpFoldResult> subShapeSizes =
-      computeTileSizes(builder, loc, tileSizes, sizeBounds);
+      computeTileSizes(b, loc, tileSizes, sizeBounds);
   assert(static_cast<int64_t>(valuesToTile.size()) ==
              linalgOp.getNumInputsAndOutputs() &&
          "expected one value to tile for every operand");
-  SmallVector<Optional<SliceParameters>> allSliceParams;
-  allSliceParams.reserve(valuesToTile.size());
+  SmallVector<Value> tiledShapes;
+  tiledShapes.reserve(valuesToTile.size());
   for (OpOperand *opOperand : linalgOp.getInputAndOutputOperands()) {
     Value shapedOp = valuesToTile[opOperand->getOperandNumber()];
     LLVM_DEBUG(llvm::dbgs() << "makeTiledShapes: for operand " << shapedOp);
@@ -1057,39 +1035,18 @@ computeAllSliceParameters(OpBuilder &builder, Location loc, LinalgOp linalgOp,
     // extract/insert slice pairs make the accessed iteration argument
     // subdomains explicit.
     if (!isTiled(map, tileSizes) && !linalgOp.isOutputTensor(opOperand)) {
-      allSliceParams.push_back(llvm::None);
+      tiledShapes.push_back(shapedOp);
       LLVM_DEBUG(llvm::dbgs() << ": not tiled: use shape: "
                               << opOperand->get().getType() << "\n");
       continue;
     }
     LLVM_DEBUG(llvm::dbgs() << ": tiled: figure out subshape...\n");
 
-    allSliceParams.push_back(computeSliceParameters(
-        builder, loc, shapedOp, tileSizes, map, lbs, sizeBounds, subShapeSizes,
-        omitPartialTileCheck));
+    tiledShapes.push_back(makeTiledShape(b, loc, shapedOp, tileSizes, map, lbs,
+                                         sizeBounds, subShapeSizes,
+                                         omitPartialTileCheck));
   }
-  return allSliceParams;
-}
-
-SmallVector<Value> makeTiledShapes(OpBuilder &builder, Location loc,
-                                   LinalgOp linalgOp, ValueRange valuesToTile,
-                                   ArrayRef<OpFoldResult> ivs,
-                                   ArrayRef<OpFoldResult> tileSizes,
-                                   ArrayRef<OpFoldResult> sizeBounds,
-                                   bool omitPartialTileCheck) {
-  SmallVector<Optional<SliceParameters>> allSliceParameter =
-      computeAllSliceParameters(builder, loc, linalgOp, valuesToTile, ivs,
-                                tileSizes, sizeBounds, omitPartialTileCheck);
-  SmallVector<Value> tiledShapes;
-  for (auto item : llvm::zip(valuesToTile, allSliceParameter)) {
-    Value valueToTile = std::get<0>(item);
-    Optional<SliceParameters> sliceParams = std::get<1>(item);
-    tiledShapes.push_back(
-        sliceParams.hasValue()
-            ? materializeTiledShape(builder, loc, valueToTile, *sliceParams)
-            : valueToTile);
-  }
   return tiledShapes;
 }
diff --git a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir
index 6178aa393ee0..2f39e998a682 100644
--- a/mlir/test/Dialect/Linalg/tile-and-distribute.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-distribute.mlir
@@ -16,12 +16,12 @@ func.func @gemm1(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32
 // CHECK-DAG: %[[BIDX:.*]] = gpu.block_id x
 // CHECK: scf.for %[[ARG3:.*]] =
 // CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
-// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
-// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
-// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
 // CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]
+// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
 // CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]
-// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]]
+// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
+// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
+// CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX]]]
 // CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]
 
 // -----
 
@@ -48,11 +48,11 @@ func.func @gemm2(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32
 // CHECK: scf.if %[[INBOUNDS]]
 // CHECK: scf.for %[[ARG3:.*]] =
 // CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
+// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]
 // CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
+// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]
 // CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
 // CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
-// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]
-// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]
 // CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]]
 // CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]
@@ -106,11 +106,11 @@ func.func @gemm4(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32
 // CHECK: scf.if %[[INBOUNDS]]
 // CHECK: scf.for %[[ARG3:.*]] =
 // CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
+// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]
 // CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
+// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]
 // CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
 // CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
-// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG3]]]
-// CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG3]], %[[OFFSETX]]]
 // CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[OFFSETX_2]]]
 // CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]
@@ -139,9 +139,9 @@ func.func @gemm5(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32
 // CHECK: scf.parallel (%[[ARG3:.*]]) = (%[[LBX]]) to (%{{.*}}) step (%[[STEPX]])
 // CHECK: scf.for %[[ARG4:.*]] =
 // CHECK: %[[OFFSETY:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
-// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
 // CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[OFFSETY]], %[[ARG4]]]
 // CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG4]], %[[ARG3]]]
+// CHECK: %[[OFFSETY_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDY]]]
 // CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[OFFSETY_2]], %[[ARG3]]]
 // CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]
@@ -166,10 +166,10 @@ func.func @gemm6(%a : memref<?x?xf32>, %b : memref<?x?xf32>, %c : memref<?x?xf32
 // CHECK: %[[STEPY:.*]] = affine.apply #[[MAP0]]()[%[[NBLOCKSY]]]
 // CHECK: scf.parallel (%[[ARG3:.*]]) = (%[[LBY]]) to (%{{.*}}) step (%[[STEPY]])
 // CHECK: scf.for %[[ARG4:.*]] =
-// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
-// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
 // CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG4]]]
+// CHECK: %[[OFFSETX:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
 // CHECK: %[[SV2:.*]] = memref.subview %[[ARG1]][%[[ARG4]], %[[OFFSETX]]]
+// CHECK: %[[OFFSETX_2:.*]] = affine.apply #[[MAP0]]()[%[[BIDX]]]
 // CHECK: %[[SV3:.*]] = memref.subview %[[ARG2]][%[[ARG3]], %[[OFFSETX_2]]]
 // CHECK: linalg.matmul ins(%[[SV1]], %[[SV2]]{{.*}} outs(%[[SV3]]
diff --git a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
index a0559d7690d8..3509b566ef96 100644
--- a/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
+++ b/mlir/test/Dialect/Linalg/tile-and-fuse-tensors.mlir
@@ -241,9 +241,9 @@ func.func @conv_tensors_dynamic(%input: tensor<?x?x?x?xf32>, %filter: tensor<?x?
 // CHECK-NEXT: %[[SIZE_ELEM_OC:.+]] = affine.min #[[BOUND2_MAP]](%[[IV2]])[%[[ELEM_OC]]]
 // CHECK-NEXT: %[[OFFSET_OW:.+]] = affine.apply #[[X2_MAP]](%[[IV2]])
 // CHECK-NEXT: %[[SIZE_INPUT_W:.+]] = affine.min #[[INPUT_BOUND]](%[[IV2]], %[[SIZE_ELEM_OW]])[%[[FILL_W]], %[[FILTER_W]]]
-// CHECK-NEXT: %[[SIZE_ELEM_OW_2:.+]] = affine.min #[[BOUND4_MAP_2]](%[[IV2]])[%[[FILL_W]], %[[ELEM_OW]]]
 // CHECK-NEXT: %[[ST_INPUT:.+]] = tensor.extract_slice %[[INPUT]][%[[IV0]], %[[OFFSET_OH]], %[[OFFSET_OW]], 0]
 // CHECK-SAME: [%[[SIZE_INPUT_N]], %[[SIZE_INPUT_H]], %[[SIZE_INPUT_W]], %[[INPUT_C]]]
+// CHECK-NEXT: %[[SIZE_ELEM_OW_2:.+]] = affine.min #[[BOUND4_MAP_2]](%[[IV2]])[%[[FILL_W]], %[[ELEM_OW]]]
 // CHECK-NEXT: scf.for %[[IV3:.+]] = %{{.+}} to %[[ELEM_OC]] step %{{.+}} iter_args(%[[ARG:[a-z0-9]+]]
 // CHECK-NEXT: %[[ST_ELEM:.+]] = tensor.extract_slice %[[ELEM]][%[[IV0]], %[[IV1]], %[[IV2]], %[[IV3]]]
 // CHECK-SAME: [%[[SIZE_ELEM_N]], %[[SIZE_ELEM_OH]], %[[SIZE_ELEM_OW]], %[[SIZE_ELEM_OC]]]
diff --git a/mlir/test/Dialect/Linalg/tile-conv.mlir b/mlir/test/Dialect/Linalg/tile-conv.mlir
index 028c93a4b196..77518acf1fc2 100644
--- a/mlir/test/Dialect/Linalg/tile-conv.mlir
+++ b/mlir/test/Dialect/Linalg/tile-conv.mlir
@@ -26,9 +26,9 @@ func.func @conv(%arg0 : memref<?x?xf32>, %arg1 : memref<?x?xf32>, %arg2 : memref
 // CHECK: scf.for %[[ARG4:.*]] = %[[C0]] to %[[T3]] step %[[C3]]
 // CHECK: %[[T4:.*]] = affine.min #[[MAP0]](%[[ARG3]])[%[[T2]], %[[T0]]]
 // CHECK: %[[T5:.*]] = affine.min #[[MAP1]](%[[ARG4]])[%[[T3]], %[[T1]]]
+// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG4]]] [%[[T4]], %[[T5]]]
 // CHECK: %[[T6:.*]] = affine.min #[[MAP2]](%[[ARG3]])[%[[T2]]
 // CHECK: %[[T7:.*]] = affine.min #[[MAP3]](%[[ARG4]])[%[[T3]]]
-// CHECK: %[[SV1:.*]] = memref.subview %[[ARG0]][%[[ARG3]], %[[ARG4]]] [%[[T4]], %[[T5]]]
 // CHECK: %[[SV2:.*]] = memref.subview %[[ARG2]][%[[ARG3]], %[[ARG4]]] [%[[T6]], %[[T7]]]
 // CHECK: linalg.conv_2d
 // CHECK-SAME: ins(%[[SV1]], %[[ARG1]]
diff --git a/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir b/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
index 8a922fed70d5..ab97df361157 100644
--- a/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
+++ b/mlir/test/Dialect/Linalg/tile-to-foreach-thread.mlir
@@ -64,12 +64,12 @@ func.func @matmul_static(%A: tensor<100x200xf32>, %B: tensor<200x300xf32>, %C: t
   // CHECK-NOT: affine.min
   // CHECK-NOT: affine.max
   // CHECK: %[[LB0:.+]] = affine.apply #[[$map2]](%[[IV0]])
-  // CHECK: %[[LB1:.+]] = affine.apply #[[$map3]](%[[IV1]])
-  // CHECK: %[[LB0_1:.+]] = affine.apply #[[$map2]](%[[IV0]])
-  // CHECK: %[[LB1_1:.+]] = affine.apply #[[$map3]](%[[IV1]])
   // CHECK: %[[tA:.+]] = tensor.extract_slice %[[A]][%[[LB0]], 0] [10, 200] [1, 1] :
+  // CHECK: %[[LB1:.+]] = affine.apply #[[$map3]](%[[IV1]])
   // CHECK: %[[tB:.+]] = tensor.extract_slice %[[B]][0, %[[LB1]]] [200, %[[TS]]] [1, 1] :
-  // CHECK: %[[tC:.+]] = tensor.extract_slice %[[C]][%[[LB0_1]], %[[LB1_1]]] [10, %[[TS]]] [1, 1] :
+  // CHECK: %[[LB0:.+]] = affine.apply #[[$map2]](%[[IV0]])
+  // CHECK: %[[LB1:.+]] = affine.apply #[[$map3]](%[[IV1]])
+  // CHECK: %[[tC:.+]] = tensor.extract_slice %[[C]][%[[LB0]], %[[LB1]]] [10, %[[TS]]] [1, 1] :
   // CHECK: linalg.matmul
   // CHECK: scf.foreach_thread.perform_concurrently
   // CHECK-NEXT: tensor.parallel_insert_slice
@@ -155,12 +155,12 @@ func.func @matmul_tile_size_static(%A: tensor<100x200xf32>, %B: tensor<200x300xf
   // CHECK-NOT: affine.max
   // CHECK-NOT: affine.min
   // CHECK: %[[LB0:.+]] = affine.apply #[[$map2]](%[[IV0]])
-  // CHECK: %[[LB1:.+]] = affine.apply #[[$map3]](%[[IV1]])
-  // CHECK: %[[LB0_1:.+]] = affine.apply #[[$map2]](%[[IV0]])
-  // CHECK: %[[LB1_1:.+]] = affine.apply #[[$map3]](%[[IV1]])
   // CHECK: %[[tA:.+]] = tensor.extract_slice %[[A]][%[[LB0]], 0] [10, 200] [1, 1] :
+  // CHECK: %[[LB1:.+]] = affine.apply #[[$map3]](%[[IV1]])
   // CHECK: %[[tB:.+]] = tensor.extract_slice %[[B]][0, %[[LB1]]] [200, %[[TS]]] [1, 1] :
-  // CHECK: %[[tC:.+]] = tensor.extract_slice %[[C]][%[[LB0_1]], %[[LB1_1]]] [10, %[[TS]]] [1, 1] :
+  // CHECK: %[[LB0:.+]] = affine.apply #[[$map2]](%[[IV0]])
+  // CHECK: %[[LB1:.+]] = affine.apply #[[$map3]](%[[IV1]])
+  // CHECK: %[[tC:.+]] = tensor.extract_slice %[[C]][%[[LB0]], %[[LB1]]] [10, %[[TS]]] [1, 1] :
   // CHECK: linalg.matmul
   // CHECK: scf.foreach_thread.perform_concurrently
   // CHECK-NEXT: tensor.parallel_insert_slice
diff --git a/mlir/test/Dialect/Linalg/tile.mlir b/mlir/test/Dialect/Linalg/tile.mlir
index 86936a9f5843..8ec5a929dbba 100644
--- a/mlir/test/Dialect/Linalg/tile.mlir
+++ b/mlir/test/Dialect/Linalg/tile.mlir
@@ -40,9 +40,9 @@ func.func @matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>,
 // TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
 // TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
 // TILE-2: %[[K:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, #[[$strided2D]]>
+// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[K]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
 // TILE-2: %[[szK:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
 // TILE-2: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, #[[$strided2D]]>
-// TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[K]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
 // TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szK]], %[[N]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
 // TILE-2: linalg.matmul ins(%[[sAi]]{{.*}} outs(%[[sCi]]
@@ -53,9 +53,9 @@ func.func @matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>,
 // TILE-02: scf.for %[[J:.*]] = %{{.*}} to %[[N]] step %{{.*}} {
 // TILE-02: %[[K:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, #[[$strided2D]]>
 // TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[N]]]
+// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[K]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
 // TILE-02: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, #[[$strided2D]]>
 // TILE-02: %[[szK:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[N]]]
-// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[K]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
 // TILE-02: %[[sCj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[M]], %[[szK]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
 // TILE-02: linalg.matmul ins(%{{.*}}, %[[sBj]]{{.*}} outs(%[[sCj]]
@@ -66,10 +66,10 @@ func.func @matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>,
 // TILE-002: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
 // TILE-002: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, #[[$strided2D]]>
 // TILE-002: %[[szK:.*]] = affine.min #[[$bound_map]](%[[K]])[%[[ubK]]]
-// TILE-002: %[[szK_1:.*]] = affine.min #[[$bound_map]](%[[K]])[%[[ubK]]]
-// TILE-002: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, #[[$strided2D]]>
 // TILE-002: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[K]]] [%[[M]], %[[szK]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
-// TILE-002: %[[sBj:.*]] = memref.subview %{{.*}}[%[[K]], 0] [%[[szK_1]], %[[N]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
+// TILE-002: %[[szK:.*]] = affine.min #[[$bound_map]](%[[K]])[%[[ubK]]]
+// TILE-002: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, #[[$strided2D]]>
+// TILE-002: %[[sBj:.*]] = memref.subview %{{.*}}[%[[K]], 0] [%[[szK]], %[[N]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
 // TILE-002: linalg.matmul ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}}
 
 // TILE-234-LABEL: func @matmul(
@@ -85,13 +85,13 @@ func.func @matmul(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>,
 // TILE-234: scf.for %[[K:.*]] = %{{.*}}{{.*}} to %[[ubK]] step %{{.*}} {
 // TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubM]]]
 // TILE-234: %[[szK:.*]] = affine.min #[[$bound_map_4]](%[[K]])[%[[ubK]]]
-// TILE-234: %[[szK_1:.*]] = affine.min #[[$bound_map_4]](%[[K]])[%[[ubK]]]
-// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[ubN]]]
-// TILE-234: %[[szM_1:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubM]]]
-// TILE-234: %[[szN_1:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[ubN]]]
 // TILE-234: %[[sAik:.*]] = memref.subview %{{.*}}[%[[I]], %[[K]]] [%[[szM]], %[[szK]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
-// TILE-234: %[[sBkj:.*]] = memref.subview %{{.*}}[%[[K]], %[[J]]] [%[[szK_1]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
-// TILE-234: %[[sCij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [%[[szM_1]], %[[szN_1]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
+// TILE-234: %[[szK:.*]] = affine.min #[[$bound_map_4]](%[[K]])[%[[ubK]]]
+// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[ubN]]]
+// TILE-234: %[[sBkj:.*]] = memref.subview %{{.*}}[%[[K]], %[[J]]] [%[[szK]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
+// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubM]]]
+// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[ubN]]]
+// TILE-234: %[[sCij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [%[[szM]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
 //
 // TILE-234: linalg.matmul ins(%[[sAik]], %[[sBkj]]{{.*}} outs(%[[sCij]]
@@ -172,8 +172,8 @@ func.func @matvec(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: mem
 // TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
 // TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
 // TILE-2: %[[N:.*]] = memref.dim %{{.*}}, %c1 : memref<?x?xf32, #[[$strided2D]]>
-// TILE-2: %[[szN:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
 // TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]], 0] [%[[szM]], %[[N]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
+// TILE-2: %[[szN:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
 // TILE-2: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szN]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
 // TILE-2: linalg.matvec ins(%[[sAi]], %{{.*}} outs(%[[sCi]]
@@ -187,9 +187,9 @@ func.func @matvec(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: mem
 // TILE-02: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} {
 // TILE-02: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?x?xf32, #[[$strided2D]]>
 // TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[K]]]
-// TILE-02: %[[szN_1:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[K]]]
 // TILE-02: %[[sAj:.*]] = memref.subview %{{.*}}[0, %[[J]]] [%[[M]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
-// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[%[[J]]] [%[[szN_1]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
+// TILE-02: %[[szN:.*]] = affine.min #[[$bound_map]](%[[J]])[%[[K]]]
+// TILE-02: %[[sBj:.*]] = memref.subview %{{.*}}[%[[J]]] [%[[szN]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
 // TILE-02: linalg.matvec ins(%[[sAj]], %[[sBj]]{{.*}} outs(%{{.*}}
 
 // TILE-002-LABEL: func @matvec(
@@ -211,11 +211,11 @@ func.func @matvec(%arg0: memref<?x?xf32, offset: ?, strides: [?, 1]>, %arg1: mem
 // TILE-234: scf.for %[[J:.*]] = %{{.*}}{{.*}} to %[[K]] step %{{.*}} {
 // TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[M]]]
 // TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[K]]]
-// TILE-234: %[[szN_1:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[K]]]
-// TILE-234: %[[szM_1:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[M]]]
 // TILE-234: %[[sAij:.*]] = memref.subview %{{.*}}[%[[I]], %[[J]]] [%[[szM]], %[[szN]]] [1, 1] : memref<?x?xf32, #[[$strided2D]]> to memref<?x?xf32, #[[$strided2D]]>
-// TILE-234: %[[sBj:.*]] = memref.subview %{{.*}}[%[[J]]] [%[[szN_1]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
-// TILE-234: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM_1]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
+// TILE-234: %[[szN:.*]] = affine.min #[[$bound_map_3]](%[[J]])[%[[K]]]
+// TILE-234: %[[sBj:.*]] = memref.subview %{{.*}}[%[[J]]] [%[[szN]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
+// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[M]]]
+// TILE-234: %[[sCi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
 //
 // TILE-234: linalg.matvec ins(%[[sAij]], %[[sBj]]{{.*}} outs(%[[sCi]]
@@ -231,9 +231,9 @@ func.func @dot(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf3
 // TILE-2: %[[M:.*]] = memref.dim %{{.*}}, %c0 : memref<?xf32, #[[$strided1D]]>
 // TILE-2: scf.for %[[I:.*]] = %{{.*}}{{.*}} to %[[M]] step %{{.*}} {
 // TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
-// TILE-2: %[[szM_1:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
 // TILE-2: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
-// TILE-2: %[[sBi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM_1]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
+// TILE-2: %[[szM:.*]] = affine.min #[[$bound_map]](%[[I]])[%[[M]]]
+// TILE-2: %[[sBi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
 // TILE-2: linalg.dot ins(%[[sAi]], %[[sBi]]{{.*}} outs(
 
 // TILE-02-LABEL: func @dot(
@@ -248,9 +248,9 @@ func.func @dot(%arg0: memref<?xf32, offset: ?, strides: [1]>, %arg1: memref<?xf3
 // TILE-234: %[[ubK:.*]] = memref.dim %{{.*}}, %c0 : memref<?xf32, #[[$strided1D]]>
 // TILE-234: scf.for %[[I:.*]] = %{{.*}} to %[[ubK]] step %{{.*}} {
 // TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubK]]]
-// TILE-234: %[[szM_1:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubK]]]
 // TILE-234: %[[sAi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
-// TILE-234: %[[sBi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM_1]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
+// TILE-234: %[[szM:.*]] = affine.min #[[$bound_map_2]](%[[I]])[%[[ubK]]]
+// TILE-234: %[[sBi:.*]] = memref.subview %{{.*}}[%[[I]]] [%[[szM]]] [1] : memref<?xf32, #[[$strided1D]]> to memref<?xf32, #[[$strided1D]]>
 // TILE-234: linalg.dot ins(%[[sAi]], %[[sBi]]{{.*}} outs(
 
 func.func @fill_static(%arg0: memref<127x99xf32>, %arg1: f32) {
diff --git a/mlir/test/Dialect/Linalg/transform-op-split.mlir b/mlir/test/Dialect/Linalg/transform-op-split.mlir
index 9a22d3a44064..212712446ac5 100644
--- a/mlir/test/Dialect/Linalg/transform-op-split.mlir
+++ b/mlir/test/Dialect/Linalg/transform-op-split.mlir
@@ -102,8 +102,8 @@ func.func @dynamic(%arg0: tensor<100xf32>, %arg1: tensor<100xf32>) -> tensor<100
   // CHECK: %[[PARTIAL:.+]] = tensor.insert_slice %[[RES_SLICE_LOW]] into %[[OUT]][0] [%[[SPLIT_LOW]]] [1]
   //
   // CHECK: %[[SPLIT_HIGH_2:.+]] = affine.apply #[[$MAP_S_MINUS_100]]()[%[[SPLIT_LOW]]]
-  // CHECK: %[[SPLIT_HIGH_3:.+]] = affine.apply #[[$MAP_S_MINUS_100]]()[%[[SPLIT_LOW]]]
   // CHECK: %[[IN_SLICE_HIGH:.+]] = tensor.extract_slice %[[IN:.+]][%[[SPLIT_LOW]]] [%[[SPLIT_HIGH_2]]] [1] : tensor<100xf32> to tensor<?xf32>
+  // CHECK: %[[SPLIT_HIGH_3:.+]] = affine.apply #[[$MAP_S_MINUS_100]]()[%[[SPLIT_LOW]]]
   // CHECK: %[[OUT_SLICE_HIGH:.+]] = tensor.extract_slice %[[PARTIAL:.+]][%[[SPLIT_LOW]]] [%[[SPLIT_HIGH_3]]] [1] : tensor<100xf32> to tensor<?xf32>
   // CHECK: %[[RES_SLICE_HIGH:.+]] = linalg.generic
  // CHECK: ins(%[[IN_SLICE_HIGH]]
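Note on the test updates: all of the CHECK-line movement above follows mechanically from the revert. The reverted refactoring computed every operand's `affine.apply`/`affine.min` offsets and sizes up front in one batch (which is what forced the duplicated `%[[..._1]]`/`%[[..._2]]` captures) and only then materialized the slices; with `makeTiledShape(s)` restored, each `memref.subview`/`tensor.extract_slice` is emitted immediately after its own offset and size computations, so those lines move back next to their slices and the tests can simply re-bind the original capture names.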