// RUN: mlir-opt -test-tiling-interface=tile-using-scf-for -split-input-file %s | FileCheck %s

// Tiling of Linalg ops through the tiling interface, materialized as scf.for
// loop nests. Every case is selected by the marker string stored in the
// `__internal_linalg_transform__` attribute of the op under test. The tile
// sizes are not spelled out in this file; they only show up as the constants
// 10 / 20 / 30 in the expected output (presumably configured by the test
// pass that consumes the marker).

// Case: matmul on dynamically shaped tensors. The expected IR has two loops
// (over M with step 10 and over N with step 20); K is taken whole in each
// tile, and the accumulator is threaded through both loops as an iter_arg.
func.func @simple_matmul(%lhs : tensor<?x?xf32>, %rhs : tensor<?x?xf32>,
    %acc : tensor<?x?xf32>) -> tensor<?x?xf32> {
  %gemm = linalg.matmul {__internal_linalg_transform__ = "simple_gemm"}
      ins(%lhs, %rhs : tensor<?x?xf32>, tensor<?x?xf32>)
      outs(%acc : tensor<?x?xf32>) -> tensor<?x?xf32>
  return %gemm : tensor<?x?xf32>
}
//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0, s1] -> (10, -d0 + s1)>
//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0, s1] -> (20, -d0 + s1)>
//      CHECK: func.func @simple_matmul(
// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK-SAME:     %[[ARG2:[a-zA-Z0-9]+]]: tensor<?x?xf32>
//  CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
//  CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
//  CHECK-DAG:   %[[C10:.+]] = arith.constant 10 : index
//  CHECK-DAG:   %[[C20:.+]] = arith.constant 20 : index
//  CHECK-DAG:   %[[M:.+]] = tensor.dim %[[ARG0]], %[[C0]]
//  CHECK-DAG:   %[[K:.+]] = tensor.dim %[[ARG0]], %[[C1]]
//  CHECK-DAG:   %[[N:.+]] = tensor.dim %[[ARG1]], %[[C1]]
//      CHECK:   %[[OUTER:[a-zA-Z0-9]+]] = scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[M]] step %[[C10]]
// CHECK-SAME:       iter_args(%[[INIT0:.+]] = %[[ARG2]])
//      CHECK:     %[[TS_M:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[C10]], %[[M]]]
//      CHECK:     %[[INNER:[a-zA-Z0-9]+]] = scf.for %[[IV1:[a-zA-Z0-9]+]] = %[[C0]] to %[[N]] step %[[C20]]
// CHECK-SAME:         iter_args(%[[INIT1:.+]] = %[[INIT0]])
//      CHECK:       %[[TS_N:.+]] = affine.min #[[MAP1]](%[[IV1]])[%[[C20]], %[[N]]]
//  CHECK-DAG:       %[[LHS_TILE:.+]] = tensor.extract_slice %[[ARG0]]
// CHECK-SAME:           [%[[IV0]], 0] [%[[TS_M]], %[[K]]] [1, 1]
//  CHECK-DAG:       %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]]
// CHECK-SAME:           [0, %[[IV1]]] [%[[K]], %[[TS_N]]] [1, 1]
//  CHECK-DAG:       %[[INIT_TILE:.+]] = tensor.extract_slice %[[INIT1]]
// CHECK-SAME:           [%[[IV0]], %[[IV1]]] [%[[TS_M]], %[[TS_N]]] [1, 1]
//      CHECK:       %[[GEMM_TILE:.+]] = linalg.matmul
// CHECK-SAME:           ins(%[[LHS_TILE]], %[[RHS_TILE]] :
// CHECK-SAME:           outs(%[[INIT_TILE]] :
//      CHECK:       %[[UPDATE:.+]] = tensor.insert_slice %[[GEMM_TILE]] into %[[INIT1]]
// CHECK-SAME:           [%[[IV0]], %[[IV1]]] [%[[TS_M]], %[[TS_N]]] [1, 1]
//      CHECK:       scf.yield %[[UPDATE]]
//      CHECK:     scf.yield %[[INNER]]
//      CHECK:   return %[[OUTER]]

// -----

// Case: the same matmul on memrefs. Three loops are expected (M, N and K with
// steps 10, 20 and 30), operand tiles are memref.subview ops, and nothing is
// carried through the loops.
func.func @simple_matmul_memref(%lhs : memref<?x?xf32>, %rhs : memref<?x?xf32>,
    %out : memref<?x?xf32>) {
  linalg.matmul {__internal_linalg_transform__ = "simple_gemm_memref"}
      ins(%lhs, %rhs : memref<?x?xf32>, memref<?x?xf32>)
      outs(%out : memref<?x?xf32>)
  return
}
//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0, s1] -> (10, -d0 + s1)>
//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0, s1] -> (20, -d0 + s1)>
//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0, s1] -> (30, -d0 + s1)>
//      CHECK: func.func @simple_matmul_memref(
// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: memref<?x?xf32>
// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: memref<?x?xf32>
// CHECK-SAME:     %[[ARG2:[a-zA-Z0-9]+]]: memref<?x?xf32>
//  CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
//  CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
//  CHECK-DAG:   %[[C10:.+]] = arith.constant 10 : index
//  CHECK-DAG:   %[[C20:.+]] = arith.constant 20 : index
//  CHECK-DAG:   %[[C30:.+]] = arith.constant 30 : index
//  CHECK-DAG:   %[[M:.+]] = memref.dim %[[ARG0]], %[[C0]]
//  CHECK-DAG:   %[[K:.+]] = memref.dim %[[ARG0]], %[[C1]]
//  CHECK-DAG:   %[[N:.+]] = memref.dim %[[ARG1]], %[[C1]]
//      CHECK:   scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[M]] step %[[C10]]
//      CHECK:     %[[TS_M:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[C10]], %[[M]]]
//      CHECK:     scf.for %[[IV1:[a-zA-Z0-9]+]] = %[[C0]] to %[[N]] step %[[C20]]
//      CHECK:       %[[TS_N:.+]] = affine.min #[[MAP1]](%[[IV1]])[%[[C20]], %[[N]]]
//      CHECK:       scf.for %[[IV2:[a-zA-Z0-9]+]] = %[[C0]] to %[[K]] step %[[C30]]
//      CHECK:         %[[TS_K:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[C30]], %[[K]]]
//  CHECK-DAG:         %[[LHS_TILE:.+]] = memref.subview %[[ARG0]]
// CHECK-SAME:             [%[[IV0]], %[[IV2]]] [%[[TS_M]], %[[TS_K]]] [1, 1]
//  CHECK-DAG:         %[[RHS_TILE:.+]] = memref.subview %[[ARG1]]
// CHECK-SAME:             [%[[IV2]], %[[IV1]]] [%[[TS_K]], %[[TS_N]]] [1, 1]
//  CHECK-DAG:         %[[OUT_TILE:.+]] = memref.subview %[[ARG2]]
// CHECK-SAME:             [%[[IV0]], %[[IV1]]] [%[[TS_M]], %[[TS_N]]] [1, 1]
//      CHECK:         linalg.matmul
// CHECK-SAME:             ins(%[[LHS_TILE]], %[[RHS_TILE]] :
// CHECK-SAME:             outs(%[[OUT_TILE]] :

// -----

// Case: a fully parallel linalg.generic that produces two differently
// permuted results from one static input. Two loops are expected (extent 128
// with step 10, extent 300 with step 20); the extent-200 dimension is not
// tiled, and both results are carried through the loops and yielded together.
#map0 = affine_map<(d0, d1, d2) -> (d0, d1, d2)>
#map1 = affine_map<(d0, d1, d2) -> (d0, d2, d1)>
#map2 = affine_map<(d0, d1, d2) -> (d2, d0, d1)>
func.func @multi_result(%src : tensor<128x200x300xf32>) -> (tensor<128x300x200xf32>, tensor<300x128x200xf32>) {
  %dst0 = linalg.init_tensor [128, 300, 200] : tensor<128x300x200xf32>
  %dst1 = linalg.init_tensor [300, 128, 200] : tensor<300x128x200xf32>
  %res:2 = linalg.generic {
      indexing_maps = [#map0, #map1, #map2],
      iterator_types = ["parallel", "parallel", "parallel"]}
      {__internal_linalg_transform__ = "parallel_generic_transpose"}
      ins(%src : tensor<128x200x300xf32>)
      outs(%dst0, %dst1 : tensor<128x300x200xf32>, tensor<300x128x200xf32>) {
    ^bb0(%in : f32, %out0 : f32, %out1 : f32):
      linalg.yield %in, %in : f32, f32
    } -> (tensor<128x300x200xf32>, tensor<300x128x200xf32>)
  return %res#0, %res#1 : tensor<128x300x200xf32>, tensor<300x128x200xf32>
}
//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0, s1] -> (10, -d0 + s1)>
//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0, s1] -> (20, -d0 + s1)>
//      CHECK: func.func @multi_result(
// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<128x200x300xf32>)
//  CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
//  CHECK-DAG:   %[[C10:.+]] = arith.constant 10 : index
//  CHECK-DAG:   %[[C20:.+]] = arith.constant 20 : index
//  CHECK-DAG:   %[[C128:.+]] = arith.constant 128 : index
//  CHECK-DAG:   %[[C300:.+]] = arith.constant 300 : index
//  CHECK-DAG:   %[[INIT0:.+]] = linalg.init_tensor [128, 300, 200]
//  CHECK-DAG:   %[[INIT1:.+]] = linalg.init_tensor [300, 128, 200]
//      CHECK:   %[[OUTER:[a-zA-Z0-9]+]]:2 = scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[C128]] step %[[C10]]
// CHECK-SAME:       iter_args(%[[ACC0:[a-zA-Z0-9]+]] = %[[INIT0]], %[[ACC1:[a-zA-Z0-9]+]] = %[[INIT1]])
//      CHECK:     %[[TS_Y:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[C10]], %[[C128]]]
//      CHECK:     %[[INNER:[a-zA-Z0-9]+]]:2 = scf.for %[[IV1:[a-zA-Z0-9]+]] = %[[C0]] to %[[C300]] step %[[C20]]
// CHECK-SAME:         iter_args(%[[ITER0:[a-zA-Z0-9]+]] = %[[ACC0]], %[[ITER1:[a-zA-Z0-9]+]] = %[[ACC1]])
//      CHECK:       %[[TS_X:.+]] = affine.min #[[MAP1]](%[[IV1]])[%[[C20]], %[[C300]]]
//  CHECK-DAG:       %[[ARG_TILE:.+]] = tensor.extract_slice %[[ARG0]]
// CHECK-SAME:           [%[[IV0]], 0, %[[IV1]]] [%[[TS_Y]], 200, %[[TS_X]]] [1, 1, 1]
//  CHECK-DAG:       %[[INIT0_TILE:.+]] = tensor.extract_slice %[[ITER0]]
// CHECK-SAME:           [%[[IV0]], %[[IV1]], 0] [%[[TS_Y]], %[[TS_X]], 200] [1, 1, 1]
//  CHECK-DAG:       %[[INIT1_TILE:.+]] = tensor.extract_slice %[[ITER1]]
// CHECK-SAME:           [%[[IV1]], %[[IV0]], 0] [%[[TS_X]], %[[TS_Y]], 200] [1, 1, 1]
//      CHECK:       %[[RESULT_TILE:.+]]:2 = linalg.generic
// CHECK-SAME:           ins(%[[ARG_TILE]] :
// CHECK-SAME:           outs(%[[INIT0_TILE]], %[[INIT1_TILE]] :
//      CHECK:       %[[UPDATE0:.+]] = tensor.insert_slice %[[RESULT_TILE]]#0 into %[[ITER0]]
// CHECK-SAME:           [%[[IV0]], %[[IV1]], 0] [%[[TS_Y]], %[[TS_X]], 200] [1, 1, 1]
//      CHECK:       %[[UPDATE1:.+]] = tensor.insert_slice %[[RESULT_TILE]]#1 into %[[ITER1]]
// CHECK-SAME:           [%[[IV1]], %[[IV0]], 0] [%[[TS_X]], %[[TS_Y]], 200] [1, 1, 1]
//      CHECK:       scf.yield %[[UPDATE0]], %[[UPDATE1]]
//      CHECK:     scf.yield %[[INNER]]#0, %[[INNER]]#1
//      CHECK:   return %[[OUTER]]#0, %[[OUTER]]#1

// -----

// Case: strided and dilated 2-D convolution on dynamic tensors. The expected
// loops run over filter dims 0 and 1 and over input dim 3 (steps 10, 20, 30).
// The output is sliced at offset [0, 0, 0, 0] with its full sizes, and the
// strides / dilation attributes must survive on the tiled op.
func.func @conv2D(%input : tensor<?x?x?x?xf32>, %filter : tensor<?x?x?x?xf32>,
    %init : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32> {
  %conv = linalg.conv_2d_nhwc_hwcf {
      strides = dense<[2, 3]> : tensor<2xi64>,
      dilation = dense<[4, 5]> : tensor<2xi64>,
      __internal_linalg_transform__ = "simple_conv"}
      ins(%input, %filter : tensor<?x?x?x?xf32>, tensor<?x?x?x?xf32>)
      outs(%init : tensor<?x?x?x?xf32>) -> tensor<?x?x?x?xf32>
  return %conv : tensor<?x?x?x?xf32>
}
//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0, s1] -> (10, -d0 + s1)>
//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0, s1] -> (20, -d0 + s1)>
//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0, s1] -> (30, -d0 + s1)>
//  CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0)[s0] -> (d0 + s0 * 2 - 2)>
//  CHECK-DAG: #[[MAP4:.+]] = affine_map<(d0)[s0] -> (d0 + s0 * 3 - 3)>
//      CHECK: func.func @conv2D(
// CHECK-SAME:     %[[INPUT:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>
// CHECK-SAME:     %[[FILTER:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>
// CHECK-SAME:     %[[INIT:[a-zA-Z0-9]+]]: tensor<?x?x?x?xf32>
//  CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
//  CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
//  CHECK-DAG:   %[[C2:.+]] = arith.constant 2 : index
//  CHECK-DAG:   %[[C3:.+]] = arith.constant 3 : index
//  CHECK-DAG:   %[[C10:.+]] = arith.constant 10 : index
//  CHECK-DAG:   %[[C20:.+]] = arith.constant 20 : index
//  CHECK-DAG:   %[[C30:.+]] = arith.constant 30 : index
//  CHECK-DAG:   %[[N:.+]] = tensor.dim %[[INPUT]], %[[C0]]
//  CHECK-DAG:   %[[C:.+]] = tensor.dim %[[INPUT]], %[[C3]]
//  CHECK-DAG:   %[[P:.+]] = tensor.dim %[[FILTER]], %[[C0]]
//  CHECK-DAG:   %[[Q:.+]] = tensor.dim %[[FILTER]], %[[C1]]
//  CHECK-DAG:   %[[F:.+]] = tensor.dim %[[FILTER]], %[[C3]]
//  CHECK-DAG:   %[[R:.+]] = tensor.dim %[[INIT]], %[[C1]]
//  CHECK-DAG:   %[[S:.+]] = tensor.dim %[[INIT]], %[[C2]]
//      CHECK:   scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[P]] step %[[C10]]
// CHECK-SAME:       iter_args(%[[INIT0:.+]] = %[[INIT]])
//      CHECK:     %[[TS_P:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[C10]], %[[P]]]
//      CHECK:     scf.for %[[IV1:[a-zA-Z0-9]+]] = %[[C0]] to %[[Q]] step %[[C20]]
// CHECK-SAME:         iter_args(%[[INIT1:.+]] = %[[INIT0]])
//      CHECK:       %[[TS_Q:.+]] = affine.min #[[MAP1]](%[[IV1]])[%[[C20]], %[[Q]]]
//      CHECK:       scf.for %[[IV2:[a-zA-Z0-9]+]] = %[[C0]] to %[[C]] step %[[C30]]
// CHECK-SAME:           iter_args(%[[INIT2:.+]] = %[[INIT1]])
//  CHECK-DAG:         %[[TS_C:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[C30]], %[[C]]]
//  CHECK-DAG:         %[[TS_H:.+]] = affine.apply #[[MAP3]](%[[TS_P]])[%[[R]]]
//  CHECK-DAG:         %[[TS_W:.+]] = affine.apply #[[MAP4]](%[[TS_Q]])[%[[S]]]
//  CHECK-DAG:         %[[INPUT_TILE:.+]] = tensor.extract_slice %[[INPUT]]
// CHECK-SAME:             [0, %[[IV0]], %[[IV1]], %[[IV2]]] [%[[N]], %[[TS_H]], %[[TS_W]], %[[TS_C]]]
//  CHECK-DAG:         %[[FILTER_TILE:.+]] = tensor.extract_slice %[[FILTER]]
// CHECK-SAME:             [%[[IV0]], %[[IV1]], %[[IV2]], 0] [%[[TS_P]], %[[TS_Q]], %[[TS_C]], %[[F]]]
//  CHECK-DAG:         %[[INIT_TILE:.+]] = tensor.extract_slice %[[INIT2]]
// CHECK-SAME:             [0, 0, 0, 0] [%[[N]], %[[R]], %[[S]], %[[F]]]
//      CHECK:         %[[CONV_TILE:.+]] = linalg.conv_2d_nhwc_hwcf
// CHECK-SAME:             dilation = dense<[4, 5]> : tensor<2xi64>, strides = dense<[2, 3]> : tensor<2xi64>
// CHECK-SAME:             ins(%[[INPUT_TILE]], %[[FILTER_TILE]] :
// CHECK-SAME:             outs(%[[INIT_TILE]] :
//      CHECK:         tensor.insert_slice %[[CONV_TILE]] into %[[INIT2]]
// CHECK-SAME:             [0, 0, 0, 0] [%[[N]], %[[R]], %[[S]], %[[F]]]

// -----

// Case: a linalg.generic whose body reads its iteration indices. After tiling,
// every linalg.index result is expected to be offset by the induction variable
// of the matching loop before it is used.

// CHECK: #[[$MAP_ADD:.+]] = affine_map<(d0, d1) -> (d0 + d1)>

// CHECK-LABEL: @indexed_semantics
func.func @indexed_semantics(%src: tensor<?x?xf32>, %dst: tensor<?x?xf32>) -> tensor<?x?xf32> {
  // The two loops whose induction variables are added to the index values.

  // CHECK: scf.for %[[I0:.+]] = %{{.*}} to %{{.*}} step %{{.*}}
  // CHECK: scf.for %[[I1:.+]] = %{{.*}} to %{{.*}} step %{{.*}}
  %res = linalg.generic {
    indexing_maps = [affine_map<(d0, d1) -> (d0, d1)>,
                     affine_map<(d0, d1) -> (d0, d1)>],
    iterator_types = ["parallel", "parallel"]}
    {__internal_linalg_transform__ = "indexed_semantics"}
    ins(%src: tensor<?x?xf32>)
    outs(%dst: tensor<?x?xf32>) {
  ^bb0(%in: f32, %out: f32):
    // CHECK: %[[INDEX0:.+]] = linalg.index 0
    // CHECK: %[[INDEX0_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[INDEX0]], %[[I0]])
    %row = linalg.index 0 : index
    // CHECK: %[[INDEX1:.+]] = linalg.index 1
    // CHECK: %[[INDEX1_AMENDED:.+]] = affine.apply #[[$MAP_ADD]](%[[INDEX1]], %[[I1]])
    %col = linalg.index 1 : index
    // CHECK: arith.addi %[[INDEX0_AMENDED]], %[[INDEX1_AMENDED]]
    %sum = arith.addi %row, %col : index
    %sum_i64 = arith.index_cast %sum : index to i64
    %sum_f32 = arith.uitofp %sum_i64 : i64 to f32
    %val = arith.addf %sum_f32, %in : f32
    linalg.yield %val : f32
  } -> (tensor<?x?xf32>)
  return %res : tensor<?x?xf32>
}

// -----

// Case: matmul tiled in all three dimensions with the loops interchanged. The
// expected nest iterates N (step 20) outermost, then K (step 30), then M
// (step 10), with the accumulator threaded through all three loops.
func.func @interchange_matmul(%lhs : tensor<?x?xf32>, %rhs : tensor<?x?xf32>,
    %acc : tensor<?x?xf32>) -> tensor<?x?xf32> {
  %gemm = linalg.matmul {__internal_linalg_transform__ = "gemm_interchange"}
      ins(%lhs, %rhs : tensor<?x?xf32>, tensor<?x?xf32>)
      outs(%acc : tensor<?x?xf32>) -> tensor<?x?xf32>
  return %gemm : tensor<?x?xf32>
}
//  CHECK-DAG: #[[MAP0:.+]] = affine_map<(d0)[s0, s1] -> (20, -d0 + s1)>
//  CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0, s1] -> (30, -d0 + s1)>
//  CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0, s1] -> (10, -d0 + s1)>
//      CHECK: func.func @interchange_matmul(
// CHECK-SAME:     %[[ARG0:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK-SAME:     %[[ARG1:[a-zA-Z0-9]+]]: tensor<?x?xf32>
// CHECK-SAME:     %[[ARG2:[a-zA-Z0-9]+]]: tensor<?x?xf32>
//  CHECK-DAG:   %[[C0:.+]] = arith.constant 0 : index
//  CHECK-DAG:   %[[C1:.+]] = arith.constant 1 : index
//  CHECK-DAG:   %[[C10:.+]] = arith.constant 10 : index
//  CHECK-DAG:   %[[C20:.+]] = arith.constant 20 : index
//  CHECK-DAG:   %[[C30:.+]] = arith.constant 30 : index
//  CHECK-DAG:   %[[M:.+]] = tensor.dim %[[ARG0]], %[[C0]]
//  CHECK-DAG:   %[[K:.+]] = tensor.dim %[[ARG0]], %[[C1]]
//  CHECK-DAG:   %[[N:.+]] = tensor.dim %[[ARG1]], %[[C1]]
//      CHECK:   %[[OUTER:[a-zA-Z0-9]+]] = scf.for %[[IV0:[a-zA-Z0-9]+]] = %[[C0]] to %[[N]] step %[[C20]]
// CHECK-SAME:       iter_args(%[[INIT0:.+]] = %[[ARG2]])
//      CHECK:     %[[TS_N:.+]] = affine.min #[[MAP0]](%[[IV0]])[%[[C20]], %[[N]]]
//      CHECK:     %[[INNER1:[a-zA-Z0-9]+]] = scf.for %[[IV1:[a-zA-Z0-9]+]] = %[[C0]] to %[[K]] step %[[C30]]
// CHECK-SAME:         iter_args(%[[INIT1:.+]] = %[[INIT0]])
//      CHECK:       %[[TS_K:.+]] = affine.min #[[MAP1]](%[[IV1]])[%[[C30]], %[[K]]]
//      CHECK:       %[[INNER2:[a-zA-Z0-9]+]] = scf.for %[[IV2:[a-zA-Z0-9]+]] = %[[C0]] to %[[M]] step %[[C10]]
// CHECK-SAME:           iter_args(%[[INIT2:.+]] = %[[INIT1]])
//  CHECK-DAG:         %[[TS_M:.+]] = affine.min #[[MAP2]](%[[IV2]])[%[[C10]], %[[M]]]
//  CHECK-DAG:         %[[LHS_TILE:.+]] = tensor.extract_slice %[[ARG0]]
// CHECK-SAME:             [%[[IV2]], %[[IV1]]] [%[[TS_M]], %[[TS_K]]] [1, 1]
//  CHECK-DAG:         %[[RHS_TILE:.+]] = tensor.extract_slice %[[ARG1]]
// CHECK-SAME:             [%[[IV1]], %[[IV0]]] [%[[TS_K]], %[[TS_N]]] [1, 1]
//  CHECK-DAG:         %[[INIT_TILE:.+]] = tensor.extract_slice %[[INIT2]]
// CHECK-SAME:             [%[[IV2]], %[[IV0]]] [%[[TS_M]], %[[TS_N]]] [1, 1]
//      CHECK:         %[[GEMM_TILE:.+]] = linalg.matmul
// CHECK-SAME:             ins(%[[LHS_TILE]], %[[RHS_TILE]] :
// CHECK-SAME:             outs(%[[INIT_TILE]] :
//      CHECK:         %[[UPDATE:.+]] = tensor.insert_slice %[[GEMM_TILE]] into %[[INIT2]]
// CHECK-SAME:             [%[[IV2]], %[[IV0]]] [%[[TS_M]], %[[TS_N]]] [1, 1]
//      CHECK:         scf.yield %[[UPDATE]]
//      CHECK:       scf.yield %[[INNER2]]
//      CHECK:     scf.yield %[[INNER1]]
//      CHECK:   return %[[OUTER]]