// RUN: mlir-opt %s -test-vector-transfer-full-partial-split -split-input-file | FileCheck %s
// RUN: mlir-opt %s -test-vector-transfer-full-partial-split=use-memref-copy -split-input-file | FileCheck %s --check-prefix=LINALG

// CHECK-DAG: #[[$map_p4:.*]] = affine_map<()[s0] -> (s0 + 4)>
// CHECK-DAG: #[[$map_p8:.*]] = affine_map<()[s0] -> (s0 + 8)>
// CHECK-DAG: #[[$map_2d_stride_1:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>

// LINALG-DAG: #[[$map_p4:.*]] = affine_map<()[s0] -> (s0 + 4)>
// LINALG-DAG: #[[$map_p8:.*]] = affine_map<()[s0] -> (s0 + 8)>
// LINALG-DAG: #[[$map_2d_stride_1:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
// LINALG-DAG: #[[$map_2d_stride_8x1:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 8 + s0 + d1)>
// LINALG-DAG: #[[$bounds_map_4:.*]] = affine_map<(d0, d1, d2) -> (d0 - d1, 4)>
// LINALG-DAG: #[[$bounds_map_8:.*]] = affine_map<(d0, d1, d2) -> (d0 - d1, 8)>

// CHECK-LABEL: split_vector_transfer_read_2d(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref
// CHECK-SAME: %[[i:[a-zA-Z0-9]*]]: index
// CHECK-SAME: %[[j:[a-zA-Z0-9]*]]: index

// LINALG-LABEL: split_vector_transfer_read_2d(
// LINALG-SAME: %[[A:[a-zA-Z0-9]*]]: memref
// LINALG-SAME: %[[i:[a-zA-Z0-9]*]]: index
// LINALG-SAME: %[[j:[a-zA-Z0-9]*]]: index
func.func @split_vector_transfer_read_2d(%A: memref<?x8xf32>, %i: index, %j: index) -> vector<4x8xf32> {
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32

  // CHECK-DAG: %[[c8:.*]] = arith.constant 8 : index
  // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
  // alloca for boundary full tile
  // CHECK: %[[alloc:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
  // %i + 4 <= dim(%A, 0)
  // CHECK: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]]
  // CHECK: %[[d0:.*]] = memref.dim %[[A]], %[[c0]] : memref<?x8xf32>
  // CHECK: %[[cmp0:.*]] = arith.cmpi sle, %[[idx0]], %[[d0]] : index
  // %j + 8 <= dim(%A, 1)
  // CHECK: %[[idx1:.*]] = affine.apply #[[$map_p8]]()[%[[j]]]
  // CHECK: %[[cmp1:.*]] = arith.cmpi sle, %[[idx1]], %[[c8]] : index
  // are both conds true
  // CHECK: %[[cond:.*]] = arith.andi %[[cmp0]], %[[cmp1]] : i1
  // CHECK: %[[ifres:.*]]:3 = scf.if %[[cond]] -> (memref<?x8xf32>, index, index) {
  // inBounds, just yield %A
  // CHECK: scf.yield %[[A]], %[[i]], %[[j]] : memref<?x8xf32>, index, index
  // CHECK: } else {
  // slow path, fill tmp alloc and yield a memref_casted version of it
  // CHECK: %[[slow:.*]] = vector.transfer_read %[[A]][%[[i]], %[[j]]], %cst :
  // CHECK-SAME: memref<?x8xf32>, vector<4x8xf32>
  // CHECK: %[[cast_alloc:.*]] = vector.type_cast %[[alloc]] :
  // CHECK-SAME: memref<4x8xf32> to memref<vector<4x8xf32>>
  // CHECK: store %[[slow]], %[[cast_alloc]][] : memref<vector<4x8xf32>>
  // CHECK: %[[yielded:.*]] = memref.cast %[[alloc]] :
  // CHECK-SAME: memref<4x8xf32> to memref<?x8xf32>
  // CHECK: scf.yield %[[yielded]], %[[c0]], %[[c0]] :
  // CHECK-SAME: memref<?x8xf32>, index, index
  // CHECK: }
  // CHECK: %[[res:.*]] = vector.transfer_read %[[ifres]]#0[%[[ifres]]#1, %[[ifres]]#2], %cst
  // CHECK-SAME: {in_bounds = [true, true]} : memref<?x8xf32>, vector<4x8xf32>

  // LINALG-DAG: %[[c0:.*]] = arith.constant 0 : index
  // LINALG-DAG: %[[c4:.*]] = arith.constant 4 : index
  // LINALG-DAG: %[[c8:.*]] = arith.constant 8 : index
  // alloca for boundary full tile
  // LINALG: %[[alloc:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
  // %i + 4 <= dim(%A, 0)
  // LINALG: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]]
  // LINALG: %[[d0:.*]] = memref.dim %[[A]], %[[c0]] : memref<?x8xf32>
  // LINALG: %[[cmp0:.*]] = arith.cmpi sle, %[[idx0]], %[[d0]] : index
  // %j + 8 <= dim(%A, 1)
  // LINALG: %[[idx1:.*]] = affine.apply #[[$map_p8]]()[%[[j]]]
  // LINALG: %[[cmp1:.*]] = arith.cmpi sle, %[[idx1]], %[[c8]] : index
  // are both conds true
  // LINALG: %[[cond:.*]] = arith.andi %[[cmp0]], %[[cmp1]] : i1
  // LINALG: %[[ifres:.*]]:3 = scf.if %[[cond]] -> (memref<?x8xf32>, index, index) {
  // inBounds, just yield %A
  // LINALG: scf.yield %[[A]], %[[i]], %[[j]] : memref<?x8xf32>, index, index
  // LINALG: } else {
  // slow path, fill tmp alloc and yield a memref_casted version of it
  // LINALG: linalg.fill ins(%cst : f32) outs(%[[alloc]] : memref<4x8xf32>)
  // LINALG: %[[d0:.*]] = memref.dim %[[A]], %[[c0]] : memref<?x8xf32>
  // LINALG: %[[sv0:.*]] = affine.min #[[$bounds_map_4]](%[[d0]], %[[i]], %[[c4]])
  // LINALG: %[[sv1:.*]] = affine.min #[[$bounds_map_8]](%[[c8]], %[[j]], %[[c8]])
  // LINALG: %[[sv:.*]] = memref.subview %[[A]][%[[i]], %[[j]]] [%[[sv0]], %[[sv1]]] [1, 1]
  // LINALG-SAME: memref<?x8xf32> to memref<?x?xf32, #[[$map_2d_stride_8x1]]>
  // LINALG: %[[alloc_view:.*]] = memref.subview %[[alloc]][0, 0] [%[[sv0]], %[[sv1]]] [1, 1]
  // LINALG: memref.copy %[[sv]], %[[alloc_view]] : memref<?x?xf32, #[[$map_2d_stride_8x1]]> to memref<?x?xf32, #{{.*}}>
  // LINALG: %[[yielded:.*]] = memref.cast %[[alloc]] :
  // LINALG-SAME: memref<4x8xf32> to memref<?x8xf32>
  // LINALG: scf.yield %[[yielded]], %[[c0]], %[[c0]] :
  // LINALG-SAME: memref<?x8xf32>, index, index
  // LINALG: }
  // LINALG: %[[res:.*]] = vector.transfer_read %[[ifres]]#0[%[[ifres]]#1, %[[ifres]]#2], %cst
  // LINALG-SAME: {in_bounds = [true, true]} : memref<?x8xf32>, vector<4x8xf32>
  %1 = vector.transfer_read %A[%i, %j], %f0 : memref<?x8xf32>, vector<4x8xf32>

  // LINALG: return %[[res]] : vector<4x8xf32>
  return %1 : vector<4x8xf32>
}

// CHECK-LABEL: split_vector_transfer_read_strided_2d(
// CHECK-SAME: %[[A:[a-zA-Z0-9]*]]: memref
// CHECK-SAME: %[[i:[a-zA-Z0-9]*]]: index
// CHECK-SAME: %[[j:[a-zA-Z0-9]*]]: index

// LINALG-LABEL: split_vector_transfer_read_strided_2d(
// LINALG-SAME: %[[A:[a-zA-Z0-9]*]]: memref
// LINALG-SAME: %[[i:[a-zA-Z0-9]*]]: index
// LINALG-SAME: %[[j:[a-zA-Z0-9]*]]: index
func.func @split_vector_transfer_read_strided_2d(
    %A: memref<7x8xf32, offset:?, strides:[?, 1]>,
    %i: index, %j: index) -> vector<4x8xf32> {
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32

  // CHECK-DAG: %[[c7:.*]] = arith.constant 7 : index
  // CHECK-DAG: %[[c8:.*]] = arith.constant 8 : index
  // CHECK-DAG: %[[c0:.*]] = arith.constant 0 : index
  // alloca for boundary full tile
  // CHECK: %[[alloc:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
  // %i + 4 <= dim(%A, 0)
  // CHECK: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]]
  // CHECK: %[[cmp0:.*]] = arith.cmpi sle, %[[idx0]], %[[c7]] : index
  // %j + 8 <= dim(%A, 1)
  // CHECK: %[[idx1:.*]] = affine.apply #[[$map_p8]]()[%[[j]]]
  // CHECK: %[[cmp1:.*]] = arith.cmpi sle, %[[idx1]], %[[c8]] : index
  // are both conds true
  // CHECK: %[[cond:.*]] = arith.andi %[[cmp0]], %[[cmp1]] : i1
  // CHECK: %[[ifres:.*]]:3 = scf.if %[[cond]] -> (memref<?x8xf32, #[[$map_2d_stride_1]]>, index, index) {
  // inBounds but not cast-compatible: yield a memref_casted form of %A
  // CHECK: %[[casted:.*]] = memref.cast %arg0 :
  // CHECK-SAME: memref<7x8xf32, #[[$map_2d_stride_1]]> to memref<?x8xf32, #[[$map_2d_stride_1]]>
  // CHECK: scf.yield %[[casted]], %[[i]], %[[j]] :
  // CHECK-SAME: memref<?x8xf32, #[[$map_2d_stride_1]]>, index, index
  // CHECK: } else {
  // slow path, fill tmp alloc and yield a memref_casted version of it
  // CHECK: %[[slow:.*]] = vector.transfer_read %[[A]][%[[i]], %[[j]]], %cst :
  // CHECK-SAME: memref<7x8xf32, #[[$map_2d_stride_1]]>, vector<4x8xf32>
  // CHECK: %[[cast_alloc:.*]] = vector.type_cast %[[alloc]] :
  // CHECK-SAME: memref<4x8xf32> to memref<vector<4x8xf32>>
  // CHECK: store %[[slow]], %[[cast_alloc]][] :
  // CHECK-SAME: memref<vector<4x8xf32>>
  // CHECK: %[[yielded:.*]] = memref.cast %[[alloc]] :
  // CHECK-SAME: memref<4x8xf32> to memref<?x8xf32, #[[$map_2d_stride_1]]>
  // CHECK: scf.yield %[[yielded]], %[[c0]], %[[c0]] :
  // CHECK-SAME: memref<?x8xf32, #[[$map_2d_stride_1]]>, index, index
  // CHECK: }
  // CHECK: %[[res:.*]] = vector.transfer_read {{.*}} {in_bounds = [true, true]} :
  // CHECK-SAME: memref<?x8xf32, #[[$map_2d_stride_1]]>, vector<4x8xf32>

  // LINALG-DAG: %[[c0:.*]] = arith.constant 0 : index
  // LINALG-DAG: %[[c4:.*]] = arith.constant 4 : index
  // LINALG-DAG: %[[c7:.*]] = arith.constant 7 : index
  // LINALG-DAG: %[[c8:.*]] = arith.constant 8 : index
  // alloca for boundary full tile
  // LINALG: %[[alloc:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
  // %i + 4 <= dim(%A, 0)
  // LINALG: %[[idx0:.*]] = affine.apply #[[$map_p4]]()[%[[i]]]
  // LINALG: %[[cmp0:.*]] = arith.cmpi sle, %[[idx0]], %[[c7]] : index
  // %j + 8 <= dim(%A, 1)
  // LINALG: %[[idx1:.*]] = affine.apply #[[$map_p8]]()[%[[j]]]
  // LINALG: %[[cmp1:.*]] = arith.cmpi sle, %[[idx1]], %[[c8]] : index
  // are both conds true
  // LINALG: %[[cond:.*]] = arith.andi %[[cmp0]], %[[cmp1]] : i1
  // LINALG: %[[ifres:.*]]:3 = scf.if %[[cond]] -> (memref<?x8xf32, #[[$map_2d_stride_1]]>, index, index) {
  // inBounds but not cast-compatible: yield a memref_casted form of %A
  // LINALG: %[[casted:.*]] = memref.cast %arg0 :
  // LINALG-SAME: memref<7x8xf32, #[[$map_2d_stride_1]]> to memref<?x8xf32, #[[$map_2d_stride_1]]>
  // LINALG: scf.yield %[[casted]], %[[i]], %[[j]] :
  // LINALG-SAME: memref<?x8xf32, #[[$map_2d_stride_1]]>, index, index
  // LINALG: } else {
  // slow path, fill tmp alloc and yield a memref_casted version of it
  // LINALG: linalg.fill ins(%cst : f32) outs(%[[alloc]] : memref<4x8xf32>)
  // LINALG: %[[sv0:.*]] = affine.min #[[$bounds_map_4]](%[[c7]], %[[i]], %[[c4]])
  // LINALG: %[[sv1:.*]] = affine.min #[[$bounds_map_8]](%[[c8]], %[[j]], %[[c8]])
  // LINALG: %[[sv:.*]] = memref.subview %[[A]][%[[i]], %[[j]]] [%[[sv0]], %[[sv1]]] [1, 1]
  // LINALG-SAME: memref<7x8xf32, #[[$map_2d_stride_1]]> to memref<?x?xf32, #[[$map_2d_stride_1]]>
  // LINALG: %[[alloc_view:.*]] = memref.subview %[[alloc]][0, 0] [%[[sv0]], %[[sv1]]] [1, 1]
  // LINALG: memref.copy %[[sv]], %[[alloc_view]] : memref<?x?xf32, #[[$map_2d_stride_1]]> to memref<?x?xf32, #{{.*}}>
  // LINALG: %[[yielded:.*]] = memref.cast %[[alloc]] :
  // LINALG-SAME: memref<4x8xf32> to memref<?x8xf32, #[[$map_2d_stride_1]]>
  // LINALG: scf.yield %[[yielded]], %[[c0]], %[[c0]] :
  // LINALG-SAME: memref<?x8xf32, #[[$map_2d_stride_1]]>, index, index
  // LINALG: }
  // LINALG: %[[res:.*]] = vector.transfer_read {{.*}} {in_bounds = [true, true]} :
  // LINALG-SAME: memref<?x8xf32, #[[$map_2d_stride_1]]>, vector<4x8xf32>
  %1 = vector.transfer_read %A[%i, %j], %f0 :
    memref<7x8xf32, offset:?, strides:[?, 1]>, vector<4x8xf32>

  // CHECK: return %[[res]] : vector<4x8xf32>
  return %1 : vector<4x8xf32>
}

// -----

func.func @split_vector_transfer_write_2d(%V: vector<4x8xf32>, %A: memref<?x8xf32>, %i: index, %j: index) {
  vector.transfer_write %V, %A[%i, %j] :
    vector<4x8xf32>, memref<?x8xf32>
  return
}

// CHECK-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 4)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 8)>
// CHECK: func @split_vector_transfer_write_2d(
// CHECK-SAME: %[[VEC:.*]]: vector<4x8xf32>,
// CHECK-SAME: %[[DEST:.*]]: memref<?x8xf32>,
// CHECK-SAME: %[[I:.*]]: index,
// CHECK-SAME: %[[J:.*]]: index) {
// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[CT:.*]] = arith.constant true
// CHECK: %[[TEMP:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
// CHECK: %[[VAL_8:.*]] = affine.apply #[[MAP0]]()[%[[I]]]
// CHECK: %[[DIM0:.*]] = memref.dim %[[DEST]], %[[C0]] : memref<?x8xf32>
// CHECK: %[[DIM0_IN:.*]] = arith.cmpi sle, %[[VAL_8]], %[[DIM0]] : index
// CHECK: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[J]]]
// CHECK: %[[DIM1_IN:.*]] = arith.cmpi sle, %[[DIM1]], %[[C8]] : index
// CHECK: %[[IN_BOUNDS:.*]] = arith.andi %[[DIM0_IN]], %[[DIM1_IN]] : i1
// CHECK: %[[IN_BOUND_DEST:.*]]:3 = scf.if %[[IN_BOUNDS]] ->
// CHECK-SAME: (memref<?x8xf32>, index, index) {
// CHECK: scf.yield %[[DEST]], %[[I]], %[[J]] : memref<?x8xf32>, index, index
// CHECK: } else {
// CHECK: %[[VAL_15:.*]] = memref.cast %[[TEMP]]
// CHECK-SAME: : memref<4x8xf32> to memref<?x8xf32>
// CHECK: scf.yield %[[VAL_15]], %[[C0]], %[[C0]]
// CHECK-SAME: : memref<?x8xf32>, index, index
// CHECK: }
// CHECK: vector.transfer_write %[[VEC]],
// CHECK-SAME: %[[IN_BOUND_DEST:.*]]#0[%[[IN_BOUND_DEST]]#1, %[[IN_BOUND_DEST]]#2]
// CHECK-SAME: {in_bounds = [true, true]} : vector<4x8xf32>, memref<?x8xf32>
// CHECK: %[[OUT_BOUNDS:.*]] = arith.xori %[[IN_BOUNDS]], %[[CT]] : i1
// CHECK: scf.if %[[OUT_BOUNDS]] {
// CHECK: %[[CASTED:.*]] = vector.type_cast %[[TEMP]]
// CHECK-SAME: : memref<4x8xf32> to memref<vector<4x8xf32>>
// CHECK: %[[RESULT_COPY:.*]] = memref.load %[[CASTED]][]
// CHECK-SAME: : memref<vector<4x8xf32>>
// CHECK: vector.transfer_write %[[RESULT_COPY]],
// CHECK-SAME: %[[DEST]][%[[I]], %[[J]]]
// CHECK-SAME: : vector<4x8xf32>, memref<?x8xf32>
// CHECK: }
// CHECK: return
// CHECK: }

// LINALG-DAG: #[[MAP0:.*]] = affine_map<()[s0] -> (s0 + 4)>
// LINALG-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 8)>
// LINALG-DAG: #[[MAP2:.*]] = affine_map<(d0, d1, d2) -> (d0 - d1, 4)>
// LINALG-DAG: #[[MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0 - d1, 8)>
// LINALG-DAG: #[[MAP4:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 8 + s0 + d1)>
// LINALG: func @split_vector_transfer_write_2d(
// LINALG-SAME: %[[VEC:.*]]: vector<4x8xf32>,
// LINALG-SAME: %[[DEST:.*]]: memref<?x8xf32>,
// LINALG-SAME: %[[I:.*]]: index,
// LINALG-SAME: %[[J:.*]]: index) {
// LINALG-DAG: %[[CT:.*]] = arith.constant true
// LINALG-DAG: %[[C0:.*]] = arith.constant 0 : index
// LINALG-DAG: %[[C4:.*]] = arith.constant 4 : index
// LINALG-DAG: %[[C8:.*]] = arith.constant 8 : index
// LINALG: %[[TEMP:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
// LINALG: %[[IDX0:.*]] = affine.apply #[[MAP0]]()[%[[I]]]
// LINALG: %[[DIM0:.*]] = memref.dim %[[DEST]], %[[C0]] : memref<?x8xf32>
// LINALG: %[[DIM0_IN:.*]] = arith.cmpi sle, %[[IDX0]], %[[DIM0]] : index
// LINALG: %[[DIM1:.*]] = affine.apply #[[MAP1]]()[%[[J]]]
// LINALG: %[[DIM1_IN:.*]] = arith.cmpi sle, %[[DIM1]], %[[C8]] : index
// LINALG: %[[IN_BOUNDS:.*]] = arith.andi %[[DIM0_IN]], %[[DIM1_IN]] : i1
// LINALG: %[[IN_BOUND_DEST:.*]]:3 = scf.if %[[IN_BOUNDS]]
// LINALG-SAME: -> (memref<?x8xf32>, index, index) {
// LINALG: scf.yield %[[DEST]], %[[I]], %[[J]] : memref<?x8xf32>, index, index
// LINALG: } else {
// LINALG: %[[VAL_16:.*]] = memref.cast %[[TEMP]] : memref<4x8xf32> to memref<?x8xf32>
// LINALG: scf.yield %[[VAL_16]], %[[C0]], %[[C0]] : memref<?x8xf32>, index, index
// LINALG: }
// LINALG: vector.transfer_write %[[VEC]],
// LINALG-SAME: %[[IN_BOUND_DEST:.*]]#0[%[[IN_BOUND_DEST]]#1, %[[IN_BOUND_DEST]]#2]
// LINALG-SAME: {in_bounds = [true, true]} : vector<4x8xf32>, memref<?x8xf32>
// LINALG: %[[OUT_BOUNDS:.*]] = arith.xori %[[IN_BOUNDS]], %[[CT]] : i1
// LINALG: scf.if %[[OUT_BOUNDS]] {
// LINALG: %[[VAL_19:.*]] = memref.dim %[[DEST]], %[[C0]] : memref<?x8xf32>
// LINALG-DAG: %[[VAL_20:.*]] = affine.min #[[MAP2]](%[[VAL_19]], %[[I]], %[[C4]])
// LINALG-DAG: %[[VAL_21:.*]] = affine.min #[[MAP3]](%[[C8]], %[[J]], %[[C8]])
// LINALG: %[[VAL_22:.*]] = memref.subview %[[TEMP]]
// LINALG-SAME: [0, 0] [%[[VAL_20]], %[[VAL_21]]]
// LINALG-SAME: [1, 1] : memref<4x8xf32> to memref<?x?xf32, #[[MAP4]]>
// LINALG: %[[DEST_VIEW:.*]] = memref.subview %[[DEST]][%[[I]], %[[J]]] [%[[VAL_20]], %[[VAL_21]]] [1, 1]
// LINALG: memref.copy %[[VAL_22]], %[[DEST_VIEW]]
// LINALG-SAME: : memref<?x?xf32, #[[MAP4]]> to memref<?x?xf32, #{{.*}}>
// LINALG: }
// LINALG: return
// LINALG: }

// -----

func.func @split_vector_transfer_write_strided_2d(
    %V: vector<4x8xf32>, %A: memref<7x8xf32, offset:?, strides:[?, 1]>,
    %i: index, %j: index) {
  vector.transfer_write %V, %A[%i, %j] :
    vector<4x8xf32>, memref<7x8xf32, offset:?, strides:[?, 1]>
  return
}

// CHECK-DAG: #[[MAP0:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
// CHECK-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 4)>
// CHECK-DAG: #[[MAP2:.*]] = affine_map<()[s0] -> (s0 + 8)>
// CHECK: func @split_vector_transfer_write_strided_2d(
// CHECK-SAME: %[[VEC:.*]]: vector<4x8xf32>,
// CHECK-SAME: %[[DEST:.*]]: memref<7x8xf32, #[[MAP0]]>,
// CHECK-SAME: %[[I:.*]]: index,
// CHECK-SAME: %[[J:.*]]: index) {
// CHECK-DAG: %[[C7:.*]] = arith.constant 7 : index
// CHECK-DAG: %[[C8:.*]] = arith.constant 8 : index
// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index
// CHECK-DAG: %[[CT:.*]] = arith.constant true
// CHECK: %[[TEMP:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
// CHECK: %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[I]]]
// CHECK: %[[DIM0_IN:.*]] = arith.cmpi sle, %[[DIM0]], %[[C7]] : index
// CHECK: %[[DIM1:.*]] = affine.apply #[[MAP2]]()[%[[J]]]
// CHECK: %[[DIM1_IN:.*]] = arith.cmpi sle, %[[DIM1]], %[[C8]] : index
// CHECK: %[[IN_BOUNDS:.*]] = arith.andi %[[DIM0_IN]], %[[DIM1_IN]] : i1
// CHECK: %[[IN_BOUND_DEST:.*]]:3 = scf.if %[[IN_BOUNDS]]
// CHECK-SAME: -> (memref<?x8xf32, #[[MAP0]]>, index, index) {
// CHECK: %[[VAL_15:.*]] = memref.cast %[[DEST]]
// CHECK-SAME: : memref<7x8xf32, #[[MAP0]]> to memref<?x8xf32, #[[MAP0]]>
// CHECK: scf.yield %[[VAL_15]], %[[I]], %[[J]]
// CHECK-SAME: : memref<?x8xf32, #[[MAP0]]>, index, index
// CHECK: } else {
// CHECK: %[[VAL_16:.*]] = memref.cast %[[TEMP]]
// CHECK-SAME: : memref<4x8xf32> to memref<?x8xf32, #[[MAP0]]>
// CHECK: scf.yield %[[VAL_16]], %[[C0]], %[[C0]]
// CHECK-SAME: : memref<?x8xf32, #[[MAP0]]>, index, index
// CHECK: }
// CHECK: vector.transfer_write %[[VEC]],
// CHECK-SAME: %[[IN_BOUND_DEST:.*]]#0
// CHECK-SAME: [%[[IN_BOUND_DEST]]#1, %[[IN_BOUND_DEST]]#2]
// CHECK-SAME: {in_bounds = [true, true]} : vector<4x8xf32>, memref<?x8xf32, #[[MAP0]]>
// CHECK: %[[OUT_BOUNDS:.*]] = arith.xori %[[IN_BOUNDS]], %[[CT]] : i1
// CHECK: scf.if %[[OUT_BOUNDS]] {
// CHECK: %[[VAL_19:.*]] = vector.type_cast %[[TEMP]]
// CHECK-SAME: : memref<4x8xf32> to memref<vector<4x8xf32>>
// CHECK: %[[VAL_20:.*]] = memref.load %[[VAL_19]][]
// CHECK-SAME: : memref<vector<4x8xf32>>
// CHECK: vector.transfer_write %[[VAL_20]], %[[DEST]][%[[I]], %[[J]]]
// CHECK-SAME: : vector<4x8xf32>, memref<7x8xf32, #[[MAP0]]>
// CHECK: }
// CHECK: return
// CHECK: }

// LINALG-DAG: #[[MAP0:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)>
// LINALG-DAG: #[[MAP1:.*]] = affine_map<()[s0] -> (s0 + 4)>
// LINALG-DAG: #[[MAP2:.*]] = affine_map<()[s0] -> (s0 + 8)>
// LINALG-DAG: #[[MAP3:.*]] = affine_map<(d0, d1, d2) -> (d0 - d1, 4)>
// LINALG-DAG: #[[MAP4:.*]] = affine_map<(d0, d1, d2) -> (d0 - d1, 8)>
// LINALG-DAG: #[[MAP5:.*]] = affine_map<(d0, d1)[s0] -> (d0 * 8 + s0 + d1)>
// LINALG: func @split_vector_transfer_write_strided_2d(
// LINALG-SAME: %[[VEC:.*]]: vector<4x8xf32>,
// LINALG-SAME: %[[DEST:.*]]: memref<7x8xf32, #[[MAP0]]>,
// LINALG-SAME: %[[I:.*]]: index,
// LINALG-SAME: %[[J:.*]]: index) {
// LINALG-DAG: %[[C0:.*]] = arith.constant 0 : index
// LINALG-DAG: %[[CT:.*]] = arith.constant true
// LINALG-DAG: %[[C7:.*]] = arith.constant 7 : index
// LINALG-DAG: %[[C4:.*]] = arith.constant 4 : index
// LINALG-DAG: %[[C8:.*]] = arith.constant 8 : index
// LINALG: %[[TEMP:.*]] = memref.alloca() {alignment = 32 : i64} : memref<4x8xf32>
// LINALG: %[[DIM0:.*]] = affine.apply #[[MAP1]]()[%[[I]]]
// LINALG: %[[DIM0_IN:.*]] = arith.cmpi sle, %[[DIM0]], %[[C7]] : index
// LINALG: %[[DIM1:.*]] = affine.apply #[[MAP2]]()[%[[J]]]
// LINALG: %[[DIM1_IN:.*]] = arith.cmpi sle, %[[DIM1]], %[[C8]] : index
// LINALG: %[[IN_BOUNDS:.*]] = arith.andi %[[DIM0_IN]], %[[DIM1_IN]] : i1
// LINALG: %[[IN_BOUND_DEST:.*]]:3 = scf.if %[[IN_BOUNDS]]
// LINALG-SAME: -> (memref<?x8xf32, #[[MAP0]]>, index, index) {
// LINALG: %[[VAL_16:.*]] = memref.cast %[[DEST]]
// LINALG-SAME: : memref<7x8xf32, #[[MAP0]]> to memref<?x8xf32, #[[MAP0]]>
// LINALG: scf.yield %[[VAL_16]], %[[I]], %[[J]]
// LINALG-SAME: : memref<?x8xf32, #[[MAP0]]>, index, index
// LINALG: } else {
// LINALG: %[[VAL_17:.*]] = memref.cast %[[TEMP]]
// LINALG-SAME: : memref<4x8xf32> to memref<?x8xf32, #[[MAP0]]>
// LINALG: scf.yield %[[VAL_17]], %[[C0]], %[[C0]]
// LINALG-SAME: : memref<?x8xf32, #[[MAP0]]>, index, index
// LINALG: }
// LINALG: vector.transfer_write %[[VEC]],
// LINALG-SAME: %[[IN_BOUND_DEST:.*]]#0
// LINALG-SAME: [%[[IN_BOUND_DEST]]#1, %[[IN_BOUND_DEST]]#2]
// LINALG-SAME: {in_bounds = [true, true]}
// LINALG-SAME: : vector<4x8xf32>, memref<?x8xf32, #[[MAP0]]>
// LINALG: %[[OUT_BOUNDS:.*]] = arith.xori %[[IN_BOUNDS]], %[[CT]] : i1
// LINALG: scf.if %[[OUT_BOUNDS]] {
// LINALG-DAG: %[[VAL_20:.*]] = affine.min #[[MAP3]](%[[C7]], %[[I]], %[[C4]])
// LINALG-DAG: %[[VAL_21:.*]] = affine.min #[[MAP4]](%[[C8]], %[[J]], %[[C8]])
// LINALG: %[[VAL_22:.*]] = memref.subview %[[TEMP]]
// LINALG-SAME: [0, 0] [%[[VAL_20]], %[[VAL_21]]]
// LINALG-SAME: [1, 1] : memref<4x8xf32> to memref<?x?xf32, #[[MAP5]]>
// LINALG: %[[DEST_VIEW:.*]] = memref.subview %[[DEST]][%[[I]], %[[J]]] [%[[VAL_20]], %[[VAL_21]]] [1, 1]
// LINALG: memref.copy %[[VAL_22]], %[[DEST_VIEW]]
// LINALG-SAME: : memref<?x?xf32, #[[MAP5]]> to memref<?x?xf32, #[[MAP0]]>
// LINALG: }
// LINALG: return
// LINALG: }

// -----

func.func private @fake_side_effecting_fun(%0: vector<2x2xf32>) -> ()

// CHECK-LABEL: transfer_read_within_async_execute
func.func @transfer_read_within_async_execute(%A : memref<?x?xf32>) -> !async.token {
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32
  // CHECK-NOT: alloca
  // CHECK: async.execute
  // CHECK: alloca
  %token = async.execute {
    %0 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x?xf32>, vector<2x2xf32>
    func.call @fake_side_effecting_fun(%0) : (vector<2x2xf32>) -> ()
    async.yield
  }
  return %token : !async.token
}

// -----

func.func private @fake_side_effecting_fun(%0: vector<2x2xf32>) -> ()

// Ensure that `alloca`s are inserted outside of loops even though loops are
// considered allocation scopes.
// CHECK-LABEL: transfer_read_within_scf_for
func.func @transfer_read_within_scf_for(%A : memref<?x?xf32>, %lb : index, %ub : index, %step : index) {
  %c0 = arith.constant 0 : index
  %f0 = arith.constant 0.0 : f32
  // CHECK: alloca
  // CHECK: scf.for
  // CHECK-NOT: alloca
  scf.for %i = %lb to %ub step %step {
    %0 = vector.transfer_read %A[%c0, %c0], %f0 : memref<?x?xf32>, vector<2x2xf32>
    func.call @fake_side_effecting_fun(%0) : (vector<2x2xf32>) -> ()
  }
  return
}