// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs" -split-input-file | FileCheck %s

// Run fuzzer with different seeds.
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=23" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=59" -split-input-file -o /dev/null
// RUN: mlir-opt %s -one-shot-bufferize="bufferize-function-boundaries test-analysis-only allow-return-allocs analysis-fuzzer-seed=91" -split-input-file -o /dev/null

// TODO: Extract op-specific test cases and move them to their respective
// dialects.

//===----------------------------------------------------------------------===//
// Simple cases
//===----------------------------------------------------------------------===//

// -----

// CHECK-LABEL: func @extract_slice_fun(
func.func @extract_slice_fun(%A : tensor<?xf32> {bufferization.writable = false},
// CHECK-SAME:               bufferization.access = "read"
                             %B : tensor<?xf32> {bufferization.writable = true})
// CHECK-SAME:               bufferization.access = "read"
  -> (tensor<4xf32>, tensor<8xf32>)
{
  // Since tensor.extract_slice is not used in a write, it is not compelled to
  // bufferize out of place. Let callers decide whether they want to create
  // aliasing subviews at all call sites or whether they allocate.
  // This is true irrespective of whether the function argument is inplaceable.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %r1 = tensor.extract_slice %B[0][8][1] : tensor<?xf32> to tensor<8xf32>

  return %r0, %r1: tensor<4xf32>, tensor<8xf32>
}

// -----

// CHECK-LABEL: func @insert_slice_fun(
func.func @insert_slice_fun(%A : tensor<?xf32> {bufferization.writable = false},
// CHECK-SAME:              bufferization.access = "read"
                            %B : tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME:              bufferization.access = "read-write"
                            %C : tensor<4xf32> {bufferization.writable = false})
// CHECK-SAME:              bufferization.access = "read"
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // Must bufferize out of place.
  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false"]}
  %r0 = tensor.insert_slice %C into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>

  // Bufferizes inplace.
  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
  %r1 = tensor.insert_slice %C into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [-1, 1]
  return %r0, %r1: tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @conflict_on_B(
func.func @conflict_on_B(%A : tensor<4x4xf32> {bufferization.writable = true},
// CHECK-SAME:           bufferization.access = "read"
                         %B : tensor<4x4xf32> {bufferization.writable = true})
// CHECK-SAME:           bufferization.access = "read-write"
  -> (tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>)
{
  // matmul output operand interferes with input operand.
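  // (%B is read as an input while also serving as the output; writing the
  // result in place would clobber input values that are still needed, hence
  // the "false" on the output operand below.)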
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
  %C = linalg.matmul ins(%A, %B: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%B: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // matmul output operand interferes with input operand.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
  %D = linalg.matmul ins(%B, %A: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%B: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // matmul output operand does not interfere with input operand.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%B: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [-1, -1, 1]
  return %C, %D, %E: tensor<4x4xf32>, tensor<4x4xf32>, tensor<4x4xf32>
}

//===----------------------------------------------------------------------===//
// Length-1 producer-consumer cases.
//===----------------------------------------------------------------------===//

// -----

// CHECK-LABEL: func @extract_slice_extract_slice(
func.func @extract_slice_extract_slice(
    %A : tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read"
    %B : tensor<?xf32> {bufferization.writable = false})
// CHECK-SAME: bufferization.access = "read"
  -> (tensor<2xf32>, tensor<2xf32>)
{
  // Since tensor.extract_slice is not used in a write, it is not compelled to
  // bufferize out of place. Let callers decide whether they want to create
  // aliasing subviews at all call sites or whether they allocate.
  // This is true irrespective of whether the function argument is inplaceable.
  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %r1 = tensor.extract_slice %r0[0][2][1] : tensor<4xf32> to tensor<2xf32>

  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // CHECK: {__inplace_operands_attr__ = ["true"]}
  %r3 = tensor.extract_slice %r2[0][2][1] : tensor<4xf32> to tensor<2xf32>

  return %r1, %r3: tensor<2xf32>, tensor<2xf32>
}

// -----

// CHECK-LABEL: func @insert_slice_insert_slice(
func.func @insert_slice_insert_slice(
    %A : tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read-write"
    %A2 : tensor<4xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read-write"
    %A3 : tensor<2xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read"
    %B : tensor<?xf32> {bufferization.writable = false},
// CHECK-SAME: bufferization.access = "read"
    %B2 : tensor<4xf32> {bufferization.writable = false},
// CHECK-SAME: bufferization.access = "read"
    %B3 : tensor<2xf32> {bufferization.writable = false})
// CHECK-SAME: bufferization.access = "read"
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // CHECK: {__inplace_operands_attr__ = ["true", "true"]}
  %r0 = tensor.insert_slice %A3 into %A2[0][2][1] : tensor<2xf32> into tensor<4xf32>

  // CHECK: {__inplace_operands_attr__ = ["true", "true"]}
  %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>

  // CHECK: {__inplace_operands_attr__ = ["true", "false"]}
  %r2 = tensor.insert_slice %B3 into %B2[0][2][1] : tensor<2xf32> into tensor<4xf32>

  // CHECK: {__inplace_operands_attr__ = ["true", "false"]}
  %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
  return %r1, %r3: tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @extract_slice_nonmatching_insert_slice
func.func @extract_slice_nonmatching_insert_slice(
    %A : tensor<?xf32> {bufferization.writable = true},
    %B : tensor<?xf32> {bufferization.writable = false},
    %idx: index)
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // %r1 bufferizes inplace because %A is inplaceable.
  // %r0 is an overlapping tensor.extract_slice that does not match, so it must
  // be out of place.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]}
  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // %r1 can bufferize inplace fine.
  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]}
  %r1 = tensor.insert_slice %r0 into %A[%idx][4][1] : tensor<4xf32> into tensor<?xf32>

  // %r3 does not bufferize inplace because %B is not inplaceable.
  // %r2 is an overlapping tensor.extract_slice that does not match, but it does
  // not alias with the buffer coming from %r3, so it can actually bufferize
  // inplace.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // %r3 cannot bufferize inplace since %B is not inplaceable.
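  // (With test-analysis-only, the out-of-place decision is merely recorded in
  // the attribute; when bufferizing for real it materializes as a copy of the
  // destination buffer.)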
  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none"]}
  %r3 = tensor.insert_slice %r2 into %B[%idx][4][1] : tensor<4xf32> into tensor<?xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
  return %r1, %r3: tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @extract_slice_matching_insert_slice
func.func @extract_slice_matching_insert_slice(
    %A : tensor<?xf32> {bufferization.writable = true},
    %B : tensor<?xf32> {bufferization.writable = false})
  -> (tensor<?xf32>, tensor<?xf32>)
{
  // %r1 bufferizes inplace because %A is inplaceable.
  // %r0 is a tensor.extract_slice that matches, so it can also be bufferized
  // inplace.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %r0 = tensor.extract_slice %A[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
  %r1 = tensor.insert_slice %r0 into %A[0][4][1] : tensor<4xf32> into tensor<?xf32>

  // %r2 is a tensor.extract_slice that matches %r3, so it can be bufferized
  // inplace.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %r2 = tensor.extract_slice %B[0][4][1] : tensor<?xf32> to tensor<4xf32>

  // tensor.insert_slice cannot bufferize inplace.
  // This should have been captured by a canonicalization pattern; it would
  // be unproductive to have special logic in bufferization to encode matching
  // insert_slice(extract_slice(A), A).
  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false"]}
  %r3 = tensor.insert_slice %r2 into %B[0][4][1] : tensor<4xf32> into tensor<?xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
  return %r1, %r3: tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK-LABEL: @read_of_matching_insert_slice_source
func.func @read_of_matching_insert_slice_source(
    %A : tensor<?xf32> {bufferization.writable = true},
    %idx : index,
    %idx2 : index)
  -> (tensor<?xf32>, vector<5xf32>)
{
  %cst = arith.constant 0.0 : f32
  %cst2 = arith.constant 1.0 : f32

  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
  %0 = tensor.extract_slice %A[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>

  // CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?xf32>) -> tensor<?xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
  %2 = tensor.insert_slice %1 into %A[%idx][%idx][1] : tensor<?xf32> into tensor<?xf32>

  %3 = vector.transfer_read %1[%idx2], %cst2 : tensor<?xf32>, vector<5xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
  return %2, %3 : tensor<?xf32>, vector<5xf32>
}

// -----

// CHECK-LABEL: @read_of_matching_insert_slice_source_interleaved
func.func @read_of_matching_insert_slice_source_interleaved(
    %A : tensor<?xf32> {bufferization.writable = true},
    %idx : index,
    %idx2 : index,
    %idx3 : index)
  -> (tensor<?xf32>, vector<5xf32>)
{
  %cst = arith.constant 0.0 : f32
  %cst2 = arith.constant 1.0 : f32

  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]}
  %0 = tensor.extract_slice %A[%idx][%idx][1] : tensor<?xf32> to tensor<?xf32>

  // CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<?xf32>) -> tensor<?xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
  %2 = tensor.insert_slice %1 into %A[%idx][%idx][1] : tensor<?xf32> into tensor<?xf32>

  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
  %4 = tensor.extract_slice %2[%idx3][%idx3][1] : tensor<?xf32> to tensor<?xf32>

  // CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  %5 = linalg.fill ins(%cst : f32) outs(%4 : tensor<?xf32>) -> tensor<?xf32>

  %3 = vector.transfer_read %1[%idx2], %cst2 : tensor<?xf32>, vector<5xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
  %6 = tensor.insert_slice %5 into %2[%idx3][%idx3][1] : tensor<?xf32> into tensor<?xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0, -1]
  return %6, %3 : tensor<?xf32>, vector<5xf32>
}

// -----

// CHECK-LABEL: func @extract_slice_linalg_readonly_use
func.func @extract_slice_linalg_readonly_use(
    %A : tensor<?x?xf32> {bufferization.writable = false},
    %B : tensor<4x4xf32> {bufferization.writable = false},
    %C : tensor<4x4xf32> {bufferization.writable = true})
  -> (tensor<4x4xf32>, tensor<4x4xf32>)
{
  // tensor.extract_slice is only used as a read, so there is no interference
  // irrespective of the user's inplace status.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %sA = tensor.extract_slice %A[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

  // matmul output operand is not inplaceable at the function boundary.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
  %D = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%B: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // matmul output operand is inplaceable at the function boundary.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %E = linalg.matmul ins(%sA, %B: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%C: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [-1, 2]
  return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
}

// -----

// CHECK-LABEL: func @extract_slice_to_linalg_write_use
func.func @extract_slice_to_linalg_write_use(
    %A : tensor<4x4xf32> {bufferization.writable = false},
    %B : tensor<?x?xf32> {bufferization.writable = false},
    %C : tensor<?x?xf32> {bufferization.writable = true})
  -> (tensor<4x4xf32>, tensor<4x4xf32>)
{
  // Step 4. %sB forward propagates to a write in %D, but that write is not
  // inplace. So %sB is only ever read and can bufferize inplace.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

  // Step 3. %sB has a read interference in %E, so it does not bufferize
  // inplace.
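  // (The "Step" numbering reflects the order in which the analysis decides
  // these ops: the last op in program order is decided first.)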
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]}
  %D = linalg.matmul ins(%B, %C: tensor<?x?xf32>, tensor<?x?xf32>)
                     outs(%sB: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // Step 2. %sC forward propagates to an inplace write in %E.
  // %sC backward propagates to %C which is inplaceable.
  // As a consequence this is bufferized inplace.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

  // Step 1. %sC backprops to the tensor.extract_slice producer which is not
  // considered an interference. This bufferizes inplace.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %E = linalg.matmul ins(%A, %sB: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%sC: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
}

// -----

// CHECK-LABEL: func @insert_slice_double_extract_slice
func.func @insert_slice_double_extract_slice(
    %s1: index,
    %s2: index,
    %s3: index,
    %s4: index,
    %A: tensor<8x6xf32> {bufferization.writable = false},
    %B: tensor<6x6xf32> {bufferization.writable = false},
    %C: tensor<30x20xf32> {bufferization.writable = true})
  -> tensor<30x20xf32>
{
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none", "none", "none"]}
  %15 = tensor.extract_slice %C[%s3, %s4] [%s1, %s2] [1, 1] : tensor<30x20xf32> to tensor<?x?xf32>

  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %18 = linalg.matmul ins(%A, %B : tensor<8x6xf32>, tensor<6x6xf32>) outs(%15 : tensor<?x?xf32>) -> tensor<?x?xf32>

  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
  %19 = tensor.extract_slice %18[0, 0] [%s1, %s2] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none", "none", "none"]}
  %20 = tensor.insert_slice %19 into %C[%s3, %s4] [%s1, %s2] [1, 1] : tensor<?x?xf32> into tensor<30x20xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [6]
  return %20 : tensor<30x20xf32>
}

//===----------------------------------------------------------------------===//
// Transitive cases
//===----------------------------------------------------------------------===//

// -----

// CHECK-LABEL: func @extract_slice_to_linalg_write_use
func.func @extract_slice_to_linalg_write_use(
    %A : tensor<4x4xf32> {bufferization.writable = false},
    %B : tensor<?x?xf32> {bufferization.writable = false},
    %C : tensor<?x?xf32> {bufferization.writable = true})
  -> (tensor<4x4xf32>, tensor<4x4xf32>)
{
  // Step 4. %sB forward propagates to an inplace write in %D.
  // %sB backward propagates to %B which is not inplaceable.
  // As a consequence this is bufferized out of place.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]}
  %sB = tensor.extract_slice %B[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

  // Step 3. %sB backprops to the tensor.extract_slice producer which is not
  // considered an interference. This bufferizes inplace.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %D = linalg.matmul ins(%B, %C: tensor<?x?xf32>, tensor<?x?xf32>)
                     outs(%sB: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  // Step 2. %sC forward propagates to an inplace write in %E.
  // %sC backward propagates to %C which is inplaceable.
  // As a consequence this is bufferized inplace.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  %sC = tensor.extract_slice %C[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>

  // Step 1. %sC backprops to the tensor.extract_slice producer which is not
  // considered an interference. This bufferizes inplace.
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %E = linalg.matmul ins(%A, %A: tensor<4x4xf32>, tensor<4x4xf32>)
                     outs(%sC: tensor<4x4xf32>)
    -> tensor<4x4xf32>

  return %D, %E: tensor<4x4xf32>, tensor<4x4xf32>
}

// -----

// CHECK-LABEL: func @nested_extract_slice_and_insert
func.func @nested_extract_slice_and_insert(
    %A : tensor<?x?xf32> {bufferization.writable = false},
    %B : tensor<?x?xf32> {bufferization.writable = true},
    %C : tensor<?x?xf32> {bufferization.writable = true},
    %idx : index,
    %sz1 : index,
    %sz2 : index)
  -> (tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>)
{
  %f0 = arith.constant 0.0 : f32

  // 2-level matching tensor.extract_slice / tensor.insert_slice into non
  // inplaceable %A.
  // - %rA is not inplaceable because %A is not inplaceable at the function
  //   boundary.
  // - once %rA is deemed not inplaceable, nothing prevents %rsA from being
  //   inplaceable.
  // - this propagates to %FA and %ssA being inplaceable.
  // - %sA would then bufferize to an inplace write (i.e. %FA) but %A is not
  //   inplaceable and so %sA is not inplaceable.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]}
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  // CHECK-NEXT: fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "false", "none", "none"]}
  %sA = tensor.extract_slice %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
  %ssA = tensor.extract_slice %sA[0, 0][4, 4][1, 1] : tensor<?x?xf32> to tensor<4x4xf32>
  %FA = linalg.fill ins(%f0 : f32) outs(%ssA : tensor<4x4xf32>) -> tensor<4x4xf32>
  %rsA = tensor.insert_slice %FA into %sA[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<?x?xf32>
  %rA = tensor.insert_slice %rsA into %A[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>

  // 3-level matching tensor.extract_slice / tensor.insert_slice into
  // inplaceable %B.
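  // Every level of this chain matches exactly, so the whole chain bufferizes
  // inplace: every operand below is either inplace ("true") or a non-tensor
  // ("none").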
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  // CHECK-NEXT: fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
  %sB = tensor.extract_slice %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
  %ssB = tensor.extract_slice %sB[0, 0][4, %idx][1, 1] : tensor<?x?xf32> to tensor<4x?xf32>
  %sssB = tensor.extract_slice %ssB[0, 0][4, 4][1, 1] : tensor<4x?xf32> to tensor<4x4xf32>
  %FB = linalg.fill ins(%f0 : f32) outs(%sssB : tensor<4x4xf32>) -> tensor<4x4xf32>
  %rssB = tensor.insert_slice %FB into %ssB[0, 0][4, 4][1, 1] : tensor<4x4xf32> into tensor<4x?xf32>
  %rsB = tensor.insert_slice %rssB into %sB[0, 0][4, %idx][1, 1] : tensor<4x?xf32> into tensor<?x?xf32>
  %rB = tensor.insert_slice %rsB into %B[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>

  // 2-level matching tensor.extract_slice / tensor.insert_slice into
  // inplaceable %C with a twist.
  // Throw a wrench in the system: %rsC production sizes do not match %ssC.
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none", "none"]}
  // The tensor.insert_slice that would be a candidate for matching does not
  // actually match. That tensor.insert_slice can still be bufferized inplace
  // nonetheless, but this tensor.extract_slice, which bufferizes to an inplace
  // write, cannot.
  // CHECK-NEXT: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none"]}
  // CHECK-NEXT: fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none"]}
  // CHECK-NEXT: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
  %sC = tensor.extract_slice %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
  %ssC = tensor.extract_slice %sC[0, 0][%sz1, 4][1, 1] : tensor<?x?xf32> to tensor<?x4xf32>
  %FC = linalg.fill ins(%f0 : f32) outs(%ssC : tensor<?x4xf32>) -> tensor<?x4xf32>
  %rsC = tensor.insert_slice %FC into %sC[0, 0][%sz2, 4][1, 1] : tensor<?x4xf32> into tensor<?x?xf32>
  %rC = tensor.insert_slice %rsC into %C[0, 0][%idx, %idx][1, 1] : tensor<?x?xf32> into tensor<?x?xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [-1, 1, 2]
  return %rA, %rB, %rC: tensor<?x?xf32>, tensor<?x?xf32>, tensor<?x?xf32>
}

// -----

//===----------------------------------------------------------------------===//
// Cross function boundary cases.
566//===----------------------------------------------------------------------===// 567 568func.func private @foo(tensor<64xf32>) 569 570// CHECK-LABEL: dependence_through_call 571func.func @dependence_through_call(%I : tensor<64xf32> {bufferization.writable = true}) { 572 %f1 = arith.constant 1.000000e+00 : f32 573 %f2 = arith.constant 2.000000e+00 : f32 574 575 // 2. %B already bufferizes inplace, %A would alias and have a different 576 // value. The calls to `foo` are determined to read conservatively, so %A 577 // cannot bufferize inplace. 578 // CHECK: fill 579 // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} 580 %A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32> 581 582 // 1. Bufferizes inplace: no alias to %A is yet possible. 583 // CHECK: fill 584 // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} 585 %B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32> 586 587 call @foo(%A) : (tensor<64xf32>) -> () 588 call @foo(%B) : (tensor<64xf32>) -> () 589 590 return 591} 592 593// ----- 594 595func.func private @foo(tensor<64xf32>) 596 597func.func private @bar(%A : tensor<64xf32>) { 598 call @foo(%A) : (tensor<64xf32>) -> () 599 return 600} 601 602func.func @read_dependence_through_scf_and_call( 603 %I : tensor<64xf32> {bufferization.writable = true}, 604 %I2 : tensor<64xf32> {bufferization.writable = true}) { 605 %c0 = arith.constant 0 : index 606 %c1 = arith.constant 1 : index 607 %c10 = arith.constant 10 : index 608 %f1 = arith.constant 1.000000e+00 : f32 609 %f2 = arith.constant 2.000000e+00 : f32 610 611 // 5. %B bufferizes inplace, %A would alias and have a different value. 612 // The calls to `foo` are determined to read conservatively, so %A cannot 613 // bufferize inplace. 614 // CHECK: fill 615 // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} 616 %A = linalg.fill ins(%f1 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32> 617 618 // 4. Bufferizes inplace: no alias to %A is yet possible. 619 // CHECK: fill 620 // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]} 621 %B = linalg.fill ins(%f2 : f32) outs(%I : tensor<64xf32>) -> tensor<64xf32> 622 623 // 3. Does not read or write, bufferizes inplace. 624 // CHECK: scf.for 625 // CHECK-NEXT: scf.yield 626 // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]} 627 // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true", "true"]} 628 %r:2 = scf.for %i = %c0 to %c10 step %c1 iter_args(%0 = %A, %1 = %B) 629 -> (tensor<64xf32>, tensor<64xf32>) 630 { 631 scf.yield %0, %1 : tensor<64xf32>, tensor<64xf32> 632 } 633 call @foo(%r#0) : (tensor<64xf32>) -> () 634 call @foo(%r#1) : (tensor<64xf32>) -> () 635 636 // 2. %B2 already bufferizes inplace, %A2 would alias and have a different 637 // value. The calls to `foo` are determined to read conservatively, so %A2 638 // cannot bufferize inplace. 639 // CHECK: fill 640 // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]} 641 %A2 = linalg.fill ins(%f1 : f32) outs(%I2 : tensor<64xf32>) -> tensor<64xf32> 642 643 // 1. Bufferizes inplace: no alias to %A2 is yet possible. 
  // CHECK: fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  %B2 = linalg.fill ins(%f2 : f32) outs(%I2 : tensor<64xf32>) -> tensor<64xf32>

  call @bar(%A2) : (tensor<64xf32>) -> ()
  call @bar(%B2) : (tensor<64xf32>) -> ()
  return
}

// -----

//===----------------------------------------------------------------------===//
// Transitive cases through extract_slice.
//===----------------------------------------------------------------------===//

// CHECK-LABEL: func @write_into_constant_via_alias
func.func @write_into_constant_via_alias(%v : vector<5xi32>,
                                         %s1 : index, %s2 : index,
                                         %s3 : index) -> tensor<?xi32> {
  %A = arith.constant dense<[1, 2, 3, 4]> : tensor<4xi32>
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none"]}
  %b = tensor.extract_slice %A[%s1][%s2][1] : tensor<4xi32> to tensor<?xi32>
  // CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
  %r = vector.transfer_write %v, %b[%s3] : vector<5xi32>, tensor<?xi32>
  return %r : tensor<?xi32>
}

// -----

func.func @matmul_on_tensors(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst_0 = arith.constant 0.000000e+00 : f32
  %cst_1 = arith.constant 1.000000e+00 : f32

  %7 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
  // CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
  %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>

  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %sA = tensor.extract_slice %8[0, 0][256, 16][1, 1]: tensor<256x256xf32> to tensor<256x16xf32>
  %sB = tensor.extract_slice %11[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32>
  %r = linalg.matmul
         ins(%sA, %sB : tensor<256x16xf32>, tensor<16x256xf32>)
         outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [2]
  return %r : tensor<256x256xf32>
}

// -----

func.func @matmul_on_tensors(
    %arg0: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg1: tensor<518x518xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false},
    %arg2: tensor<256x256xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true})
  -> tensor<256x256xf32>
{
  %c0 = arith.constant 0 : index
  %cst_0 = arith.constant 0.000000e+00 : f32
  %cst_1 = arith.constant 1.000000e+00 : f32
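
  // Both fills below write into the same alloc_tensor %7 and both results are
  // read later; only one fill can reuse %7's buffer, the other must copy.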

  %7 = bufferization.alloc_tensor() : tensor<256x256xf32>

  // CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false"]}
  // CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
  %8 = linalg.fill ins(%cst_0 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
  %9 = vector.transfer_read %arg0[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
  %10 = vector.transfer_write %9, %8[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>

  // CHECK: linalg.fill
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"]}
  // CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
  %11 = linalg.fill ins(%cst_1 : f32) outs(%7 : tensor<256x256xf32>) -> tensor<256x256xf32>
  %12 = vector.transfer_read %arg1[%c0, %c0], %cst_0 {in_bounds = [false, true]} : tensor<518x518xf32>, vector<256x256xf32>
  %13 = vector.transfer_write %12, %11[%c0, %c0] {in_bounds = [true, true]} : vector<256x256xf32>, tensor<256x256xf32>

  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]}
  // CHECK: linalg.matmul
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "true"]}
  %sA = tensor.extract_slice %10[0, 0][256, 16][1, 1]: tensor<256x256xf32> to tensor<256x16xf32>
  %sB = tensor.extract_slice %13[0, 0][16, 256][1, 1]: tensor<256x256xf32> to tensor<16x256xf32>
  %r = linalg.matmul
         ins(%sA, %sB : tensor<256x16xf32>, tensor<16x256xf32>)
         outs(%arg2 : tensor<256x256xf32>) -> tensor<256x256xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [2]
  return %r : tensor<256x256xf32>
}

// -----

//===----------------------------------------------------------------------===//
// Chain of tensor.insert_slice is better traversed in reverse order without
// prioritizing the tensor.insert_slice ops.
763//===----------------------------------------------------------------------===// 764 765// CHECK-LABEL: func @insert_slice_chain( 766func.func @insert_slice_chain( 767 %v1: vector<32x90xf32>, 768 %v2: vector<30x90xf32>, 769 %arg0: tensor<62x126xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false}, 770// CHECK-SAME: bufferization.access = "none" 771 %arg1: tensor<126x90xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = false}, 772// CHECK-SAME: bufferization.access = "none" 773 %arg2: tensor<62x90xf32> {bufferization.buffer_layout = affine_map<(d0, d1) -> (d0, d1)>, bufferization.writable = true}) 774// CHECK-SAME: bufferization.access = "write" 775 -> tensor<62x90xf32> attributes {passthrough = [["target-cpu", "skylake-avx512"], ["prefer-vector-width", "512"]]} 776{ 777 %c0 = arith.constant 0 : index 778 %cst = arith.constant 0.000000e+00 : f32 779 780 // CHECK: linalg.fill 781 // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true"] 782 %0 = linalg.fill ins(%cst : f32) outs(%arg2 : tensor<62x90xf32>) -> tensor<62x90xf32> 783 784 // CHECK: tensor.extract_slice 785 // CHECK-SAME: {__inplace_operands_attr__ = ["true"] 786 %2 = tensor.extract_slice %0[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32> 787 // CHECK: vector.transfer_write 788 // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"] 789 %7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = [true, true]} : vector<32x90xf32>, tensor<32x90xf32> 790 // CHECK: tensor.insert_slice 791 // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"] 792 %8 = tensor.insert_slice %7 into %0[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32> 793 794 // CHECK: tensor.extract_slice 795 // CHECK-SAME: {__inplace_operands_attr__ = ["true"] 796 %10 = tensor.extract_slice %8[32, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32> 797 // CHECK: vector.transfer_write 798 // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"] 799 %14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = [true, true]} : vector<30x90xf32>, tensor<30x90xf32> 800 // CHECK: tensor.insert_slice 801 // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"] 802 %15 = tensor.insert_slice %14 into %8[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32> 803 804 // CHECK: return 805 // CHECK-SAME: __equivalent_func_args__ = [4] 806 return %15 : tensor<62x90xf32> 807} 808 809// ----- 810 811//===----------------------------------------------------------------------===// 812// Insert point issue cases. 813//===----------------------------------------------------------------------===// 814 815// Only test IR validity wrt dominance. 
// CHECK-LABEL: func @ip
func.func @ip(%t: tensor<10x20xf32> {bufferization.writable = true},
              %x: index, %y: index, %v: vector<5x6xf32>)
  -> tensor<10x20xf32>
{
  %c0 = arith.constant 0 : index
  %c256 = arith.constant 256 : index
  %c257 = arith.constant 257 : index
  %r = scf.for %arg0 = %c0 to %c257 step %c256 iter_args(%arg1 = %t) -> (tensor<10x20xf32>) {
    %t1 = tensor.extract_slice %arg1[%x, 0] [5, %y] [1, 1] : tensor<10x20xf32> to tensor<5x?xf32>
    %t11 = tensor.extract_slice %t1[0, 0] [5, %y] [1, 1] : tensor<5x?xf32> to tensor<5x?xf32>
    %t2 = vector.transfer_write %v, %t11[%c0, %c0] : vector<5x6xf32>, tensor<5x?xf32>
    %t3 = tensor.insert_slice %t2 into %arg1[%x, 0] [5, %y] [1, 1] : tensor<5x?xf32> into tensor<10x20xf32>
    scf.yield %t3 : tensor<10x20xf32>
  }

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0]
  return %r : tensor<10x20xf32>
}

// -----

#accesses = [
  affine_map<(i) -> (i)>,
  affine_map<(i) -> (i)>,
  affine_map<(i) -> (i)>
]
#trait = {
  indexing_maps = #accesses,
  iterator_types = ["parallel"]
}

// CHECK-LABEL: func @linalg_op_same_out_tensors(
func.func @linalg_op_same_out_tensors(
    %t1: tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read"
    %t2: tensor<?xf32> {bufferization.writable = true})
// CHECK-SAME: bufferization.access = "write"
  -> (tensor<?xf32>, tensor<?xf32>){

  // CHECK: linalg.generic
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false"]
  %o:2 = linalg.generic #trait ins(%t1 : tensor<?xf32>)
                               outs(%t2, %t2 : tensor<?xf32>, tensor<?xf32>) {
    ^bb(%0: f32, %1: f32, %2 : f32) :
      linalg.yield %0, %0 : f32, f32
  } -> (tensor<?xf32>, tensor<?xf32>)

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [1, -1]
  return %o#0, %o#1 : tensor<?xf32>, tensor<?xf32>
}

// -----

#accesses = [
  affine_map<(i) -> (i)>,
  affine_map<(i) -> (i)>,
  affine_map<(i) -> (i)>,
  affine_map<(i) -> (i)>
]
#trait = {
  indexing_maps = #accesses,
  iterator_types = ["parallel"]
}

// CHECK-LABEL: func @linalg_op_same_out_tensors_2(
func.func @linalg_op_same_out_tensors_2(
    %t1: tensor<?xf32> {bufferization.writable = true},
// CHECK-SAME: bufferization.access = "read"
    %t2: tensor<?xf32> {bufferization.writable = true})
// CHECK-SAME: bufferization.access = "write"
  -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>){

  // CHECK: linalg.generic
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "false", "false"]
  %o:3 = linalg.generic #trait
           ins(%t1 : tensor<?xf32>)
           outs(%t2, %t2, %t2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>) {
    ^bb(%0: f32, %1: f32, %2 : f32, %3 : f32) :
      linalg.yield %0, %0, %0 : f32, f32, f32
  } -> (tensor<?xf32>, tensor<?xf32>, tensor<?xf32>)

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [1, -1, -1]
  return %o#0, %o#1, %o#2 : tensor<?xf32>, tensor<?xf32>, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @double_insert_slice_into_alias
func.func @double_insert_slice_into_alias(
    %v1: vector<32x90xf32>,
    %v2: vector<30x90xf32>,
    %arg2: tensor<62x90xf32> {bufferization.writable = true},
    %s1: index, %s2: index, %s3: index, %s4: index)
  -> (tensor<62x90xf32>, tensor<?x?xf32>)
{
  %c0 = arith.constant 0 : index

  // This extract_slice cannot bufferize inplace
  // because both the operand and the result are modified and returned
  // separately.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false", "none", "none", "none", "none"]
  %e = tensor.extract_slice %arg2[%s1, %s2][%s3, %s4][1, 1] : tensor<62x90xf32> to tensor<?x?xf32>

  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
  %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>
  // CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
  %7 = vector.transfer_write %v1, %2[%c0, %c0] {in_bounds = [true, true]} : vector<32x90xf32>, tensor<32x90xf32>
  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %8 = tensor.insert_slice %7 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>

  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
  %10 = tensor.extract_slice %e[32, 0] [30, 90] [1, 1] : tensor<?x?xf32> to tensor<30x90xf32>
  // CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none", "none"]
  %14 = vector.transfer_write %v2, %10[%c0, %c0] {in_bounds = [true, true]} : vector<30x90xf32>, tensor<30x90xf32>
  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %15 = tensor.insert_slice %14 into %e[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<?x?xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [2, -1]
  return %8, %15 : tensor<62x90xf32>, tensor<?x?xf32>
}

// -----

// CHECK-LABEL: func @interleaved_extract_insert_slice_chain_1
func.func @interleaved_extract_insert_slice_chain_1(
    %arg2: tensor<62x90xf32> {bufferization.writable = true})
  -> (tensor<62x90xf32>)
{
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
  %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>

  // TODO: This should bufferize inplace once we have a proper range analysis.
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]
  %10 = tensor.extract_slice %arg2[32, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %15 = tensor.insert_slice %10 into %8[32, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0]
  return %15 : tensor<62x90xf32>
}

// -----

// CHECK-LABEL: func @interleaved_extract_insert_slice_chain_2
func.func @interleaved_extract_insert_slice_chain_2(
    %arg2: tensor<62x90xf32> {bufferization.writable = true})
  -> (tensor<62x90xf32>)
{
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true"]
  %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>

  // The slices are overlapping, so this can never bufferize inplace.
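  // (The first slice covers rows [0, 32) and this one covers rows [31, 61),
  // so they share row 31.)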
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]
  %10 = tensor.extract_slice %arg2[31, 0] [30, 90] [1, 1] : tensor<62x90xf32> to tensor<30x90xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %15 = tensor.insert_slice %10 into %8[31, 0] [30, 90] [1, 1] : tensor<30x90xf32> into tensor<62x90xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0]
  return %15 : tensor<62x90xf32>
}

// -----

// CHECK-LABEL: func @extract_once_insert_twice
func.func @extract_once_insert_twice(
    %arg2: tensor<62x90xf32> {bufferization.writable = true})
  -> (tensor<62x90xf32>)
{
  // CHECK: tensor.extract_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["false"]
  %2 = tensor.extract_slice %arg2[0, 0] [32, 90] [1, 1] : tensor<62x90xf32> to tensor<32x90xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %8 = tensor.insert_slice %2 into %arg2[0, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>

  // CHECK: tensor.insert_slice
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true"]
  %15 = tensor.insert_slice %2 into %8[15, 0] [32, 90] [1, 1] : tensor<32x90xf32> into tensor<62x90xf32>

  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0]
  return %15 : tensor<62x90xf32>
}

// -----

// CHECK-LABEL: func @some_use
func.func @some_use(%A : tensor<?xf32> {bufferization.writable = true},
                    %v : vector<5xf32>) -> (tensor<?xf32>) {
  %idx = arith.constant 0 : index
  // CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]
  %0 = vector.transfer_write %v, %A[%idx] : vector<5xf32>, tensor<?xf32>
  return %0 : tensor<?xf32>
}

// CHECK-LABEL: func @main_func
func.func @main_func(%A : tensor<?xf32> {bufferization.writable = true},
                     %v : vector<5xf32>) -> (tensor<?xf32>) {
  // CHECK: call
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]
  %0 = call @some_use(%A, %v) : (tensor<?xf32>, vector<5xf32>) -> (tensor<?xf32>)
  return %0 : tensor<?xf32>
}

// -----

// CHECK-LABEL: func @to_tensor_op_not_writable
func.func @to_tensor_op_not_writable(%m: memref<?xf32>, %v: vector<5xf32>,
                                     %idx1: index, %idx2: index)
  -> vector<10xf32> {
  %0 = bufferization.to_tensor %m : memref<?xf32>

  // Write to the tensor. Cannot be inplace because the tensor comes from a
  // bufferization.to_tensor op, which is not writable.
  // CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
  %w = vector.transfer_write %v, %0[%idx1] : vector<5xf32>, tensor<?xf32>

  // Read from the tensor and return the result.
  %cst = arith.constant 0.0 : f32
  %r = vector.transfer_read %w[%idx2], %cst : tensor<?xf32>, vector<10xf32>
  return %r : vector<10xf32>
}

// -----

// CHECK-LABEL: func @to_memref_op_is_reading
func.func @to_memref_op_is_reading(%t1: tensor<?xf32> {bufferization.writable = true},
                                   %idx1: index, %idx2: index, %idx3: index,
                                   %v1: vector<5xf32>)
  -> (vector<5xf32>, vector<5xf32>) {
  // Write + read to/from tensor.
  // CHECK: vector.transfer_write
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]
  %1 = vector.transfer_write %v1, %t1[%idx2] : vector<5xf32>, tensor<?xf32>
  %cst = arith.constant 0.0 : f32
  %r1 = vector.transfer_read %1[%idx3], %cst : tensor<?xf32>, vector<5xf32>

  // Write + read to/from same memref.
  %0 = bufferization.to_memref %t1 : memref<?xf32>
  vector.transfer_write %v1, %0[%idx1] : vector<5xf32>, memref<?xf32>
  %r2 = vector.transfer_read %0[%idx3], %cst : memref<?xf32>, vector<5xf32>

  return %r1, %r2 : vector<5xf32>, vector<5xf32>
}

// -----

// CHECK-LABEL: func @inner_func
func.func @inner_func(%t: tensor<?xf32>) -> tensor<?xf32> {
  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0]
  return %t : tensor<?xf32>
}

func.func @equivalent_func_arg(%c0: index, %c10: index, %c1: index, %t0: tensor<?xf32>) -> tensor<?xf32> {
  // This test does not check IR. It just asserts there is no failure due to
  // non-equivalent scf.for yield values.
  %1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
    %3 = func.call @inner_func(%t1) : (tensor<?xf32>) -> tensor<?xf32>
    scf.yield %3 : tensor<?xf32>
  }
  return %1: tensor<?xf32>
}

// -----

// CHECK-LABEL: func @inner_func_2
func.func @inner_func_2(%t: tensor<?xf32>) -> tensor<?xf32> {
  %f = arith.constant 1.0 : f32
  %c0 = arith.constant 0 : index
  %0 = tensor.insert %f into %t[%c0] : tensor<?xf32>
  // CHECK: return
  // CHECK-SAME: __equivalent_func_args__ = [0]
  return %0 : tensor<?xf32>
}

func.func @equivalent_func_arg_2(%c0: index, %c10: index, %c1: index, %t0: tensor<?xf32>) -> tensor<?xf32> {
  // This test does not check IR. It just asserts there is no failure due to
  // non-equivalent scf.for yield values.
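  // (@inner_func_2 returns a tensor that is equivalent to its argument, so the
  // value yielded below stays equivalent to the iter_arg.)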
  %1 = scf.for %iv = %c0 to %c10 step %c1 iter_args(%t1 = %t0) -> (tensor<?xf32>) {
    %3 = func.call @inner_func_2(%t1) : (tensor<?xf32>) -> tensor<?xf32>
    scf.yield %3 : tensor<?xf32>
  }
  return %1: tensor<?xf32>
}

// -----

// CHECK-LABEL: func @write_after_select_read_one
// CHECK-SAME: %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
func.func @write_after_select_read_one(
    %t1 : tensor<?xf32> {bufferization.writable = true},
    %t2 : tensor<?xf32> {bufferization.writable = true},
    %c : i1)
  -> (f32, tensor<?xf32>)
{
  %cst = arith.constant 0.0 : f32
  %idx = arith.constant 0 : index

  // CHECK: arith.select %{{.*}}, %[[t1]], %[[t2]]
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "true"]}
  %s = arith.select %c, %t1, %t2 : tensor<?xf32>
  // CHECK: tensor.insert
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
  %w = tensor.insert %cst into %s[%idx] : tensor<?xf32>
  // CHECK: tensor.extract
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
  %f = tensor.extract %t1[%idx] : tensor<?xf32>

  return %f, %w : f32, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @write_after_select_read_both
// CHECK-SAME: %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
func.func @write_after_select_read_both(
    %t1 : tensor<?xf32> {bufferization.writable = true},
    %t2 : tensor<?xf32> {bufferization.writable = true},
    %c : i1)
  -> (f32, f32, tensor<?xf32>)
{
  %cst = arith.constant 0.0 : f32
  %idx = arith.constant 0 : index

  // CHECK: arith.select %{{.*}}, %[[t1]], %[[t2]]
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "false"]}
  %s = arith.select %c, %t1, %t2 : tensor<?xf32>
  // CHECK: tensor.insert
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
  %w = tensor.insert %cst into %s[%idx] : tensor<?xf32>
  // CHECK: tensor.extract
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
  %f = tensor.extract %t1[%idx] : tensor<?xf32>
  // CHECK: tensor.extract
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
  %f2 = tensor.extract %t2[%idx] : tensor<?xf32>

  return %f, %f2, %w : f32, f32, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @write_after_select_no_conflict
// CHECK-SAME: %[[t1:.*]]: tensor<?xf32> {{.*}}, %[[t2:.*]]: tensor<?xf32>
func.func @write_after_select_no_conflict(
    %t1 : tensor<?xf32> {bufferization.writable = true},
    %t2 : tensor<?xf32> {bufferization.writable = true},
    %c : i1)
  -> (f32, tensor<?xf32>)
{
  %cst = arith.constant 0.0 : f32
  %idx = arith.constant 0 : index

  // CHECK: arith.select %{{.*}}, %[[t1]], %[[t2]]
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "true"]}
  %s = arith.select %c, %t1, %t2 : tensor<?xf32>
  // CHECK: tensor.insert
  // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
  %w = tensor.insert %cst into %s[%idx] : tensor<?xf32>
  // CHECK: tensor.extract
  // CHECK-SAME: {__inplace_operands_attr__ = ["true", "none"]}
  %f = tensor.extract %w[%idx] : tensor<?xf32>

  return %f, %w : f32, tensor<?xf32>
}

// -----

// CHECK-LABEL: func @write_to_same_tensor_in_loop_out_of_place(
func.func @write_to_same_tensor_in_loop_out_of_place(
    %A : tensor<?xf32> {bufferization.writable = true},
    %B : tensor<?xf32> {bufferization.writable = true},
    %lb : index, %ub : index, %step : index, %sz: index)
  -> (tensor<?xf32>)
{
  // CHECK: scf.for {{.*}} {
  %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
    %i2 = arith.index_cast %i : index to i32
    %i3 = arith.sitofp %i2 : i32 to f32
    // The tensor.insert is out-of-place because %B is written multiple times
    // inside a loop.
    // CHECK: tensor.insert
    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]}
    %B2 = tensor.insert %i3 into %B[%i] : tensor<?xf32>
    // CHECK: tensor.insert_slice
    // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
    %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
    scf.yield %A2 : tensor<?xf32>
  }
  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}

  return %r0 : tensor<?xf32>
}

// -----

// CHECK-LABEL: func @write_to_same_alloc_tensor_in_place(
func.func @write_to_same_alloc_tensor_in_place(
    %A : tensor<?xf32> {bufferization.writable = true},
    %lb : index, %ub : index, %step : index, %sz: index, %sz2: index)
  -> (tensor<?xf32>)
{
  %B = bufferization.alloc_tensor(%sz2) : tensor<?xf32>

  // CHECK: scf.for {{.*}} {
  %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
    %i2 = arith.index_cast %i : index to i32
    %i3 = arith.sitofp %i2 : i32 to f32
    // %B is written multiple times inside a loop, but it is an alloc_tensor.
    // CHECK: tensor.insert
    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "true", "none"]}
    %B2 = tensor.insert %i3 into %B[%i] : tensor<?xf32>
    // CHECK: tensor.insert_slice
    // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
    %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
    scf.yield %A2 : tensor<?xf32>
  }
  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}

  return %r0 : tensor<?xf32>
}

// -----

// CHECK-LABEL: func @write_to_same_alloc_tensor_out_of_place(
func.func @write_to_same_alloc_tensor_out_of_place(
    %A : tensor<?xf32> {bufferization.writable = true},
    %lb : index, %ub : index, %step : index, %sz: index, %sz2: index, %f: f32)
  -> (tensor<?xf32>)
{
  %B = bufferization.alloc_tensor(%sz2) : tensor<?xf32>
  %C = tensor.insert %f into %B[%lb] : tensor<?xf32>

  // CHECK: scf.for {{.*}} {
  %r0 = scf.for %i = %lb to %ub step %step iter_args(%t = %A) -> (tensor<?xf32>) {
    %i2 = arith.index_cast %i : index to i32
    %i3 = arith.sitofp %i2 : i32 to f32
    // %C is written multiple times inside a loop. Even though %C aliases with
    // an alloc_tensor, out-of-place bufferization is necessary because there
    // is another alias (%C) outside of the loop.
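    // (An in-place write in one iteration would be observed by the read of %C
    // in the next iteration, changing the result.)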
    // CHECK: tensor.insert
    // CHECK-SAME: {__inplace_operands_attr__ = ["none", "false", "none"]}
    %B2 = tensor.insert %i3 into %C[%i] : tensor<?xf32>
    // CHECK: tensor.insert_slice
    // CHECK-SAME: {__inplace_operands_attr__ = ["true", "true", "none", "none"]}
    %A2 = tensor.insert_slice %B2 into %t[%i][%sz][1] : tensor<?xf32> into tensor<?xf32>
    scf.yield %A2 : tensor<?xf32>
  }
  // CHECK: } {__inplace_operands_attr__ = ["none", "none", "none", "true"]}

  return %r0 : tensor<?xf32>
}