1// RUN: mlir-opt %s -split-input-file -loop-invariant-code-motion | FileCheck %s 2 3func.func @nested_loops_both_having_invariant_code() { 4 %m = memref.alloc() : memref<10xf32> 5 %cf7 = arith.constant 7.0 : f32 6 %cf8 = arith.constant 8.0 : f32 7 8 affine.for %arg0 = 0 to 10 { 9 %v0 = arith.addf %cf7, %cf8 : f32 10 affine.for %arg1 = 0 to 10 { 11 %v1 = arith.addf %v0, %cf8 : f32 12 affine.store %v0, %m[%arg0] : memref<10xf32> 13 } 14 } 15 16 // CHECK: %0 = memref.alloc() : memref<10xf32> 17 // CHECK-NEXT: %[[CST0:.*]] = arith.constant 7.000000e+00 : f32 18 // CHECK-NEXT: %[[CST1:.*]] = arith.constant 8.000000e+00 : f32 19 // CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[CST0]], %[[CST1]] : f32 20 // CHECK-NEXT: arith.addf %[[ADD0]], %[[CST1]] : f32 21 // CHECK-NEXT: affine.for 22 // CHECK-NEXT: affine.for 23 // CHECK-NEXT: affine.store 24 25 return 26} 27 28// ----- 29 30func.func @nested_loops_code_invariant_to_both() { 31 %m = memref.alloc() : memref<10xf32> 32 %cf7 = arith.constant 7.0 : f32 33 %cf8 = arith.constant 8.0 : f32 34 35 affine.for %arg0 = 0 to 10 { 36 affine.for %arg1 = 0 to 10 { 37 %v0 = arith.addf %cf7, %cf8 : f32 38 } 39 } 40 41 // CHECK: %0 = memref.alloc() : memref<10xf32> 42 // CHECK-NEXT: %cst = arith.constant 7.000000e+00 : f32 43 // CHECK-NEXT: %cst_0 = arith.constant 8.000000e+00 : f32 44 // CHECK-NEXT: %1 = arith.addf %cst, %cst_0 : f32 45 46 return 47} 48 49// ----- 50 51func.func @single_loop_nothing_invariant() { 52 %m1 = memref.alloc() : memref<10xf32> 53 %m2 = memref.alloc() : memref<10xf32> 54 affine.for %arg0 = 0 to 10 { 55 %v0 = affine.load %m1[%arg0] : memref<10xf32> 56 %v1 = affine.load %m2[%arg0] : memref<10xf32> 57 %v2 = arith.addf %v0, %v1 : f32 58 affine.store %v2, %m1[%arg0] : memref<10xf32> 59 } 60 61 // CHECK: %0 = memref.alloc() : memref<10xf32> 62 // CHECK-NEXT: %1 = memref.alloc() : memref<10xf32> 63 // CHECK-NEXT: affine.for %arg0 = 0 to 10 { 64 // CHECK-NEXT: %2 = affine.load %0[%arg0] : memref<10xf32> 65 // CHECK-NEXT: %3 = affine.load %1[%arg0] : memref<10xf32> 66 // CHECK-NEXT: %4 = arith.addf %2, %3 : f32 67 // CHECK-NEXT: affine.store %4, %0[%arg0] : memref<10xf32> 68 69 return 70} 71 72// ----- 73 74func.func @invariant_code_inside_affine_if() { 75 %m = memref.alloc() : memref<10xf32> 76 %cf8 = arith.constant 8.0 : f32 77 78 affine.for %arg0 = 0 to 10 { 79 %t0 = affine.apply affine_map<(d1) -> (d1 + 1)>(%arg0) 80 affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %t0) { 81 %cf9 = arith.addf %cf8, %cf8 : f32 82 affine.store %cf9, %m[%arg0] : memref<10xf32> 83 84 } 85 } 86 87 // CHECK: %0 = memref.alloc() : memref<10xf32> 88 // CHECK-NEXT: %cst = arith.constant 8.000000e+00 : f32 89 // CHECK-NEXT: affine.for %arg0 = 0 to 10 { 90 // CHECK-NEXT: %1 = affine.apply #map(%arg0) 91 // CHECK-NEXT: affine.if #set(%arg0, %1) { 92 // CHECK-NEXT: %2 = arith.addf %cst, %cst : f32 93 // CHECK-NEXT: affine.store %2, %0[%arg0] : memref<10xf32> 94 // CHECK-NEXT: } 95 96 97 return 98} 99 100// ----- 101 102func.func @invariant_affine_if() { 103 %m = memref.alloc() : memref<10xf32> 104 %cf8 = arith.constant 8.0 : f32 105 affine.for %arg0 = 0 to 10 { 106 affine.for %arg1 = 0 to 10 { 107 affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) { 108 %cf9 = arith.addf %cf8, %cf8 : f32 109 } 110 } 111 } 112 113 // CHECK: %0 = memref.alloc() : memref<10xf32> 114 // CHECK-NEXT: %[[CST:.*]] = arith.constant 8.000000e+00 : f32 115 // CHECK-NEXT: affine.for %[[ARG:.*]] = 0 to 10 { 116 // CHECK-NEXT: } 117 // CHECK-NEXT: affine.for %[[ARG:.*]] = 0 to 10 { 118 // CHECK-NEXT: affine.if #set(%[[ARG]], %[[ARG]]) { 119 // CHECK-NEXT: arith.addf %[[CST]], %[[CST]] : f32 120 // CHECK-NEXT: } 121 122 return 123} 124 125// ----- 126 127func.func @invariant_affine_if2() { 128 %m = memref.alloc() : memref<10xf32> 129 %cf8 = arith.constant 8.0 : f32 130 affine.for %arg0 = 0 to 10 { 131 affine.for %arg1 = 0 to 10 { 132 affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) { 133 %cf9 = arith.addf %cf8, %cf8 : f32 134 affine.store %cf9, %m[%arg1] : memref<10xf32> 135 } 136 } 137 } 138 139 // CHECK: memref.alloc 140 // CHECK-NEXT: arith.constant 141 // CHECK-NEXT: affine.for 142 // CHECK-NEXT: affine.for 143 // CHECK-NEXT: affine.if 144 // CHECK-NEXT: arith.addf 145 // CHECK-NEXT: affine.store 146 // CHECK-NEXT: } 147 // CHECK-NEXT: } 148 149 return 150} 151 152// ----- 153 154func.func @invariant_affine_nested_if() { 155 %m = memref.alloc() : memref<10xf32> 156 %cf8 = arith.constant 8.0 : f32 157 affine.for %arg0 = 0 to 10 { 158 affine.for %arg1 = 0 to 10 { 159 affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) { 160 %cf9 = arith.addf %cf8, %cf8 : f32 161 affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) { 162 %cf10 = arith.addf %cf9, %cf9 : f32 163 } 164 } 165 } 166 } 167 168 // CHECK: memref.alloc 169 // CHECK-NEXT: arith.constant 170 // CHECK-NEXT: affine.for 171 // CHECK-NEXT: } 172 // CHECK-NEXT: affine.for 173 // CHECK-NEXT: affine.if 174 // CHECK-NEXT: arith.addf 175 // CHECK-NEXT: affine.if 176 // CHECK-NEXT: arith.addf 177 // CHECK-NEXT: } 178 // CHECK-NEXT: } 179 180 181 return 182} 183 184// ----- 185 186func.func @invariant_affine_nested_if_else() { 187 %m = memref.alloc() : memref<10xf32> 188 %cf8 = arith.constant 8.0 : f32 189 affine.for %arg0 = 0 to 10 { 190 affine.for %arg1 = 0 to 10 { 191 affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) { 192 %cf9 = arith.addf %cf8, %cf8 : f32 193 affine.store %cf9, %m[%arg0] : memref<10xf32> 194 affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) { 195 %cf10 = arith.addf %cf9, %cf9 : f32 196 } else { 197 affine.store %cf9, %m[%arg1] : memref<10xf32> 198 } 199 } 200 } 201 } 202 203 // CHECK: memref.alloc 204 // CHECK-NEXT: arith.constant 205 // CHECK-NEXT: affine.for 206 // CHECK-NEXT: affine.for 207 // CHECK-NEXT: affine.if 208 // CHECK-NEXT: arith.addf 209 // CHECK-NEXT: affine.store 210 // CHECK-NEXT: affine.if 211 // CHECK-NEXT: arith.addf 212 // CHECK-NEXT: } else { 213 // CHECK-NEXT: affine.store 214 // CHECK-NEXT: } 215 // CHECK-NEXT: } 216 // CHECK-NEXT: } 217 218 219 return 220} 221 222// ----- 223 224func.func @invariant_loop_dialect() { 225 %ci0 = arith.constant 0 : index 226 %ci10 = arith.constant 10 : index 227 %ci1 = arith.constant 1 : index 228 %m = memref.alloc() : memref<10xf32> 229 %cf7 = arith.constant 7.0 : f32 230 %cf8 = arith.constant 8.0 : f32 231 scf.for %arg0 = %ci0 to %ci10 step %ci1 { 232 scf.for %arg1 = %ci0 to %ci10 step %ci1 { 233 %v0 = arith.addf %cf7, %cf8 : f32 234 } 235 } 236 237 // CHECK: %0 = memref.alloc() : memref<10xf32> 238 // CHECK-NEXT: %cst = arith.constant 7.000000e+00 : f32 239 // CHECK-NEXT: %cst_0 = arith.constant 8.000000e+00 : f32 240 // CHECK-NEXT: %1 = arith.addf %cst, %cst_0 : f32 241 242 return 243} 244 245// ----- 246 247func.func @variant_loop_dialect() { 248 %ci0 = arith.constant 0 : index 249 %ci10 = arith.constant 10 : index 250 %ci1 = arith.constant 1 : index 251 %m = memref.alloc() : memref<10xf32> 252 scf.for %arg0 = %ci0 to %ci10 step %ci1 { 253 scf.for %arg1 = %ci0 to %ci10 step %ci1 { 254 %v0 = arith.addi %arg0, %arg1 : index 255 } 256 } 257 258 // CHECK: %0 = memref.alloc() : memref<10xf32> 259 // CHECK-NEXT: scf.for 260 // CHECK-NEXT: scf.for 261 // CHECK-NEXT: arith.addi 262 263 return 264} 265 266// ----- 267 268func.func @parallel_loop_with_invariant() { 269 %c0 = arith.constant 0 : index 270 %c10 = arith.constant 10 : index 271 %c1 = arith.constant 1 : index 272 %c7 = arith.constant 7 : i32 273 %c8 = arith.constant 8 : i32 274 scf.parallel (%arg0, %arg1) = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) { 275 %v0 = arith.addi %c7, %c8 : i32 276 %v3 = arith.addi %arg0, %arg1 : index 277 } 278 279 // CHECK-LABEL: func @parallel_loop_with_invariant 280 // CHECK: %c0 = arith.constant 0 : index 281 // CHECK-NEXT: %c10 = arith.constant 10 : index 282 // CHECK-NEXT: %c1 = arith.constant 1 : index 283 // CHECK-NEXT: %c7_i32 = arith.constant 7 : i32 284 // CHECK-NEXT: %c8_i32 = arith.constant 8 : i32 285 // CHECK-NEXT: arith.addi %c7_i32, %c8_i32 : i32 286 // CHECK-NEXT: scf.parallel (%arg0, %arg1) = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) 287 // CHECK-NEXT: arith.addi %arg0, %arg1 : index 288 // CHECK-NEXT: yield 289 // CHECK-NEXT: } 290 // CHECK-NEXT: return 291 292 return 293} 294 295// ----- 296 297func.func private @make_val() -> (index) 298 299// CHECK-LABEL: func @nested_uses_inside 300func.func @nested_uses_inside(%lb: index, %ub: index, %step: index) { 301 %true = arith.constant true 302 303 // Check that ops that contain nested uses to values not defiend outside 304 // remain in the loop. 305 // CHECK-NEXT: arith.constant 306 // CHECK-NEXT: scf.for 307 // CHECK-NEXT: call @ 308 // CHECK-NEXT: call @ 309 // CHECK-NEXT: scf.if 310 // CHECK-NEXT: scf.yield 311 // CHECK-NEXT: else 312 // CHECK-NEXT: scf.yield 313 scf.for %i = %lb to %ub step %step { 314 %val = func.call @make_val() : () -> (index) 315 %val2 = func.call @make_val() : () -> (index) 316 %r = scf.if %true -> (index) { 317 scf.yield %val: index 318 } else { 319 scf.yield %val2: index 320 } 321 } 322 return 323} 324 325// ----- 326 327// Test that two ops that feed into each other are moved without violating 328// dominance in non-graph regions. 329// CHECK-LABEL: func @invariant_subgraph 330// CHECK-SAME: %{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %[[ARG:.*]]: i32 331func.func @invariant_subgraph(%lb: index, %ub: index, %step: index, %arg: i32) { 332 // CHECK: %[[V0:.*]] = arith.addi %[[ARG]], %[[ARG]] 333 // CHECK-NEXT: %[[V1:.*]] = arith.addi %[[ARG]], %[[V0]] 334 // CHECK-NEXT: scf.for 335 scf.for %i = %lb to %ub step %step { 336 // CHECK-NEXT: "test.sink"(%[[V1]]) 337 %v0 = arith.addi %arg, %arg : i32 338 %v1 = arith.addi %arg, %v0 : i32 339 "test.sink"(%v1) : (i32) -> () 340 } 341 return 342} 343 344// ----- 345 346// Test invariant nested loop is hoisted. 347// CHECK-LABEL: func @test_invariant_nested_loop 348func.func @test_invariant_nested_loop() { 349 // CHECK: %[[C:.*]] = arith.constant 350 %0 = arith.constant 5 : i32 351 // CHECK: %[[V0:.*]] = arith.addi %[[C]], %[[C]] 352 // CHECK-NEXT: %[[V1:.*]] = arith.addi %[[V0]], %[[C]] 353 // CHECK-NEXT: test.graph_loop 354 // CHECK-NEXT: ^bb0(%[[ARG0:.*]]: i32) 355 // CHECK-NEXT: %[[V2:.*]] = arith.subi %[[ARG0]], %[[ARG0]] 356 // CHECK-NEXT: test.region_yield %[[V2]] 357 // CHECK: test.graph_loop 358 // CHECK-NEXT: test.region_yield %[[V1]] 359 test.graph_loop { 360 %1 = arith.addi %0, %0 : i32 361 %2 = arith.addi %1, %0 : i32 362 test.graph_loop { 363 ^bb0(%arg0: i32): 364 %3 = arith.subi %arg0, %arg0 : i32 365 test.region_yield %3 : i32 366 } : () -> () 367 test.region_yield %2 : i32 368 } : () -> () 369 return 370} 371 372 373// ----- 374 375// Test ops in a graph region are hoisted. 376// CHECK-LABEL: func @test_invariants_in_graph_region 377func.func @test_invariants_in_graph_region() { 378 // CHECK: test.single_no_terminator_op 379 test.single_no_terminator_op : { 380 // CHECK-NEXT: %[[C:.*]] = arith.constant 381 // CHECK-NEXT: %[[V1:.*]] = arith.addi %[[C]], %[[C]] 382 // CHECK-NEXT: %[[V0:.*]] = arith.addi %[[C]], %[[V1]] 383 test.graph_loop { 384 %v0 = arith.addi %c0, %v1 : i32 385 %v1 = arith.addi %c0, %c0 : i32 386 %c0 = arith.constant 5 : i32 387 test.region_yield %v0 : i32 388 } : () -> () 389 } 390 return 391} 392 393// ----- 394 395// Test ops in a graph region are hoisted in topological order into non-graph 396// regions and that dominance is preserved. 397// CHECK-LABEL: func @test_invariant_backedge 398func.func @test_invariant_backedge() { 399 // CHECK-NEXT: %[[C:.*]] = arith.constant 400 // CHECK-NEXT: %[[V1:.*]] = arith.addi %[[C]], %[[C]] 401 // CHECK-NEXT: %[[V0:.*]] = arith.addi %[[C]], %[[V1]] 402 // CHECK-NEXT: test.graph_loop 403 test.graph_loop { 404 // CHECK-NEXT: test.region_yield %[[V0]] 405 %v0 = arith.addi %c0, %v1 : i32 406 %v1 = arith.addi %c0, %c0 : i32 407 %c0 = arith.constant 5 : i32 408 test.region_yield %v0 : i32 409 } : () -> () 410 return 411} 412 413// ----- 414 415// Test that cycles aren't hoisted from graph regions to non-graph regions. 416// CHECK-LABEL: func @test_invariant_cycle_not_hoisted 417func.func @test_invariant_cycle_not_hoisted() { 418 // CHECK: test.graph_loop 419 test.graph_loop { 420 // CHECK-NEXT: %[[A:.*]] = "test.a"(%[[B:.*]]) : 421 // CHECK-NEXT: %[[B]] = "test.b"(%[[A]]) : 422 // CHECK-NEXT: test.region_yield %[[A]] 423 %a = "test.a"(%b) : (i32) -> i32 424 %b = "test.b"(%a) : (i32) -> i32 425 test.region_yield %a : i32 426 } : () -> () 427 return 428} 429