// RUN: mlir-opt %s -split-input-file -loop-invariant-code-motion | FileCheck %s

// Conventions used by every test in this file:
//  * each test starts with a CHECK-LABEL so that its CHECK lines can only
//    match inside the output of its own function; the trailing `(` keeps a
//    label from matching a longer function name that shares the same prefix;
//  * SSA values are bound to FileCheck variables rather than spelled with the
//    names the printer happens to assign (%0, %cst, %arg0, ...).

// Both additions only use values defined outside the loop nest. The expected
// output has both of them ahead of the loops, with the store left in the
// inner loop.
// CHECK-LABEL: func @nested_loops_both_having_invariant_code(
func @nested_loops_both_having_invariant_code() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %v0 = arith.addf %cf7, %cf8 : f32
    affine.for %arg1 = 0 to 10 {
      %v1 = arith.addf %v0, %cf8 : f32
      affine.store %v0, %m[%arg0] : memref<10xf32>
    }
  }

  // CHECK: %{{.*}} = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[CST0:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[CST1:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[CST0]], %[[CST1]] : f32
  // CHECK-NEXT: arith.addf %[[ADD0]], %[[CST1]] : f32
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.store

  return
}

// -----

// The addition in the innermost loop uses neither induction variable. The
// expected output has it directly after the constants.
// CHECK-LABEL: func @nested_loops_code_invariant_to_both(
func @nested_loops_code_invariant_to_both() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      %v0 = arith.addf %cf7, %cf8 : f32
    }
  }

  // CHECK: %{{.*}} = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[CST7:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[CST8:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: %{{.*}} = arith.addf %[[CST7]], %[[CST8]] : f32

  return
}

// -----

// Every operation in the loop body uses the induction variable, directly or
// through its operands. The expected output leaves the body unchanged.
// CHECK-LABEL: func @single_loop_nothing_invariant(
func @single_loop_nothing_invariant() {
  %m1 = memref.alloc() : memref<10xf32>
  %m2 = memref.alloc() : memref<10xf32>
  affine.for %arg0 = 0 to 10 {
    %v0 = affine.load %m1[%arg0] : memref<10xf32>
    %v1 = affine.load %m2[%arg0] : memref<10xf32>
    %v2 = arith.addf %v0, %v1 : f32
    affine.store %v2, %m1[%arg0] : memref<10xf32>
  }

  // CHECK: %[[M1:.*]] = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[M2:.*]] = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: affine.for %[[IV:.*]] = 0 to 10 {
  // CHECK-NEXT: %[[V0:.*]] = affine.load %[[M1]][%[[IV]]] : memref<10xf32>
  // CHECK-NEXT: %[[V1:.*]] = affine.load %[[M2]][%[[IV]]] : memref<10xf32>
  // CHECK-NEXT: %[[SUM:.*]] = arith.addf %[[V0]], %[[V1]] : f32
  // CHECK-NEXT: affine.store %[[SUM]], %[[M1]][%[[IV]]] : memref<10xf32>

  return
}

// -----

// The affine.if takes the induction variable as an operand and contains a
// store indexed by it. The expected output keeps the addition inside the
// affine.if.
// CHECK-LABEL: func @invariant_code_inside_affine_if(
func @invariant_code_inside_affine_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %t0 = affine.apply affine_map<(d1) -> (d1 + 1)>(%arg0)
    affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %t0) {
      %cf9 = arith.addf %cf8, %cf8 : f32
      affine.store %cf9, %m[%arg0] : memref<10xf32>

    }
  }

  // CHECK: %[[M:.*]] = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[CST:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %[[IV:.*]] = 0 to 10 {
  // CHECK-NEXT: %[[T0:.*]] = affine.apply #map(%[[IV]])
  // CHECK-NEXT: affine.if #set(%[[IV]], %[[T0]]) {
  // CHECK-NEXT: %[[SUM:.*]] = arith.addf %[[CST]], %[[CST]] : f32
  // CHECK-NEXT: affine.store %[[SUM]], %[[M]][%[[IV]]] : memref<10xf32>
  // CHECK-NEXT: }


  return
}

// -----

// The affine.if only uses the outer induction variable and contains no store.
// The expected output has an empty loop followed by a loop that contains the
// affine.if.
// CHECK-LABEL: func @invariant_affine_if(
func @invariant_affine_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
      }
    }
  }

  // CHECK: %{{.*}} = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[CST:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %[[ARG:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.if #set(%[[ARG]], %[[ARG]]) {
  // CHECK-NEXT: arith.addf %[[CST]], %[[CST]] : f32
  // CHECK-NEXT: }

  return
}

// -----

// Same as above, but the affine.if body stores to a location indexed by the
// inner induction variable. The expected output keeps the original nesting.
// CHECK-LABEL: func @invariant_affine_if2(
func @invariant_affine_if2() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        affine.store %cf9, %m[%arg1] : memref<10xf32>
      }
    }
  }

  // CHECK: memref.alloc
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: }
  // CHECK-NEXT: }

  return
}

// -----

// Two nested affine.if operations inside a two-deep loop nest. The expected
// output keeps the original nesting.
// CHECK-LABEL: func @invariant_affine_nested_if(
func @invariant_affine_nested_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf10 = arith.addf %cf9, %cf9 : f32
        }
      }
    }
  }

  // CHECK: memref.alloc
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: }
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}

// -----

// Nested affine.if with an else region that stores to a location indexed by
// the inner induction variable. The expected output keeps the original
// nesting.
// CHECK-LABEL: func @invariant_affine_nested_if_else(
func @invariant_affine_nested_if_else() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        affine.store %cf9, %m[%arg0] : memref<10xf32>
        affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf10 = arith.addf %cf9, %cf9 : f32
        } else {
          affine.store %cf9, %m[%arg1] : memref<10xf32>
        }
      }
    }
  }

  // CHECK: memref.alloc
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: } else {
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: }
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}

// -----

// scf.for variant of the "invariant to both loops" test. The expected output
// has the addition directly after the constants.
// CHECK-LABEL: func @invariant_loop_dialect(
func @invariant_loop_dialect() {
  %ci0 = arith.constant 0 : index
  %ci10 = arith.constant 10 : index
  %ci1 = arith.constant 1 : index
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32
  scf.for %arg0 = %ci0 to %ci10 step %ci1 {
    scf.for %arg1 = %ci0 to %ci10 step %ci1 {
      %v0 = arith.addf %cf7, %cf8 : f32
    }
  }

  // CHECK: %{{.*}} = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[CST7:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[CST8:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: %{{.*}} = arith.addf %[[CST7]], %[[CST8]] : f32

  return
}

// -----

// The addition uses both induction variables. The expected output keeps it in
// the inner scf.for.
// CHECK-LABEL: func @variant_loop_dialect(
func @variant_loop_dialect() {
  %ci0 = arith.constant 0 : index
  %ci10 = arith.constant 10 : index
  %ci1 = arith.constant 1 : index
  %m = memref.alloc() : memref<10xf32>
  scf.for %arg0 = %ci0 to %ci10 step %ci1 {
    scf.for %arg1 = %ci0 to %ci10 step %ci1 {
      %v0 = arith.addi %arg0, %arg1 : index
    }
  }

  // CHECK: %{{.*}} = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: scf.for
  // CHECK-NEXT: scf.for
  // CHECK-NEXT: arith.addi

  return
}

// -----

// scf.parallel with one addition of two constants and one addition of the two
// induction variables. The expected output has the former ahead of the loop
// and the latter inside it.
// CHECK-LABEL: func @parallel_loop_with_invariant(
func @parallel_loop_with_invariant() {
  %c0 = arith.constant 0 : index
  %c10 = arith.constant 10 : index
  %c1 = arith.constant 1 : index
  %c7 = arith.constant 7 : i32
  %c8 = arith.constant 8 : i32
  scf.parallel (%arg0, %arg1) = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
    %v0 = arith.addi %c7, %c8 : i32
    %v3 = arith.addi %arg0, %arg1 : index
  }

  // CHECK: %[[C0:.*]] = arith.constant 0 : index
  // CHECK-NEXT: %[[C10:.*]] = arith.constant 10 : index
  // CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
  // CHECK-NEXT: %[[C7:.*]] = arith.constant 7 : i32
  // CHECK-NEXT: %[[C8:.*]] = arith.constant 8 : i32
  // CHECK-NEXT: arith.addi %[[C7]], %[[C8]] : i32
  // CHECK-NEXT: scf.parallel (%[[IV0:.*]], %[[IV1:.*]]) = (%[[C0]], %[[C0]]) to (%[[C10]], %[[C10]]) step (%[[C1]], %[[C1]])
  // CHECK-NEXT: arith.addi %[[IV0]], %[[IV1]] : index
  // CHECK-NEXT: yield
  // CHECK-NEXT: }
  // CHECK-NEXT: return

  return
}
