1// RUN: mlir-opt %s  -split-input-file -loop-invariant-code-motion | FileCheck %s
2
// %v0 is invariant to the outer loop (it only uses function-level constants)
// and %v1 is invariant to the inner loop; the CHECK lines verify both adds
// are hoisted above the whole nest while the IV-indexed store stays inside.
func.func @nested_loops_both_having_invariant_code() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %v0 = arith.addf %cf7, %cf8 : f32
    affine.for %arg1 = 0 to 10 {
      %v1 = arith.addf %v0, %cf8 : f32
      affine.store %v0, %m[%arg0] : memref<10xf32>
    }
  }

  // CHECK: %0 = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[CST0:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[CST1:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[CST0]], %[[CST1]] : f32
  // CHECK-NEXT: arith.addf %[[ADD0]], %[[CST1]] : f32
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.store

  return
}
27
28// -----
29
// The add depends only on constants defined outside the nest, so it is
// invariant to both loops; the CHECK lines verify it is hoisted before
// the (now side-effect-free) loop nest.
func.func @nested_loops_code_invariant_to_both() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      %v0 = arith.addf %cf7, %cf8 : f32
    }
  }

  // CHECK: %0 = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %cst = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %cst_0 = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: %1 = arith.addf %cst, %cst_0 : f32

  return
}
48
49// -----
50
// Every op in the body either uses the induction variable or touches memory
// written in the same iteration, so nothing may be hoisted; the CHECK lines
// verify the loop body is left untouched.
func.func @single_loop_nothing_invariant() {
  %m1 = memref.alloc() : memref<10xf32>
  %m2 = memref.alloc() : memref<10xf32>
  affine.for %arg0 = 0 to 10 {
    %v0 = affine.load %m1[%arg0] : memref<10xf32>
    %v1 = affine.load %m2[%arg0] : memref<10xf32>
    %v2 = arith.addf %v0, %v1 : f32
    affine.store %v2, %m1[%arg0] : memref<10xf32>
  }

  // CHECK: %0 = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %1 = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: affine.for %arg0 = 0 to 10 {
  // CHECK-NEXT: %2 = affine.load %0[%arg0] : memref<10xf32>
  // CHECK-NEXT: %3 = affine.load %1[%arg0] : memref<10xf32>
  // CHECK-NEXT: %4 = arith.addf %2, %3 : f32
  // CHECK-NEXT: affine.store %4, %0[%arg0] : memref<10xf32>

  return
}
71
72// -----
73
// The add uses only %cf8, but it sits under an affine.if whose condition
// depends on the induction variable (via %t0); the CHECK lines verify that
// nothing is hoisted out of the conditionally-executed region.
func.func @invariant_code_inside_affine_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %t0 = affine.apply affine_map<(d1) -> (d1 + 1)>(%arg0)
    affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %t0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        affine.store %cf9, %m[%arg0] : memref<10xf32>

    }
  }

  // CHECK: %0 = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %cst = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %arg0 = 0 to 10 {
  // CHECK-NEXT: %1 = affine.apply #map(%arg0)
  // CHECK-NEXT: affine.if #set(%arg0, %1) {
  // CHECK-NEXT: %2 = arith.addf %cst, %cst : f32
  // CHECK-NEXT: affine.store %2, %0[%arg0] : memref<10xf32>
  // CHECK-NEXT: }


  return
}
99
100// -----
101
// The affine.if only uses %arg0, so the whole if (with its side-effect-free
// body) is invariant to the inner loop; the CHECK lines verify it is hoisted
// into the outer loop, leaving an empty inner loop behind.
func.func @invariant_affine_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf9 = arith.addf %cf8, %cf8 : f32
      }
    }
  }

  // CHECK: %0 = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[CST:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %[[ARG:.*]] = 0 to 10 {
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %[[ARG:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.if #set(%[[ARG]], %[[ARG]]) {
  // CHECK-NEXT: arith.addf %[[CST]], %[[CST]] : f32
  // CHECK-NEXT: }

  return
}
124
125// -----
126
// Same shape as @invariant_affine_if, but the if body stores at %arg1
// (the inner induction variable), so the if is NOT invariant to the inner
// loop; the CHECK lines verify nothing moves.
func.func @invariant_affine_if2() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf9 = arith.addf %cf8, %cf8 : f32
          affine.store %cf9, %m[%arg1] : memref<10xf32>
      }
    }
  }

  // CHECK: memref.alloc
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: }
  // CHECK-NEXT: }

  return
}
151
152// -----
153
// Nested affine.ifs whose conditions only use %arg0 and whose bodies are
// side-effect free; the CHECK lines verify the whole if nest is hoisted out
// of the inner loop (which is left empty) into the outer loop.
func.func @invariant_affine_nested_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf10 = arith.addf %cf9, %cf9 : f32
        }
      }
    }
  }

  // CHECK: memref.alloc
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}
183
184// -----
185
// Nested if/else where the outer if stores at %arg0 and the else branch
// stores at %arg1; the stores make the region non-hoistable, and the CHECK
// lines verify the entire structure stays inside both loops.
func.func @invariant_affine_nested_if_else() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf9 = arith.addf %cf8, %cf8 : f32
          affine.store %cf9, %m[%arg0] : memref<10xf32>
          affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
            %cf10 = arith.addf %cf9, %cf9 : f32
          } else {
            affine.store %cf9, %m[%arg1] : memref<10xf32>
          }
      }
    }
  }

  // CHECK: memref.alloc
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: } else {
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: }
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}
221
222// -----
223
// scf.for variant of the invariant-add case: the add uses only function-level
// constants, so the CHECK lines verify it is hoisted above the loop nest.
func.func @invariant_loop_dialect() {
  %ci0 = arith.constant 0 : index
  %ci10 = arith.constant 10 : index
  %ci1 = arith.constant 1 : index
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32
  scf.for %arg0 = %ci0 to %ci10 step %ci1 {
    scf.for %arg1 = %ci0 to %ci10 step %ci1 {
      %v0 = arith.addf %cf7, %cf8 : f32
    }
  }

  // CHECK: %0 = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %cst = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %cst_0 = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: %1 = arith.addf %cst, %cst_0 : f32

  return
}
244
245// -----
246
// scf.for variant of the nothing-invariant case: the add uses both induction
// variables, so the CHECK lines verify it stays inside the inner loop.
func.func @variant_loop_dialect() {
  %ci0 = arith.constant 0 : index
  %ci10 = arith.constant 10 : index
  %ci1 = arith.constant 1 : index
  %m = memref.alloc() : memref<10xf32>
  scf.for %arg0 = %ci0 to %ci10 step %ci1 {
    scf.for %arg1 = %ci0 to %ci10 step %ci1 {
      %v0 = arith.addi %arg0, %arg1 : index
    }
  }

  // CHECK: %0 = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: scf.for
  // CHECK-NEXT: scf.for
  // CHECK-NEXT: arith.addi
  // CHECK-NEXT: }

  return
}
265
266// -----
267
// scf.parallel: %v0 uses only constants and is hoisted before the loop,
// while %v3 uses both parallel induction variables and must remain inside.
func.func @parallel_loop_with_invariant() {
  %c0 = arith.constant 0 : index
  %c10 = arith.constant 10 : index
  %c1 = arith.constant 1 : index
  %c7 = arith.constant 7 : i32
  %c8 = arith.constant 8 : i32
  scf.parallel (%arg0, %arg1) = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
      %v0 = arith.addi %c7, %c8 : i32
      %v3 = arith.addi %arg0, %arg1 : index
  }

  // CHECK-LABEL: func @parallel_loop_with_invariant
  // CHECK: %c0 = arith.constant 0 : index
  // CHECK-NEXT: %c10 = arith.constant 10 : index
  // CHECK-NEXT: %c1 = arith.constant 1 : index
  // CHECK-NEXT: %c7_i32 = arith.constant 7 : i32
  // CHECK-NEXT: %c8_i32 = arith.constant 8 : i32
  // CHECK-NEXT: arith.addi %c7_i32, %c8_i32 : i32
  // CHECK-NEXT: scf.parallel (%arg0, %arg1) = (%c0, %c0) to (%c10, %c10) step (%c1, %c1)
  // CHECK-NEXT:   arith.addi %arg0, %arg1 : index
  // CHECK-NEXT:   yield
  // CHECK-NEXT: }
  // CHECK-NEXT: return

  return
}
294
295// -----
296
// External helper with unknown side effects; calls to it may not be hoisted.
func.func private @make_val() -> (index)

// CHECK-LABEL: func @nested_uses_inside
func.func @nested_uses_inside(%lb: index, %ub: index, %step: index) {
  %true = arith.constant true

  // Check that ops that contain nested uses to values not defined outside
  // remain in the loop.
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: scf.for
  // CHECK-NEXT:   call @
  // CHECK-NEXT:   call @
  // CHECK-NEXT:   scf.if
  // CHECK-NEXT:     scf.yield
  // CHECK-NEXT:   else
  // CHECK-NEXT:     scf.yield
  scf.for %i = %lb to %ub step %step {
    %val = func.call @make_val() : () -> (index)
    %val2 = func.call @make_val() : () -> (index)
    %r = scf.if %true -> (index) {
      scf.yield %val: index
    } else {
      scf.yield %val2: index
    }
  }
  return
}
324
325// -----
326
327// Test that two ops that feed into each other are moved without violating
328// dominance in non-graph regions.
329// CHECK-LABEL: func @invariant_subgraph
330// CHECK-SAME: %{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %[[ARG:.*]]: i32
func.func @invariant_subgraph(%lb: index, %ub: index, %step: index, %arg: i32) {
  // Both adds are invariant and %v1 depends on %v0, so they must be hoisted
  // in def-before-use order (%v0 first, then %v1).
  // CHECK:      %[[V0:.*]] = arith.addi %[[ARG]], %[[ARG]]
  // CHECK-NEXT: %[[V1:.*]] = arith.addi %[[ARG]], %[[V0]]
  // CHECK-NEXT: scf.for
  scf.for %i = %lb to %ub step %step {
    // CHECK-NEXT: "test.sink"(%[[V1]])
    %v0 = arith.addi %arg, %arg : i32
    %v1 = arith.addi %arg, %v0 : i32
    "test.sink"(%v1) : (i32) -> ()
  }
  return
}
343
344// -----
345
346// Test invariant nested loop is hoisted.
347// CHECK-LABEL: func @test_invariant_nested_loop
func.func @test_invariant_nested_loop() {
  // CHECK: %[[C:.*]] = arith.constant
  %0 = arith.constant 5 : i32
  // The adds in the outer graph loop are invariant and hoisted to function
  // level; the inner graph loop's subi uses its own block argument %arg0,
  // so that loop (and its body) stays where it can see the argument.
  // CHECK: %[[V0:.*]] = arith.addi %[[C]], %[[C]]
  // CHECK-NEXT: %[[V1:.*]] = arith.addi %[[V0]], %[[C]]
  // CHECK-NEXT: test.graph_loop
  // CHECK-NEXT: ^bb0(%[[ARG0:.*]]: i32)
  // CHECK-NEXT: %[[V2:.*]] = arith.subi %[[ARG0]], %[[ARG0]]
  // CHECK-NEXT: test.region_yield %[[V2]]
  // CHECK: test.graph_loop
  // CHECK-NEXT: test.region_yield %[[V1]]
  test.graph_loop {
    %1 = arith.addi %0, %0 : i32
    %2 = arith.addi %1, %0 : i32
    test.graph_loop {
    ^bb0(%arg0: i32):
      %3 = arith.subi %arg0, %arg0 : i32
      test.region_yield %3 : i32
    } : () -> ()
    test.region_yield %2 : i32
  } : () -> ()
  return
}
371
372
373// -----
374
375// Test ops in a graph region are hoisted.
376// CHECK-LABEL: func @test_invariants_in_graph_region
func.func @test_invariants_in_graph_region() {
  // CHECK: test.single_no_terminator_op
  test.single_no_terminator_op : {
    // In the graph region, uses may precede defs (%v0 uses %v1 before it is
    // defined); hoisting into the enclosing non-graph region must emit the
    // ops in topological order: constant, then %v1, then %v0.
    // CHECK-NEXT: %[[C:.*]] = arith.constant
    // CHECK-NEXT: %[[V1:.*]] = arith.addi %[[C]], %[[C]]
    // CHECK-NEXT: %[[V0:.*]] = arith.addi %[[C]], %[[V1]]
    test.graph_loop {
      %v0 = arith.addi %c0, %v1 : i32
      %v1 = arith.addi %c0, %c0 : i32
      %c0 = arith.constant 5 : i32
      test.region_yield %v0 : i32
    } : () -> ()
  }
  return
}
392
393// -----
394
395// Test ops in a graph region are hoisted in topological order into non-graph
396// regions and that dominance is preserved.
397// CHECK-LABEL: func @test_invariant_backedge
func.func @test_invariant_backedge() {
  // Same use-before-def pattern as above, but hoisted directly to function
  // scope; the CHECK lines verify topological ordering and that only the
  // yield remains in the loop.
  // CHECK-NEXT: %[[C:.*]] = arith.constant
  // CHECK-NEXT: %[[V1:.*]] = arith.addi %[[C]], %[[C]]
  // CHECK-NEXT: %[[V0:.*]] = arith.addi %[[C]], %[[V1]]
  // CHECK-NEXT: test.graph_loop
  test.graph_loop {
    // CHECK-NEXT: test.region_yield %[[V0]]
    %v0 = arith.addi %c0, %v1 : i32
    %v1 = arith.addi %c0, %c0 : i32
    %c0 = arith.constant 5 : i32
    test.region_yield %v0 : i32
  } : () -> ()
  return
}
412
413// -----
414
415// Test that cycles aren't hoisted from graph regions to non-graph regions.
416// CHECK-LABEL: func @test_invariant_cycle_not_hoisted
func.func @test_invariant_cycle_not_hoisted() {
  // %a and %b form a def-use cycle (legal only in a graph region), so no
  // topological order exists and neither op may be hoisted into the
  // dominance-checked function body.
  // CHECK: test.graph_loop
  test.graph_loop {
    // CHECK-NEXT: %[[A:.*]] = "test.a"(%[[B:.*]]) :
    // CHECK-NEXT: %[[B]] = "test.b"(%[[A]]) :
    // CHECK-NEXT: test.region_yield %[[A]]
    %a = "test.a"(%b) : (i32) -> i32
    %b = "test.b"(%a) : (i32) -> i32
    test.region_yield %a : i32
  } : () -> ()
  return
}
429