1// RUN: mlir-opt %s  -split-input-file -loop-invariant-code-motion | FileCheck %s
2
// %v0 only uses constants defined outside the nest and %v1 only uses %v0 and a
// constant, so the checks expect both additions ahead of the outer loop while
// the store (indexed by %arg0) stays inside the inner loop.
func @nested_loops_both_having_invariant_code() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %v0 = arith.addf %cf7, %cf8 : f32
    affine.for %arg1 = 0 to 10 {
      %v1 = arith.addf %v0, %cf8 : f32
      affine.store %v0, %m[%arg0] : memref<10xf32>
    }
  }

  // SSA names are captured rather than hard-coded so the test does not depend
  // on the printer's value numbering.
  // CHECK-LABEL: func @nested_loops_both_having_invariant_code(
  // CHECK: %[[M:.*]] = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[CST0:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[CST1:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: %[[ADD0:.*]] = arith.addf %[[CST0]], %[[CST1]] : f32
  // CHECK-NEXT: arith.addf %[[ADD0]], %[[CST1]] : f32
  // CHECK-NEXT: affine.for %[[IV0:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: affine.store %[[ADD0]], %[[M]][%[[IV0]]] : memref<10xf32>

  return
}
27
28// -----
29
// The only operation in the nest adds two constants defined outside both
// loops; the checks expect it directly after the constants, i.e. hoisted out
// of both loops.
func @nested_loops_code_invariant_to_both() {
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      %v0 = arith.addf %cf7, %cf8 : f32
    }
  }

  // SSA names are captured rather than hard-coded so the test does not depend
  // on the printer's value numbering.
  // CHECK-LABEL: func @nested_loops_code_invariant_to_both(
  // CHECK: %{{.*}} = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[CST0:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[CST1:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: %{{.*}} = arith.addf %[[CST0]], %[[CST1]] : f32

  return
}
48
49// -----
50
// Every operation in the loop body depends on the induction variable (the
// loads and the store directly, the addition through the loaded values), so
// the checks expect the loop body to be left exactly as written.
func @single_loop_nothing_invariant() {
  %m1 = memref.alloc() : memref<10xf32>
  %m2 = memref.alloc() : memref<10xf32>
  affine.for %arg0 = 0 to 10 {
    %v0 = affine.load %m1[%arg0] : memref<10xf32>
    %v1 = affine.load %m2[%arg0] : memref<10xf32>
    %v2 = arith.addf %v0, %v1 : f32
    affine.store %v2, %m1[%arg0] : memref<10xf32>
  }

  // SSA names are captured rather than hard-coded so the test does not depend
  // on the printer's value numbering.
  // CHECK-LABEL: func @single_loop_nothing_invariant(
  // CHECK: %[[M1:.*]] = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[M2:.*]] = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: affine.for %[[IV:.*]] = 0 to 10 {
  // CHECK-NEXT: %[[V0:.*]] = affine.load %[[M1]][%[[IV]]] : memref<10xf32>
  // CHECK-NEXT: %[[V1:.*]] = affine.load %[[M2]][%[[IV]]] : memref<10xf32>
  // CHECK-NEXT: %[[SUM:.*]] = arith.addf %[[V0]], %[[V1]] : f32
  // CHECK-NEXT: affine.store %[[SUM]], %[[M1]][%[[IV]]] : memref<10xf32>

  return
}
71
72// -----
73
// The affine.if condition uses %arg0 and %t0 (computed from %arg0). The
// addition inside the if only uses a constant defined outside the loop, yet
// the checks expect it to remain inside the if, with the whole loop body
// unchanged.
func @invariant_code_inside_affine_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32

  affine.for %arg0 = 0 to 10 {
    %t0 = affine.apply affine_map<(d1) -> (d1 + 1)>(%arg0)
    affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %t0) {
        %cf9 = arith.addf %cf8, %cf8 : f32
        affine.store %cf9, %m[%arg0] : memref<10xf32>

    }
  }

  // SSA names are captured, and the numeric suffix of the #map/#set aliases is
  // left open, so the test does not depend on printer naming details.
  // CHECK-LABEL: func @invariant_code_inside_affine_if(
  // CHECK: %[[M:.*]] = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[CST:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %[[IV:.*]] = 0 to 10 {
  // CHECK-NEXT: %[[T0:.*]] = affine.apply #map{{[0-9]*}}(%[[IV]])
  // CHECK-NEXT: affine.if #set{{[0-9]*}}(%[[IV]], %[[T0]]) {
  // CHECK-NEXT: %[[SUM:.*]] = arith.addf %[[CST]], %[[CST]] : f32
  // CHECK-NEXT: affine.store %[[SUM]], %[[M]][%[[IV]]] : memref<10xf32>
  // CHECK-NEXT: }


  return
}
99
100// -----
101
// The affine.if only uses %arg0 (the outer induction variable) and a constant.
// The checks expect an empty loop followed by a second loop whose body holds
// the affine.if, with both set operands being that loop's induction variable.
func @invariant_affine_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf9 = arith.addf %cf8, %cf8 : f32
      }
    }
  }

  // The induction variable of the empty loop is never referenced, so it is
  // matched anonymously; only the second loop's variable is captured.
  // CHECK-LABEL: func @invariant_affine_if(
  // CHECK: %{{.*}} = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[CST:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: affine.for %{{.*}} = 0 to 10 {
  // CHECK-NEXT: }
  // CHECK-NEXT: affine.for %[[IV:.*]] = 0 to 10 {
  // CHECK-NEXT: affine.if #set{{[0-9]*}}(%[[IV]], %[[IV]]) {
  // CHECK-NEXT: arith.addf %[[CST]], %[[CST]] : f32
  // CHECK-NEXT: }

  return
}
124
125// -----
126
// Same condition as @invariant_affine_if, but the if body also stores to
// %m[%arg1], i.e. it uses the inner induction variable. The checks expect the
// loop nest to keep its original shape.
func @invariant_affine_if2() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf9 = arith.addf %cf8, %cf8 : f32
          affine.store %cf9, %m[%arg1] : memref<10xf32>
      }
    }
  }

  // The label anchors the opcode-only checks below to this function's output;
  // the trailing '(' keeps it from matching a longer function name.
  // CHECK-LABEL: func @invariant_affine_if2(
  // CHECK: memref.alloc
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: }
  // CHECK-NEXT: }

  return
}
151
152// -----
153
// Two nested affine.if ops whose conditions only use %arg0; the inner addition
// uses %cf9, which is defined inside the outer if. The checks expect the nest
// to keep its original shape (for, for, if, addf, if, addf).
// NOTE(review): presumably the value defined inside the outer if is what keeps
// the pass from hoisting the outer if - confirm against the pass
// implementation.
func @invariant_affine_nested_if() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf9 = arith.addf %cf8, %cf8 : f32
          affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
            %cf10 = arith.addf %cf9, %cf9 : f32
          }
      }
    }
  }

  // The label anchors the opcode-only checks below to this function's output;
  // the trailing '(' keeps it from also matching
  // @invariant_affine_nested_if_else.
  // CHECK-LABEL: func @invariant_affine_nested_if(
  // CHECK: memref.alloc
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: }
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}
183
184// -----
185
// Nested affine.if with an else branch: the outer if stores to %m[%arg0] and
// the inner else branch stores to %m[%arg1] (the inner induction variable).
// The checks expect the nest to keep its original shape.
func @invariant_affine_nested_if_else() {
  %m = memref.alloc() : memref<10xf32>
  %cf8 = arith.constant 8.0 : f32
  affine.for %arg0 = 0 to 10 {
    affine.for %arg1 = 0 to 10 {
      affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
          %cf9 = arith.addf %cf8, %cf8 : f32
          affine.store %cf9, %m[%arg0] : memref<10xf32>
          affine.if affine_set<(d0, d1) : (d1 - d0 >= 0)> (%arg0, %arg0) {
            %cf10 = arith.addf %cf9, %cf9 : f32
          } else {
            affine.store %cf9, %m[%arg1] : memref<10xf32>
          }
      }
    }
  }

  // The label anchors the opcode-only checks below to this function's output.
  // CHECK-LABEL: func @invariant_affine_nested_if_else(
  // CHECK: memref.alloc
  // CHECK-NEXT: arith.constant
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.for
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: affine.if
  // CHECK-NEXT: arith.addf
  // CHECK-NEXT: } else {
  // CHECK-NEXT: affine.store
  // CHECK-NEXT: }
  // CHECK-NEXT: }
  // CHECK-NEXT: }


  return
}
221
222// -----
223
// scf.for variant of the "invariant to both loops" case: the addition only
// uses constants defined outside the nest, and the checks expect it directly
// after those constants.
func @invariant_loop_dialect() {
  %ci0 = arith.constant 0 : index
  %ci10 = arith.constant 10 : index
  %ci1 = arith.constant 1 : index
  %m = memref.alloc() : memref<10xf32>
  %cf7 = arith.constant 7.0 : f32
  %cf8 = arith.constant 8.0 : f32
  scf.for %arg0 = %ci0 to %ci10 step %ci1 {
    scf.for %arg1 = %ci0 to %ci10 step %ci1 {
      %v0 = arith.addf %cf7, %cf8 : f32
    }
  }

  // SSA names are captured rather than hard-coded so the test does not depend
  // on the printer's value numbering.
  // CHECK-LABEL: func @invariant_loop_dialect(
  // CHECK: %{{.*}} = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: %[[CST0:.*]] = arith.constant 7.000000e+00 : f32
  // CHECK-NEXT: %[[CST1:.*]] = arith.constant 8.000000e+00 : f32
  // CHECK-NEXT: %{{.*}} = arith.addf %[[CST0]], %[[CST1]] : f32

  return
}
244
245// -----
246
// scf.for case where the addition uses both induction variables; the checks
// expect it to stay inside the inner loop.
func @variant_loop_dialect() {
  %ci0 = arith.constant 0 : index
  %ci10 = arith.constant 10 : index
  %ci1 = arith.constant 1 : index
  %m = memref.alloc() : memref<10xf32>
  scf.for %arg0 = %ci0 to %ci10 step %ci1 {
    scf.for %arg1 = %ci0 to %ci10 step %ci1 {
      %v0 = arith.addi %arg0, %arg1 : index
    }
  }

  // The alloc result name is matched anonymously so the test does not depend
  // on the printer's value numbering.
  // CHECK-LABEL: func @variant_loop_dialect(
  // CHECK: %{{.*}} = memref.alloc() : memref<10xf32>
  // CHECK-NEXT: scf.for
  // CHECK-NEXT: scf.for
  // CHECK-NEXT: arith.addi

  return
}
265
266// -----
267
// scf.parallel body with one addition of two constants defined outside the
// loop and one addition of the two induction variables. The checks expect the
// first in front of the loop and the second left in the body.
func @parallel_loop_with_invariant() {
  %c0 = arith.constant 0 : index
  %c10 = arith.constant 10 : index
  %c1 = arith.constant 1 : index
  %c7 = arith.constant 7 : i32
  %c8 = arith.constant 8 : i32
  scf.parallel (%arg0, %arg1) = (%c0, %c0) to (%c10, %c10) step (%c1, %c1) {
      %v0 = arith.addi %c7, %c8 : i32
      %v3 = arith.addi %arg0, %arg1 : index
  }

  // SSA names are captured rather than hard-coded so the test does not depend
  // on how the printer names constants and block arguments.
  // CHECK-LABEL: func @parallel_loop_with_invariant
  // CHECK: %[[C0:.*]] = arith.constant 0 : index
  // CHECK-NEXT: %[[C10:.*]] = arith.constant 10 : index
  // CHECK-NEXT: %[[C1:.*]] = arith.constant 1 : index
  // CHECK-NEXT: %[[C7:.*]] = arith.constant 7 : i32
  // CHECK-NEXT: %[[C8:.*]] = arith.constant 8 : i32
  // CHECK-NEXT: arith.addi %[[C7]], %[[C8]] : i32
  // CHECK-NEXT: scf.parallel (%[[I:[a-zA-Z0-9_]+]], %[[J:[a-zA-Z0-9_]+]]) = (%[[C0]], %[[C0]]) to (%[[C10]], %[[C10]]) step (%[[C1]], %[[C1]])
  // CHECK-NEXT:   arith.addi %[[I]], %[[J]] : index
  // CHECK-NEXT:   yield
  // CHECK-NEXT: }
  // CHECK-NEXT: return

  return
}
294
295