; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.if.break
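; The structurizer plus SIAnnotateControlFlow should turn the divergent loop
; exit into llvm.amdgcn.if.break / llvm.amdgcn.loop calls, with
; llvm.amdgcn.end.cf in the exit block; the OPT checks below verify that
; sequence.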

; OPT-LABEL: @break_loop(
; OPT: bb1:
; OPT: icmp slt i32
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT: load volatile
; OPT: icmp slt i32
; OPT: xor i1 %cmp1
; OPT: br label %Flow

; OPT: Flow:
; OPT: call i64 @llvm.amdgcn.if.break.i64.i64(
; OPT: call i1 @llvm.amdgcn.loop.i64(i64
; OPT: br i1 %{{[0-9]+}}, label %bb9, label %bb1

; OPT: bb9:
; OPT: call void @llvm.amdgcn.end.cf.i64(i64

; GCN-LABEL: {{^}}break_loop:
; GCN:      s_mov_b64         [[ACCUM_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}

; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1
; GCN:     s_add_i32 s4, s4, 1
; GCN:     s_or_b64 [[INNER_MASK:s\[[0-9]+:[0-9]+\]]], [[INNER_MASK]], exec
; GCN:     s_cmp_gt_i32 s4, -1
; GCN:     s_cbranch_scc1   [[FLOW:BB[0-9]+_[0-9]+]]

; GCN: ; %bb4
; GCN:      buffer_load_dword
; GCN:      v_cmp_ge_i32_e32  vcc
; GCN:      s_andn2_b64 [[INNER_MASK]], [[INNER_MASK]], exec
; GCN:      s_and_b64 [[BROKEN_MASK:s\[[0-9]+:[0-9]+\]]], vcc, exec
; GCN:      s_or_b64  [[INNER_MASK]], [[INNER_MASK]], [[BROKEN_MASK]]

; GCN: [[FLOW]]: ; %Flow
; GCN:           ;   in Loop: Header=BB0_1 Depth=1
; GCN:      s_and_b64         [[BROKEN_MASK]], exec, [[INNER_MASK]]
; GCN:      s_or_b64          [[BROKEN_MASK]], [[BROKEN_MASK]], [[ACCUM_MASK]]
; GCN:      s_mov_b64         [[ACCUM_MASK]], [[BROKEN_MASK]]
; GCN:      s_andn2_b64       exec, exec, [[BROKEN_MASK]]
; GCN-NEXT: s_cbranch_execnz  [[LOOP_ENTRY]]

; GCN: ; %bb.4: ; %bb9
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}

; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9:                                              ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
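; Note: the phi input in the test below compares @lds against inttoptr (i32 4)
; rather than null, presumably so the ConstantExpr condition survives to the
; annotator instead of folding away.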
@lds = addrspace(3) global i32 undef

; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9:                                              ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; OPT-LABEL: @true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9:                                              ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; OPT-LABEL: @false_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NOT: call
; OPT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9:                                              ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap the order of the branches in the flow block so that the true phi value
; means continue.
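; With the branch order inverted, the annotator is expected to negate the phi
; value first (the xor in the Flow checks below) before passing it to
; llvm.amdgcn.if.break.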

; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %1, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
; OPT-NEXT: %0 = xor i1 %tmp3, true
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break.i64.i64(i1 %0, i64 %phi.broken)
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop.i64(i64 %1)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %1)
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }