; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.if.break

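; SIAnnotateControlFlow rewrites each divergent loop exit below into the
; intrinsic pattern the OPT lines check for: llvm.amdgcn.if.break ORs the
; lanes that want to leave into the exit mask carried by the %phi.broken
; phi, llvm.amdgcn.loop returns true once every active lane has broken out
; so the branch to the exit block is taken, and llvm.amdgcn.end.cf
; reactivates those lanes in the exit block.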
; OPT-LABEL: @break_loop(
; OPT: bb1:
; OPT: icmp slt i32
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT: load volatile
; OPT: icmp slt i32
; OPT: xor i1 %cmp1
; OPT: br label %Flow

; OPT: Flow:
; OPT: call i64 @llvm.amdgcn.if.break.i64(
; OPT: call i1 @llvm.amdgcn.loop.i64(i64
; OPT: br i1 %{{[0-9]+}}, label %bb9, label %bb1

; OPT: bb9:
; OPT: call void @llvm.amdgcn.end.cf.i64(i64

; GCN-LABEL: {{^}}break_loop:
; GCN:      s_mov_b64         [[ACCUM_MASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}

; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1
; GCN:     s_add_i32 s6, s6, 1
; GCN:     s_or_b64 [[INNER_MASK:s\[[0-9]+:[0-9]+\]]], [[INNER_MASK]], exec
; GCN:     s_cmp_gt_i32 s6, -1
; GCN:     s_cbranch_scc1   [[FLOW:BB[0-9]+_[0-9]+]]

; GCN: ; %bb4
; GCN:      buffer_load_dword
; GCN:      v_cmp_ge_i32_e32  vcc
; GCN:      s_andn2_b64 [[INNER_MASK]], [[INNER_MASK]], exec
; GCN:      s_and_b64 [[BROKEN_MASK:s\[[0-9]+:[0-9]+\]]], vcc, exec
; GCN:      s_or_b64  [[INNER_MASK]], [[INNER_MASK]], [[BROKEN_MASK]]

; GCN: [[FLOW]]: ; %Flow
; GCN:           ;   in Loop: Header=BB0_1 Depth=1
; GCN:      s_and_b64         [[AND_MASK:s\[[0-9]+:[0-9]+\]]], exec, [[INNER_MASK]]
; GCN-NEXT: s_or_b64          [[ACCUM_MASK]], [[AND_MASK]], [[ACCUM_MASK]]
; GCN-NEXT: s_andn2_b64       exec, exec, [[ACCUM_MASK]]
; GCN-NEXT: s_cbranch_execnz  [[LOOP_ENTRY]]

; GCN: ; %bb.4: ; %bb9
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}

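; Here the break condition reaching the Flow block along the %bb1 edge of
; the phi is undef; the annotation still feeds it through the same
; if.break/loop/end.cf pattern as above.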
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9:                                              ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: A ConstantExpr compare of the address to null folds away, so the
; test compares against an inttoptr address instead to keep the condition
; a constantexpr.
@lds = addrspace(3) global i32 undef

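; The %bb1 incoming value of the break phi below is a constantexpr icmp of
; @lds against that inttoptr address, and it is passed to
; llvm.amdgcn.if.break unchanged.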
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9:                                              ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

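; A constant-true break condition on the %bb1 side of the Flow phi still
; goes through llvm.amdgcn.if.break rather than being folded into an
; unconditional exit.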
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9:                                              ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

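; With a constant-false break condition from %bb1, no intrinsic call is
; inserted in the loop header (the OPT-NOT below), but the Flow block still
; receives the if.break/loop pair.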
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %0, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NOT: call
; OPT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break.i64(i1 %tmp3, i64 %phi.broken)
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop.i64(i64 %0)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9:                                              ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %0)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap the branch order in the Flow block so that a true phi value means
; continue rather than break; the break condition must then be inverted
; with an xor before being fed to llvm.amdgcn.if.break.

; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %1, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
; OPT-NEXT: %0 = xor i1 %tmp3, true
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break.i64(i1 %0, i64 %phi.broken)
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop.i64(i64 %1)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf.i64(i64 %1)
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }