; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.break
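;
; The si-annotate-control-flow pass rewrites divergent loop exits with the
; llvm.amdgcn.if.break / llvm.amdgcn.loop / llvm.amdgcn.end.cf intrinsics,
; which thread a 64-bit mask of the lanes that have left the loop through a
; phi. Roughly, each exit is rewritten along these lines (a sketch of the
; pattern, not the exact output checked below):
;
;   %broken = call i64 @llvm.amdgcn.if.break(i1 %exit.cond, i64 %phi.broken)
;   %done = call i1 @llvm.amdgcn.loop(i64 %broken)
;   br i1 %done, label %exit, label %header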

; OPT-LABEL: @break_loop(
; OPT: bb1:
; OPT: call i64 @llvm.amdgcn.break(i64
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT: load volatile
; OPT: xor i1 %cmp1
; OPT: call i64 @llvm.amdgcn.if.break(
; OPT: br label %Flow

; OPT: Flow:
; OPT: call i1 @llvm.amdgcn.loop(i64
; OPT: br i1 %{{[0-9]+}}, label %bb9, label %bb1

; OPT: bb9:
; OPT: call void @llvm.amdgcn.end.cf(i64

; TODO: Can remove exec fixes in return block
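;
; At the machine level the accumulated break mask is applied to exec; roughly
; (a sketch, with the precise registers matched by the checks below):
;
;   s_or_b64 mask, vcc, mask       ; accumulate lanes that want to break
;   s_andn2_b64 exec, exec, mask   ; turn off the lanes that have broken
;   s_cbranch_execnz loop_header   ; keep looping while any lane is active
;   s_or_b64 exec, exec, mask      ; restore exec in the exit block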
; GCN-LABEL: {{^}}break_loop:
; GCN: s_mov_b64 [[INITMASK:s\[[0-9]+:[0-9]+\]]], 0{{$}}

; GCN: [[LOOP_ENTRY:BB[0-9]+_[0-9]+]]: ; %bb1
; GCN: s_or_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec, [[INITMASK]]
; GCN: v_cmp_lt_i32_e32 vcc, -1
; GCN: s_and_b64 vcc, exec, vcc
; GCN-NEXT: s_cbranch_vccnz [[FLOW:BB[0-9]+_[0-9]+]]

; GCN: ; BB#2: ; %bb4
; GCN: buffer_load_dword
; GCN: v_cmp_ge_i32_e32 vcc,
; GCN: s_or_b64 [[MASK]], vcc, [[INITMASK]]

; GCN: [[FLOW]]:
; GCN: s_mov_b64 [[INITMASK]], [[MASK]]
; GCN: s_andn2_b64 exec, exec, [[MASK]]
; GCN-NEXT: s_cbranch_execnz [[LOOP_ENTRY]]

; GCN: ; BB#4: ; %bb9
; GCN-NEXT: s_or_b64 exec, exec, [[MASK]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}

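; When the break condition coming into the flow block is undef, the annotator
; forwards it as-is: llvm.amdgcn.if.break is simply called with i1 undef.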
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.if.break(i1 undef, i64 %phi.broken)
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9:                                              ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef

; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.if.break(i1 icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), i64 %phi.broken)
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9:                                              ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

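; A constant-true break condition needs no i1 operand at all; it becomes an
; unconditional llvm.amdgcn.break of the accumulated mask.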
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT: %0 = call i64 @llvm.amdgcn.break(i64 %phi.broken)
; OPT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %1, %bb4 ], [ %0, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9:                                              ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

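; A constant-false break condition needs no break intrinsic in the header at
; all; %phi.broken is forwarded unchanged through the flow block.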
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %loop.phi, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NOT: call
; OPT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: %0 = call i64 @llvm.amdgcn.if.break(i1 %cmp1, i64 %phi.broken)
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %loop.phi = phi i64 [ %0, %bb4 ], [ %phi.broken, %bb1 ]
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %1 = call i1 @llvm.amdgcn.loop(i64 %loop.phi)
; OPT-NEXT: br i1 %1, label %bb9, label %bb1

; OPT: bb9:                                              ; preds = %Flow
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %loop.phi)
; OPT-NEXT: store volatile i32 7
; OPT-NEXT: ret void
define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap the order of the branches in the flow block so that the true side of
; the phi means continue rather than break.
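; The annotator then has to invert the condition (xor with true) before
; feeding it to llvm.amdgcn.if.break, as the checks below verify.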

; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT: bb1:
; OPT-NEXT: %phi.broken = phi i64 [ %1, %Flow ], [ 0, %bb ]
; OPT-NEXT: %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
; OPT-NEXT: %lsr.iv.next = add i32 %lsr.iv, 1
; OPT-NEXT: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; OPT-NEXT: br i1 %cmp0, label %bb4, label %Flow

; OPT: bb4:
; OPT-NEXT: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT: %cmp1 = icmp sge i32 %tmp, %load
; OPT-NEXT: br label %Flow

; OPT: Flow:
; OPT-NEXT: %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; OPT-NEXT: %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
; OPT-NEXT: %0 = xor i1 %tmp3, true
; OPT-NEXT: %1 = call i64 @llvm.amdgcn.if.break(i1 %0, i64 %phi.broken)
; OPT-NEXT: %2 = call i1 @llvm.amdgcn.loop(i64 %1)
; OPT-NEXT: br i1 %2, label %bb9, label %bb1

; OPT: bb9:
; OPT-NEXT: call void @llvm.amdgcn.end.cf(i64 %1)
; OPT-NEXT: store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT: ret void
define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }