; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.if.break to lower the loop break condition.

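; Each test breaks out of a divergent loop; StructurizeCFG plus
; SIAnnotateControlFlow should lower the break condition through
; llvm.amdgcn.if.break / llvm.amdgcn.loop / llvm.amdgcn.end.cf.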
define amdgpu_kernel void @break_loop(i32 %arg) #0 {
; OPT-LABEL: @break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP2:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[TMP1:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
; OPT-NEXT:    br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; OPT-NEXT:    ret void
;
; GCN-LABEL: break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB0_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB0_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB0_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB0_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %my.tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}

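; The phi selecting the break condition has an undef input on the path
; straight from the loop header.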
define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: undef_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB1_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB1_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB1_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB1_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef

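; The phi input from the loop header is a ConstantExpr compare of an inttoptr
; address against @lds; per the FIXME above, a compare against null would
; fold away.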
define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: constexpr_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB2_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB2_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB2_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB2_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

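; The phi input from the loop header is constant true, so every lane that
; skips bb4 breaks out of the loop.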
define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB3_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB3_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB3_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB3_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

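; As above, but the phi input from the loop header is constant false, so
; lanes that skip bb4 always continue the loop.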
define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ false, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: false_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB4_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB4_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB4_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB4_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap the order of the branch targets in the flow block so that a true phi
; value means continue rather than break; the annotator must invert the
; condition with an xor before feeding it to llvm.amdgcn.if.break.

define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP1:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP0:%.*]] = xor i1 [[MY_TMP3]], true
; OPT-NEXT:    [[TMP1]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP0]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP2:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP1]])
; OPT-NEXT:    br i1 [[TMP2]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP1]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: invert_true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  .LBB5_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 .LBB5_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  .LBB5_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    s_xor_b64 s[8:9], s[4:5], -1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[8:9]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz .LBB5_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }