; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.break

define amdgpu_kernel void @break_loop(i32 %arg) #0 {
; OPT-LABEL: @break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP2:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    [[TMP0:%.*]] = xor i1 [[CMP1]], true
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
; OPT-NEXT:    br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; OPT-NEXT:    ret void
;
; GCN-LABEL: break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB0_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB0_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB0_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB0_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %bb4, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %my.tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:                                              ; preds = %bb4, %bb1
  ret void
}
83
define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: undef_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB1_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB1_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB1_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB1_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

; The break condition phi is undef on the %bb1 -> %Flow edge.
Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}
171
; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef
174
define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: constexpr_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB2_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB2_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB2_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB2_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

; The break condition on the %bb1 -> %Flow edge is a ConstantExpr icmp.
Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}
262
define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB3_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB3_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB3_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB3_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

; The break condition is constant true on the %bb1 -> %Flow edge.
Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}
350
define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ false, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: false_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB4_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB4_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB4_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB4_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

; The break condition is constant false on the %bb1 -> %Flow edge.
Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}
438
; Swap the order of the branch successors in the flow block so that the
; true side of the phi is the loop continue.

define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP1:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP0:%.*]] = xor i1 [[MY_TMP3]], true
; OPT-NEXT:    [[TMP1]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP0]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP2:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP1]])
; OPT-NEXT:    br i1 [[TMP2]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP1]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: invert_true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB5_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB5_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB5_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    s_xor_b64 s[8:9], s[4:5], -1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[8:9]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB5_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

; Unlike the other tests, the true successor here is the loop header, so
; the condition must be inverted for the break.
Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}
531
declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }