; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.if.break

define amdgpu_kernel void @break_loop(i32 %arg) #0 {
; OPT-LABEL: @break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP2:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    [[TMP0:%.*]] = xor i1 [[CMP1]], true
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
; OPT-NEXT:    br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; OPT-NEXT:    ret void
;
; GCN-LABEL: break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB0_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB0_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB0_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB0_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %my.tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}

define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: undef_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr6_sgpr7
; GCN-NEXT:    ; implicit-def: $sgpr4
; GCN-NEXT:  BB1_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[6:7], s[6:7], exec
; GCN-NEXT:    s_and_b64 s[8:9], s[0:1], exec
; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
; GCN-NEXT:    s_cmp_gt_i32 s4, -1
; GCN-NEXT:    s_cbranch_scc1 BB1_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[6:7], s[6:7], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
; GCN-NEXT:  BB1_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    s_add_i32 s4, s4, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[6:7]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB1_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: ConstantExpr compare of address to null folds away
@lds = addrspace(3) global i32 undef

define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: constexpr_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB2_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB2_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB2_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB2_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB3_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB3_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB3_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB3_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ false, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: false_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB4_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB4_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB4_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB4_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap the order of the branch targets in the flow block so that the true
; phi value is the loop continue condition.

define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP1:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP0:%.*]] = xor i1 [[MY_TMP3]], true
; OPT-NEXT:    [[TMP1]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP0]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP2:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP1]])
; OPT-NEXT:    br i1 [[TMP2]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP1]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: invert_true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB5_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB5_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0 glc
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB5_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    s_xor_b64 s[8:9], s[4:5], -1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[8:9]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB5_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
