; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -mtriple=amdgcn-- -S -structurizecfg -si-annotate-control-flow %s | FileCheck -check-prefix=OPT %s
; RUN: llc -march=amdgcn -verify-machineinstrs -disable-block-placement < %s | FileCheck -check-prefix=GCN %s

; Uses llvm.amdgcn.if.break to lower the divergent break out of the loop.

define amdgpu_kernel void @break_loop(i32 %arg) #0 {
; OPT-LABEL: @break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP2:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[LSR_IV_NEXT:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    [[TMP0:%.*]] = xor i1 [[CMP1]], true
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[TMP1:%.*]] = phi i1 [ [[TMP0]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP2]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP1]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP3:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP2]])
; OPT-NEXT:    br i1 [[TMP3]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP2]])
; OPT-NEXT:    ret void
;
; GCN-LABEL: break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB0_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB0_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB0_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB0_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %lsr.iv.next, %bb4 ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %bb9

bb4:
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp slt i32 %my.tmp, %load
  br i1 %cmp1, label %bb1, label %bb9

bb9:
  ret void
}

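; The break condition reaching the flow block is an undef phi input on the
; path that skips bb4; it still has to be fed through llvm.amdgcn.if.break.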
define amdgpu_kernel void @undef_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @undef_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: undef_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr6_sgpr7
; GCN-NEXT:    ; implicit-def: $sgpr4
; GCN-NEXT:  BB1_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[6:7], s[6:7], exec
; GCN-NEXT:    s_and_b64 s[8:9], s[0:1], exec
; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
; GCN-NEXT:    s_cmp_gt_i32 s4, -1
; GCN-NEXT:    s_cbranch_scc1 BB1_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[6:7], s[6:7], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
; GCN-NEXT:  BB1_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; GCN-NEXT:    s_add_i32 s4, s4, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[6:7]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB1_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ undef, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; FIXME: A ConstantExpr compare of the address to null folds away, so the
; test compares against an inttoptr constant instead.
@lds = addrspace(3) global i32 undef

define amdgpu_kernel void @constexpr_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @constexpr_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: constexpr_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, lds@abs32@lo
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr3
; GCN-NEXT:  BB2_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    v_cmp_ne_u32_e64 s[8:9], s2, 4
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], s[8:9], exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:    s_cmp_gt_i32 s3, -1
; GCN-NEXT:    s_cbranch_scc1 BB2_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[4:7], 0
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB2_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; GCN-NEXT:    s_add_i32 s3, s3, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB2_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds), %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

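; The break condition phi is constant true on the path that skips bb4, so
; lanes that fail the compare in bb1 break out of the loop unconditionally.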
define amdgpu_kernel void @true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB3_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB3_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB3_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB3_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

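; Same as above, but with a constant false phi input: lanes that skip bb4
; keep looping, so the only break comes from the compare in bb4.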
define amdgpu_kernel void @false_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @false_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP0:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ false, [[BB1]] ]
; OPT-NEXT:    [[TMP0]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[MY_TMP3]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP1:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP0]])
; OPT-NEXT:    br i1 [[TMP1]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP0]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: false_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB4_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB4_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB4_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB4_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[4:5]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB4_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ false, %bb1 ]
  br i1 %my.tmp3, label %bb9, label %bb1

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; Swap the order of the branch targets in the flow block so that the true
; phi value selects the loop continue rather than the break.

define amdgpu_kernel void @invert_true_phi_cond_break_loop(i32 %arg) #0 {
; OPT-LABEL: @invert_true_phi_cond_break_loop(
; OPT-NEXT:  bb:
; OPT-NEXT:    [[ID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; OPT-NEXT:    [[MY_TMP:%.*]] = sub i32 [[ID]], [[ARG:%.*]]
; OPT-NEXT:    br label [[BB1:%.*]]
; OPT:       bb1:
; OPT-NEXT:    [[PHI_BROKEN:%.*]] = phi i64 [ [[TMP1:%.*]], [[FLOW:%.*]] ], [ 0, [[BB:%.*]] ]
; OPT-NEXT:    [[LSR_IV:%.*]] = phi i32 [ undef, [[BB]] ], [ [[MY_TMP2:%.*]], [[FLOW]] ]
; OPT-NEXT:    [[LSR_IV_NEXT:%.*]] = add i32 [[LSR_IV]], 1
; OPT-NEXT:    [[CMP0:%.*]] = icmp slt i32 [[LSR_IV_NEXT]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[BB4:%.*]], label [[FLOW]]
; OPT:       bb4:
; OPT-NEXT:    [[LOAD:%.*]] = load volatile i32, i32 addrspace(1)* undef, align 4
; OPT-NEXT:    [[CMP1:%.*]] = icmp sge i32 [[MY_TMP]], [[LOAD]]
; OPT-NEXT:    br label [[FLOW]]
; OPT:       Flow:
; OPT-NEXT:    [[MY_TMP2]] = phi i32 [ [[LSR_IV_NEXT]], [[BB4]] ], [ undef, [[BB1]] ]
; OPT-NEXT:    [[MY_TMP3:%.*]] = phi i1 [ [[CMP1]], [[BB4]] ], [ true, [[BB1]] ]
; OPT-NEXT:    [[TMP0:%.*]] = xor i1 [[MY_TMP3]], true
; OPT-NEXT:    [[TMP1]] = call i64 @llvm.amdgcn.if.break.i64(i1 [[TMP0]], i64 [[PHI_BROKEN]])
; OPT-NEXT:    [[TMP2:%.*]] = call i1 @llvm.amdgcn.loop.i64(i64 [[TMP1]])
; OPT-NEXT:    br i1 [[TMP2]], label [[BB9:%.*]], label [[BB1]]
; OPT:       bb9:
; OPT-NEXT:    call void @llvm.amdgcn.end.cf.i64(i64 [[TMP1]])
; OPT-NEXT:    store volatile i32 7, i32 addrspace(3)* undef
; OPT-NEXT:    ret void
;
; GCN-LABEL: invert_true_phi_cond_break_loop:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dword s3, s[0:1], 0x9
; GCN-NEXT:    s_mov_b64 s[0:1], 0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_subrev_i32_e32 v0, vcc, s3, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT:    ; implicit-def: $sgpr6
; GCN-NEXT:  BB5_1: ; %bb1
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_cmp_gt_i32 s6, -1
; GCN-NEXT:    s_cbranch_scc1 BB5_3
; GCN-NEXT:  ; %bb.2: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    buffer_load_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ge_i32_e32 vcc, v0, v1
; GCN-NEXT:    s_andn2_b64 s[4:5], s[4:5], exec
; GCN-NEXT:    s_and_b64 s[8:9], vcc, exec
; GCN-NEXT:    s_or_b64 s[4:5], s[4:5], s[8:9]
; GCN-NEXT:  BB5_3: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB5_1 Depth=1
; GCN-NEXT:    s_add_i32 s6, s6, 1
; GCN-NEXT:    s_xor_b64 s[8:9], s[4:5], -1
; GCN-NEXT:    s_and_b64 s[8:9], exec, s[8:9]
; GCN-NEXT:    s_or_b64 s[0:1], s[8:9], s[0:1]
; GCN-NEXT:    s_andn2_b64 exec, exec, s[0:1]
; GCN-NEXT:    s_cbranch_execnz BB5_1
; GCN-NEXT:  ; %bb.4: ; %bb9
; GCN-NEXT:    s_or_b64 exec, exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 7
; GCN-NEXT:    s_mov_b32 m0, -1
; GCN-NEXT:    ds_write_b32 v0, v0
; GCN-NEXT:    s_endpgm
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

bb1:                                              ; preds = %Flow, %bb
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

bb4:                                              ; preds = %bb1
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

Flow:                                             ; preds = %bb4, %bb1
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i1 [ %cmp1, %bb4 ], [ true, %bb1 ]
  br i1 %my.tmp3, label %bb1, label %bb9

bb9:                                              ; preds = %Flow
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }