1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
3; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck --check-prefix=FLAT %s
4
; Test: a self-loop (%ENDIF) whose exit condition %1 is computed once in
; %main_body, before the loop is entered. The SI/FLAT checks below pin the
; generated loop: inside .LBB0_1 the condition (vcc) is AND-ed with exec,
; accumulated into s[2:3] and removed from exec; in %ENDLOOP exec is OR-ed
; back with s[2:3] before the store.
; NOTE(review): judging by the function name, this guards against the loop
; break being emitted outside of the loop -- confirm against commit history.
5define amdgpu_kernel void @break_inserted_outside_of_loop(i32 addrspace(1)* %out, i32 %a) {
6; SI-LABEL: break_inserted_outside_of_loop:
7; SI:       ; %bb.0: ; %main_body
8; SI-NEXT:    s_load_dword s2, s[0:1], 0xb
9; SI-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x9
10; SI-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, -1, 0
11; SI-NEXT:    s_waitcnt lgkmcnt(0)
12; SI-NEXT:    v_and_b32_e32 v0, s2, v0
13; SI-NEXT:    v_and_b32_e32 v0, 1, v0
14; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
15; SI-NEXT:    s_mov_b64 s[2:3], 0
16; SI-NEXT:  .LBB0_1: ; %ENDIF
17; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
18; SI-NEXT:    s_and_b64 s[4:5], exec, vcc
19; SI-NEXT:    s_or_b64 s[2:3], s[4:5], s[2:3]
20; SI-NEXT:    s_andn2_b64 exec, exec, s[2:3]
21; SI-NEXT:    s_cbranch_execnz .LBB0_1
22; SI-NEXT:  ; %bb.2: ; %ENDLOOP
23; SI-NEXT:    s_or_b64 exec, exec, s[2:3]
24; SI-NEXT:    s_mov_b32 s3, 0xf000
25; SI-NEXT:    s_mov_b32 s2, -1
26; SI-NEXT:    v_mov_b32_e32 v0, 0
27; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
28; SI-NEXT:    s_endpgm
29;
30; FLAT-LABEL: break_inserted_outside_of_loop:
31; FLAT:       ; %bb.0: ; %main_body
32; FLAT-NEXT:    s_load_dword s2, s[0:1], 0x2c
33; FLAT-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
34; FLAT-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
35; FLAT-NEXT:    s_waitcnt lgkmcnt(0)
36; FLAT-NEXT:    v_and_b32_e32 v0, s2, v0
37; FLAT-NEXT:    v_and_b32_e32 v0, 1, v0
38; FLAT-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
39; FLAT-NEXT:    s_mov_b64 s[2:3], 0
40; FLAT-NEXT:  .LBB0_1: ; %ENDIF
41; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
42; FLAT-NEXT:    s_and_b64 s[4:5], exec, vcc
43; FLAT-NEXT:    s_or_b64 s[2:3], s[4:5], s[2:3]
44; FLAT-NEXT:    s_andn2_b64 exec, exec, s[2:3]
45; FLAT-NEXT:    s_cbranch_execnz .LBB0_1
46; FLAT-NEXT:  ; %bb.2: ; %ENDLOOP
47; FLAT-NEXT:    s_or_b64 exec, exec, s[2:3]
48; FLAT-NEXT:    s_mov_b32 s3, 0xf000
49; FLAT-NEXT:    s_mov_b32 s2, -1
50; FLAT-NEXT:    v_mov_b32_e32 v0, 0
51; FLAT-NEXT:    buffer_store_dword v0, off, s[0:3], 0
52; FLAT-NEXT:    s_endpgm
53main_body:
  ; Loop exit condition, computed before the loop: low bit of
  ; (%a & mbcnt.lo(-1, 0)).
54  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
55  %0 = and i32 %a, %tid
56  %1 = trunc i32 %0 to i1
57  br label %ENDIF
58
  ; Loop exit: stores 0 to %out and returns.
59ENDLOOP:
60  store i32 0, i32 addrspace(1)* %out
61  ret void
62
  ; Single-block loop with an empty body; it repeats until %1 is true.
63ENDIF:
64  br i1 %1, label %ENDLOOP, label %ENDIF
65}
66
; Test: a self-loop (%loop) whose exit condition %2 is a phi defined in
; %endif, i.e. before the loop. The phi merges constant false (from %if) with
; the comparison %b == 0 (from %else). The SI/FLAT checks below pin the
; generated code: the condition is materialized in s[4:5] (0 by default,
; overwritten in %else), then used inside .LBB1_3 to accumulate into s[2:3]
; and mask exec.
67define amdgpu_kernel void @phi_cond_outside_loop(i32 %b) {
68; SI-LABEL: phi_cond_outside_loop:
69; SI:       ; %bb.0: ; %entry
70; SI-NEXT:    v_mbcnt_lo_u32_b32_e64 v0, -1, 0
71; SI-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
72; SI-NEXT:    s_mov_b64 s[2:3], 0
73; SI-NEXT:    s_mov_b64 s[4:5], 0
74; SI-NEXT:    s_and_saveexec_b64 s[6:7], vcc
75; SI-NEXT:    s_cbranch_execz .LBB1_2
76; SI-NEXT:  ; %bb.1: ; %else
77; SI-NEXT:    s_load_dword s0, s[0:1], 0x9
78; SI-NEXT:    s_waitcnt lgkmcnt(0)
79; SI-NEXT:    s_cmp_eq_u32 s0, 0
80; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
81; SI-NEXT:    s_and_b64 s[4:5], s[0:1], exec
82; SI-NEXT:  .LBB1_2: ; %endif
83; SI-NEXT:    s_or_b64 exec, exec, s[6:7]
84; SI-NEXT:  .LBB1_3: ; %loop
85; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
86; SI-NEXT:    s_and_b64 s[0:1], exec, s[4:5]
87; SI-NEXT:    s_or_b64 s[2:3], s[0:1], s[2:3]
88; SI-NEXT:    s_andn2_b64 exec, exec, s[2:3]
89; SI-NEXT:    s_cbranch_execnz .LBB1_3
90; SI-NEXT:  ; %bb.4: ; %exit
91; SI-NEXT:    s_endpgm
92;
93; FLAT-LABEL: phi_cond_outside_loop:
94; FLAT:       ; %bb.0: ; %entry
95; FLAT-NEXT:    v_mbcnt_lo_u32_b32 v0, -1, 0
96; FLAT-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
97; FLAT-NEXT:    s_mov_b64 s[2:3], 0
98; FLAT-NEXT:    s_mov_b64 s[4:5], 0
99; FLAT-NEXT:    s_and_saveexec_b64 s[6:7], vcc
100; FLAT-NEXT:    s_cbranch_execz .LBB1_2
101; FLAT-NEXT:  ; %bb.1: ; %else
102; FLAT-NEXT:    s_load_dword s0, s[0:1], 0x24
103; FLAT-NEXT:    s_waitcnt lgkmcnt(0)
104; FLAT-NEXT:    s_cmp_eq_u32 s0, 0
105; FLAT-NEXT:    s_cselect_b64 s[0:1], -1, 0
106; FLAT-NEXT:    s_and_b64 s[4:5], s[0:1], exec
107; FLAT-NEXT:  .LBB1_2: ; %endif
108; FLAT-NEXT:    s_or_b64 exec, exec, s[6:7]
109; FLAT-NEXT:  .LBB1_3: ; %loop
110; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
111; FLAT-NEXT:    s_and_b64 s[0:1], exec, s[4:5]
112; FLAT-NEXT:    s_or_b64 s[2:3], s[0:1], s[2:3]
113; FLAT-NEXT:    s_andn2_b64 exec, exec, s[2:3]
114; FLAT-NEXT:    s_cbranch_execnz .LBB1_3
115; FLAT-NEXT:  ; %bb.4: ; %exit
116; FLAT-NEXT:    s_endpgm
117entry:
  ; Branch on the result of mbcnt.lo(-1, 0): zero goes to %if, non-zero to
  ; %else.
118  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
119  %0 = icmp eq i32 %tid , 0
120  br i1 %0, label %if, label %else
121
  ; Empty arm; contributes constant false to the phi in %endif.
122if:
123  br label %endif
124
  ; Contributes (%b == 0) to the phi in %endif.
125else:
126  %1 = icmp eq i32 %b, 0
127  br label %endif
128
  ; Loop exit condition, defined outside the loop as a phi of both arms.
129endif:
130  %2 = phi i1 [0, %if], [%1, %else]
131  br label %loop
132
  ; Single-block loop with an empty body; exits only when %2 is true. On the
  ; %if path %2 is false, so the IR-level loop never exits there.
133loop:
134  br i1 %2, label %exit, label %loop
135
136exit:
137  ret void
138}
139
; Test: a switch in the entry block where every successor (%sw.bb for cases 0
; and 60, and the default %sw.default) ends in `unreachable`. The SI/FLAT
; checks only pin the function label and the entry-block comment; no
; instructions are checked.
; %sw.epilog is not the target of any branch in this function, and the
; arguments %g and %l are not used by the body.
; NOTE(review): presumably a "does not crash" test for control flow with no
; reachable return -- confirm against commit history.
140define amdgpu_kernel void @switch_unreachable(i32 addrspace(1)* %g, i8 addrspace(3)* %l, i32 %x) nounwind {
141; SI-LABEL: switch_unreachable:
142; SI:       ; %bb.0: ; %centry
143;
144; FLAT-LABEL: switch_unreachable:
145; FLAT:       ; %bb.0: ; %centry
146centry:
147  switch i32 %x, label %sw.default [
148    i32 0, label %sw.bb
149    i32 60, label %sw.bb
150  ]
151
  ; Target of cases 0 and 60.
152sw.bb:
153  unreachable
154
  ; Target of every other value of %x.
155sw.default:
156  unreachable
157
  ; No predecessors: nothing in this function branches here.
158sw.epilog:
159  ret void
160}
161
162declare float @llvm.fabs.f32(float) nounwind readnone
163
; Test: nested while loops (%while.cond.outer / %while.cond) with several
; exits (%return via %convex.exit, %for.cond via %while.cond), followed by a
; second loop (%for.cond / %for.body / %if.end.2) and an infinite self-loop
; (%self.loop). The SI/FLAT checks below pin the generated control flow,
; including the %Flow*, %loop.exit.guard* and %DummyReturnBlock blocks that
; appear in the output but not in the input IR.
; The arguments %c2, %x and %y are not used by the body.
; NOTE(review): the name suggests a regression test for an assertion about
; loop "land" info in a structurizer -- confirm against commit history.
164define amdgpu_kernel void @loop_land_info_assert(i32 %c0, i32 %c1, i32 %c2, i32 %c3, i32 %x, i32 %y, i1 %arg) nounwind {
165; SI-LABEL: loop_land_info_assert:
166; SI:       ; %bb.0: ; %entry
167; SI-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x9
168; SI-NEXT:    s_load_dword s6, s[0:1], 0x0
169; SI-NEXT:    s_load_dword s14, s[0:1], 0xc
170; SI-NEXT:    v_bfrev_b32_e32 v0, 44
171; SI-NEXT:    s_waitcnt lgkmcnt(0)
172; SI-NEXT:    s_cmp_lt_i32 s2, 1
173; SI-NEXT:    s_cselect_b64 s[0:1], -1, 0
174; SI-NEXT:    s_cmp_lt_i32 s3, 4
175; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
176; SI-NEXT:    s_cmp_gt_i32 s3, 3
177; SI-NEXT:    s_cselect_b64 s[2:3], -1, 0
178; SI-NEXT:    s_and_b64 s[2:3], s[0:1], s[2:3]
179; SI-NEXT:    v_cmp_lt_f32_e64 s[6:7], |s6|, v0
180; SI-NEXT:    s_and_b64 s[0:1], exec, s[4:5]
181; SI-NEXT:    s_and_b64 s[2:3], exec, s[2:3]
182; SI-NEXT:    s_and_b64 s[4:5], exec, s[6:7]
183; SI-NEXT:    s_mov_b32 s7, 0xf000
184; SI-NEXT:    s_mov_b32 s6, -1
185; SI-NEXT:    v_mov_b32_e32 v0, 3
186; SI-NEXT:    s_branch .LBB3_4
187; SI-NEXT:  .LBB3_1: ; %Flow6
188; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
189; SI-NEXT:    s_mov_b64 s[8:9], 0
190; SI-NEXT:  .LBB3_2: ; %Flow5
191; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
192; SI-NEXT:    s_mov_b64 s[12:13], 0
193; SI-NEXT:  .LBB3_3: ; %Flow
194; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
195; SI-NEXT:    s_and_b64 vcc, exec, s[10:11]
196; SI-NEXT:    s_cbranch_vccnz .LBB3_8
197; SI-NEXT:  .LBB3_4: ; %while.cond
198; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
199; SI-NEXT:    s_mov_b64 s[12:13], -1
200; SI-NEXT:    s_mov_b64 s[8:9], -1
201; SI-NEXT:    s_mov_b64 s[10:11], -1
202; SI-NEXT:    s_mov_b64 vcc, s[0:1]
203; SI-NEXT:    s_cbranch_vccz .LBB3_3
204; SI-NEXT:  ; %bb.5: ; %convex.exit
205; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
206; SI-NEXT:    s_mov_b64 s[8:9], -1
207; SI-NEXT:    s_mov_b64 s[10:11], -1
208; SI-NEXT:    s_mov_b64 vcc, s[2:3]
209; SI-NEXT:    s_cbranch_vccz .LBB3_2
210; SI-NEXT:  ; %bb.6: ; %if.end
211; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
212; SI-NEXT:    s_mov_b64 s[10:11], -1
213; SI-NEXT:    s_mov_b64 vcc, s[4:5]
214; SI-NEXT:    s_cbranch_vccz .LBB3_1
215; SI-NEXT:  ; %bb.7: ; %if.else
216; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
217; SI-NEXT:    s_mov_b64 s[10:11], 0
218; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
219; SI-NEXT:    s_waitcnt vmcnt(0)
220; SI-NEXT:    s_branch .LBB3_1
221; SI-NEXT:  .LBB3_8: ; %loop.exit.guard4
222; SI-NEXT:    ; in Loop: Header=BB3_4 Depth=1
223; SI-NEXT:    s_and_b64 vcc, exec, s[8:9]
224; SI-NEXT:    s_cbranch_vccz .LBB3_4
225; SI-NEXT:  ; %bb.9: ; %loop.exit.guard
226; SI-NEXT:    s_and_b64 vcc, exec, s[12:13]
227; SI-NEXT:    s_cbranch_vccz .LBB3_13
228; SI-NEXT:  ; %bb.10: ; %for.cond.preheader
229; SI-NEXT:    s_cmpk_lt_i32 s14, 0x3e8
230; SI-NEXT:    s_cbranch_scc0 .LBB3_13
231; SI-NEXT:  ; %bb.11: ; %for.body
232; SI-NEXT:    s_and_b64 vcc, exec, 0
233; SI-NEXT:  .LBB3_12: ; %self.loop
234; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
235; SI-NEXT:    s_mov_b64 vcc, vcc
236; SI-NEXT:    s_cbranch_vccz .LBB3_12
237; SI-NEXT:  .LBB3_13: ; %DummyReturnBlock
238; SI-NEXT:    s_endpgm
239;
240; FLAT-LABEL: loop_land_info_assert:
241; FLAT:       ; %bb.0: ; %entry
242; FLAT-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x24
243; FLAT-NEXT:    s_load_dword s6, s[0:1], 0x0
244; FLAT-NEXT:    s_load_dword s14, s[0:1], 0x30
245; FLAT-NEXT:    v_bfrev_b32_e32 v0, 44
246; FLAT-NEXT:    s_waitcnt lgkmcnt(0)
247; FLAT-NEXT:    s_cmp_lt_i32 s2, 1
248; FLAT-NEXT:    s_cselect_b64 s[0:1], -1, 0
249; FLAT-NEXT:    s_cmp_lt_i32 s3, 4
250; FLAT-NEXT:    s_cselect_b64 s[4:5], -1, 0
251; FLAT-NEXT:    s_cmp_gt_i32 s3, 3
252; FLAT-NEXT:    s_cselect_b64 s[2:3], -1, 0
253; FLAT-NEXT:    s_and_b64 s[2:3], s[0:1], s[2:3]
254; FLAT-NEXT:    v_cmp_lt_f32_e64 s[6:7], |s6|, v0
255; FLAT-NEXT:    s_and_b64 s[0:1], exec, s[4:5]
256; FLAT-NEXT:    s_and_b64 s[2:3], exec, s[2:3]
257; FLAT-NEXT:    s_and_b64 s[4:5], exec, s[6:7]
258; FLAT-NEXT:    s_mov_b32 s7, 0xf000
259; FLAT-NEXT:    s_mov_b32 s6, -1
260; FLAT-NEXT:    v_mov_b32_e32 v0, 3
261; FLAT-NEXT:    s_branch .LBB3_4
262; FLAT-NEXT:  .LBB3_1: ; %Flow6
263; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
264; FLAT-NEXT:    s_mov_b64 s[8:9], 0
265; FLAT-NEXT:  .LBB3_2: ; %Flow5
266; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
267; FLAT-NEXT:    s_mov_b64 s[12:13], 0
268; FLAT-NEXT:  .LBB3_3: ; %Flow
269; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
270; FLAT-NEXT:    s_and_b64 vcc, exec, s[10:11]
271; FLAT-NEXT:    s_cbranch_vccnz .LBB3_8
272; FLAT-NEXT:  .LBB3_4: ; %while.cond
273; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
274; FLAT-NEXT:    s_mov_b64 s[12:13], -1
275; FLAT-NEXT:    s_mov_b64 s[8:9], -1
276; FLAT-NEXT:    s_mov_b64 s[10:11], -1
277; FLAT-NEXT:    s_mov_b64 vcc, s[0:1]
278; FLAT-NEXT:    s_cbranch_vccz .LBB3_3
279; FLAT-NEXT:  ; %bb.5: ; %convex.exit
280; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
281; FLAT-NEXT:    s_mov_b64 s[8:9], -1
282; FLAT-NEXT:    s_mov_b64 s[10:11], -1
283; FLAT-NEXT:    s_mov_b64 vcc, s[2:3]
284; FLAT-NEXT:    s_cbranch_vccz .LBB3_2
285; FLAT-NEXT:  ; %bb.6: ; %if.end
286; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
287; FLAT-NEXT:    s_mov_b64 s[10:11], -1
288; FLAT-NEXT:    s_mov_b64 vcc, s[4:5]
289; FLAT-NEXT:    s_cbranch_vccz .LBB3_1
290; FLAT-NEXT:  ; %bb.7: ; %if.else
291; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
292; FLAT-NEXT:    s_mov_b64 s[10:11], 0
293; FLAT-NEXT:    buffer_store_dword v0, off, s[4:7], 0
294; FLAT-NEXT:    s_waitcnt vmcnt(0)
295; FLAT-NEXT:    s_branch .LBB3_1
296; FLAT-NEXT:  .LBB3_8: ; %loop.exit.guard4
297; FLAT-NEXT:    ; in Loop: Header=BB3_4 Depth=1
298; FLAT-NEXT:    s_and_b64 vcc, exec, s[8:9]
299; FLAT-NEXT:    s_cbranch_vccz .LBB3_4
300; FLAT-NEXT:  ; %bb.9: ; %loop.exit.guard
301; FLAT-NEXT:    s_and_b64 vcc, exec, s[12:13]
302; FLAT-NEXT:    s_cbranch_vccz .LBB3_13
303; FLAT-NEXT:  ; %bb.10: ; %for.cond.preheader
304; FLAT-NEXT:    s_cmpk_lt_i32 s14, 0x3e8
305; FLAT-NEXT:    s_cbranch_scc0 .LBB3_13
306; FLAT-NEXT:  ; %bb.11: ; %for.body
307; FLAT-NEXT:    s_and_b64 vcc, exec, 0
308; FLAT-NEXT:  .LBB3_12: ; %self.loop
309; FLAT-NEXT:    ; =>This Inner Loop Header: Depth=1
310; FLAT-NEXT:    s_mov_b64 vcc, vcc
311; FLAT-NEXT:    s_cbranch_vccz .LBB3_12
312; FLAT-NEXT:  .LBB3_13: ; %DummyReturnBlock
313; FLAT-NEXT:    s_endpgm
314entry:
  ; %cmp = (%c0 > 0); computed once, consumed in %convex.exit.
315  %cmp = icmp sgt i32 %c0, 0
316  br label %while.cond.outer
317
  ; Outer loop header: loads a float from an undef address; %tmp is consumed
  ; in %if.end.
318while.cond.outer:
319  %tmp = load float, float addrspace(1)* undef
320  br label %while.cond
321
  ; Inner loop header: continue into the loop body when %c1 < 4, otherwise
  ; leave both loops for %for.cond.
322while.cond:
323  %cmp1 = icmp slt i32 %c1, 4
324  br i1 %cmp1, label %convex.exit, label %for.cond
325
  ; Only reached on the true edge of %cmp1, which is also an operand of %or.
326convex.exit:
327  %or = or i1 %cmp, %cmp1
328  br i1 %or, label %return, label %if.end
329
  ; |%tmp| < 2^-23 (the hex literal 0x3E80000000000000) continues the inner
  ; loop via %if.else; otherwise control goes back to the outer loop header.
330if.end:
331  %tmp3 = call float @llvm.fabs.f32(float %tmp) nounwind readnone
332  %cmp2 = fcmp olt float %tmp3, 0x3E80000000000000
333  br i1 %cmp2, label %if.else, label %while.cond.outer
334
  ; Inner loop latch: volatile store of 3 to an undef address, then back to
  ; %while.cond.
335if.else:
336  store volatile i32 3, i32 addrspace(1)* undef, align 4
337  br label %while.cond
338
  ; Second loop header, entered from %while.cond and from %if.end.2.
339for.cond:
340  %cmp3 = icmp slt i32 %c3, 1000
341  br i1 %cmp3, label %for.body, label %return
342
  ; Branches on %cmp3 again; this block is only reached on %cmp3's true edge.
343for.body:
344  br i1 %cmp3, label %self.loop, label %if.end.2
345
  ; Back edge to %for.cond when both %cmp3 and %arg are false.
346if.end.2:
347  %or.cond2 = or i1 %cmp3, %arg
348  br i1 %or.cond2, label %return, label %for.cond
349
  ; Infinite loop: the block's only successor is itself.
350self.loop:
351 br label %self.loop
352
353return:
354  ret void
355}
356
; Intrinsic called by break_inserted_outside_of_loop and phi_cond_outside_loop
; to produce %tid; declared with attribute group #0.
357declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0
358
; Attribute group #0: referenced by the mbcnt.lo declaration and its call
; sites in this file.
359attributes #0 = { nounwind readnone }
360