1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,SI %s
3; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,GFX10-WAVE64 %s
4; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX10-WAVE32 %s
5
; Kill with a constant-true condition.
; The checks below use the prefix shared by all three RUN lines and expect the
; function body to consist of s_endpgm alone, i.e. no exec update and no
; early-exit block are matched for this case.
6define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
7; GCN-LABEL: test_kill_depth_0_imm_pos:
8; GCN:       ; %bb.0:
9; GCN-NEXT:    s_endpgm
10  call void @llvm.amdgcn.kill(i1 true)
11  ret void
12}
13
; Kill with a constant-false condition.
; Expected code: exec is and-not'ed with itself, then s_cbranch_scc0 jumps to
; a block that writes 0 to exec, issues a null export with "done vm" and ends
; the program. The fall-through path ends the program directly.
; The wave64 checks use the 64-bit exec forms; the wave32 checks use exec_lo.
14define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
15; WAVE64-LABEL: test_kill_depth_0_imm_neg:
16; WAVE64:       ; %bb.0:
17; WAVE64-NEXT:    s_andn2_b64 exec, exec, exec
18; WAVE64-NEXT:    s_cbranch_scc0 .LBB1_1
19; WAVE64-NEXT:    s_endpgm
20; WAVE64-NEXT:  .LBB1_1:
21; WAVE64-NEXT:    s_mov_b64 exec, 0
22; WAVE64-NEXT:    exp null off, off, off, off done vm
23; WAVE64-NEXT:    s_endpgm
24;
25; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg:
26; GFX10-WAVE32:       ; %bb.0:
27; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, exec_lo
28; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB1_1
29; GFX10-WAVE32-NEXT:    s_endpgm
30; GFX10-WAVE32-NEXT:  .LBB1_1:
31; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
32; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
33; GFX10-WAVE32-NEXT:    s_endpgm
34  call void @llvm.amdgcn.kill(i1 false)
35  ret void
36}
37
38; FIXME: Ideally only one early-exit would be emitted
; Two back-to-back kills, both with a constant-false condition.
; Expected code: exec is first copied into s[0:1] (s0 in the wave32 checks).
; Each kill then and-nots that copy with exec and branches on scc0, and both
; branches target the same .LBB2_2 block, which zeroes exec, issues the null
; export and ends the program.
39define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
40; WAVE64-LABEL: test_kill_depth_0_imm_neg_x2:
41; WAVE64:       ; %bb.0:
42; WAVE64-NEXT:    s_mov_b64 s[0:1], exec
43; WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
44; WAVE64-NEXT:    s_cbranch_scc0 .LBB2_2
45; WAVE64-NEXT:  ; %bb.1:
46; WAVE64-NEXT:    s_mov_b64 exec, 0
47; WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
48; WAVE64-NEXT:    s_cbranch_scc0 .LBB2_2
49; WAVE64-NEXT:    s_endpgm
50; WAVE64-NEXT:  .LBB2_2:
51; WAVE64-NEXT:    s_mov_b64 exec, 0
52; WAVE64-NEXT:    exp null off, off, off, off done vm
53; WAVE64-NEXT:    s_endpgm
54;
55; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg_x2:
56; GFX10-WAVE32:       ; %bb.0:
57; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
58; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, exec_lo
59; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB2_2
60; GFX10-WAVE32-NEXT:  ; %bb.1:
61; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
62; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, exec_lo
63; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB2_2
64; GFX10-WAVE32-NEXT:    s_endpgm
65; GFX10-WAVE32-NEXT:  .LBB2_2:
66; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
67; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
68; GFX10-WAVE32-NEXT:    s_endpgm
69  call void @llvm.amdgcn.kill(i1 false)
70  call void @llvm.amdgcn.kill(i1 false)
71  ret void
72}
73
; Kill on a per-lane condition, the result of `fcmp olt %x, 0.0`.
; Expected code: v_cmp_ngt_f32 of 0 against v0 writes vcc (vcc_lo in the wave32
; checks), that mask is and-not'ed out of exec, and s_cbranch_scc0 jumps to the
; block that zeroes exec, issues the null export and ends the program.
74define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
75; WAVE64-LABEL: test_kill_depth_var:
76; WAVE64:       ; %bb.0:
77; WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
78; WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
79; WAVE64-NEXT:    s_cbranch_scc0 .LBB3_1
80; WAVE64-NEXT:    s_endpgm
81; WAVE64-NEXT:  .LBB3_1:
82; WAVE64-NEXT:    s_mov_b64 exec, 0
83; WAVE64-NEXT:    exp null off, off, off, off done vm
84; WAVE64-NEXT:    s_endpgm
85;
86; GFX10-WAVE32-LABEL: test_kill_depth_var:
87; GFX10-WAVE32:       ; %bb.0:
88; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
89; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
90; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB3_1
91; GFX10-WAVE32-NEXT:    s_endpgm
92; GFX10-WAVE32-NEXT:  .LBB3_1:
93; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
94; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
95; GFX10-WAVE32-NEXT:    s_endpgm
96  %cmp = fcmp olt float %x, 0.0
97  call void @llvm.amdgcn.kill(i1 %cmp)
98  ret void
99}
100
101; FIXME: Ideally only one early-exit would be emitted
; Two kills that consume the very same %cmp value.
; Expected code: the v_cmp_ngt_f32 against v0 still appears twice, once per
; kill, and each kill has its own s_cbranch_scc0 to the shared .LBB4_2 exit
; block. The tahiti and gfx1010 wave64 checks differ only in whether the copy
; of exec into s[0:1] comes before or after the first compare.
102define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
103; SI-LABEL: test_kill_depth_var_x2_same:
104; SI:       ; %bb.0:
105; SI-NEXT:    s_mov_b64 s[0:1], exec
106; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
107; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
108; SI-NEXT:    s_cbranch_scc0 .LBB4_2
109; SI-NEXT:  ; %bb.1:
110; SI-NEXT:    s_andn2_b64 exec, exec, vcc
111; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
112; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
113; SI-NEXT:    s_cbranch_scc0 .LBB4_2
114; SI-NEXT:    s_endpgm
115; SI-NEXT:  .LBB4_2:
116; SI-NEXT:    s_mov_b64 exec, 0
117; SI-NEXT:    exp null off, off, off, off done vm
118; SI-NEXT:    s_endpgm
119;
120; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_same:
121; GFX10-WAVE64:       ; %bb.0:
122; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
123; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
124; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
125; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB4_2
126; GFX10-WAVE64-NEXT:  ; %bb.1:
127; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
128; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
129; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
130; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB4_2
131; GFX10-WAVE64-NEXT:    s_endpgm
132; GFX10-WAVE64-NEXT:  .LBB4_2:
133; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
134; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
135; GFX10-WAVE64-NEXT:    s_endpgm
136;
137; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_same:
138; GFX10-WAVE32:       ; %bb.0:
139; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
140; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
141; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
142; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB4_2
143; GFX10-WAVE32-NEXT:  ; %bb.1:
144; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
145; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
146; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
147; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB4_2
148; GFX10-WAVE32-NEXT:    s_endpgm
149; GFX10-WAVE32-NEXT:  .LBB4_2:
150; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
151; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
152; GFX10-WAVE32-NEXT:    s_endpgm
153  %cmp = fcmp olt float %x, 0.0
154  call void @llvm.amdgcn.kill(i1 %cmp)
155  call void @llvm.amdgcn.kill(i1 %cmp)
156  ret void
157}
158
159; FIXME: Ideally only one early-exit would be emitted
; Two kills on independent conditions: `%x < 0.0` first, then `%y < 0.0`.
; Expected code: the first compare reads v0 and the second reads v1. Both
; kills and-not their mask out of the exec copy in s[0:1] (s0 in the wave32
; checks) and branch on scc0 to the shared .LBB5_2 exit block.
160define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
161; SI-LABEL: test_kill_depth_var_x2:
162; SI:       ; %bb.0:
163; SI-NEXT:    s_mov_b64 s[0:1], exec
164; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
165; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
166; SI-NEXT:    s_cbranch_scc0 .LBB5_2
167; SI-NEXT:  ; %bb.1:
168; SI-NEXT:    s_andn2_b64 exec, exec, vcc
169; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
170; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
171; SI-NEXT:    s_cbranch_scc0 .LBB5_2
172; SI-NEXT:    s_endpgm
173; SI-NEXT:  .LBB5_2:
174; SI-NEXT:    s_mov_b64 exec, 0
175; SI-NEXT:    exp null off, off, off, off done vm
176; SI-NEXT:    s_endpgm
177;
178; GFX10-WAVE64-LABEL: test_kill_depth_var_x2:
179; GFX10-WAVE64:       ; %bb.0:
180; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
181; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
182; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
183; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB5_2
184; GFX10-WAVE64-NEXT:  ; %bb.1:
185; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
186; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
187; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
188; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB5_2
189; GFX10-WAVE64-NEXT:    s_endpgm
190; GFX10-WAVE64-NEXT:  .LBB5_2:
191; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
192; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
193; GFX10-WAVE64-NEXT:    s_endpgm
194;
195; GFX10-WAVE32-LABEL: test_kill_depth_var_x2:
196; GFX10-WAVE32:       ; %bb.0:
197; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
198; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
199; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
200; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB5_2
201; GFX10-WAVE32-NEXT:  ; %bb.1:
202; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
203; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v1
204; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
205; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB5_2
206; GFX10-WAVE32-NEXT:    s_endpgm
207; GFX10-WAVE32-NEXT:  .LBB5_2:
208; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
209; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
210; GFX10-WAVE32-NEXT:    s_endpgm
211  %cmp.x = fcmp olt float %x, 0.0
212  call void @llvm.amdgcn.kill(i1 %cmp.x)
213  %cmp.y = fcmp olt float %y, 0.0
214  call void @llvm.amdgcn.kill(i1 %cmp.y)
215  ret void
216}
217
; Two kills separated by a side-effecting inline asm statement.
; The first kill tests `%x < 0.0`. The asm then writes v7 (constraint
; "={v7}") and the second kill tests that result against 0.0.
; Expected code: the ASMSTART/ASMEND region sits between the exec update of the
; first kill and the compare of the second, and both kills branch on scc0 to
; the shared .LBB6_2 exit block.
218define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
219; SI-LABEL: test_kill_depth_var_x2_instructions:
220; SI:       ; %bb.0:
221; SI-NEXT:    s_mov_b64 s[0:1], exec
222; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
223; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
224; SI-NEXT:    s_cbranch_scc0 .LBB6_2
225; SI-NEXT:  ; %bb.1:
226; SI-NEXT:    s_andn2_b64 exec, exec, vcc
227; SI-NEXT:    ;;#ASMSTART
228; SI-NEXT:    v_mov_b32_e64 v7, -1
229; SI-NEXT:    ;;#ASMEND
230; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
231; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
232; SI-NEXT:    s_cbranch_scc0 .LBB6_2
233; SI-NEXT:    s_endpgm
234; SI-NEXT:  .LBB6_2:
235; SI-NEXT:    s_mov_b64 exec, 0
236; SI-NEXT:    exp null off, off, off, off done vm
237; SI-NEXT:    s_endpgm
238;
239; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_instructions:
240; GFX10-WAVE64:       ; %bb.0:
241; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
242; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
243; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
244; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB6_2
245; GFX10-WAVE64-NEXT:  ; %bb.1:
246; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
247; GFX10-WAVE64-NEXT:    ;;#ASMSTART
248; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
249; GFX10-WAVE64-NEXT:    ;;#ASMEND
250; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
251; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
252; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB6_2
253; GFX10-WAVE64-NEXT:    s_endpgm
254; GFX10-WAVE64-NEXT:  .LBB6_2:
255; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
256; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
257; GFX10-WAVE64-NEXT:    s_endpgm
258;
259; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_instructions:
260; GFX10-WAVE32:       ; %bb.0:
261; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
262; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
263; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
264; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB6_2
265; GFX10-WAVE32-NEXT:  ; %bb.1:
266; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
267; GFX10-WAVE32-NEXT:    ;;#ASMSTART
268; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
269; GFX10-WAVE32-NEXT:    ;;#ASMEND
270; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
271; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
272; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB6_2
273; GFX10-WAVE32-NEXT:    s_endpgm
274; GFX10-WAVE32-NEXT:  .LBB6_2:
275; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
276; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
277; GFX10-WAVE32-NEXT:    s_endpgm
278  %cmp.x = fcmp olt float %x, 0.0
279  call void @llvm.amdgcn.kill(i1 %cmp.x)
280  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"()
281  %cmp.y = fcmp olt float %y, 0.0
282  call void @llvm.amdgcn.kill(i1 %cmp.y)
283  ret void
284}
285
286; FIXME: why does the skip depend on the asm length in the same block?
; Kill inside one arm of a branch on an inreg argument.
; %entry branches to %bb when %arg == 0, otherwise to %exit. %bb runs an
; inline asm blob (a v_mov into v7 followed by ten v_nop_e64), kills on
; `v7 < 0.0`, then joins %exit, which returns the constant 1.0.
; Expected code: the scalar compare on s0 selects between the two arms, the
; kill in %bb branches on scc0 to the .LBB7_4 early-exit block, and both
; surviving paths load 1.0 into v0 and branch to .LBB7_5. Nothing after the
; .LBB7_5 label is matched by these checks.
287define amdgpu_ps float @test_kill_control_flow(i32 inreg %arg) #0 {
288; SI-LABEL: test_kill_control_flow:
289; SI:       ; %bb.0: ; %entry
290; SI-NEXT:    s_cmp_lg_u32 s0, 0
291; SI-NEXT:    s_cbranch_scc0 .LBB7_2
292; SI-NEXT:  ; %bb.1: ; %exit
293; SI-NEXT:    v_mov_b32_e32 v0, 1.0
294; SI-NEXT:    s_branch .LBB7_5
295; SI-NEXT:  .LBB7_2: ; %bb
296; SI-NEXT:    s_mov_b64 s[2:3], exec
297; SI-NEXT:    ;;#ASMSTART
298; SI-NEXT:    v_mov_b32_e64 v7, -1
299; SI-NEXT:    v_nop_e64
300; SI-NEXT:    v_nop_e64
301; SI-NEXT:    v_nop_e64
302; SI-NEXT:    v_nop_e64
303; SI-NEXT:    v_nop_e64
304; SI-NEXT:    v_nop_e64
305; SI-NEXT:    v_nop_e64
306; SI-NEXT:    v_nop_e64
307; SI-NEXT:    v_nop_e64
308; SI-NEXT:    v_nop_e64
309; SI-NEXT:    ;;#ASMEND
310; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
311; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
312; SI-NEXT:    s_cbranch_scc0 .LBB7_4
313; SI-NEXT:  ; %bb.3: ; %bb
314; SI-NEXT:    s_andn2_b64 exec, exec, vcc
315; SI-NEXT:    v_mov_b32_e32 v0, 1.0
316; SI-NEXT:    s_branch .LBB7_5
317; SI-NEXT:  .LBB7_4:
318; SI-NEXT:    s_mov_b64 exec, 0
319; SI-NEXT:    exp null off, off, off, off done vm
320; SI-NEXT:    s_endpgm
321; SI-NEXT:  .LBB7_5:
322;
323; GFX10-WAVE64-LABEL: test_kill_control_flow:
324; GFX10-WAVE64:       ; %bb.0: ; %entry
325; GFX10-WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
326; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB7_2
327; GFX10-WAVE64-NEXT:  ; %bb.1: ; %exit
328; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 1.0
329; GFX10-WAVE64-NEXT:    s_branch .LBB7_5
330; GFX10-WAVE64-NEXT:  .LBB7_2: ; %bb
331; GFX10-WAVE64-NEXT:    ;;#ASMSTART
332; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
333; GFX10-WAVE64-NEXT:    v_nop_e64
334; GFX10-WAVE64-NEXT:    v_nop_e64
335; GFX10-WAVE64-NEXT:    v_nop_e64
336; GFX10-WAVE64-NEXT:    v_nop_e64
337; GFX10-WAVE64-NEXT:    v_nop_e64
338; GFX10-WAVE64-NEXT:    v_nop_e64
339; GFX10-WAVE64-NEXT:    v_nop_e64
340; GFX10-WAVE64-NEXT:    v_nop_e64
341; GFX10-WAVE64-NEXT:    v_nop_e64
342; GFX10-WAVE64-NEXT:    v_nop_e64
343; GFX10-WAVE64-NEXT:    ;;#ASMEND
344; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
345; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
346; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
347; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB7_4
348; GFX10-WAVE64-NEXT:  ; %bb.3: ; %bb
349; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
350; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 1.0
351; GFX10-WAVE64-NEXT:    s_branch .LBB7_5
352; GFX10-WAVE64-NEXT:  .LBB7_4:
353; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
354; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
355; GFX10-WAVE64-NEXT:    s_endpgm
356; GFX10-WAVE64-NEXT:  .LBB7_5:
357;
358; GFX10-WAVE32-LABEL: test_kill_control_flow:
359; GFX10-WAVE32:       ; %bb.0: ; %entry
360; GFX10-WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
361; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB7_2
362; GFX10-WAVE32-NEXT:  ; %bb.1: ; %exit
363; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 1.0
364; GFX10-WAVE32-NEXT:    s_branch .LBB7_5
365; GFX10-WAVE32-NEXT:  .LBB7_2: ; %bb
366; GFX10-WAVE32-NEXT:    ;;#ASMSTART
367; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
368; GFX10-WAVE32-NEXT:    v_nop_e64
369; GFX10-WAVE32-NEXT:    v_nop_e64
370; GFX10-WAVE32-NEXT:    v_nop_e64
371; GFX10-WAVE32-NEXT:    v_nop_e64
372; GFX10-WAVE32-NEXT:    v_nop_e64
373; GFX10-WAVE32-NEXT:    v_nop_e64
374; GFX10-WAVE32-NEXT:    v_nop_e64
375; GFX10-WAVE32-NEXT:    v_nop_e64
376; GFX10-WAVE32-NEXT:    v_nop_e64
377; GFX10-WAVE32-NEXT:    v_nop_e64
378; GFX10-WAVE32-NEXT:    ;;#ASMEND
379; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
380; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
381; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, vcc_lo
382; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB7_4
383; GFX10-WAVE32-NEXT:  ; %bb.3: ; %bb
384; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
385; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 1.0
386; GFX10-WAVE32-NEXT:    s_branch .LBB7_5
387; GFX10-WAVE32-NEXT:  .LBB7_4:
388; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
389; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
390; GFX10-WAVE32-NEXT:    s_endpgm
391; GFX10-WAVE32-NEXT:  .LBB7_5:
392entry:
393  %cmp = icmp eq i32 %arg, 0
394  br i1 %cmp, label %bb, label %exit
395
396bb:
397  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
398    v_nop_e64
399    v_nop_e64
400    v_nop_e64
401    v_nop_e64
402    v_nop_e64
403    v_nop_e64
404    v_nop_e64
405    v_nop_e64
406    v_nop_e64
407    v_nop_e64", "={v7}"()
408  %cmp.var = fcmp olt float %var, 0.0
409  ; TODO: We could do an early-exit here (the branch above is uniform!)
410  call void @llvm.amdgcn.kill(i1 %cmp.var)
411  br label %exit
412
413exit:
414  ret float 1.0
415}
416
; Kill inside one arm of a branch, with work remaining after the kill.
; %bb runs an inline asm blob (a v_mov into v7 followed by eleven v_nop_e64),
; defines %live.across in v8 before the kill, kills on `v7 < 0.0`, then does a
; volatile store of %live.across and defines %live.out in v9. %exit stores a
; phi of 0.0 (from %entry) and %live.out (from %bb).
; Expected code: the store of v8 and the asm writing v9 appear only after the
; exec update that follows the kill's s_cbranch_scc0, and the early-exit block
; (.LBB8_4) is placed at the end of the function.
; In the tahiti checks %bb falls through into a shared %exit block; in both
; gfx1010 check sets each path has its own store of v9 and its own s_endpgm.
417define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
418; SI-LABEL: test_kill_control_flow_remainder:
419; SI:       ; %bb.0: ; %entry
420; SI-NEXT:    s_cmp_lg_u32 s0, 0
421; SI-NEXT:    v_mov_b32_e32 v9, 0
422; SI-NEXT:    s_cbranch_scc1 .LBB8_3
423; SI-NEXT:  ; %bb.1: ; %bb
424; SI-NEXT:    s_mov_b64 s[2:3], exec
425; SI-NEXT:    ;;#ASMSTART
426; SI-NEXT:    v_mov_b32_e64 v7, -1
427; SI-NEXT:    v_nop_e64
428; SI-NEXT:    v_nop_e64
429; SI-NEXT:    v_nop_e64
430; SI-NEXT:    v_nop_e64
431; SI-NEXT:    v_nop_e64
432; SI-NEXT:    v_nop_e64
433; SI-NEXT:    v_nop_e64
434; SI-NEXT:    v_nop_e64
435; SI-NEXT:    v_nop_e64
436; SI-NEXT:    v_nop_e64
437; SI-NEXT:    v_nop_e64
438; SI-NEXT:    ;;#ASMEND
439; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
440; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
441; SI-NEXT:    ;;#ASMSTART
442; SI-NEXT:    v_mov_b32_e64 v8, -1
443; SI-NEXT:    ;;#ASMEND
444; SI-NEXT:    s_cbranch_scc0 .LBB8_4
445; SI-NEXT:  ; %bb.2: ; %bb
446; SI-NEXT:    s_andn2_b64 exec, exec, vcc
447; SI-NEXT:    s_mov_b32 s3, 0xf000
448; SI-NEXT:    s_mov_b32 s2, -1
449; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0
450; SI-NEXT:    s_waitcnt vmcnt(0)
451; SI-NEXT:    ;;#ASMSTART
452; SI-NEXT:    v_mov_b32_e64 v9, -2
453; SI-NEXT:    ;;#ASMEND
454; SI-NEXT:  .LBB8_3: ; %exit
455; SI-NEXT:    s_mov_b32 s3, 0xf000
456; SI-NEXT:    s_mov_b32 s2, -1
457; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0
458; SI-NEXT:    s_endpgm
459; SI-NEXT:  .LBB8_4:
460; SI-NEXT:    s_mov_b64 exec, 0
461; SI-NEXT:    exp null off, off, off, off done vm
462; SI-NEXT:    s_endpgm
463;
464; GFX10-WAVE64-LABEL: test_kill_control_flow_remainder:
465; GFX10-WAVE64:       ; %bb.0: ; %entry
466; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v9, 0
467; GFX10-WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
468; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB8_2
469; GFX10-WAVE64-NEXT:  ; %bb.1: ; %exit
470; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v9, off
471; GFX10-WAVE64-NEXT:    s_endpgm
472; GFX10-WAVE64-NEXT:  .LBB8_2: ; %bb
473; GFX10-WAVE64-NEXT:    ;;#ASMSTART
474; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
475; GFX10-WAVE64-NEXT:    v_nop_e64
476; GFX10-WAVE64-NEXT:    v_nop_e64
477; GFX10-WAVE64-NEXT:    v_nop_e64
478; GFX10-WAVE64-NEXT:    v_nop_e64
479; GFX10-WAVE64-NEXT:    v_nop_e64
480; GFX10-WAVE64-NEXT:    v_nop_e64
481; GFX10-WAVE64-NEXT:    v_nop_e64
482; GFX10-WAVE64-NEXT:    v_nop_e64
483; GFX10-WAVE64-NEXT:    v_nop_e64
484; GFX10-WAVE64-NEXT:    v_nop_e64
485; GFX10-WAVE64-NEXT:    v_nop_e64
486; GFX10-WAVE64-NEXT:    ;;#ASMEND
487; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
488; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
489; GFX10-WAVE64-NEXT:    ;;#ASMSTART
490; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v8, -1
491; GFX10-WAVE64-NEXT:    ;;#ASMEND
492; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
493; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB8_4
494; GFX10-WAVE64-NEXT:  ; %bb.3: ; %bb
495; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
496; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v8, off
497; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
498; GFX10-WAVE64-NEXT:    ;;#ASMSTART
499; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v9, -2
500; GFX10-WAVE64-NEXT:    ;;#ASMEND
501; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v9, off
502; GFX10-WAVE64-NEXT:    s_endpgm
503; GFX10-WAVE64-NEXT:  .LBB8_4:
504; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
505; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
506; GFX10-WAVE64-NEXT:    s_endpgm
507;
508; GFX10-WAVE32-LABEL: test_kill_control_flow_remainder:
509; GFX10-WAVE32:       ; %bb.0: ; %entry
510; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v9, 0
511; GFX10-WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
512; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB8_2
513; GFX10-WAVE32-NEXT:  ; %bb.1: ; %exit
514; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v9, off
515; GFX10-WAVE32-NEXT:    s_endpgm
516; GFX10-WAVE32-NEXT:  .LBB8_2: ; %bb
517; GFX10-WAVE32-NEXT:    ;;#ASMSTART
518; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
519; GFX10-WAVE32-NEXT:    v_nop_e64
520; GFX10-WAVE32-NEXT:    v_nop_e64
521; GFX10-WAVE32-NEXT:    v_nop_e64
522; GFX10-WAVE32-NEXT:    v_nop_e64
523; GFX10-WAVE32-NEXT:    v_nop_e64
524; GFX10-WAVE32-NEXT:    v_nop_e64
525; GFX10-WAVE32-NEXT:    v_nop_e64
526; GFX10-WAVE32-NEXT:    v_nop_e64
527; GFX10-WAVE32-NEXT:    v_nop_e64
528; GFX10-WAVE32-NEXT:    v_nop_e64
529; GFX10-WAVE32-NEXT:    v_nop_e64
530; GFX10-WAVE32-NEXT:    ;;#ASMEND
531; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
532; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
533; GFX10-WAVE32-NEXT:    ;;#ASMSTART
534; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v8, -1
535; GFX10-WAVE32-NEXT:    ;;#ASMEND
536; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, vcc_lo
537; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB8_4
538; GFX10-WAVE32-NEXT:  ; %bb.3: ; %bb
539; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
540; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v8, off
541; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
542; GFX10-WAVE32-NEXT:    ;;#ASMSTART
543; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v9, -2
544; GFX10-WAVE32-NEXT:    ;;#ASMEND
545; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v9, off
546; GFX10-WAVE32-NEXT:    s_endpgm
547; GFX10-WAVE32-NEXT:  .LBB8_4:
548; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
549; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
550; GFX10-WAVE32-NEXT:    s_endpgm
551entry:
552  %cmp = icmp eq i32 %arg, 0
553  br i1 %cmp, label %bb, label %exit
554
555bb:
556  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
557    v_nop_e64
558    v_nop_e64
559    v_nop_e64
560    v_nop_e64
561    v_nop_e64
562    v_nop_e64
563    v_nop_e64
564    v_nop_e64
565    v_nop_e64
566    v_nop_e64
567    v_nop_e64", "={v7}"()
568  %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"()
569  %cmp.var = fcmp olt float %var, 0.0
570  ; TODO: We could do an early-exit here (the branch above is uniform!)
571  call void @llvm.amdgcn.kill(i1 %cmp.var)
572  store volatile float %live.across, float addrspace(1)* undef
573  %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"()
574  br label %exit
575
576exit:
577  %phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
578  store float %phi, float addrspace(1)* undef
579  ret void
580}
581
; Kill in the entry block, ahead of a branch, in a function returning a value.
; %entry kills on `%arg == 1`, then branches on `%arg == 0` to %bb or %exit.
; %bb holds only an inline asm blob (a v_mov into v7 followed by ten
; v_nop_e64). %exit returns a phi of the asm result (from %bb) and 0.0 (from
; %entry).
; Expected code: the kill condition is built with scalar instructions
; (s_cmp_eq_u32 / s_cselect / s_xor with exec), the s_cbranch_scc0 to the
; .LBB9_4 early-exit block comes before the second compare on s0, and both
; surviving paths branch to .LBB9_5. Nothing after the .LBB9_5 label is
; matched by these checks.
582define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
583; SI-LABEL: test_kill_control_flow_return:
584; SI:       ; %bb.0: ; %entry
585; SI-NEXT:    s_cmp_eq_u32 s0, 1
586; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
587; SI-NEXT:    s_mov_b64 s[2:3], exec
588; SI-NEXT:    s_xor_b64 s[4:5], s[4:5], exec
589; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[4:5]
590; SI-NEXT:    s_cbranch_scc0 .LBB9_4
591; SI-NEXT:  ; %bb.1: ; %entry
592; SI-NEXT:    s_and_b64 exec, exec, s[2:3]
593; SI-NEXT:    s_cmp_lg_u32 s0, 0
594; SI-NEXT:    v_mov_b32_e32 v0, 0
595; SI-NEXT:    s_cbranch_scc0 .LBB9_3
596; SI-NEXT:  ; %bb.2: ; %exit
597; SI-NEXT:    s_branch .LBB9_5
598; SI-NEXT:  .LBB9_3: ; %bb
599; SI-NEXT:    ;;#ASMSTART
600; SI-NEXT:    v_mov_b32_e64 v7, -1
601; SI-NEXT:    v_nop_e64
602; SI-NEXT:    v_nop_e64
603; SI-NEXT:    v_nop_e64
604; SI-NEXT:    v_nop_e64
605; SI-NEXT:    v_nop_e64
606; SI-NEXT:    v_nop_e64
607; SI-NEXT:    v_nop_e64
608; SI-NEXT:    v_nop_e64
609; SI-NEXT:    v_nop_e64
610; SI-NEXT:    v_nop_e64
611; SI-NEXT:    ;;#ASMEND
612; SI-NEXT:    v_mov_b32_e32 v0, v7
613; SI-NEXT:    s_branch .LBB9_5
614; SI-NEXT:  .LBB9_4:
615; SI-NEXT:    s_mov_b64 exec, 0
616; SI-NEXT:    exp null off, off, off, off done vm
617; SI-NEXT:    s_endpgm
618; SI-NEXT:  .LBB9_5:
619;
620; GFX10-WAVE64-LABEL: test_kill_control_flow_return:
621; GFX10-WAVE64:       ; %bb.0: ; %entry
622; GFX10-WAVE64-NEXT:    s_cmp_eq_u32 s0, 1
623; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
624; GFX10-WAVE64-NEXT:    s_cselect_b64 s[4:5], -1, 0
625; GFX10-WAVE64-NEXT:    s_xor_b64 s[4:5], s[4:5], exec
626; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[4:5]
627; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB9_4
628; GFX10-WAVE64-NEXT:  ; %bb.1: ; %entry
629; GFX10-WAVE64-NEXT:    s_and_b64 exec, exec, s[2:3]
630; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 0
631; GFX10-WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
632; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB9_3
633; GFX10-WAVE64-NEXT:  ; %bb.2: ; %exit
634; GFX10-WAVE64-NEXT:    s_branch .LBB9_5
635; GFX10-WAVE64-NEXT:  .LBB9_3: ; %bb
636; GFX10-WAVE64-NEXT:    ;;#ASMSTART
637; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
638; GFX10-WAVE64-NEXT:    v_nop_e64
639; GFX10-WAVE64-NEXT:    v_nop_e64
640; GFX10-WAVE64-NEXT:    v_nop_e64
641; GFX10-WAVE64-NEXT:    v_nop_e64
642; GFX10-WAVE64-NEXT:    v_nop_e64
643; GFX10-WAVE64-NEXT:    v_nop_e64
644; GFX10-WAVE64-NEXT:    v_nop_e64
645; GFX10-WAVE64-NEXT:    v_nop_e64
646; GFX10-WAVE64-NEXT:    v_nop_e64
647; GFX10-WAVE64-NEXT:    v_nop_e64
648; GFX10-WAVE64-NEXT:    ;;#ASMEND
649; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, v7
650; GFX10-WAVE64-NEXT:    s_branch .LBB9_5
651; GFX10-WAVE64-NEXT:  .LBB9_4:
652; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
653; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
654; GFX10-WAVE64-NEXT:    s_endpgm
655; GFX10-WAVE64-NEXT:  .LBB9_5:
656;
657; GFX10-WAVE32-LABEL: test_kill_control_flow_return:
658; GFX10-WAVE32:       ; %bb.0: ; %entry
659; GFX10-WAVE32-NEXT:    s_cmp_eq_u32 s0, 1
660; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
661; GFX10-WAVE32-NEXT:    s_cselect_b32 s2, -1, 0
662; GFX10-WAVE32-NEXT:    s_xor_b32 s2, s2, exec_lo
663; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, s2
664; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB9_4
665; GFX10-WAVE32-NEXT:  ; %bb.1: ; %entry
666; GFX10-WAVE32-NEXT:    s_and_b32 exec_lo, exec_lo, s1
667; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 0
668; GFX10-WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
669; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB9_3
670; GFX10-WAVE32-NEXT:  ; %bb.2: ; %exit
671; GFX10-WAVE32-NEXT:    s_branch .LBB9_5
672; GFX10-WAVE32-NEXT:  .LBB9_3: ; %bb
673; GFX10-WAVE32-NEXT:    ;;#ASMSTART
674; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
675; GFX10-WAVE32-NEXT:    v_nop_e64
676; GFX10-WAVE32-NEXT:    v_nop_e64
677; GFX10-WAVE32-NEXT:    v_nop_e64
678; GFX10-WAVE32-NEXT:    v_nop_e64
679; GFX10-WAVE32-NEXT:    v_nop_e64
680; GFX10-WAVE32-NEXT:    v_nop_e64
681; GFX10-WAVE32-NEXT:    v_nop_e64
682; GFX10-WAVE32-NEXT:    v_nop_e64
683; GFX10-WAVE32-NEXT:    v_nop_e64
684; GFX10-WAVE32-NEXT:    v_nop_e64
685; GFX10-WAVE32-NEXT:    ;;#ASMEND
686; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, v7
687; GFX10-WAVE32-NEXT:    s_branch .LBB9_5
688; GFX10-WAVE32-NEXT:  .LBB9_4:
689; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
690; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
691; GFX10-WAVE32-NEXT:    s_endpgm
692; GFX10-WAVE32-NEXT:  .LBB9_5:
693entry:
694  %kill = icmp eq i32 %arg, 1
695  %cmp = icmp eq i32 %arg, 0
696  call void @llvm.amdgcn.kill(i1 %kill)
697  br i1 %cmp, label %bb, label %exit
698
699bb:
700  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
701    v_nop_e64
702    v_nop_e64
703    v_nop_e64
704    v_nop_e64
705    v_nop_e64
706    v_nop_e64
707    v_nop_e64
708    v_nop_e64
709    v_nop_e64
710    v_nop_e64", "={v7}"()
711  br label %exit
712
713exit:
714  %ret = phi float [ %var, %bb ], [ 0.0, %entry ]
715  ret float %ret
716}
717
718define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
719; SI-LABEL: test_kill_divergent_loop:
720; SI:       ; %bb.0: ; %entry
721; SI-NEXT:    s_mov_b64 s[0:1], exec
722; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
723; SI-NEXT:    s_and_saveexec_b64 s[2:3], vcc
724; SI-NEXT:    s_xor_b64 s[4:5], exec, s[2:3]
725; SI-NEXT:    s_cbranch_execz .LBB10_4
726; SI-NEXT:  ; %bb.1: ; %bb.preheader
727; SI-NEXT:    s_mov_b32 s3, 0xf000
728; SI-NEXT:    s_mov_b32 s2, -1
729; SI-NEXT:  .LBB10_2: ; %bb
730; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
731; SI-NEXT:    ;;#ASMSTART
732; SI-NEXT:    v_mov_b32_e64 v7, -1
733; SI-NEXT:    v_nop_e64
734; SI-NEXT:    v_nop_e64
735; SI-NEXT:    v_nop_e64
736; SI-NEXT:    v_nop_e64
737; SI-NEXT:    v_nop_e64
738; SI-NEXT:    v_nop_e64
739; SI-NEXT:    v_nop_e64
740; SI-NEXT:    v_nop_e64
741; SI-NEXT:    v_nop_e64
742; SI-NEXT:    v_nop_e64
743; SI-NEXT:    ;;#ASMEND
744; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
745; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
746; SI-NEXT:    s_cbranch_scc0 .LBB10_5
747; SI-NEXT:  ; %bb.3: ; %bb
748; SI-NEXT:    ; in Loop: Header=BB10_2 Depth=1
749; SI-NEXT:    s_andn2_b64 exec, exec, vcc
750; SI-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
751; SI-NEXT:    s_waitcnt vmcnt(0)
752; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
753; SI-NEXT:    s_and_b64 vcc, exec, vcc
754; SI-NEXT:    s_cbranch_vccnz .LBB10_2
755; SI-NEXT:  .LBB10_4: ; %Flow1
756; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
757; SI-NEXT:    s_mov_b32 s3, 0xf000
758; SI-NEXT:    s_mov_b32 s2, -1
759; SI-NEXT:    v_mov_b32_e32 v0, 8
760; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
761; SI-NEXT:    s_waitcnt vmcnt(0)
762; SI-NEXT:    s_endpgm
763; SI-NEXT:  .LBB10_5:
764; SI-NEXT:    s_mov_b64 exec, 0
765; SI-NEXT:    exp null off, off, off, off done vm
766; SI-NEXT:    s_endpgm
767;
768; GFX10-WAVE64-LABEL: test_kill_divergent_loop:
769; GFX10-WAVE64:       ; %bb.0: ; %entry
770; GFX10-WAVE64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
771; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
772; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
773; GFX10-WAVE64-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
774; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB10_3
775; GFX10-WAVE64-NEXT:  .LBB10_1: ; %bb
776; GFX10-WAVE64-NEXT:    ; =>This Inner Loop Header: Depth=1
777; GFX10-WAVE64-NEXT:    ;;#ASMSTART
778; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
779; GFX10-WAVE64-NEXT:    v_nop_e64
780; GFX10-WAVE64-NEXT:    v_nop_e64
781; GFX10-WAVE64-NEXT:    v_nop_e64
782; GFX10-WAVE64-NEXT:    v_nop_e64
783; GFX10-WAVE64-NEXT:    v_nop_e64
784; GFX10-WAVE64-NEXT:    v_nop_e64
785; GFX10-WAVE64-NEXT:    v_nop_e64
786; GFX10-WAVE64-NEXT:    v_nop_e64
787; GFX10-WAVE64-NEXT:    v_nop_e64
788; GFX10-WAVE64-NEXT:    v_nop_e64
789; GFX10-WAVE64-NEXT:    ;;#ASMEND
790; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
791; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
792; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB10_4
793; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb
794; GFX10-WAVE64-NEXT:    ; in Loop: Header=BB10_1 Depth=1
795; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
796; GFX10-WAVE64-NEXT:    global_load_dword v0, v[0:1], off glc dlc
797; GFX10-WAVE64-NEXT:    s_waitcnt vmcnt(0)
798; GFX10-WAVE64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
799; GFX10-WAVE64-NEXT:    s_and_b64 vcc, exec, vcc
800; GFX10-WAVE64-NEXT:    s_cbranch_vccnz .LBB10_1
801; GFX10-WAVE64-NEXT:  .LBB10_3: ; %Flow1
802; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[2:3]
803; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 8
804; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v0, off
805; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
806; GFX10-WAVE64-NEXT:    s_endpgm
807; GFX10-WAVE64-NEXT:  .LBB10_4:
808; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
809; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
810; GFX10-WAVE64-NEXT:    s_endpgm
811;
812; GFX10-WAVE32-LABEL: test_kill_divergent_loop:
813; GFX10-WAVE32:       ; %bb.0: ; %entry
814; GFX10-WAVE32-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
815; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
816; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
817; GFX10-WAVE32-NEXT:    s_xor_b32 s1, exec_lo, s1
818; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB10_3
819; GFX10-WAVE32-NEXT:  .LBB10_1: ; %bb
820; GFX10-WAVE32-NEXT:    ; =>This Inner Loop Header: Depth=1
821; GFX10-WAVE32-NEXT:    ;;#ASMSTART
822; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
823; GFX10-WAVE32-NEXT:    v_nop_e64
824; GFX10-WAVE32-NEXT:    v_nop_e64
825; GFX10-WAVE32-NEXT:    v_nop_e64
826; GFX10-WAVE32-NEXT:    v_nop_e64
827; GFX10-WAVE32-NEXT:    v_nop_e64
828; GFX10-WAVE32-NEXT:    v_nop_e64
829; GFX10-WAVE32-NEXT:    v_nop_e64
830; GFX10-WAVE32-NEXT:    v_nop_e64
831; GFX10-WAVE32-NEXT:    v_nop_e64
832; GFX10-WAVE32-NEXT:    v_nop_e64
833; GFX10-WAVE32-NEXT:    ;;#ASMEND
834; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
835; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
836; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB10_4
837; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb
838; GFX10-WAVE32-NEXT:    ; in Loop: Header=BB10_1 Depth=1
839; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
840; GFX10-WAVE32-NEXT:    global_load_dword v0, v[0:1], off glc dlc
841; GFX10-WAVE32-NEXT:    s_waitcnt vmcnt(0)
842; GFX10-WAVE32-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
843; GFX10-WAVE32-NEXT:    s_and_b32 vcc_lo, exec_lo, vcc_lo
844; GFX10-WAVE32-NEXT:    s_cbranch_vccnz .LBB10_1
845; GFX10-WAVE32-NEXT:  .LBB10_3: ; %Flow1
846; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s1
847; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 8
848; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v0, off
849; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
850; GFX10-WAVE32-NEXT:    s_endpgm
851; GFX10-WAVE32-NEXT:  .LBB10_4:
852; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
853; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
854; GFX10-WAVE32-NEXT:    s_endpgm
855entry:
856  %cmp = icmp eq i32 %arg, 0
857  br i1 %cmp, label %bb, label %exit
858
859bb:
860  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
861    v_nop_e64
862    v_nop_e64
863    v_nop_e64
864    v_nop_e64
865    v_nop_e64
866    v_nop_e64
867    v_nop_e64
868    v_nop_e64
869    v_nop_e64
870    v_nop_e64", "={v7}"()
871  %cmp.var = fcmp olt float %var, 0.0
872  call void @llvm.amdgcn.kill(i1 %cmp.var)
873  %vgpr = load volatile i32, i32 addrspace(1)* undef
874  %loop.cond = icmp eq i32 %vgpr, 0
875  br i1 %loop.cond, label %bb, label %exit
876
877exit:
878  store volatile i32 8, i32 addrspace(1)* undef
879  ret void
880}
881
882; bug 28550
; phi_use_def_before_kill: %tmp2 is computed before the llvm.amdgcn.kill call
; and is consumed again after it, through the phi in %phibb (incoming edge from
; %bb). The branch leaving %bb is on undef, so both %phibb and %bb8 follow the
; kill.
883define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
884; SI-LABEL: phi_use_def_before_kill:
885; SI:       ; %bb.0: ; %bb
886; SI-NEXT:    v_add_f32_e64 v1, s0, 1.0
887; SI-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
888; SI-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc
889; SI-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v1
890; SI-NEXT:    s_andn2_b64 exec, exec, vcc
891; SI-NEXT:    s_cbranch_scc0 .LBB11_6
892; SI-NEXT:  ; %bb.1: ; %bb
893; SI-NEXT:    s_andn2_b64 exec, exec, vcc
894; SI-NEXT:    s_cbranch_scc0 .LBB11_3
895; SI-NEXT:  ; %bb.2: ; %bb8
896; SI-NEXT:    s_mov_b32 s3, 0xf000
897; SI-NEXT:    s_mov_b32 s2, -1
898; SI-NEXT:    v_mov_b32_e32 v0, 8
899; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
900; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
901; SI-NEXT:    v_mov_b32_e32 v0, 4.0
902; SI-NEXT:  .LBB11_3: ; %phibb
903; SI-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
904; SI-NEXT:    s_and_b64 vcc, exec, vcc
905; SI-NEXT:    s_cbranch_vccz .LBB11_5
906; SI-NEXT:  ; %bb.4: ; %bb10
907; SI-NEXT:    s_mov_b32 s3, 0xf000
908; SI-NEXT:    s_mov_b32 s2, -1
909; SI-NEXT:    v_mov_b32_e32 v0, 9
910; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
911; SI-NEXT:    s_waitcnt vmcnt(0)
912; SI-NEXT:  .LBB11_5: ; %end
913; SI-NEXT:    s_endpgm
914; SI-NEXT:  .LBB11_6:
915; SI-NEXT:    s_mov_b64 exec, 0
916; SI-NEXT:    exp null off, off, off, off done vm
917; SI-NEXT:    s_endpgm
918;
919; GFX10-WAVE64-LABEL: phi_use_def_before_kill:
920; GFX10-WAVE64:       ; %bb.0: ; %bb
921; GFX10-WAVE64-NEXT:    v_add_f32_e64 v1, s0, 1.0
922; GFX10-WAVE64-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
923; GFX10-WAVE64-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc
924; GFX10-WAVE64-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v1
925; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
926; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB11_6
927; GFX10-WAVE64-NEXT:  ; %bb.1: ; %bb
928; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
929; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB11_3
930; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb8
931; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v1, 8
932; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 4.0
933; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v1, off
934; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
935; GFX10-WAVE64-NEXT:  .LBB11_3: ; %phibb
936; GFX10-WAVE64-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
937; GFX10-WAVE64-NEXT:    s_and_b64 vcc, exec, vcc
938; GFX10-WAVE64-NEXT:    s_cbranch_vccz .LBB11_5
939; GFX10-WAVE64-NEXT:  ; %bb.4: ; %bb10
940; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 9
941; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v0, off
942; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
943; GFX10-WAVE64-NEXT:  .LBB11_5: ; %end
944; GFX10-WAVE64-NEXT:    s_endpgm
945; GFX10-WAVE64-NEXT:  .LBB11_6:
946; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
947; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
948; GFX10-WAVE64-NEXT:    s_endpgm
949;
950; GFX10-WAVE32-LABEL: phi_use_def_before_kill:
951; GFX10-WAVE32:       ; %bb.0: ; %bb
952; GFX10-WAVE32-NEXT:    v_add_f32_e64 v1, s0, 1.0
953; GFX10-WAVE32-NEXT:    v_cmp_lt_f32_e32 vcc_lo, 0, v1
954; GFX10-WAVE32-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo
955; GFX10-WAVE32-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, 0, v1
956; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
957; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB11_6
958; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb
959; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
960; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB11_3
961; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb8
962; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v1, 8
963; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 4.0
964; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v1, off
965; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
966; GFX10-WAVE32-NEXT:  .LBB11_3: ; %phibb
967; GFX10-WAVE32-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 0, v0
968; GFX10-WAVE32-NEXT:    s_and_b32 vcc_lo, exec_lo, vcc_lo
969; GFX10-WAVE32-NEXT:    s_cbranch_vccz .LBB11_5
970; GFX10-WAVE32-NEXT:  ; %bb.4: ; %bb10
971; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 9
972; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v0, off
973; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
974; GFX10-WAVE32-NEXT:  .LBB11_5: ; %end
975; GFX10-WAVE32-NEXT:    s_endpgm
976; GFX10-WAVE32-NEXT:  .LBB11_6:
977; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
978; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
979; GFX10-WAVE32-NEXT:    s_endpgm
980bb:
981  %tmp = fadd float %x, 1.000000e+00
982  %tmp1 = fcmp olt float 0.000000e+00, %tmp
983  %tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00
  ; The kill condition is derived from the same %tmp2 that the phi in %phibb
  ; reads on the edge coming from this block.
984  %cmp.tmp2 = fcmp olt float %tmp2, 0.0
985  call void @llvm.amdgcn.kill(i1 %cmp.tmp2)
986  br i1 undef, label %phibb, label %bb8
987
988phibb:
  ; %tmp2 (defined before the kill) when arriving from %bb, 4.0 from %bb8.
989  %tmp5 = phi float [ %tmp2, %bb ], [ 4.0, %bb8 ]
990  %tmp6 = fcmp oeq float %tmp5, 0.000000e+00
991  br i1 %tmp6, label %bb10, label %end
992
993bb8:
994  store volatile i32 8, i32 addrspace(1)* undef
995  br label %phibb
996
997bb10:
998  store volatile i32 9, i32 addrspace(1)* undef
999  br label %end
1000
1001end:
1002  ret void
1003}
1004
; no_skip_no_successors: the block that contains the kill (%bb6) ends in
; unreachable, so the kill is the last operation of a block with no successors.
; %bb5 also ends in unreachable, and %bb7 (the only ret) sits behind the false
; edge of a branch on constant true in %bb4.
1005define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 {
1006; SI-LABEL: no_skip_no_successors:
1007; SI:       ; %bb.0: ; %bb
1008; SI-NEXT:    v_cmp_nge_f32_e64 s[4:5], s1, 0
1009; SI-NEXT:    s_and_b64 vcc, exec, s[4:5]
1010; SI-NEXT:    s_cbranch_vccz .LBB12_3
1011; SI-NEXT:  ; %bb.1: ; %bb6
1012; SI-NEXT:    s_mov_b64 s[2:3], exec
1013; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
1014; SI-NEXT:    s_cbranch_scc0 .LBB12_5
1015; SI-NEXT:  ; %bb.2: ; %bb6
1016; SI-NEXT:    s_mov_b64 exec, 0
1017; SI-NEXT:  .LBB12_3: ; %bb3
1018; SI-NEXT:    v_mov_b32_e32 v0, 0x3e7ae148
1019; SI-NEXT:    v_cmp_nge_f32_e32 vcc, s0, v0
1020; SI-NEXT:    s_and_b64 vcc, exec, vcc
1021; SI-NEXT:  ; %bb.4: ; %bb5
1022; SI-NEXT:  .LBB12_5:
1023; SI-NEXT:    s_mov_b64 exec, 0
1024; SI-NEXT:    exp null off, off, off, off done vm
1025; SI-NEXT:    s_endpgm
1026;
1027; GFX10-WAVE64-LABEL: no_skip_no_successors:
1028; GFX10-WAVE64:       ; %bb.0: ; %bb
1029; GFX10-WAVE64-NEXT:    v_cmp_nge_f32_e64 s[4:5], s1, 0
1030; GFX10-WAVE64-NEXT:    s_and_b64 vcc, exec, s[4:5]
1031; GFX10-WAVE64-NEXT:    s_cbranch_vccz .LBB12_3
1032; GFX10-WAVE64-NEXT:  ; %bb.1: ; %bb6
1033; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
1034; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
1035; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB12_5
1036; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb6
1037; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1038; GFX10-WAVE64-NEXT:  .LBB12_3: ; %bb3
1039; GFX10-WAVE64-NEXT:    v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0
1040; GFX10-WAVE64-NEXT:    s_and_b64 vcc, exec, s[0:1]
1041; GFX10-WAVE64-NEXT:  ; %bb.4: ; %bb5
1042; GFX10-WAVE64-NEXT:  .LBB12_5:
1043; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1044; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1045; GFX10-WAVE64-NEXT:    s_endpgm
1046;
1047; GFX10-WAVE32-LABEL: no_skip_no_successors:
1048; GFX10-WAVE32:       ; %bb.0: ; %bb
1049; GFX10-WAVE32-NEXT:    v_cmp_nge_f32_e64 s1, s1, 0
1050; GFX10-WAVE32-NEXT:    s_and_b32 vcc_lo, exec_lo, s1
1051; GFX10-WAVE32-NEXT:    s_cbranch_vccz .LBB12_3
1052; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb6
1053; GFX10-WAVE32-NEXT:    s_mov_b32 s2, exec_lo
1054; GFX10-WAVE32-NEXT:    s_andn2_b32 s2, s2, exec_lo
1055; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB12_5
1056; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb6
1057; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1058; GFX10-WAVE32-NEXT:  .LBB12_3: ; %bb3
1059; GFX10-WAVE32-NEXT:    v_cmp_nle_f32_e64 s0, 0x3e7ae148, s0
1060; GFX10-WAVE32-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
1061; GFX10-WAVE32-NEXT:  ; %bb.4: ; %bb5
1062; GFX10-WAVE32-NEXT:  .LBB12_5:
1063; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1064; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1065; GFX10-WAVE32-NEXT:    s_endpgm
1066bb:
1067  %tmp = fcmp ult float %arg1, 0.000000e+00
1068  %tmp2 = fcmp ult float %arg, 0x3FCF5C2900000000
1069  br i1 %tmp, label %bb6, label %bb3
1070
1071bb3:                                              ; preds = %bb
1072  br i1 %tmp2, label %bb5, label %bb4
1073
1074bb4:                                              ; preds = %bb3
  ; Constant-true condition: the %bb7 edge is never taken.
1075  br i1 true, label %bb5, label %bb7
1076
1077bb5:                                              ; preds = %bb4, %bb3
1078  unreachable
1079
1080bb6:                                              ; preds = %bb
  ; Kill with a constant false condition, immediately followed by unreachable.
1081  call void @llvm.amdgcn.kill(i1 false)
1082  unreachable
1083
1084bb7:                                              ; preds = %bb4
1085  ret void
1086}
1087
; if_after_kill_block: a conditional kill in %bb3 is followed, in %bb4, by an
; image sample whose first result element drives a second conditional branch
; (%bb8 stores and returns, %bb9 just returns).
1088define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 {
1089; SI-LABEL: if_after_kill_block:
1090; SI:       ; %bb.0: ; %bb
1091; SI-NEXT:    s_mov_b64 s[2:3], exec
1092; SI-NEXT:    s_wqm_b64 exec, exec
1093; SI-NEXT:    s_mov_b32 s0, 0
1094; SI-NEXT:    v_cmp_nle_f32_e32 vcc, 0, v1
1095; SI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1096; SI-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
1097; SI-NEXT:    s_cbranch_execz .LBB13_3
1098; SI-NEXT:  ; %bb.1: ; %bb3
1099; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
1100; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
1101; SI-NEXT:    s_cbranch_scc0 .LBB13_6
1102; SI-NEXT:  ; %bb.2: ; %bb3
1103; SI-NEXT:    s_andn2_b64 exec, exec, vcc
1104; SI-NEXT:  .LBB13_3: ; %bb4
1105; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
1106; SI-NEXT:    s_mov_b32 s1, s0
1107; SI-NEXT:    s_mov_b32 s2, s0
1108; SI-NEXT:    s_mov_b32 s3, s0
1109; SI-NEXT:    s_mov_b32 s4, s0
1110; SI-NEXT:    s_mov_b32 s5, s0
1111; SI-NEXT:    s_mov_b32 s6, s0
1112; SI-NEXT:    s_mov_b32 s7, s0
1113; SI-NEXT:    image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10
1114; SI-NEXT:    s_waitcnt vmcnt(0)
1115; SI-NEXT:    v_cmp_neq_f32_e32 vcc, 0, v0
1116; SI-NEXT:    s_and_saveexec_b64 s[0:1], vcc
1117; SI-NEXT:    s_cbranch_execz .LBB13_5
1118; SI-NEXT:  ; %bb.4: ; %bb8
1119; SI-NEXT:    s_mov_b32 s3, 0xf000
1120; SI-NEXT:    s_mov_b32 s2, -1
1121; SI-NEXT:    v_mov_b32_e32 v0, 9
1122; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1123; SI-NEXT:    s_waitcnt vmcnt(0)
1124; SI-NEXT:  .LBB13_5: ; %UnifiedReturnBlock
1125; SI-NEXT:    s_endpgm
1126; SI-NEXT:  .LBB13_6:
1127; SI-NEXT:    s_mov_b64 exec, 0
1128; SI-NEXT:    exp null off, off, off, off done vm
1129; SI-NEXT:    s_endpgm
1130;
1131; GFX10-WAVE64-LABEL: if_after_kill_block:
1132; GFX10-WAVE64:       ; %bb.0: ; %bb
1133; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
1134; GFX10-WAVE64-NEXT:    s_wqm_b64 exec, exec
1135; GFX10-WAVE64-NEXT:    v_cmp_nle_f32_e32 vcc, 0, v1
1136; GFX10-WAVE64-NEXT:    s_mov_b32 s0, 0
1137; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1138; GFX10-WAVE64-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
1139; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB13_3
1140; GFX10-WAVE64-NEXT:  ; %bb.1: ; %bb3
1141; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
1142; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
1143; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB13_6
1144; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb3
1145; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
1146; GFX10-WAVE64-NEXT:  .LBB13_3: ; %bb4
1147; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[4:5]
1148; GFX10-WAVE64-NEXT:    s_mov_b32 s1, s0
1149; GFX10-WAVE64-NEXT:    s_mov_b32 s2, s0
1150; GFX10-WAVE64-NEXT:    s_mov_b32 s3, s0
1151; GFX10-WAVE64-NEXT:    s_mov_b32 s4, s0
1152; GFX10-WAVE64-NEXT:    s_mov_b32 s5, s0
1153; GFX10-WAVE64-NEXT:    s_mov_b32 s6, s0
1154; GFX10-WAVE64-NEXT:    s_mov_b32 s7, s0
1155; GFX10-WAVE64-NEXT:    image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
1156; GFX10-WAVE64-NEXT:    s_waitcnt vmcnt(0)
1157; GFX10-WAVE64-NEXT:    v_cmp_neq_f32_e32 vcc, 0, v0
1158; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[0:1], vcc
1159; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB13_5
1160; GFX10-WAVE64-NEXT:  ; %bb.4: ; %bb8
1161; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 9
1162; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v0, off
1163; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
1164; GFX10-WAVE64-NEXT:  .LBB13_5: ; %UnifiedReturnBlock
1165; GFX10-WAVE64-NEXT:    s_endpgm
1166; GFX10-WAVE64-NEXT:  .LBB13_6:
1167; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1168; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1169; GFX10-WAVE64-NEXT:    s_endpgm
1170;
1171; GFX10-WAVE32-LABEL: if_after_kill_block:
1172; GFX10-WAVE32:       ; %bb.0: ; %bb
1173; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
1174; GFX10-WAVE32-NEXT:    s_wqm_b32 exec_lo, exec_lo
1175; GFX10-WAVE32-NEXT:    v_cmp_nle_f32_e32 vcc_lo, 0, v1
1176; GFX10-WAVE32-NEXT:    s_mov_b32 s0, 0
1177; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s2, vcc_lo
1178; GFX10-WAVE32-NEXT:    s_xor_b32 s2, exec_lo, s2
1179; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB13_3
1180; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb3
1181; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
1182; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, vcc_lo
1183; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB13_6
1184; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb3
1185; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
1186; GFX10-WAVE32-NEXT:  .LBB13_3: ; %bb4
1187; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s2
1188; GFX10-WAVE32-NEXT:    s_mov_b32 s1, s0
1189; GFX10-WAVE32-NEXT:    s_mov_b32 s2, s0
1190; GFX10-WAVE32-NEXT:    s_mov_b32 s3, s0
1191; GFX10-WAVE32-NEXT:    s_mov_b32 s4, s0
1192; GFX10-WAVE32-NEXT:    s_mov_b32 s5, s0
1193; GFX10-WAVE32-NEXT:    s_mov_b32 s6, s0
1194; GFX10-WAVE32-NEXT:    s_mov_b32 s7, s0
1195; GFX10-WAVE32-NEXT:    image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
1196; GFX10-WAVE32-NEXT:    s_waitcnt vmcnt(0)
1197; GFX10-WAVE32-NEXT:    v_cmp_neq_f32_e32 vcc_lo, 0, v0
1198; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s0, vcc_lo
1199; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB13_5
1200; GFX10-WAVE32-NEXT:  ; %bb.4: ; %bb8
1201; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 9
1202; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v0, off
1203; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
1204; GFX10-WAVE32-NEXT:  .LBB13_5: ; %UnifiedReturnBlock
1205; GFX10-WAVE32-NEXT:    s_endpgm
1206; GFX10-WAVE32-NEXT:  .LBB13_6:
1207; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1208; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1209; GFX10-WAVE32-NEXT:    s_endpgm
1210bb:
1211  %tmp = fcmp ult float %arg1, 0.000000e+00
1212  br i1 %tmp, label %bb3, label %bb4
1213
1214bb3:                                              ; preds = %bb
1215  %cmp.arg = fcmp olt float %arg, 0.0
1216  call void @llvm.amdgcn.kill(i1 %cmp.arg)
1217  br label %bb4
1218
1219bb4:                                              ; preds = %bb3, %bb
  ; Reached both with and without the kill having executed; the sample result
  ; feeds the branch that follows the kill block.
1220  %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
1221  %tmp6 = extractelement <4 x float> %tmp5, i32 0
1222  %tmp7 = fcmp une float %tmp6, 0.000000e+00
1223  br i1 %tmp7, label %bb8, label %bb9
1224
1225bb8:                                              ; preds = %bb4
1226  store volatile i32 9, i32 addrspace(1)* undef
1227  ret void
1228
1229bb9:                                              ; preds = %bb4
1230  ret void
1231}
1232
; cbranch_kill: the sample result selects between %live and %kill. %kill calls
; llvm.amdgcn.kill(i1 false) and then still branches to %export, where the phi
; takes undef on that edge and %scale on the %live edge.
1233define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
1234; SI-LABEL: cbranch_kill:
1235; SI:       ; %bb.0: ; %.entry
1236; SI-NEXT:    s_mov_b32 s4, 0
1237; SI-NEXT:    s_mov_b64 s[0:1], exec
1238; SI-NEXT:    v_mov_b32_e32 v2, v1
1239; SI-NEXT:    v_mov_b32_e32 v3, v1
1240; SI-NEXT:    s_mov_b32 s5, s4
1241; SI-NEXT:    s_mov_b32 s6, s4
1242; SI-NEXT:    s_mov_b32 s7, s4
1243; SI-NEXT:    s_mov_b32 s8, s4
1244; SI-NEXT:    s_mov_b32 s9, s4
1245; SI-NEXT:    s_mov_b32 s10, s4
1246; SI-NEXT:    s_mov_b32 s11, s4
1247; SI-NEXT:    image_sample_lz v1, v[1:3], s[4:11], s[0:3] dmask:0x1 da
1248; SI-NEXT:    s_waitcnt vmcnt(0)
1249; SI-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v1
1250; SI-NEXT:    s_and_saveexec_b64 s[2:3], vcc
1251; SI-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
1252; SI-NEXT:    s_cbranch_execz .LBB14_3
1253; SI-NEXT:  ; %bb.1: ; %kill
1254; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
1255; SI-NEXT:    ; implicit-def: $vgpr0
1256; SI-NEXT:    ; implicit-def: $vgpr1
1257; SI-NEXT:    s_cbranch_scc0 .LBB14_6
1258; SI-NEXT:  ; %bb.2: ; %kill
1259; SI-NEXT:    s_mov_b64 exec, 0
1260; SI-NEXT:  .LBB14_3: ; %Flow
1261; SI-NEXT:    s_or_saveexec_b64 s[0:1], s[2:3]
1262; SI-NEXT:    ; implicit-def: $vgpr2
1263; SI-NEXT:    s_xor_b64 exec, exec, s[0:1]
1264; SI-NEXT:  ; %bb.4: ; %live
1265; SI-NEXT:    v_mul_f32_e32 v2, v0, v1
1266; SI-NEXT:  ; %bb.5: ; %export
1267; SI-NEXT:    s_or_b64 exec, exec, s[0:1]
1268; SI-NEXT:    exp mrt0 v2, v2, v2, v2 done vm
1269; SI-NEXT:    s_endpgm
1270; SI-NEXT:  .LBB14_6:
1271; SI-NEXT:    s_mov_b64 exec, 0
1272; SI-NEXT:    exp null off, off, off, off done vm
1273; SI-NEXT:    s_endpgm
1274;
1275; GFX10-WAVE64-LABEL: cbranch_kill:
1276; GFX10-WAVE64:       ; %bb.0: ; %.entry
1277; GFX10-WAVE64-NEXT:    s_mov_b32 s4, 0
1278; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
1279; GFX10-WAVE64-NEXT:    s_mov_b32 s5, s4
1280; GFX10-WAVE64-NEXT:    s_mov_b32 s6, s4
1281; GFX10-WAVE64-NEXT:    s_mov_b32 s7, s4
1282; GFX10-WAVE64-NEXT:    s_mov_b32 s8, s4
1283; GFX10-WAVE64-NEXT:    s_mov_b32 s9, s4
1284; GFX10-WAVE64-NEXT:    s_mov_b32 s10, s4
1285; GFX10-WAVE64-NEXT:    s_mov_b32 s11, s4
1286; GFX10-WAVE64-NEXT:    image_sample_lz v1, [v1, v1, v1], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
1287; GFX10-WAVE64-NEXT:    s_waitcnt vmcnt(0)
1288; GFX10-WAVE64-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v1
1289; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
1290; GFX10-WAVE64-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
1291; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB14_3
1292; GFX10-WAVE64-NEXT:  ; %bb.1: ; %kill
1293; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
1294; GFX10-WAVE64-NEXT:    ; implicit-def: $vgpr0
1295; GFX10-WAVE64-NEXT:    ; implicit-def: $vgpr1
1296; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB14_6
1297; GFX10-WAVE64-NEXT:  ; %bb.2: ; %kill
1298; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1299; GFX10-WAVE64-NEXT:  .LBB14_3: ; %Flow
1300; GFX10-WAVE64-NEXT:    s_or_saveexec_b64 s[0:1], s[2:3]
1301; GFX10-WAVE64-NEXT:    ; implicit-def: $vgpr2
1302; GFX10-WAVE64-NEXT:    s_xor_b64 exec, exec, s[0:1]
1303; GFX10-WAVE64-NEXT:  ; %bb.4: ; %live
1304; GFX10-WAVE64-NEXT:    v_mul_f32_e32 v2, v0, v1
1305; GFX10-WAVE64-NEXT:  ; %bb.5: ; %export
1306; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[0:1]
1307; GFX10-WAVE64-NEXT:    exp mrt0 v2, v2, v2, v2 done vm
1308; GFX10-WAVE64-NEXT:    s_endpgm
1309; GFX10-WAVE64-NEXT:  .LBB14_6:
1310; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1311; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1312; GFX10-WAVE64-NEXT:    s_endpgm
1313;
1314; GFX10-WAVE32-LABEL: cbranch_kill:
1315; GFX10-WAVE32:       ; %bb.0: ; %.entry
1316; GFX10-WAVE32-NEXT:    s_mov_b32 s4, 0
1317; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
1318; GFX10-WAVE32-NEXT:    s_mov_b32 s5, s4
1319; GFX10-WAVE32-NEXT:    s_mov_b32 s6, s4
1320; GFX10-WAVE32-NEXT:    s_mov_b32 s7, s4
1321; GFX10-WAVE32-NEXT:    s_mov_b32 s8, s4
1322; GFX10-WAVE32-NEXT:    s_mov_b32 s9, s4
1323; GFX10-WAVE32-NEXT:    s_mov_b32 s10, s4
1324; GFX10-WAVE32-NEXT:    s_mov_b32 s11, s4
1325; GFX10-WAVE32-NEXT:    image_sample_lz v1, [v1, v1, v1], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
1326; GFX10-WAVE32-NEXT:    s_waitcnt vmcnt(0)
1327; GFX10-WAVE32-NEXT:    v_cmp_ge_f32_e32 vcc_lo, 0, v1
1328; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
1329; GFX10-WAVE32-NEXT:    s_xor_b32 s1, exec_lo, s1
1330; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB14_3
1331; GFX10-WAVE32-NEXT:  ; %bb.1: ; %kill
1332; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, exec_lo
1333; GFX10-WAVE32-NEXT:    ; implicit-def: $vgpr0
1334; GFX10-WAVE32-NEXT:    ; implicit-def: $vgpr1
1335; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB14_6
1336; GFX10-WAVE32-NEXT:  ; %bb.2: ; %kill
1337; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1338; GFX10-WAVE32-NEXT:  .LBB14_3: ; %Flow
1339; GFX10-WAVE32-NEXT:    s_or_saveexec_b32 s0, s1
1340; GFX10-WAVE32-NEXT:    ; implicit-def: $vgpr2
1341; GFX10-WAVE32-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
1342; GFX10-WAVE32-NEXT:  ; %bb.4: ; %live
1343; GFX10-WAVE32-NEXT:    v_mul_f32_e32 v2, v0, v1
1344; GFX10-WAVE32-NEXT:  ; %bb.5: ; %export
1345; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s0
1346; GFX10-WAVE32-NEXT:    exp mrt0 v2, v2, v2, v2 done vm
1347; GFX10-WAVE32-NEXT:    s_endpgm
1348; GFX10-WAVE32-NEXT:  .LBB14_6:
1349; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1350; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1351; GFX10-WAVE32-NEXT:    s_endpgm
1352.entry:
1353  %sample = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 1, float %val1, float %val1, float %val1, float 0.000000e+00, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
1354  %cond0 = fcmp ugt float %sample, 0.000000e+00
1355  br i1 %cond0, label %live, label %kill
1356
1357kill:
  ; Constant-false kill; control still continues to %export afterwards.
1358  call void @llvm.amdgcn.kill(i1 false)
1359  br label %export
1360
1361live:
1362  %scale = fmul reassoc nnan nsz arcp contract float %val0, %sample
1363  br label %export
1364
1365export:
  ; No defined value is supplied on the %kill edge.
1366  %proxy = phi float [ undef, %kill ], [ %scale, %live ]
1367  call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %proxy, float %proxy, float %proxy, float %proxy, i1 immarg true, i1 immarg true) #3
1368  ret void
1369}
1370
1371
; complex_loop: counted loop (%ctr starts at 0, steps by 1, repeats while
; %ctr.next is slt %cmpc) with a kill(i1 false) in a side block that is taken
; when %ctr is ugt %cmpb. The loop is entered only when %cmpa is sgt 0. The
; final export writes %ctr.next, or -1 when the loop was never entered.
1372define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) {
1373; SI-LABEL: complex_loop:
1374; SI:       ; %bb.0: ; %.entry
1375; SI-NEXT:    s_cmp_lt_i32 s0, 1
1376; SI-NEXT:    s_cbranch_scc1 .LBB15_7
1377; SI-NEXT:  ; %bb.1: ; %.lr.ph
1378; SI-NEXT:    s_mov_b64 s[2:3], exec
1379; SI-NEXT:    s_mov_b32 s6, 0
1380; SI-NEXT:    s_mov_b64 s[0:1], 0
1381; SI-NEXT:    s_branch .LBB15_3
1382; SI-NEXT:  .LBB15_2: ; %latch
1383; SI-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1384; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
1385; SI-NEXT:    s_add_i32 s6, s6, 1
1386; SI-NEXT:    v_cmp_ge_i32_e32 vcc, s6, v1
1387; SI-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
1388; SI-NEXT:    v_mov_b32_e32 v2, s6
1389; SI-NEXT:    s_andn2_b64 exec, exec, s[0:1]
1390; SI-NEXT:    s_cbranch_execz .LBB15_6
1391; SI-NEXT:  .LBB15_3: ; %hdr
1392; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
1393; SI-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
1394; SI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1395; SI-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
1396; SI-NEXT:    s_cbranch_execz .LBB15_2
1397; SI-NEXT:  ; %bb.4: ; %kill
1398; SI-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1399; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
1400; SI-NEXT:    s_cbranch_scc0 .LBB15_8
1401; SI-NEXT:  ; %bb.5: ; %kill
1402; SI-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1403; SI-NEXT:    s_mov_b64 exec, 0
1404; SI-NEXT:    s_branch .LBB15_2
1405; SI-NEXT:  .LBB15_6: ; %Flow
1406; SI-NEXT:    s_or_b64 exec, exec, s[0:1]
1407; SI-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1408; SI-NEXT:    s_endpgm
1409; SI-NEXT:  .LBB15_7:
1410; SI-NEXT:    v_mov_b32_e32 v2, -1
1411; SI-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1412; SI-NEXT:    s_endpgm
1413; SI-NEXT:  .LBB15_8:
1414; SI-NEXT:    s_mov_b64 exec, 0
1415; SI-NEXT:    exp null off, off, off, off done vm
1416; SI-NEXT:    s_endpgm
1417;
1418; GFX10-WAVE64-LABEL: complex_loop:
1419; GFX10-WAVE64:       ; %bb.0: ; %.entry
1420; GFX10-WAVE64-NEXT:    s_cmp_lt_i32 s0, 1
1421; GFX10-WAVE64-NEXT:    s_cbranch_scc1 .LBB15_7
1422; GFX10-WAVE64-NEXT:  ; %bb.1: ; %.lr.ph
1423; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
1424; GFX10-WAVE64-NEXT:    s_mov_b32 s6, 0
1425; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], 0
1426; GFX10-WAVE64-NEXT:    s_branch .LBB15_3
1427; GFX10-WAVE64-NEXT:  .LBB15_2: ; %latch
1428; GFX10-WAVE64-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1429; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[4:5]
1430; GFX10-WAVE64-NEXT:    s_add_i32 s6, s6, 1
1431; GFX10-WAVE64-NEXT:    v_cmp_ge_i32_e32 vcc, s6, v1
1432; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v2, s6
1433; GFX10-WAVE64-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
1434; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, s[0:1]
1435; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB15_6
1436; GFX10-WAVE64-NEXT:  .LBB15_3: ; %hdr
1437; GFX10-WAVE64-NEXT:    ; =>This Inner Loop Header: Depth=1
1438; GFX10-WAVE64-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
1439; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1440; GFX10-WAVE64-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
1441; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB15_2
1442; GFX10-WAVE64-NEXT:  ; %bb.4: ; %kill
1443; GFX10-WAVE64-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1444; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
1445; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB15_8
1446; GFX10-WAVE64-NEXT:  ; %bb.5: ; %kill
1447; GFX10-WAVE64-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1448; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1449; GFX10-WAVE64-NEXT:    s_branch .LBB15_2
1450; GFX10-WAVE64-NEXT:  .LBB15_6: ; %Flow
1451; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[0:1]
1452; GFX10-WAVE64-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1453; GFX10-WAVE64-NEXT:    s_endpgm
1454; GFX10-WAVE64-NEXT:  .LBB15_7:
1455; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v2, -1
1456; GFX10-WAVE64-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1457; GFX10-WAVE64-NEXT:    s_endpgm
1458; GFX10-WAVE64-NEXT:  .LBB15_8:
1459; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1460; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1461; GFX10-WAVE64-NEXT:    s_endpgm
1462;
1463; GFX10-WAVE32-LABEL: complex_loop:
1464; GFX10-WAVE32:       ; %bb.0: ; %.entry
1465; GFX10-WAVE32-NEXT:    s_cmp_lt_i32 s0, 1
1466; GFX10-WAVE32-NEXT:    s_cbranch_scc1 .LBB15_7
1467; GFX10-WAVE32-NEXT:  ; %bb.1: ; %.lr.ph
1468; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
1469; GFX10-WAVE32-NEXT:    s_mov_b32 s0, 0
1470; GFX10-WAVE32-NEXT:    s_mov_b32 s2, 0
1471; GFX10-WAVE32-NEXT:    s_branch .LBB15_3
1472; GFX10-WAVE32-NEXT:  .LBB15_2: ; %latch
1473; GFX10-WAVE32-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1474; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s3
1475; GFX10-WAVE32-NEXT:    s_add_i32 s2, s2, 1
1476; GFX10-WAVE32-NEXT:    v_cmp_ge_i32_e32 vcc_lo, s2, v1
1477; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v2, s2
1478; GFX10-WAVE32-NEXT:    s_or_b32 s0, vcc_lo, s0
1479; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
1480; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB15_6
1481; GFX10-WAVE32-NEXT:  .LBB15_3: ; %hdr
1482; GFX10-WAVE32-NEXT:    ; =>This Inner Loop Header: Depth=1
1483; GFX10-WAVE32-NEXT:    v_cmp_gt_u32_e32 vcc_lo, s2, v0
1484; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s3, vcc_lo
1485; GFX10-WAVE32-NEXT:    s_xor_b32 s3, exec_lo, s3
1486; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB15_2
1487; GFX10-WAVE32-NEXT:  ; %bb.4: ; %kill
1488; GFX10-WAVE32-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1489; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, exec_lo
1490; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB15_8
1491; GFX10-WAVE32-NEXT:  ; %bb.5: ; %kill
1492; GFX10-WAVE32-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1493; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1494; GFX10-WAVE32-NEXT:    s_branch .LBB15_2
1495; GFX10-WAVE32-NEXT:  .LBB15_6: ; %Flow
1496; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s0
1497; GFX10-WAVE32-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1498; GFX10-WAVE32-NEXT:    s_endpgm
1499; GFX10-WAVE32-NEXT:  .LBB15_7:
1500; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v2, -1
1501; GFX10-WAVE32-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1502; GFX10-WAVE32-NEXT:    s_endpgm
1503; GFX10-WAVE32-NEXT:  .LBB15_8:
1504; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1505; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1506; GFX10-WAVE32-NEXT:    s_endpgm
1507.entry:
1508  %flaga = icmp sgt i32 %cmpa, 0
1509  br i1 %flaga, label %.lr.ph, label %._crit_edge
1510
1511.lr.ph:
1512  br label %hdr
1513
1514hdr:
1515  %ctr = phi i32 [ 0, %.lr.ph ], [ %ctr.next, %latch ]
1516  %flagb = icmp ugt i32 %ctr, %cmpb
1517  br i1 %flagb, label %kill, label %latch
1518
1519kill:
  ; Constant-false kill inside the loop body; control rejoins at %latch.
1520  call void @llvm.amdgcn.kill(i1 false)
1521  br label %latch
1522
1523latch:
1524  %ctr.next = add nuw nsw i32 %ctr, 1
1525  %flagc = icmp slt i32 %ctr.next, %cmpc
1526  br i1 %flagc, label %hdr, label %._crit_edge
1527
1528._crit_edge:
  ; -1 when the loop was bypassed from %.entry, otherwise the final counter.
1529  %tmp = phi i32 [ -1, %.entry ], [ %ctr.next, %latch ]
1530  %out = bitcast i32 %tmp to float
1531  call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %out, float %out, float undef, float undef, i1 immarg true, i1 immarg true)
1532  ret void
1533}
1534
; skip_mode_switch: ordinary function (no amdgpu_ps calling convention and no
; kill) whose conditional block %bb.0 contains only a call to
; llvm.amdgcn.s.setreg. The expected output keeps an s_cbranch_execz around the
; resulting s_setreg_imm32_b32.
1535define void @skip_mode_switch(i32 %arg) {
1536; SI-LABEL: skip_mode_switch:
1537; SI:       ; %bb.0: ; %entry
1538; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1539; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1540; SI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1541; SI-NEXT:    s_cbranch_execz .LBB16_2
1542; SI-NEXT:  ; %bb.1: ; %bb.0
1543; SI-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
1544; SI-NEXT:  .LBB16_2: ; %bb.1
1545; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
1546; SI-NEXT:    s_setpc_b64 s[30:31]
1547;
1548; GFX10-WAVE64-LABEL: skip_mode_switch:
1549; GFX10-WAVE64:       ; %bb.0: ; %entry
1550; GFX10-WAVE64-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1551; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
1552; GFX10-WAVE64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1553; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1554; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB16_2
1555; GFX10-WAVE64-NEXT:  ; %bb.1: ; %bb.0
1556; GFX10-WAVE64-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
1557; GFX10-WAVE64-NEXT:  .LBB16_2: ; %bb.1
1558; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[4:5]
1559; GFX10-WAVE64-NEXT:    s_setpc_b64 s[30:31]
1560;
1561; GFX10-WAVE32-LABEL: skip_mode_switch:
1562; GFX10-WAVE32:       ; %bb.0: ; %entry
1563; GFX10-WAVE32-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1564; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
1565; GFX10-WAVE32-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1566; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s4, vcc_lo
1567; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB16_2
1568; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb.0
1569; GFX10-WAVE32-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
1570; GFX10-WAVE32-NEXT:  .LBB16_2: ; %bb.1
1571; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s4
1572; GFX10-WAVE32-NEXT:    s_setpc_b64 s[30:31]
1573entry:
1574  %cmp = icmp eq i32 %arg, 0
1575  br i1 %cmp, label %bb.0, label %bb.1
1576
1577bb.0:
  ; The i32 2049 operand is printed as hwreg(HW_REG_MODE, 0, 2) in the expected
  ; output above.
1578  call void @llvm.amdgcn.s.setreg(i32 2049, i32 3)
1579  br label %bb.1
1580
1581bb.1:
1582  ret void
1583}
1584
; Declarations of the AMDGPU intrinsics called by the test functions.
1585declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #3
1586declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
1587declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1588declare void @llvm.amdgcn.kill(i1) #0
1589
1590declare void @llvm.amdgcn.s.setreg(i32 immarg, i32)
1591
; Attribute groups referenced by the definitions, call sites and declarations.
; NOTE(review): #2 has no user in the part of the file reviewed here - confirm
; whether an earlier part of the file still references it.
1592attributes #0 = { nounwind }
1593attributes #1 = { nounwind readonly }
1594attributes #2 = { nounwind readnone speculatable }
1595attributes #3 = { inaccessiblememonly nounwind writeonly }
1596