1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,SI %s
3; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,GFX10-WAVE64 %s
4; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX10-WAVE32 %s
5; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
6
; Kill with a constant-true condition. Every RUN configuration shares the GCN
; checks below, which expect nothing but a bare s_endpgm: no exec update and no
; early-exit block.
7define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
8; GCN-LABEL: test_kill_depth_0_imm_pos:
9; GCN:       ; %bb.0:
10; GCN-NEXT:    s_endpgm
11  call void @llvm.amdgcn.kill(i1 true)
12  ret void
13}
14
; Kill with a constant-false condition. The checks expect exec to be and-not'ed
; with itself, followed by an scc0 branch to an early-exit block that zeroes
; exec, issues a "done" export and ends the program. The wave32 run uses the
; b32 / exec_lo forms; the gfx1100 run expects s_and_not1 and an
; "exp mrt0 ... done" export instead of "exp null ... done vm".
15define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
16; WAVE64-LABEL: test_kill_depth_0_imm_neg:
17; WAVE64:       ; %bb.0:
18; WAVE64-NEXT:    s_andn2_b64 exec, exec, exec
19; WAVE64-NEXT:    s_cbranch_scc0 .LBB1_1
20; WAVE64-NEXT:    s_endpgm
21; WAVE64-NEXT:  .LBB1_1:
22; WAVE64-NEXT:    s_mov_b64 exec, 0
23; WAVE64-NEXT:    exp null off, off, off, off done vm
24; WAVE64-NEXT:    s_endpgm
25;
26; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg:
27; GFX10-WAVE32:       ; %bb.0:
28; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, exec_lo
29; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB1_1
30; GFX10-WAVE32-NEXT:    s_endpgm
31; GFX10-WAVE32-NEXT:  .LBB1_1:
32; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
33; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
34; GFX10-WAVE32-NEXT:    s_endpgm
35;
36; GFX11-LABEL: test_kill_depth_0_imm_neg:
37; GFX11:       ; %bb.0:
38; GFX11-NEXT:    s_and_not1_b64 exec, exec, exec
39; GFX11-NEXT:    s_cbranch_scc0 .LBB1_1
40; GFX11-NEXT:    s_endpgm
41; GFX11-NEXT:  .LBB1_1:
42; GFX11-NEXT:    s_mov_b64 exec, 0
43; GFX11-NEXT:    exp mrt0 off, off, off, off done
44; GFX11-NEXT:    s_endpgm
45  call void @llvm.amdgcn.kill(i1 false)
46  ret void
47}
48
49; FIXME: Ideally only one early-exit would be emitted
; Two back-to-back constant-false kills. The checks record the current
; (non-ideal) output: a copy of exec is kept in SGPRs, and each kill emits its
; own and-not plus scc0 branch. Both branches target the same early-exit block
; (.LBB2_2). The gfx1100 run additionally expects s_delay_alu before each
; and-not.
50define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
51; WAVE64-LABEL: test_kill_depth_0_imm_neg_x2:
52; WAVE64:       ; %bb.0:
53; WAVE64-NEXT:    s_mov_b64 s[0:1], exec
54; WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
55; WAVE64-NEXT:    s_cbranch_scc0 .LBB2_2
56; WAVE64-NEXT:  ; %bb.1:
57; WAVE64-NEXT:    s_mov_b64 exec, 0
58; WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
59; WAVE64-NEXT:    s_cbranch_scc0 .LBB2_2
60; WAVE64-NEXT:    s_endpgm
61; WAVE64-NEXT:  .LBB2_2:
62; WAVE64-NEXT:    s_mov_b64 exec, 0
63; WAVE64-NEXT:    exp null off, off, off, off done vm
64; WAVE64-NEXT:    s_endpgm
65;
66; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg_x2:
67; GFX10-WAVE32:       ; %bb.0:
68; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
69; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, exec_lo
70; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB2_2
71; GFX10-WAVE32-NEXT:  ; %bb.1:
72; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
73; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, exec_lo
74; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB2_2
75; GFX10-WAVE32-NEXT:    s_endpgm
76; GFX10-WAVE32-NEXT:  .LBB2_2:
77; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
78; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
79; GFX10-WAVE32-NEXT:    s_endpgm
80;
81; GFX11-LABEL: test_kill_depth_0_imm_neg_x2:
82; GFX11:       ; %bb.0:
83; GFX11-NEXT:    s_mov_b64 s[0:1], exec
84; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
85; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], exec
86; GFX11-NEXT:    s_cbranch_scc0 .LBB2_2
87; GFX11-NEXT:  ; %bb.1:
88; GFX11-NEXT:    s_mov_b64 exec, 0
89; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
90; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], exec
91; GFX11-NEXT:    s_cbranch_scc0 .LBB2_2
92; GFX11-NEXT:    s_endpgm
93; GFX11-NEXT:  .LBB2_2:
94; GFX11-NEXT:    s_mov_b64 exec, 0
95; GFX11-NEXT:    exp mrt0 off, off, off, off done
96; GFX11-NEXT:    s_endpgm
97  call void @llvm.amdgcn.kill(i1 false)
98  call void @llvm.amdgcn.kill(i1 false)
99  ret void
100}
101
; Kill driven by a per-lane condition (%x < 0.0, ordered). The checks expect
; the inverse compare (v_cmp_ngt_f32 0, v0) to be computed into vcc and then
; and-not'ed out of exec, with an scc0 branch to the early-exit block.
102define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
103; WAVE64-LABEL: test_kill_depth_var:
104; WAVE64:       ; %bb.0:
105; WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
106; WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
107; WAVE64-NEXT:    s_cbranch_scc0 .LBB3_1
108; WAVE64-NEXT:    s_endpgm
109; WAVE64-NEXT:  .LBB3_1:
110; WAVE64-NEXT:    s_mov_b64 exec, 0
111; WAVE64-NEXT:    exp null off, off, off, off done vm
112; WAVE64-NEXT:    s_endpgm
113;
114; GFX10-WAVE32-LABEL: test_kill_depth_var:
115; GFX10-WAVE32:       ; %bb.0:
116; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
117; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
118; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB3_1
119; GFX10-WAVE32-NEXT:    s_endpgm
120; GFX10-WAVE32-NEXT:  .LBB3_1:
121; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
122; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
123; GFX10-WAVE32-NEXT:    s_endpgm
124;
125; GFX11-LABEL: test_kill_depth_var:
126; GFX11:       ; %bb.0:
127; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
128; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
129; GFX11-NEXT:    s_cbranch_scc0 .LBB3_1
130; GFX11-NEXT:    s_endpgm
131; GFX11-NEXT:  .LBB3_1:
132; GFX11-NEXT:    s_mov_b64 exec, 0
133; GFX11-NEXT:    exp mrt0 off, off, off, off done
134; GFX11-NEXT:    s_endpgm
135  %cmp = fcmp olt float %x, 0.0
136  call void @llvm.amdgcn.kill(i1 %cmp)
137  ret void
138}
139
140; FIXME: Ideally only one early-exit would be emitted
; Two kills that consume the very same i1 value (%cmp). The checks record the
; current output: the compare on v0 is emitted twice and each kill gets its own
; and-not plus scc0 branch to the shared early-exit block (.LBB4_2). The tahiti
; run expects the exec copy (s_mov) before the first compare; the gfx1010 and
; gfx1100 runs expect it after.
141define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
142; SI-LABEL: test_kill_depth_var_x2_same:
143; SI:       ; %bb.0:
144; SI-NEXT:    s_mov_b64 s[0:1], exec
145; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
146; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
147; SI-NEXT:    s_cbranch_scc0 .LBB4_2
148; SI-NEXT:  ; %bb.1:
149; SI-NEXT:    s_andn2_b64 exec, exec, vcc
150; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
151; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
152; SI-NEXT:    s_cbranch_scc0 .LBB4_2
153; SI-NEXT:    s_endpgm
154; SI-NEXT:  .LBB4_2:
155; SI-NEXT:    s_mov_b64 exec, 0
156; SI-NEXT:    exp null off, off, off, off done vm
157; SI-NEXT:    s_endpgm
158;
159; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_same:
160; GFX10-WAVE64:       ; %bb.0:
161; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
162; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
163; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
164; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB4_2
165; GFX10-WAVE64-NEXT:  ; %bb.1:
166; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
167; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
168; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
169; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB4_2
170; GFX10-WAVE64-NEXT:    s_endpgm
171; GFX10-WAVE64-NEXT:  .LBB4_2:
172; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
173; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
174; GFX10-WAVE64-NEXT:    s_endpgm
175;
176; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_same:
177; GFX10-WAVE32:       ; %bb.0:
178; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
179; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
180; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
181; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB4_2
182; GFX10-WAVE32-NEXT:  ; %bb.1:
183; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
184; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
185; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
186; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB4_2
187; GFX10-WAVE32-NEXT:    s_endpgm
188; GFX10-WAVE32-NEXT:  .LBB4_2:
189; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
190; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
191; GFX10-WAVE32-NEXT:    s_endpgm
192;
193; GFX11-LABEL: test_kill_depth_var_x2_same:
194; GFX11:       ; %bb.0:
195; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
196; GFX11-NEXT:    s_mov_b64 s[0:1], exec
197; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
198; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
199; GFX11-NEXT:    s_cbranch_scc0 .LBB4_2
200; GFX11-NEXT:  ; %bb.1:
201; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
202; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
203; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
204; GFX11-NEXT:    s_cbranch_scc0 .LBB4_2
205; GFX11-NEXT:    s_endpgm
206; GFX11-NEXT:  .LBB4_2:
207; GFX11-NEXT:    s_mov_b64 exec, 0
208; GFX11-NEXT:    exp mrt0 off, off, off, off done
209; GFX11-NEXT:    s_endpgm
210  %cmp = fcmp olt float %x, 0.0
211  call void @llvm.amdgcn.kill(i1 %cmp)
212  call void @llvm.amdgcn.kill(i1 %cmp)
213  ret void
214}
215
216; FIXME: Ideally only one early-exit would be emitted
; Two kills on independent per-lane conditions (%x < 0.0, then %y < 0.0). The
; checks expect one compare per kill (on v0, then on v1), each followed by an
; and-not of the saved exec copy and an scc0 branch to the shared early-exit
; block (.LBB5_2).
217define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
218; SI-LABEL: test_kill_depth_var_x2:
219; SI:       ; %bb.0:
220; SI-NEXT:    s_mov_b64 s[0:1], exec
221; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
222; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
223; SI-NEXT:    s_cbranch_scc0 .LBB5_2
224; SI-NEXT:  ; %bb.1:
225; SI-NEXT:    s_andn2_b64 exec, exec, vcc
226; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
227; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
228; SI-NEXT:    s_cbranch_scc0 .LBB5_2
229; SI-NEXT:    s_endpgm
230; SI-NEXT:  .LBB5_2:
231; SI-NEXT:    s_mov_b64 exec, 0
232; SI-NEXT:    exp null off, off, off, off done vm
233; SI-NEXT:    s_endpgm
234;
235; GFX10-WAVE64-LABEL: test_kill_depth_var_x2:
236; GFX10-WAVE64:       ; %bb.0:
237; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
238; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
239; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
240; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB5_2
241; GFX10-WAVE64-NEXT:  ; %bb.1:
242; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
243; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
244; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
245; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB5_2
246; GFX10-WAVE64-NEXT:    s_endpgm
247; GFX10-WAVE64-NEXT:  .LBB5_2:
248; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
249; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
250; GFX10-WAVE64-NEXT:    s_endpgm
251;
252; GFX10-WAVE32-LABEL: test_kill_depth_var_x2:
253; GFX10-WAVE32:       ; %bb.0:
254; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
255; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
256; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
257; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB5_2
258; GFX10-WAVE32-NEXT:  ; %bb.1:
259; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
260; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v1
261; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
262; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB5_2
263; GFX10-WAVE32-NEXT:    s_endpgm
264; GFX10-WAVE32-NEXT:  .LBB5_2:
265; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
266; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
267; GFX10-WAVE32-NEXT:    s_endpgm
268;
269; GFX11-LABEL: test_kill_depth_var_x2:
270; GFX11:       ; %bb.0:
271; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
272; GFX11-NEXT:    s_mov_b64 s[0:1], exec
273; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
274; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
275; GFX11-NEXT:    s_cbranch_scc0 .LBB5_2
276; GFX11-NEXT:  ; %bb.1:
277; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
278; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
279; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
280; GFX11-NEXT:    s_cbranch_scc0 .LBB5_2
281; GFX11-NEXT:    s_endpgm
282; GFX11-NEXT:  .LBB5_2:
283; GFX11-NEXT:    s_mov_b64 exec, 0
284; GFX11-NEXT:    exp mrt0 off, off, off, off done
285; GFX11-NEXT:    s_endpgm
286  %cmp.x = fcmp olt float %x, 0.0
287  call void @llvm.amdgcn.kill(i1 %cmp.x)
288  %cmp.y = fcmp olt float %y, 0.0
289  call void @llvm.amdgcn.kill(i1 %cmp.y)
290  ret void
291}
292
; Two kills separated by a side-effecting inline asm that defines v7. The
; first kill tests %x (v0); the second tests the asm result (v7). The checks
; expect the asm to sit between the exec update of the first kill and the
; compare of the second, with both scc0 branches targeting .LBB6_2.
293define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
294; SI-LABEL: test_kill_depth_var_x2_instructions:
295; SI:       ; %bb.0:
296; SI-NEXT:    s_mov_b64 s[0:1], exec
297; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
298; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
299; SI-NEXT:    s_cbranch_scc0 .LBB6_2
300; SI-NEXT:  ; %bb.1:
301; SI-NEXT:    s_andn2_b64 exec, exec, vcc
302; SI-NEXT:    ;;#ASMSTART
303; SI-NEXT:    v_mov_b32_e64 v7, -1
304; SI-NEXT:    ;;#ASMEND
305; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
306; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
307; SI-NEXT:    s_cbranch_scc0 .LBB6_2
308; SI-NEXT:    s_endpgm
309; SI-NEXT:  .LBB6_2:
310; SI-NEXT:    s_mov_b64 exec, 0
311; SI-NEXT:    exp null off, off, off, off done vm
312; SI-NEXT:    s_endpgm
313;
314; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_instructions:
315; GFX10-WAVE64:       ; %bb.0:
316; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
317; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
318; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
319; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB6_2
320; GFX10-WAVE64-NEXT:  ; %bb.1:
321; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
322; GFX10-WAVE64-NEXT:    ;;#ASMSTART
323; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
324; GFX10-WAVE64-NEXT:    ;;#ASMEND
325; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
326; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
327; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB6_2
328; GFX10-WAVE64-NEXT:    s_endpgm
329; GFX10-WAVE64-NEXT:  .LBB6_2:
330; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
331; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
332; GFX10-WAVE64-NEXT:    s_endpgm
333;
334; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_instructions:
335; GFX10-WAVE32:       ; %bb.0:
336; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
337; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
338; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
339; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB6_2
340; GFX10-WAVE32-NEXT:  ; %bb.1:
341; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
342; GFX10-WAVE32-NEXT:    ;;#ASMSTART
343; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
344; GFX10-WAVE32-NEXT:    ;;#ASMEND
345; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
346; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
347; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB6_2
348; GFX10-WAVE32-NEXT:    s_endpgm
349; GFX10-WAVE32-NEXT:  .LBB6_2:
350; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
351; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
352; GFX10-WAVE32-NEXT:    s_endpgm
353;
354; GFX11-LABEL: test_kill_depth_var_x2_instructions:
355; GFX11:       ; %bb.0:
356; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
357; GFX11-NEXT:    s_mov_b64 s[0:1], exec
358; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
359; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
360; GFX11-NEXT:    s_cbranch_scc0 .LBB6_2
361; GFX11-NEXT:  ; %bb.1:
362; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
363; GFX11-NEXT:    ;;#ASMSTART
364; GFX11-NEXT:    v_mov_b32_e64 v7, -1
365; GFX11-NEXT:    ;;#ASMEND
366; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
367; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
368; GFX11-NEXT:    s_cbranch_scc0 .LBB6_2
369; GFX11-NEXT:    s_endpgm
370; GFX11-NEXT:  .LBB6_2:
371; GFX11-NEXT:    s_mov_b64 exec, 0
372; GFX11-NEXT:    exp mrt0 off, off, off, off done
373; GFX11-NEXT:    s_endpgm
374  %cmp.x = fcmp olt float %x, 0.0
375  call void @llvm.amdgcn.kill(i1 %cmp.x)
376  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"()
377  %cmp.y = fcmp olt float %y, 0.0
378  call void @llvm.amdgcn.kill(i1 %cmp.y)
379  ret void
380}
381
382; FIXME: why does the skip depend on the asm length in the same block?
; Kill inside a conditionally executed block. %entry branches on the inreg
; argument (compared through s0 in the checks): %bb runs an inline asm
; (v_mov into v7 followed by ten v_nop_e64) and kills on the asm result, then
; falls into %exit, which returns 1.0. The checks expect the kill in %bb to
; and-not a saved exec copy with vcc and to branch on scc0 to the early-exit
; block (.LBB7_4), while both surviving paths reach .LBB7_5 with v0 = 1.0.
383define amdgpu_ps float @test_kill_control_flow(i32 inreg %arg) #0 {
384; SI-LABEL: test_kill_control_flow:
385; SI:       ; %bb.0: ; %entry
386; SI-NEXT:    s_cmp_lg_u32 s0, 0
387; SI-NEXT:    s_cbranch_scc0 .LBB7_2
388; SI-NEXT:  ; %bb.1: ; %exit
389; SI-NEXT:    v_mov_b32_e32 v0, 1.0
390; SI-NEXT:    s_branch .LBB7_5
391; SI-NEXT:  .LBB7_2: ; %bb
392; SI-NEXT:    s_mov_b64 s[2:3], exec
393; SI-NEXT:    ;;#ASMSTART
394; SI-NEXT:    v_mov_b32_e64 v7, -1
395; SI-NEXT:    v_nop_e64
396; SI-NEXT:    v_nop_e64
397; SI-NEXT:    v_nop_e64
398; SI-NEXT:    v_nop_e64
399; SI-NEXT:    v_nop_e64
400; SI-NEXT:    v_nop_e64
401; SI-NEXT:    v_nop_e64
402; SI-NEXT:    v_nop_e64
403; SI-NEXT:    v_nop_e64
404; SI-NEXT:    v_nop_e64
405; SI-NEXT:    ;;#ASMEND
406; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
407; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
408; SI-NEXT:    s_cbranch_scc0 .LBB7_4
409; SI-NEXT:  ; %bb.3: ; %bb
410; SI-NEXT:    s_andn2_b64 exec, exec, vcc
411; SI-NEXT:    v_mov_b32_e32 v0, 1.0
412; SI-NEXT:    s_branch .LBB7_5
413; SI-NEXT:  .LBB7_4:
414; SI-NEXT:    s_mov_b64 exec, 0
415; SI-NEXT:    exp null off, off, off, off done vm
416; SI-NEXT:    s_endpgm
417; SI-NEXT:  .LBB7_5:
418;
419; GFX10-WAVE64-LABEL: test_kill_control_flow:
420; GFX10-WAVE64:       ; %bb.0: ; %entry
421; GFX10-WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
422; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB7_2
423; GFX10-WAVE64-NEXT:  ; %bb.1: ; %exit
424; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 1.0
425; GFX10-WAVE64-NEXT:    s_branch .LBB7_5
426; GFX10-WAVE64-NEXT:  .LBB7_2: ; %bb
427; GFX10-WAVE64-NEXT:    ;;#ASMSTART
428; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
429; GFX10-WAVE64-NEXT:    v_nop_e64
430; GFX10-WAVE64-NEXT:    v_nop_e64
431; GFX10-WAVE64-NEXT:    v_nop_e64
432; GFX10-WAVE64-NEXT:    v_nop_e64
433; GFX10-WAVE64-NEXT:    v_nop_e64
434; GFX10-WAVE64-NEXT:    v_nop_e64
435; GFX10-WAVE64-NEXT:    v_nop_e64
436; GFX10-WAVE64-NEXT:    v_nop_e64
437; GFX10-WAVE64-NEXT:    v_nop_e64
438; GFX10-WAVE64-NEXT:    v_nop_e64
439; GFX10-WAVE64-NEXT:    ;;#ASMEND
440; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
441; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
442; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
443; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB7_4
444; GFX10-WAVE64-NEXT:  ; %bb.3: ; %bb
445; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
446; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 1.0
447; GFX10-WAVE64-NEXT:    s_branch .LBB7_5
448; GFX10-WAVE64-NEXT:  .LBB7_4:
449; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
450; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
451; GFX10-WAVE64-NEXT:    s_endpgm
452; GFX10-WAVE64-NEXT:  .LBB7_5:
453;
454; GFX10-WAVE32-LABEL: test_kill_control_flow:
455; GFX10-WAVE32:       ; %bb.0: ; %entry
456; GFX10-WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
457; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB7_2
458; GFX10-WAVE32-NEXT:  ; %bb.1: ; %exit
459; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 1.0
460; GFX10-WAVE32-NEXT:    s_branch .LBB7_5
461; GFX10-WAVE32-NEXT:  .LBB7_2: ; %bb
462; GFX10-WAVE32-NEXT:    ;;#ASMSTART
463; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
464; GFX10-WAVE32-NEXT:    v_nop_e64
465; GFX10-WAVE32-NEXT:    v_nop_e64
466; GFX10-WAVE32-NEXT:    v_nop_e64
467; GFX10-WAVE32-NEXT:    v_nop_e64
468; GFX10-WAVE32-NEXT:    v_nop_e64
469; GFX10-WAVE32-NEXT:    v_nop_e64
470; GFX10-WAVE32-NEXT:    v_nop_e64
471; GFX10-WAVE32-NEXT:    v_nop_e64
472; GFX10-WAVE32-NEXT:    v_nop_e64
473; GFX10-WAVE32-NEXT:    v_nop_e64
474; GFX10-WAVE32-NEXT:    ;;#ASMEND
475; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
476; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
477; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, vcc_lo
478; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB7_4
479; GFX10-WAVE32-NEXT:  ; %bb.3: ; %bb
480; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
481; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 1.0
482; GFX10-WAVE32-NEXT:    s_branch .LBB7_5
483; GFX10-WAVE32-NEXT:  .LBB7_4:
484; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
485; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
486; GFX10-WAVE32-NEXT:    s_endpgm
487; GFX10-WAVE32-NEXT:  .LBB7_5:
488;
489; GFX11-LABEL: test_kill_control_flow:
490; GFX11:       ; %bb.0: ; %entry
491; GFX11-NEXT:    s_cmp_lg_u32 s0, 0
492; GFX11-NEXT:    s_cbranch_scc0 .LBB7_2
493; GFX11-NEXT:  ; %bb.1: ; %exit
494; GFX11-NEXT:    v_mov_b32_e32 v0, 1.0
495; GFX11-NEXT:    s_branch .LBB7_5
496; GFX11-NEXT:  .LBB7_2: ; %bb
497; GFX11-NEXT:    ;;#ASMSTART
498; GFX11-NEXT:    v_mov_b32_e64 v7, -1
499; GFX11-NEXT:    v_nop_e64
500; GFX11-NEXT:    v_nop_e64
501; GFX11-NEXT:    v_nop_e64
502; GFX11-NEXT:    v_nop_e64
503; GFX11-NEXT:    v_nop_e64
504; GFX11-NEXT:    v_nop_e64
505; GFX11-NEXT:    v_nop_e64
506; GFX11-NEXT:    v_nop_e64
507; GFX11-NEXT:    v_nop_e64
508; GFX11-NEXT:    v_nop_e64
509; GFX11-NEXT:    ;;#ASMEND
510; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
511; GFX11-NEXT:    s_mov_b64 s[2:3], exec
512; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
513; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], vcc
514; GFX11-NEXT:    s_cbranch_scc0 .LBB7_4
515; GFX11-NEXT:  ; %bb.3: ; %bb
516; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
517; GFX11-NEXT:    v_mov_b32_e32 v0, 1.0
518; GFX11-NEXT:    s_branch .LBB7_5
519; GFX11-NEXT:  .LBB7_4:
520; GFX11-NEXT:    s_mov_b64 exec, 0
521; GFX11-NEXT:    exp mrt0 off, off, off, off done
522; GFX11-NEXT:    s_endpgm
523; GFX11-NEXT:  .LBB7_5:
524entry:
525  %cmp = icmp eq i32 %arg, 0
526  br i1 %cmp, label %bb, label %exit
527
528bb:
529  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
530    v_nop_e64
531    v_nop_e64
532    v_nop_e64
533    v_nop_e64
534    v_nop_e64
535    v_nop_e64
536    v_nop_e64
537    v_nop_e64
538    v_nop_e64
539    v_nop_e64", "={v7}"()
540  %cmp.var = fcmp olt float %var, 0.0
541  ; TODO: We could do an early-exit here (the branch above is uniform!)
542  call void @llvm.amdgcn.kill(i1 %cmp.var)
543  br label %exit
544
545exit:
546  ret float 1.0
547}
548
; Kill in a conditionally executed block that still has work after the kill.
; In %bb the inline asm defines v7 (v_mov followed by eleven v_nop_e64, one
; more than in test_kill_control_flow) and a second asm defines v8, which is
; live across the kill and stored (volatile) afterwards. A third asm after the
; kill defines v9, which feeds the phi in %exit. The checks expect the stores
; and the v9 asm to follow the exec update of the kill, and the early-exit
; block (.LBB8_4) to be laid out at the end of the function.
549define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
550; SI-LABEL: test_kill_control_flow_remainder:
551; SI:       ; %bb.0: ; %entry
552; SI-NEXT:    s_cmp_lg_u32 s0, 0
553; SI-NEXT:    v_mov_b32_e32 v9, 0
554; SI-NEXT:    s_cbranch_scc1 .LBB8_3
555; SI-NEXT:  ; %bb.1: ; %bb
556; SI-NEXT:    s_mov_b64 s[2:3], exec
557; SI-NEXT:    ;;#ASMSTART
558; SI-NEXT:    v_mov_b32_e64 v7, -1
559; SI-NEXT:    v_nop_e64
560; SI-NEXT:    v_nop_e64
561; SI-NEXT:    v_nop_e64
562; SI-NEXT:    v_nop_e64
563; SI-NEXT:    v_nop_e64
564; SI-NEXT:    v_nop_e64
565; SI-NEXT:    v_nop_e64
566; SI-NEXT:    v_nop_e64
567; SI-NEXT:    v_nop_e64
568; SI-NEXT:    v_nop_e64
569; SI-NEXT:    v_nop_e64
570; SI-NEXT:    ;;#ASMEND
571; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
572; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
573; SI-NEXT:    ;;#ASMSTART
574; SI-NEXT:    v_mov_b32_e64 v8, -1
575; SI-NEXT:    ;;#ASMEND
576; SI-NEXT:    s_cbranch_scc0 .LBB8_4
577; SI-NEXT:  ; %bb.2: ; %bb
578; SI-NEXT:    s_andn2_b64 exec, exec, vcc
579; SI-NEXT:    s_mov_b32 s3, 0xf000
580; SI-NEXT:    s_mov_b32 s2, -1
581; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0
582; SI-NEXT:    s_waitcnt vmcnt(0)
583; SI-NEXT:    ;;#ASMSTART
584; SI-NEXT:    v_mov_b32_e64 v9, -2
585; SI-NEXT:    ;;#ASMEND
586; SI-NEXT:  .LBB8_3: ; %exit
587; SI-NEXT:    s_mov_b32 s3, 0xf000
588; SI-NEXT:    s_mov_b32 s2, -1
589; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0
590; SI-NEXT:    s_endpgm
591; SI-NEXT:  .LBB8_4:
592; SI-NEXT:    s_mov_b64 exec, 0
593; SI-NEXT:    exp null off, off, off, off done vm
594; SI-NEXT:    s_endpgm
595;
596; GFX10-WAVE64-LABEL: test_kill_control_flow_remainder:
597; GFX10-WAVE64:       ; %bb.0: ; %entry
598; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v9, 0
599; GFX10-WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
600; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB8_2
601; GFX10-WAVE64-NEXT:  ; %bb.1: ; %exit
602; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v9, off
603; GFX10-WAVE64-NEXT:    s_endpgm
604; GFX10-WAVE64-NEXT:  .LBB8_2: ; %bb
605; GFX10-WAVE64-NEXT:    ;;#ASMSTART
606; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
607; GFX10-WAVE64-NEXT:    v_nop_e64
608; GFX10-WAVE64-NEXT:    v_nop_e64
609; GFX10-WAVE64-NEXT:    v_nop_e64
610; GFX10-WAVE64-NEXT:    v_nop_e64
611; GFX10-WAVE64-NEXT:    v_nop_e64
612; GFX10-WAVE64-NEXT:    v_nop_e64
613; GFX10-WAVE64-NEXT:    v_nop_e64
614; GFX10-WAVE64-NEXT:    v_nop_e64
615; GFX10-WAVE64-NEXT:    v_nop_e64
616; GFX10-WAVE64-NEXT:    v_nop_e64
617; GFX10-WAVE64-NEXT:    v_nop_e64
618; GFX10-WAVE64-NEXT:    ;;#ASMEND
619; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
620; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
621; GFX10-WAVE64-NEXT:    ;;#ASMSTART
622; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v8, -1
623; GFX10-WAVE64-NEXT:    ;;#ASMEND
624; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
625; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB8_4
626; GFX10-WAVE64-NEXT:  ; %bb.3: ; %bb
627; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
628; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v8, off
629; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
630; GFX10-WAVE64-NEXT:    ;;#ASMSTART
631; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v9, -2
632; GFX10-WAVE64-NEXT:    ;;#ASMEND
633; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v9, off
634; GFX10-WAVE64-NEXT:    s_endpgm
635; GFX10-WAVE64-NEXT:  .LBB8_4:
636; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
637; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
638; GFX10-WAVE64-NEXT:    s_endpgm
639;
640; GFX10-WAVE32-LABEL: test_kill_control_flow_remainder:
641; GFX10-WAVE32:       ; %bb.0: ; %entry
642; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v9, 0
643; GFX10-WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
644; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB8_2
645; GFX10-WAVE32-NEXT:  ; %bb.1: ; %exit
646; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v9, off
647; GFX10-WAVE32-NEXT:    s_endpgm
648; GFX10-WAVE32-NEXT:  .LBB8_2: ; %bb
649; GFX10-WAVE32-NEXT:    ;;#ASMSTART
650; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
651; GFX10-WAVE32-NEXT:    v_nop_e64
652; GFX10-WAVE32-NEXT:    v_nop_e64
653; GFX10-WAVE32-NEXT:    v_nop_e64
654; GFX10-WAVE32-NEXT:    v_nop_e64
655; GFX10-WAVE32-NEXT:    v_nop_e64
656; GFX10-WAVE32-NEXT:    v_nop_e64
657; GFX10-WAVE32-NEXT:    v_nop_e64
658; GFX10-WAVE32-NEXT:    v_nop_e64
659; GFX10-WAVE32-NEXT:    v_nop_e64
660; GFX10-WAVE32-NEXT:    v_nop_e64
661; GFX10-WAVE32-NEXT:    v_nop_e64
662; GFX10-WAVE32-NEXT:    ;;#ASMEND
663; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
664; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
665; GFX10-WAVE32-NEXT:    ;;#ASMSTART
666; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v8, -1
667; GFX10-WAVE32-NEXT:    ;;#ASMEND
668; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, vcc_lo
669; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB8_4
670; GFX10-WAVE32-NEXT:  ; %bb.3: ; %bb
671; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
672; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v8, off
673; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
674; GFX10-WAVE32-NEXT:    ;;#ASMSTART
675; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v9, -2
676; GFX10-WAVE32-NEXT:    ;;#ASMEND
677; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v9, off
678; GFX10-WAVE32-NEXT:    s_endpgm
679; GFX10-WAVE32-NEXT:  .LBB8_4:
680; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
681; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
682; GFX10-WAVE32-NEXT:    s_endpgm
683;
684; GFX11-LABEL: test_kill_control_flow_remainder:
685; GFX11:       ; %bb.0: ; %entry
686; GFX11-NEXT:    v_mov_b32_e32 v9, 0
687; GFX11-NEXT:    s_cmp_lg_u32 s0, 0
688; GFX11-NEXT:    s_cbranch_scc0 .LBB8_2
689; GFX11-NEXT:  ; %bb.1: ; %exit
690; GFX11-NEXT:    global_store_b32 v[0:1], v9, off
691; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
692; GFX11-NEXT:    s_endpgm
693; GFX11-NEXT:  .LBB8_2: ; %bb
694; GFX11-NEXT:    ;;#ASMSTART
695; GFX11-NEXT:    v_mov_b32_e64 v7, -1
696; GFX11-NEXT:    v_nop_e64
697; GFX11-NEXT:    v_nop_e64
698; GFX11-NEXT:    v_nop_e64
699; GFX11-NEXT:    v_nop_e64
700; GFX11-NEXT:    v_nop_e64
701; GFX11-NEXT:    v_nop_e64
702; GFX11-NEXT:    v_nop_e64
703; GFX11-NEXT:    v_nop_e64
704; GFX11-NEXT:    v_nop_e64
705; GFX11-NEXT:    v_nop_e64
706; GFX11-NEXT:    v_nop_e64
707; GFX11-NEXT:    ;;#ASMEND
708; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
709; GFX11-NEXT:    s_mov_b64 s[2:3], exec
710; GFX11-NEXT:    ;;#ASMSTART
711; GFX11-NEXT:    v_mov_b32_e64 v8, -1
712; GFX11-NEXT:    ;;#ASMEND
713; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], vcc
714; GFX11-NEXT:    s_cbranch_scc0 .LBB8_4
715; GFX11-NEXT:  ; %bb.3: ; %bb
716; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
717; GFX11-NEXT:    global_store_b32 v[0:1], v8, off dlc
718; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
719; GFX11-NEXT:    ;;#ASMSTART
720; GFX11-NEXT:    v_mov_b32_e64 v9, -2
721; GFX11-NEXT:    ;;#ASMEND
722; GFX11-NEXT:    global_store_b32 v[0:1], v9, off
723; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
724; GFX11-NEXT:    s_endpgm
725; GFX11-NEXT:  .LBB8_4:
726; GFX11-NEXT:    s_mov_b64 exec, 0
727; GFX11-NEXT:    exp mrt0 off, off, off, off done
728; GFX11-NEXT:    s_endpgm
729entry:
730  %cmp = icmp eq i32 %arg, 0
731  br i1 %cmp, label %bb, label %exit
732
733bb:
734  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
735    v_nop_e64
736    v_nop_e64
737    v_nop_e64
738    v_nop_e64
739    v_nop_e64
740    v_nop_e64
741    v_nop_e64
742    v_nop_e64
743    v_nop_e64
744    v_nop_e64
745    v_nop_e64", "={v7}"()
746  %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"()
747  %cmp.var = fcmp olt float %var, 0.0
748  ; TODO: We could do an early-exit here (the branch above is uniform!)
749  call void @llvm.amdgcn.kill(i1 %cmp.var)
750  store volatile float %live.across, float addrspace(1)* undef
751  %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"()
752  br label %exit
753
754exit:
755  %phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
756  store float %phi, float addrspace(1)* undef
757  ret void
758}
759
760define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
761; SI-LABEL: test_kill_control_flow_return:
762; SI:       ; %bb.0: ; %entry
763; SI-NEXT:    s_cmp_eq_u32 s0, 1
764; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
765; SI-NEXT:    s_mov_b64 s[2:3], exec
766; SI-NEXT:    s_xor_b64 s[4:5], s[4:5], exec
767; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[4:5]
768; SI-NEXT:    s_cbranch_scc0 .LBB9_4
769; SI-NEXT:  ; %bb.1: ; %entry
770; SI-NEXT:    s_and_b64 exec, exec, s[2:3]
771; SI-NEXT:    s_cmp_lg_u32 s0, 0
772; SI-NEXT:    v_mov_b32_e32 v0, 0
773; SI-NEXT:    s_cbranch_scc0 .LBB9_3
774; SI-NEXT:  ; %bb.2: ; %exit
775; SI-NEXT:    s_branch .LBB9_5
776; SI-NEXT:  .LBB9_3: ; %bb
777; SI-NEXT:    ;;#ASMSTART
778; SI-NEXT:    v_mov_b32_e64 v7, -1
779; SI-NEXT:    v_nop_e64
780; SI-NEXT:    v_nop_e64
781; SI-NEXT:    v_nop_e64
782; SI-NEXT:    v_nop_e64
783; SI-NEXT:    v_nop_e64
784; SI-NEXT:    v_nop_e64
785; SI-NEXT:    v_nop_e64
786; SI-NEXT:    v_nop_e64
787; SI-NEXT:    v_nop_e64
788; SI-NEXT:    v_nop_e64
789; SI-NEXT:    ;;#ASMEND
790; SI-NEXT:    v_mov_b32_e32 v0, v7
791; SI-NEXT:    s_branch .LBB9_5
792; SI-NEXT:  .LBB9_4:
793; SI-NEXT:    s_mov_b64 exec, 0
794; SI-NEXT:    exp null off, off, off, off done vm
795; SI-NEXT:    s_endpgm
796; SI-NEXT:  .LBB9_5:
797;
798; GFX10-WAVE64-LABEL: test_kill_control_flow_return:
799; GFX10-WAVE64:       ; %bb.0: ; %entry
800; GFX10-WAVE64-NEXT:    s_cmp_eq_u32 s0, 1
801; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
802; GFX10-WAVE64-NEXT:    s_cselect_b64 s[4:5], -1, 0
803; GFX10-WAVE64-NEXT:    s_xor_b64 s[4:5], s[4:5], exec
804; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[4:5]
805; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB9_4
806; GFX10-WAVE64-NEXT:  ; %bb.1: ; %entry
807; GFX10-WAVE64-NEXT:    s_and_b64 exec, exec, s[2:3]
808; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 0
809; GFX10-WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
810; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB9_3
811; GFX10-WAVE64-NEXT:  ; %bb.2: ; %exit
812; GFX10-WAVE64-NEXT:    s_branch .LBB9_5
813; GFX10-WAVE64-NEXT:  .LBB9_3: ; %bb
814; GFX10-WAVE64-NEXT:    ;;#ASMSTART
815; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
816; GFX10-WAVE64-NEXT:    v_nop_e64
817; GFX10-WAVE64-NEXT:    v_nop_e64
818; GFX10-WAVE64-NEXT:    v_nop_e64
819; GFX10-WAVE64-NEXT:    v_nop_e64
820; GFX10-WAVE64-NEXT:    v_nop_e64
821; GFX10-WAVE64-NEXT:    v_nop_e64
822; GFX10-WAVE64-NEXT:    v_nop_e64
823; GFX10-WAVE64-NEXT:    v_nop_e64
824; GFX10-WAVE64-NEXT:    v_nop_e64
825; GFX10-WAVE64-NEXT:    v_nop_e64
826; GFX10-WAVE64-NEXT:    ;;#ASMEND
827; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, v7
828; GFX10-WAVE64-NEXT:    s_branch .LBB9_5
829; GFX10-WAVE64-NEXT:  .LBB9_4:
830; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
831; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
832; GFX10-WAVE64-NEXT:    s_endpgm
833; GFX10-WAVE64-NEXT:  .LBB9_5:
834;
835; GFX10-WAVE32-LABEL: test_kill_control_flow_return:
836; GFX10-WAVE32:       ; %bb.0: ; %entry
837; GFX10-WAVE32-NEXT:    s_cmp_eq_u32 s0, 1
838; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
839; GFX10-WAVE32-NEXT:    s_cselect_b32 s2, -1, 0
840; GFX10-WAVE32-NEXT:    s_xor_b32 s2, s2, exec_lo
841; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, s2
842; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB9_4
843; GFX10-WAVE32-NEXT:  ; %bb.1: ; %entry
844; GFX10-WAVE32-NEXT:    s_and_b32 exec_lo, exec_lo, s1
845; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 0
846; GFX10-WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
847; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB9_3
848; GFX10-WAVE32-NEXT:  ; %bb.2: ; %exit
849; GFX10-WAVE32-NEXT:    s_branch .LBB9_5
850; GFX10-WAVE32-NEXT:  .LBB9_3: ; %bb
851; GFX10-WAVE32-NEXT:    ;;#ASMSTART
852; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
853; GFX10-WAVE32-NEXT:    v_nop_e64
854; GFX10-WAVE32-NEXT:    v_nop_e64
855; GFX10-WAVE32-NEXT:    v_nop_e64
856; GFX10-WAVE32-NEXT:    v_nop_e64
857; GFX10-WAVE32-NEXT:    v_nop_e64
858; GFX10-WAVE32-NEXT:    v_nop_e64
859; GFX10-WAVE32-NEXT:    v_nop_e64
860; GFX10-WAVE32-NEXT:    v_nop_e64
861; GFX10-WAVE32-NEXT:    v_nop_e64
862; GFX10-WAVE32-NEXT:    v_nop_e64
863; GFX10-WAVE32-NEXT:    ;;#ASMEND
864; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, v7
865; GFX10-WAVE32-NEXT:    s_branch .LBB9_5
866; GFX10-WAVE32-NEXT:  .LBB9_4:
867; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
868; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
869; GFX10-WAVE32-NEXT:    s_endpgm
870; GFX10-WAVE32-NEXT:  .LBB9_5:
871;
872; GFX11-LABEL: test_kill_control_flow_return:
873; GFX11:       ; %bb.0: ; %entry
874; GFX11-NEXT:    s_cmp_eq_u32 s0, 1
875; GFX11-NEXT:    s_mov_b64 s[2:3], exec
876; GFX11-NEXT:    s_cselect_b64 s[4:5], -1, 0
877; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
878; GFX11-NEXT:    s_xor_b64 s[4:5], s[4:5], exec
879; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], s[4:5]
880; GFX11-NEXT:    s_cbranch_scc0 .LBB9_4
881; GFX11-NEXT:  ; %bb.1: ; %entry
882; GFX11-NEXT:    s_and_b64 exec, exec, s[2:3]
883; GFX11-NEXT:    v_mov_b32_e32 v0, 0
884; GFX11-NEXT:    s_cmp_lg_u32 s0, 0
885; GFX11-NEXT:    s_cbranch_scc0 .LBB9_3
886; GFX11-NEXT:  ; %bb.2: ; %exit
887; GFX11-NEXT:    s_branch .LBB9_5
888; GFX11-NEXT:  .LBB9_3: ; %bb
889; GFX11-NEXT:    ;;#ASMSTART
890; GFX11-NEXT:    v_mov_b32_e64 v7, -1
891; GFX11-NEXT:    v_nop_e64
892; GFX11-NEXT:    v_nop_e64
893; GFX11-NEXT:    v_nop_e64
894; GFX11-NEXT:    v_nop_e64
895; GFX11-NEXT:    v_nop_e64
896; GFX11-NEXT:    v_nop_e64
897; GFX11-NEXT:    v_nop_e64
898; GFX11-NEXT:    v_nop_e64
899; GFX11-NEXT:    v_nop_e64
900; GFX11-NEXT:    v_nop_e64
901; GFX11-NEXT:    ;;#ASMEND
902; GFX11-NEXT:    v_mov_b32_e32 v0, v7
903; GFX11-NEXT:    s_branch .LBB9_5
904; GFX11-NEXT:  .LBB9_4:
905; GFX11-NEXT:    s_mov_b64 exec, 0
906; GFX11-NEXT:    exp mrt0 off, off, off, off done
907; GFX11-NEXT:    s_endpgm
908; GFX11-NEXT:  .LBB9_5:
909entry:
910  %kill = icmp eq i32 %arg, 1
911  %cmp = icmp eq i32 %arg, 0
912  call void @llvm.amdgcn.kill(i1 %kill)
913  br i1 %cmp, label %bb, label %exit
914
915bb:
916  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
917    v_nop_e64
918    v_nop_e64
919    v_nop_e64
920    v_nop_e64
921    v_nop_e64
922    v_nop_e64
923    v_nop_e64
924    v_nop_e64
925    v_nop_e64
926    v_nop_e64", "={v7}"()
927  br label %exit
928
929exit:
930  %ret = phi float [ %var, %bb ], [ 0.0, %entry ]
931  ret float %ret
932}
933
; Pixel-shader test: llvm.amdgcn.kill executed inside a loop that is only
; entered by the lanes whose %arg is zero. %arg is a plain (non-inreg)
; argument and the generated code below compares it with a vector compare on
; v0, so the branch into the loop is per-lane.
;
; What the assertions below show for every run line: a copy of exec is taken
; in %entry, the kill removes lanes from that copy with an and-not, and an
; s_cbranch_scc0 leaves the loop for a trailing block that zeroes exec, issues
; a "done" export and ends the program. Otherwise exec itself is and-not'ed
; with the same mask and the loop continues.
934define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
935; SI-LABEL: test_kill_divergent_loop:
936; SI:       ; %bb.0: ; %entry
937; SI-NEXT:    s_mov_b64 s[0:1], exec
938; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
939; SI-NEXT:    s_and_saveexec_b64 s[2:3], vcc
940; SI-NEXT:    s_xor_b64 s[4:5], exec, s[2:3]
941; SI-NEXT:    s_cbranch_execz .LBB10_4
942; SI-NEXT:  ; %bb.1: ; %bb.preheader
943; SI-NEXT:    s_mov_b32 s3, 0xf000
944; SI-NEXT:    s_mov_b32 s2, -1
945; SI-NEXT:  .LBB10_2: ; %bb
946; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
947; SI-NEXT:    ;;#ASMSTART
948; SI-NEXT:    v_mov_b32_e64 v7, -1
949; SI-NEXT:    v_nop_e64
950; SI-NEXT:    v_nop_e64
951; SI-NEXT:    v_nop_e64
952; SI-NEXT:    v_nop_e64
953; SI-NEXT:    v_nop_e64
954; SI-NEXT:    v_nop_e64
955; SI-NEXT:    v_nop_e64
956; SI-NEXT:    v_nop_e64
957; SI-NEXT:    v_nop_e64
958; SI-NEXT:    v_nop_e64
959; SI-NEXT:    ;;#ASMEND
960; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
961; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
962; SI-NEXT:    s_cbranch_scc0 .LBB10_5
963; SI-NEXT:  ; %bb.3: ; %bb
964; SI-NEXT:    ; in Loop: Header=BB10_2 Depth=1
965; SI-NEXT:    s_andn2_b64 exec, exec, vcc
966; SI-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
967; SI-NEXT:    s_waitcnt vmcnt(0)
968; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
969; SI-NEXT:    s_cbranch_vccnz .LBB10_2
970; SI-NEXT:  .LBB10_4: ; %Flow1
971; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
972; SI-NEXT:    s_mov_b32 s3, 0xf000
973; SI-NEXT:    s_mov_b32 s2, -1
974; SI-NEXT:    v_mov_b32_e32 v0, 8
975; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
976; SI-NEXT:    s_waitcnt vmcnt(0)
977; SI-NEXT:    s_endpgm
978; SI-NEXT:  .LBB10_5:
979; SI-NEXT:    s_mov_b64 exec, 0
980; SI-NEXT:    exp null off, off, off, off done vm
981; SI-NEXT:    s_endpgm
982;
983; GFX10-WAVE64-LABEL: test_kill_divergent_loop:
984; GFX10-WAVE64:       ; %bb.0: ; %entry
985; GFX10-WAVE64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
986; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
987; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
988; GFX10-WAVE64-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
989; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB10_3
990; GFX10-WAVE64-NEXT:  .LBB10_1: ; %bb
991; GFX10-WAVE64-NEXT:    ; =>This Inner Loop Header: Depth=1
992; GFX10-WAVE64-NEXT:    ;;#ASMSTART
993; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
994; GFX10-WAVE64-NEXT:    v_nop_e64
995; GFX10-WAVE64-NEXT:    v_nop_e64
996; GFX10-WAVE64-NEXT:    v_nop_e64
997; GFX10-WAVE64-NEXT:    v_nop_e64
998; GFX10-WAVE64-NEXT:    v_nop_e64
999; GFX10-WAVE64-NEXT:    v_nop_e64
1000; GFX10-WAVE64-NEXT:    v_nop_e64
1001; GFX10-WAVE64-NEXT:    v_nop_e64
1002; GFX10-WAVE64-NEXT:    v_nop_e64
1003; GFX10-WAVE64-NEXT:    v_nop_e64
1004; GFX10-WAVE64-NEXT:    ;;#ASMEND
1005; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
1006; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
1007; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB10_4
1008; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb
1009; GFX10-WAVE64-NEXT:    ; in Loop: Header=BB10_1 Depth=1
1010; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
1011; GFX10-WAVE64-NEXT:    global_load_dword v0, v[0:1], off glc dlc
1012; GFX10-WAVE64-NEXT:    s_waitcnt vmcnt(0)
1013; GFX10-WAVE64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1014; GFX10-WAVE64-NEXT:    s_cbranch_vccnz .LBB10_1
1015; GFX10-WAVE64-NEXT:  .LBB10_3: ; %Flow1
1016; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[2:3]
1017; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 8
1018; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v0, off
1019; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
1020; GFX10-WAVE64-NEXT:    s_endpgm
1021; GFX10-WAVE64-NEXT:  .LBB10_4:
1022; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1023; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1024; GFX10-WAVE64-NEXT:    s_endpgm
1025;
1026; GFX10-WAVE32-LABEL: test_kill_divergent_loop:
1027; GFX10-WAVE32:       ; %bb.0: ; %entry
1028; GFX10-WAVE32-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1029; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
1030; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
1031; GFX10-WAVE32-NEXT:    s_xor_b32 s1, exec_lo, s1
1032; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB10_3
1033; GFX10-WAVE32-NEXT:  .LBB10_1: ; %bb
1034; GFX10-WAVE32-NEXT:    ; =>This Inner Loop Header: Depth=1
1035; GFX10-WAVE32-NEXT:    ;;#ASMSTART
1036; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
1037; GFX10-WAVE32-NEXT:    v_nop_e64
1038; GFX10-WAVE32-NEXT:    v_nop_e64
1039; GFX10-WAVE32-NEXT:    v_nop_e64
1040; GFX10-WAVE32-NEXT:    v_nop_e64
1041; GFX10-WAVE32-NEXT:    v_nop_e64
1042; GFX10-WAVE32-NEXT:    v_nop_e64
1043; GFX10-WAVE32-NEXT:    v_nop_e64
1044; GFX10-WAVE32-NEXT:    v_nop_e64
1045; GFX10-WAVE32-NEXT:    v_nop_e64
1046; GFX10-WAVE32-NEXT:    v_nop_e64
1047; GFX10-WAVE32-NEXT:    ;;#ASMEND
1048; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
1049; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
1050; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB10_4
1051; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb
1052; GFX10-WAVE32-NEXT:    ; in Loop: Header=BB10_1 Depth=1
1053; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
1054; GFX10-WAVE32-NEXT:    global_load_dword v0, v[0:1], off glc dlc
1055; GFX10-WAVE32-NEXT:    s_waitcnt vmcnt(0)
1056; GFX10-WAVE32-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1057; GFX10-WAVE32-NEXT:    s_cbranch_vccnz .LBB10_1
1058; GFX10-WAVE32-NEXT:  .LBB10_3: ; %Flow1
1059; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s1
1060; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 8
1061; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v0, off
1062; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
1063; GFX10-WAVE32-NEXT:    s_endpgm
1064; GFX10-WAVE32-NEXT:  .LBB10_4:
1065; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1066; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1067; GFX10-WAVE32-NEXT:    s_endpgm
1068;
1069; GFX11-LABEL: test_kill_divergent_loop:
1070; GFX11:       ; %bb.0: ; %entry
1071; GFX11-NEXT:    s_mov_b64 s[0:1], exec
1072; GFX11-NEXT:    s_mov_b64 s[2:3], exec
1073; GFX11-NEXT:    v_cmpx_eq_u32_e32 0, v0
1074; GFX11-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
1075; GFX11-NEXT:    s_cbranch_execz .LBB10_3
1076; GFX11-NEXT:  .LBB10_1: ; %bb
1077; GFX11-NEXT:    ; =>This Inner Loop Header: Depth=1
1078; GFX11-NEXT:    ;;#ASMSTART
1079; GFX11-NEXT:    v_mov_b32_e64 v7, -1
1080; GFX11-NEXT:    v_nop_e64
1081; GFX11-NEXT:    v_nop_e64
1082; GFX11-NEXT:    v_nop_e64
1083; GFX11-NEXT:    v_nop_e64
1084; GFX11-NEXT:    v_nop_e64
1085; GFX11-NEXT:    v_nop_e64
1086; GFX11-NEXT:    v_nop_e64
1087; GFX11-NEXT:    v_nop_e64
1088; GFX11-NEXT:    v_nop_e64
1089; GFX11-NEXT:    v_nop_e64
1090; GFX11-NEXT:    ;;#ASMEND
1091; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
1092; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
1093; GFX11-NEXT:    s_cbranch_scc0 .LBB10_4
1094; GFX11-NEXT:  ; %bb.2: ; %bb
1095; GFX11-NEXT:    ; in Loop: Header=BB10_1 Depth=1
1096; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
1097; GFX11-NEXT:    global_load_b32 v0, v[0:1], off glc dlc
1098; GFX11-NEXT:    s_waitcnt vmcnt(0)
1099; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1100; GFX11-NEXT:    s_cbranch_vccnz .LBB10_1
1101; GFX11-NEXT:  .LBB10_3: ; %Flow1
1102; GFX11-NEXT:    s_or_b64 exec, exec, s[2:3]
1103; GFX11-NEXT:    v_mov_b32_e32 v0, 8
1104; GFX11-NEXT:    global_store_b32 v[0:1], v0, off dlc
1105; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1106; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1107; GFX11-NEXT:    s_endpgm
1108; GFX11-NEXT:  .LBB10_4:
1109; GFX11-NEXT:    s_mov_b64 exec, 0
1110; GFX11-NEXT:    exp mrt0 off, off, off, off done
1111; GFX11-NEXT:    s_endpgm
; Only lanes with %arg == 0 enter the loop; all others go straight to %exit.
1112entry:
1113  %cmp = icmp eq i32 %arg, 0
1114  br i1 %cmp, label %bb, label %exit
1115
; Loop body. The inline asm writes -1 to v7 (returned as %var through the
; "={v7}" constraint) and pads the block with v_nop_e64 instructions.
; NOTE(review): the nop padding presumably exists to make the block long
; enough that it is not treated as trivially skippable - confirm.
1116bb:
1117  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
1118    v_nop_e64
1119    v_nop_e64
1120    v_nop_e64
1121    v_nop_e64
1122    v_nop_e64
1123    v_nop_e64
1124    v_nop_e64
1125    v_nop_e64
1126    v_nop_e64
1127    v_nop_e64", "={v7}"()
; The kill operand is the "stay alive" condition: lanes where %var < 0.0 does
; not hold are terminated (the assertions above use the negated compare,
; v_cmp_ngt_f32, to build the mask of lanes to remove).
1128  %cmp.var = fcmp olt float %var, 0.0
1129  call void @llvm.amdgcn.kill(i1 %cmp.var)
; The loop condition comes from a volatile load through an undef pointer, so
; it cannot be folded away; the loop repeats while the loaded value is zero.
1130  %vgpr = load volatile i32, i32 addrspace(1)* undef
1131  %loop.cond = icmp eq i32 %vgpr, 0
1132  br i1 %loop.cond, label %bb, label %exit
1133
; Volatile store of 8 gives the exit block an observable side effect.
1134exit:
1135  store volatile i32 8, i32 addrspace(1)* undef
1136  ret void
1137}
1138
1139; bug 28550
; Pixel-shader test where a value defined before the kill (%tmp2) is used
; both as the input to the kill condition and, after the kill, by the phi in
; %phibb. %phibb is reachable directly from %bb and through %bb8, so the phi
; merges the pre-kill value with the constant 4.0.
;
; In the assertions below the select result is materialised in v0 before the
; and-not on exec, and v0 is then read by the compare in the %phibb block.
1140define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
1141; SI-LABEL: phi_use_def_before_kill:
1142; SI:       ; %bb.0: ; %bb
1143; SI-NEXT:    v_add_f32_e64 v1, s0, 1.0
1144; SI-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
1145; SI-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc
1146; SI-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v1
1147; SI-NEXT:    s_andn2_b64 exec, exec, vcc
1148; SI-NEXT:    s_cbranch_scc0 .LBB11_6
1149; SI-NEXT:  ; %bb.1: ; %bb
1150; SI-NEXT:    s_andn2_b64 exec, exec, vcc
1151; SI-NEXT:    s_cbranch_scc0 .LBB11_3
1152; SI-NEXT:  ; %bb.2: ; %bb8
1153; SI-NEXT:    s_mov_b32 s3, 0xf000
1154; SI-NEXT:    s_mov_b32 s2, -1
1155; SI-NEXT:    v_mov_b32_e32 v0, 8
1156; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1157; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1158; SI-NEXT:    v_mov_b32_e32 v0, 4.0
1159; SI-NEXT:  .LBB11_3: ; %phibb
1160; SI-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
1161; SI-NEXT:    s_cbranch_vccz .LBB11_5
1162; SI-NEXT:  ; %bb.4: ; %bb10
1163; SI-NEXT:    s_mov_b32 s3, 0xf000
1164; SI-NEXT:    s_mov_b32 s2, -1
1165; SI-NEXT:    v_mov_b32_e32 v0, 9
1166; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1167; SI-NEXT:    s_waitcnt vmcnt(0)
1168; SI-NEXT:  .LBB11_5: ; %end
1169; SI-NEXT:    s_endpgm
1170; SI-NEXT:  .LBB11_6:
1171; SI-NEXT:    s_mov_b64 exec, 0
1172; SI-NEXT:    exp null off, off, off, off done vm
1173; SI-NEXT:    s_endpgm
1174;
1175; GFX10-WAVE64-LABEL: phi_use_def_before_kill:
1176; GFX10-WAVE64:       ; %bb.0: ; %bb
1177; GFX10-WAVE64-NEXT:    v_add_f32_e64 v1, s0, 1.0
1178; GFX10-WAVE64-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
1179; GFX10-WAVE64-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc
1180; GFX10-WAVE64-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v1
1181; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
1182; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB11_6
1183; GFX10-WAVE64-NEXT:  ; %bb.1: ; %bb
1184; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
1185; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB11_3
1186; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb8
1187; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v1, 8
1188; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 4.0
1189; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v1, off
1190; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
1191; GFX10-WAVE64-NEXT:  .LBB11_3: ; %phibb
1192; GFX10-WAVE64-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
1193; GFX10-WAVE64-NEXT:    s_cbranch_vccz .LBB11_5
1194; GFX10-WAVE64-NEXT:  ; %bb.4: ; %bb10
1195; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 9
1196; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v0, off
1197; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
1198; GFX10-WAVE64-NEXT:  .LBB11_5: ; %end
1199; GFX10-WAVE64-NEXT:    s_endpgm
1200; GFX10-WAVE64-NEXT:  .LBB11_6:
1201; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1202; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1203; GFX10-WAVE64-NEXT:    s_endpgm
1204;
1205; GFX10-WAVE32-LABEL: phi_use_def_before_kill:
1206; GFX10-WAVE32:       ; %bb.0: ; %bb
1207; GFX10-WAVE32-NEXT:    v_add_f32_e64 v1, s0, 1.0
1208; GFX10-WAVE32-NEXT:    v_cmp_lt_f32_e32 vcc_lo, 0, v1
1209; GFX10-WAVE32-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo
1210; GFX10-WAVE32-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, 0, v1
1211; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
1212; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB11_6
1213; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb
1214; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
1215; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB11_3
1216; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb8
1217; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v1, 8
1218; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 4.0
1219; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v1, off
1220; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
1221; GFX10-WAVE32-NEXT:  .LBB11_3: ; %phibb
1222; GFX10-WAVE32-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 0, v0
1223; GFX10-WAVE32-NEXT:    s_cbranch_vccz .LBB11_5
1224; GFX10-WAVE32-NEXT:  ; %bb.4: ; %bb10
1225; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 9
1226; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v0, off
1227; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
1228; GFX10-WAVE32-NEXT:  .LBB11_5: ; %end
1229; GFX10-WAVE32-NEXT:    s_endpgm
1230; GFX10-WAVE32-NEXT:  .LBB11_6:
1231; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1232; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1233; GFX10-WAVE32-NEXT:    s_endpgm
1234;
1235; GFX11-LABEL: phi_use_def_before_kill:
1236; GFX11:       ; %bb.0: ; %bb
1237; GFX11-NEXT:    v_add_f32_e64 v1, s0, 1.0
1238; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1239; GFX11-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
1240; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc
1241; GFX11-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v1
1242; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
1243; GFX11-NEXT:    s_cbranch_scc0 .LBB11_6
1244; GFX11-NEXT:  ; %bb.1: ; %bb
1245; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
1246; GFX11-NEXT:    s_cbranch_scc0 .LBB11_3
1247; GFX11-NEXT:  ; %bb.2: ; %bb8
1248; GFX11-NEXT:    v_mov_b32_e32 v1, 8
1249; GFX11-NEXT:    v_mov_b32_e32 v0, 4.0
1250; GFX11-NEXT:    global_store_b32 v[0:1], v1, off dlc
1251; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1252; GFX11-NEXT:  .LBB11_3: ; %phibb
1253; GFX11-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
1254; GFX11-NEXT:    s_cbranch_vccz .LBB11_5
1255; GFX11-NEXT:  ; %bb.4: ; %bb10
1256; GFX11-NEXT:    v_mov_b32_e32 v0, 9
1257; GFX11-NEXT:    global_store_b32 v[0:1], v0, off dlc
1258; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1259; GFX11-NEXT:  .LBB11_5: ; %end
1260; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1261; GFX11-NEXT:    s_endpgm
1262; GFX11-NEXT:  .LBB11_6:
1263; GFX11-NEXT:    s_mov_b64 exec, 0
1264; GFX11-NEXT:    exp mrt0 off, off, off, off done
1265; GFX11-NEXT:    s_endpgm
; %tmp2 is -1.0 when 0.0 < %x + 1.0 and 0.0 otherwise. It is defined here,
; before the kill, and is still live afterwards because %phibb's phi reads it.
1266bb:
1267  %tmp = fadd float %x, 1.000000e+00
1268  %tmp1 = fcmp olt float 0.000000e+00, %tmp
1269  %tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00
; Lanes stay alive only where %tmp2 < 0.0, i.e. where the select chose -1.0.
1270  %cmp.tmp2 = fcmp olt float %tmp2, 0.0
1271  call void @llvm.amdgcn.kill(i1 %cmp.tmp2)
; The branch condition is undef, so the compiler may choose either successor.
1272  br i1 undef, label %phibb, label %bb8
1273
; Merge point: the pre-kill value %tmp2 arrives from %bb, 4.0 from %bb8.
1274phibb:
1275  %tmp5 = phi float [ %tmp2, %bb ], [ 4.0, %bb8 ]
1276  %tmp6 = fcmp oeq float %tmp5, 0.000000e+00
1277  br i1 %tmp6, label %bb10, label %end
1278
; Volatile store of 8 marks this path in the generated code.
1279bb8:
1280  store volatile i32 8, i32 addrspace(1)* undef
1281  br label %phibb
1282
; Volatile store of 9 marks the path taken when the phi value equals 0.0.
1283bb10:
1284  store volatile i32 9, i32 addrspace(1)* undef
1285  br label %end
1286
1287end:
1288  ret void
1289}
1290
; Pixel-shader test where the kill sits in a block with no successors: %bb6
; calls llvm.amdgcn.kill(i1 false) and then ends in unreachable. %bb5 also
; ends in unreachable, and only %bb7 returns.
;
; In the assertions below the kill in %bb6 is still lowered to an and-not of
; a saved exec copy with exec, followed by s_cbranch_scc0 to the trailing
; block that zeroes exec, exports and ends the program. The code for
; %bb3/%bb5 has no terminator of its own and runs into that same trailing
; block.
1291define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 {
1292; SI-LABEL: no_skip_no_successors:
1293; SI:       ; %bb.0: ; %bb
1294; SI-NEXT:    v_cmp_nge_f32_e64 s[4:5], s1, 0
1295; SI-NEXT:    s_and_b64 vcc, exec, s[4:5]
1296; SI-NEXT:    s_cbranch_vccz .LBB12_3
1297; SI-NEXT:  ; %bb.1: ; %bb6
1298; SI-NEXT:    s_mov_b64 s[2:3], exec
1299; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
1300; SI-NEXT:    s_cbranch_scc0 .LBB12_5
1301; SI-NEXT:  ; %bb.2: ; %bb6
1302; SI-NEXT:    s_mov_b64 exec, 0
1303; SI-NEXT:  .LBB12_3: ; %bb3
1304; SI-NEXT:    v_mov_b32_e32 v0, 0x3e7ae148
1305; SI-NEXT:    v_cmp_nge_f32_e32 vcc, s0, v0
1306; SI-NEXT:    s_and_b64 vcc, exec, vcc
1307; SI-NEXT:  ; %bb.4: ; %bb5
1308; SI-NEXT:  .LBB12_5:
1309; SI-NEXT:    s_mov_b64 exec, 0
1310; SI-NEXT:    exp null off, off, off, off done vm
1311; SI-NEXT:    s_endpgm
1312;
1313; GFX10-WAVE64-LABEL: no_skip_no_successors:
1314; GFX10-WAVE64:       ; %bb.0: ; %bb
1315; GFX10-WAVE64-NEXT:    v_cmp_nge_f32_e64 s[4:5], s1, 0
1316; GFX10-WAVE64-NEXT:    s_and_b64 vcc, exec, s[4:5]
1317; GFX10-WAVE64-NEXT:    s_cbranch_vccz .LBB12_3
1318; GFX10-WAVE64-NEXT:  ; %bb.1: ; %bb6
1319; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
1320; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
1321; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB12_5
1322; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb6
1323; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1324; GFX10-WAVE64-NEXT:  .LBB12_3: ; %bb3
1325; GFX10-WAVE64-NEXT:    v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0
1326; GFX10-WAVE64-NEXT:    s_and_b64 vcc, exec, s[0:1]
1327; GFX10-WAVE64-NEXT:  ; %bb.4: ; %bb5
1328; GFX10-WAVE64-NEXT:  .LBB12_5:
1329; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1330; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1331; GFX10-WAVE64-NEXT:    s_endpgm
1332;
1333; GFX10-WAVE32-LABEL: no_skip_no_successors:
1334; GFX10-WAVE32:       ; %bb.0: ; %bb
1335; GFX10-WAVE32-NEXT:    v_cmp_nge_f32_e64 s1, s1, 0
1336; GFX10-WAVE32-NEXT:    s_and_b32 vcc_lo, exec_lo, s1
1337; GFX10-WAVE32-NEXT:    s_cbranch_vccz .LBB12_3
1338; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb6
1339; GFX10-WAVE32-NEXT:    s_mov_b32 s2, exec_lo
1340; GFX10-WAVE32-NEXT:    s_andn2_b32 s2, s2, exec_lo
1341; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB12_5
1342; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb6
1343; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1344; GFX10-WAVE32-NEXT:  .LBB12_3: ; %bb3
1345; GFX10-WAVE32-NEXT:    v_cmp_nle_f32_e64 s0, 0x3e7ae148, s0
1346; GFX10-WAVE32-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
1347; GFX10-WAVE32-NEXT:  ; %bb.4: ; %bb5
1348; GFX10-WAVE32-NEXT:  .LBB12_5:
1349; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1350; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1351; GFX10-WAVE32-NEXT:    s_endpgm
1352;
1353; GFX11-LABEL: no_skip_no_successors:
1354; GFX11:       ; %bb.0: ; %bb
1355; GFX11-NEXT:    v_cmp_nge_f32_e64 s[4:5], s1, 0
1356; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1357; GFX11-NEXT:    s_and_b64 vcc, exec, s[4:5]
1358; GFX11-NEXT:    s_cbranch_vccz .LBB12_3
1359; GFX11-NEXT:  ; %bb.1: ; %bb6
1360; GFX11-NEXT:    s_mov_b64 s[2:3], exec
1361; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1362; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], exec
1363; GFX11-NEXT:    s_cbranch_scc0 .LBB12_5
1364; GFX11-NEXT:  ; %bb.2: ; %bb6
1365; GFX11-NEXT:    s_mov_b64 exec, 0
1366; GFX11-NEXT:  .LBB12_3: ; %bb3
1367; GFX11-NEXT:    v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0
1368; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1369; GFX11-NEXT:    s_and_b64 vcc, exec, s[0:1]
1370; GFX11-NEXT:  ; %bb.4: ; %bb5
1371; GFX11-NEXT:  .LBB12_5:
1372; GFX11-NEXT:    s_mov_b64 exec, 0
1373; GFX11-NEXT:    exp mrt0 off, off, off, off done
1374; GFX11-NEXT:    s_endpgm
; Both arguments are inreg, and the compares in the assertions above read
; them from s0/s1.
1375bb:
1376  %tmp = fcmp ult float %arg1, 0.000000e+00
1377  br i1 %tmp, label %bb6, label %bb3
1378
; The constant 0x3FCF5C2900000000 is the IR spelling of the float that the
; generated code shows as the 32-bit literal 0x3e7ae148.
1379bb3:                                              ; preds = %bb
1380  %tmp2 = fcmp ult float %arg, 0x3FCF5C2900000000
1381  br i1 %tmp2, label %bb5, label %bb4
1382
; Constant-true condition: the edge to %bb7 is never taken.
1383bb4:                                              ; preds = %bb3
1384  br i1 true, label %bb5, label %bb7
1385
1386bb5:                                              ; preds = %bb4, %bb3
1387  unreachable
1388
; kill(i1 false) terminates every active lane; the block then ends in
; unreachable, so it has no successors.
1389bb6:                                              ; preds = %bb
1390  call void @llvm.amdgcn.kill(i1 false)
1391  unreachable
1392
1393bb7:                                              ; preds = %bb4
1394  ret void
1395}
1396
; Pixel-shader test with a kill in a conditionally executed block (%bb3)
; that rejoins %bb4, where an image sample feeds a second per-lane branch.
; All four arguments are plain (non-inreg) floats, and the generated code
; reads them from v0..v3.
;
; In the assertions below every run line copies exec and applies s_wqm to
; exec in %entry; the kill and-nots the saved copy and branches with
; s_cbranch_scc0 to the trailing terminate block, otherwise it and-nots exec.
; The image sample and the second if follow once exec is restored for %bb4.
; NOTE(review): the s_wqm at entry is presumably required by the image sample
; - confirm.
1397define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 {
1398; SI-LABEL: if_after_kill_block:
1399; SI:       ; %bb.0: ; %bb
1400; SI-NEXT:    s_mov_b64 s[0:1], exec
1401; SI-NEXT:    s_wqm_b64 exec, exec
1402; SI-NEXT:    v_cmp_nle_f32_e32 vcc, 0, v1
1403; SI-NEXT:    s_and_saveexec_b64 s[2:3], vcc
1404; SI-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
1405; SI-NEXT:    s_cbranch_execz .LBB13_3
1406; SI-NEXT:  ; %bb.1: ; %bb3
1407; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
1408; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
1409; SI-NEXT:    s_cbranch_scc0 .LBB13_6
1410; SI-NEXT:  ; %bb.2: ; %bb3
1411; SI-NEXT:    s_andn2_b64 exec, exec, vcc
1412; SI-NEXT:  .LBB13_3: ; %bb4
1413; SI-NEXT:    s_or_b64 exec, exec, s[2:3]
1414; SI-NEXT:    image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10
1415; SI-NEXT:    s_waitcnt vmcnt(0)
1416; SI-NEXT:    v_cmp_neq_f32_e32 vcc, 0, v0
1417; SI-NEXT:    s_and_saveexec_b64 s[0:1], vcc
1418; SI-NEXT:    s_cbranch_execz .LBB13_5
1419; SI-NEXT:  ; %bb.4: ; %bb8
1420; SI-NEXT:    s_mov_b32 s3, 0xf000
1421; SI-NEXT:    s_mov_b32 s2, -1
1422; SI-NEXT:    v_mov_b32_e32 v0, 9
1423; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1424; SI-NEXT:    s_waitcnt vmcnt(0)
1425; SI-NEXT:  .LBB13_5: ; %UnifiedReturnBlock
1426; SI-NEXT:    s_endpgm
1427; SI-NEXT:  .LBB13_6:
1428; SI-NEXT:    s_mov_b64 exec, 0
1429; SI-NEXT:    exp null off, off, off, off done vm
1430; SI-NEXT:    s_endpgm
1431;
1432; GFX10-WAVE64-LABEL: if_after_kill_block:
1433; GFX10-WAVE64:       ; %bb.0: ; %bb
1434; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
1435; GFX10-WAVE64-NEXT:    s_wqm_b64 exec, exec
1436; GFX10-WAVE64-NEXT:    v_cmp_nle_f32_e32 vcc, 0, v1
1437; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
1438; GFX10-WAVE64-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
1439; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB13_3
1440; GFX10-WAVE64-NEXT:  ; %bb.1: ; %bb3
1441; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
1442; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
1443; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB13_6
1444; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb3
1445; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
1446; GFX10-WAVE64-NEXT:  .LBB13_3: ; %bb4
1447; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[2:3]
1448; GFX10-WAVE64-NEXT:    image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
1449; GFX10-WAVE64-NEXT:    s_waitcnt vmcnt(0)
1450; GFX10-WAVE64-NEXT:    v_cmp_neq_f32_e32 vcc, 0, v0
1451; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[0:1], vcc
1452; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB13_5
1453; GFX10-WAVE64-NEXT:  ; %bb.4: ; %bb8
1454; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 9
1455; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v0, off
1456; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
1457; GFX10-WAVE64-NEXT:  .LBB13_5: ; %UnifiedReturnBlock
1458; GFX10-WAVE64-NEXT:    s_endpgm
1459; GFX10-WAVE64-NEXT:  .LBB13_6:
1460; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1461; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1462; GFX10-WAVE64-NEXT:    s_endpgm
1463;
1464; GFX10-WAVE32-LABEL: if_after_kill_block:
1465; GFX10-WAVE32:       ; %bb.0: ; %bb
1466; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
1467; GFX10-WAVE32-NEXT:    s_wqm_b32 exec_lo, exec_lo
1468; GFX10-WAVE32-NEXT:    v_cmp_nle_f32_e32 vcc_lo, 0, v1
1469; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
1470; GFX10-WAVE32-NEXT:    s_xor_b32 s1, exec_lo, s1
1471; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB13_3
1472; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb3
1473; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
1474; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
1475; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB13_6
1476; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb3
1477; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
1478; GFX10-WAVE32-NEXT:  .LBB13_3: ; %bb4
1479; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s1
1480; GFX10-WAVE32-NEXT:    image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
1481; GFX10-WAVE32-NEXT:    s_waitcnt vmcnt(0)
1482; GFX10-WAVE32-NEXT:    v_cmp_neq_f32_e32 vcc_lo, 0, v0
1483; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s0, vcc_lo
1484; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB13_5
1485; GFX10-WAVE32-NEXT:  ; %bb.4: ; %bb8
1486; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 9
1487; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v0, off
1488; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
1489; GFX10-WAVE32-NEXT:  .LBB13_5: ; %UnifiedReturnBlock
1490; GFX10-WAVE32-NEXT:    s_endpgm
1491; GFX10-WAVE32-NEXT:  .LBB13_6:
1492; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1493; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1494; GFX10-WAVE32-NEXT:    s_endpgm
1495;
1496; GFX11-LABEL: if_after_kill_block:
1497; GFX11:       ; %bb.0: ; %bb
1498; GFX11-NEXT:    s_mov_b64 s[0:1], exec
1499; GFX11-NEXT:    s_wqm_b64 exec, exec
1500; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1501; GFX11-NEXT:    s_mov_b64 s[2:3], exec
1502; GFX11-NEXT:    v_cmpx_nle_f32_e32 0, v1
1503; GFX11-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
1504; GFX11-NEXT:    s_cbranch_execz .LBB13_3
1505; GFX11-NEXT:  ; %bb.1: ; %bb3
1506; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
1507; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
1508; GFX11-NEXT:    s_cbranch_scc0 .LBB13_6
1509; GFX11-NEXT:  ; %bb.2: ; %bb3
1510; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
1511; GFX11-NEXT:  .LBB13_3: ; %bb4
1512; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1513; GFX11-NEXT:    s_or_b64 exec, exec, s[2:3]
1514; GFX11-NEXT:    image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
1515; GFX11-NEXT:    s_mov_b64 s[0:1], exec
1516; GFX11-NEXT:    s_waitcnt vmcnt(0)
1517; GFX11-NEXT:    v_cmpx_neq_f32_e32 0, v0
1518; GFX11-NEXT:    s_cbranch_execz .LBB13_5
1519; GFX11-NEXT:  ; %bb.4: ; %bb8
1520; GFX11-NEXT:    v_mov_b32_e32 v0, 9
1521; GFX11-NEXT:    global_store_b32 v[0:1], v0, off dlc
1522; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1523; GFX11-NEXT:  .LBB13_5: ; %UnifiedReturnBlock
1524; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1525; GFX11-NEXT:    s_endpgm
1526; GFX11-NEXT:  .LBB13_6:
1527; GFX11-NEXT:    s_mov_b64 exec, 0
1528; GFX11-NEXT:    exp mrt0 off, off, off, off done
1529; GFX11-NEXT:    s_endpgm
; Lanes with %arg1 < 0.0 (or unordered) take the kill path through %bb3.
1530bb:
1531  %tmp = fcmp ult float %arg1, 0.000000e+00
1532  br i1 %tmp, label %bb3, label %bb4
1533
; Lanes stay alive only where %arg < 0.0; the rest are terminated.
1534bb3:                                              ; preds = %bb
1535  %cmp.arg = fcmp olt float %arg, 0.0
1536  call void @llvm.amdgcn.kill(i1 %cmp.arg)
1537  br label %bb4
1538
; Join point after the kill. The sample uses dmask 16 (shown as dmask:0x10 in
; the generated code) with undef resource and sampler descriptors; element 0
; of its result drives the second per-lane branch.
1539bb4:                                              ; preds = %bb3, %bb
1540  %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
1541  %tmp6 = extractelement <4 x float> %tmp5, i32 0
1542  %tmp7 = fcmp une float %tmp6, 0.000000e+00
1543  br i1 %tmp7, label %bb8, label %bb9
1544
; Only %bb4 branches here; %bb9 returns directly.
1545bb8:                                              ; preds = %bb4
1546  store volatile i32 9, i32 addrspace(1)* undef
1547  ret void
1548
1549bb9:                                              ; preds = %bb4
1550  ret void
1551}
1552
; cbranch_kill: pixel-shader test where llvm.amdgcn.kill(i1 false) sits on one
; arm of a conditional branch.
;   * .entry samples an image and branches on `%sample ugt 0.0`.
;   * %kill calls the kill intrinsic with a constant false condition and then
;     branches to %export.
;   * %live computes %val0 * %sample.
;   * %export issues one export of the phi of both arms (undef on the %kill arm).
; For every run line the checks below expect the %kill block to remove the
; current exec bits from a copy of exec saved at function entry, then branch on
; scc0 to a trailing block that zeroes exec, emits an export with all channels
; off and ends the program. When that branch is not taken, exec is zeroed and
; control continues into the %Flow block.
1553define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
1554; SI-LABEL: cbranch_kill:
1555; SI:       ; %bb.0: ; %.entry
1556; SI-NEXT:    s_mov_b64 s[0:1], exec
1557; SI-NEXT:    v_mov_b32_e32 v4, 0
1558; SI-NEXT:    v_mov_b32_e32 v2, v1
1559; SI-NEXT:    v_mov_b32_e32 v3, v1
1560; SI-NEXT:    image_sample_l v1, v[1:4], s[0:7], s[0:3] dmask:0x1 da
1561; SI-NEXT:    s_waitcnt vmcnt(0)
1562; SI-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v1
1563; SI-NEXT:    s_and_saveexec_b64 s[2:3], vcc
1564; SI-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
1565; SI-NEXT:    s_cbranch_execz .LBB14_3
1566; SI-NEXT:  ; %bb.1: ; %kill
1567; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
1568; SI-NEXT:    ; implicit-def: $vgpr0
1569; SI-NEXT:    ; implicit-def: $vgpr1
1570; SI-NEXT:    s_cbranch_scc0 .LBB14_6
1571; SI-NEXT:  ; %bb.2: ; %kill
1572; SI-NEXT:    s_mov_b64 exec, 0
1573; SI-NEXT:  .LBB14_3: ; %Flow
1574; SI-NEXT:    s_or_saveexec_b64 s[0:1], s[2:3]
1575; SI-NEXT:    ; implicit-def: $vgpr2
1576; SI-NEXT:    s_xor_b64 exec, exec, s[0:1]
1577; SI-NEXT:  ; %bb.4: ; %live
1578; SI-NEXT:    v_mul_f32_e32 v2, v0, v1
1579; SI-NEXT:  ; %bb.5: ; %export
1580; SI-NEXT:    s_or_b64 exec, exec, s[0:1]
1581; SI-NEXT:    exp mrt0 v2, v2, v2, v2 done vm
1582; SI-NEXT:    s_endpgm
1583; SI-NEXT:  .LBB14_6:
1584; SI-NEXT:    s_mov_b64 exec, 0
1585; SI-NEXT:    exp null off, off, off, off done vm
1586; SI-NEXT:    s_endpgm
1587;
1588; GFX10-WAVE64-LABEL: cbranch_kill:
1589; GFX10-WAVE64:       ; %bb.0: ; %.entry
1590; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v2, 0
1591; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
1592; GFX10-WAVE64-NEXT:    image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
1593; GFX10-WAVE64-NEXT:    s_waitcnt vmcnt(0)
1594; GFX10-WAVE64-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v1
1595; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
1596; GFX10-WAVE64-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
1597; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB14_3
1598; GFX10-WAVE64-NEXT:  ; %bb.1: ; %kill
1599; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
1600; GFX10-WAVE64-NEXT:    ; implicit-def: $vgpr0
1601; GFX10-WAVE64-NEXT:    ; implicit-def: $vgpr1
1602; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB14_6
1603; GFX10-WAVE64-NEXT:  ; %bb.2: ; %kill
1604; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1605; GFX10-WAVE64-NEXT:  .LBB14_3: ; %Flow
1606; GFX10-WAVE64-NEXT:    s_or_saveexec_b64 s[0:1], s[2:3]
1607; GFX10-WAVE64-NEXT:    ; implicit-def: $vgpr2
1608; GFX10-WAVE64-NEXT:    s_xor_b64 exec, exec, s[0:1]
1609; GFX10-WAVE64-NEXT:  ; %bb.4: ; %live
1610; GFX10-WAVE64-NEXT:    v_mul_f32_e32 v2, v0, v1
1611; GFX10-WAVE64-NEXT:  ; %bb.5: ; %export
1612; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[0:1]
1613; GFX10-WAVE64-NEXT:    exp mrt0 v2, v2, v2, v2 done vm
1614; GFX10-WAVE64-NEXT:    s_endpgm
1615; GFX10-WAVE64-NEXT:  .LBB14_6:
1616; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1617; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1618; GFX10-WAVE64-NEXT:    s_endpgm
1619;
1620; GFX10-WAVE32-LABEL: cbranch_kill:
1621; GFX10-WAVE32:       ; %bb.0: ; %.entry
1622; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v2, 0
1623; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
1624; GFX10-WAVE32-NEXT:    image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
1625; GFX10-WAVE32-NEXT:    s_waitcnt vmcnt(0)
1626; GFX10-WAVE32-NEXT:    v_cmp_ge_f32_e32 vcc_lo, 0, v1
1627; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
1628; GFX10-WAVE32-NEXT:    s_xor_b32 s1, exec_lo, s1
1629; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB14_3
1630; GFX10-WAVE32-NEXT:  ; %bb.1: ; %kill
1631; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, exec_lo
1632; GFX10-WAVE32-NEXT:    ; implicit-def: $vgpr0
1633; GFX10-WAVE32-NEXT:    ; implicit-def: $vgpr1
1634; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB14_6
1635; GFX10-WAVE32-NEXT:  ; %bb.2: ; %kill
1636; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1637; GFX10-WAVE32-NEXT:  .LBB14_3: ; %Flow
1638; GFX10-WAVE32-NEXT:    s_or_saveexec_b32 s0, s1
1639; GFX10-WAVE32-NEXT:    ; implicit-def: $vgpr2
1640; GFX10-WAVE32-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
1641; GFX10-WAVE32-NEXT:  ; %bb.4: ; %live
1642; GFX10-WAVE32-NEXT:    v_mul_f32_e32 v2, v0, v1
1643; GFX10-WAVE32-NEXT:  ; %bb.5: ; %export
1644; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s0
1645; GFX10-WAVE32-NEXT:    exp mrt0 v2, v2, v2, v2 done vm
1646; GFX10-WAVE32-NEXT:    s_endpgm
1647; GFX10-WAVE32-NEXT:  .LBB14_6:
1648; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1649; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1650; GFX10-WAVE32-NEXT:    s_endpgm
1651;
1652; GFX11-LABEL: cbranch_kill:
1653; GFX11:       ; %bb.0: ; %.entry
1654; GFX11-NEXT:    v_mov_b32_e32 v2, 0
1655; GFX11-NEXT:    s_mov_b64 s[0:1], exec
1656; GFX11-NEXT:    image_sample_l v1, [v1, v1, v1, v2], s[0:7], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
1657; GFX11-NEXT:    s_mov_b64 s[2:3], exec
1658; GFX11-NEXT:    s_waitcnt vmcnt(0)
1659; GFX11-NEXT:    v_cmpx_ge_f32_e32 0, v1
1660; GFX11-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
1661; GFX11-NEXT:    s_cbranch_execz .LBB14_3
1662; GFX11-NEXT:  ; %bb.1: ; %kill
1663; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], exec
1664; GFX11-NEXT:    ; implicit-def: $vgpr0
1665; GFX11-NEXT:    ; implicit-def: $vgpr1
1666; GFX11-NEXT:    s_cbranch_scc0 .LBB14_6
1667; GFX11-NEXT:  ; %bb.2: ; %kill
1668; GFX11-NEXT:    s_mov_b64 exec, 0
1669; GFX11-NEXT:  .LBB14_3: ; %Flow
1670; GFX11-NEXT:    s_or_saveexec_b64 s[0:1], s[2:3]
1671; GFX11-NEXT:    ; implicit-def: $vgpr2
1672; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1673; GFX11-NEXT:    s_xor_b64 exec, exec, s[0:1]
1674; GFX11-NEXT:  ; %bb.4: ; %live
1675; GFX11-NEXT:    v_mul_f32_e32 v2, v0, v1
1676; GFX11-NEXT:  ; %bb.5: ; %export
1677; GFX11-NEXT:    s_or_b64 exec, exec, s[0:1]
1678; GFX11-NEXT:    exp mrt0 v2, v2, v2, v2 done
1679; GFX11-NEXT:    s_endpgm
1680; GFX11-NEXT:  .LBB14_6:
1681; GFX11-NEXT:    s_mov_b64 exec, 0
1682; GFX11-NEXT:    exp mrt0 off, off, off, off done
1683; GFX11-NEXT:    s_endpgm
1684.entry:
1685  %sample = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 1, float %val1, float %val1, float %val1, float 0.000000e+00, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
1686  %cond0 = fcmp ugt float %sample, 0.000000e+00
1687  br i1 %cond0, label %live, label %kill
1688
; Taken when the `ugt` compare above is false; the kill condition is the
; constant false.
1689kill:
1690  call void @llvm.amdgcn.kill(i1 false)
1691  br label %export
1692
1693live:
1694  %scale = fmul reassoc nnan nsz arcp contract float %val0, %sample
1695  br label %export
1696
; Join point: the exported value is undef when arriving from %kill.
1697export:
1698  %proxy = phi float [ undef, %kill ], [ %scale, %live ]
1699  call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %proxy, float %proxy, float %proxy, float %proxy, i1 immarg true, i1 immarg true) #3
1700  ret void
1701}
1702
1703
; complex_loop: pixel-shader test with llvm.amdgcn.kill(i1 false) inside a loop.
;   * .entry bypasses the loop (the exported value is then -1) unless %cmpa is
;     signed-greater than 0.
;   * %hdr carries the counter %ctr, starting at 0, and enters %kill when %ctr
;     is unsigned-greater than %cmpb.
;   * %kill calls the kill intrinsic with a constant false condition and
;     continues to %latch.
;   * %latch increments the counter and repeats while it is signed-less than
;     %cmpc.
;   * ._crit_edge bitcasts the selected i32 to float and exports it in the first
;     two value operands; the other two are undef.
; For every run line the checks below expect a copy of exec to be saved in
; %.lr.ph before the loop. Inside the loop the %kill block removes the current
; exec bits from that copy and branches on scc0 to a trailing block that zeroes
; exec, emits an export with all channels off and ends the program. Otherwise
; exec is zeroed and control branches back to the %latch block.
1704define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) {
1705; SI-LABEL: complex_loop:
1706; SI:       ; %bb.0: ; %.entry
1707; SI-NEXT:    s_cmp_lt_i32 s0, 1
1708; SI-NEXT:    s_cbranch_scc1 .LBB15_7
1709; SI-NEXT:  ; %bb.1: ; %.lr.ph
1710; SI-NEXT:    s_mov_b64 s[2:3], exec
1711; SI-NEXT:    s_mov_b32 s6, 0
1712; SI-NEXT:    s_mov_b64 s[0:1], 0
1713; SI-NEXT:    s_branch .LBB15_3
1714; SI-NEXT:  .LBB15_2: ; %latch
1715; SI-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1716; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
1717; SI-NEXT:    s_add_i32 s6, s6, 1
1718; SI-NEXT:    v_cmp_ge_i32_e32 vcc, s6, v1
1719; SI-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
1720; SI-NEXT:    v_mov_b32_e32 v2, s6
1721; SI-NEXT:    s_andn2_b64 exec, exec, s[0:1]
1722; SI-NEXT:    s_cbranch_execz .LBB15_6
1723; SI-NEXT:  .LBB15_3: ; %hdr
1724; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
1725; SI-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
1726; SI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1727; SI-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
1728; SI-NEXT:    s_cbranch_execz .LBB15_2
1729; SI-NEXT:  ; %bb.4: ; %kill
1730; SI-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1731; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
1732; SI-NEXT:    s_cbranch_scc0 .LBB15_8
1733; SI-NEXT:  ; %bb.5: ; %kill
1734; SI-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1735; SI-NEXT:    s_mov_b64 exec, 0
1736; SI-NEXT:    s_branch .LBB15_2
1737; SI-NEXT:  .LBB15_6: ; %Flow
1738; SI-NEXT:    s_or_b64 exec, exec, s[0:1]
1739; SI-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1740; SI-NEXT:    s_endpgm
1741; SI-NEXT:  .LBB15_7:
1742; SI-NEXT:    v_mov_b32_e32 v2, -1
1743; SI-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1744; SI-NEXT:    s_endpgm
1745; SI-NEXT:  .LBB15_8:
1746; SI-NEXT:    s_mov_b64 exec, 0
1747; SI-NEXT:    exp null off, off, off, off done vm
1748; SI-NEXT:    s_endpgm
1749;
1750; GFX10-WAVE64-LABEL: complex_loop:
1751; GFX10-WAVE64:       ; %bb.0: ; %.entry
1752; GFX10-WAVE64-NEXT:    s_cmp_lt_i32 s0, 1
1753; GFX10-WAVE64-NEXT:    s_cbranch_scc1 .LBB15_7
1754; GFX10-WAVE64-NEXT:  ; %bb.1: ; %.lr.ph
1755; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
1756; GFX10-WAVE64-NEXT:    s_mov_b32 s6, 0
1757; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], 0
1758; GFX10-WAVE64-NEXT:    s_branch .LBB15_3
1759; GFX10-WAVE64-NEXT:  .LBB15_2: ; %latch
1760; GFX10-WAVE64-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1761; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[4:5]
1762; GFX10-WAVE64-NEXT:    s_add_i32 s6, s6, 1
1763; GFX10-WAVE64-NEXT:    v_cmp_ge_i32_e32 vcc, s6, v1
1764; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v2, s6
1765; GFX10-WAVE64-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
1766; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, s[0:1]
1767; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB15_6
1768; GFX10-WAVE64-NEXT:  .LBB15_3: ; %hdr
1769; GFX10-WAVE64-NEXT:    ; =>This Inner Loop Header: Depth=1
1770; GFX10-WAVE64-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
1771; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1772; GFX10-WAVE64-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
1773; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB15_2
1774; GFX10-WAVE64-NEXT:  ; %bb.4: ; %kill
1775; GFX10-WAVE64-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1776; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
1777; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB15_8
1778; GFX10-WAVE64-NEXT:  ; %bb.5: ; %kill
1779; GFX10-WAVE64-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1780; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1781; GFX10-WAVE64-NEXT:    s_branch .LBB15_2
1782; GFX10-WAVE64-NEXT:  .LBB15_6: ; %Flow
1783; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[0:1]
1784; GFX10-WAVE64-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1785; GFX10-WAVE64-NEXT:    s_endpgm
1786; GFX10-WAVE64-NEXT:  .LBB15_7:
1787; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v2, -1
1788; GFX10-WAVE64-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1789; GFX10-WAVE64-NEXT:    s_endpgm
1790; GFX10-WAVE64-NEXT:  .LBB15_8:
1791; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1792; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1793; GFX10-WAVE64-NEXT:    s_endpgm
1794;
1795; GFX10-WAVE32-LABEL: complex_loop:
1796; GFX10-WAVE32:       ; %bb.0: ; %.entry
1797; GFX10-WAVE32-NEXT:    s_cmp_lt_i32 s0, 1
1798; GFX10-WAVE32-NEXT:    s_cbranch_scc1 .LBB15_7
1799; GFX10-WAVE32-NEXT:  ; %bb.1: ; %.lr.ph
1800; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
1801; GFX10-WAVE32-NEXT:    s_mov_b32 s0, 0
1802; GFX10-WAVE32-NEXT:    s_mov_b32 s2, 0
1803; GFX10-WAVE32-NEXT:    s_branch .LBB15_3
1804; GFX10-WAVE32-NEXT:  .LBB15_2: ; %latch
1805; GFX10-WAVE32-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1806; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s3
1807; GFX10-WAVE32-NEXT:    s_add_i32 s2, s2, 1
1808; GFX10-WAVE32-NEXT:    v_cmp_ge_i32_e32 vcc_lo, s2, v1
1809; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v2, s2
1810; GFX10-WAVE32-NEXT:    s_or_b32 s0, vcc_lo, s0
1811; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
1812; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB15_6
1813; GFX10-WAVE32-NEXT:  .LBB15_3: ; %hdr
1814; GFX10-WAVE32-NEXT:    ; =>This Inner Loop Header: Depth=1
1815; GFX10-WAVE32-NEXT:    v_cmp_gt_u32_e32 vcc_lo, s2, v0
1816; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s3, vcc_lo
1817; GFX10-WAVE32-NEXT:    s_xor_b32 s3, exec_lo, s3
1818; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB15_2
1819; GFX10-WAVE32-NEXT:  ; %bb.4: ; %kill
1820; GFX10-WAVE32-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1821; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, exec_lo
1822; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB15_8
1823; GFX10-WAVE32-NEXT:  ; %bb.5: ; %kill
1824; GFX10-WAVE32-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1825; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1826; GFX10-WAVE32-NEXT:    s_branch .LBB15_2
1827; GFX10-WAVE32-NEXT:  .LBB15_6: ; %Flow
1828; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s0
1829; GFX10-WAVE32-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1830; GFX10-WAVE32-NEXT:    s_endpgm
1831; GFX10-WAVE32-NEXT:  .LBB15_7:
1832; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v2, -1
1833; GFX10-WAVE32-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1834; GFX10-WAVE32-NEXT:    s_endpgm
1835; GFX10-WAVE32-NEXT:  .LBB15_8:
1836; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1837; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1838; GFX10-WAVE32-NEXT:    s_endpgm
1839;
1840; GFX11-LABEL: complex_loop:
1841; GFX11:       ; %bb.0: ; %.entry
1842; GFX11-NEXT:    s_cmp_lt_i32 s0, 1
1843; GFX11-NEXT:    s_cbranch_scc1 .LBB15_7
1844; GFX11-NEXT:  ; %bb.1: ; %.lr.ph
1845; GFX11-NEXT:    s_mov_b64 s[2:3], exec
1846; GFX11-NEXT:    s_mov_b32 s6, 0
1847; GFX11-NEXT:    s_mov_b64 s[0:1], 0
1848; GFX11-NEXT:    s_branch .LBB15_3
1849; GFX11-NEXT:  .LBB15_2: ; %latch
1850; GFX11-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1851; GFX11-NEXT:    s_or_b64 exec, exec, s[4:5]
1852; GFX11-NEXT:    s_add_i32 s6, s6, 1
1853; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
1854; GFX11-NEXT:    v_cmp_ge_i32_e32 vcc, s6, v1
1855; GFX11-NEXT:    v_mov_b32_e32 v2, s6
1856; GFX11-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
1857; GFX11-NEXT:    s_and_not1_b64 exec, exec, s[0:1]
1858; GFX11-NEXT:    s_cbranch_execz .LBB15_6
1859; GFX11-NEXT:  .LBB15_3: ; %hdr
1860; GFX11-NEXT:    ; =>This Inner Loop Header: Depth=1
1861; GFX11-NEXT:    s_mov_b64 s[4:5], exec
1862; GFX11-NEXT:    v_cmpx_gt_u32_e64 s6, v0
1863; GFX11-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
1864; GFX11-NEXT:    s_cbranch_execz .LBB15_2
1865; GFX11-NEXT:  ; %bb.4: ; %kill
1866; GFX11-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1867; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], exec
1868; GFX11-NEXT:    s_cbranch_scc0 .LBB15_8
1869; GFX11-NEXT:  ; %bb.5: ; %kill
1870; GFX11-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1871; GFX11-NEXT:    s_mov_b64 exec, 0
1872; GFX11-NEXT:    s_branch .LBB15_2
1873; GFX11-NEXT:  .LBB15_6: ; %Flow
1874; GFX11-NEXT:    s_or_b64 exec, exec, s[0:1]
1875; GFX11-NEXT:    exp mrt0 v2, v2, v0, v0 done
1876; GFX11-NEXT:    s_endpgm
1877; GFX11-NEXT:  .LBB15_7:
1878; GFX11-NEXT:    v_mov_b32_e32 v2, -1
1879; GFX11-NEXT:    exp mrt0 v2, v2, v0, v0 done
1880; GFX11-NEXT:    s_endpgm
1881; GFX11-NEXT:  .LBB15_8:
1882; GFX11-NEXT:    s_mov_b64 exec, 0
1883; GFX11-NEXT:    exp mrt0 off, off, off, off done
1884; GFX11-NEXT:    s_endpgm
1885.entry:
1886  %flaga = icmp sgt i32 %cmpa, 0
1887  br i1 %flaga, label %.lr.ph, label %._crit_edge
1888
1889.lr.ph:
1890  br label %hdr
1891
; Loop header: the counter comes from the preheader (0) or from %latch.
1892hdr:
1893  %ctr = phi i32 [ 0, %.lr.ph ], [ %ctr.next, %latch ]
1894  %flagb = icmp ugt i32 %ctr, %cmpb
1895  br i1 %flagb, label %kill, label %latch
1896
; Kill with a constant false condition, then rejoin the loop at %latch.
1897kill:
1898  call void @llvm.amdgcn.kill(i1 false)
1899  br label %latch
1900
1901latch:
1902  %ctr.next = add nuw nsw i32 %ctr, 1
1903  %flagc = icmp slt i32 %ctr.next, %cmpc
1904  br i1 %flagc, label %hdr, label %._crit_edge
1905
; Exit block: -1 when the loop was bypassed, otherwise the last %ctr.next.
1906._crit_edge:
1907  %tmp = phi i32 [ -1, %.entry ], [ %ctr.next, %latch ]
1908  %out = bitcast i32 %tmp to float
1909  call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %out, float %out, float undef, float undef, i1 immarg true, i1 immarg true)
1910  ret void
1911}
1912
; skip_mode_switch: function without the amdgpu_ps calling convention (the
; checks end in s_setpc_b64) that calls llvm.amdgcn.s.setreg(i32 2049, i32 3)
; only when %arg equals 0.
; For every run line the checks below expect the call to be emitted as
; `s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3`, preceded by an
; s_cbranch_execz that jumps around it to the %bb.1 block.
; NOTE(review): presumably this guards against the branch over the mode-register
; write being dropped; confirm against the change that introduced the test.
1913define void @skip_mode_switch(i32 %arg) {
1914; SI-LABEL: skip_mode_switch:
1915; SI:       ; %bb.0: ; %entry
1916; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1917; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1918; SI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1919; SI-NEXT:    s_cbranch_execz .LBB16_2
1920; SI-NEXT:  ; %bb.1: ; %bb.0
1921; SI-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
1922; SI-NEXT:  .LBB16_2: ; %bb.1
1923; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
1924; SI-NEXT:    s_setpc_b64 s[30:31]
1925;
1926; GFX10-WAVE64-LABEL: skip_mode_switch:
1927; GFX10-WAVE64:       ; %bb.0: ; %entry
1928; GFX10-WAVE64-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1929; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
1930; GFX10-WAVE64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1931; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1932; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB16_2
1933; GFX10-WAVE64-NEXT:  ; %bb.1: ; %bb.0
1934; GFX10-WAVE64-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
1935; GFX10-WAVE64-NEXT:  .LBB16_2: ; %bb.1
1936; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[4:5]
1937; GFX10-WAVE64-NEXT:    s_setpc_b64 s[30:31]
1938;
1939; GFX10-WAVE32-LABEL: skip_mode_switch:
1940; GFX10-WAVE32:       ; %bb.0: ; %entry
1941; GFX10-WAVE32-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1942; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
1943; GFX10-WAVE32-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1944; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s4, vcc_lo
1945; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB16_2
1946; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb.0
1947; GFX10-WAVE32-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
1948; GFX10-WAVE32-NEXT:  .LBB16_2: ; %bb.1
1949; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s4
1950; GFX10-WAVE32-NEXT:    s_setpc_b64 s[30:31]
1951;
1952; GFX11-LABEL: skip_mode_switch:
1953; GFX11:       ; %bb.0: ; %entry
1954; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1955; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1956; GFX11-NEXT:    s_mov_b64 s[0:1], exec
1957; GFX11-NEXT:    v_cmpx_eq_u32_e32 0, v0
1958; GFX11-NEXT:    s_cbranch_execz .LBB16_2
1959; GFX11-NEXT:  ; %bb.1: ; %bb.0
1960; GFX11-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
1961; GFX11-NEXT:  .LBB16_2: ; %bb.1
1962; GFX11-NEXT:    s_or_b64 exec, exec, s[0:1]
1963; GFX11-NEXT:    s_setpc_b64 s[30:31]
1964entry:
1965  %cmp = icmp eq i32 %arg, 0
1966  br i1 %cmp, label %bb.0, label %bb.1
1967
; Only reached when %arg == 0.
1968bb.0:
1969  call void @llvm.amdgcn.s.setreg(i32 2049, i32 3)
1970  br label %bb.1
1971
1972bb.1:
1973  ret void
1974}
1975
; Declarations of the AMDGPU intrinsics called by the test functions in this
; file. The trailing #N on a declaration selects one of the attribute groups
; defined at the end of this block.
1976declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #3
1977declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
1978declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
1979declare void @llvm.amdgcn.kill(i1) #0
1980
; Declared without an attribute group.
1981declare void @llvm.amdgcn.s.setreg(i32 immarg, i32)
1982
; Attribute groups referenced as #0..#3 by the definitions and declarations.
1983attributes #0 = { nounwind }
1984attributes #1 = { nounwind readonly }
1985attributes #2 = { nounwind readnone speculatable }
1986attributes #3 = { inaccessiblememonly nounwind writeonly }
1987