1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,SI %s
3; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,WAVE64,GFX10-WAVE64 %s
4; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX10-WAVE32 %s
5; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
6
; Pixel shader whose only operation is a kill with a constant-true condition.
; The checks expect every run line to emit nothing but s_endpgm, i.e. no
; exec-mask update and no early-exit block.
7define amdgpu_ps void @test_kill_depth_0_imm_pos() #0 {
8; GCN-LABEL: test_kill_depth_0_imm_pos:
9; GCN:       ; %bb.0:
10; GCN-NEXT:    s_endpgm
11  call void @llvm.amdgcn.kill(i1 true)
12  ret void
13}
14
; Kill with a constant-false condition. The checks expect exec to be cleared
; with an and-not of exec against itself, followed by s_cbranch_scc0 to a block
; that zeroes exec, issues a "done" export (null target, or mrt0 for the
; gfx1100 run) and ends the program.
15define amdgpu_ps void @test_kill_depth_0_imm_neg() #0 {
16; WAVE64-LABEL: test_kill_depth_0_imm_neg:
17; WAVE64:       ; %bb.0:
18; WAVE64-NEXT:    s_andn2_b64 exec, exec, exec
19; WAVE64-NEXT:    s_cbranch_scc0 .LBB1_1
20; WAVE64-NEXT:    s_endpgm
21; WAVE64-NEXT:  .LBB1_1:
22; WAVE64-NEXT:    s_mov_b64 exec, 0
23; WAVE64-NEXT:    exp null off, off, off, off done vm
24; WAVE64-NEXT:    s_endpgm
25;
26; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg:
27; GFX10-WAVE32:       ; %bb.0:
28; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, exec_lo
29; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB1_1
30; GFX10-WAVE32-NEXT:    s_endpgm
31; GFX10-WAVE32-NEXT:  .LBB1_1:
32; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
33; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
34; GFX10-WAVE32-NEXT:    s_endpgm
35;
36; GFX11-LABEL: test_kill_depth_0_imm_neg:
37; GFX11:       ; %bb.0:
38; GFX11-NEXT:    s_and_not1_b64 exec, exec, exec
39; GFX11-NEXT:    s_cbranch_scc0 .LBB1_1
40; GFX11-NEXT:    s_endpgm
41; GFX11-NEXT:  .LBB1_1:
42; GFX11-NEXT:    s_mov_b64 exec, 0
43; GFX11-NEXT:    exp mrt0 off, off, off, off done
44; GFX11-NEXT:    s_endpgm
45  call void @llvm.amdgcn.kill(i1 false)
46  ret void
47}
48
49; FIXME: Ideally only one early-exit would be emitted
; Two back-to-back kills, both with a constant-false condition. The checks
; expect a copy of exec in s[0:1] (s0 for the wave32 run) that each kill
; updates with its own and-not + s_cbranch_scc0 pair; both branches target the
; same exit block (.LBB2_2).
50define amdgpu_ps void @test_kill_depth_0_imm_neg_x2() #0 {
51; WAVE64-LABEL: test_kill_depth_0_imm_neg_x2:
52; WAVE64:       ; %bb.0:
53; WAVE64-NEXT:    s_mov_b64 s[0:1], exec
54; WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
55; WAVE64-NEXT:    s_cbranch_scc0 .LBB2_2
56; WAVE64-NEXT:  ; %bb.1:
57; WAVE64-NEXT:    s_mov_b64 exec, 0
58; WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
59; WAVE64-NEXT:    s_cbranch_scc0 .LBB2_2
60; WAVE64-NEXT:    s_endpgm
61; WAVE64-NEXT:  .LBB2_2:
62; WAVE64-NEXT:    s_mov_b64 exec, 0
63; WAVE64-NEXT:    exp null off, off, off, off done vm
64; WAVE64-NEXT:    s_endpgm
65;
66; GFX10-WAVE32-LABEL: test_kill_depth_0_imm_neg_x2:
67; GFX10-WAVE32:       ; %bb.0:
68; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
69; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, exec_lo
70; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB2_2
71; GFX10-WAVE32-NEXT:  ; %bb.1:
72; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
73; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, exec_lo
74; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB2_2
75; GFX10-WAVE32-NEXT:    s_endpgm
76; GFX10-WAVE32-NEXT:  .LBB2_2:
77; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
78; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
79; GFX10-WAVE32-NEXT:    s_endpgm
80;
81; GFX11-LABEL: test_kill_depth_0_imm_neg_x2:
82; GFX11:       ; %bb.0:
83; GFX11-NEXT:    s_mov_b64 s[0:1], exec
84; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
85; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], exec
86; GFX11-NEXT:    s_cbranch_scc0 .LBB2_2
87; GFX11-NEXT:  ; %bb.1:
88; GFX11-NEXT:    s_mov_b64 exec, 0
89; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
90; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], exec
91; GFX11-NEXT:    s_cbranch_scc0 .LBB2_2
92; GFX11-NEXT:    s_endpgm
93; GFX11-NEXT:  .LBB2_2:
94; GFX11-NEXT:    s_mov_b64 exec, 0
95; GFX11-NEXT:    exp mrt0 off, off, off, off done
96; GFX11-NEXT:    s_endpgm
97  call void @llvm.amdgcn.kill(i1 false)
98  call void @llvm.amdgcn.kill(i1 false)
99  ret void
100}
101
; Kill driven by a per-lane compare of the float argument against 0.0. The
; checks expect the inverted compare (v_cmp_ngt_f32 0, v0) to build the mask in
; vcc that is then and-not'ed out of exec, followed by the usual
; s_cbranch_scc0 to the export-and-end block.
102define amdgpu_ps void @test_kill_depth_var(float %x) #0 {
103; WAVE64-LABEL: test_kill_depth_var:
104; WAVE64:       ; %bb.0:
105; WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
106; WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
107; WAVE64-NEXT:    s_cbranch_scc0 .LBB3_1
108; WAVE64-NEXT:    s_endpgm
109; WAVE64-NEXT:  .LBB3_1:
110; WAVE64-NEXT:    s_mov_b64 exec, 0
111; WAVE64-NEXT:    exp null off, off, off, off done vm
112; WAVE64-NEXT:    s_endpgm
113;
114; GFX10-WAVE32-LABEL: test_kill_depth_var:
115; GFX10-WAVE32:       ; %bb.0:
116; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
117; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
118; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB3_1
119; GFX10-WAVE32-NEXT:    s_endpgm
120; GFX10-WAVE32-NEXT:  .LBB3_1:
121; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
122; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
123; GFX10-WAVE32-NEXT:    s_endpgm
124;
125; GFX11-LABEL: test_kill_depth_var:
126; GFX11:       ; %bb.0:
127; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
128; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
129; GFX11-NEXT:    s_cbranch_scc0 .LBB3_1
130; GFX11-NEXT:    s_endpgm
131; GFX11-NEXT:  .LBB3_1:
132; GFX11-NEXT:    s_mov_b64 exec, 0
133; GFX11-NEXT:    exp mrt0 off, off, off, off done
134; GFX11-NEXT:    s_endpgm
  ; %cmp is the ordered "%x < 0.0" test; it is the condition passed to the kill.
135  %cmp = fcmp olt float %x, 0.0
136  call void @llvm.amdgcn.kill(i1 %cmp)
137  ret void
138}
139
140; FIXME: Ideally only one early-exit would be emitted
; Two kills that reuse the very same condition value (%cmp). The checks show
; that the compare on v0 is currently emitted twice and that each kill gets its
; own and-not + s_cbranch_scc0 on the exec copy held in s[0:1] (s0 for the
; wave32 run); both branches share the exit block .LBB4_2.
141define amdgpu_ps void @test_kill_depth_var_x2_same(float %x) #0 {
142; SI-LABEL: test_kill_depth_var_x2_same:
143; SI:       ; %bb.0:
144; SI-NEXT:    s_mov_b64 s[0:1], exec
145; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
146; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
147; SI-NEXT:    s_cbranch_scc0 .LBB4_2
148; SI-NEXT:  ; %bb.1:
149; SI-NEXT:    s_andn2_b64 exec, exec, vcc
150; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
151; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
152; SI-NEXT:    s_cbranch_scc0 .LBB4_2
153; SI-NEXT:    s_endpgm
154; SI-NEXT:  .LBB4_2:
155; SI-NEXT:    s_mov_b64 exec, 0
156; SI-NEXT:    exp null off, off, off, off done vm
157; SI-NEXT:    s_endpgm
158;
159; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_same:
160; GFX10-WAVE64:       ; %bb.0:
161; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
162; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
163; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
164; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB4_2
165; GFX10-WAVE64-NEXT:  ; %bb.1:
166; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
167; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
168; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
169; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB4_2
170; GFX10-WAVE64-NEXT:    s_endpgm
171; GFX10-WAVE64-NEXT:  .LBB4_2:
172; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
173; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
174; GFX10-WAVE64-NEXT:    s_endpgm
175;
176; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_same:
177; GFX10-WAVE32:       ; %bb.0:
178; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
179; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
180; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
181; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB4_2
182; GFX10-WAVE32-NEXT:  ; %bb.1:
183; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
184; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
185; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
186; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB4_2
187; GFX10-WAVE32-NEXT:    s_endpgm
188; GFX10-WAVE32-NEXT:  .LBB4_2:
189; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
190; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
191; GFX10-WAVE32-NEXT:    s_endpgm
192;
193; GFX11-LABEL: test_kill_depth_var_x2_same:
194; GFX11:       ; %bb.0:
195; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
196; GFX11-NEXT:    s_mov_b64 s[0:1], exec
197; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
198; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
199; GFX11-NEXT:    s_cbranch_scc0 .LBB4_2
200; GFX11-NEXT:  ; %bb.1:
201; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
202; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
203; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
204; GFX11-NEXT:    s_cbranch_scc0 .LBB4_2
205; GFX11-NEXT:    s_endpgm
206; GFX11-NEXT:  .LBB4_2:
207; GFX11-NEXT:    s_mov_b64 exec, 0
208; GFX11-NEXT:    exp mrt0 off, off, off, off done
209; GFX11-NEXT:    s_endpgm
  ; A single compare result feeds both kill calls below.
210  %cmp = fcmp olt float %x, 0.0
211  call void @llvm.amdgcn.kill(i1 %cmp)
212  call void @llvm.amdgcn.kill(i1 %cmp)
213  ret void
214}
215
216; FIXME: Ideally only one early-exit would be emitted
; Two kills with independent conditions, one per float argument. The checks
; expect a compare on v0 for the first kill and a compare on v1 for the second,
; each followed by an and-not on the exec copy in s[0:1] (s0 for the wave32
; run) and an s_cbranch_scc0 to the shared exit block .LBB5_2.
217define amdgpu_ps void @test_kill_depth_var_x2(float %x, float %y) #0 {
218; SI-LABEL: test_kill_depth_var_x2:
219; SI:       ; %bb.0:
220; SI-NEXT:    s_mov_b64 s[0:1], exec
221; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
222; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
223; SI-NEXT:    s_cbranch_scc0 .LBB5_2
224; SI-NEXT:  ; %bb.1:
225; SI-NEXT:    s_andn2_b64 exec, exec, vcc
226; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
227; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
228; SI-NEXT:    s_cbranch_scc0 .LBB5_2
229; SI-NEXT:    s_endpgm
230; SI-NEXT:  .LBB5_2:
231; SI-NEXT:    s_mov_b64 exec, 0
232; SI-NEXT:    exp null off, off, off, off done vm
233; SI-NEXT:    s_endpgm
234;
235; GFX10-WAVE64-LABEL: test_kill_depth_var_x2:
236; GFX10-WAVE64:       ; %bb.0:
237; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
238; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
239; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
240; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB5_2
241; GFX10-WAVE64-NEXT:  ; %bb.1:
242; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
243; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
244; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
245; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB5_2
246; GFX10-WAVE64-NEXT:    s_endpgm
247; GFX10-WAVE64-NEXT:  .LBB5_2:
248; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
249; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
250; GFX10-WAVE64-NEXT:    s_endpgm
251;
252; GFX10-WAVE32-LABEL: test_kill_depth_var_x2:
253; GFX10-WAVE32:       ; %bb.0:
254; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
255; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
256; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
257; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB5_2
258; GFX10-WAVE32-NEXT:  ; %bb.1:
259; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
260; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v1
261; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
262; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB5_2
263; GFX10-WAVE32-NEXT:    s_endpgm
264; GFX10-WAVE32-NEXT:  .LBB5_2:
265; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
266; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
267; GFX10-WAVE32-NEXT:    s_endpgm
268;
269; GFX11-LABEL: test_kill_depth_var_x2:
270; GFX11:       ; %bb.0:
271; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
272; GFX11-NEXT:    s_mov_b64 s[0:1], exec
273; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
274; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
275; GFX11-NEXT:    s_cbranch_scc0 .LBB5_2
276; GFX11-NEXT:  ; %bb.1:
277; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
278; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v1
279; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
280; GFX11-NEXT:    s_cbranch_scc0 .LBB5_2
281; GFX11-NEXT:    s_endpgm
282; GFX11-NEXT:  .LBB5_2:
283; GFX11-NEXT:    s_mov_b64 exec, 0
284; GFX11-NEXT:    exp mrt0 off, off, off, off done
285; GFX11-NEXT:    s_endpgm
  ; First kill tests %x, second kill tests %y; both use the ordered "< 0.0" compare.
286  %cmp.x = fcmp olt float %x, 0.0
287  call void @llvm.amdgcn.kill(i1 %cmp.x)
288  %cmp.y = fcmp olt float %y, 0.0
289  call void @llvm.amdgcn.kill(i1 %cmp.y)
290  ret void
291}
292
; Two kills separated by a side-effecting inline asm statement. The first kill
; tests the float argument (v0); the second tests the value the asm writes to
; v7. The checks expect the asm to sit between the two and-not +
; s_cbranch_scc0 sequences, which share the exit block .LBB6_2.
293define amdgpu_ps void @test_kill_depth_var_x2_instructions(float %x) #0 {
294; SI-LABEL: test_kill_depth_var_x2_instructions:
295; SI:       ; %bb.0:
296; SI-NEXT:    s_mov_b64 s[0:1], exec
297; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
298; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
299; SI-NEXT:    s_cbranch_scc0 .LBB6_2
300; SI-NEXT:  ; %bb.1:
301; SI-NEXT:    s_andn2_b64 exec, exec, vcc
302; SI-NEXT:    ;;#ASMSTART
303; SI-NEXT:    v_mov_b32_e64 v7, -1
304; SI-NEXT:    ;;#ASMEND
305; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
306; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
307; SI-NEXT:    s_cbranch_scc0 .LBB6_2
308; SI-NEXT:    s_endpgm
309; SI-NEXT:  .LBB6_2:
310; SI-NEXT:    s_mov_b64 exec, 0
311; SI-NEXT:    exp null off, off, off, off done vm
312; SI-NEXT:    s_endpgm
313;
314; GFX10-WAVE64-LABEL: test_kill_depth_var_x2_instructions:
315; GFX10-WAVE64:       ; %bb.0:
316; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
317; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
318; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
319; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB6_2
320; GFX10-WAVE64-NEXT:  ; %bb.1:
321; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
322; GFX10-WAVE64-NEXT:    ;;#ASMSTART
323; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
324; GFX10-WAVE64-NEXT:    ;;#ASMEND
325; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
326; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
327; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB6_2
328; GFX10-WAVE64-NEXT:    s_endpgm
329; GFX10-WAVE64-NEXT:  .LBB6_2:
330; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
331; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
332; GFX10-WAVE64-NEXT:    s_endpgm
333;
334; GFX10-WAVE32-LABEL: test_kill_depth_var_x2_instructions:
335; GFX10-WAVE32:       ; %bb.0:
336; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
337; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
338; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
339; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB6_2
340; GFX10-WAVE32-NEXT:  ; %bb.1:
341; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
342; GFX10-WAVE32-NEXT:    ;;#ASMSTART
343; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
344; GFX10-WAVE32-NEXT:    ;;#ASMEND
345; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
346; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
347; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB6_2
348; GFX10-WAVE32-NEXT:    s_endpgm
349; GFX10-WAVE32-NEXT:  .LBB6_2:
350; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
351; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
352; GFX10-WAVE32-NEXT:    s_endpgm
353;
354; GFX11-LABEL: test_kill_depth_var_x2_instructions:
355; GFX11:       ; %bb.0:
356; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
357; GFX11-NEXT:    s_mov_b64 s[0:1], exec
358; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
359; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
360; GFX11-NEXT:    s_cbranch_scc0 .LBB6_2
361; GFX11-NEXT:  ; %bb.1:
362; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
363; GFX11-NEXT:    ;;#ASMSTART
364; GFX11-NEXT:    v_mov_b32_e64 v7, -1
365; GFX11-NEXT:    ;;#ASMEND
366; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
367; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
368; GFX11-NEXT:    s_cbranch_scc0 .LBB6_2
369; GFX11-NEXT:    s_endpgm
370; GFX11-NEXT:  .LBB6_2:
371; GFX11-NEXT:    s_mov_b64 exec, 0
372; GFX11-NEXT:    exp mrt0 off, off, off, off done
373; GFX11-NEXT:    s_endpgm
374  %cmp.x = fcmp olt float %x, 0.0
375  call void @llvm.amdgcn.kill(i1 %cmp.x)
  ; %y is pinned to v7 by the "={v7}" output constraint; the asm itself writes v7.
376  %y = call float asm sideeffect "v_mov_b32_e64 v7, -1", "={v7}"()
377  %cmp.y = fcmp olt float %y, 0.0
378  call void @llvm.amdgcn.kill(i1 %cmp.y)
379  ret void
380}
381
382; FIXME: why does the skip depend on the asm length in the same block?
; Kill inside one arm of a branch on the inreg i32 argument. Block %bb defines
; v7 through inline asm (one v_mov followed by ten v_nop_e64), kills on the
; "v7 < 0.0" condition and then joins %exit, which returns 1.0. The checks
; expect a scalar compare-and-branch on s0 and an early-exit block (.LBB7_4),
; reached through s_cbranch_scc0, that zeroes exec, exports and ends the
; program.
383define amdgpu_ps float @test_kill_control_flow(i32 inreg %arg) #0 {
384; SI-LABEL: test_kill_control_flow:
385; SI:       ; %bb.0: ; %entry
386; SI-NEXT:    s_cmp_lg_u32 s0, 0
387; SI-NEXT:    s_cbranch_scc0 .LBB7_2
388; SI-NEXT:  ; %bb.1: ; %exit
389; SI-NEXT:    v_mov_b32_e32 v0, 1.0
390; SI-NEXT:    s_branch .LBB7_5
391; SI-NEXT:  .LBB7_2: ; %bb
392; SI-NEXT:    s_mov_b64 s[2:3], exec
393; SI-NEXT:    ;;#ASMSTART
394; SI-NEXT:    v_mov_b32_e64 v7, -1
395; SI-NEXT:    v_nop_e64
396; SI-NEXT:    v_nop_e64
397; SI-NEXT:    v_nop_e64
398; SI-NEXT:    v_nop_e64
399; SI-NEXT:    v_nop_e64
400; SI-NEXT:    v_nop_e64
401; SI-NEXT:    v_nop_e64
402; SI-NEXT:    v_nop_e64
403; SI-NEXT:    v_nop_e64
404; SI-NEXT:    v_nop_e64
405; SI-NEXT:    ;;#ASMEND
406; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
407; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
408; SI-NEXT:    s_cbranch_scc0 .LBB7_4
409; SI-NEXT:  ; %bb.3: ; %bb
410; SI-NEXT:    s_andn2_b64 exec, exec, vcc
411; SI-NEXT:    v_mov_b32_e32 v0, 1.0
412; SI-NEXT:    s_branch .LBB7_5
413; SI-NEXT:  .LBB7_4:
414; SI-NEXT:    s_mov_b64 exec, 0
415; SI-NEXT:    exp null off, off, off, off done vm
416; SI-NEXT:    s_endpgm
417; SI-NEXT:  .LBB7_5:
418;
419; GFX10-WAVE64-LABEL: test_kill_control_flow:
420; GFX10-WAVE64:       ; %bb.0: ; %entry
421; GFX10-WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
422; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB7_2
423; GFX10-WAVE64-NEXT:  ; %bb.1: ; %exit
424; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 1.0
425; GFX10-WAVE64-NEXT:    s_branch .LBB7_5
426; GFX10-WAVE64-NEXT:  .LBB7_2: ; %bb
427; GFX10-WAVE64-NEXT:    ;;#ASMSTART
428; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
429; GFX10-WAVE64-NEXT:    v_nop_e64
430; GFX10-WAVE64-NEXT:    v_nop_e64
431; GFX10-WAVE64-NEXT:    v_nop_e64
432; GFX10-WAVE64-NEXT:    v_nop_e64
433; GFX10-WAVE64-NEXT:    v_nop_e64
434; GFX10-WAVE64-NEXT:    v_nop_e64
435; GFX10-WAVE64-NEXT:    v_nop_e64
436; GFX10-WAVE64-NEXT:    v_nop_e64
437; GFX10-WAVE64-NEXT:    v_nop_e64
438; GFX10-WAVE64-NEXT:    v_nop_e64
439; GFX10-WAVE64-NEXT:    ;;#ASMEND
440; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
441; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
442; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
443; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB7_4
444; GFX10-WAVE64-NEXT:  ; %bb.3: ; %bb
445; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
446; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 1.0
447; GFX10-WAVE64-NEXT:    s_branch .LBB7_5
448; GFX10-WAVE64-NEXT:  .LBB7_4:
449; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
450; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
451; GFX10-WAVE64-NEXT:    s_endpgm
452; GFX10-WAVE64-NEXT:  .LBB7_5:
453;
454; GFX10-WAVE32-LABEL: test_kill_control_flow:
455; GFX10-WAVE32:       ; %bb.0: ; %entry
456; GFX10-WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
457; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB7_2
458; GFX10-WAVE32-NEXT:  ; %bb.1: ; %exit
459; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 1.0
460; GFX10-WAVE32-NEXT:    s_branch .LBB7_5
461; GFX10-WAVE32-NEXT:  .LBB7_2: ; %bb
462; GFX10-WAVE32-NEXT:    ;;#ASMSTART
463; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
464; GFX10-WAVE32-NEXT:    v_nop_e64
465; GFX10-WAVE32-NEXT:    v_nop_e64
466; GFX10-WAVE32-NEXT:    v_nop_e64
467; GFX10-WAVE32-NEXT:    v_nop_e64
468; GFX10-WAVE32-NEXT:    v_nop_e64
469; GFX10-WAVE32-NEXT:    v_nop_e64
470; GFX10-WAVE32-NEXT:    v_nop_e64
471; GFX10-WAVE32-NEXT:    v_nop_e64
472; GFX10-WAVE32-NEXT:    v_nop_e64
473; GFX10-WAVE32-NEXT:    v_nop_e64
474; GFX10-WAVE32-NEXT:    ;;#ASMEND
475; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
476; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
477; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, vcc_lo
478; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB7_4
479; GFX10-WAVE32-NEXT:  ; %bb.3: ; %bb
480; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
481; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 1.0
482; GFX10-WAVE32-NEXT:    s_branch .LBB7_5
483; GFX10-WAVE32-NEXT:  .LBB7_4:
484; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
485; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
486; GFX10-WAVE32-NEXT:    s_endpgm
487; GFX10-WAVE32-NEXT:  .LBB7_5:
488;
489; GFX11-LABEL: test_kill_control_flow:
490; GFX11:       ; %bb.0: ; %entry
491; GFX11-NEXT:    s_cmp_lg_u32 s0, 0
492; GFX11-NEXT:    s_cbranch_scc0 .LBB7_2
493; GFX11-NEXT:  ; %bb.1: ; %exit
494; GFX11-NEXT:    v_mov_b32_e32 v0, 1.0
495; GFX11-NEXT:    s_branch .LBB7_5
496; GFX11-NEXT:  .LBB7_2: ; %bb
497; GFX11-NEXT:    ;;#ASMSTART
498; GFX11-NEXT:    v_mov_b32_e64 v7, -1
499; GFX11-NEXT:    v_nop_e64
500; GFX11-NEXT:    v_nop_e64
501; GFX11-NEXT:    v_nop_e64
502; GFX11-NEXT:    v_nop_e64
503; GFX11-NEXT:    v_nop_e64
504; GFX11-NEXT:    v_nop_e64
505; GFX11-NEXT:    v_nop_e64
506; GFX11-NEXT:    v_nop_e64
507; GFX11-NEXT:    v_nop_e64
508; GFX11-NEXT:    v_nop_e64
509; GFX11-NEXT:    ;;#ASMEND
510; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
511; GFX11-NEXT:    s_mov_b64 s[2:3], exec
512; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
513; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], vcc
514; GFX11-NEXT:    s_cbranch_scc0 .LBB7_4
515; GFX11-NEXT:  ; %bb.3: ; %bb
516; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
517; GFX11-NEXT:    v_mov_b32_e32 v0, 1.0
518; GFX11-NEXT:    s_branch .LBB7_5
519; GFX11-NEXT:  .LBB7_4:
520; GFX11-NEXT:    s_mov_b64 exec, 0
521; GFX11-NEXT:    exp mrt0 off, off, off, off done
522; GFX11-NEXT:    s_endpgm
523; GFX11-NEXT:  .LBB7_5:
524entry:
  ; Take %bb only when the inreg argument is zero; otherwise go straight to %exit.
525  %cmp = icmp eq i32 %arg, 0
526  br i1 %cmp, label %bb, label %exit
527
528bb:
  ; The asm writes v7 and then issues ten v_nop_e64.
  ; NOTE(review): the nops presumably exist only to pad the block's length - confirm.
529  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
530    v_nop_e64
531    v_nop_e64
532    v_nop_e64
533    v_nop_e64
534    v_nop_e64
535    v_nop_e64
536    v_nop_e64
537    v_nop_e64
538    v_nop_e64
539    v_nop_e64", "={v7}"()
540  %cmp.var = fcmp olt float %var, 0.0
541  ; TODO: We could do an early-exit here (the branch above is uniform!)
542  call void @llvm.amdgcn.kill(i1 %cmp.var)
543  br label %exit
544
545exit:
546  ret float 1.0
547}
548
; Kill inside a conditional block that still has work to do afterwards. Block
; %bb defines v7 through inline asm (one v_mov followed by eleven v_nop_e64),
; defines v8 before the kill and stores it (volatile) after the kill, then
; defines v9, which reaches the store in %exit through a phi. The checks expect
; the stores to follow the exec update and a separate early-exit block
; (.LBB8_4), reached through s_cbranch_scc0, that zeroes exec, exports and ends
; the program.
549define amdgpu_ps void @test_kill_control_flow_remainder(i32 inreg %arg) #0 {
550; SI-LABEL: test_kill_control_flow_remainder:
551; SI:       ; %bb.0: ; %entry
552; SI-NEXT:    s_cmp_lg_u32 s0, 0
553; SI-NEXT:    v_mov_b32_e32 v9, 0
554; SI-NEXT:    s_cbranch_scc1 .LBB8_3
555; SI-NEXT:  ; %bb.1: ; %bb
556; SI-NEXT:    s_mov_b64 s[2:3], exec
557; SI-NEXT:    ;;#ASMSTART
558; SI-NEXT:    v_mov_b32_e64 v7, -1
559; SI-NEXT:    v_nop_e64
560; SI-NEXT:    v_nop_e64
561; SI-NEXT:    v_nop_e64
562; SI-NEXT:    v_nop_e64
563; SI-NEXT:    v_nop_e64
564; SI-NEXT:    v_nop_e64
565; SI-NEXT:    v_nop_e64
566; SI-NEXT:    v_nop_e64
567; SI-NEXT:    v_nop_e64
568; SI-NEXT:    v_nop_e64
569; SI-NEXT:    v_nop_e64
570; SI-NEXT:    ;;#ASMEND
571; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
572; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
573; SI-NEXT:    ;;#ASMSTART
574; SI-NEXT:    v_mov_b32_e64 v8, -1
575; SI-NEXT:    ;;#ASMEND
576; SI-NEXT:    s_cbranch_scc0 .LBB8_4
577; SI-NEXT:  ; %bb.2: ; %bb
578; SI-NEXT:    s_andn2_b64 exec, exec, vcc
579; SI-NEXT:    s_mov_b32 s3, 0xf000
580; SI-NEXT:    s_mov_b32 s2, -1
581; SI-NEXT:    buffer_store_dword v8, off, s[0:3], 0
582; SI-NEXT:    s_waitcnt vmcnt(0)
583; SI-NEXT:    ;;#ASMSTART
584; SI-NEXT:    v_mov_b32_e64 v9, -2
585; SI-NEXT:    ;;#ASMEND
586; SI-NEXT:  .LBB8_3: ; %exit
587; SI-NEXT:    s_mov_b32 s3, 0xf000
588; SI-NEXT:    s_mov_b32 s2, -1
589; SI-NEXT:    buffer_store_dword v9, off, s[0:3], 0
590; SI-NEXT:    s_endpgm
591; SI-NEXT:  .LBB8_4:
592; SI-NEXT:    s_mov_b64 exec, 0
593; SI-NEXT:    exp null off, off, off, off done vm
594; SI-NEXT:    s_endpgm
595;
596; GFX10-WAVE64-LABEL: test_kill_control_flow_remainder:
597; GFX10-WAVE64:       ; %bb.0: ; %entry
598; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v9, 0
599; GFX10-WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
600; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB8_2
601; GFX10-WAVE64-NEXT:  ; %bb.1: ; %exit
602; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v9, off
603; GFX10-WAVE64-NEXT:    s_endpgm
604; GFX10-WAVE64-NEXT:  .LBB8_2: ; %bb
605; GFX10-WAVE64-NEXT:    ;;#ASMSTART
606; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
607; GFX10-WAVE64-NEXT:    v_nop_e64
608; GFX10-WAVE64-NEXT:    v_nop_e64
609; GFX10-WAVE64-NEXT:    v_nop_e64
610; GFX10-WAVE64-NEXT:    v_nop_e64
611; GFX10-WAVE64-NEXT:    v_nop_e64
612; GFX10-WAVE64-NEXT:    v_nop_e64
613; GFX10-WAVE64-NEXT:    v_nop_e64
614; GFX10-WAVE64-NEXT:    v_nop_e64
615; GFX10-WAVE64-NEXT:    v_nop_e64
616; GFX10-WAVE64-NEXT:    v_nop_e64
617; GFX10-WAVE64-NEXT:    v_nop_e64
618; GFX10-WAVE64-NEXT:    ;;#ASMEND
619; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
620; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
621; GFX10-WAVE64-NEXT:    ;;#ASMSTART
622; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v8, -1
623; GFX10-WAVE64-NEXT:    ;;#ASMEND
624; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
625; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB8_4
626; GFX10-WAVE64-NEXT:  ; %bb.3: ; %bb
627; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
628; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v8, off
629; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
630; GFX10-WAVE64-NEXT:    ;;#ASMSTART
631; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v9, -2
632; GFX10-WAVE64-NEXT:    ;;#ASMEND
633; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v9, off
634; GFX10-WAVE64-NEXT:    s_endpgm
635; GFX10-WAVE64-NEXT:  .LBB8_4:
636; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
637; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
638; GFX10-WAVE64-NEXT:    s_endpgm
639;
640; GFX10-WAVE32-LABEL: test_kill_control_flow_remainder:
641; GFX10-WAVE32:       ; %bb.0: ; %entry
642; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v9, 0
643; GFX10-WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
644; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB8_2
645; GFX10-WAVE32-NEXT:  ; %bb.1: ; %exit
646; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v9, off
647; GFX10-WAVE32-NEXT:    s_endpgm
648; GFX10-WAVE32-NEXT:  .LBB8_2: ; %bb
649; GFX10-WAVE32-NEXT:    ;;#ASMSTART
650; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
651; GFX10-WAVE32-NEXT:    v_nop_e64
652; GFX10-WAVE32-NEXT:    v_nop_e64
653; GFX10-WAVE32-NEXT:    v_nop_e64
654; GFX10-WAVE32-NEXT:    v_nop_e64
655; GFX10-WAVE32-NEXT:    v_nop_e64
656; GFX10-WAVE32-NEXT:    v_nop_e64
657; GFX10-WAVE32-NEXT:    v_nop_e64
658; GFX10-WAVE32-NEXT:    v_nop_e64
659; GFX10-WAVE32-NEXT:    v_nop_e64
660; GFX10-WAVE32-NEXT:    v_nop_e64
661; GFX10-WAVE32-NEXT:    v_nop_e64
662; GFX10-WAVE32-NEXT:    ;;#ASMEND
663; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
664; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
665; GFX10-WAVE32-NEXT:    ;;#ASMSTART
666; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v8, -1
667; GFX10-WAVE32-NEXT:    ;;#ASMEND
668; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, vcc_lo
669; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB8_4
670; GFX10-WAVE32-NEXT:  ; %bb.3: ; %bb
671; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
672; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v8, off
673; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
674; GFX10-WAVE32-NEXT:    ;;#ASMSTART
675; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v9, -2
676; GFX10-WAVE32-NEXT:    ;;#ASMEND
677; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v9, off
678; GFX10-WAVE32-NEXT:    s_endpgm
679; GFX10-WAVE32-NEXT:  .LBB8_4:
680; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
681; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
682; GFX10-WAVE32-NEXT:    s_endpgm
683;
684; GFX11-LABEL: test_kill_control_flow_remainder:
685; GFX11:       ; %bb.0: ; %entry
686; GFX11-NEXT:    v_mov_b32_e32 v9, 0
687; GFX11-NEXT:    s_cmp_lg_u32 s0, 0
688; GFX11-NEXT:    s_cbranch_scc0 .LBB8_2
689; GFX11-NEXT:  ; %bb.1: ; %exit
690; GFX11-NEXT:    global_store_b32 v[0:1], v9, off
691; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
692; GFX11-NEXT:    s_endpgm
693; GFX11-NEXT:  .LBB8_2: ; %bb
694; GFX11-NEXT:    ;;#ASMSTART
695; GFX11-NEXT:    v_mov_b32_e64 v7, -1
696; GFX11-NEXT:    v_nop_e64
697; GFX11-NEXT:    v_nop_e64
698; GFX11-NEXT:    v_nop_e64
699; GFX11-NEXT:    v_nop_e64
700; GFX11-NEXT:    v_nop_e64
701; GFX11-NEXT:    v_nop_e64
702; GFX11-NEXT:    v_nop_e64
703; GFX11-NEXT:    v_nop_e64
704; GFX11-NEXT:    v_nop_e64
705; GFX11-NEXT:    v_nop_e64
706; GFX11-NEXT:    v_nop_e64
707; GFX11-NEXT:    ;;#ASMEND
708; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
709; GFX11-NEXT:    s_mov_b64 s[2:3], exec
710; GFX11-NEXT:    ;;#ASMSTART
711; GFX11-NEXT:    v_mov_b32_e64 v8, -1
712; GFX11-NEXT:    ;;#ASMEND
713; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], vcc
714; GFX11-NEXT:    s_cbranch_scc0 .LBB8_4
715; GFX11-NEXT:  ; %bb.3: ; %bb
716; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
717; GFX11-NEXT:    global_store_b32 v[0:1], v8, off dlc
718; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
719; GFX11-NEXT:    ;;#ASMSTART
720; GFX11-NEXT:    v_mov_b32_e64 v9, -2
721; GFX11-NEXT:    ;;#ASMEND
722; GFX11-NEXT:    global_store_b32 v[0:1], v9, off
723; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
724; GFX11-NEXT:    s_endpgm
725; GFX11-NEXT:  .LBB8_4:
726; GFX11-NEXT:    s_mov_b64 exec, 0
727; GFX11-NEXT:    exp mrt0 off, off, off, off done
728; GFX11-NEXT:    s_endpgm
729entry:
  ; Take %bb only when the inreg argument is zero; otherwise go straight to %exit.
730  %cmp = icmp eq i32 %arg, 0
731  br i1 %cmp, label %bb, label %exit
732
733bb:
  ; The asm writes v7 and then issues eleven v_nop_e64.
  ; NOTE(review): the nops presumably exist only to pad the block's length - confirm.
734  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
735    v_nop_e64
736    v_nop_e64
737    v_nop_e64
738    v_nop_e64
739    v_nop_e64
740    v_nop_e64
741    v_nop_e64
742    v_nop_e64
743    v_nop_e64
744    v_nop_e64
745    v_nop_e64", "={v7}"()
  ; %live.across (v8) is defined before the kill and consumed by the store after it.
746  %live.across = call float asm sideeffect "v_mov_b32_e64 v8, -1", "={v8}"()
747  %cmp.var = fcmp olt float %var, 0.0
748  ; TODO: We could do an early-exit here (the branch above is uniform!)
749  call void @llvm.amdgcn.kill(i1 %cmp.var)
750  store volatile float %live.across, float addrspace(1)* undef
  ; %live.out (v9) is defined after the kill and leaves %bb through the phi in %exit.
751  %live.out = call float asm sideeffect "v_mov_b32_e64 v9, -2", "={v9}"()
752  br label %exit
753
754exit:
  ; 0.0 when coming straight from %entry, the asm-defined v9 value when coming from %bb.
755  %phi = phi float [ 0.0, %entry ], [ %live.out, %bb ]
756  store float %phi, float addrspace(1)* undef
757  ret void
758}
759
760define amdgpu_ps float @test_kill_control_flow_return(i32 inreg %arg) #0 {
761; SI-LABEL: test_kill_control_flow_return:
762; SI:       ; %bb.0: ; %entry
763; SI-NEXT:    s_cmp_eq_u32 s0, 1
764; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
765; SI-NEXT:    s_mov_b64 s[2:3], exec
766; SI-NEXT:    s_xor_b64 s[4:5], s[4:5], exec
767; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[4:5]
768; SI-NEXT:    s_cbranch_scc0 .LBB9_4
769; SI-NEXT:  ; %bb.1: ; %entry
770; SI-NEXT:    s_and_b64 exec, exec, s[2:3]
771; SI-NEXT:    s_cmp_lg_u32 s0, 0
772; SI-NEXT:    v_mov_b32_e32 v0, 0
773; SI-NEXT:    s_cbranch_scc0 .LBB9_3
774; SI-NEXT:  ; %bb.2: ; %exit
775; SI-NEXT:    s_branch .LBB9_5
776; SI-NEXT:  .LBB9_3: ; %bb
777; SI-NEXT:    ;;#ASMSTART
778; SI-NEXT:    v_mov_b32_e64 v7, -1
779; SI-NEXT:    v_nop_e64
780; SI-NEXT:    v_nop_e64
781; SI-NEXT:    v_nop_e64
782; SI-NEXT:    v_nop_e64
783; SI-NEXT:    v_nop_e64
784; SI-NEXT:    v_nop_e64
785; SI-NEXT:    v_nop_e64
786; SI-NEXT:    v_nop_e64
787; SI-NEXT:    v_nop_e64
788; SI-NEXT:    v_nop_e64
789; SI-NEXT:    ;;#ASMEND
790; SI-NEXT:    v_mov_b32_e32 v0, v7
791; SI-NEXT:    s_branch .LBB9_5
792; SI-NEXT:  .LBB9_4:
793; SI-NEXT:    s_mov_b64 exec, 0
794; SI-NEXT:    exp null off, off, off, off done vm
795; SI-NEXT:    s_endpgm
796; SI-NEXT:  .LBB9_5:
797;
798; GFX10-WAVE64-LABEL: test_kill_control_flow_return:
799; GFX10-WAVE64:       ; %bb.0: ; %entry
800; GFX10-WAVE64-NEXT:    s_cmp_eq_u32 s0, 1
801; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
802; GFX10-WAVE64-NEXT:    s_cselect_b64 s[4:5], -1, 0
803; GFX10-WAVE64-NEXT:    s_xor_b64 s[4:5], s[4:5], exec
804; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], s[4:5]
805; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB9_4
806; GFX10-WAVE64-NEXT:  ; %bb.1: ; %entry
807; GFX10-WAVE64-NEXT:    s_and_b64 exec, exec, s[2:3]
808; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 0
809; GFX10-WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
810; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB9_3
811; GFX10-WAVE64-NEXT:  ; %bb.2: ; %exit
812; GFX10-WAVE64-NEXT:    s_branch .LBB9_5
813; GFX10-WAVE64-NEXT:  .LBB9_3: ; %bb
814; GFX10-WAVE64-NEXT:    ;;#ASMSTART
815; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
816; GFX10-WAVE64-NEXT:    v_nop_e64
817; GFX10-WAVE64-NEXT:    v_nop_e64
818; GFX10-WAVE64-NEXT:    v_nop_e64
819; GFX10-WAVE64-NEXT:    v_nop_e64
820; GFX10-WAVE64-NEXT:    v_nop_e64
821; GFX10-WAVE64-NEXT:    v_nop_e64
822; GFX10-WAVE64-NEXT:    v_nop_e64
823; GFX10-WAVE64-NEXT:    v_nop_e64
824; GFX10-WAVE64-NEXT:    v_nop_e64
825; GFX10-WAVE64-NEXT:    v_nop_e64
826; GFX10-WAVE64-NEXT:    ;;#ASMEND
827; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, v7
828; GFX10-WAVE64-NEXT:    s_branch .LBB9_5
829; GFX10-WAVE64-NEXT:  .LBB9_4:
830; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
831; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
832; GFX10-WAVE64-NEXT:    s_endpgm
833; GFX10-WAVE64-NEXT:  .LBB9_5:
834;
835; GFX10-WAVE32-LABEL: test_kill_control_flow_return:
836; GFX10-WAVE32:       ; %bb.0: ; %entry
837; GFX10-WAVE32-NEXT:    s_cmp_eq_u32 s0, 1
838; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
839; GFX10-WAVE32-NEXT:    s_cselect_b32 s2, -1, 0
840; GFX10-WAVE32-NEXT:    s_xor_b32 s2, s2, exec_lo
841; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, s2
842; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB9_4
843; GFX10-WAVE32-NEXT:  ; %bb.1: ; %entry
844; GFX10-WAVE32-NEXT:    s_and_b32 exec_lo, exec_lo, s1
845; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 0
846; GFX10-WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
847; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB9_3
848; GFX10-WAVE32-NEXT:  ; %bb.2: ; %exit
849; GFX10-WAVE32-NEXT:    s_branch .LBB9_5
850; GFX10-WAVE32-NEXT:  .LBB9_3: ; %bb
851; GFX10-WAVE32-NEXT:    ;;#ASMSTART
852; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
853; GFX10-WAVE32-NEXT:    v_nop_e64
854; GFX10-WAVE32-NEXT:    v_nop_e64
855; GFX10-WAVE32-NEXT:    v_nop_e64
856; GFX10-WAVE32-NEXT:    v_nop_e64
857; GFX10-WAVE32-NEXT:    v_nop_e64
858; GFX10-WAVE32-NEXT:    v_nop_e64
859; GFX10-WAVE32-NEXT:    v_nop_e64
860; GFX10-WAVE32-NEXT:    v_nop_e64
861; GFX10-WAVE32-NEXT:    v_nop_e64
862; GFX10-WAVE32-NEXT:    v_nop_e64
863; GFX10-WAVE32-NEXT:    ;;#ASMEND
864; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, v7
865; GFX10-WAVE32-NEXT:    s_branch .LBB9_5
866; GFX10-WAVE32-NEXT:  .LBB9_4:
867; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
868; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
869; GFX10-WAVE32-NEXT:    s_endpgm
870; GFX10-WAVE32-NEXT:  .LBB9_5:
871;
872; GFX11-LABEL: test_kill_control_flow_return:
873; GFX11:       ; %bb.0: ; %entry
874; GFX11-NEXT:    s_cmp_eq_u32 s0, 1
875; GFX11-NEXT:    s_mov_b64 s[2:3], exec
876; GFX11-NEXT:    s_cselect_b64 s[4:5], -1, 0
877; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
878; GFX11-NEXT:    s_xor_b64 s[4:5], s[4:5], exec
879; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], s[4:5]
880; GFX11-NEXT:    s_cbranch_scc0 .LBB9_4
881; GFX11-NEXT:  ; %bb.1: ; %entry
882; GFX11-NEXT:    s_and_b64 exec, exec, s[2:3]
883; GFX11-NEXT:    v_mov_b32_e32 v0, 0
884; GFX11-NEXT:    s_cmp_lg_u32 s0, 0
885; GFX11-NEXT:    s_cbranch_scc0 .LBB9_3
886; GFX11-NEXT:  ; %bb.2: ; %exit
887; GFX11-NEXT:    s_branch .LBB9_5
888; GFX11-NEXT:  .LBB9_3: ; %bb
889; GFX11-NEXT:    ;;#ASMSTART
890; GFX11-NEXT:    v_mov_b32_e64 v7, -1
891; GFX11-NEXT:    v_nop_e64
892; GFX11-NEXT:    v_nop_e64
893; GFX11-NEXT:    v_nop_e64
894; GFX11-NEXT:    v_nop_e64
895; GFX11-NEXT:    v_nop_e64
896; GFX11-NEXT:    v_nop_e64
897; GFX11-NEXT:    v_nop_e64
898; GFX11-NEXT:    v_nop_e64
899; GFX11-NEXT:    v_nop_e64
900; GFX11-NEXT:    v_nop_e64
901; GFX11-NEXT:    ;;#ASMEND
902; GFX11-NEXT:    v_mov_b32_e32 v0, v7
903; GFX11-NEXT:    s_branch .LBB9_5
904; GFX11-NEXT:  .LBB9_4:
905; GFX11-NEXT:    s_mov_b64 exec, 0
906; GFX11-NEXT:    exp mrt0 off, off, off, off done
907; GFX11-NEXT:    s_endpgm
908; GFX11-NEXT:  .LBB9_5:
909entry:
910  %kill = icmp eq i32 %arg, 1
911  %cmp = icmp eq i32 %arg, 0
912  call void @llvm.amdgcn.kill(i1 %kill)
913  br i1 %cmp, label %bb, label %exit
914
915bb:
916  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
917    v_nop_e64
918    v_nop_e64
919    v_nop_e64
920    v_nop_e64
921    v_nop_e64
922    v_nop_e64
923    v_nop_e64
924    v_nop_e64
925    v_nop_e64
926    v_nop_e64", "={v7}"()
927  br label %exit
928
929exit:
930  %ret = phi float [ %var, %bb ], [ 0.0, %entry ]
931  ret float %ret
932}
933
; Kill inside a loop that is entered through a divergent branch.
; The entry compare is on the non-inreg argument %arg (v0 in the checks), so
; the branch into %bb is lowered with exec-mask control flow. The loop body
; calls llvm.amdgcn.kill on every iteration and loops back on the result of a
; volatile load; %exit performs a volatile store of 8.
934define amdgpu_ps void @test_kill_divergent_loop(i32 %arg) #0 {
935; SI-LABEL: test_kill_divergent_loop:
936; SI:       ; %bb.0: ; %entry
937; SI-NEXT:    s_mov_b64 s[0:1], exec
938; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
939; SI-NEXT:    s_and_saveexec_b64 s[2:3], vcc
940; SI-NEXT:    s_xor_b64 s[4:5], exec, s[2:3]
941; SI-NEXT:    s_cbranch_execz .LBB10_4
942; SI-NEXT:  ; %bb.1: ; %bb.preheader
943; SI-NEXT:    s_mov_b32 s3, 0xf000
944; SI-NEXT:    s_mov_b32 s2, -1
945; SI-NEXT:  .LBB10_2: ; %bb
946; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
947; SI-NEXT:    ;;#ASMSTART
948; SI-NEXT:    v_mov_b32_e64 v7, -1
949; SI-NEXT:    v_nop_e64
950; SI-NEXT:    v_nop_e64
951; SI-NEXT:    v_nop_e64
952; SI-NEXT:    v_nop_e64
953; SI-NEXT:    v_nop_e64
954; SI-NEXT:    v_nop_e64
955; SI-NEXT:    v_nop_e64
956; SI-NEXT:    v_nop_e64
957; SI-NEXT:    v_nop_e64
958; SI-NEXT:    v_nop_e64
959; SI-NEXT:    ;;#ASMEND
960; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
961; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
962; SI-NEXT:    s_cbranch_scc0 .LBB10_5
963; SI-NEXT:  ; %bb.3: ; %bb
964; SI-NEXT:    ; in Loop: Header=BB10_2 Depth=1
965; SI-NEXT:    s_andn2_b64 exec, exec, vcc
966; SI-NEXT:    buffer_load_dword v0, off, s[0:3], 0 glc
967; SI-NEXT:    s_waitcnt vmcnt(0)
968; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
969; SI-NEXT:    s_cbranch_vccnz .LBB10_2
970; SI-NEXT:  .LBB10_4: ; %Flow1
971; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
972; SI-NEXT:    s_mov_b32 s3, 0xf000
973; SI-NEXT:    s_mov_b32 s2, -1
974; SI-NEXT:    v_mov_b32_e32 v0, 8
975; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
976; SI-NEXT:    s_waitcnt vmcnt(0)
977; SI-NEXT:    s_endpgm
978; SI-NEXT:  .LBB10_5:
979; SI-NEXT:    s_mov_b64 exec, 0
980; SI-NEXT:    exp null off, off, off, off done vm
981; SI-NEXT:    s_endpgm
982;
983; GFX10-WAVE64-LABEL: test_kill_divergent_loop:
984; GFX10-WAVE64:       ; %bb.0: ; %entry
985; GFX10-WAVE64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
986; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
987; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
988; GFX10-WAVE64-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
989; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB10_3
990; GFX10-WAVE64-NEXT:  .LBB10_1: ; %bb
991; GFX10-WAVE64-NEXT:    ; =>This Inner Loop Header: Depth=1
992; GFX10-WAVE64-NEXT:    ;;#ASMSTART
993; GFX10-WAVE64-NEXT:    v_mov_b32_e64 v7, -1
994; GFX10-WAVE64-NEXT:    v_nop_e64
995; GFX10-WAVE64-NEXT:    v_nop_e64
996; GFX10-WAVE64-NEXT:    v_nop_e64
997; GFX10-WAVE64-NEXT:    v_nop_e64
998; GFX10-WAVE64-NEXT:    v_nop_e64
999; GFX10-WAVE64-NEXT:    v_nop_e64
1000; GFX10-WAVE64-NEXT:    v_nop_e64
1001; GFX10-WAVE64-NEXT:    v_nop_e64
1002; GFX10-WAVE64-NEXT:    v_nop_e64
1003; GFX10-WAVE64-NEXT:    v_nop_e64
1004; GFX10-WAVE64-NEXT:    ;;#ASMEND
1005; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
1006; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], vcc
1007; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB10_4
1008; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb
1009; GFX10-WAVE64-NEXT:    ; in Loop: Header=BB10_1 Depth=1
1010; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
1011; GFX10-WAVE64-NEXT:    global_load_dword v0, v[0:1], off glc dlc
1012; GFX10-WAVE64-NEXT:    s_waitcnt vmcnt(0)
1013; GFX10-WAVE64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1014; GFX10-WAVE64-NEXT:    s_cbranch_vccnz .LBB10_1
1015; GFX10-WAVE64-NEXT:  .LBB10_3: ; %Flow1
1016; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[2:3]
1017; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 8
1018; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v0, off
1019; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
1020; GFX10-WAVE64-NEXT:    s_endpgm
1021; GFX10-WAVE64-NEXT:  .LBB10_4:
1022; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1023; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1024; GFX10-WAVE64-NEXT:    s_endpgm
1025;
1026; GFX10-WAVE32-LABEL: test_kill_divergent_loop:
1027; GFX10-WAVE32:       ; %bb.0: ; %entry
1028; GFX10-WAVE32-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1029; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
1030; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
1031; GFX10-WAVE32-NEXT:    s_xor_b32 s1, exec_lo, s1
1032; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB10_3
1033; GFX10-WAVE32-NEXT:  .LBB10_1: ; %bb
1034; GFX10-WAVE32-NEXT:    ; =>This Inner Loop Header: Depth=1
1035; GFX10-WAVE32-NEXT:    ;;#ASMSTART
1036; GFX10-WAVE32-NEXT:    v_mov_b32_e64 v7, -1
1037; GFX10-WAVE32-NEXT:    v_nop_e64
1038; GFX10-WAVE32-NEXT:    v_nop_e64
1039; GFX10-WAVE32-NEXT:    v_nop_e64
1040; GFX10-WAVE32-NEXT:    v_nop_e64
1041; GFX10-WAVE32-NEXT:    v_nop_e64
1042; GFX10-WAVE32-NEXT:    v_nop_e64
1043; GFX10-WAVE32-NEXT:    v_nop_e64
1044; GFX10-WAVE32-NEXT:    v_nop_e64
1045; GFX10-WAVE32-NEXT:    v_nop_e64
1046; GFX10-WAVE32-NEXT:    v_nop_e64
1047; GFX10-WAVE32-NEXT:    ;;#ASMEND
1048; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v7
1049; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, vcc_lo
1050; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB10_4
1051; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb
1052; GFX10-WAVE32-NEXT:    ; in Loop: Header=BB10_1 Depth=1
1053; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
1054; GFX10-WAVE32-NEXT:    global_load_dword v0, v[0:1], off glc dlc
1055; GFX10-WAVE32-NEXT:    s_waitcnt vmcnt(0)
1056; GFX10-WAVE32-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
1057; GFX10-WAVE32-NEXT:    s_cbranch_vccnz .LBB10_1
1058; GFX10-WAVE32-NEXT:  .LBB10_3: ; %Flow1
1059; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s1
1060; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 8
1061; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v0, off
1062; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
1063; GFX10-WAVE32-NEXT:    s_endpgm
1064; GFX10-WAVE32-NEXT:  .LBB10_4:
1065; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1066; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1067; GFX10-WAVE32-NEXT:    s_endpgm
1068;
1069; GFX11-LABEL: test_kill_divergent_loop:
1070; GFX11:       ; %bb.0: ; %entry
1071; GFX11-NEXT:    s_mov_b64 s[0:1], exec
1072; GFX11-NEXT:    s_mov_b64 s[2:3], exec
1073; GFX11-NEXT:    v_cmpx_eq_u32_e32 0, v0
1074; GFX11-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
1075; GFX11-NEXT:    s_cbranch_execz .LBB10_3
1076; GFX11-NEXT:  .LBB10_1: ; %bb
1077; GFX11-NEXT:    ; =>This Inner Loop Header: Depth=1
1078; GFX11-NEXT:    ;;#ASMSTART
1079; GFX11-NEXT:    v_mov_b32_e64 v7, -1
1080; GFX11-NEXT:    v_nop_e64
1081; GFX11-NEXT:    v_nop_e64
1082; GFX11-NEXT:    v_nop_e64
1083; GFX11-NEXT:    v_nop_e64
1084; GFX11-NEXT:    v_nop_e64
1085; GFX11-NEXT:    v_nop_e64
1086; GFX11-NEXT:    v_nop_e64
1087; GFX11-NEXT:    v_nop_e64
1088; GFX11-NEXT:    v_nop_e64
1089; GFX11-NEXT:    v_nop_e64
1090; GFX11-NEXT:    ;;#ASMEND
1091; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v7
1092; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], vcc
1093; GFX11-NEXT:    s_cbranch_scc0 .LBB10_4
1094; GFX11-NEXT:  ; %bb.2: ; %bb
1095; GFX11-NEXT:    ; in Loop: Header=BB10_1 Depth=1
1096; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
1097; GFX11-NEXT:    global_load_b32 v0, v[0:1], off glc dlc
1098; GFX11-NEXT:    s_waitcnt vmcnt(0)
1099; GFX11-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1100; GFX11-NEXT:    s_cbranch_vccnz .LBB10_1
1101; GFX11-NEXT:  .LBB10_3: ; %Flow1
1102; GFX11-NEXT:    s_or_b64 exec, exec, s[2:3]
1103; GFX11-NEXT:    v_mov_b32_e32 v0, 8
1104; GFX11-NEXT:    global_store_b32 v[0:1], v0, off dlc
1105; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1106; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1107; GFX11-NEXT:    s_endpgm
1108; GFX11-NEXT:  .LBB10_4:
1109; GFX11-NEXT:    s_mov_b64 exec, 0
1110; GFX11-NEXT:    exp mrt0 off, off, off, off done
1111; GFX11-NEXT:    s_endpgm
1112entry:
1113  %cmp = icmp eq i32 %arg, 0
1114  br i1 %cmp, label %bb, label %exit
1115
1116bb:
1117  %var = call float asm sideeffect "v_mov_b32_e64 v7, -1
1118    v_nop_e64
1119    v_nop_e64
1120    v_nop_e64
1121    v_nop_e64
1122    v_nop_e64
1123    v_nop_e64
1124    v_nop_e64
1125    v_nop_e64
1126    v_nop_e64
1127    v_nop_e64", "={v7}"()
  ; Lanes for which (%var < 0.0) is false are removed from exec; the checks show
  ; this as an inverted compare into vcc followed by an and-not of exec with vcc.
1128  %cmp.var = fcmp olt float %var, 0.0
1129  call void @llvm.amdgcn.kill(i1 %cmp.var)
  ; The volatile load keeps the back-edge condition from being folded away.
1130  %vgpr = load volatile i32, i32 addrspace(1)* undef
1131  %loop.cond = icmp eq i32 %vgpr, 0
1132  br i1 %loop.cond, label %bb, label %exit
1133
1134exit:
1135  store volatile i32 8, i32 addrspace(1)* undef
1136  ret void
1137}
1138
1139; bug 28550
; %tmp2 is defined before the kill, feeds the kill condition (%cmp.tmp2), and
; is also an incoming value of the phi %tmp5 in %phibb, so it stays live
; across the kill.
; NOTE(review): "bug 28550" presumably refers to an LLVM bug-tracker entry; the
; report itself is not visible from this file - confirm before relying on it.
1140define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 {
1141; SI-LABEL: phi_use_def_before_kill:
1142; SI:       ; %bb.0: ; %bb
1143; SI-NEXT:    v_add_f32_e64 v1, s0, 1.0
1144; SI-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
1145; SI-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc
1146; SI-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v1
1147; SI-NEXT:    s_andn2_b64 exec, exec, vcc
1148; SI-NEXT:    s_cbranch_scc0 .LBB11_6
1149; SI-NEXT:  ; %bb.1: ; %bb
1150; SI-NEXT:    s_andn2_b64 exec, exec, vcc
1151; SI-NEXT:    s_cbranch_scc0 .LBB11_3
1152; SI-NEXT:  ; %bb.2: ; %bb8
1153; SI-NEXT:    s_mov_b32 s3, 0xf000
1154; SI-NEXT:    s_mov_b32 s2, -1
1155; SI-NEXT:    v_mov_b32_e32 v0, 8
1156; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1157; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
1158; SI-NEXT:    v_mov_b32_e32 v0, 4.0
1159; SI-NEXT:  .LBB11_3: ; %phibb
1160; SI-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
1161; SI-NEXT:    s_cbranch_vccz .LBB11_5
1162; SI-NEXT:  ; %bb.4: ; %bb10
1163; SI-NEXT:    s_mov_b32 s3, 0xf000
1164; SI-NEXT:    s_mov_b32 s2, -1
1165; SI-NEXT:    v_mov_b32_e32 v0, 9
1166; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1167; SI-NEXT:    s_waitcnt vmcnt(0)
1168; SI-NEXT:  .LBB11_5: ; %end
1169; SI-NEXT:    s_endpgm
1170; SI-NEXT:  .LBB11_6:
1171; SI-NEXT:    s_mov_b64 exec, 0
1172; SI-NEXT:    exp null off, off, off, off done vm
1173; SI-NEXT:    s_endpgm
1174;
1175; GFX10-WAVE64-LABEL: phi_use_def_before_kill:
1176; GFX10-WAVE64:       ; %bb.0: ; %bb
1177; GFX10-WAVE64-NEXT:    v_add_f32_e64 v1, s0, 1.0
1178; GFX10-WAVE64-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
1179; GFX10-WAVE64-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc
1180; GFX10-WAVE64-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v1
1181; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
1182; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB11_6
1183; GFX10-WAVE64-NEXT:  ; %bb.1: ; %bb
1184; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
1185; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB11_3
1186; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb8
1187; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v1, 8
1188; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 4.0
1189; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v1, off
1190; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
1191; GFX10-WAVE64-NEXT:  .LBB11_3: ; %phibb
1192; GFX10-WAVE64-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
1193; GFX10-WAVE64-NEXT:    s_cbranch_vccz .LBB11_5
1194; GFX10-WAVE64-NEXT:  ; %bb.4: ; %bb10
1195; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 9
1196; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v0, off
1197; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
1198; GFX10-WAVE64-NEXT:  .LBB11_5: ; %end
1199; GFX10-WAVE64-NEXT:    s_endpgm
1200; GFX10-WAVE64-NEXT:  .LBB11_6:
1201; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1202; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1203; GFX10-WAVE64-NEXT:    s_endpgm
1204;
1205; GFX10-WAVE32-LABEL: phi_use_def_before_kill:
1206; GFX10-WAVE32:       ; %bb.0: ; %bb
1207; GFX10-WAVE32-NEXT:    v_add_f32_e64 v1, s0, 1.0
1208; GFX10-WAVE32-NEXT:    v_cmp_lt_f32_e32 vcc_lo, 0, v1
1209; GFX10-WAVE32-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc_lo
1210; GFX10-WAVE32-NEXT:    v_cmp_nlt_f32_e32 vcc_lo, 0, v1
1211; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
1212; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB11_6
1213; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb
1214; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
1215; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB11_3
1216; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb8
1217; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v1, 8
1218; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 4.0
1219; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v1, off
1220; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
1221; GFX10-WAVE32-NEXT:  .LBB11_3: ; %phibb
1222; GFX10-WAVE32-NEXT:    v_cmp_eq_f32_e32 vcc_lo, 0, v0
1223; GFX10-WAVE32-NEXT:    s_cbranch_vccz .LBB11_5
1224; GFX10-WAVE32-NEXT:  ; %bb.4: ; %bb10
1225; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 9
1226; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v0, off
1227; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
1228; GFX10-WAVE32-NEXT:  .LBB11_5: ; %end
1229; GFX10-WAVE32-NEXT:    s_endpgm
1230; GFX10-WAVE32-NEXT:  .LBB11_6:
1231; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1232; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1233; GFX10-WAVE32-NEXT:    s_endpgm
1234;
1235; GFX11-LABEL: phi_use_def_before_kill:
1236; GFX11:       ; %bb.0: ; %bb
1237; GFX11-NEXT:    v_add_f32_e64 v1, s0, 1.0
1238; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1239; GFX11-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v1
1240; GFX11-NEXT:    v_cndmask_b32_e64 v0, 0, -1.0, vcc
1241; GFX11-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v1
1242; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
1243; GFX11-NEXT:    s_cbranch_scc0 .LBB11_6
1244; GFX11-NEXT:  ; %bb.1: ; %bb
1245; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
1246; GFX11-NEXT:    s_cbranch_scc0 .LBB11_3
1247; GFX11-NEXT:  ; %bb.2: ; %bb8
1248; GFX11-NEXT:    v_mov_b32_e32 v1, 8
1249; GFX11-NEXT:    v_mov_b32_e32 v0, 4.0
1250; GFX11-NEXT:    global_store_b32 v[0:1], v1, off dlc
1251; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1252; GFX11-NEXT:  .LBB11_3: ; %phibb
1253; GFX11-NEXT:    v_cmp_eq_f32_e32 vcc, 0, v0
1254; GFX11-NEXT:    s_cbranch_vccz .LBB11_5
1255; GFX11-NEXT:  ; %bb.4: ; %bb10
1256; GFX11-NEXT:    v_mov_b32_e32 v0, 9
1257; GFX11-NEXT:    global_store_b32 v[0:1], v0, off dlc
1258; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1259; GFX11-NEXT:  .LBB11_5: ; %end
1260; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1261; GFX11-NEXT:    s_endpgm
1262; GFX11-NEXT:  .LBB11_6:
1263; GFX11-NEXT:    s_mov_b64 exec, 0
1264; GFX11-NEXT:    exp mrt0 off, off, off, off done
1265; GFX11-NEXT:    s_endpgm
1266bb:
1267  %tmp = fadd float %x, 1.000000e+00
1268  %tmp1 = fcmp olt float 0.000000e+00, %tmp
1269  %tmp2 = select i1 %tmp1, float -1.000000e+00, float 0.000000e+00
1270  %cmp.tmp2 = fcmp olt float %tmp2, 0.0
1271  call void @llvm.amdgcn.kill(i1 %cmp.tmp2)
  ; The branch condition is undef; both successors are still emitted in the checks.
1272  br i1 undef, label %phibb, label %bb8
1273
1274phibb:
  ; %tmp2 arrives here directly from %bb, i.e. from before/around the kill.
1275  %tmp5 = phi float [ %tmp2, %bb ], [ 4.0, %bb8 ]
1276  %tmp6 = fcmp oeq float %tmp5, 0.000000e+00
1277  br i1 %tmp6, label %bb10, label %end
1278
1279bb8:
1280  store volatile i32 8, i32 addrspace(1)* undef
1281  br label %phibb
1282
1283bb10:
1284  store volatile i32 9, i32 addrspace(1)* undef
1285  br label %end
1286
1287end:
1288  ret void
1289}
1290
; The kill sits in %bb6, which ends in unreachable and therefore has no
; successors; %bb5 is likewise terminated by unreachable. %bb7, the only block
; with a ret, is reachable solely through the false edge of "br i1 true" in %bb4.
; NOTE(review): judging by the name, this checks that no skip branch is placed
; around a kill block that has no successors - confirm against the pass under test.
1291define amdgpu_ps void @no_skip_no_successors(float inreg %arg, float inreg %arg1) #0 {
1292; SI-LABEL: no_skip_no_successors:
1293; SI:       ; %bb.0: ; %bb
1294; SI-NEXT:    v_cmp_nge_f32_e64 s[4:5], s1, 0
1295; SI-NEXT:    s_and_b64 vcc, exec, s[4:5]
1296; SI-NEXT:    s_cbranch_vccz .LBB12_3
1297; SI-NEXT:  ; %bb.1: ; %bb6
1298; SI-NEXT:    s_mov_b64 s[2:3], exec
1299; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
1300; SI-NEXT:    s_cbranch_scc0 .LBB12_5
1301; SI-NEXT:  ; %bb.2: ; %bb6
1302; SI-NEXT:    s_mov_b64 exec, 0
1303; SI-NEXT:  .LBB12_3: ; %bb3
1304; SI-NEXT:    v_mov_b32_e32 v0, 0x3e7ae148
1305; SI-NEXT:    v_cmp_nge_f32_e32 vcc, s0, v0
1306; SI-NEXT:    s_and_b64 vcc, exec, vcc
1307; SI-NEXT:  ; %bb.4: ; %bb5
1308; SI-NEXT:  .LBB12_5:
1309; SI-NEXT:    s_mov_b64 exec, 0
1310; SI-NEXT:    exp null off, off, off, off done vm
1311; SI-NEXT:    s_endpgm
1312;
1313; GFX10-WAVE64-LABEL: no_skip_no_successors:
1314; GFX10-WAVE64:       ; %bb.0: ; %bb
1315; GFX10-WAVE64-NEXT:    v_cmp_nge_f32_e64 s[4:5], s1, 0
1316; GFX10-WAVE64-NEXT:    s_and_b64 vcc, exec, s[4:5]
1317; GFX10-WAVE64-NEXT:    s_cbranch_vccz .LBB12_3
1318; GFX10-WAVE64-NEXT:  ; %bb.1: ; %bb6
1319; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
1320; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
1321; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB12_5
1322; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb6
1323; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1324; GFX10-WAVE64-NEXT:  .LBB12_3: ; %bb3
1325; GFX10-WAVE64-NEXT:    v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0
1326; GFX10-WAVE64-NEXT:    s_and_b64 vcc, exec, s[0:1]
1327; GFX10-WAVE64-NEXT:  ; %bb.4: ; %bb5
1328; GFX10-WAVE64-NEXT:  .LBB12_5:
1329; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1330; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1331; GFX10-WAVE64-NEXT:    s_endpgm
1332;
1333; GFX10-WAVE32-LABEL: no_skip_no_successors:
1334; GFX10-WAVE32:       ; %bb.0: ; %bb
1335; GFX10-WAVE32-NEXT:    v_cmp_nge_f32_e64 s1, s1, 0
1336; GFX10-WAVE32-NEXT:    s_and_b32 vcc_lo, exec_lo, s1
1337; GFX10-WAVE32-NEXT:    s_cbranch_vccz .LBB12_3
1338; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb6
1339; GFX10-WAVE32-NEXT:    s_mov_b32 s2, exec_lo
1340; GFX10-WAVE32-NEXT:    s_andn2_b32 s2, s2, exec_lo
1341; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB12_5
1342; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb6
1343; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1344; GFX10-WAVE32-NEXT:  .LBB12_3: ; %bb3
1345; GFX10-WAVE32-NEXT:    v_cmp_nle_f32_e64 s0, 0x3e7ae148, s0
1346; GFX10-WAVE32-NEXT:    s_and_b32 vcc_lo, exec_lo, s0
1347; GFX10-WAVE32-NEXT:  ; %bb.4: ; %bb5
1348; GFX10-WAVE32-NEXT:  .LBB12_5:
1349; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1350; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1351; GFX10-WAVE32-NEXT:    s_endpgm
1352;
1353; GFX11-LABEL: no_skip_no_successors:
1354; GFX11:       ; %bb.0: ; %bb
1355; GFX11-NEXT:    v_cmp_nge_f32_e64 s[4:5], s1, 0
1356; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1357; GFX11-NEXT:    s_and_b64 vcc, exec, s[4:5]
1358; GFX11-NEXT:    s_cbranch_vccz .LBB12_3
1359; GFX11-NEXT:  ; %bb.1: ; %bb6
1360; GFX11-NEXT:    s_mov_b64 s[2:3], exec
1361; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1362; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], exec
1363; GFX11-NEXT:    s_cbranch_scc0 .LBB12_5
1364; GFX11-NEXT:  ; %bb.2: ; %bb6
1365; GFX11-NEXT:    s_mov_b64 exec, 0
1366; GFX11-NEXT:  .LBB12_3: ; %bb3
1367; GFX11-NEXT:    v_cmp_nle_f32_e64 s[0:1], 0x3e7ae148, s0
1368; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
1369; GFX11-NEXT:    s_and_b64 vcc, exec, s[0:1]
1370; GFX11-NEXT:  ; %bb.4: ; %bb5
1371; GFX11-NEXT:  .LBB12_5:
1372; GFX11-NEXT:    s_mov_b64 exec, 0
1373; GFX11-NEXT:    exp mrt0 off, off, off, off done
1374; GFX11-NEXT:    s_endpgm
1375bb:
1376  %tmp = fcmp ult float %arg1, 0.000000e+00
1377  %tmp2 = fcmp ult float %arg, 0x3FCF5C2900000000
1378  br i1 %tmp, label %bb6, label %bb3
1379
1380bb3:                                              ; preds = %bb
1381  br i1 %tmp2, label %bb5, label %bb4
1382
1383bb4:                                              ; preds = %bb3
1384  br i1 true, label %bb5, label %bb7
1385
1386bb5:                                              ; preds = %bb4, %bb3
1387  unreachable
1388
1389bb6:                                              ; preds = %bb
  ; Constant-false condition: the checks show the saved exec copy being and-not'ed
  ; with exec itself and exec then set to 0, i.e. every active lane is removed.
1390  call void @llvm.amdgcn.kill(i1 false)
1391  unreachable
1392
1393bb7:                                              ; preds = %bb4
1394  ret void
1395}
1396
; A conditionally executed kill (%bb3) followed by a join block (%bb4) that
; samples an image and then branches again on the sampled value.
; The checks expect a copy of exec to be saved and whole-quad mode to be
; entered (s_wqm) at function entry, before the first branch.
1397define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, float %arg3) #0 {
1398; SI-LABEL: if_after_kill_block:
1399; SI:       ; %bb.0: ; %bb
1400; SI-NEXT:    s_mov_b64 s[2:3], exec
1401; SI-NEXT:    s_wqm_b64 exec, exec
1402; SI-NEXT:    s_mov_b32 s0, 0
1403; SI-NEXT:    v_cmp_nle_f32_e32 vcc, 0, v1
1404; SI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1405; SI-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
1406; SI-NEXT:    s_cbranch_execz .LBB13_3
1407; SI-NEXT:  ; %bb.1: ; %bb3
1408; SI-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
1409; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
1410; SI-NEXT:    s_cbranch_scc0 .LBB13_6
1411; SI-NEXT:  ; %bb.2: ; %bb3
1412; SI-NEXT:    s_andn2_b64 exec, exec, vcc
1413; SI-NEXT:  .LBB13_3: ; %bb4
1414; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
1415; SI-NEXT:    s_mov_b32 s1, s0
1416; SI-NEXT:    s_mov_b32 s2, s0
1417; SI-NEXT:    s_mov_b32 s3, s0
1418; SI-NEXT:    s_mov_b32 s4, s0
1419; SI-NEXT:    s_mov_b32 s5, s0
1420; SI-NEXT:    s_mov_b32 s6, s0
1421; SI-NEXT:    s_mov_b32 s7, s0
1422; SI-NEXT:    image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10
1423; SI-NEXT:    s_waitcnt vmcnt(0)
1424; SI-NEXT:    v_cmp_neq_f32_e32 vcc, 0, v0
1425; SI-NEXT:    s_and_saveexec_b64 s[0:1], vcc
1426; SI-NEXT:    s_cbranch_execz .LBB13_5
1427; SI-NEXT:  ; %bb.4: ; %bb8
1428; SI-NEXT:    s_mov_b32 s3, 0xf000
1429; SI-NEXT:    s_mov_b32 s2, -1
1430; SI-NEXT:    v_mov_b32_e32 v0, 9
1431; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
1432; SI-NEXT:    s_waitcnt vmcnt(0)
1433; SI-NEXT:  .LBB13_5: ; %UnifiedReturnBlock
1434; SI-NEXT:    s_endpgm
1435; SI-NEXT:  .LBB13_6:
1436; SI-NEXT:    s_mov_b64 exec, 0
1437; SI-NEXT:    exp null off, off, off, off done vm
1438; SI-NEXT:    s_endpgm
1439;
1440; GFX10-WAVE64-LABEL: if_after_kill_block:
1441; GFX10-WAVE64:       ; %bb.0: ; %bb
1442; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
1443; GFX10-WAVE64-NEXT:    s_wqm_b64 exec, exec
1444; GFX10-WAVE64-NEXT:    v_cmp_nle_f32_e32 vcc, 0, v1
1445; GFX10-WAVE64-NEXT:    s_mov_b32 s0, 0
1446; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1447; GFX10-WAVE64-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
1448; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB13_3
1449; GFX10-WAVE64-NEXT:  ; %bb.1: ; %bb3
1450; GFX10-WAVE64-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
1451; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], vcc
1452; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB13_6
1453; GFX10-WAVE64-NEXT:  ; %bb.2: ; %bb3
1454; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, vcc
1455; GFX10-WAVE64-NEXT:  .LBB13_3: ; %bb4
1456; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[4:5]
1457; GFX10-WAVE64-NEXT:    s_mov_b32 s1, s0
1458; GFX10-WAVE64-NEXT:    s_mov_b32 s2, s0
1459; GFX10-WAVE64-NEXT:    s_mov_b32 s3, s0
1460; GFX10-WAVE64-NEXT:    s_mov_b32 s4, s0
1461; GFX10-WAVE64-NEXT:    s_mov_b32 s5, s0
1462; GFX10-WAVE64-NEXT:    s_mov_b32 s6, s0
1463; GFX10-WAVE64-NEXT:    s_mov_b32 s7, s0
1464; GFX10-WAVE64-NEXT:    image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
1465; GFX10-WAVE64-NEXT:    s_waitcnt vmcnt(0)
1466; GFX10-WAVE64-NEXT:    v_cmp_neq_f32_e32 vcc, 0, v0
1467; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[0:1], vcc
1468; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB13_5
1469; GFX10-WAVE64-NEXT:  ; %bb.4: ; %bb8
1470; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v0, 9
1471; GFX10-WAVE64-NEXT:    global_store_dword v[0:1], v0, off
1472; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
1473; GFX10-WAVE64-NEXT:  .LBB13_5: ; %UnifiedReturnBlock
1474; GFX10-WAVE64-NEXT:    s_endpgm
1475; GFX10-WAVE64-NEXT:  .LBB13_6:
1476; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1477; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1478; GFX10-WAVE64-NEXT:    s_endpgm
1479;
1480; GFX10-WAVE32-LABEL: if_after_kill_block:
1481; GFX10-WAVE32:       ; %bb.0: ; %bb
1482; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
1483; GFX10-WAVE32-NEXT:    s_wqm_b32 exec_lo, exec_lo
1484; GFX10-WAVE32-NEXT:    v_cmp_nle_f32_e32 vcc_lo, 0, v1
1485; GFX10-WAVE32-NEXT:    s_mov_b32 s0, 0
1486; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s2, vcc_lo
1487; GFX10-WAVE32-NEXT:    s_xor_b32 s2, exec_lo, s2
1488; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB13_3
1489; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb3
1490; GFX10-WAVE32-NEXT:    v_cmp_ngt_f32_e32 vcc_lo, 0, v0
1491; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, vcc_lo
1492; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB13_6
1493; GFX10-WAVE32-NEXT:  ; %bb.2: ; %bb3
1494; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, vcc_lo
1495; GFX10-WAVE32-NEXT:  .LBB13_3: ; %bb4
1496; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s2
1497; GFX10-WAVE32-NEXT:    s_mov_b32 s1, s0
1498; GFX10-WAVE32-NEXT:    s_mov_b32 s2, s0
1499; GFX10-WAVE32-NEXT:    s_mov_b32 s3, s0
1500; GFX10-WAVE32-NEXT:    s_mov_b32 s4, s0
1501; GFX10-WAVE32-NEXT:    s_mov_b32 s5, s0
1502; GFX10-WAVE32-NEXT:    s_mov_b32 s6, s0
1503; GFX10-WAVE32-NEXT:    s_mov_b32 s7, s0
1504; GFX10-WAVE32-NEXT:    image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
1505; GFX10-WAVE32-NEXT:    s_waitcnt vmcnt(0)
1506; GFX10-WAVE32-NEXT:    v_cmp_neq_f32_e32 vcc_lo, 0, v0
1507; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s0, vcc_lo
1508; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB13_5
1509; GFX10-WAVE32-NEXT:  ; %bb.4: ; %bb8
1510; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v0, 9
1511; GFX10-WAVE32-NEXT:    global_store_dword v[0:1], v0, off
1512; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
1513; GFX10-WAVE32-NEXT:  .LBB13_5: ; %UnifiedReturnBlock
1514; GFX10-WAVE32-NEXT:    s_endpgm
1515; GFX10-WAVE32-NEXT:  .LBB13_6:
1516; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1517; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1518; GFX10-WAVE32-NEXT:    s_endpgm
1519;
1520; GFX11-LABEL: if_after_kill_block:
1521; GFX11:       ; %bb.0: ; %bb
1522; GFX11-NEXT:    s_mov_b64 s[2:3], exec
1523; GFX11-NEXT:    s_wqm_b64 exec, exec
1524; GFX11-NEXT:    s_mov_b32 s0, 0
1525; GFX11-NEXT:    s_mov_b64 s[4:5], exec
1526; GFX11-NEXT:    v_cmpx_nle_f32_e32 0, v1
1527; GFX11-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
1528; GFX11-NEXT:    s_cbranch_execz .LBB13_3
1529; GFX11-NEXT:  ; %bb.1: ; %bb3
1530; GFX11-NEXT:    v_cmp_ngt_f32_e32 vcc, 0, v0
1531; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], vcc
1532; GFX11-NEXT:    s_cbranch_scc0 .LBB13_6
1533; GFX11-NEXT:  ; %bb.2: ; %bb3
1534; GFX11-NEXT:    s_and_not1_b64 exec, exec, vcc
1535; GFX11-NEXT:  .LBB13_3: ; %bb4
1536; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1537; GFX11-NEXT:    s_or_b64 exec, exec, s[4:5]
1538; GFX11-NEXT:    s_mov_b32 s1, s0
1539; GFX11-NEXT:    s_mov_b32 s2, s0
1540; GFX11-NEXT:    s_mov_b32 s3, s0
1541; GFX11-NEXT:    s_mov_b32 s4, s0
1542; GFX11-NEXT:    s_mov_b32 s5, s0
1543; GFX11-NEXT:    s_mov_b32 s6, s0
1544; GFX11-NEXT:    s_mov_b32 s7, s0
1545; GFX11-NEXT:    image_sample_c v0, v[2:3], s[0:7], s[0:3] dmask:0x10 dim:SQ_RSRC_IMG_1D
1546; GFX11-NEXT:    s_mov_b64 s[0:1], exec
1547; GFX11-NEXT:    s_waitcnt vmcnt(0)
1548; GFX11-NEXT:    v_cmpx_neq_f32_e32 0, v0
1549; GFX11-NEXT:    s_cbranch_execz .LBB13_5
1550; GFX11-NEXT:  ; %bb.4: ; %bb8
1551; GFX11-NEXT:    v_mov_b32_e32 v0, 9
1552; GFX11-NEXT:    global_store_b32 v[0:1], v0, off dlc
1553; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
1554; GFX11-NEXT:  .LBB13_5: ; %UnifiedReturnBlock
1555; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1556; GFX11-NEXT:    s_endpgm
1557; GFX11-NEXT:  .LBB13_6:
1558; GFX11-NEXT:    s_mov_b64 exec, 0
1559; GFX11-NEXT:    exp mrt0 off, off, off, off done
1560; GFX11-NEXT:    s_endpgm
1561bb:
1562  %tmp = fcmp ult float %arg1, 0.000000e+00
1563  br i1 %tmp, label %bb3, label %bb4
1564
1565bb3:                                              ; preds = %bb
  ; Lanes for which (%arg < 0.0) is false are removed from exec (inverted compare
  ; into vcc, then and-not of exec with vcc in the checks).
1566  %cmp.arg = fcmp olt float %arg, 0.0
1567  call void @llvm.amdgcn.kill(i1 %cmp.arg)
1568  br label %bb4
1569
1570bb4:                                              ; preds = %bb3, %bb
1571  %tmp5 = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 16, float %arg2, float %arg3, <8 x i32> undef, <4 x i32> undef, i1 0, i32 0, i32 0)
1572  %tmp6 = extractelement <4 x float> %tmp5, i32 0
1573  %tmp7 = fcmp une float %tmp6, 0.000000e+00
1574  br i1 %tmp7, label %bb8, label %bb9
1575
1576bb8:                                              ; preds = %bb4
1577  store volatile i32 9, i32 addrspace(1)* undef
1578  ret void
1579
1580bb9:                                              ; preds = %bb4
1581  ret void
1582}
1583
; cbranch_kill: pixel shader (amdgpu_ps) in which one side of a divergent
; branch unconditionally kills its lanes with llvm.amdgcn.kill(i1 false) before
; both sides rejoin at a shared export.
;   %0    - inreg i32; not referenced by the body.
;   %val0 - multiplied with the sampled value on the %live path.
;   %val1 - supplies all three coordinates of the 2darray sample.
; In each expected output below, the %kill path removes the current exec bits
; from a copy of exec taken at function entry, then uses s_cbranch_scc0 to
; reach a trailing block that zeroes exec, issues an export with every channel
; off, and ends the program.
1584define amdgpu_ps void @cbranch_kill(i32 inreg %0, float %val0, float %val1) {
1585; SI-LABEL: cbranch_kill:
1586; SI:       ; %bb.0: ; %.entry
1587; SI-NEXT:    s_mov_b32 s4, 0
1588; SI-NEXT:    s_mov_b64 s[0:1], exec
1589; SI-NEXT:    v_mov_b32_e32 v4, 0
1590; SI-NEXT:    v_mov_b32_e32 v2, v1
1591; SI-NEXT:    v_mov_b32_e32 v3, v1
1592; SI-NEXT:    s_mov_b32 s5, s4
1593; SI-NEXT:    s_mov_b32 s6, s4
1594; SI-NEXT:    s_mov_b32 s7, s4
1595; SI-NEXT:    s_mov_b32 s8, s4
1596; SI-NEXT:    s_mov_b32 s9, s4
1597; SI-NEXT:    s_mov_b32 s10, s4
1598; SI-NEXT:    s_mov_b32 s11, s4
1599; SI-NEXT:    image_sample_l v1, v[1:4], s[4:11], s[0:3] dmask:0x1 da
1600; SI-NEXT:    s_waitcnt vmcnt(0)
1601; SI-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v1
1602; SI-NEXT:    s_and_saveexec_b64 s[2:3], vcc
1603; SI-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
1604; SI-NEXT:    s_cbranch_execz .LBB14_3
1605; SI-NEXT:  ; %bb.1: ; %kill
1606; SI-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
1607; SI-NEXT:    ; implicit-def: $vgpr0
1608; SI-NEXT:    ; implicit-def: $vgpr1
1609; SI-NEXT:    s_cbranch_scc0 .LBB14_6
1610; SI-NEXT:  ; %bb.2: ; %kill
1611; SI-NEXT:    s_mov_b64 exec, 0
1612; SI-NEXT:  .LBB14_3: ; %Flow
1613; SI-NEXT:    s_or_saveexec_b64 s[0:1], s[2:3]
1614; SI-NEXT:    ; implicit-def: $vgpr2
1615; SI-NEXT:    s_xor_b64 exec, exec, s[0:1]
1616; SI-NEXT:  ; %bb.4: ; %live
1617; SI-NEXT:    v_mul_f32_e32 v2, v0, v1
1618; SI-NEXT:  ; %bb.5: ; %export
1619; SI-NEXT:    s_or_b64 exec, exec, s[0:1]
1620; SI-NEXT:    exp mrt0 v2, v2, v2, v2 done vm
1621; SI-NEXT:    s_endpgm
1622; SI-NEXT:  .LBB14_6:
1623; SI-NEXT:    s_mov_b64 exec, 0
1624; SI-NEXT:    exp null off, off, off, off done vm
1625; SI-NEXT:    s_endpgm
1626;
1627; GFX10-WAVE64-LABEL: cbranch_kill:
1628; GFX10-WAVE64:       ; %bb.0: ; %.entry
1629; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v2, 0
1630; GFX10-WAVE64-NEXT:    s_mov_b32 s4, 0
1631; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], exec
1632; GFX10-WAVE64-NEXT:    s_mov_b32 s5, s4
1633; GFX10-WAVE64-NEXT:    s_mov_b32 s6, s4
1634; GFX10-WAVE64-NEXT:    s_mov_b32 s7, s4
1635; GFX10-WAVE64-NEXT:    s_mov_b32 s8, s4
1636; GFX10-WAVE64-NEXT:    s_mov_b32 s9, s4
1637; GFX10-WAVE64-NEXT:    s_mov_b32 s10, s4
1638; GFX10-WAVE64-NEXT:    s_mov_b32 s11, s4
1639; GFX10-WAVE64-NEXT:    image_sample_l v1, [v1, v1, v1, v2], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
1640; GFX10-WAVE64-NEXT:    s_waitcnt vmcnt(0)
1641; GFX10-WAVE64-NEXT:    v_cmp_ge_f32_e32 vcc, 0, v1
1642; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[2:3], vcc
1643; GFX10-WAVE64-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
1644; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB14_3
1645; GFX10-WAVE64-NEXT:  ; %bb.1: ; %kill
1646; GFX10-WAVE64-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
1647; GFX10-WAVE64-NEXT:    ; implicit-def: $vgpr0
1648; GFX10-WAVE64-NEXT:    ; implicit-def: $vgpr1
1649; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB14_6
1650; GFX10-WAVE64-NEXT:  ; %bb.2: ; %kill
1651; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1652; GFX10-WAVE64-NEXT:  .LBB14_3: ; %Flow
1653; GFX10-WAVE64-NEXT:    s_or_saveexec_b64 s[0:1], s[2:3]
1654; GFX10-WAVE64-NEXT:    ; implicit-def: $vgpr2
1655; GFX10-WAVE64-NEXT:    s_xor_b64 exec, exec, s[0:1]
1656; GFX10-WAVE64-NEXT:  ; %bb.4: ; %live
1657; GFX10-WAVE64-NEXT:    v_mul_f32_e32 v2, v0, v1
1658; GFX10-WAVE64-NEXT:  ; %bb.5: ; %export
1659; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[0:1]
1660; GFX10-WAVE64-NEXT:    exp mrt0 v2, v2, v2, v2 done vm
1661; GFX10-WAVE64-NEXT:    s_endpgm
1662; GFX10-WAVE64-NEXT:  .LBB14_6:
1663; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1664; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1665; GFX10-WAVE64-NEXT:    s_endpgm
1666;
1667; GFX10-WAVE32-LABEL: cbranch_kill:
1668; GFX10-WAVE32:       ; %bb.0: ; %.entry
1669; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v2, 0
1670; GFX10-WAVE32-NEXT:    s_mov_b32 s4, 0
1671; GFX10-WAVE32-NEXT:    s_mov_b32 s0, exec_lo
1672; GFX10-WAVE32-NEXT:    s_mov_b32 s5, s4
1673; GFX10-WAVE32-NEXT:    s_mov_b32 s6, s4
1674; GFX10-WAVE32-NEXT:    s_mov_b32 s7, s4
1675; GFX10-WAVE32-NEXT:    s_mov_b32 s8, s4
1676; GFX10-WAVE32-NEXT:    s_mov_b32 s9, s4
1677; GFX10-WAVE32-NEXT:    s_mov_b32 s10, s4
1678; GFX10-WAVE32-NEXT:    s_mov_b32 s11, s4
1679; GFX10-WAVE32-NEXT:    image_sample_l v1, [v1, v1, v1, v2], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
1680; GFX10-WAVE32-NEXT:    s_waitcnt vmcnt(0)
1681; GFX10-WAVE32-NEXT:    v_cmp_ge_f32_e32 vcc_lo, 0, v1
1682; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s1, vcc_lo
1683; GFX10-WAVE32-NEXT:    s_xor_b32 s1, exec_lo, s1
1684; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB14_3
1685; GFX10-WAVE32-NEXT:  ; %bb.1: ; %kill
1686; GFX10-WAVE32-NEXT:    s_andn2_b32 s0, s0, exec_lo
1687; GFX10-WAVE32-NEXT:    ; implicit-def: $vgpr0
1688; GFX10-WAVE32-NEXT:    ; implicit-def: $vgpr1
1689; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB14_6
1690; GFX10-WAVE32-NEXT:  ; %bb.2: ; %kill
1691; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1692; GFX10-WAVE32-NEXT:  .LBB14_3: ; %Flow
1693; GFX10-WAVE32-NEXT:    s_or_saveexec_b32 s0, s1
1694; GFX10-WAVE32-NEXT:    ; implicit-def: $vgpr2
1695; GFX10-WAVE32-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
1696; GFX10-WAVE32-NEXT:  ; %bb.4: ; %live
1697; GFX10-WAVE32-NEXT:    v_mul_f32_e32 v2, v0, v1
1698; GFX10-WAVE32-NEXT:  ; %bb.5: ; %export
1699; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s0
1700; GFX10-WAVE32-NEXT:    exp mrt0 v2, v2, v2, v2 done vm
1701; GFX10-WAVE32-NEXT:    s_endpgm
1702; GFX10-WAVE32-NEXT:  .LBB14_6:
1703; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1704; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1705; GFX10-WAVE32-NEXT:    s_endpgm
1706;
1707; GFX11-LABEL: cbranch_kill:
1708; GFX11:       ; %bb.0: ; %.entry
1709; GFX11-NEXT:    v_mov_b32_e32 v2, 0
1710; GFX11-NEXT:    s_mov_b32 s4, 0
1711; GFX11-NEXT:    s_mov_b64 s[0:1], exec
1712; GFX11-NEXT:    s_mov_b32 s5, s4
1713; GFX11-NEXT:    s_mov_b32 s6, s4
1714; GFX11-NEXT:    s_mov_b32 s7, s4
1715; GFX11-NEXT:    s_mov_b32 s8, s4
1716; GFX11-NEXT:    s_mov_b32 s9, s4
1717; GFX11-NEXT:    s_mov_b32 s10, s4
1718; GFX11-NEXT:    s_mov_b32 s11, s4
1719; GFX11-NEXT:    image_sample_l v1, [v1, v1, v1, v2], s[4:11], s[0:3] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
1720; GFX11-NEXT:    s_mov_b64 s[2:3], exec
1721; GFX11-NEXT:    s_waitcnt vmcnt(0)
1722; GFX11-NEXT:    v_cmpx_ge_f32_e32 0, v1
1723; GFX11-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
1724; GFX11-NEXT:    s_cbranch_execz .LBB14_3
1725; GFX11-NEXT:  ; %bb.1: ; %kill
1726; GFX11-NEXT:    s_and_not1_b64 s[0:1], s[0:1], exec
1727; GFX11-NEXT:    ; implicit-def: $vgpr0
1728; GFX11-NEXT:    ; implicit-def: $vgpr1
1729; GFX11-NEXT:    s_cbranch_scc0 .LBB14_6
1730; GFX11-NEXT:  ; %bb.2: ; %kill
1731; GFX11-NEXT:    s_mov_b64 exec, 0
1732; GFX11-NEXT:  .LBB14_3: ; %Flow
1733; GFX11-NEXT:    s_or_saveexec_b64 s[0:1], s[2:3]
1734; GFX11-NEXT:    ; implicit-def: $vgpr2
1735; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
1736; GFX11-NEXT:    s_xor_b64 exec, exec, s[0:1]
1737; GFX11-NEXT:  ; %bb.4: ; %live
1738; GFX11-NEXT:    v_mul_f32_e32 v2, v0, v1
1739; GFX11-NEXT:  ; %bb.5: ; %export
1740; GFX11-NEXT:    s_or_b64 exec, exec, s[0:1]
1741; GFX11-NEXT:    exp mrt0 v2, v2, v2, v2 done
1742; GFX11-NEXT:    s_endpgm
1743; GFX11-NEXT:  .LBB14_6:
1744; GFX11-NEXT:    s_mov_b64 exec, 0
1745; GFX11-NEXT:    exp mrt0 off, off, off, off done
1746; GFX11-NEXT:    s_endpgm
; Sample with a constant 0.0 as the fourth float operand. Lanes whose sample is
; greater than zero or unordered (ugt) go to %live; all other lanes go to %kill.
1747.entry:
1748  %sample = call float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 1, float %val1, float %val1, float %val1, float 0.000000e+00, <8 x i32> undef, <4 x i32> undef, i1 false, i32 0, i32 0)
1749  %cond0 = fcmp ugt float %sample, 0.000000e+00
1750  br i1 %cond0, label %live, label %kill
1751
; The kill condition is the constant false, so every lane that reaches this
; block is killed; control still continues to %export.
1752kill:
1753  call void @llvm.amdgcn.kill(i1 false)
1754  br label %export
1755
1756live:
1757  %scale = fmul reassoc nnan nsz arcp contract float %val0, %sample
1758  br label %export
1759
; Lanes arriving from %kill carry undef. The export uses target 0, enable mask
; 15 and sets both trailing i1 flags; the expected output prints this as
; `exp mrt0 ... done vm` for the SI and GFX10 runs and `exp mrt0 ... done` for
; the GFX11 run.
1760export:
1761  %proxy = phi float [ undef, %kill ], [ %scale, %live ]
1762  call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %proxy, float %proxy, float %proxy, float %proxy, i1 immarg true, i1 immarg true) #3
1763  ret void
1764}
1765
1766
; complex_loop: pixel shader (amdgpu_ps) with an unconditional kill
; (llvm.amdgcn.kill(i1 false)) on a divergent path inside a counted loop.
;   %cmpa - inreg; the loop is entered only when this is signed-greater than 0.
;   %cmpb - lanes whose counter is unsigned-greater than this take the %kill path.
;   %cmpc - the loop repeats while the incremented counter is signed-less than this.
; The final counter value (or -1 when the loop is skipped) is bitcast to float
; and exported. In each expected output below, the %kill block inside the loop
; branches via s_cbranch_scc0 to a trailing block (.LBB15_8) that zeroes exec,
; issues an export with every channel off, and ends the program.
1767define amdgpu_ps void @complex_loop(i32 inreg %cmpa, i32 %cmpb, i32 %cmpc) {
1768; SI-LABEL: complex_loop:
1769; SI:       ; %bb.0: ; %.entry
1770; SI-NEXT:    s_cmp_lt_i32 s0, 1
1771; SI-NEXT:    s_cbranch_scc1 .LBB15_7
1772; SI-NEXT:  ; %bb.1: ; %.lr.ph
1773; SI-NEXT:    s_mov_b64 s[2:3], exec
1774; SI-NEXT:    s_mov_b32 s6, 0
1775; SI-NEXT:    s_mov_b64 s[0:1], 0
1776; SI-NEXT:    s_branch .LBB15_3
1777; SI-NEXT:  .LBB15_2: ; %latch
1778; SI-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1779; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
1780; SI-NEXT:    s_add_i32 s6, s6, 1
1781; SI-NEXT:    v_cmp_ge_i32_e32 vcc, s6, v1
1782; SI-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
1783; SI-NEXT:    v_mov_b32_e32 v2, s6
1784; SI-NEXT:    s_andn2_b64 exec, exec, s[0:1]
1785; SI-NEXT:    s_cbranch_execz .LBB15_6
1786; SI-NEXT:  .LBB15_3: ; %hdr
1787; SI-NEXT:    ; =>This Inner Loop Header: Depth=1
1788; SI-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
1789; SI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1790; SI-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
1791; SI-NEXT:    s_cbranch_execz .LBB15_2
1792; SI-NEXT:  ; %bb.4: ; %kill
1793; SI-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1794; SI-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
1795; SI-NEXT:    s_cbranch_scc0 .LBB15_8
1796; SI-NEXT:  ; %bb.5: ; %kill
1797; SI-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1798; SI-NEXT:    s_mov_b64 exec, 0
1799; SI-NEXT:    s_branch .LBB15_2
1800; SI-NEXT:  .LBB15_6: ; %Flow
1801; SI-NEXT:    s_or_b64 exec, exec, s[0:1]
1802; SI-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1803; SI-NEXT:    s_endpgm
1804; SI-NEXT:  .LBB15_7:
1805; SI-NEXT:    v_mov_b32_e32 v2, -1
1806; SI-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1807; SI-NEXT:    s_endpgm
1808; SI-NEXT:  .LBB15_8:
1809; SI-NEXT:    s_mov_b64 exec, 0
1810; SI-NEXT:    exp null off, off, off, off done vm
1811; SI-NEXT:    s_endpgm
1812;
1813; GFX10-WAVE64-LABEL: complex_loop:
1814; GFX10-WAVE64:       ; %bb.0: ; %.entry
1815; GFX10-WAVE64-NEXT:    s_cmp_lt_i32 s0, 1
1816; GFX10-WAVE64-NEXT:    s_cbranch_scc1 .LBB15_7
1817; GFX10-WAVE64-NEXT:  ; %bb.1: ; %.lr.ph
1818; GFX10-WAVE64-NEXT:    s_mov_b64 s[2:3], exec
1819; GFX10-WAVE64-NEXT:    s_mov_b32 s6, 0
1820; GFX10-WAVE64-NEXT:    s_mov_b64 s[0:1], 0
1821; GFX10-WAVE64-NEXT:    s_branch .LBB15_3
1822; GFX10-WAVE64-NEXT:  .LBB15_2: ; %latch
1823; GFX10-WAVE64-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1824; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[4:5]
1825; GFX10-WAVE64-NEXT:    s_add_i32 s6, s6, 1
1826; GFX10-WAVE64-NEXT:    v_cmp_ge_i32_e32 vcc, s6, v1
1827; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v2, s6
1828; GFX10-WAVE64-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
1829; GFX10-WAVE64-NEXT:    s_andn2_b64 exec, exec, s[0:1]
1830; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB15_6
1831; GFX10-WAVE64-NEXT:  .LBB15_3: ; %hdr
1832; GFX10-WAVE64-NEXT:    ; =>This Inner Loop Header: Depth=1
1833; GFX10-WAVE64-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
1834; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1835; GFX10-WAVE64-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
1836; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB15_2
1837; GFX10-WAVE64-NEXT:  ; %bb.4: ; %kill
1838; GFX10-WAVE64-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1839; GFX10-WAVE64-NEXT:    s_andn2_b64 s[2:3], s[2:3], exec
1840; GFX10-WAVE64-NEXT:    s_cbranch_scc0 .LBB15_8
1841; GFX10-WAVE64-NEXT:  ; %bb.5: ; %kill
1842; GFX10-WAVE64-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1843; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1844; GFX10-WAVE64-NEXT:    s_branch .LBB15_2
1845; GFX10-WAVE64-NEXT:  .LBB15_6: ; %Flow
1846; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[0:1]
1847; GFX10-WAVE64-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1848; GFX10-WAVE64-NEXT:    s_endpgm
1849; GFX10-WAVE64-NEXT:  .LBB15_7:
1850; GFX10-WAVE64-NEXT:    v_mov_b32_e32 v2, -1
1851; GFX10-WAVE64-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1852; GFX10-WAVE64-NEXT:    s_endpgm
1853; GFX10-WAVE64-NEXT:  .LBB15_8:
1854; GFX10-WAVE64-NEXT:    s_mov_b64 exec, 0
1855; GFX10-WAVE64-NEXT:    exp null off, off, off, off done vm
1856; GFX10-WAVE64-NEXT:    s_endpgm
1857;
1858; GFX10-WAVE32-LABEL: complex_loop:
1859; GFX10-WAVE32:       ; %bb.0: ; %.entry
1860; GFX10-WAVE32-NEXT:    s_cmp_lt_i32 s0, 1
1861; GFX10-WAVE32-NEXT:    s_cbranch_scc1 .LBB15_7
1862; GFX10-WAVE32-NEXT:  ; %bb.1: ; %.lr.ph
1863; GFX10-WAVE32-NEXT:    s_mov_b32 s1, exec_lo
1864; GFX10-WAVE32-NEXT:    s_mov_b32 s0, 0
1865; GFX10-WAVE32-NEXT:    s_mov_b32 s2, 0
1866; GFX10-WAVE32-NEXT:    s_branch .LBB15_3
1867; GFX10-WAVE32-NEXT:  .LBB15_2: ; %latch
1868; GFX10-WAVE32-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1869; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s3
1870; GFX10-WAVE32-NEXT:    s_add_i32 s2, s2, 1
1871; GFX10-WAVE32-NEXT:    v_cmp_ge_i32_e32 vcc_lo, s2, v1
1872; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v2, s2
1873; GFX10-WAVE32-NEXT:    s_or_b32 s0, vcc_lo, s0
1874; GFX10-WAVE32-NEXT:    s_andn2_b32 exec_lo, exec_lo, s0
1875; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB15_6
1876; GFX10-WAVE32-NEXT:  .LBB15_3: ; %hdr
1877; GFX10-WAVE32-NEXT:    ; =>This Inner Loop Header: Depth=1
1878; GFX10-WAVE32-NEXT:    v_cmp_gt_u32_e32 vcc_lo, s2, v0
1879; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s3, vcc_lo
1880; GFX10-WAVE32-NEXT:    s_xor_b32 s3, exec_lo, s3
1881; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB15_2
1882; GFX10-WAVE32-NEXT:  ; %bb.4: ; %kill
1883; GFX10-WAVE32-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1884; GFX10-WAVE32-NEXT:    s_andn2_b32 s1, s1, exec_lo
1885; GFX10-WAVE32-NEXT:    s_cbranch_scc0 .LBB15_8
1886; GFX10-WAVE32-NEXT:  ; %bb.5: ; %kill
1887; GFX10-WAVE32-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1888; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1889; GFX10-WAVE32-NEXT:    s_branch .LBB15_2
1890; GFX10-WAVE32-NEXT:  .LBB15_6: ; %Flow
1891; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s0
1892; GFX10-WAVE32-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1893; GFX10-WAVE32-NEXT:    s_endpgm
1894; GFX10-WAVE32-NEXT:  .LBB15_7:
1895; GFX10-WAVE32-NEXT:    v_mov_b32_e32 v2, -1
1896; GFX10-WAVE32-NEXT:    exp mrt0 v2, v2, v0, v0 done vm
1897; GFX10-WAVE32-NEXT:    s_endpgm
1898; GFX10-WAVE32-NEXT:  .LBB15_8:
1899; GFX10-WAVE32-NEXT:    s_mov_b32 exec_lo, 0
1900; GFX10-WAVE32-NEXT:    exp null off, off, off, off done vm
1901; GFX10-WAVE32-NEXT:    s_endpgm
1902;
1903; GFX11-LABEL: complex_loop:
1904; GFX11:       ; %bb.0: ; %.entry
1905; GFX11-NEXT:    s_cmp_lt_i32 s0, 1
1906; GFX11-NEXT:    s_cbranch_scc1 .LBB15_7
1907; GFX11-NEXT:  ; %bb.1: ; %.lr.ph
1908; GFX11-NEXT:    s_mov_b64 s[2:3], exec
1909; GFX11-NEXT:    s_mov_b32 s6, 0
1910; GFX11-NEXT:    s_mov_b64 s[0:1], 0
1911; GFX11-NEXT:    s_branch .LBB15_3
1912; GFX11-NEXT:  .LBB15_2: ; %latch
1913; GFX11-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1914; GFX11-NEXT:    s_or_b64 exec, exec, s[4:5]
1915; GFX11-NEXT:    s_add_i32 s6, s6, 1
1916; GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
1917; GFX11-NEXT:    v_cmp_ge_i32_e32 vcc, s6, v1
1918; GFX11-NEXT:    v_mov_b32_e32 v2, s6
1919; GFX11-NEXT:    s_or_b64 s[0:1], vcc, s[0:1]
1920; GFX11-NEXT:    s_and_not1_b64 exec, exec, s[0:1]
1921; GFX11-NEXT:    s_cbranch_execz .LBB15_6
1922; GFX11-NEXT:  .LBB15_3: ; %hdr
1923; GFX11-NEXT:    ; =>This Inner Loop Header: Depth=1
1924; GFX11-NEXT:    s_mov_b64 s[4:5], exec
1925; GFX11-NEXT:    v_cmpx_gt_u32_e64 s6, v0
1926; GFX11-NEXT:    s_xor_b64 s[4:5], exec, s[4:5]
1927; GFX11-NEXT:    s_cbranch_execz .LBB15_2
1928; GFX11-NEXT:  ; %bb.4: ; %kill
1929; GFX11-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1930; GFX11-NEXT:    s_and_not1_b64 s[2:3], s[2:3], exec
1931; GFX11-NEXT:    s_cbranch_scc0 .LBB15_8
1932; GFX11-NEXT:  ; %bb.5: ; %kill
1933; GFX11-NEXT:    ; in Loop: Header=BB15_3 Depth=1
1934; GFX11-NEXT:    s_mov_b64 exec, 0
1935; GFX11-NEXT:    s_branch .LBB15_2
1936; GFX11-NEXT:  .LBB15_6: ; %Flow
1937; GFX11-NEXT:    s_or_b64 exec, exec, s[0:1]
1938; GFX11-NEXT:    exp mrt0 v2, v2, v0, v0 done
1939; GFX11-NEXT:    s_endpgm
1940; GFX11-NEXT:  .LBB15_7:
1941; GFX11-NEXT:    v_mov_b32_e32 v2, -1
1942; GFX11-NEXT:    exp mrt0 v2, v2, v0, v0 done
1943; GFX11-NEXT:    s_endpgm
1944; GFX11-NEXT:  .LBB15_8:
1945; GFX11-NEXT:    s_mov_b64 exec, 0
1946; GFX11-NEXT:    exp mrt0 off, off, off, off done
1947; GFX11-NEXT:    s_endpgm
; Skip the loop entirely unless %cmpa is signed-greater than 0.
1948.entry:
1949  %flaga = icmp sgt i32 %cmpa, 0
1950  br i1 %flaga, label %.lr.ph, label %._crit_edge
1951
1952.lr.ph:
1953  br label %hdr
1954
; %ctr starts at 0 and takes %ctr.next on each back edge from %latch.
1955hdr:
1956  %ctr = phi i32 [ 0, %.lr.ph ], [ %ctr.next, %latch ]
1957  %flagb = icmp ugt i32 %ctr, %cmpb
1958  br i1 %flagb, label %kill, label %latch
1959
; The kill condition is the constant false, so every lane that reaches this
; block is killed; control still continues to %latch.
1960kill:
1961  call void @llvm.amdgcn.kill(i1 false)
1962  br label %latch
1963
1964latch:
1965  %ctr.next = add nuw nsw i32 %ctr, 1
1966  %flagc = icmp slt i32 %ctr.next, %cmpc
1967  br i1 %flagc, label %hdr, label %._crit_edge
1968
; %tmp is -1 when the loop was never entered, otherwise the last %ctr.next.
; Only the first two export operands carry it; the last two are undef.
1969._crit_edge:
1970  %tmp = phi i32 [ -1, %.entry ], [ %ctr.next, %latch ]
1971  %out = bitcast i32 %tmp to float
1972  call void @llvm.amdgcn.exp.f32(i32 immarg 0, i32 immarg 15, float %out, float %out, float undef, float undef, i1 immarg true, i1 immarg true)
1973  ret void
1974}
1975
; skip_mode_switch: function without the amdgpu_ps calling convention and
; without any kill. When %arg equals 0 it calls
; llvm.amdgcn.s.setreg(i32 2049, i32 3), which every expected output below
; prints as `s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3`. All four expected
; outputs keep an s_cbranch_execz that jumps over that register write.
; The IR blocks are themselves named bb.0 and bb.1; in the expected output the
; IR name is the one after the second ';' (for example `; %bb.1: ; %bb.0`),
; while the leading `%bb.N` is the label the backend printed for that block.
1976define void @skip_mode_switch(i32 %arg) {
1977; SI-LABEL: skip_mode_switch:
1978; SI:       ; %bb.0: ; %entry
1979; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1980; SI-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1981; SI-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1982; SI-NEXT:    s_cbranch_execz .LBB16_2
1983; SI-NEXT:  ; %bb.1: ; %bb.0
1984; SI-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
1985; SI-NEXT:  .LBB16_2: ; %bb.1
1986; SI-NEXT:    s_or_b64 exec, exec, s[4:5]
1987; SI-NEXT:    s_setpc_b64 s[30:31]
1988;
1989; GFX10-WAVE64-LABEL: skip_mode_switch:
1990; GFX10-WAVE64:       ; %bb.0: ; %entry
1991; GFX10-WAVE64-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1992; GFX10-WAVE64-NEXT:    s_waitcnt_vscnt null, 0x0
1993; GFX10-WAVE64-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
1994; GFX10-WAVE64-NEXT:    s_and_saveexec_b64 s[4:5], vcc
1995; GFX10-WAVE64-NEXT:    s_cbranch_execz .LBB16_2
1996; GFX10-WAVE64-NEXT:  ; %bb.1: ; %bb.0
1997; GFX10-WAVE64-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
1998; GFX10-WAVE64-NEXT:  .LBB16_2: ; %bb.1
1999; GFX10-WAVE64-NEXT:    s_or_b64 exec, exec, s[4:5]
2000; GFX10-WAVE64-NEXT:    s_setpc_b64 s[30:31]
2001;
2002; GFX10-WAVE32-LABEL: skip_mode_switch:
2003; GFX10-WAVE32:       ; %bb.0: ; %entry
2004; GFX10-WAVE32-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2005; GFX10-WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
2006; GFX10-WAVE32-NEXT:    v_cmp_eq_u32_e32 vcc_lo, 0, v0
2007; GFX10-WAVE32-NEXT:    s_and_saveexec_b32 s4, vcc_lo
2008; GFX10-WAVE32-NEXT:    s_cbranch_execz .LBB16_2
2009; GFX10-WAVE32-NEXT:  ; %bb.1: ; %bb.0
2010; GFX10-WAVE32-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
2011; GFX10-WAVE32-NEXT:  .LBB16_2: ; %bb.1
2012; GFX10-WAVE32-NEXT:    s_or_b32 exec_lo, exec_lo, s4
2013; GFX10-WAVE32-NEXT:    s_setpc_b64 s[30:31]
2014;
2015; GFX11-LABEL: skip_mode_switch:
2016; GFX11:       ; %bb.0: ; %entry
2017; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
2018; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
2019; GFX11-NEXT:    s_mov_b64 s[0:1], exec
2020; GFX11-NEXT:    v_cmpx_eq_u32_e32 0, v0
2021; GFX11-NEXT:    s_cbranch_execz .LBB16_2
2022; GFX11-NEXT:  ; %bb.1: ; %bb.0
2023; GFX11-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 0, 2), 3
2024; GFX11-NEXT:  .LBB16_2: ; %bb.1
2025; GFX11-NEXT:    s_or_b64 exec, exec, s[0:1]
2026; GFX11-NEXT:    s_setpc_b64 s[30:31]
2027entry:
2028  %cmp = icmp eq i32 %arg, 0
2029  br i1 %cmp, label %bb.0, label %bb.1
2030
; Only lanes with %arg == 0 reach the register write.
2031bb.0:
2032  call void @llvm.amdgcn.s.setreg(i32 2049, i32 3)
2033  br label %bb.1
2034
2035bb.1:
2036  ret void
2037}
2038
; Declarations of the AMDGPU intrinsics called by the test functions above.
2039declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #3
2040declare float @llvm.amdgcn.image.sample.l.2darray.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #1
2041declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
2042declare void @llvm.amdgcn.kill(i1) #0
2043
2044declare void @llvm.amdgcn.s.setreg(i32 immarg, i32)
2045
; Attribute groups: #0 is attached to the kill declaration, #1 to the two image
; sample declarations, and #3 to the export declaration.
; NOTE(review): #2 has no user in this part of the file - confirm whether it is
; still referenced elsewhere before removing it.
2046attributes #0 = { nounwind }
2047attributes #1 = { nounwind readonly }
2048attributes #2 = { nounwind readnone speculatable }
2049attributes #3 = { inaccessiblememonly nounwind writeonly }
2050