1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope %s
3
4; Although it's modeled without any control flow in order to get better code
5; out of the structurizer, @llvm.amdgcn.kill actually ends the thread that calls
6; it with "false". In case it's called in a provably infinite loop, we still
7; need to successfully exit and export something, even if we can't know where
8; to jump to in the LLVM IR. Therefore we insert a null export ourselves in
9; this case right before the s_endpgm to avoid GPU hangs, which is what this
10; tests.
11
; Void amdgpu_ps function with a conditional infinite kill loop.
; When %0 is (ordered) less than 10.0 control goes to %end, which performs a
; regular export and returns. Otherwise control enters %loop, which calls
; @llvm.amdgcn.kill(i1 false) and branches back to itself, so the IR has no
; exit on that path. The expected output terminates that path in a separate
; block (.LBB0_6) that clears exec, issues a null export and ends the program.
; NOTE(review): the call sites reference attribute group #3, but only #0 is
; defined in the visible part of this file -- confirm this is intentional.
12define amdgpu_ps void @return_void(float %0) #0 {
13; CHECK-LABEL: return_void:
14; CHECK:       ; %bb.0: ; %main_body
15; CHECK-NEXT:    s_mov_b64 s[0:1], exec
16; CHECK-NEXT:    s_mov_b32 s2, 0x41200000
17; CHECK-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v0
18; CHECK-NEXT:    s_and_saveexec_b64 s[2:3], vcc
19; CHECK-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
20; CHECK-NEXT:    s_cbranch_execz .LBB0_3
21; CHECK-NEXT:  .LBB0_1: ; %loop
22; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
23; CHECK-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
24; CHECK-NEXT:    s_cbranch_scc0 .LBB0_6
25; CHECK-NEXT:  ; %bb.2: ; %loop
26; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
27; CHECK-NEXT:    s_mov_b64 exec, 0
28; CHECK-NEXT:    s_mov_b64 vcc, 0
29; CHECK-NEXT:    s_branch .LBB0_1
30; CHECK-NEXT:  .LBB0_3: ; %Flow1
31; CHECK-NEXT:    s_andn2_saveexec_b64 s[0:1], s[2:3]
32; CHECK-NEXT:    s_cbranch_execz .LBB0_5
33; CHECK-NEXT:  ; %bb.4: ; %end
34; CHECK-NEXT:    v_mov_b32_e32 v0, 1.0
35; CHECK-NEXT:    v_mov_b32_e32 v1, 0
36; CHECK-NEXT:    exp mrt0 v1, v1, v1, v0 done vm
37; CHECK-NEXT:  .LBB0_5: ; %UnifiedReturnBlock
38; CHECK-NEXT:    s_endpgm
39; CHECK-NEXT:  .LBB0_6:
40; CHECK-NEXT:    s_mov_b64 exec, 0
41; CHECK-NEXT:    exp null off, off, off, off done vm
42; CHECK-NEXT:    s_endpgm
43main_body:
; Inputs below 10.0 take the exporting path; everything else enters the loop.
44  %cmp = fcmp olt float %0, 1.000000e+01
45  br i1 %cmp, label %end, label %loop
46
; The only successor of %loop is %loop itself, so this path never returns in
; the IR.
47loop:
48  call void @llvm.amdgcn.kill(i1 false) #3
49  br label %loop
50
51end:
; Export (0, 0, 0, 1) to target 0 with enable mask 15; both trailing i1 flags
; are set, which the expected output prints as "done vm".
52  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 0., float 0., float 0., float 1., i1 true, i1 true) #3
53  ret void
54}
55
; Same control flow as a conditional infinite kill loop with a regular export
; on the other path, except that %end uses the compressed export intrinsic
; (@llvm.amdgcn.exp.compr.v2i16). The expected output still terminates the
; kill path in its own block (.LBB1_6) with an uncompressed null export
; followed by s_endpgm.
; NOTE(review): the call sites reference attribute group #3, but only #0 is
; defined in the visible part of this file -- confirm this is intentional.
56define amdgpu_ps void @return_void_compr(float %0) #0 {
57; CHECK-LABEL: return_void_compr:
58; CHECK:       ; %bb.0: ; %main_body
59; CHECK-NEXT:    s_mov_b64 s[0:1], exec
60; CHECK-NEXT:    s_mov_b32 s2, 0x41200000
61; CHECK-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v0
62; CHECK-NEXT:    s_and_saveexec_b64 s[2:3], vcc
63; CHECK-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
64; CHECK-NEXT:    s_cbranch_execz .LBB1_3
65; CHECK-NEXT:  .LBB1_1: ; %loop
66; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
67; CHECK-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
68; CHECK-NEXT:    s_cbranch_scc0 .LBB1_6
69; CHECK-NEXT:  ; %bb.2: ; %loop
70; CHECK-NEXT:    ; in Loop: Header=BB1_1 Depth=1
71; CHECK-NEXT:    s_mov_b64 exec, 0
72; CHECK-NEXT:    s_mov_b64 vcc, 0
73; CHECK-NEXT:    s_branch .LBB1_1
74; CHECK-NEXT:  .LBB1_3: ; %Flow1
75; CHECK-NEXT:    s_andn2_saveexec_b64 s[0:1], s[2:3]
76; CHECK-NEXT:    s_cbranch_execz .LBB1_5
77; CHECK-NEXT:  ; %bb.4: ; %end
78; CHECK-NEXT:    v_mov_b32_e32 v0, 0
79; CHECK-NEXT:    exp mrt0 v0, off, v0, off done compr vm
80; CHECK-NEXT:  .LBB1_5: ; %UnifiedReturnBlock
81; CHECK-NEXT:    s_endpgm
82; CHECK-NEXT:  .LBB1_6:
83; CHECK-NEXT:    s_mov_b64 exec, 0
84; CHECK-NEXT:    exp null off, off, off, off done vm
85; CHECK-NEXT:    s_endpgm
86main_body:
; Inputs below 10.0 take the exporting path; everything else enters the loop.
87  %cmp = fcmp olt float %0, 1.000000e+01
88  br i1 %cmp, label %end, label %loop
89
; The only successor of %loop is %loop itself, so this path never returns in
; the IR.
90loop:
91  call void @llvm.amdgcn.kill(i1 false) #3
92  br label %loop
93
94end:
; Compressed export of two all-zero <2 x i16> vectors to target 0 with enable
; mask 5; both trailing i1 flags are set ("done ... vm" in the expected
; output).
95  call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 5, <2 x i16> < i16 0, i16 0 >, <2 x i16> < i16 0, i16 0 >, i1 true, i1 true) #3
96  ret void
97}
98
99; test the case where there's only a kill in an infinite loop
; The function has no return instruction at all: %main_body jumps straight
; into %loop, which kills and branches back to itself. The expected output
; leaves the loop through .LBB2_3, which clears exec, issues a null export and
; ends the program.
; NOTE(review): the call site references attribute group #3, but only #0 is
; defined in the visible part of this file -- confirm this is intentional.
100define amdgpu_ps void @only_kill() #0 {
101; CHECK-LABEL: only_kill:
102; CHECK:       ; %bb.0: ; %main_body
103; CHECK-NEXT:    s_mov_b64 s[0:1], exec
104; CHECK-NEXT:  .LBB2_1: ; %loop
105; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
106; CHECK-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
107; CHECK-NEXT:    s_cbranch_scc0 .LBB2_3
108; CHECK-NEXT:  ; %bb.2: ; %loop
109; CHECK-NEXT:    ; in Loop: Header=BB2_1 Depth=1
110; CHECK-NEXT:    s_mov_b64 exec, 0
111; CHECK-NEXT:    s_branch .LBB2_1
112; CHECK-NEXT:  .LBB2_3:
113; CHECK-NEXT:    s_mov_b64 exec, 0
114; CHECK-NEXT:    exp null off, off, off, off done vm
115; CHECK-NEXT:    s_endpgm
116main_body:
117  br label %loop
118
; Unconditional self-loop around the kill; there is no other block to exit to.
119loop:
120  call void @llvm.amdgcn.kill(i1 false) #3
121  br label %loop
122}
123
124; Check that the epilog is the final block
; Variant of the conditional infinite kill loop in which the function returns
; a float instead of void. In the expected output the early-exit block
; (.LBB3_4: clear exec, null export, s_endpgm) is placed before the last label
; (.LBB3_5), and the normal path reaches that last label via s_branch.
; NOTE(review): the call site references attribute group #3, but only #0 is
; defined in the visible part of this file -- confirm this is intentional.
125define amdgpu_ps float @return_nonvoid(float %0) #0 {
126; CHECK-LABEL: return_nonvoid:
127; CHECK:       ; %bb.0: ; %main_body
128; CHECK-NEXT:    s_mov_b64 s[0:1], exec
129; CHECK-NEXT:    s_mov_b32 s2, 0x41200000
130; CHECK-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v0
131; CHECK-NEXT:    s_and_saveexec_b64 s[2:3], vcc
132; CHECK-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
133; CHECK-NEXT:    s_cbranch_execz .LBB3_3
134; CHECK-NEXT:  .LBB3_1: ; %loop
135; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
136; CHECK-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
137; CHECK-NEXT:    s_cbranch_scc0 .LBB3_4
138; CHECK-NEXT:  ; %bb.2: ; %loop
139; CHECK-NEXT:    ; in Loop: Header=BB3_1 Depth=1
140; CHECK-NEXT:    s_mov_b64 exec, 0
141; CHECK-NEXT:    s_mov_b64 vcc, exec
142; CHECK-NEXT:    s_cbranch_execnz .LBB3_1
143; CHECK-NEXT:  .LBB3_3: ; %Flow1
144; CHECK-NEXT:    s_or_b64 exec, exec, s[2:3]
145; CHECK-NEXT:    v_mov_b32_e32 v0, 0
146; CHECK-NEXT:    s_branch .LBB3_5
147; CHECK-NEXT:  .LBB3_4:
148; CHECK-NEXT:    s_mov_b64 exec, 0
149; CHECK-NEXT:    exp null off, off, off, off done vm
150; CHECK-NEXT:    s_endpgm
151; CHECK-NEXT:  .LBB3_5:
152main_body:
; Inputs below 10.0 return 0.0 from %end; everything else enters the loop.
153  %cmp = fcmp olt float %0, 1.000000e+01
154  br i1 %cmp, label %end, label %loop
155
; The only successor of %loop is %loop itself, so this path never returns in
; the IR.
156loop:
157  call void @llvm.amdgcn.kill(i1 false) #3
158  br label %loop
159
160end:
161  ret float 0.
162}
163
; Intrinsics called by the test functions in this file, all declared with
; attribute group #0.
164declare void @llvm.amdgcn.kill(i1) #0
165declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0
166declare void @llvm.amdgcn.exp.compr.v2i16(i32 immarg, i32 immarg, <2 x i16>, <2 x i16>, i1 immarg, i1 immarg) #0
167
; NOTE(review): #0 is the only attribute group defined in the visible part of
; this file, while the call sites in the functions reference #3 -- confirm
; whether #3 should be defined or the calls should use #0.
168attributes #0 = { nounwind }
169