; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -enable-var-scope %s

; Although it's modeled without any control flow in order to get better code
; out of the structurizer, @llvm.amdgcn.kill actually ends the thread that calls
; it with "false". In case it's called in a provably infinite loop, we still
; need to successfully exit and export something, even if we can't know where
; to jump to in the LLVM IR. Therefore we insert a null export ourselves in
; this case, right before the s_endpgm, to avoid GPU hangs. That is what this
; test checks.
11
; Void pixel shader: inputs below 10.0 take %end and export to mrt0, all other
; inputs enter %loop, which calls llvm.amdgcn.kill with a constant false forever.
; The expected code gives the killed path its own exit block (.LBB0_6) that
; clears exec, issues a null export and ends the program.
define amdgpu_ps void @return_void(float %0) #0 {
; CHECK-LABEL: return_void:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    s_mov_b64 s[0:1], exec
; CHECK-NEXT:    s_mov_b32 s2, 0x41200000
; CHECK-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v0
; CHECK-NEXT:    s_and_saveexec_b64 s[2:3], vcc
; CHECK-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
; CHECK-NEXT:    s_cbranch_execz .LBB0_3
; CHECK-NEXT:  .LBB0_1: ; %loop
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
; CHECK-NEXT:    s_cbranch_scc0 .LBB0_6
; CHECK-NEXT:  ; %bb.2: ; %loop
; CHECK-NEXT:    ; in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT:    s_mov_b64 exec, 0
; CHECK-NEXT:    s_mov_b64 vcc, 0
; CHECK-NEXT:    s_branch .LBB0_1
; CHECK-NEXT:  .LBB0_3: ; %Flow1
; CHECK-NEXT:    s_or_saveexec_b64 s[0:1], s[2:3]
; CHECK-NEXT:    s_xor_b64 exec, exec, s[0:1]
; CHECK-NEXT:    s_cbranch_execz .LBB0_5
; CHECK-NEXT:  ; %bb.4: ; %end
; CHECK-NEXT:    v_mov_b32_e32 v0, 1.0
; CHECK-NEXT:    v_mov_b32_e32 v1, 0
; CHECK-NEXT:    exp mrt0 v1, v1, v1, v0 done vm
; CHECK-NEXT:  .LBB0_5: ; %UnifiedReturnBlock
; CHECK-NEXT:    s_endpgm
; CHECK-NEXT:  .LBB0_6:
; CHECK-NEXT:    s_mov_b64 exec, 0
; CHECK-NEXT:    exp null off, off, off, off done vm
; CHECK-NEXT:    s_endpgm
main_body:
  %cmp = fcmp olt float %0, 1.000000e+01
  br i1 %cmp, label %end, label %loop

; Self-loop with no exit edge in the IR; its only content is the kill.
loop:
  call void @llvm.amdgcn.kill(i1 false) #0
  br label %loop

; Regular exit: export (0, 0, 0, 1) to target 0 with all four channels enabled.
end:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 0., float 0., float 0., float 1., i1 true, i1 true) #0
  ret void
}
56
; Same control flow as @return_void, but the regular exit in %end uses the
; compressed export intrinsic. The killed path is still expected to finish in
; a separate block (.LBB1_6) with a null export followed by s_endpgm.
define amdgpu_ps void @return_void_compr(float %0) #0 {
; CHECK-LABEL: return_void_compr:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    s_mov_b64 s[0:1], exec
; CHECK-NEXT:    s_mov_b32 s2, 0x41200000
; CHECK-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v0
; CHECK-NEXT:    s_and_saveexec_b64 s[2:3], vcc
; CHECK-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
; CHECK-NEXT:    s_cbranch_execz .LBB1_3
; CHECK-NEXT:  .LBB1_1: ; %loop
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
; CHECK-NEXT:    s_cbranch_scc0 .LBB1_6
; CHECK-NEXT:  ; %bb.2: ; %loop
; CHECK-NEXT:    ; in Loop: Header=BB1_1 Depth=1
; CHECK-NEXT:    s_mov_b64 exec, 0
; CHECK-NEXT:    s_mov_b64 vcc, 0
; CHECK-NEXT:    s_branch .LBB1_1
; CHECK-NEXT:  .LBB1_3: ; %Flow1
; CHECK-NEXT:    s_or_saveexec_b64 s[0:1], s[2:3]
; CHECK-NEXT:    s_xor_b64 exec, exec, s[0:1]
; CHECK-NEXT:    s_cbranch_execz .LBB1_5
; CHECK-NEXT:  ; %bb.4: ; %end
; CHECK-NEXT:    v_mov_b32_e32 v0, 0
; CHECK-NEXT:    exp mrt0 v0, off, v0, off done compr vm
; CHECK-NEXT:  .LBB1_5: ; %UnifiedReturnBlock
; CHECK-NEXT:    s_endpgm
; CHECK-NEXT:  .LBB1_6:
; CHECK-NEXT:    s_mov_b64 exec, 0
; CHECK-NEXT:    exp null off, off, off, off done vm
; CHECK-NEXT:    s_endpgm
main_body:
  %cmp = fcmp olt float %0, 1.000000e+01
  br i1 %cmp, label %end, label %loop

; Self-loop with no exit edge in the IR; its only content is the kill.
loop:
  call void @llvm.amdgcn.kill(i1 false) #0
  br label %loop

; Regular exit: compressed export of two all-zero <2 x i16> vectors to target 0.
end:
  call void @llvm.amdgcn.exp.compr.v2i16(i32 0, i32 5, <2 x i16> < i16 0, i16 0 >, <2 x i16> < i16 0, i16 0 >, i1 true, i1 true) #0
  ret void
}
100
; Test the case where there's only a kill in an infinite loop: the function has
; no return and no export of its own, so the only way out in the expected code
; is the inserted block (.LBB2_3) with the null export and s_endpgm.
define amdgpu_ps void @only_kill() #0 {
; CHECK-LABEL: only_kill:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    s_mov_b64 s[0:1], exec
; CHECK-NEXT:  .LBB2_1: ; %loop
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
; CHECK-NEXT:    s_cbranch_scc0 .LBB2_3
; CHECK-NEXT:  ; %bb.2: ; %loop
; CHECK-NEXT:    ; in Loop: Header=BB2_1 Depth=1
; CHECK-NEXT:    s_mov_b64 exec, 0
; CHECK-NEXT:    s_branch .LBB2_1
; CHECK-NEXT:  .LBB2_3:
; CHECK-NEXT:    s_mov_b64 exec, 0
; CHECK-NEXT:    exp null off, off, off, off done vm
; CHECK-NEXT:    s_endpgm
main_body:
  br label %loop

; Self-loop with no exit edge in the IR; its only content is the kill.
loop:
  call void @llvm.amdgcn.kill(i1 false) #0
  br label %loop
}
125
; Check that the epilog is the final block: this shader returns a float, and in
; the expected code the inserted null-export block (.LBB3_4) is laid out before
; the last label (.LBB3_5), which the regular return path branches to.
define amdgpu_ps float @return_nonvoid(float %0) #0 {
; CHECK-LABEL: return_nonvoid:
; CHECK:       ; %bb.0: ; %main_body
; CHECK-NEXT:    s_mov_b64 s[0:1], exec
; CHECK-NEXT:    s_mov_b32 s2, 0x41200000
; CHECK-NEXT:    v_cmp_ngt_f32_e32 vcc, s2, v0
; CHECK-NEXT:    s_and_saveexec_b64 s[2:3], vcc
; CHECK-NEXT:    s_xor_b64 s[2:3], exec, s[2:3]
; CHECK-NEXT:    s_cbranch_execz .LBB3_3
; CHECK-NEXT:  .LBB3_1: ; %loop
; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    s_andn2_b64 s[0:1], s[0:1], exec
; CHECK-NEXT:    s_cbranch_scc0 .LBB3_4
; CHECK-NEXT:  ; %bb.2: ; %loop
; CHECK-NEXT:    ; in Loop: Header=BB3_1 Depth=1
; CHECK-NEXT:    s_mov_b64 exec, 0
; CHECK-NEXT:    s_mov_b64 vcc, exec
; CHECK-NEXT:    s_cbranch_execnz .LBB3_1
; CHECK-NEXT:  .LBB3_3: ; %Flow1
; CHECK-NEXT:    s_or_b64 exec, exec, s[2:3]
; CHECK-NEXT:    v_mov_b32_e32 v0, 0
; CHECK-NEXT:    s_branch .LBB3_5
; CHECK-NEXT:  .LBB3_4:
; CHECK-NEXT:    s_mov_b64 exec, 0
; CHECK-NEXT:    exp null off, off, off, off done vm
; CHECK-NEXT:    s_endpgm
; CHECK-NEXT:  .LBB3_5:
main_body:
  %cmp = fcmp olt float %0, 1.000000e+01
  br i1 %cmp, label %end, label %loop

; Self-loop with no exit edge in the IR; its only content is the kill.
loop:
  call void @llvm.amdgcn.kill(i1 false) #0
  br label %loop

; Regular exit: the shader result is the constant 0.0.
end:
  ret float 0.
}
165
; Intrinsics called by the test functions in this file.
declare void @llvm.amdgcn.kill(i1) #0
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0
declare void @llvm.amdgcn.exp.compr.v2i16(i32 immarg, i32 immarg, <2 x i16>, <2 x i16>, i1 immarg, i1 immarg) #0

; The only attribute group defined in this file.
attributes #0 = { nounwind }
171