1; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,CI %s
2; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=fiji -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=2 -mcpu=iceland -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,VI-BUG %s
4
; Make sure to also run on a GPU with the SGPR allocation bug.
6
; Leaf function whose only content is an empty inline asm that clobbers vcc.
; The checks expect a reported usage of 34 SGPRs and 0 VGPRs on every run line.
; GCN-LABEL: {{^}}use_vcc:
; GCN: ; NumSgprs: 34
; GCN: ; NumVgprs: 0
define void @use_vcc() #1 {
  call void asm sideeffect "", "~{vcc}" () #0
  ret void
}
14
; Non-kernel function that calls use_vcc. The checks expect s33, s30 and s31
; to be written to lanes 2, 0 and 1 of v40 before the call (s_swappc_b64) and
; read back afterwards, followed by a return through s[30:31]. The reported
; usage is expected to be 36 SGPRs and 41 VGPRs.
; GCN-LABEL: {{^}}indirect_use_vcc:
; GCN: v_writelane_b32 v40, s33, 2
; GCN: v_writelane_b32 v40, s30, 0
; GCN: v_writelane_b32 v40, s31, 1
; GCN: s_swappc_b64
; GCN: v_readlane_b32 s31, v40, 1
; GCN: v_readlane_b32 s30, v40, 0
; GCN: v_readlane_b32 s33, v40, 2
; GCN: s_setpc_b64 s[30:31]
; GCN: ; NumSgprs: 36
; GCN: ; NumVgprs: 41
define void @indirect_use_vcc() #1 {
  call void @use_vcc()
  ret void
}
30
; Kernel that reaches use_vcc through indirect_use_vcc (two call levels).
; The call stack is expected to be static (is_dynamic_callstack = 0). The
; expected SGPR count depends on the run line: 38 for the default-CPU run,
; 40 for fiji and 96 for iceland. 41 VGPRs are expected on all runs.
; The %out argument is unused by the body.
; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel:
; GCN: is_dynamic_callstack = 0
; CI: ; NumSgprs: 38
; VI-NOBUG: ; NumSgprs: 40
; VI-BUG: ; NumSgprs: 96
; GCN: ; NumVgprs: 41
define amdgpu_kernel void @indirect_2level_use_vcc_kernel(i32 addrspace(1)* %out) #0 {
  call void @indirect_use_vcc()
  ret void
}
41
; Leaf function whose only content is an empty inline asm that clobbers
; flat_scratch. Expected usage: 36 SGPRs on the default-CPU run, 38 SGPRs on
; the fiji and iceland runs, and 0 VGPRs everywhere.
; GCN-LABEL: {{^}}use_flat_scratch:
; CI: ; NumSgprs: 36
; VI: ; NumSgprs: 38
; GCN: ; NumVgprs: 0
define void @use_flat_scratch() #1 {
  call void asm sideeffect "", "~{flat_scratch}" () #0
  ret void
}
50
; Non-kernel function that calls use_flat_scratch. Expected usage: 38 SGPRs on
; the default-CPU run, 40 SGPRs on the fiji and iceland runs, and 41 VGPRs
; everywhere.
; GCN-LABEL: {{^}}indirect_use_flat_scratch:
; CI: ; NumSgprs: 38
; VI: ; NumSgprs: 40
; GCN: ; NumVgprs: 41
define void @indirect_use_flat_scratch() #1 {
  call void @use_flat_scratch()
  ret void
}
59
; Kernel that reaches use_flat_scratch through indirect_use_flat_scratch.
; The call stack is expected to be static (is_dynamic_callstack = 0). Expected
; SGPR count: 38 for the default-CPU run, 40 for fiji, 96 for iceland;
; 41 VGPRs on all runs. The %out argument is unused by the body.
; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel:
; GCN: is_dynamic_callstack = 0
; CI: ; NumSgprs: 38
; VI-NOBUG: ; NumSgprs: 40
; VI-BUG: ; NumSgprs: 96
; GCN: ; NumVgprs: 41
define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(i32 addrspace(1)* %out) #0 {
  call void @indirect_use_flat_scratch()
  ret void
}
70
; Leaf function that clobbers v0 through v9 using two empty inline asm
; statements (five registers each). The check expects 10 VGPRs to be reported.
; GCN-LABEL: {{^}}use_10_vgpr:
; GCN: ; NumVgprs: 10
define void @use_10_vgpr() #1 {
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4}"() #0
  call void asm sideeffect "", "~{v5},~{v6},~{v7},~{v8},~{v9}"() #0
  ret void
}
78
; Non-kernel function that calls use_10_vgpr. The check expects 41 VGPRs to be
; reported, i.e. more than the 10 expected for the callee itself.
; GCN-LABEL: {{^}}indirect_use_10_vgpr:
; GCN: ; NumVgprs: 41
define void @indirect_use_10_vgpr() #0 {
  call void @use_10_vgpr()
  ret void
}
85
; Kernel that reaches use_10_vgpr through indirect_use_10_vgpr. The call stack
; is expected to be static (is_dynamic_callstack = 0) and 41 VGPRs are expected
; to be reported, matching the expectation for indirect_use_10_vgpr.
; GCN-LABEL: {{^}}indirect_2_level_use_10_vgpr:
; GCN: is_dynamic_callstack = 0
; GCN: ; NumVgprs: 41
define amdgpu_kernel void @indirect_2_level_use_10_vgpr() #0 {
  call void @indirect_use_10_vgpr()
  ret void
}
93
; Leaf function that clobbers only v49. The check expects 50 VGPRs to be
; reported, i.e. the highest clobbered register index plus one.
; GCN-LABEL: {{^}}use_50_vgpr:
; GCN: ; NumVgprs: 50
define void @use_50_vgpr() #1 {
  call void asm sideeffect "", "~{v49}"() #0
  ret void
}
100
; Non-kernel function that calls use_50_vgpr. The check expects the caller to
; report the same 50 VGPRs that are expected for the callee.
; GCN-LABEL: {{^}}indirect_use_50_vgpr:
; GCN: ; NumVgprs: 50
define void @indirect_use_50_vgpr() #0 {
  call void @use_50_vgpr()
  ret void
}
107
; Leaf function that clobbers only s79. The check expects 80 SGPRs to be
; reported, i.e. the highest clobbered register index plus one.
; GCN-LABEL: {{^}}use_80_sgpr:
; GCN: ; NumSgprs: 80
define void @use_80_sgpr() #1 {
  call void asm sideeffect "", "~{s79}"() #0
  ret void
}
114
; Non-kernel function that calls use_80_sgpr. The check expects 82 SGPRs to be
; reported, two more than the 80 expected for the callee.
; GCN-LABEL: {{^}}indirect_use_80_sgpr:
; GCN: ; NumSgprs: 82
define void @indirect_use_80_sgpr() #1 {
  call void @use_80_sgpr()
  ret void
}
121
; Kernel that reaches use_80_sgpr through indirect_use_80_sgpr. The call stack
; is expected to be static (is_dynamic_callstack = 0). Expected SGPR count:
; 84 for the default-CPU run, 86 for fiji and 96 for iceland.
; GCN-LABEL: {{^}}indirect_2_level_use_80_sgpr:
; GCN: is_dynamic_callstack = 0
; CI: ; NumSgprs: 84
; VI-NOBUG: ; NumSgprs: 86
; VI-BUG: ; NumSgprs: 96
define amdgpu_kernel void @indirect_2_level_use_80_sgpr() #0 {
  call void @indirect_use_80_sgpr()
  ret void
}
131
132
; Leaf function with a 512 x i32 object in addrspace(5) whose address is
; handed to an inline asm "v" operand. The check expects a reported scratch
; size of 2052 bytes.
; GCN-LABEL: {{^}}use_stack0:
; GCN: ScratchSize: 2052
define void @use_stack0() #1 {
  %big.buf = alloca [512 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"([512 x i32] addrspace(5)* %big.buf) #0
  ret void
}
140
; Leaf function with a 100 x i32 object in addrspace(5) whose address is
; handed to an inline asm "v" operand. The check expects a reported scratch
; size of 404 bytes.
; GCN-LABEL: {{^}}use_stack1:
; GCN: ScratchSize: 404
define void @use_stack1() #1 {
  %small.buf = alloca [100 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"([100 x i32] addrspace(5)* %small.buf) #0
  ret void
}
148
; Non-kernel function with its own 16 x i32 addrspace(5) object that also
; calls use_stack0. The check expects a reported scratch size of 2132 bytes,
; which is 80 bytes more than the 2052 expected for use_stack0 alone.
; GCN-LABEL: {{^}}indirect_use_stack:
; GCN: ScratchSize: 2132
define void @indirect_use_stack() #1 {
  %local.buf = alloca [16 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"([16 x i32] addrspace(5)* %local.buf) #0
  call void @use_stack0()
  ret void
}
157
; Kernel that reaches use_stack0 through indirect_use_stack. The call stack is
; expected to be static (is_dynamic_callstack = 0) and the reported scratch
; size is expected to be 2132 bytes, the same value expected for
; indirect_use_stack.
; GCN-LABEL: {{^}}indirect_2_level_use_stack:
; GCN: is_dynamic_callstack = 0
; GCN: ScratchSize: 2132
define amdgpu_kernel void @indirect_2_level_use_stack() #0 {
  call void @indirect_use_stack()
  ret void
}
165
166
; Kernel that calls use_stack0 and then use_stack1 in sequence.
; The scratch size should be the maximum of the callees' usage, not the sum:
; 2052 is expected here, given 2052 for use_stack0 and 404 for use_stack1.
; GCN-LABEL: {{^}}multi_call_use_use_stack:
; GCN: is_dynamic_callstack = 0
; GCN: ScratchSize: 2052
define amdgpu_kernel void @multi_call_use_use_stack() #0 {
  call void @use_stack0()
  call void @use_stack1()
  ret void
}
176
177
; Function with no body in this module; used as an unknown callee.
declare void @external() #0

; Kernel that calls the body-less @external. The checks expect a dynamic call
; stack (is_dynamic_callstack = 1) and a reported scratch size of 16384 bytes.
; NOTE(review): the NumSgprs / NumVgprs lines below carry no FileCheck prefix,
; so they are plain comments and are not verified by any run line. The values
; 48 and 24 may be stale - confirm against current output before enabling them.
; GCN-LABEL: {{^}}usage_external:
; GCN: is_dynamic_callstack = 1
; NumSgprs: 48
; NumVgprs: 24
; GCN: ScratchSize: 16384
define amdgpu_kernel void @usage_external() #0 {
  call void @external()
  ret void
}
189
; Function with no body in this module, declared with attribute group #2
; (which, unlike #0, does not include norecurse).
declare void @external_recurse() #2

; Kernel that calls the body-less @external_recurse. The checks expect a
; dynamic call stack (is_dynamic_callstack = 1) and a reported scratch size of
; 16384 bytes.
; NOTE(review): the NumSgprs / NumVgprs lines below carry no FileCheck prefix,
; so they are plain comments and are not verified by any run line. The values
; 48 and 24 may be stale - confirm against current output before enabling them.
; GCN-LABEL: {{^}}usage_external_recurse:
; GCN: is_dynamic_callstack = 1
; NumSgprs: 48
; NumVgprs: 24
; GCN: ScratchSize: 16384
define amdgpu_kernel void @usage_external_recurse() #0 {
  call void @external_recurse()
  ret void
}
201
; Self-recursive function: it owns a 512 x i32 addrspace(5) object, and when
; %val is non-zero it calls itself with %val - 1. The check expects a reported
; scratch size of exactly 18448 bytes.
; NOTE(review): 18448 - 16384 = 2064, so this presumably is the local frame
; plus the same 16384-byte figure expected for calls to unknown callees in
; this file - confirm.
; GCN-LABEL: {{^}}direct_recursion_use_stack:
; GCN: ScratchSize: 18448{{$}}
define void @direct_recursion_use_stack(i32 %val) #2 {
  %frame.buf = alloca [512 x i32], align 4, addrspace(5)
  call void asm sideeffect "; use $0", "v"([512 x i32] addrspace(5)* %frame.buf) #0
  %is.zero = icmp eq i32 %val, 0
  br i1 %is.zero, label %done, label %recurse

recurse:
  %next.val = sub i32 %val, 1
  call void @direct_recursion_use_stack(i32 %next.val)
  br label %done

done:
  ret void
}
218
; Kernel that calls the self-recursive direct_recursion_use_stack with its
; own argument %n. The checks expect is_ptr64 = 1, a dynamic call stack
; (is_dynamic_callstack = 1), and a workitem_private_segment_byte_size of
; exactly 18448, the same value expected as the callee's scratch size.
; GCN-LABEL: {{^}}usage_direct_recursion:
; GCN: is_ptr64 = 1
; GCN: is_dynamic_callstack = 1
; GCN: workitem_private_segment_byte_size = 18448{{$}}
define amdgpu_kernel void @usage_direct_recursion(i32 %n) #0 {
  call void @direct_recursion_use_stack(i32 %n)
  ret void
}
227
; Make sure there's no assert when an sgpr96 is used.
; The kernel feeds a constant <3 x i32> to an inline asm "s" operand and then
; calls the body-less @external. Expected SGPR count: 84 for the default-CPU
; run, 86 for fiji, 96 for iceland; 50 VGPRs on all runs.
; The label check ends with ':' so that it only matches this symbol's own
; label line, like the other label checks in this file.
; GCN-LABEL: {{^}}count_use_sgpr96_external_call:
; GCN: ; sgpr96 s[{{[0-9]+}}:{{[0-9]+}}]
; CI: NumSgprs: 84
; VI-NOBUG: NumSgprs: 86
; VI-BUG: NumSgprs: 96
; GCN: NumVgprs: 50
define amdgpu_kernel void @count_use_sgpr96_external_call()  {
entry:
  tail call void asm sideeffect "; sgpr96 $0", "s"(<3 x i32> <i32 10, i32 11, i32 12>) #1
  call void @external()
  ret void
}
241
; Make sure there's no assert when an sgpr160 is used.
; The kernel feeds a constant <5 x i32> to an inline asm "s" operand and then
; calls the body-less @external. Expected SGPR count: 84 for the default-CPU
; run, 86 for fiji, 96 for iceland; 50 VGPRs on all runs.
; The label check ends with ':' so that it only matches this symbol's own
; label line, like the other label checks in this file.
; GCN-LABEL: {{^}}count_use_sgpr160_external_call:
; GCN: ; sgpr160 s[{{[0-9]+}}:{{[0-9]+}}]
; CI: NumSgprs: 84
; VI-NOBUG: NumSgprs: 86
; VI-BUG: NumSgprs: 96
; GCN: NumVgprs: 50
define amdgpu_kernel void @count_use_sgpr160_external_call()  {
entry:
  tail call void asm sideeffect "; sgpr160 $0", "s"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
  call void @external()
  ret void
}
255
; Make sure there's no assert when a vgpr160 is used.
; The kernel feeds a constant <5 x i32> to an inline asm "v" operand and then
; calls the body-less @external. Expected SGPR count: 84 for the default-CPU
; run, 86 for fiji, 96 for iceland; 50 VGPRs on all runs.
; The label check ends with ':' so that it only matches this symbol's own
; label line, like the other label checks in this file.
; GCN-LABEL: {{^}}count_use_vgpr160_external_call:
; GCN: ; vgpr160 v[{{[0-9]+}}:{{[0-9]+}}]
; CI: NumSgprs: 84
; VI-NOBUG: NumSgprs: 86
; VI-BUG: NumSgprs: 96
; GCN: NumVgprs: 50
define amdgpu_kernel void @count_use_vgpr160_external_call()  {
entry:
  tail call void asm sideeffect "; vgpr160 $0", "v"(<5 x i32> <i32 10, i32 11, i32 12, i32 13, i32 14>) #1
  call void @external()
  ret void
}
269
; Attribute groups referenced by the functions and call sites in this file.
; Groups #0 and #1 are textually identical (nounwind noinline norecurse).
; Group #2 drops norecurse; it is used by @external_recurse and by the
; self-recursive @direct_recursion_use_stack.
attributes #0 = { nounwind noinline norecurse }
attributes #1 = { nounwind noinline norecurse }
attributes #2 = { nounwind noinline }
273