1; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s
2; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
3; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s
4
; External amdgpu_gfx callees that have no body in this module: one takes a
; single float, the other a <64 x float> vector. They are the targets of the
; *_extern_call* tests below.
5declare amdgpu_gfx float @extern_func(float) #0
6declare amdgpu_gfx float @extern_func_many_args(<64 x float>) #0
7
; Externally defined constant in addrspace(4) holding a pointer to a void()
; function. The *_indirect_call tests load it to obtain their callee.
8@funcptr = external hidden unnamed_addr addrspace(4) constant void()*, align 4
9
; Baseline case: no alloca and no call; returns %arg0 + 1.0.
; The expectations at the end of the file list a stack frame size of 0 for it.
10define amdgpu_gfx float @no_stack(float %arg0) #0 {
11  %add = fadd float %arg0, 1.0
12  ret float %add
13}
14
; One entry-block alloca of 4 floats (16 bytes) in addrspace(5). A volatile
; store/load pair touches the slot and the loaded value is added to %arg0.
; This function is also the callee of no_stack_call and simple_stack_call.
15define amdgpu_gfx float @simple_stack(float %arg0) #0 {
16  %stack = alloca float, i32 4, align 4, addrspace(5)
  ; Volatile accesses so the store and the reload both stay in the output.
17  store volatile float 2.0, float addrspace(5)* %stack
18  %val = load volatile float, float addrspace(5)* %stack
19  %add = fadd float %arg0, %val
20  ret float %add
21}
22
; Same pattern as simple_stack, but with two separate 4-float allocas, each
; written and read back with volatile accesses. Returns %arg0 plus both loaded
; values. The expectations at the end of the file list a frame size of 0x24
; here versus 0x14 for simple_stack.
23define amdgpu_gfx float @multiple_stack(float %arg0) #0 {
24  %stack = alloca float, i32 4, align 4, addrspace(5)
25  store volatile float 2.0, float addrspace(5)* %stack
26  %val = load volatile float, float addrspace(5)* %stack
27  %add = fadd float %arg0, %val
  ; Second, independent stack object.
28  %stack2 = alloca float, i32 4, align 4, addrspace(5)
29  store volatile float 2.0, float addrspace(5)* %stack2
30  %val2 = load volatile float, float addrspace(5)* %stack2
31  %add2 = fadd float %add, %val2
32  ret float %add2
33}
34
; The alloca lives in bb1 rather than in the entry block, and bb1 is only
; entered when %arg0 > 0.0 (ordered compare). Returns %arg0 plus the value
; read back from the slot on that path, or 0.0 when bb1 is skipped.
35define amdgpu_gfx float @dynamic_stack(float %arg0) #0 {
36bb0:
37  %cmp = fcmp ogt float %arg0, 0.0
38  br i1 %cmp, label %bb1, label %bb2
39
40bb1:
  ; Conditionally executed allocation (not in the entry block).
41  %stack = alloca float, i32 4, align 4, addrspace(5)
42  store volatile float 2.0, float addrspace(5)* %stack
43  %val = load volatile float, float addrspace(5)* %stack
44  %add = fadd float %arg0, %val
45  br label %bb2
46
47bb2:
  ; Merge the two paths: 0.0 straight from bb0, the sum when coming from bb1.
48  %res = phi float [ 0.0, %bb0 ], [ %add, %bb1 ]
49  ret float %res
50}
51
; The alloca sits inside the self-looping block bb1, so it is part of the loop
; body rather than the entry block. Returns %arg0 plus the value read back
; from the slot in the final iteration.
52define amdgpu_gfx float @dynamic_stack_loop(float %arg0) #0 {
53bb0:
54  br label %bb1
55
56bb1:
  ; Loop counter: 0 on entry from bb0, otherwise the decremented value.
57  %ctr = phi i32 [ 0, %bb0 ], [ %newctr, %bb1 ]
58  %stack = alloca float, i32 4, align 4, addrspace(5)
59  store volatile float 2.0, float addrspace(5)* %stack
60  %val = load volatile float, float addrspace(5)* %stack
61  %add = fadd float %arg0, %val
  ; The back edge is taken only while %ctr > 0 (signed). Since %ctr starts at
  ; 0, the condition is already false on the first pass as written.
62  %cmp = icmp sgt i32 %ctr, 0
63  %newctr = sub i32 %ctr, 1
64  br i1 %cmp, label %bb1, label %bb2
65
66bb2:
67  ret float %add
68}
69
; No alloca of its own; forwards %arg0 to @simple_stack (defined in this
; module) and returns that call's result unchanged.
70define amdgpu_gfx float @no_stack_call(float %arg0) #0 {
71  %res = call amdgpu_gfx float @simple_stack(float %arg0)
72  ret float %res
73}
74
; Combines a local 4-float alloca with a call to @simple_stack (defined in
; this module). Returns the call result plus the value read from the local
; slot.
75define amdgpu_gfx float @simple_stack_call(float %arg0) #0 {
76  %stack = alloca float, i32 4, align 4, addrspace(5)
77  store volatile float 2.0, float addrspace(5)* %stack
  ; %val is loaded before the call and consumed after it.
78  %val = load volatile float, float addrspace(5)* %stack
79  %res = call amdgpu_gfx float @simple_stack(float %arg0)
80  %add = fadd float %res, %val
81  ret float %add
82}
83
; No alloca of its own; forwards %arg0 to @extern_func, which is only declared
; in this module, and returns that call's result unchanged.
84define amdgpu_gfx float @no_stack_extern_call(float %arg0) #0 {
85  %res = call amdgpu_gfx float @extern_func(float %arg0)
86  ret float %res
87}
88
; Combines a local 4-float alloca with a call to the declared-only
; @extern_func. Returns the call result plus the value read from the local
; slot.
89define amdgpu_gfx float @simple_stack_extern_call(float %arg0) #0 {
90  %stack = alloca float, i32 4, align 4, addrspace(5)
91  store volatile float 2.0, float addrspace(5)* %stack
  ; %val is loaded before the call and consumed after it.
92  %val = load volatile float, float addrspace(5)* %stack
93  %res = call amdgpu_gfx float @extern_func(float %arg0)
94  %add = fadd float %res, %val
95  ret float %add
96}
97
; No alloca of its own; forwards its <64 x float> argument to the
; declared-only @extern_func_many_args and returns the result.
; The expectations at the end of the file list a frame size of 0x90 here
; versus 0x10 for no_stack_extern_call.
; NOTE(review): presumably part of the vector argument is passed on the
; stack; confirm against the amdgpu_gfx calling convention.
98define amdgpu_gfx float @no_stack_extern_call_many_args(<64 x float> %arg0) #0 {
99  %res = call amdgpu_gfx float @extern_func_many_args(<64 x float> %arg0)
100  ret float %res
101}
102
; No alloca of its own; loads a function pointer from @funcptr, calls it with
; no arguments, then returns %arg0 unchanged.
103define amdgpu_gfx float @no_stack_indirect_call(float %arg0) #0 {
104  %fptr = load void()*, void()* addrspace(4)* @funcptr
  ; The callee is only known through the loaded pointer.
105  call amdgpu_gfx void %fptr()
106  ret float %arg0
107}
108
; Combines a local 4-float alloca with an indirect call through the pointer
; loaded from @funcptr. Returns %arg0 plus the value read from the local slot.
109define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) #0 {
110  %stack = alloca float, i32 4, align 4, addrspace(5)
111  store volatile float 2.0, float addrspace(5)* %stack
  ; %val is loaded before the call and consumed after it.
112  %val = load volatile float, float addrspace(5)* %stack
113  %fptr = load void()*, void()* addrspace(4)* @funcptr
114  call amdgpu_gfx void %fptr()
115  %add = fadd float %arg0, %val
116  ret float %add
117}
118
; Combines a local 4-float alloca with a direct call to itself. The recursion
; is unconditional (there is no base case); the commands at the top of the
; file only run llc and FileCheck on this module.
119define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
120  %stack = alloca float, i32 4, align 4, addrspace(5)
121  store volatile float 2.0, float addrspace(5)* %stack
122  %val = load volatile float, float addrspace(5)* %stack
  ; Self-call: the callee is this very function.
123  %res = call amdgpu_gfx float @simple_stack_recurse(float %arg0)
124  %add = fadd float %res, %val
125  ret float %add
126}
127
; Module-internal array of 64 floats (256 bytes) in addrspace(3), left
; uninitialised (undef). The two simple_lds* functions read from it, and the
; expectations at the end of the file list an .lds_size of 0x100 for both.
128@lds = internal addrspace(3) global [64 x float] undef
129
; Returns the first element of @lds. %arg0 is unused; there is no alloca and
; no call.
130define amdgpu_gfx float @simple_lds(float %arg0) #0 {
  ; Address of element 0 of the array.
131  %lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
132  %val = load float, float addrspace(3)* %lds_ptr
133  ret float %val
134}
135
; Reads the first element of @lds and passes it to a direct call to itself,
; returning that call's result. %arg0 is unused and the recursion is
; unconditional (there is no base case).
136define amdgpu_gfx float @simple_lds_recurse(float %arg0) #0 {
  ; Address of element 0 of the array.
137  %lds_ptr = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
138  %val = load float, float addrspace(3)* %lds_ptr
139  %res = call amdgpu_gfx float @simple_lds_recurse(float %val)
140  ret float %res
141}
142
; Attribute group #0 (nounwind), referenced by every declaration and
; definition shown above.
143attributes #0 = { nounwind }
144
145; GCN: amdpal.pipelines:
146; GCN-NEXT:  - .registers:
147; GCN-NEXT:      0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
148; GCN-NEXT:      0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
149; GCN-NEXT:    .shader_functions:
150; GCN-NEXT:      dynamic_stack:
151; GCN-NEXT:        .lds_size:       0{{$}}
152; GCN-NEXT:        .sgpr_count:     0x28{{$}}
153; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
154; SDAG-NEXT:        .vgpr_count:     0x2{{$}}
155; GISEL-NEXT:        .vgpr_count:     0x3{{$}}
156; GCN-NEXT:      dynamic_stack_loop:
157; GCN-NEXT:        .lds_size:       0{{$}}
158; SDAG-NEXT:        .sgpr_count:     0x25{{$}}
159; GISEL-NEXT:        .sgpr_count:     0x26{{$}}
160; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
161; SDAG-NEXT:        .vgpr_count:     0x3{{$}}
162; GISEL-NEXT:        .vgpr_count:     0x4{{$}}
163; GCN-NEXT:      multiple_stack:
164; GCN-NEXT:        .lds_size:       0{{$}}
165; GCN-NEXT:        .sgpr_count:     0x21{{$}}
166; GCN-NEXT:        .stack_frame_size_in_bytes: 0x24{{$}}
167; GCN-NEXT:        .vgpr_count:     0x3{{$}}
168; GCN-NEXT:      no_stack:
169; GCN-NEXT:        .lds_size:       0{{$}}
170; GCN-NEXT:        .sgpr_count:     0x20{{$}}
171; GCN-NEXT:        .stack_frame_size_in_bytes: 0{{$}}
172; GCN-NEXT:        .vgpr_count:     0x1{{$}}
173; GCN-NEXT:      no_stack_call:
174; GCN-NEXT:        .lds_size:       0{{$}}
175; GCN-NEXT:        .sgpr_count:     0x24{{$}}
176; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
177; GCN-NEXT:        .vgpr_count:     0x3{{$}}
178; GCN-NEXT:      no_stack_extern_call:
179; GCN-NEXT:        .lds_size:       0{{$}}
180; GFX8-NEXT:        .sgpr_count:     0x28{{$}}
181; GFX9-NEXT:        .sgpr_count:     0x2c{{$}}
182; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
183; GCN-NEXT:        .vgpr_count:     0x2b{{$}}
184; GCN-NEXT:      no_stack_extern_call_many_args:
185; GCN-NEXT:        .lds_size:       0{{$}}
186; GFX8-NEXT:        .sgpr_count:     0x28{{$}}
187; GFX9-NEXT:        .sgpr_count:     0x2c{{$}}
188; GCN-NEXT:        .stack_frame_size_in_bytes: 0x90{{$}}
189; GCN-NEXT:        .vgpr_count:     0x2b{{$}}
190; GCN-NEXT:      no_stack_indirect_call:
191; GCN-NEXT:        .lds_size:       0{{$}}
192; GFX8-NEXT:        .sgpr_count:     0x28{{$}}
193; GFX9-NEXT:        .sgpr_count:     0x2c{{$}}
194; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
195; GCN-NEXT:        .vgpr_count:     0x2b{{$}}
196; GCN-NEXT:      simple_lds:
197; GCN-NEXT:        .lds_size:       0x100{{$}}
198; GCN-NEXT:        .sgpr_count:     0x20{{$}}
199; GCN-NEXT:        .stack_frame_size_in_bytes: 0{{$}}
200; GCN-NEXT:        .vgpr_count:     0x1{{$}}
201; GCN-NEXT:      simple_lds_recurse:
202; GCN-NEXT:        .lds_size:       0x100{{$}}
203; GCN-NEXT:        .sgpr_count:     0x26{{$}}
204; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
205; GCN-NEXT:        .vgpr_count:     0x29{{$}}
206; GCN-NEXT:      simple_stack:
207; GCN-NEXT:        .lds_size:       0{{$}}
208; GCN-NEXT:        .sgpr_count:     0x21{{$}}
209; GCN-NEXT:        .stack_frame_size_in_bytes: 0x14{{$}}
210; GCN-NEXT:        .vgpr_count:     0x2{{$}}
211; GCN-NEXT:      simple_stack_call:
212; GCN-NEXT:        .lds_size:       0{{$}}
213; GCN-NEXT:        .sgpr_count:     0x24{{$}}
214; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
215; GCN-NEXT:        .vgpr_count:     0x4{{$}}
216; GCN-NEXT:      simple_stack_extern_call:
217; GCN-NEXT:        .lds_size:       0{{$}}
218; GFX8-NEXT:        .sgpr_count:     0x28{{$}}
219; GFX9-NEXT:        .sgpr_count:     0x2c{{$}}
220; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
221; GCN-NEXT:        .vgpr_count:     0x2b{{$}}
222; GCN-NEXT:      simple_stack_indirect_call:
223; GCN-NEXT:        .lds_size:       0{{$}}
224; GFX8-NEXT:        .sgpr_count:     0x28{{$}}
225; GFX9-NEXT:        .sgpr_count:     0x2c{{$}}
226; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
227; GCN-NEXT:        .vgpr_count:     0x2b{{$}}
228; GCN-NEXT:      simple_stack_recurse:
229; GCN-NEXT:        .lds_size:       0{{$}}
230; GCN-NEXT:        .sgpr_count:     0x26{{$}}
231; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
232; GCN-NEXT:        .vgpr_count:     0x2a{{$}}
233; GCN-NEXT: ...
234