1; RUN: llc -mtriple=amdgcn--amdpal -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX8 -enable-var-scope %s
2; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SDAG,GFX9 -enable-var-scope %s
3; RUN: llc -global-isel -mtriple=amdgcn--amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GISEL,GFX9 -enable-var-scope %s
4
; External callees whose bodies are not visible in this module. They are the
; call targets of the *_extern_call functions in this file.
declare amdgpu_gfx float @extern_func(float) #0
; Variant taking a <64 x float> argument.
declare amdgpu_gfx float @extern_func_many_args(<64 x float>) #0
7
; External constant in addrspace(4) holding a void() function pointer; it is
; loaded and called by the *_indirect_call functions in this file.
@funcptr = external hidden unnamed_addr addrspace(4) constant void()*, align 4
9
; Leaf function without any alloca and without calls: returns %arg0 + 1.0.
define amdgpu_gfx float @no_stack(float %arg0) #0 {
  %sum = fadd float %arg0, 1.0
  ret float %sum
}
14
; Leaf function with a single alloca of 4 floats in addrspace(5).
; 2.0 is written to the slot and read back through volatile accesses, then
; added to %arg0.
define amdgpu_gfx float @simple_stack(float %arg0) #0 {
  %slot = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %slot
  %loaded = load volatile float, float addrspace(5)* %slot
  %sum = fadd float %arg0, %loaded
  ret float %sum
}
22
; Leaf function with two separate allocas (4 floats each, addrspace(5)).
; Each slot gets a volatile store of 2.0 followed by a volatile load; the two
; loaded values are accumulated onto %arg0. The second alloca is placed after
; the first slot has already been used.
define amdgpu_gfx float @multiple_stack(float %arg0) #0 {
  %slot.a = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %slot.a
  %loaded.a = load volatile float, float addrspace(5)* %slot.a
  %partial = fadd float %arg0, %loaded.a
  %slot.b = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %slot.b
  %loaded.b = load volatile float, float addrspace(5)* %slot.b
  %total = fadd float %partial, %loaded.b
  ret float %total
}
34
; The alloca lives in a conditionally executed block rather than the entry
; block: it is only reached when %arg0 > 0.0 (ordered compare).
; Result is %arg0 + <value read back from the slot> on that path, 0.0 otherwise.
define amdgpu_gfx float @dynamic_stack(float %arg0) #0 {
entry:
  %is.positive = fcmp ogt float %arg0, 0.0
  br i1 %is.positive, label %with.alloca, label %exit

with.alloca:
  %slot = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %slot
  %loaded = load volatile float, float addrspace(5)* %slot
  %sum = fadd float %arg0, %loaded
  br label %exit

exit:
  ; 0.0 when the alloca block was skipped, otherwise the computed sum.
  %result = phi float [ 0.0, %entry ], [ %sum, %with.alloca ]
  ret float %result
}
51
; The alloca sits inside a loop body, so it is executed once per iteration.
; The counter starts at 0 and the back edge is taken only while the counter
; is > 0 (signed), with the counter decremented by 1 each time round.
define amdgpu_gfx float @dynamic_stack_loop(float %arg0) #0 {
entry:
  br label %loop

loop:
  %iter = phi i32 [ 0, %entry ], [ %iter.next, %loop ]
  %slot = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %slot
  %loaded = load volatile float, float addrspace(5)* %slot
  %sum = fadd float %arg0, %loaded
  %again = icmp sgt i32 %iter, 0
  %iter.next = sub i32 %iter, 1
  br i1 %again, label %loop, label %done

done:
  ret float %sum
}
69
; No alloca of its own; forwards %arg0 to @simple_stack (defined in this
; module) and returns that call's result unchanged.
define amdgpu_gfx float @no_stack_call(float %arg0) #0 {
  %callee.result = call amdgpu_gfx float @simple_stack(float %arg0)
  ret float %callee.result
}
74
; One alloca (4 floats, addrspace(5)) plus a direct call to @simple_stack,
; which is defined in this module. The value read back from the slot is live
; across the call and is added to the call's result.
define amdgpu_gfx float @simple_stack_call(float %arg0) #0 {
  %slot = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %slot
  %loaded = load volatile float, float addrspace(5)* %slot
  %callee.result = call amdgpu_gfx float @simple_stack(float %arg0)
  %sum = fadd float %callee.result, %loaded
  ret float %sum
}
83
; No alloca of its own; forwards %arg0 to the external @extern_func, whose
; body is not available in this module, and returns its result.
define amdgpu_gfx float @no_stack_extern_call(float %arg0) #0 {
  %callee.result = call amdgpu_gfx float @extern_func(float %arg0)
  ret float %callee.result
}
88
; One alloca (4 floats, addrspace(5)) plus a call to the external
; @extern_func. The value read back from the slot is live across the call and
; is added to the call's result.
define amdgpu_gfx float @simple_stack_extern_call(float %arg0) #0 {
  %slot = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %slot
  %loaded = load volatile float, float addrspace(5)* %slot
  %callee.result = call amdgpu_gfx float @extern_func(float %arg0)
  %sum = fadd float %callee.result, %loaded
  ret float %sum
}
97
; No alloca of its own; passes its <64 x float> argument straight through to
; the external @extern_func_many_args and returns the float it produces.
define amdgpu_gfx float @no_stack_extern_call_many_args(<64 x float> %arg0) #0 {
  %callee.result = call amdgpu_gfx float @extern_func_many_args(<64 x float> %arg0)
  ret float %callee.result
}
102
; No alloca of its own; loads a void() function pointer from @funcptr and
; calls through it, so the callee is not known statically. Returns %arg0
; unchanged.
define amdgpu_gfx float @no_stack_indirect_call(float %arg0) #0 {
  %target = load void()*, void()* addrspace(4)* @funcptr
  call amdgpu_gfx void %target()
  ret float %arg0
}
108
; One alloca (4 floats, addrspace(5)) plus an indirect call through the
; pointer loaded from @funcptr. The value read back from the slot is live
; across the call and is added to %arg0 afterwards.
define amdgpu_gfx float @simple_stack_indirect_call(float %arg0) #0 {
  %slot = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %slot
  %loaded = load volatile float, float addrspace(5)* %slot
  %target = load void()*, void()* addrspace(4)* @funcptr
  call amdgpu_gfx void %target()
  %sum = fadd float %arg0, %loaded
  ret float %sum
}
118
; One alloca (4 floats, addrspace(5)) plus an unconditional direct call to
; itself. The value read back from the slot is added to the recursive call's
; result.
define amdgpu_gfx float @simple_stack_recurse(float %arg0) #0 {
  %slot = alloca float, i32 4, align 4, addrspace(5)
  store volatile float 2.0, float addrspace(5)* %slot
  %loaded = load volatile float, float addrspace(5)* %slot
  %self.result = call amdgpu_gfx float @simple_stack_recurse(float %arg0)
  %sum = fadd float %self.result, %loaded
  ret float %sum
}
127
; 64-float array in addrspace(3), shared by @simple_lds and
; @simple_lds_recurse.
@lds = internal addrspace(3) global [64 x float] undef

; Leaf function without alloca: returns element 0 of @lds. %arg0 is unused.
define amdgpu_gfx float @simple_lds(float %arg0) #0 {
  %first.elt = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
  %loaded = load float, float addrspace(3)* %first.elt
  ret float %loaded
}
135
; Reads element 0 of @lds and unconditionally calls itself with that value,
; returning the recursive call's result. %arg0 is unused.
define amdgpu_gfx float @simple_lds_recurse(float %arg0) #0 {
  %first.elt = getelementptr [64 x float], [64 x float] addrspace(3)* @lds, i32 0, i32 0
  %loaded = load float, float addrspace(3)* %first.elt
  %self.result = call amdgpu_gfx float @simple_lds_recurse(float %loaded)
  ret float %self.result
}
142
; Attribute group applied to every function and declaration in this file.
attributes #0 = { nounwind }
144
145; GCN: amdpal.pipelines:
146; GCN-NEXT:  - .registers:
147; SDAG-NEXT:      0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ca{{$}}
148; GISEL-NEXT:      0x2e12 (COMPUTE_PGM_RSRC1): 0xaf01ce{{$}}
149; GCN-NEXT:      0x2e13 (COMPUTE_PGM_RSRC2): 0x8001{{$}}
150; GCN-NEXT:    .shader_functions:
151; GCN-NEXT:      dynamic_stack:
152; GCN-NEXT:        .lds_size:       0{{$}}
153; GCN-NEXT:        .sgpr_count:     0x28{{$}}
154; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
155; SDAG-NEXT:        .vgpr_count:     0x2{{$}}
156; GISEL-NEXT:        .vgpr_count:     0x3{{$}}
157; GCN-NEXT:      dynamic_stack_loop:
158; GCN-NEXT:        .lds_size:       0{{$}}
159; SDAG-NEXT:        .sgpr_count:     0x25{{$}}
160; GISEL-NEXT:        .sgpr_count:     0x26{{$}}
161; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
162; SDAG-NEXT:        .vgpr_count:     0x3{{$}}
163; GISEL-NEXT:        .vgpr_count:     0x4{{$}}
164; GCN-NEXT:      multiple_stack:
165; GCN-NEXT:        .lds_size:       0{{$}}
166; GCN-NEXT:        .sgpr_count:     0x21{{$}}
167; GCN-NEXT:        .stack_frame_size_in_bytes: 0x24{{$}}
168; GCN-NEXT:        .vgpr_count:     0x3{{$}}
169; GCN-NEXT:      no_stack:
170; GCN-NEXT:        .lds_size:       0{{$}}
171; GCN-NEXT:        .sgpr_count:     0x20{{$}}
172; GCN-NEXT:        .stack_frame_size_in_bytes: 0{{$}}
173; GCN-NEXT:        .vgpr_count:     0x1{{$}}
174; GCN-NEXT:      no_stack_call:
175; GCN-NEXT:        .lds_size:       0{{$}}
176; GCN-NEXT:        .sgpr_count:     0x26{{$}}
177; GCN-NEXT:        .stack_frame_size_in_bytes: 0{{$}}
178; GCN-NEXT:        .vgpr_count:     0x2{{$}}
179; GCN-NEXT:      no_stack_extern_call:
180; GCN-NEXT:        .lds_size:       0{{$}}
181; GFX8-NEXT:        .sgpr_count:     0x28{{$}}
182; GFX9-NEXT:        .sgpr_count:     0x2c{{$}}
183; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
184; GCN-NEXT:        .vgpr_count:     0x29{{$}}
185; GCN-NEXT:      no_stack_extern_call_many_args:
186; GCN-NEXT:        .lds_size:       0{{$}}
187; GFX8-NEXT:        .sgpr_count:     0x28{{$}}
188; GFX9-NEXT:        .sgpr_count:     0x2c{{$}}
189; GCN-NEXT:        .stack_frame_size_in_bytes: 0x90{{$}}
190; SDAG-NEXT:        .vgpr_count:     0x2a{{$}}
191; GISEL-NEXT:        .vgpr_count:     0x34{{$}}
192; GCN-NEXT:      no_stack_indirect_call:
193; GCN-NEXT:        .lds_size:       0{{$}}
194; GFX8-NEXT:        .sgpr_count:     0x28{{$}}
195; GFX9-NEXT:        .sgpr_count:     0x2c{{$}}
196; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
197; SDAG-NEXT:        .vgpr_count:     0x2a{{$}}
198; GISEL-NEXT:        .vgpr_count:     0x34{{$}}
199; GCN-NEXT:      simple_lds:
200; GCN-NEXT:        .lds_size:       0x100{{$}}
201; GCN-NEXT:        .sgpr_count:     0x20{{$}}
202; GCN-NEXT:        .stack_frame_size_in_bytes: 0{{$}}
203; GCN-NEXT:        .vgpr_count:     0x1{{$}}
204; GCN-NEXT:      simple_lds_recurse:
205; GCN-NEXT:        .lds_size:       0x100{{$}}
206; GCN-NEXT:        .sgpr_count:     0x26{{$}}
207; GCN-NEXT:        .stack_frame_size_in_bytes: 0x10{{$}}
208; GCN-NEXT:        .vgpr_count:     0x29{{$}}
209; GCN-NEXT:      simple_stack:
210; GCN-NEXT:        .lds_size:       0{{$}}
211; GCN-NEXT:        .sgpr_count:     0x21{{$}}
212; GCN-NEXT:        .stack_frame_size_in_bytes: 0x14{{$}}
213; GCN-NEXT:        .vgpr_count:     0x2{{$}}
214; GCN-NEXT:      simple_stack_call:
215; GCN-NEXT:        .lds_size:       0{{$}}
216; GCN-NEXT:        .sgpr_count:     0x26{{$}}
217; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
218; GCN-NEXT:        .vgpr_count:     0x3{{$}}
219; GCN-NEXT:      simple_stack_extern_call:
220; GCN-NEXT:        .lds_size:       0{{$}}
221; GFX8-NEXT:        .sgpr_count:     0x28{{$}}
222; GFX9-NEXT:        .sgpr_count:     0x2c{{$}}
223; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
224; GCN-NEXT:        .vgpr_count:     0x2a{{$}}
225; GCN-NEXT:      simple_stack_indirect_call:
226; GCN-NEXT:        .lds_size:       0{{$}}
227; GFX8-NEXT:        .sgpr_count:     0x28{{$}}
228; GFX9-NEXT:        .sgpr_count:     0x2c{{$}}
229; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
230; SDAG-NEXT:        .vgpr_count:     0x2b{{$}}
231; GISEL-NEXT:        .vgpr_count:     0x34{{$}}
232; GCN-NEXT:      simple_stack_recurse:
233; GCN-NEXT:        .lds_size:       0{{$}}
234; GCN-NEXT:        .sgpr_count:     0x26{{$}}
235; GCN-NEXT:        .stack_frame_size_in_bytes: 0x20{{$}}
236; GCN-NEXT:        .vgpr_count:     0x2a{{$}}
237; GCN-NEXT: ...
238