; The same checks are run twice: once with -enable-ipra and once without it.
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -enable-ipra -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN %s

; Kernels are not called, so there is no call preserved mask.
; The kernel only stores 0 through %out; the check expects that store to be
; emitted as a flat_store_dword.
; GCN-LABEL: {{^}}kernel:
; GCN: flat_store_dword
define amdgpu_kernel void @kernel(i32 addrspace(1)* %out) #0 {
entry:
  store i32 0, i32 addrspace(1)* %out
  ret void
}

; Callee used by the call tests in this file. Its empty inline asm clobbers
; v0 through v7, and the check expects the function to report 8 VGPRs.
; GCN-LABEL: {{^}}func:
; GCN: ; NumVgprs: 8
define hidden void @func() #1 {
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
  ret void
}

; Kernel caller: a value is loaded before the call to @func and stored after
; it, so it has to survive the call. @func's inline asm clobbers v0-v7, and the
; checks expect the value to sit in v8 on both sides of the s_swappc_b64, with
; no buffer_load/buffer_store or readlane/writelane instructions (presumably
; spill code) before or after the call. The reported register counts are
; pinned as well.
; GCN-LABEL: {{^}}kernel_call:
; GCN-NOT: buffer_store
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN-NOT: writelane
; GCN: flat_load_dword v8
; GCN: s_swappc_b64
; GCN-NOT: buffer_store
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN-NOT: writelane
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8

; GCN: ; NumSgprs: 37
; GCN: ; NumVgprs: 9
define amdgpu_kernel void @kernel_call() #0 {
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  tail call void @func()
  store volatile i32 %vgpr, i32 addrspace(1)* undef
  ret void
}

; Same load / call @func / store pattern as @kernel_call, but the caller is an
; ordinary function rather than a kernel. The checks again expect the value to
; be held in v8 across the s_swappc_b64 with no buffer_load/buffer_store or
; readlane/writelane instructions around the call, and they pin the reported
; register counts.
; GCN-LABEL: {{^}}func_regular_call:
; GCN-NOT: buffer_store
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN-NOT: writelane
; GCN: flat_load_dword v8
; GCN: s_swappc_b64
; GCN-NOT: buffer_store
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN-NOT: writelane
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8

; GCN: ; NumSgprs: 32
; GCN: ; NumVgprs: 9
define void @func_regular_call() #1 {
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  tail call void @func()
  store volatile i32 %vgpr, i32 addrspace(1)* undef
  ret void
}

; The function body is nothing but a tail call to @func. The checks expect the
; instructions right after the s_waitcnt to be s_getpc_b64 / s_add_u32 /
; s_addc_u32 building a value in s[4:5] (presumably the callee address),
; followed immediately by s_setpc_b64 s[4:5]. The VGPR count is expected to
; be 8, the same number checked for @func.
; GCN-LABEL: {{^}}func_tail_call:
; GCN: s_waitcnt
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4,
; GCN-NEXT: s_addc_u32 s5,
; GCN-NEXT: s_setpc_b64 s[4:5]

; GCN: ; NumSgprs: 32
; GCN: ; NumVgprs: 8
define void @func_tail_call() #1 {
  tail call void @func()
  ret void
}

; Two calls to @func: the first has a value live across it (loaded before,
; stored after), the second is the last thing the function does. The checks
; expect the value in v8 around an s_swappc_b64, and an s_setpc_b64 after the
; store (presumably the final tail call).
; GCN-LABEL: {{^}}func_call_tail_call:
; GCN: flat_load_dword v8
; GCN: s_swappc_b64
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8
; GCN: s_setpc_b64

; GCN: ; NumSgprs: 32
; GCN: ; NumVgprs: 9
define void @func_call_tail_call() #1 {
  %vgpr = load volatile i32, i32 addrspace(1)* undef
  tail call void @func()
  store volatile i32 %vgpr, i32 addrspace(1)* undef
  tail call void @func()
  ret void
}

; Empty noinline function; it is the callee of both calls in @test_funcx2.
define void @void_func_void() noinline {
  ret void
}

; Make sure we don't get save/restore of FP between calls.
; The function calls @void_func_void twice in a row; the negative checks
; reject any mention of s5 or s32 in the output that follows the label.
; GCN-LABEL: {{^}}test_funcx2:
; GCN-NOT: s5
; GCN-NOT: s32
define void @test_funcx2() #0 {
  call void @void_func_void()
  call void @void_func_void()
  ret void
}

; Weak kernel whose only job is to call the internal function @hoge, giving
; that function a caller inside this module. Neither pointer argument is used,
; and no checks are attached to the kernel itself.
define weak amdgpu_kernel void @wombat(i32* %arg, i32* %arg2) {
bb:
  call void @hoge() #0
  ret void
}

; Make sure we save/restore the return address around the call.
; The checks expect s30 and s31 to be written into lanes of a single VGPR
; before the s_swappc_b64 s[30:31] that calls @eggs, read back into s4 and s5
; afterwards, and the function to return through s_setpc_b64 s[4:5].
; GCN-LABEL: {{^}}hoge:
; GCN-DAG: v_writelane_b32 [[CSR_VGPR:v[0-9]+]], s30,
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31,
; GCN: s_swappc_b64 s[30:31]
; GCN-DAG: v_readlane_b32 s4, [[CSR_VGPR]],
; GCN-DAG: v_readlane_b32 s5, [[CSR_VGPR]],
; GCN: s_waitcnt vmcnt(0)
; GCN: s_setpc_b64 s[4:5]
; Function Attrs: norecurse
define internal void @hoge() #2 {
bb:
  call void @eggs()
  ret void
}

; External function called from @hoge; only its declaration is visible here.
declare dso_local void @eggs()


; Attribute groups referenced by the functions and call sites above.
;   #0 - nounwind only.
;   #1 - nounwind + noinline, plus the three "amdgpu-no-workitem-id-*"
;        attributes (x, y and z).
;   #2 - norecurse only (used by @hoge).
attributes #0 = { nounwind }
attributes #1 = { nounwind noinline "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
attributes #2 = { norecurse }
