; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,MUBUF %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,MUBUF %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-flat-scratch < %s | FileCheck -enable-var-scope -check-prefixes=GCN,FLATSCR %s
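
; This file checks callee frame setup and teardown: when the frame pointer
; (s33) can be eliminated, how it is otherwise preserved (copied to a free
; SGPR, spilled to a VGPR lane, or spilled to memory), and how the stack
; pointer (s32) is adjusted. The MUBUF prefix covers buffer-based scratch
; access; FLATSCR covers flat-scratch instructions.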

; GCN-LABEL: {{^}}callee_no_stack:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @callee_no_stack() #0 {
  ret void
}

; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim_all:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt
; MUBUF-NEXT:   s_mov_b32 [[FP_COPY:s4]], s33
; FLATSCR-NEXT: s_mov_b32 [[FP_COPY:s0]], s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
; GCN-NEXT: s_setpc_b64
define void @callee_no_stack_no_fp_elim_all() #1 {
  ret void
}

; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim_nonleaf:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @callee_no_stack_no_fp_elim_nonleaf() #2 {
  ret void
}

; GCN-LABEL: {{^}}callee_with_stack:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
; MUBUF-NEXT:   buffer_store_dword v0, off, s[0:3], s32{{$}}
; FLATSCR-NEXT: scratch_store_dword off, v0, s32
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @callee_with_stack() #0 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca
  ret void
}
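
; Note: with frame-pointer elimination allowed (attribute #0), the leaf
; function above addresses its alloca directly off the incoming s32, so no
; FP (s33) setup is needed.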

; Can use a free call-clobbered register to preserve the original FP value.

; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_all:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt
; MUBUF-NEXT:   s_mov_b32 [[FP_COPY:s4]], s33
; FLATSCR-NEXT: s_mov_b32 [[FP_COPY:s0]], s33
; GCN-NEXT: s_mov_b32 s33, s32
; MUBUF-NEXT:   s_addk_i32 s32, 0x200
; FLATSCR-NEXT: s_add_i32 s32, s32, 8
; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
; MUBUF-NEXT:   buffer_store_dword v0, off, s[0:3], s33{{$}}
; FLATSCR-NEXT: scratch_store_dword off, v0, s33{{$}}
; GCN-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT:   s_addk_i32 s32, 0xfe00
; FLATSCR-NEXT: s_add_i32 s32, s32, -8
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
; GCN-NEXT: s_setpc_b64
define void @callee_with_stack_no_fp_elim_all() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca
  ret void
}
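
; Note: the SP adjustments above differ by a factor of 64 because the MUBUF
; path maintains s32 in wave-scaled units (0x200 = 8 bytes x 64 lanes),
; while the flat-scratch path tracks per-lane bytes (8).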

; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_non_leaf:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt
; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
; MUBUF-NEXT:   buffer_store_dword v0, off, s[0:3], s32{{$}}
; FLATSCR-NEXT: scratch_store_dword off, v0, s32{{$}}
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @callee_with_stack_no_fp_elim_non_leaf() #2 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca
  ret void
}

; GCN-LABEL: {{^}}callee_with_stack_and_call:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt
; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT:   buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN: v_writelane_b32 [[CSR_VGPR]], s33, 2
; GCN-DAG: s_mov_b32 s33, s32
; MUBUF-DAG:   s_addk_i32 s32, 0x400{{$}}
; FLATSCR-DAG: s_add_i32 s32, s32, 16{{$}}
; GCN-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0{{$}}
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30,
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31,

; MUBUF-DAG:   buffer_store_dword [[ZERO]], off, s[0:3], s33{{$}}
; FLATSCR-DAG: scratch_store_dword off, [[ZERO]], s33{{$}}

; GCN: s_swappc_b64

; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]]
; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]]

; MUBUF:    s_addk_i32 s32, 0xfc00{{$}}
; FLATSCR:  s_add_i32 s32, s32, -16{{$}}
; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT:   buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt vmcnt(0)

; GCN-NEXT: s_setpc_b64 s[30:31]
define void @callee_with_stack_and_call() #0 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca
  call void @external_void_func_void()
  ret void
}
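
; Note: the s_or_saveexec_b64 ..., -1 / s_mov_b64 exec pairs above force
; all lanes active around the CSR VGPR spill and reload, so the VGPR
; contents of lanes that are inactive at the call site are preserved too.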

; Should be able to copy incoming stack pointer directly to inner
; call's stack pointer argument.

; There is stack usage only because of the need to evict a VGPR for
; spilling CSR SGPRs.

; GCN-LABEL: {{^}}callee_no_stack_with_call:
; GCN: s_waitcnt
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT:   buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; MUBUF-DAG:   s_addk_i32 s32, 0x400
; FLATSCR-DAG: s_add_i32 s32, s32, 16
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s33, [[FP_SPILL_LANE:[0-9]+]]

; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s30, 0
; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, 1
; GCN: s_swappc_b64

; GCN-DAG: v_readlane_b32 s30, [[CSR_VGPR]], 0
; GCN-DAG: v_readlane_b32 s31, [[CSR_VGPR]], 1

; MUBUF:   s_addk_i32 s32, 0xfc00
; FLATSCR: s_add_i32 s32, s32, -16
; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], [[FP_SPILL_LANE]]
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT:   buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 ; 4-byte Folded Reload
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
define void @callee_no_stack_with_call() #0 {
  call void @external_void_func_void()
  ret void
}

declare hidden void @external_void_func_void() #0

; Make sure that if a CSR VGPR is used for SGPR spilling, it is saved and
; restored. No FP is required.
;
; GCN-LABEL: {{^}}callee_func_sgpr_spill_no_calls:
; GCN: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT:   buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN: v_writelane_b32 [[CSR_VGPR]], s
; GCN: v_writelane_b32 [[CSR_VGPR]], s

; GCN: ;;#ASMSTART
; GCN: v_readlane_b32 s{{[0-9]+}}, [[CSR_VGPR]]
; GCN: v_readlane_b32 s{{[0-9]+}}, [[CSR_VGPR]]

; GCN: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT:   buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 ; 4-byte Folded Reload
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
  call void asm sideeffect "", "~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15}"() #0
  call void asm sideeffect "", "~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23}"() #0
  call void asm sideeffect "", "~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() #0
  call void asm sideeffect "", "~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"() #0

  %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr5 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
  %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0

  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
  call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0
  call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
  call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr5) #0
  ret void
}

; Has no spilled CSR VGPRs used for SGPR spilling, so no need to
; enable all lanes and restore.

; GCN-LABEL: {{^}}spill_only_csr_sgpr:
; GCN: s_waitcnt
; GCN-NEXT: s_or_saveexec_b64
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dword off, v0, s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec,
; GCN-NEXT: v_writelane_b32 v0, s42, 0
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; clobber s42
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: v_readlane_b32 s42, v0, 0
; GCN-NEXT: s_or_saveexec_b64
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; FLATSCR-NEXT: scratch_load_dword v0, off, s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec,
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @spill_only_csr_sgpr() {
  call void asm sideeffect "; clobber s42", "~{s42}"()
  ret void
}

; TODO: Can the SP inc/dec be removed?
; GCN-LABEL: {{^}}callee_with_stack_no_fp_elim_csr_vgpr:
; GCN: s_waitcnt
; GCN-NEXT: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; MUBUF-DAG:   buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; FLATSCR-DAG: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill
; MUBUF-DAG:   buffer_store_dword [[ZERO]], off, s[0:3], s33 offset:4
; FLATSCR-DAG: scratch_store_dword off, [[ZERO]], s33 offset:4

; GCN: ;;#ASMSTART
; GCN-NEXT: ; clobber v41
; GCN-NEXT: ;;#ASMEND

; MUBUF:   buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload
; MUBUF:        s_addk_i32 s32, 0x300
; MUBUF-NEXT:   s_addk_i32 s32, 0xfd00
; MUBUF-NEXT:   s_mov_b32 s33, s4
; FLATSCR:      s_add_i32 s32, s32, 12
; FLATSCR-NEXT: s_add_i32 s32, s32, -12
; FLATSCR-NEXT: s_mov_b32 s33, s0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca
  call void asm sideeffect "; clobber v41", "~{v41}"()
  ret void
}
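
; Note on the TODO above: 0xfd00 is -0x300 as a signed 16-bit s_addk_i32
; immediate, so the epilogue's back-to-back SP increment and decrement
; cancel out.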

; Use a copy to a free SGPR instead of introducing a second CSR VGPR.
; GCN-LABEL: {{^}}last_lane_vgpr_for_fp_csr:
; GCN: s_waitcnt
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT:   buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN-NEXT: v_writelane_b32 v0, s33, 63
; GCN-COUNT-60: v_writelane_b32 v0
; GCN: s_mov_b32 s33, s32
; GCN: v_writelane_b32 v0
; MUBUF:   buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill
; GCN: v_writelane_b32 v0
; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:4
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s33 offset:4
; GCN: ;;#ASMSTART
; GCN: v_writelane_b32 v0

; MUBUF:        s_addk_i32 s32, 0x400
; MUBUF:        s_addk_i32 s32, 0xfc00
; FLATSCR:      s_add_i32 s32, s32, 16
; FLATSCR:      s_add_i32 s32, s32, -16
; GCN-NEXT: v_readlane_b32 s33, v0, 63
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT:   buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @last_lane_vgpr_for_fp_csr() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca
  call void asm sideeffect "; clobber v41", "~{v41}"()
  call void asm sideeffect "",
    "~{s40},~{s41},~{s42},~{s43},~{s44},~{s45},~{s46},~{s47},~{s48},~{s49}
    ,~{s50},~{s51},~{s52},~{s53},~{s54},~{s55},~{s56},~{s57},~{s58},~{s59}
    ,~{s60},~{s61},~{s62},~{s63},~{s64},~{s65},~{s66},~{s67},~{s68},~{s69}
    ,~{s70},~{s71},~{s72},~{s73},~{s74},~{s75},~{s76},~{s77},~{s78},~{s79}
    ,~{s80},~{s81},~{s82},~{s83},~{s84},~{s85},~{s86},~{s87},~{s88},~{s89}
    ,~{s90},~{s91},~{s92},~{s93},~{s94},~{s95},~{s96},~{s97},~{s98},~{s99}
    ,~{s100},~{s101},~{s102}"() #1

  ret void
}

; Use a copy to a free SGPR instead of introducing a second CSR VGPR.
; GCN-LABEL: {{^}}no_new_vgpr_for_fp_csr:
; GCN: s_waitcnt
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT:   buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN-COUNT-61: v_writelane_b32 v0,
; FLATSCR: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
; FLATSCR-NEXT: s_mov_b32 s33, s32
; MUBUF: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
; MUBUF-NEXT: s_mov_b32 s33, s32
; MUBUF:   buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill
; GCN: v_writelane_b32 v0,
; MUBUF:   buffer_store_dword
; FLATSCR: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN: v_writelane_b32 v0,
; MUBUF:   buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload
; MUBUF:        s_addk_i32 s32, 0x400
; FLATSCR:      s_add_i32 s32, s32, 16
; GCN-COUNT-64: v_readlane_b32 s{{[0-9]+}}, v0
; MUBUF-NEXT:   s_addk_i32 s32, 0xfc00
; FLATSCR-NEXT: s_add_i32 s32, s32, -16
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT:   buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define void @no_new_vgpr_for_fp_csr() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca
  call void asm sideeffect "; clobber v41", "~{v41}"()
  call void asm sideeffect "",
    "~{s39},~{s40},~{s41},~{s42},~{s43},~{s44},~{s45},~{s46},~{s47},~{s48},~{s49}
    ,~{s50},~{s51},~{s52},~{s53},~{s54},~{s55},~{s56},~{s57},~{s58},~{s59}
    ,~{s60},~{s61},~{s62},~{s63},~{s64},~{s65},~{s66},~{s67},~{s68},~{s69}
    ,~{s70},~{s71},~{s72},~{s73},~{s74},~{s75},~{s76},~{s77},~{s78},~{s79}
    ,~{s80},~{s81},~{s82},~{s83},~{s84},~{s85},~{s86},~{s87},~{s88},~{s89}
    ,~{s90},~{s91},~{s92},~{s93},~{s94},~{s95},~{s96},~{s97},~{s98},~{s99}
    ,~{s100},~{s101},~{s102}"() #1

  ret void
}

; GCN-LABEL: {{^}}realign_stack_no_fp_elim:
; GCN: s_waitcnt
; MUBUF-NEXT:   s_mov_b32 [[FP_COPY:s4]], s33
; FLATSCR-NEXT: s_mov_b32 [[FP_COPY:s0]], s33
; MUBUF-NEXT:   s_add_i32 s33, s32, 0x7ffc0
; FLATSCR-NEXT: s_add_i32 s33, s32, 0x1fff
; MUBUF-NEXT:   s_and_b32 s33, s33, 0xfff80000
; FLATSCR-NEXT: s_and_b32 s33, s33, 0xffffe000
; MUBUF-NEXT:   s_add_i32 s32, s32, 0x180000
; FLATSCR-NEXT: s_addk_i32 s32, 0x6000
; GCN-NEXT:     v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; MUBUF-NEXT:   v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x2000{{$}}
; MUBUF-NEXT:   buffer_store_dword [[ZERO]], [[OFFSET]], s[0:3], s33 offen{{$}}
; FLATSCR-NEXT: s_add_i32 vcc_hi, s33, 0x2000
; FLATSCR-NEXT: scratch_store_dword off, [[ZERO]], vcc_hi
; GCN-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT:   s_add_i32 s32, s32, 0xffe80000
; FLATSCR-NEXT: s_addk_i32 s32, 0xa000
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
; GCN-NEXT: s_setpc_b64
define void @realign_stack_no_fp_elim() #1 {
  %alloca = alloca i32, align 8192, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca
  ret void
}
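
; Note on the realignment above: s33 = align_up(s32, alignment), with the
; 8192-byte alignment wave-scaled on the MUBUF path (add 0x7ffc0 =
; 8192*64 - 64, mask 0xfff80000) and per-lane on the flat-scratch path
; (add 0x1fff, mask 0xffffe000).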

; GCN-LABEL: {{^}}no_unused_non_csr_sgpr_for_fp:
; GCN: s_waitcnt
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT:   buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2
; GCN-NEXT: s_mov_b32 s33, s32
; GCN: v_writelane_b32 [[CSR_VGPR]], s30, 0
; GCN: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; MUBUF:        s_addk_i32 s32, 0x300
; FLATSCR:      s_add_i32 s32, s32, 12
; GCN: v_writelane_b32 [[CSR_VGPR]], s31, 1
; MUBUF:   buffer_store_dword [[ZERO]], off, s[0:3], s33{{$}}
; FLATSCR: scratch_store_dword off, [[ZERO]], s33{{$}}
; GCN-NEXT:     s_waitcnt vmcnt(0)
; GCN: ;;#ASMSTART
; GCN:     v_readlane_b32 s31, [[CSR_VGPR]], 1
; GCN:     v_readlane_b32 s30, [[CSR_VGPR]], 0
; MUBUF:   s_addk_i32 s32, 0xfd00
; FLATSCR: s_add_i32 s32, s32, -12
; GCN-NEXT:     v_readlane_b32 s33, [[CSR_VGPR]], 2
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT:   buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
define void @no_unused_non_csr_sgpr_for_fp() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  ; Use all clobberable registers, so FP has to spill to a VGPR.
  call void asm sideeffect "",
    "~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
    ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
    ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
    ,~{s30},~{s31}"() #0

  ret void
}

; Need a new CSR VGPR to satisfy the FP spill.
; GCN-LABEL: {{^}}no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr:
; GCN: s_waitcnt
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT:   buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s32 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2
; GCN-NEXT: s_mov_b32 s33, s32
; MUBUF:       s_addk_i32 s32, 0x300{{$}}
; FLATSCR:     s_add_i32 s32, s32, 12{{$}}

; MUBUF-DAG:   buffer_store_dword
; FLATSCR-DAG: scratch_store_dword

; GCN: ;;#ASMSTART
; MUBUF:   s_addk_i32 s32, 0xfd00{{$}}
; FLATSCR: s_add_i32 s32, s32, -12{{$}}
; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT:   buffer_load_dword [[CSR_VGPR]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s32 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
define void @no_unused_non_csr_sgpr_for_fp_no_scratch_vgpr() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  ; Use all clobberable registers, so FP has to spill to a VGPR.
  call void asm sideeffect "",
    "~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
    ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
    ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
    ,~{s30},~{s31}"() #0

  call void asm sideeffect "; clobber nonpreserved initial VGPRs",
    "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
    ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
    ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
    ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"() #1

  ret void
}

; The byval argument exceeds the MUBUF constant offset, so a scratch
; register is needed to access the CSR VGPR slot.
; GCN-LABEL: {{^}}scratch_reg_needed_mubuf_offset:
; GCN: s_waitcnt
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40100
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
; FLATSCR-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1004
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN-NEXT: v_writelane_b32 [[CSR_VGPR]], s33, 2
; GCN-DAG:  s_mov_b32 s33, s32
; MUBUF-DAG:   s_add_i32 s32, s32, 0x40300{{$}}
; FLATSCR-DAG: s_addk_i32 s32, 0x100c{{$}}
; MUBUF-DAG:   buffer_store_dword
; FLATSCR-DAG: scratch_store_dword

; GCN: ;;#ASMSTART
; MUBUF:   s_add_i32 s32, s32, 0xfffbfd00{{$}}
; FLATSCR: s_addk_i32 s32, 0xeff4{{$}}
; GCN-NEXT: v_readlane_b32 s33, [[CSR_VGPR]], 2
; GCN-NEXT: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40100
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Reload
; FLATSCR-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x1004
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, [[SCRATCH_SGPR]] ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC1]]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
define void @scratch_reg_needed_mubuf_offset([4096 x i8] addrspace(5)* byval([4096 x i8]) align 4 %arg) #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  ; Use all clobberable registers, so FP has to spill to a VGPR.
  call void asm sideeffect "; clobber nonpreserved SGPRs",
    "~{s0},~{s1},~{s2},~{s3},~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
    ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
    ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
    ,~{s30},~{s31}"() #0

  ; Use all clobberable VGPRs, so a CSR spill is needed for the VGPR
  call void asm sideeffect "; clobber nonpreserved VGPRs",
    "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
    ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
    ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
    ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"() #1

  ret void
}

; GCN-LABEL: {{^}}local_empty_func:
; GCN: s_waitcnt
; GCN-NEXT: s_setpc_b64
define internal void @local_empty_func() #0 {
  ret void
}

; An FP is needed, despite not needing any spills.
; TODO: Could see that the callee does not use the stack and omit the FP.
; GCN-LABEL: {{^}}ipra_call_with_stack:
; GCN: v_writelane_b32 v0, s33, 2
; GCN: s_mov_b32 s33, s32
; MUBUF:   s_addk_i32 s32, 0x400
; FLATSCR: s_add_i32 s32, s32, 16
; MUBUF:   buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33{{$}}
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s33{{$}}
; GCN:     s_swappc_b64
; MUBUF:   s_addk_i32 s32, 0xfc00
; FLATSCR: s_add_i32 s32, s32, -16
; GCN: v_readlane_b32 s33, v0, 2
define void @ipra_call_with_stack() #0 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca
  call void @local_empty_func()
  ret void
}

; With no free registers, we must spill the FP to memory.
; GCN-LABEL: {{^}}callee_need_to_spill_fp_to_memory:
; MUBUF:   v_mov_b32_e32 [[TMP_VGPR1:v[0-9]+]], s33
; MUBUF:   buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s32 ; 4-byte Folded Spill
; FLATSCR: s_mov_b32 s0, s33
; GCN:     s_mov_b32 s33, s32
; MUBUF:   buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s32 ; 4-byte Folded Reload
; FLATSCR: s_mov_b32 s33, s0
; MUBUF:   s_waitcnt vmcnt(0)
; MUBUF:   v_readfirstlane_b32 s33, [[TMP_VGPR2]]
; GCN:     s_setpc_b64
; MUBUF:   ScratchSize: 8
; FLATSCR: ScratchSize: 0
define void @callee_need_to_spill_fp_to_memory() #3 {
  call void asm sideeffect "; clobber nonpreserved SGPRs",
    "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
    ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
    ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
    ,~{vcc}"()

  call void asm sideeffect "; clobber all VGPRs",
    "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
    ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
    ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
    ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"()
  ret void
}
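
; Note: in the MUBUF case above the FP is carried to its scratch slot
; through a temporary VGPR, and v_readfirstlane_b32 moves the reloaded
; value from the VGPR back into the scalar s33 in the epilogue.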

; If we have a reserved VGPR that can be used for SGPR spills, we may still
; need to spill the FP to memory if there are no free lanes in the reserved
; VGPR.
; GCN-LABEL: {{^}}callee_need_to_spill_fp_to_memory_full_reserved_vgpr:
; MUBUF:   s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF:   s_mov_b64 exec, [[COPY_EXEC1]]
; MUBUF:   v_mov_b32_e32 [[TMP_VGPR1:v[0-9]+]], s33
; MUBUF:   buffer_store_dword [[TMP_VGPR1]], off, s[0:3], s32 offset:[[OFF:[0-9]+]]
; GCN-NOT: v_writelane_b32 v40, s33
; MUBUF:   s_mov_b32 s33, s32
; FLATSCR: s_mov_b32 s33, s0
; GCN-NOT: v_readlane_b32 s33, v40
; MUBUF:   buffer_load_dword [[TMP_VGPR2:v[0-9]+]], off, s[0:3], s32 offset:[[OFF]]
; MUBUF:   v_readfirstlane_b32 s33, [[TMP_VGPR2]]
; MUBUF:   s_or_saveexec_b64 [[COPY_EXEC2:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF:   s_mov_b64 exec, [[COPY_EXEC2]]
; GCN:     s_setpc_b64
define void @callee_need_to_spill_fp_to_memory_full_reserved_vgpr() #3 {
  call void asm sideeffect "; clobber nonpreserved SGPRs and 64 CSRs",
    "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
    ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
    ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
    ,~{s40},~{s41},~{s42},~{s43},~{s44},~{s45},~{s46},~{s47},~{s48},~{s49}
    ,~{s50},~{s51},~{s52},~{s53},~{s54},~{s55},~{s56},~{s57},~{s58},~{s59}
    ,~{s60},~{s61},~{s62},~{s63},~{s64},~{s65},~{s66},~{s67},~{s68},~{s69}
    ,~{s70},~{s71},~{s72},~{s73},~{s74},~{s75},~{s76},~{s77},~{s78},~{s79}
    ,~{s80},~{s81},~{s82},~{s83},~{s84},~{s85},~{s86},~{s87},~{s88},~{s89}
    ,~{s90},~{s91},~{s92},~{s93},~{s94},~{s95},~{s96},~{s97},~{s98},~{s99}
    ,~{s100},~{s101},~{s102},~{s39},~{vcc}"()

  call void asm sideeffect "; clobber all VGPRs except CSR v40",
    "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
    ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
    ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
    ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38}"()
  ret void
}

; When flat-scratch is enabled, we save the FP to s0, but the exec register
; is also saved to s[0:1] when saving CSRs in the function prologue. Make
; sure that the FP save happens after exec has been restored from s[0:1].
; GCN-LABEL: {{^}}callee_need_to_spill_fp_to_reg:
; GCN-NOT: v_writelane_b32 v40, s33
; FLATSCR: s_or_saveexec_b64 s[0:1], -1
; FLATSCR: s_mov_b64 exec, s[0:1]
; FLATSCR: s_mov_b32 s0, s33
; FLATSCR: s_mov_b32 s33, s32
; FLATSCR: s_mov_b32 s33, s0
; FLATSCR: s_or_saveexec_b64 s[0:1], -1
; GCN-NOT: v_readlane_b32 s33, v40
; GCN:     s_setpc_b64
define void @callee_need_to_spill_fp_to_reg() #1 {
  call void asm sideeffect "; clobber nonpreserved SGPRs and 64 CSRs",
    "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
    ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
    ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
    ,~{s40},~{s41},~{s42},~{s43},~{s44},~{s45},~{s46},~{s47},~{s48},~{s49}
    ,~{s50},~{s51},~{s52},~{s53},~{s54},~{s55},~{s56},~{s57},~{s58},~{s59}
    ,~{s60},~{s61},~{s62},~{s63},~{s64},~{s65},~{s66},~{s67},~{s68},~{s69}
    ,~{s70},~{s71},~{s72},~{s73},~{s74},~{s75},~{s76},~{s77},~{s78},~{s79}
    ,~{s80},~{s81},~{s82},~{s83},~{s84},~{s85},~{s86},~{s87},~{s88},~{s89}
    ,~{s90},~{s91},~{s92},~{s93},~{s94},~{s95},~{s96},~{s97},~{s98},~{s99}
    ,~{s100},~{s101},~{s102},~{s39},~{vcc}"()

  call void asm sideeffect "; clobber all VGPRs except CSR v40",
    "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
    ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
    ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
    ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38},~{v39}"()
  ret void
}

; If the size of the offset exceeds the MUBUF offset field, we need another
; scratch VGPR to hold the offset.
; GCN-LABEL: {{^}}spill_fp_to_memory_scratch_reg_needed_mubuf_offset:
; MUBUF: s_or_saveexec_b64 s[4:5], -1
; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40100
; MUBUF-NEXT: buffer_store_dword v39, off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
; MUBUF: v_mov_b32_e32 v0, s33
; GCN-NOT: v_mov_b32_e32 v0, 0x100c
; MUBUF-NEXT: s_add_i32 [[SCRATCH_SGPR:s[0-9]+]], s32, 0x40200
; MUBUF: buffer_store_dword v0, off, s[0:3], [[SCRATCH_SGPR]] ; 4-byte Folded Spill
; FLATSCR: v_mov_b32_e32 v0, 0
; FLATSCR: s_add_i32 [[SOFF:s[0-9]+]], s33, 0x1000
; FLATSCR: scratch_store_dword off, v0, [[SOFF]]
define void @spill_fp_to_memory_scratch_reg_needed_mubuf_offset([4096 x i8] addrspace(5)* byval([4096 x i8]) align 4 %arg) #3 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  call void asm sideeffect "; clobber nonpreserved SGPRs and 64 CSRs",
    "~{s4},~{s5},~{s6},~{s7},~{s8},~{s9}
    ,~{s10},~{s11},~{s12},~{s13},~{s14},~{s15},~{s16},~{s17},~{s18},~{s19}
    ,~{s20},~{s21},~{s22},~{s23},~{s24},~{s25},~{s26},~{s27},~{s28},~{s29}
    ,~{s40},~{s41},~{s42},~{s43},~{s44},~{s45},~{s46},~{s47},~{s48},~{s49}
    ,~{s50},~{s51},~{s52},~{s53},~{s54},~{s55},~{s56},~{s57},~{s58},~{s59}
    ,~{s60},~{s61},~{s62},~{s63},~{s64},~{s65},~{s66},~{s67},~{s68},~{s69}
    ,~{s70},~{s71},~{s72},~{s73},~{s74},~{s75},~{s76},~{s77},~{s78},~{s79}
    ,~{s80},~{s81},~{s82},~{s83},~{s84},~{s85},~{s86},~{s87},~{s88},~{s89}
    ,~{s90},~{s91},~{s92},~{s93},~{s94},~{s95},~{s96},~{s97},~{s98},~{s99}
    ,~{s100},~{s101},~{s102},~{s39},~{vcc}"()

  call void asm sideeffect "; clobber all VGPRs except CSR v40",
    "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9}
    ,~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19}
    ,~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}
    ,~{v30},~{v31},~{v32},~{v33},~{v34},~{v35},~{v36},~{v37},~{v38}"()
  ret void
}

attributes #0 = { nounwind }
attributes #1 = { nounwind "frame-pointer"="all" }
attributes #2 = { nounwind "frame-pointer"="non-leaf" }
attributes #3 = { nounwind "frame-pointer"="all" "amdgpu-waves-per-eu"="6,6" }
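
; Attribute notes: #1 and #3 require a frame pointer for all functions and
; #2 only for non-leaf functions; #3 additionally pins occupancy to six
; waves per EU, which limits how many registers the function may use and
; helps force the FP spills to memory exercised above.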