1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefix=GFX803 %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX900 %s
4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX1010 %s
5
; Baseline: a kernel with an empty body. Every target is expected to emit
; nothing but s_endpgm, i.e. no scratch, flat_scratch, or s32/s33 setup.
6define amdgpu_kernel void @test_kern_empty() local_unnamed_addr #0 {
7; GFX803-LABEL: test_kern_empty:
8; GFX803:       ; %bb.0: ; %entry
9; GFX803-NEXT:    s_endpgm
10;
11; GFX900-LABEL: test_kern_empty:
12; GFX900:       ; %bb.0: ; %entry
13; GFX900-NEXT:    s_endpgm
14;
15; GFX1010-LABEL: test_kern_empty:
16; GFX1010:       ; %bb.0: ; %entry
17; GFX1010-NEXT:    s_endpgm
18entry:
19  ret void
20}
21
; Kernel whose only work is a volatile store to a private (addrspace(5)) alloca.
; The checks expect s7 to be added into s[0:1] ahead of the buffer_store, and
; no s32/s33 or flat_scratch initialization (compare test_kern_call).
22define amdgpu_kernel void @test_kern_stack() local_unnamed_addr #0 {
23; GFX803-LABEL: test_kern_stack:
24; GFX803:       ; %bb.0: ; %entry
25; GFX803-NEXT:    s_add_u32 s0, s0, s7
26; GFX803-NEXT:    s_addc_u32 s1, s1, 0
27; GFX803-NEXT:    v_mov_b32_e32 v0, 0
28; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
29; GFX803-NEXT:    s_waitcnt vmcnt(0)
30; GFX803-NEXT:    s_endpgm
31;
32; GFX900-LABEL: test_kern_stack:
33; GFX900:       ; %bb.0: ; %entry
34; GFX900-NEXT:    s_add_u32 s0, s0, s7
35; GFX900-NEXT:    s_addc_u32 s1, s1, 0
36; GFX900-NEXT:    v_mov_b32_e32 v0, 0
37; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
38; GFX900-NEXT:    s_waitcnt vmcnt(0)
39; GFX900-NEXT:    s_endpgm
40;
41; GFX1010-LABEL: test_kern_stack:
42; GFX1010:       ; %bb.0: ; %entry
43; GFX1010-NEXT:    v_mov_b32_e32 v0, 0
44; GFX1010-NEXT:    s_add_u32 s0, s0, s7
45; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
46; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
47; GFX1010-NEXT:    s_waitcnt_vscnt null, 0x0
48; GFX1010-NEXT:    s_endpgm
49entry:
50  %x = alloca i32, align 4, addrspace(5)
51  store volatile i32 0, i32 addrspace(5)* %x, align 4
52  ret void
53}
54
; Kernel that only calls the external function @ex. The checks expect
; flat_scratch to be initialized from s10, s11 and s15, s15 to be added into
; s[0:1], s32 to be set to 0, and the call to be made through
; s_getpc_b64 + s_swappc_b64.
55define amdgpu_kernel void @test_kern_call() local_unnamed_addr #0 {
56; GFX803-LABEL: test_kern_call:
57; GFX803:       ; %bb.0: ; %entry
58; GFX803-NEXT:    s_add_i32 s10, s10, s15
59; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s10, 8
60; GFX803-NEXT:    s_add_u32 s0, s0, s15
61; GFX803-NEXT:    s_addc_u32 s1, s1, 0
62; GFX803-NEXT:    s_mov_b32 s32, 0
63; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s11
64; GFX803-NEXT:    s_getpc_b64 s[4:5]
65; GFX803-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
66; GFX803-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
67; GFX803-NEXT:    s_swappc_b64 s[30:31], s[4:5]
68; GFX803-NEXT:    s_endpgm
69;
70; GFX900-LABEL: test_kern_call:
71; GFX900:       ; %bb.0: ; %entry
72; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s10, s15
73; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
74; GFX900-NEXT:    s_add_u32 s0, s0, s15
75; GFX900-NEXT:    s_addc_u32 s1, s1, 0
76; GFX900-NEXT:    s_mov_b32 s32, 0
77; GFX900-NEXT:    s_getpc_b64 s[4:5]
78; GFX900-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
79; GFX900-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
80; GFX900-NEXT:    s_swappc_b64 s[30:31], s[4:5]
81; GFX900-NEXT:    s_endpgm
82;
83; GFX1010-LABEL: test_kern_call:
84; GFX1010:       ; %bb.0: ; %entry
85; GFX1010-NEXT:    s_add_u32 s10, s10, s15
86; GFX1010-NEXT:    s_mov_b32 s32, 0
87; GFX1010-NEXT:    s_addc_u32 s11, s11, 0
88; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
89; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
90; GFX1010-NEXT:    s_add_u32 s0, s0, s15
91; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
92; GFX1010-NEXT:    s_getpc_b64 s[4:5]
93; GFX1010-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
94; GFX1010-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
95; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[4:5]
96; GFX1010-NEXT:    s_endpgm
97entry:
98  tail call void @ex() #0
99  ret void
100}
101
; Kernel combining the volatile private store with the call to @ex. s32 is
; expected to start at 0x400 on GFX803/GFX900 and 0x200 on GFX1010, and the
; store is followed by a wait before s_swappc_b64 transfers control.
102define amdgpu_kernel void @test_kern_stack_and_call() local_unnamed_addr #0 {
103; GFX803-LABEL: test_kern_stack_and_call:
104; GFX803:       ; %bb.0: ; %entry
105; GFX803-NEXT:    s_add_i32 s10, s10, s15
106; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s10, 8
107; GFX803-NEXT:    s_add_u32 s0, s0, s15
108; GFX803-NEXT:    s_addc_u32 s1, s1, 0
109; GFX803-NEXT:    v_mov_b32_e32 v0, 0
110; GFX803-NEXT:    s_movk_i32 s32, 0x400
111; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s11
112; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
113; GFX803-NEXT:    s_waitcnt vmcnt(0)
114; GFX803-NEXT:    s_getpc_b64 s[4:5]
115; GFX803-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
116; GFX803-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
117; GFX803-NEXT:    s_swappc_b64 s[30:31], s[4:5]
118; GFX803-NEXT:    s_endpgm
119;
120; GFX900-LABEL: test_kern_stack_and_call:
121; GFX900:       ; %bb.0: ; %entry
122; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s10, s15
123; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
124; GFX900-NEXT:    s_add_u32 s0, s0, s15
125; GFX900-NEXT:    s_addc_u32 s1, s1, 0
126; GFX900-NEXT:    v_mov_b32_e32 v0, 0
127; GFX900-NEXT:    s_movk_i32 s32, 0x400
128; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
129; GFX900-NEXT:    s_waitcnt vmcnt(0)
130; GFX900-NEXT:    s_getpc_b64 s[4:5]
131; GFX900-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
132; GFX900-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
133; GFX900-NEXT:    s_swappc_b64 s[30:31], s[4:5]
134; GFX900-NEXT:    s_endpgm
135;
136; GFX1010-LABEL: test_kern_stack_and_call:
137; GFX1010:       ; %bb.0: ; %entry
138; GFX1010-NEXT:    s_add_u32 s10, s10, s15
139; GFX1010-NEXT:    s_movk_i32 s32, 0x200
140; GFX1010-NEXT:    s_addc_u32 s11, s11, 0
141; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
142; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
143; GFX1010-NEXT:    v_mov_b32_e32 v0, 0
144; GFX1010-NEXT:    s_add_u32 s0, s0, s15
145; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
146; GFX1010-NEXT:    s_getpc_b64 s[4:5]
147; GFX1010-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
148; GFX1010-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
149; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
150; GFX1010-NEXT:    s_waitcnt_vscnt null, 0x0
151; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[4:5]
152; GFX1010-NEXT:    s_endpgm
153entry:
154  %x = alloca i32, align 4, addrspace(5)
155  store volatile i32 0, i32 addrspace(5)* %x, align 4
156  tail call void @ex() #0
157  ret void
158}
159
; Same body as test_kern_empty, but with attribute group #2
; ("frame-pointer"="all"). The only expected difference is an
; s_mov_b32 s33, 0 emitted before s_endpgm.
160define amdgpu_kernel void @test_force_fp_kern_empty() local_unnamed_addr #2 {
161; GFX803-LABEL: test_force_fp_kern_empty:
162; GFX803:       ; %bb.0: ; %entry
163; GFX803-NEXT:    s_mov_b32 s33, 0
164; GFX803-NEXT:    s_endpgm
165;
166; GFX900-LABEL: test_force_fp_kern_empty:
167; GFX900:       ; %bb.0: ; %entry
168; GFX900-NEXT:    s_mov_b32 s33, 0
169; GFX900-NEXT:    s_endpgm
170;
171; GFX1010-LABEL: test_force_fp_kern_empty:
172; GFX1010:       ; %bb.0: ; %entry
173; GFX1010-NEXT:    s_mov_b32 s33, 0
174; GFX1010-NEXT:    s_endpgm
175entry:
176  ret void
177}
178
; Same body as test_kern_stack, but with attribute group #2
; ("frame-pointer"="all"). s33 is expected to be zeroed and then used as the
; soffset operand of the buffer_store in place of the literal 0.
179define amdgpu_kernel void @test_force_fp_kern_stack() local_unnamed_addr #2 {
180; GFX803-LABEL: test_force_fp_kern_stack:
181; GFX803:       ; %bb.0: ; %entry
182; GFX803-NEXT:    s_add_u32 s0, s0, s7
183; GFX803-NEXT:    s_mov_b32 s33, 0
184; GFX803-NEXT:    s_addc_u32 s1, s1, 0
185; GFX803-NEXT:    v_mov_b32_e32 v0, 0
186; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
187; GFX803-NEXT:    s_waitcnt vmcnt(0)
188; GFX803-NEXT:    s_endpgm
189;
190; GFX900-LABEL: test_force_fp_kern_stack:
191; GFX900:       ; %bb.0: ; %entry
192; GFX900-NEXT:    s_add_u32 s0, s0, s7
193; GFX900-NEXT:    s_mov_b32 s33, 0
194; GFX900-NEXT:    s_addc_u32 s1, s1, 0
195; GFX900-NEXT:    v_mov_b32_e32 v0, 0
196; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
197; GFX900-NEXT:    s_waitcnt vmcnt(0)
198; GFX900-NEXT:    s_endpgm
199;
200; GFX1010-LABEL: test_force_fp_kern_stack:
201; GFX1010:       ; %bb.0: ; %entry
202; GFX1010-NEXT:    v_mov_b32_e32 v0, 0
203; GFX1010-NEXT:    s_add_u32 s0, s0, s7
204; GFX1010-NEXT:    s_mov_b32 s33, 0
205; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
206; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
207; GFX1010-NEXT:    s_waitcnt_vscnt null, 0x0
208; GFX1010-NEXT:    s_endpgm
209entry:
210  %x = alloca i32, align 4, addrspace(5)
211  store volatile i32 0, i32 addrspace(5)* %x, align 4
212  ret void
213}
214
; Same body as test_kern_call, but with attribute group #2
; ("frame-pointer"="all") on both the kernel and the call site. s33 is expected
; to be zeroed right after s32; the rest of the sequence matches test_kern_call.
215define amdgpu_kernel void @test_force_fp_kern_call() local_unnamed_addr #2 {
216; GFX803-LABEL: test_force_fp_kern_call:
217; GFX803:       ; %bb.0: ; %entry
218; GFX803-NEXT:    s_add_i32 s10, s10, s15
219; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s10, 8
220; GFX803-NEXT:    s_add_u32 s0, s0, s15
221; GFX803-NEXT:    s_addc_u32 s1, s1, 0
222; GFX803-NEXT:    s_mov_b32 s32, 0
223; GFX803-NEXT:    s_mov_b32 s33, 0
224; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s11
225; GFX803-NEXT:    s_getpc_b64 s[4:5]
226; GFX803-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
227; GFX803-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
228; GFX803-NEXT:    s_swappc_b64 s[30:31], s[4:5]
229; GFX803-NEXT:    s_endpgm
230;
231; GFX900-LABEL: test_force_fp_kern_call:
232; GFX900:       ; %bb.0: ; %entry
233; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s10, s15
234; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
235; GFX900-NEXT:    s_add_u32 s0, s0, s15
236; GFX900-NEXT:    s_addc_u32 s1, s1, 0
237; GFX900-NEXT:    s_mov_b32 s32, 0
238; GFX900-NEXT:    s_mov_b32 s33, 0
239; GFX900-NEXT:    s_getpc_b64 s[4:5]
240; GFX900-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
241; GFX900-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
242; GFX900-NEXT:    s_swappc_b64 s[30:31], s[4:5]
243; GFX900-NEXT:    s_endpgm
244;
245; GFX1010-LABEL: test_force_fp_kern_call:
246; GFX1010:       ; %bb.0: ; %entry
247; GFX1010-NEXT:    s_add_u32 s10, s10, s15
248; GFX1010-NEXT:    s_mov_b32 s32, 0
249; GFX1010-NEXT:    s_mov_b32 s33, 0
250; GFX1010-NEXT:    s_addc_u32 s11, s11, 0
251; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
252; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
253; GFX1010-NEXT:    s_add_u32 s0, s0, s15
254; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
255; GFX1010-NEXT:    s_getpc_b64 s[4:5]
256; GFX1010-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
257; GFX1010-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
258; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[4:5]
259; GFX1010-NEXT:    s_endpgm
260entry:
261  tail call void @ex() #2
262  ret void
263}
264
; Same body as test_kern_stack_and_call, but with attribute group #2
; ("frame-pointer"="all"). s33 is expected to be zeroed and used as the soffset
; operand of the buffer_store; the initial s32 values (0x400 on GFX803/GFX900,
; 0x200 on GFX1010) are the same as without the attribute.
265define amdgpu_kernel void @test_force_fp_kern_stack_and_call() local_unnamed_addr #2 {
266; GFX803-LABEL: test_force_fp_kern_stack_and_call:
267; GFX803:       ; %bb.0: ; %entry
268; GFX803-NEXT:    s_add_i32 s10, s10, s15
269; GFX803-NEXT:    s_lshr_b32 flat_scratch_hi, s10, 8
270; GFX803-NEXT:    s_add_u32 s0, s0, s15
271; GFX803-NEXT:    s_mov_b32 s33, 0
272; GFX803-NEXT:    s_addc_u32 s1, s1, 0
273; GFX803-NEXT:    v_mov_b32_e32 v0, 0
274; GFX803-NEXT:    s_movk_i32 s32, 0x400
275; GFX803-NEXT:    s_mov_b32 flat_scratch_lo, s11
276; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
277; GFX803-NEXT:    s_waitcnt vmcnt(0)
278; GFX803-NEXT:    s_getpc_b64 s[4:5]
279; GFX803-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
280; GFX803-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
281; GFX803-NEXT:    s_swappc_b64 s[30:31], s[4:5]
282; GFX803-NEXT:    s_endpgm
283;
284; GFX900-LABEL: test_force_fp_kern_stack_and_call:
285; GFX900:       ; %bb.0: ; %entry
286; GFX900-NEXT:    s_add_u32 flat_scratch_lo, s10, s15
287; GFX900-NEXT:    s_addc_u32 flat_scratch_hi, s11, 0
288; GFX900-NEXT:    s_add_u32 s0, s0, s15
289; GFX900-NEXT:    s_mov_b32 s33, 0
290; GFX900-NEXT:    s_addc_u32 s1, s1, 0
291; GFX900-NEXT:    v_mov_b32_e32 v0, 0
292; GFX900-NEXT:    s_movk_i32 s32, 0x400
293; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
294; GFX900-NEXT:    s_waitcnt vmcnt(0)
295; GFX900-NEXT:    s_getpc_b64 s[4:5]
296; GFX900-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
297; GFX900-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
298; GFX900-NEXT:    s_swappc_b64 s[30:31], s[4:5]
299; GFX900-NEXT:    s_endpgm
300;
301; GFX1010-LABEL: test_force_fp_kern_stack_and_call:
302; GFX1010:       ; %bb.0: ; %entry
303; GFX1010-NEXT:    s_add_u32 s10, s10, s15
304; GFX1010-NEXT:    s_movk_i32 s32, 0x200
305; GFX1010-NEXT:    s_mov_b32 s33, 0
306; GFX1010-NEXT:    s_addc_u32 s11, s11, 0
307; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s10
308; GFX1010-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s11
309; GFX1010-NEXT:    v_mov_b32_e32 v0, 0
310; GFX1010-NEXT:    s_add_u32 s0, s0, s15
311; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
312; GFX1010-NEXT:    s_getpc_b64 s[4:5]
313; GFX1010-NEXT:    s_add_u32 s4, s4, ex@rel32@lo+4
314; GFX1010-NEXT:    s_addc_u32 s5, s5, ex@rel32@hi+12
315; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], s33 offset:4
316; GFX1010-NEXT:    s_waitcnt_vscnt null, 0x0
317; GFX1010-NEXT:    s_swappc_b64 s[30:31], s[4:5]
318; GFX1010-NEXT:    s_endpgm
319entry:
320  %x = alloca i32, align 4, addrspace(5)
321  store volatile i32 0, i32 addrspace(5)* %x, align 4
322  tail call void @ex() #2
323  ret void
324}
325
; Spill whose scratch offset does not fit in the buffer instruction's offset
; field. Attribute group #1 caps the kernel at 8 VGPRs and the inline asm
; clobbers v0-v7, so %a has to be spilled across the asm. The checks expect the
; spill slot's offset to be materialized in s4 (0x40000 on GFX803/GFX900,
; 0x20000 on GFX1010) and passed as the soffset operand of the spill and reload.
326define amdgpu_kernel void @test_sgpr_offset_kernel() #1 {
327; GFX803-LABEL: test_sgpr_offset_kernel:
328; GFX803:       ; %bb.0: ; %entry
329; GFX803-NEXT:    s_add_u32 s0, s0, s7
330; GFX803-NEXT:    s_addc_u32 s1, s1, 0
331; GFX803-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8 glc
332; GFX803-NEXT:    s_waitcnt vmcnt(0)
333; GFX803-NEXT:    s_mov_b32 s4, 0x40000
334; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
335; GFX803-NEXT:    ;;#ASMSTART
336; GFX803-NEXT:    ;;#ASMEND
337; GFX803-NEXT:    s_mov_b32 s4, 0x40000
338; GFX803-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
339; GFX803-NEXT:    s_waitcnt vmcnt(0)
340; GFX803-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
341; GFX803-NEXT:    s_waitcnt vmcnt(0)
342; GFX803-NEXT:    s_endpgm
343;
344; GFX900-LABEL: test_sgpr_offset_kernel:
345; GFX900:       ; %bb.0: ; %entry
346; GFX900-NEXT:    s_add_u32 s0, s0, s7
347; GFX900-NEXT:    s_addc_u32 s1, s1, 0
348; GFX900-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8 glc
349; GFX900-NEXT:    s_waitcnt vmcnt(0)
350; GFX900-NEXT:    s_mov_b32 s4, 0x40000
351; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
352; GFX900-NEXT:    ;;#ASMSTART
353; GFX900-NEXT:    ;;#ASMEND
354; GFX900-NEXT:    s_mov_b32 s4, 0x40000
355; GFX900-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
356; GFX900-NEXT:    s_waitcnt vmcnt(0)
357; GFX900-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
358; GFX900-NEXT:    s_waitcnt vmcnt(0)
359; GFX900-NEXT:    s_endpgm
360;
361; GFX1010-LABEL: test_sgpr_offset_kernel:
362; GFX1010:       ; %bb.0: ; %entry
363; GFX1010-NEXT:    s_add_u32 s0, s0, s7
364; GFX1010-NEXT:    s_addc_u32 s1, s1, 0
365; GFX1010-NEXT:    s_mov_b32 s4, 0x20000
366; GFX1010-NEXT:    buffer_load_dword v0, off, s[0:3], 0 offset:8 glc dlc
367; GFX1010-NEXT:    s_waitcnt vmcnt(0)
368; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], s4 ; 4-byte Folded Spill
369; GFX1010-NEXT:    s_waitcnt_depctr 0xffe3
370; GFX1010-NEXT:    s_mov_b32 s4, 0x20000
371; GFX1010-NEXT:    ;;#ASMSTART
372; GFX1010-NEXT:    ;;#ASMEND
373; GFX1010-NEXT:    buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
374; GFX1010-NEXT:    s_waitcnt vmcnt(0)
375; GFX1010-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:8
376; GFX1010-NEXT:    s_waitcnt_vscnt null, 0x0
377; GFX1010-NEXT:    s_endpgm
378entry:
379  ; Occupy 4096 bytes of scratch, so the offset of the spill of %a does not
380  ; fit in the instruction, and has to live in the SGPR offset.
  ; (The alloca is 4092 bytes; its element 1 is accessed at offset:8 in the
  ; checks, so it starts at byte 4 and the occupied range ends at byte 4096.)
381  %alloca = alloca i8, i32 4092, align 4, addrspace(5)
382  %buf = bitcast i8 addrspace(5)* %alloca to i32 addrspace(5)*
383
384  %aptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
385  ; 0x40000 / 64 = 4096 (for wave64)
386  ; The autogenerated checks expect this value in s4 via s_mov_b32 on
387  ; GFX803/GFX900; the GFX1010 checks use 0x20000 (0x20000 / 32 = 4096).
388  %a = load volatile i32, i32 addrspace(5)* %aptr
389
390  ; Force %a to spill
391  call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}" ()
392
393  %outptr = getelementptr i32, i32 addrspace(5)* %buf, i32 1
394  store volatile i32 %a, i32 addrspace(5)* %outptr
395
396  ret void
397}
398
; External callee used by the *_call tests; only its declaration is needed.
399declare hidden void @ex() local_unnamed_addr #0
400
; #0: plain nounwind, used by the baseline tests and by @ex.
; #1: limits test_sgpr_offset_kernel to 8 VGPRs so that its value must spill.
; #2: requests a frame pointer everywhere, used by the test_force_fp_* kernels.
401attributes #0 = { nounwind }
402attributes #1 = { nounwind "amdgpu-num-vgpr"="8" }
403attributes #2 = { nounwind "frame-pointer"="all" }
404