; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji --amdhsa-code-object-version=3 < %s | FileCheck -check-prefix=VI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck -check-prefix=GFX9 %s

; Make sure the stack is never realigned for entry functions.

; Kernel with a single private (addrspace 5) i32 alloca that requests 128-byte
; alignment and is written once with a volatile store of 9. For both RUN lines
; the expected output places that store at a fixed offset of 128 from the
; buffer resource in s[0:3] and reports a private segment size of 256. The
; checked instruction sequence holds only the s0/s1 adjustment, the constant
; move, the store, the wait and s_endpgm.
define amdgpu_kernel void @max_alignment_128() #0 {
; VI-LABEL: max_alignment_128:
; VI:       ; %bb.0:
; VI-NEXT:    s_add_u32 s0, s0, s7
; VI-NEXT:    s_addc_u32 s1, s1, 0
; VI-NEXT:    v_mov_b32_e32 v0, 9
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:128
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_endpgm
; VI-NEXT:    .section .rodata,#alloc
; VI-NEXT:    .p2align 6
; VI-NEXT:    .amdhsa_kernel max_alignment_128
; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
; VI-NEXT:     .amdhsa_private_segment_fixed_size 256
; VI-NEXT:     .amdhsa_kernarg_size 0
; VI-NEXT:     .amdhsa_user_sgpr_count 6
; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
; VI-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
; VI-NEXT:     .amdhsa_uses_dynamic_stack 0
; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
; VI-NEXT:     .amdhsa_system_vgpr_workitem_id 0
; VI-NEXT:     .amdhsa_next_free_vgpr 1
; VI-NEXT:     .amdhsa_next_free_sgpr 8
; VI-NEXT:     .amdhsa_reserve_vcc 0
; VI-NEXT:     .amdhsa_reserve_flat_scratch 0
; VI-NEXT:     .amdhsa_float_round_mode_32 0
; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
; VI-NEXT:     .amdhsa_float_denorm_mode_32 3
; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
; VI-NEXT:     .amdhsa_dx10_clamp 1
; VI-NEXT:     .amdhsa_ieee_mode 1
; VI-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
; VI-NEXT:     .amdhsa_exception_fp_denorm_src 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
; VI-NEXT:     .amdhsa_exception_int_div_zero 0
; VI-NEXT:    .end_amdhsa_kernel
; VI-NEXT:    .text
;
; GFX9-LABEL: max_alignment_128:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_u32 s0, s0, s7
; GFX9-NEXT:    s_addc_u32 s1, s1, 0
; GFX9-NEXT:    v_mov_b32_e32 v0, 9
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:128
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_endpgm
; GFX9-NEXT:    .section .rodata,#alloc
; GFX9-NEXT:    .p2align 6
; GFX9-NEXT:    .amdhsa_kernel max_alignment_128
; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 256
; GFX9-NEXT:     .amdhsa_kernarg_size 0
; GFX9-NEXT:     .amdhsa_user_sgpr_count 6
; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
; GFX9-NEXT:     .amdhsa_uses_dynamic_stack 0
; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
; GFX9-NEXT:     .amdhsa_system_vgpr_workitem_id 0
; GFX9-NEXT:     .amdhsa_next_free_vgpr 1
; GFX9-NEXT:     .amdhsa_next_free_sgpr 8
; GFX9-NEXT:     .amdhsa_reserve_vcc 0
; GFX9-NEXT:     .amdhsa_reserve_flat_scratch 0
; GFX9-NEXT:     .amdhsa_reserve_xnack_mask 1
; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 3
; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
; GFX9-NEXT:     .amdhsa_dx10_clamp 1
; GFX9-NEXT:     .amdhsa_ieee_mode 1
; GFX9-NEXT:     .amdhsa_fp16_overflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
; GFX9-NEXT:     .amdhsa_exception_fp_denorm_src 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
; GFX9-NEXT:     .amdhsa_exception_int_div_zero 0
; GFX9-NEXT:    .end_amdhsa_kernel
; GFX9-NEXT:    .text
  %alloca.align = alloca i32, align 128, addrspace(5)
  ; volatile keeps the store (and therefore the stack object) in the output.
  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 128
  ret void
}

; Kernel carrying the "stackrealign" string attribute (group #1) with an
; ordinary 4-byte-aligned private i32 alloca. For both RUN lines the expected
; output stores to offset 4 of the buffer resource in s[0:3] and reports a
; private segment size of 8. The checked instruction sequence holds only the
; s0/s1 adjustment, the constant move, the store, the wait and s_endpgm.
define amdgpu_kernel void @stackrealign_attr() #1 {
; VI-LABEL: stackrealign_attr:
; VI:       ; %bb.0:
; VI-NEXT:    s_add_u32 s0, s0, s7
; VI-NEXT:    s_addc_u32 s1, s1, 0
; VI-NEXT:    v_mov_b32_e32 v0, 9
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_endpgm
; VI-NEXT:    .section .rodata,#alloc
; VI-NEXT:    .p2align 6
; VI-NEXT:    .amdhsa_kernel stackrealign_attr
; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
; VI-NEXT:     .amdhsa_private_segment_fixed_size 8
; VI-NEXT:     .amdhsa_kernarg_size 0
; VI-NEXT:     .amdhsa_user_sgpr_count 6
; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
; VI-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
; VI-NEXT:     .amdhsa_uses_dynamic_stack 0
; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
; VI-NEXT:     .amdhsa_system_vgpr_workitem_id 0
; VI-NEXT:     .amdhsa_next_free_vgpr 1
; VI-NEXT:     .amdhsa_next_free_sgpr 8
; VI-NEXT:     .amdhsa_reserve_vcc 0
; VI-NEXT:     .amdhsa_reserve_flat_scratch 0
; VI-NEXT:     .amdhsa_float_round_mode_32 0
; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
; VI-NEXT:     .amdhsa_float_denorm_mode_32 3
; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
; VI-NEXT:     .amdhsa_dx10_clamp 1
; VI-NEXT:     .amdhsa_ieee_mode 1
; VI-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
; VI-NEXT:     .amdhsa_exception_fp_denorm_src 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
; VI-NEXT:     .amdhsa_exception_int_div_zero 0
; VI-NEXT:    .end_amdhsa_kernel
; VI-NEXT:    .text
;
; GFX9-LABEL: stackrealign_attr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_u32 s0, s0, s7
; GFX9-NEXT:    s_addc_u32 s1, s1, 0
; GFX9-NEXT:    v_mov_b32_e32 v0, 9
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_endpgm
; GFX9-NEXT:    .section .rodata,#alloc
; GFX9-NEXT:    .p2align 6
; GFX9-NEXT:    .amdhsa_kernel stackrealign_attr
; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 8
; GFX9-NEXT:     .amdhsa_kernarg_size 0
; GFX9-NEXT:     .amdhsa_user_sgpr_count 6
; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
; GFX9-NEXT:     .amdhsa_uses_dynamic_stack 0
; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
; GFX9-NEXT:     .amdhsa_system_vgpr_workitem_id 0
; GFX9-NEXT:     .amdhsa_next_free_vgpr 1
; GFX9-NEXT:     .amdhsa_next_free_sgpr 8
; GFX9-NEXT:     .amdhsa_reserve_vcc 0
; GFX9-NEXT:     .amdhsa_reserve_flat_scratch 0
; GFX9-NEXT:     .amdhsa_reserve_xnack_mask 1
; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 3
; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
; GFX9-NEXT:     .amdhsa_dx10_clamp 1
; GFX9-NEXT:     .amdhsa_ieee_mode 1
; GFX9-NEXT:     .amdhsa_fp16_overflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
; GFX9-NEXT:     .amdhsa_exception_fp_denorm_src 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
; GFX9-NEXT:     .amdhsa_exception_int_div_zero 0
; GFX9-NEXT:    .end_amdhsa_kernel
; GFX9-NEXT:    .text
  %alloca.align = alloca i32, align 4, addrspace(5)
  ; volatile keeps the store (and therefore the stack object) in the output.
  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4
  ret void
}

; Kernel carrying alignstack=128 (group #2) with an ordinary 4-byte-aligned
; private i32 alloca. For both RUN lines the expected output stores to
; offset 4 of the buffer resource in s[0:3] and reports a private segment size
; of 128. The checked instruction sequence holds only the s0/s1 adjustment,
; the constant move, the store, the wait and s_endpgm.
define amdgpu_kernel void @alignstack_attr() #2 {
; VI-LABEL: alignstack_attr:
; VI:       ; %bb.0:
; VI-NEXT:    s_add_u32 s0, s0, s7
; VI-NEXT:    s_addc_u32 s1, s1, 0
; VI-NEXT:    v_mov_b32_e32 v0, 9
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_endpgm
; VI-NEXT:    .section .rodata,#alloc
; VI-NEXT:    .p2align 6
; VI-NEXT:    .amdhsa_kernel alignstack_attr
; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
; VI-NEXT:     .amdhsa_private_segment_fixed_size 128
; VI-NEXT:     .amdhsa_kernarg_size 0
; VI-NEXT:     .amdhsa_user_sgpr_count 6
; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
; VI-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
; VI-NEXT:     .amdhsa_uses_dynamic_stack 0
; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
; VI-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
; VI-NEXT:     .amdhsa_system_vgpr_workitem_id 0
; VI-NEXT:     .amdhsa_next_free_vgpr 1
; VI-NEXT:     .amdhsa_next_free_sgpr 8
; VI-NEXT:     .amdhsa_reserve_vcc 0
; VI-NEXT:     .amdhsa_reserve_flat_scratch 0
; VI-NEXT:     .amdhsa_float_round_mode_32 0
; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
; VI-NEXT:     .amdhsa_float_denorm_mode_32 3
; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
; VI-NEXT:     .amdhsa_dx10_clamp 1
; VI-NEXT:     .amdhsa_ieee_mode 1
; VI-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
; VI-NEXT:     .amdhsa_exception_fp_denorm_src 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
; VI-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
; VI-NEXT:     .amdhsa_exception_int_div_zero 0
; VI-NEXT:    .end_amdhsa_kernel
; VI-NEXT:    .text
;
; GFX9-LABEL: alignstack_attr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_u32 s0, s0, s7
; GFX9-NEXT:    s_addc_u32 s1, s1, 0
; GFX9-NEXT:    v_mov_b32_e32 v0, 9
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_endpgm
; GFX9-NEXT:    .section .rodata,#alloc
; GFX9-NEXT:    .p2align 6
; GFX9-NEXT:    .amdhsa_kernel alignstack_attr
; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 128
; GFX9-NEXT:     .amdhsa_kernarg_size 0
; GFX9-NEXT:     .amdhsa_user_sgpr_count 6
; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
; GFX9-NEXT:     .amdhsa_uses_dynamic_stack 0
; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
; GFX9-NEXT:     .amdhsa_system_vgpr_workitem_id 0
; GFX9-NEXT:     .amdhsa_next_free_vgpr 1
; GFX9-NEXT:     .amdhsa_next_free_sgpr 8
; GFX9-NEXT:     .amdhsa_reserve_vcc 0
; GFX9-NEXT:     .amdhsa_reserve_flat_scratch 0
; GFX9-NEXT:     .amdhsa_reserve_xnack_mask 1
; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 3
; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
; GFX9-NEXT:     .amdhsa_dx10_clamp 1
; GFX9-NEXT:     .amdhsa_ieee_mode 1
; GFX9-NEXT:     .amdhsa_fp16_overflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
; GFX9-NEXT:     .amdhsa_exception_fp_denorm_src 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
; GFX9-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
; GFX9-NEXT:     .amdhsa_exception_int_div_zero 0
; GFX9-NEXT:    .end_amdhsa_kernel
; GFX9-NEXT:    .text
  %alloca.align = alloca i32, align 4, addrspace(5)
  ; volatile keeps the store (and therefore the stack object) in the output.
  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4
  ret void
}

; Attribute groups referenced by the kernels in this file:
;   #0 - nounwind only; no stack-alignment request.
;   #1 - adds the "stackrealign" string attribute.
;   #2 - adds alignstack=128.
attributes #0 = { nounwind }
attributes #1 = { nounwind "stackrealign" }
attributes #2 = { nounwind alignstack=128 }