1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=VI %s
3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900  < %s | FileCheck -check-prefix=GFX9 %s
4
5; Make sure the stack is never realigned for entry functions.
6
; max_alignment_128: kernel (attribute group #0) that allocates one i32 in
; addrspace(5) with align 128 and performs a volatile store of 9 to it.
; For both check prefixes the expected output has the store at
; "s7 offset:128" and .amdhsa_private_segment_fixed_size 256; the expected
; instruction sequence consists only of the flat_scratch setup, the v_mov of
; the constant, the store and s_endpgm.
7define amdgpu_kernel void @max_alignment_128() #0 {
8; VI-LABEL: max_alignment_128:
9; VI:       ; %bb.0:
10; VI-NEXT:    s_add_u32 s4, s4, s7
11; VI-NEXT:    v_mov_b32_e32 v0, 9
12; VI-NEXT:    s_mov_b32 flat_scratch_lo, s5
13; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
14; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s7 offset:128
15; VI-NEXT:    s_endpgm
16; VI-NEXT:    .section .rodata,#alloc
17; VI-NEXT:    .p2align 6
18; VI-NEXT:    .amdhsa_kernel max_alignment_128
19; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
20; VI-NEXT:     .amdhsa_private_segment_fixed_size 256
21; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
22; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
23; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
24; VI-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
25; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
26; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
27; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
28; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
29; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
30; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
31; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
32; VI-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
33; VI-NEXT:     .amdhsa_system_vgpr_workitem_id 0
34; VI-NEXT:     .amdhsa_next_free_vgpr 1
35; VI-NEXT:     .amdhsa_next_free_sgpr 8
36; VI-NEXT:     .amdhsa_reserve_vcc 0
37; VI-NEXT:     .amdhsa_float_round_mode_32 0
38; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
39; VI-NEXT:     .amdhsa_float_denorm_mode_32 0
40; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
41; VI-NEXT:     .amdhsa_dx10_clamp 1
42; VI-NEXT:     .amdhsa_ieee_mode 1
43; VI-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
44; VI-NEXT:     .amdhsa_exception_fp_denorm_src 0
45; VI-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
46; VI-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
47; VI-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
48; VI-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
49; VI-NEXT:     .amdhsa_exception_int_div_zero 0
50; VI-NEXT:    .end_amdhsa_kernel
51; VI-NEXT:    .text
52;
53; GFX9-LABEL: max_alignment_128:
54; GFX9:       ; %bb.0:
55; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
56; GFX9-NEXT:    v_mov_b32_e32 v0, 9
57; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
58; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s7 offset:128
59; GFX9-NEXT:    s_endpgm
60; GFX9-NEXT:    .section .rodata,#alloc
61; GFX9-NEXT:    .p2align 6
62; GFX9-NEXT:    .amdhsa_kernel max_alignment_128
63; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
64; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 256
65; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
66; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
67; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
68; GFX9-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
69; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
70; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
71; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
72; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
73; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
74; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
75; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
76; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
77; GFX9-NEXT:     .amdhsa_system_vgpr_workitem_id 0
78; GFX9-NEXT:     .amdhsa_next_free_vgpr 1
79; GFX9-NEXT:     .amdhsa_next_free_sgpr 8
80; GFX9-NEXT:     .amdhsa_reserve_vcc 0
81; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
82; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
83; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 0
84; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
85; GFX9-NEXT:     .amdhsa_dx10_clamp 1
86; GFX9-NEXT:     .amdhsa_ieee_mode 1
87; GFX9-NEXT:     .amdhsa_fp16_overflow 0
88; GFX9-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
89; GFX9-NEXT:     .amdhsa_exception_fp_denorm_src 0
90; GFX9-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
91; GFX9-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
92; GFX9-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
93; GFX9-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
94; GFX9-NEXT:     .amdhsa_exception_int_div_zero 0
95; GFX9-NEXT:    .end_amdhsa_kernel
96; GFX9-NEXT:    .text
97  %alloca.align = alloca i32, align 128, addrspace(5)
98  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 128
99  ret void
100}
101
; stackrealign_attr: kernel using attribute group #1 (which carries the
; "stackrealign" string attribute). The alloca and the volatile store of 9
; are only align 4.
; For both check prefixes the expected output has the store at
; "s7 offset:4" and .amdhsa_private_segment_fixed_size 8; the expected
; instruction sequence consists only of the flat_scratch setup, the v_mov of
; the constant, the store and s_endpgm.
102define amdgpu_kernel void @stackrealign_attr() #1 {
103; VI-LABEL: stackrealign_attr:
104; VI:       ; %bb.0:
105; VI-NEXT:    s_add_u32 s4, s4, s7
106; VI-NEXT:    v_mov_b32_e32 v0, 9
107; VI-NEXT:    s_mov_b32 flat_scratch_lo, s5
108; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
109; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s7 offset:4
110; VI-NEXT:    s_endpgm
111; VI-NEXT:    .section .rodata,#alloc
112; VI-NEXT:    .p2align 6
113; VI-NEXT:    .amdhsa_kernel stackrealign_attr
114; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
115; VI-NEXT:     .amdhsa_private_segment_fixed_size 8
116; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
117; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
118; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
119; VI-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
120; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
121; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
122; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
123; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
124; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
125; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
126; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
127; VI-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
128; VI-NEXT:     .amdhsa_system_vgpr_workitem_id 0
129; VI-NEXT:     .amdhsa_next_free_vgpr 1
130; VI-NEXT:     .amdhsa_next_free_sgpr 8
131; VI-NEXT:     .amdhsa_reserve_vcc 0
132; VI-NEXT:     .amdhsa_float_round_mode_32 0
133; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
134; VI-NEXT:     .amdhsa_float_denorm_mode_32 0
135; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
136; VI-NEXT:     .amdhsa_dx10_clamp 1
137; VI-NEXT:     .amdhsa_ieee_mode 1
138; VI-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
139; VI-NEXT:     .amdhsa_exception_fp_denorm_src 0
140; VI-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
141; VI-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
142; VI-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
143; VI-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
144; VI-NEXT:     .amdhsa_exception_int_div_zero 0
145; VI-NEXT:    .end_amdhsa_kernel
146; VI-NEXT:    .text
147;
148; GFX9-LABEL: stackrealign_attr:
149; GFX9:       ; %bb.0:
150; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
151; GFX9-NEXT:    v_mov_b32_e32 v0, 9
152; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
153; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s7 offset:4
154; GFX9-NEXT:    s_endpgm
155; GFX9-NEXT:    .section .rodata,#alloc
156; GFX9-NEXT:    .p2align 6
157; GFX9-NEXT:    .amdhsa_kernel stackrealign_attr
158; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
159; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 8
160; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
161; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
162; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
163; GFX9-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
164; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
165; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
166; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
167; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
168; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
169; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
170; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
171; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
172; GFX9-NEXT:     .amdhsa_system_vgpr_workitem_id 0
173; GFX9-NEXT:     .amdhsa_next_free_vgpr 1
174; GFX9-NEXT:     .amdhsa_next_free_sgpr 8
175; GFX9-NEXT:     .amdhsa_reserve_vcc 0
176; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
177; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
178; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 0
179; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
180; GFX9-NEXT:     .amdhsa_dx10_clamp 1
181; GFX9-NEXT:     .amdhsa_ieee_mode 1
182; GFX9-NEXT:     .amdhsa_fp16_overflow 0
183; GFX9-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
184; GFX9-NEXT:     .amdhsa_exception_fp_denorm_src 0
185; GFX9-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
186; GFX9-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
187; GFX9-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
188; GFX9-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
189; GFX9-NEXT:     .amdhsa_exception_int_div_zero 0
190; GFX9-NEXT:    .end_amdhsa_kernel
191; GFX9-NEXT:    .text
192  %alloca.align = alloca i32, align 4, addrspace(5)
193  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4
194  ret void
195}
196
; alignstack_attr: kernel using attribute group #2 (which carries
; alignstack=128). The alloca and the volatile store of 9 are only align 4.
; For both check prefixes the expected output has the store at
; "s7 offset:4" and .amdhsa_private_segment_fixed_size 128; the expected
; instruction sequence consists only of the flat_scratch setup, the v_mov of
; the constant, the store and s_endpgm.
197define amdgpu_kernel void @alignstack_attr() #2 {
198; VI-LABEL: alignstack_attr:
199; VI:       ; %bb.0:
200; VI-NEXT:    s_add_u32 s4, s4, s7
201; VI-NEXT:    v_mov_b32_e32 v0, 9
202; VI-NEXT:    s_mov_b32 flat_scratch_lo, s5
203; VI-NEXT:    s_lshr_b32 flat_scratch_hi, s4, 8
204; VI-NEXT:    buffer_store_dword v0, off, s[0:3], s7 offset:4
205; VI-NEXT:    s_endpgm
206; VI-NEXT:    .section .rodata,#alloc
207; VI-NEXT:    .p2align 6
208; VI-NEXT:    .amdhsa_kernel alignstack_attr
209; VI-NEXT:     .amdhsa_group_segment_fixed_size 0
210; VI-NEXT:     .amdhsa_private_segment_fixed_size 128
211; VI-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
212; VI-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
213; VI-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
214; VI-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
215; VI-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
216; VI-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
217; VI-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
218; VI-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
219; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
220; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
221; VI-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
222; VI-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
223; VI-NEXT:     .amdhsa_system_vgpr_workitem_id 0
224; VI-NEXT:     .amdhsa_next_free_vgpr 1
225; VI-NEXT:     .amdhsa_next_free_sgpr 8
226; VI-NEXT:     .amdhsa_reserve_vcc 0
227; VI-NEXT:     .amdhsa_float_round_mode_32 0
228; VI-NEXT:     .amdhsa_float_round_mode_16_64 0
229; VI-NEXT:     .amdhsa_float_denorm_mode_32 0
230; VI-NEXT:     .amdhsa_float_denorm_mode_16_64 3
231; VI-NEXT:     .amdhsa_dx10_clamp 1
232; VI-NEXT:     .amdhsa_ieee_mode 1
233; VI-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
234; VI-NEXT:     .amdhsa_exception_fp_denorm_src 0
235; VI-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
236; VI-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
237; VI-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
238; VI-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
239; VI-NEXT:     .amdhsa_exception_int_div_zero 0
240; VI-NEXT:    .end_amdhsa_kernel
241; VI-NEXT:    .text
242;
243; GFX9-LABEL: alignstack_attr:
244; GFX9:       ; %bb.0:
245; GFX9-NEXT:    s_add_u32 flat_scratch_lo, s4, s7
246; GFX9-NEXT:    v_mov_b32_e32 v0, 9
247; GFX9-NEXT:    s_addc_u32 flat_scratch_hi, s5, 0
248; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], s7 offset:4
249; GFX9-NEXT:    s_endpgm
250; GFX9-NEXT:    .section .rodata,#alloc
251; GFX9-NEXT:    .p2align 6
252; GFX9-NEXT:    .amdhsa_kernel alignstack_attr
253; GFX9-NEXT:     .amdhsa_group_segment_fixed_size 0
254; GFX9-NEXT:     .amdhsa_private_segment_fixed_size 128
255; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
256; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_ptr 0
257; GFX9-NEXT:     .amdhsa_user_sgpr_queue_ptr 0
258; GFX9-NEXT:     .amdhsa_user_sgpr_kernarg_segment_ptr 0
259; GFX9-NEXT:     .amdhsa_user_sgpr_dispatch_id 0
260; GFX9-NEXT:     .amdhsa_user_sgpr_flat_scratch_init 1
261; GFX9-NEXT:     .amdhsa_user_sgpr_private_segment_size 0
262; GFX9-NEXT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
263; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_x 1
264; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_y 0
265; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_id_z 0
266; GFX9-NEXT:     .amdhsa_system_sgpr_workgroup_info 0
267; GFX9-NEXT:     .amdhsa_system_vgpr_workitem_id 0
268; GFX9-NEXT:     .amdhsa_next_free_vgpr 1
269; GFX9-NEXT:     .amdhsa_next_free_sgpr 8
270; GFX9-NEXT:     .amdhsa_reserve_vcc 0
271; GFX9-NEXT:     .amdhsa_float_round_mode_32 0
272; GFX9-NEXT:     .amdhsa_float_round_mode_16_64 0
273; GFX9-NEXT:     .amdhsa_float_denorm_mode_32 0
274; GFX9-NEXT:     .amdhsa_float_denorm_mode_16_64 3
275; GFX9-NEXT:     .amdhsa_dx10_clamp 1
276; GFX9-NEXT:     .amdhsa_ieee_mode 1
277; GFX9-NEXT:     .amdhsa_fp16_overflow 0
278; GFX9-NEXT:     .amdhsa_exception_fp_ieee_invalid_op 0
279; GFX9-NEXT:     .amdhsa_exception_fp_denorm_src 0
280; GFX9-NEXT:     .amdhsa_exception_fp_ieee_div_zero 0
281; GFX9-NEXT:     .amdhsa_exception_fp_ieee_overflow 0
282; GFX9-NEXT:     .amdhsa_exception_fp_ieee_underflow 0
283; GFX9-NEXT:     .amdhsa_exception_fp_ieee_inexact 0
284; GFX9-NEXT:     .amdhsa_exception_int_div_zero 0
285; GFX9-NEXT:    .end_amdhsa_kernel
286; GFX9-NEXT:    .text
287  %alloca.align = alloca i32, align 4, addrspace(5)
288  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4
289  ret void
290}
291
; Attribute groups referenced by the kernels in this file:
;   #0 - nounwind only                          (@max_alignment_128)
;   #1 - nounwind plus the "stackrealign" string attribute (@stackrealign_attr)
;   #2 - nounwind plus alignstack=128           (@alignstack_attr)
292attributes #0 = { nounwind }
293attributes #1 = { nounwind "stackrealign" }
294attributes #2 = { nounwind alignstack=128 }
295