; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji --amdhsa-code-object-version=3 < %s | FileCheck -check-prefix=VI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 < %s | FileCheck -check-prefix=GFX9 %s

; Make sure the stack is never realigned for entry functions.

; Kernel with a single 128-byte-aligned private (addrspace(5)) alloca.
; For both targets the checks expect the volatile store to follow the
; s_add_u32/s_addc_u32 pair and the v_mov directly, addressed at a fixed
; offset of 128, and a private segment size of 256 bytes in the kernel
; descriptor.
define amdgpu_kernel void @max_alignment_128() #0 {
; VI-LABEL: max_alignment_128:
; VI:       ; %bb.0:
; VI-NEXT:    s_add_u32 s0, s0, s7
; VI-NEXT:    s_addc_u32 s1, s1, 0
; VI-NEXT:    v_mov_b32_e32 v0, 9
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:128
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_endpgm
; VI-NEXT:    .section .rodata,#alloc
; VI-NEXT:    .p2align 6
; VI-NEXT:    .amdhsa_kernel max_alignment_128
; VI-NEXT:    .amdhsa_group_segment_fixed_size 0
; VI-NEXT:    .amdhsa_private_segment_fixed_size 256
; VI-NEXT:    .amdhsa_kernarg_size 0
; VI-NEXT:    .amdhsa_user_sgpr_count 6
; VI-NEXT:    .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT:    .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT:    .amdhsa_user_sgpr_queue_ptr 0
; VI-NEXT:    .amdhsa_user_sgpr_kernarg_segment_ptr 0
; VI-NEXT:    .amdhsa_user_sgpr_dispatch_id 0
; VI-NEXT:    .amdhsa_user_sgpr_flat_scratch_init 1
; VI-NEXT:    .amdhsa_user_sgpr_private_segment_size 0
; VI-NEXT:    .amdhsa_uses_dynamic_stack 0
; VI-NEXT:    .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; VI-NEXT:    .amdhsa_system_sgpr_workgroup_id_x 1
; VI-NEXT:    .amdhsa_system_sgpr_workgroup_id_y 0
; VI-NEXT:    .amdhsa_system_sgpr_workgroup_id_z 0
; VI-NEXT:    .amdhsa_system_sgpr_workgroup_info 0
; VI-NEXT:    .amdhsa_system_vgpr_workitem_id 0
; VI-NEXT:    .amdhsa_next_free_vgpr 1
; VI-NEXT:    .amdhsa_next_free_sgpr 8
; VI-NEXT:    .amdhsa_reserve_vcc 0
; VI-NEXT:    .amdhsa_reserve_flat_scratch 0
; VI-NEXT:    .amdhsa_float_round_mode_32 0
; VI-NEXT:    .amdhsa_float_round_mode_16_64 0
; VI-NEXT:    .amdhsa_float_denorm_mode_32 3
; VI-NEXT:    .amdhsa_float_denorm_mode_16_64 3
; VI-NEXT:    .amdhsa_dx10_clamp 1
; VI-NEXT:    .amdhsa_ieee_mode 1
; VI-NEXT:    .amdhsa_exception_fp_ieee_invalid_op 0
; VI-NEXT:    .amdhsa_exception_fp_denorm_src 0
; VI-NEXT:    .amdhsa_exception_fp_ieee_div_zero 0
; VI-NEXT:    .amdhsa_exception_fp_ieee_overflow 0
; VI-NEXT:    .amdhsa_exception_fp_ieee_underflow 0
; VI-NEXT:    .amdhsa_exception_fp_ieee_inexact 0
; VI-NEXT:    .amdhsa_exception_int_div_zero 0
; VI-NEXT:    .end_amdhsa_kernel
; VI-NEXT:    .text
;
; GFX9-LABEL: max_alignment_128:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_u32 s0, s0, s7
; GFX9-NEXT:    s_addc_u32 s1, s1, 0
; GFX9-NEXT:    v_mov_b32_e32 v0, 9
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:128
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_endpgm
; GFX9-NEXT:    .section .rodata,#alloc
; GFX9-NEXT:    .p2align 6
; GFX9-NEXT:    .amdhsa_kernel max_alignment_128
; GFX9-NEXT:    .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT:    .amdhsa_private_segment_fixed_size 256
; GFX9-NEXT:    .amdhsa_kernarg_size 0
; GFX9-NEXT:    .amdhsa_user_sgpr_count 6
; GFX9-NEXT:    .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT:    .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT:    .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT:    .amdhsa_user_sgpr_kernarg_segment_ptr 0
; GFX9-NEXT:    .amdhsa_user_sgpr_dispatch_id 0
; GFX9-NEXT:    .amdhsa_user_sgpr_flat_scratch_init 1
; GFX9-NEXT:    .amdhsa_user_sgpr_private_segment_size 0
; GFX9-NEXT:    .amdhsa_uses_dynamic_stack 0
; GFX9-NEXT:    .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GFX9-NEXT:    .amdhsa_system_sgpr_workgroup_id_x 1
; GFX9-NEXT:    .amdhsa_system_sgpr_workgroup_id_y 0
; GFX9-NEXT:    .amdhsa_system_sgpr_workgroup_id_z 0
; GFX9-NEXT:    .amdhsa_system_sgpr_workgroup_info 0
; GFX9-NEXT:    .amdhsa_system_vgpr_workitem_id 0
; GFX9-NEXT:    .amdhsa_next_free_vgpr 1
; GFX9-NEXT:    .amdhsa_next_free_sgpr 8
; GFX9-NEXT:    .amdhsa_reserve_vcc 0
; GFX9-NEXT:    .amdhsa_reserve_flat_scratch 0
; GFX9-NEXT:    .amdhsa_reserve_xnack_mask 1
; GFX9-NEXT:    .amdhsa_float_round_mode_32 0
; GFX9-NEXT:    .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT:    .amdhsa_float_denorm_mode_32 3
; GFX9-NEXT:    .amdhsa_float_denorm_mode_16_64 3
; GFX9-NEXT:    .amdhsa_dx10_clamp 1
; GFX9-NEXT:    .amdhsa_ieee_mode 1
; GFX9-NEXT:    .amdhsa_fp16_overflow 0
; GFX9-NEXT:    .amdhsa_exception_fp_ieee_invalid_op 0
; GFX9-NEXT:    .amdhsa_exception_fp_denorm_src 0
; GFX9-NEXT:    .amdhsa_exception_fp_ieee_div_zero 0
; GFX9-NEXT:    .amdhsa_exception_fp_ieee_overflow 0
; GFX9-NEXT:    .amdhsa_exception_fp_ieee_underflow 0
; GFX9-NEXT:    .amdhsa_exception_fp_ieee_inexact 0
; GFX9-NEXT:    .amdhsa_exception_int_div_zero 0
; GFX9-NEXT:    .end_amdhsa_kernel
; GFX9-NEXT:    .text
  %alloca.align = alloca i32, align 128, addrspace(5)
  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 128
  ret void
}

; Kernel carrying the "stackrealign" string attribute (attribute group #1)
; with a 4-byte-aligned private alloca. The expected instruction sequence is
; the same as for the plain kernel; the store is addressed at offset 4 and
; the private segment size is 8 bytes.
define amdgpu_kernel void @stackrealign_attr() #1 {
; VI-LABEL: stackrealign_attr:
; VI:       ; %bb.0:
; VI-NEXT:    s_add_u32 s0, s0, s7
; VI-NEXT:    s_addc_u32 s1, s1, 0
; VI-NEXT:    v_mov_b32_e32 v0, 9
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_endpgm
; VI-NEXT:    .section .rodata,#alloc
; VI-NEXT:    .p2align 6
; VI-NEXT:    .amdhsa_kernel stackrealign_attr
; VI-NEXT:    .amdhsa_group_segment_fixed_size 0
; VI-NEXT:    .amdhsa_private_segment_fixed_size 8
; VI-NEXT:    .amdhsa_kernarg_size 0
; VI-NEXT:    .amdhsa_user_sgpr_count 6
; VI-NEXT:    .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT:    .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT:    .amdhsa_user_sgpr_queue_ptr 0
; VI-NEXT:    .amdhsa_user_sgpr_kernarg_segment_ptr 0
; VI-NEXT:    .amdhsa_user_sgpr_dispatch_id 0
; VI-NEXT:    .amdhsa_user_sgpr_flat_scratch_init 1
; VI-NEXT:    .amdhsa_user_sgpr_private_segment_size 0
; VI-NEXT:    .amdhsa_uses_dynamic_stack 0
; VI-NEXT:    .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; VI-NEXT:    .amdhsa_system_sgpr_workgroup_id_x 1
; VI-NEXT:    .amdhsa_system_sgpr_workgroup_id_y 0
; VI-NEXT:    .amdhsa_system_sgpr_workgroup_id_z 0
; VI-NEXT:    .amdhsa_system_sgpr_workgroup_info 0
; VI-NEXT:    .amdhsa_system_vgpr_workitem_id 0
; VI-NEXT:    .amdhsa_next_free_vgpr 1
; VI-NEXT:    .amdhsa_next_free_sgpr 8
; VI-NEXT:    .amdhsa_reserve_vcc 0
; VI-NEXT:    .amdhsa_reserve_flat_scratch 0
; VI-NEXT:    .amdhsa_float_round_mode_32 0
; VI-NEXT:    .amdhsa_float_round_mode_16_64 0
; VI-NEXT:    .amdhsa_float_denorm_mode_32 3
; VI-NEXT:    .amdhsa_float_denorm_mode_16_64 3
; VI-NEXT:    .amdhsa_dx10_clamp 1
; VI-NEXT:    .amdhsa_ieee_mode 1
; VI-NEXT:    .amdhsa_exception_fp_ieee_invalid_op 0
; VI-NEXT:    .amdhsa_exception_fp_denorm_src 0
; VI-NEXT:    .amdhsa_exception_fp_ieee_div_zero 0
; VI-NEXT:    .amdhsa_exception_fp_ieee_overflow 0
; VI-NEXT:    .amdhsa_exception_fp_ieee_underflow 0
; VI-NEXT:    .amdhsa_exception_fp_ieee_inexact 0
; VI-NEXT:    .amdhsa_exception_int_div_zero 0
; VI-NEXT:    .end_amdhsa_kernel
; VI-NEXT:    .text
;
; GFX9-LABEL: stackrealign_attr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_u32 s0, s0, s7
; GFX9-NEXT:    s_addc_u32 s1, s1, 0
; GFX9-NEXT:    v_mov_b32_e32 v0, 9
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_endpgm
; GFX9-NEXT:    .section .rodata,#alloc
; GFX9-NEXT:    .p2align 6
; GFX9-NEXT:    .amdhsa_kernel stackrealign_attr
; GFX9-NEXT:    .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT:    .amdhsa_private_segment_fixed_size 8
; GFX9-NEXT:    .amdhsa_kernarg_size 0
; GFX9-NEXT:    .amdhsa_user_sgpr_count 6
; GFX9-NEXT:    .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT:    .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT:    .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT:    .amdhsa_user_sgpr_kernarg_segment_ptr 0
; GFX9-NEXT:    .amdhsa_user_sgpr_dispatch_id 0
; GFX9-NEXT:    .amdhsa_user_sgpr_flat_scratch_init 1
; GFX9-NEXT:    .amdhsa_user_sgpr_private_segment_size 0
; GFX9-NEXT:    .amdhsa_uses_dynamic_stack 0
; GFX9-NEXT:    .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GFX9-NEXT:    .amdhsa_system_sgpr_workgroup_id_x 1
; GFX9-NEXT:    .amdhsa_system_sgpr_workgroup_id_y 0
; GFX9-NEXT:    .amdhsa_system_sgpr_workgroup_id_z 0
; GFX9-NEXT:    .amdhsa_system_sgpr_workgroup_info 0
; GFX9-NEXT:    .amdhsa_system_vgpr_workitem_id 0
; GFX9-NEXT:    .amdhsa_next_free_vgpr 1
; GFX9-NEXT:    .amdhsa_next_free_sgpr 8
; GFX9-NEXT:    .amdhsa_reserve_vcc 0
; GFX9-NEXT:    .amdhsa_reserve_flat_scratch 0
; GFX9-NEXT:    .amdhsa_reserve_xnack_mask 1
; GFX9-NEXT:    .amdhsa_float_round_mode_32 0
; GFX9-NEXT:    .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT:    .amdhsa_float_denorm_mode_32 3
; GFX9-NEXT:    .amdhsa_float_denorm_mode_16_64 3
; GFX9-NEXT:    .amdhsa_dx10_clamp 1
; GFX9-NEXT:    .amdhsa_ieee_mode 1
; GFX9-NEXT:    .amdhsa_fp16_overflow 0
; GFX9-NEXT:    .amdhsa_exception_fp_ieee_invalid_op 0
; GFX9-NEXT:    .amdhsa_exception_fp_denorm_src 0
; GFX9-NEXT:    .amdhsa_exception_fp_ieee_div_zero 0
; GFX9-NEXT:    .amdhsa_exception_fp_ieee_overflow 0
; GFX9-NEXT:    .amdhsa_exception_fp_ieee_underflow 0
; GFX9-NEXT:    .amdhsa_exception_fp_ieee_inexact 0
; GFX9-NEXT:    .amdhsa_exception_int_div_zero 0
; GFX9-NEXT:    .end_amdhsa_kernel
; GFX9-NEXT:    .text
  %alloca.align = alloca i32, align 4, addrspace(5)
  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4
  ret void
}

; Kernel carrying alignstack=128 (attribute group #2) with a 4-byte-aligned
; private alloca. The expected instruction sequence is again unchanged and
; the store is addressed at offset 4; only the private segment size differs
; from the "stackrealign" case (128 bytes here).
define amdgpu_kernel void @alignstack_attr() #2 {
; VI-LABEL: alignstack_attr:
; VI:       ; %bb.0:
; VI-NEXT:    s_add_u32 s0, s0, s7
; VI-NEXT:    s_addc_u32 s1, s1, 0
; VI-NEXT:    v_mov_b32_e32 v0, 9
; VI-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    s_endpgm
; VI-NEXT:    .section .rodata,#alloc
; VI-NEXT:    .p2align 6
; VI-NEXT:    .amdhsa_kernel alignstack_attr
; VI-NEXT:    .amdhsa_group_segment_fixed_size 0
; VI-NEXT:    .amdhsa_private_segment_fixed_size 128
; VI-NEXT:    .amdhsa_kernarg_size 0
; VI-NEXT:    .amdhsa_user_sgpr_count 6
; VI-NEXT:    .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT:    .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT:    .amdhsa_user_sgpr_queue_ptr 0
; VI-NEXT:    .amdhsa_user_sgpr_kernarg_segment_ptr 0
; VI-NEXT:    .amdhsa_user_sgpr_dispatch_id 0
; VI-NEXT:    .amdhsa_user_sgpr_flat_scratch_init 1
; VI-NEXT:    .amdhsa_user_sgpr_private_segment_size 0
; VI-NEXT:    .amdhsa_uses_dynamic_stack 0
; VI-NEXT:    .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; VI-NEXT:    .amdhsa_system_sgpr_workgroup_id_x 1
; VI-NEXT:    .amdhsa_system_sgpr_workgroup_id_y 0
; VI-NEXT:    .amdhsa_system_sgpr_workgroup_id_z 0
; VI-NEXT:    .amdhsa_system_sgpr_workgroup_info 0
; VI-NEXT:    .amdhsa_system_vgpr_workitem_id 0
; VI-NEXT:    .amdhsa_next_free_vgpr 1
; VI-NEXT:    .amdhsa_next_free_sgpr 8
; VI-NEXT:    .amdhsa_reserve_vcc 0
; VI-NEXT:    .amdhsa_reserve_flat_scratch 0
; VI-NEXT:    .amdhsa_float_round_mode_32 0
; VI-NEXT:    .amdhsa_float_round_mode_16_64 0
; VI-NEXT:    .amdhsa_float_denorm_mode_32 3
; VI-NEXT:    .amdhsa_float_denorm_mode_16_64 3
; VI-NEXT:    .amdhsa_dx10_clamp 1
; VI-NEXT:    .amdhsa_ieee_mode 1
; VI-NEXT:    .amdhsa_exception_fp_ieee_invalid_op 0
; VI-NEXT:    .amdhsa_exception_fp_denorm_src 0
; VI-NEXT:    .amdhsa_exception_fp_ieee_div_zero 0
; VI-NEXT:    .amdhsa_exception_fp_ieee_overflow 0
; VI-NEXT:    .amdhsa_exception_fp_ieee_underflow 0
; VI-NEXT:    .amdhsa_exception_fp_ieee_inexact 0
; VI-NEXT:    .amdhsa_exception_int_div_zero 0
; VI-NEXT:    .end_amdhsa_kernel
; VI-NEXT:    .text
;
; GFX9-LABEL: alignstack_attr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_u32 s0, s0, s7
; GFX9-NEXT:    s_addc_u32 s1, s1, 0
; GFX9-NEXT:    v_mov_b32_e32 v0, 9
; GFX9-NEXT:    buffer_store_dword v0, off, s[0:3], 0 offset:4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_endpgm
; GFX9-NEXT:    .section .rodata,#alloc
; GFX9-NEXT:    .p2align 6
; GFX9-NEXT:    .amdhsa_kernel alignstack_attr
; GFX9-NEXT:    .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT:    .amdhsa_private_segment_fixed_size 128
; GFX9-NEXT:    .amdhsa_kernarg_size 0
; GFX9-NEXT:    .amdhsa_user_sgpr_count 6
; GFX9-NEXT:    .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT:    .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT:    .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT:    .amdhsa_user_sgpr_kernarg_segment_ptr 0
; GFX9-NEXT:    .amdhsa_user_sgpr_dispatch_id 0
; GFX9-NEXT:    .amdhsa_user_sgpr_flat_scratch_init 1
; GFX9-NEXT:    .amdhsa_user_sgpr_private_segment_size 0
; GFX9-NEXT:    .amdhsa_uses_dynamic_stack 0
; GFX9-NEXT:    .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GFX9-NEXT:    .amdhsa_system_sgpr_workgroup_id_x 1
; GFX9-NEXT:    .amdhsa_system_sgpr_workgroup_id_y 0
; GFX9-NEXT:    .amdhsa_system_sgpr_workgroup_id_z 0
; GFX9-NEXT:    .amdhsa_system_sgpr_workgroup_info 0
; GFX9-NEXT:    .amdhsa_system_vgpr_workitem_id 0
; GFX9-NEXT:    .amdhsa_next_free_vgpr 1
; GFX9-NEXT:    .amdhsa_next_free_sgpr 8
; GFX9-NEXT:    .amdhsa_reserve_vcc 0
; GFX9-NEXT:    .amdhsa_reserve_flat_scratch 0
; GFX9-NEXT:    .amdhsa_reserve_xnack_mask 1
; GFX9-NEXT:    .amdhsa_float_round_mode_32 0
; GFX9-NEXT:    .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT:    .amdhsa_float_denorm_mode_32 3
; GFX9-NEXT:    .amdhsa_float_denorm_mode_16_64 3
; GFX9-NEXT:    .amdhsa_dx10_clamp 1
; GFX9-NEXT:    .amdhsa_ieee_mode 1
; GFX9-NEXT:    .amdhsa_fp16_overflow 0
; GFX9-NEXT:    .amdhsa_exception_fp_ieee_invalid_op 0
; GFX9-NEXT:    .amdhsa_exception_fp_denorm_src 0
; GFX9-NEXT:    .amdhsa_exception_fp_ieee_div_zero 0
; GFX9-NEXT:    .amdhsa_exception_fp_ieee_overflow 0
; GFX9-NEXT:    .amdhsa_exception_fp_ieee_underflow 0
; GFX9-NEXT:    .amdhsa_exception_fp_ieee_inexact 0
; GFX9-NEXT:    .amdhsa_exception_int_div_zero 0
; GFX9-NEXT:    .end_amdhsa_kernel
; GFX9-NEXT:    .text
  %alloca.align = alloca i32, align 4, addrspace(5)
  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4
  ret void
}

; Attribute groups: #0 is the baseline (nounwind only), #1 adds the
; "stackrealign" string attribute, #2 adds alignstack=128.
attributes #0 = { nounwind }
attributes #1 = { nounwind "stackrealign" }
attributes #2 = { nounwind alignstack=128 }