; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck -check-prefix=VI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s

; Make sure the stack is never realigned for entry functions.
;
; Every kernel below makes a single i32 alloca in addrspace(5) and performs one
; volatile store of the constant 9 to it. The two RUN lines compile the file
; for fiji and for gfx900 and verify the output with the VI and GFX9 check
; prefixes respectively. For each kernel the expected instruction stream is a
; contiguous chain from %bb.0 to s_endpgm holding only the writes to
; flat_scratch_lo/hi, the v_mov of the constant and the buffer store; the
; kernel descriptor directives follow.

; @max_alignment_128 uses attribute group #0 (nounwind only) and requests
; 128-byte alignment on the alloca itself. The expected store addresses the
; slot as "s7 offset:128" and the expected private segment size is 256.
; NOTE(review): 256 presumably comes from placing the 128-aligned slot at
; offset 128 and rounding the frame up to the alignment - confirm.
define amdgpu_kernel void @max_alignment_128() #0 {
; VI-LABEL: max_alignment_128:
; VI: ; %bb.0:
; VI-NEXT: s_add_u32 s4, s4, s7
; VI-NEXT: v_mov_b32_e32 v0, 9
; VI-NEXT: s_mov_b32 flat_scratch_lo, s5
; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; VI-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:128
; VI-NEXT: s_endpgm
; VI-NEXT: .section .rodata,#alloc
; VI-NEXT: .p2align 6
; VI-NEXT: .amdhsa_kernel max_alignment_128
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
; VI-NEXT: .amdhsa_private_segment_fixed_size 256
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; VI-NEXT: .amdhsa_system_vgpr_workitem_id 0
; VI-NEXT: .amdhsa_next_free_vgpr 1
; VI-NEXT: .amdhsa_next_free_sgpr 8
; VI-NEXT: .amdhsa_reserve_vcc 0
; VI-NEXT: .amdhsa_float_round_mode_32 0
; VI-NEXT: .amdhsa_float_round_mode_16_64 0
; VI-NEXT: .amdhsa_float_denorm_mode_32 0
; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3
; VI-NEXT: .amdhsa_dx10_clamp 1
; VI-NEXT: .amdhsa_ieee_mode 1
; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
; VI-NEXT: .amdhsa_exception_fp_denorm_src 0
; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0
; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0
; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0
; VI-NEXT: .amdhsa_exception_int_div_zero 0
; VI-NEXT: .end_amdhsa_kernel
; VI-NEXT: .text
;
; GFX9-LABEL: max_alignment_128:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s4, s7
; GFX9-NEXT: v_mov_b32_e32 v0, 9
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:128
; GFX9-NEXT: s_endpgm
; GFX9-NEXT: .section .rodata,#alloc
; GFX9-NEXT: .p2align 6
; GFX9-NEXT: .amdhsa_kernel max_alignment_128
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 256
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 0
; GFX9-NEXT: .amdhsa_next_free_vgpr 1
; GFX9-NEXT: .amdhsa_next_free_sgpr 8
; GFX9-NEXT: .amdhsa_reserve_vcc 0
; GFX9-NEXT: .amdhsa_float_round_mode_32 0
; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT: .amdhsa_float_denorm_mode_32 0
; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3
; GFX9-NEXT: .amdhsa_dx10_clamp 1
; GFX9-NEXT: .amdhsa_ieee_mode 1
; GFX9-NEXT: .amdhsa_fp16_overflow 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0
; GFX9-NEXT: .amdhsa_exception_int_div_zero 0
; GFX9-NEXT: .end_amdhsa_kernel
; GFX9-NEXT: .text
  %alloca.align = alloca i32, align 128, addrspace(5)
  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 128
  ret void
}

; @stackrealign_attr uses attribute group #1, which adds the "stackrealign"
; string attribute, while the alloca only asks for 4-byte alignment. The
; expected store addresses the slot as "s7 offset:4" and the expected private
; segment size is 8.
define amdgpu_kernel void @stackrealign_attr() #1 {
; VI-LABEL: stackrealign_attr:
; VI: ; %bb.0:
; VI-NEXT: s_add_u32 s4, s4, s7
; VI-NEXT: v_mov_b32_e32 v0, 9
; VI-NEXT: s_mov_b32 flat_scratch_lo, s5
; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; VI-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:4
; VI-NEXT: s_endpgm
; VI-NEXT: .section .rodata,#alloc
; VI-NEXT: .p2align 6
; VI-NEXT: .amdhsa_kernel stackrealign_attr
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
; VI-NEXT: .amdhsa_private_segment_fixed_size 8
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; VI-NEXT: .amdhsa_system_vgpr_workitem_id 0
; VI-NEXT: .amdhsa_next_free_vgpr 1
; VI-NEXT: .amdhsa_next_free_sgpr 8
; VI-NEXT: .amdhsa_reserve_vcc 0
; VI-NEXT: .amdhsa_float_round_mode_32 0
; VI-NEXT: .amdhsa_float_round_mode_16_64 0
; VI-NEXT: .amdhsa_float_denorm_mode_32 0
; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3
; VI-NEXT: .amdhsa_dx10_clamp 1
; VI-NEXT: .amdhsa_ieee_mode 1
; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
; VI-NEXT: .amdhsa_exception_fp_denorm_src 0
; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0
; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0
; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0
; VI-NEXT: .amdhsa_exception_int_div_zero 0
; VI-NEXT: .end_amdhsa_kernel
; VI-NEXT: .text
;
; GFX9-LABEL: stackrealign_attr:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s4, s7
; GFX9-NEXT: v_mov_b32_e32 v0, 9
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:4
; GFX9-NEXT: s_endpgm
; GFX9-NEXT: .section .rodata,#alloc
; GFX9-NEXT: .p2align 6
; GFX9-NEXT: .amdhsa_kernel stackrealign_attr
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 8
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 0
; GFX9-NEXT: .amdhsa_next_free_vgpr 1
; GFX9-NEXT: .amdhsa_next_free_sgpr 8
; GFX9-NEXT: .amdhsa_reserve_vcc 0
; GFX9-NEXT: .amdhsa_float_round_mode_32 0
; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT: .amdhsa_float_denorm_mode_32 0
; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3
; GFX9-NEXT: .amdhsa_dx10_clamp 1
; GFX9-NEXT: .amdhsa_ieee_mode 1
; GFX9-NEXT: .amdhsa_fp16_overflow 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0
; GFX9-NEXT: .amdhsa_exception_int_div_zero 0
; GFX9-NEXT: .end_amdhsa_kernel
; GFX9-NEXT: .text
  %alloca.align = alloca i32, align 4, addrspace(5)
  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4
  ret void
}

; @alignstack_attr uses attribute group #2, which sets alignstack=128, while
; the alloca only asks for 4-byte alignment. The expected store still addresses
; the slot as "s7 offset:4", but the expected private segment size is 128.
; NOTE(review): the 128 presumably reflects the frame size being rounded up to
; the requested stack alignment - confirm.
define amdgpu_kernel void @alignstack_attr() #2 {
; VI-LABEL: alignstack_attr:
; VI: ; %bb.0:
; VI-NEXT: s_add_u32 s4, s4, s7
; VI-NEXT: v_mov_b32_e32 v0, 9
; VI-NEXT: s_mov_b32 flat_scratch_lo, s5
; VI-NEXT: s_lshr_b32 flat_scratch_hi, s4, 8
; VI-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:4
; VI-NEXT: s_endpgm
; VI-NEXT: .section .rodata,#alloc
; VI-NEXT: .p2align 6
; VI-NEXT: .amdhsa_kernel alignstack_attr
; VI-NEXT: .amdhsa_group_segment_fixed_size 0
; VI-NEXT: .amdhsa_private_segment_fixed_size 128
; VI-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; VI-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; VI-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; VI-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
; VI-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; VI-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
; VI-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
; VI-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; VI-NEXT: .amdhsa_system_vgpr_workitem_id 0
; VI-NEXT: .amdhsa_next_free_vgpr 1
; VI-NEXT: .amdhsa_next_free_sgpr 8
; VI-NEXT: .amdhsa_reserve_vcc 0
; VI-NEXT: .amdhsa_float_round_mode_32 0
; VI-NEXT: .amdhsa_float_round_mode_16_64 0
; VI-NEXT: .amdhsa_float_denorm_mode_32 0
; VI-NEXT: .amdhsa_float_denorm_mode_16_64 3
; VI-NEXT: .amdhsa_dx10_clamp 1
; VI-NEXT: .amdhsa_ieee_mode 1
; VI-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
; VI-NEXT: .amdhsa_exception_fp_denorm_src 0
; VI-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
; VI-NEXT: .amdhsa_exception_fp_ieee_overflow 0
; VI-NEXT: .amdhsa_exception_fp_ieee_underflow 0
; VI-NEXT: .amdhsa_exception_fp_ieee_inexact 0
; VI-NEXT: .amdhsa_exception_int_div_zero 0
; VI-NEXT: .end_amdhsa_kernel
; VI-NEXT: .text
;
; GFX9-LABEL: alignstack_attr:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_add_u32 flat_scratch_lo, s4, s7
; GFX9-NEXT: v_mov_b32_e32 v0, 9
; GFX9-NEXT: s_addc_u32 flat_scratch_hi, s5, 0
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s7 offset:4
; GFX9-NEXT: s_endpgm
; GFX9-NEXT: .section .rodata,#alloc
; GFX9-NEXT: .p2align 6
; GFX9-NEXT: .amdhsa_kernel alignstack_attr
; GFX9-NEXT: .amdhsa_group_segment_fixed_size 0
; GFX9-NEXT: .amdhsa_private_segment_fixed_size 128
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_queue_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
; GFX9-NEXT: .amdhsa_user_sgpr_dispatch_id 0
; GFX9-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
; GFX9-NEXT: .amdhsa_user_sgpr_private_segment_size 0
; GFX9-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
; GFX9-NEXT: .amdhsa_system_sgpr_workgroup_info 0
; GFX9-NEXT: .amdhsa_system_vgpr_workitem_id 0
; GFX9-NEXT: .amdhsa_next_free_vgpr 1
; GFX9-NEXT: .amdhsa_next_free_sgpr 8
; GFX9-NEXT: .amdhsa_reserve_vcc 0
; GFX9-NEXT: .amdhsa_float_round_mode_32 0
; GFX9-NEXT: .amdhsa_float_round_mode_16_64 0
; GFX9-NEXT: .amdhsa_float_denorm_mode_32 0
; GFX9-NEXT: .amdhsa_float_denorm_mode_16_64 3
; GFX9-NEXT: .amdhsa_dx10_clamp 1
; GFX9-NEXT: .amdhsa_ieee_mode 1
; GFX9-NEXT: .amdhsa_fp16_overflow 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
; GFX9-NEXT: .amdhsa_exception_fp_denorm_src 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_overflow 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_underflow 0
; GFX9-NEXT: .amdhsa_exception_fp_ieee_inexact 0
; GFX9-NEXT: .amdhsa_exception_int_div_zero 0
; GFX9-NEXT: .end_amdhsa_kernel
; GFX9-NEXT: .text
  %alloca.align = alloca i32, align 4, addrspace(5)
  store volatile i32 9, i32 addrspace(5)* %alloca.align, align 4
  ret void
}

; Attribute groups referenced by the three kernels above: #0 is the baseline,
; #1 adds the "stackrealign" string attribute, #2 adds alignstack=128.
attributes #0 = { nounwind }
attributes #1 = { nounwind "stackrealign" }
attributes #2 = { nounwind alignstack=128 }