; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,RW-FLAT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch < %s | FileCheck -check-prefixes=GCN,RO-FLAT %s

; Make sure flat_scratch_init is set
;
; The same three kernels are compiled twice for gfx900: once with default
; attributes (checked under the RW-FLAT prefix) and once with
; +architected-flat-scratch (checked under the RO-FLAT prefix). Expectations
; shared by both invocations use the GCN prefix.

; Kernel with one addrspace(5) stack object that is accessed through an
; addrspacecast'ed pointer, so the store is expected to be a flat store. The
; default invocation expects the flat_scratch_lo/hi setup instructions; the
; architected invocation expects no mention of flat_scratch at all.

; GCN-LABEL: {{^}}stack_object_addrspacecast_in_kernel_no_calls:
; RW-FLAT: s_add_u32 flat_scratch_lo, s4, s7
; RW-FLAT: s_addc_u32 flat_scratch_hi, s5, 0
; RO-FLAT-NOT: flat_scratch
; GCN: flat_store_dword
; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
; RW-FLAT: .amdhsa_user_sgpr_flat_scratch_init 1
; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
; RW-FLAT: .amdhsa_system_sgpr_private_segment_wavefront_offset
; RW-FLAT-NOT: .amdhsa_enable_private_segment
; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
; RO-FLAT: .amdhsa_enable_private_segment 1
; GCN-NOT: .amdhsa_reserve_flat_scratch
; GCN: COMPUTE_PGM_RSRC2:SCRATCH_EN: 1
; RW-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; RO-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 0
define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() {
  %alloca = alloca i32, addrspace(5)
  %cast = addrspacecast i32 addrspace(5)* %alloca to i32*
  store volatile i32 0, i32* %cast
  ret void
}

; Kernel that stores directly to its addrspace(5) stack object, with no
; addrspacecast. The default invocation expects a buffer store and the
; architected invocation a scratch store. The default invocation still expects
; the flat_scratch_init user SGPR to be enabled here, which is what the TODO
; below refers to.

; TODO: Could optimize out in this case
; GCN-LABEL: {{^}}stack_object_in_kernel_no_calls:
; RO-FLAT-NOT: flat_scratch
; RW-FLAT: buffer_store_dword
; RO-FLAT: scratch_store_dword
; RW-FLAT: .amdhsa_user_sgpr_private_segment_buffer 1
; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
; RW-FLAT: .amdhsa_user_sgpr_flat_scratch_init 1
; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
; RW-FLAT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; RW-FLAT-NOT: .amdhsa_enable_private_segment
; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
; RO-FLAT: .amdhsa_enable_private_segment 1
; RW-FLAT: .amdhsa_reserve_flat_scratch 0
; RO-FLAT-NOT: .amdhsa_reserve_flat_scratch
; GCN: COMPUTE_PGM_RSRC2:SCRATCH_EN: 1
; RW-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; RO-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 0
define amdgpu_kernel void @stack_object_in_kernel_no_calls() {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca
  ret void
}

; Kernel with no stack objects and no calls. Neither invocation may mention
; flat_scratch, flat_scratch_init is expected to be 0 in the default
; invocation, and scratch is expected to be disabled (SCRATCH_EN 0) in both.

; GCN-LABEL: {{^}}kernel_no_calls_no_stack:
; GCN-NOT: flat_scratch
; RW-FLAT: .amdhsa_user_sgpr_private_segment_buffer 1
; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
; RW-FLAT: .amdhsa_user_sgpr_flat_scratch_init 0
; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
; RW-FLAT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
; RW-FLAT-NOT: .amdhsa_enable_private_segment
; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
; RO-FLAT: .amdhsa_enable_private_segment 0
; RW-FLAT: .amdhsa_reserve_flat_scratch 0
; RO-FLAT-NOT: .amdhsa_reserve_flat_scratch 0
; GCN: COMPUTE_PGM_RSRC2:SCRATCH_EN: 0
; RW-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 4
; RO-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 0
define amdgpu_kernel void @kernel_no_calls_no_stack() {
  ret void
}