; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,RW-FLAT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch < %s | FileCheck -check-prefixes=GCN,RO-FLAT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,RO-FLAT %s

; Make sure flat_scratch_init is set
;
; Check prefixes used below (written here without a trailing colon so that
; FileCheck does not treat this comment as a directive):
;   GCN      applies to all three runs
;   RW-FLAT  applies to the plain gfx900 run
;   RO-FLAT  applies to the gfx900 +architected-flat-scratch run and to gfx940

; A private (addrspace 5) stack object is addrspacecast to a
; default-address-space pointer and stored through, so every run is expected
; to emit a flat_store_dword. The plain gfx900 run must set up
; flat_scratch_lo/hi; the other runs must not mention flat_scratch.
; GCN-LABEL: {{^}}stack_object_addrspacecast_in_kernel_no_calls:
; RW-FLAT:     s_add_u32 flat_scratch_lo, s4, s7
; RW-FLAT:     s_addc_u32 flat_scratch_hi, s5, 0
; RO-FLAT-NOT: flat_scratch
; GCN:         flat_store_dword
; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
; RW-FLAT:     .amdhsa_user_sgpr_flat_scratch_init 1
; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
; RW-FLAT:     .amdhsa_system_sgpr_private_segment_wavefront_offset
; RW-FLAT-NOT: .amdhsa_enable_private_segment
; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
; RO-FLAT:     .amdhsa_enable_private_segment 1
; GCN-NOT:     .amdhsa_reserve_flat_scratch
; GCN:         COMPUTE_PGM_RSRC2:SCRATCH_EN: 1
; RW-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 6
; RO-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 0
define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() {
  %alloca = alloca i32, addrspace(5)
  %cast = addrspacecast i32 addrspace(5)* %alloca to i32*
  store volatile i32 0, i32* %cast
  ret void
}

; The stack object is stored to directly through its addrspace(5) pointer:
; buffer_store_dword is expected in the plain gfx900 run and
; scratch_store_dword in the other runs.
; TODO: Could optimize out in this case
; GCN-LABEL: {{^}}stack_object_in_kernel_no_calls:
; RO-FLAT-NOT: flat_scratch
; RW-FLAT:     buffer_store_dword
; RO-FLAT:     scratch_store_dword
; RW-FLAT:     .amdhsa_user_sgpr_private_segment_buffer 1
; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
; RW-FLAT:     .amdhsa_user_sgpr_flat_scratch_init 1
; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
; RW-FLAT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; RW-FLAT-NOT: .amdhsa_enable_private_segment
; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
; RO-FLAT:     .amdhsa_enable_private_segment 1
; RW-FLAT:     .amdhsa_reserve_flat_scratch 0
; RO-FLAT-NOT: .amdhsa_reserve_flat_scratch
; GCN:         COMPUTE_PGM_RSRC2:SCRATCH_EN: 1
; RW-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 6
; RO-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 0
define amdgpu_kernel void @stack_object_in_kernel_no_calls() {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca
  ret void
}

; Empty kernel with no stack object: no run may reference flat_scratch and
; SCRATCH_EN is expected to be 0.
; GCN-LABEL: {{^}}kernel_no_calls_no_stack:
; GCN-NOT:     flat_scratch
; RW-FLAT:     .amdhsa_user_sgpr_private_segment_buffer 1
; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
; RW-FLAT:     .amdhsa_user_sgpr_flat_scratch_init 0
; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
; RW-FLAT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 0
; RW-FLAT-NOT: .amdhsa_enable_private_segment
; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
; RO-FLAT:     .amdhsa_enable_private_segment 0
; RW-FLAT:     .amdhsa_reserve_flat_scratch 0
; RO-FLAT-NOT: .amdhsa_reserve_flat_scratch
; GCN:         COMPUTE_PGM_RSRC2:SCRATCH_EN: 0
; RW-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 4
; RO-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 0
define amdgpu_kernel void @kernel_no_calls_no_stack() {
  ret void
}