; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,RW-FLAT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch < %s | FileCheck -check-prefixes=GCN,RO-FLAT %s

; Make sure flat_scratch_init is set as expected for each kernel, both with
; and without architected flat scratch.

; Kernel that stores to a private (addrspace 5) stack slot through a flat
; pointer produced by addrspacecast; it makes no calls.
; GCN-LABEL: {{^}}stack_object_addrspacecast_in_kernel_no_calls:
; RW-FLAT:     s_add_u32 flat_scratch_lo, s4, s7
; RW-FLAT:     s_addc_u32 flat_scratch_hi, s5, 0
; RO-FLAT-NOT: flat_scratch
; GCN:         flat_store_dword
; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
; RW-FLAT:     .amdhsa_user_sgpr_flat_scratch_init 1
; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
; RW-FLAT:     .amdhsa_system_sgpr_private_segment_wavefront_offset
; RW-FLAT-NOT: .amdhsa_enable_private_segment
; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
; RO-FLAT:     .amdhsa_enable_private_segment 1
; GCN-NOT:     .amdhsa_reserve_flat_scratch
; GCN:         COMPUTE_PGM_RSRC2:SCRATCH_EN: 1
; RW-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 6
; RO-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 0
define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() {
  %alloca = alloca i32, addrspace(5)
  %cast = addrspacecast i32 addrspace(5)* %alloca to i32*
  ; volatile keeps the store (and therefore the stack object) from being removed
  store volatile i32 0, i32* %cast
  ret void
}

; Kernel that stores directly to a private (addrspace 5) stack slot, with no
; addrspacecast and no calls.
; TODO: Could optimize out in this case (presumably the flat_scratch_init
; setup, since the stack object is not accessed through a flat pointer here;
; confirm against the backend).
; GCN-LABEL: {{^}}stack_object_in_kernel_no_calls:
; RO-FLAT-NOT: flat_scratch
; RW-FLAT:     buffer_store_dword
; RO-FLAT:     scratch_store_dword
; RW-FLAT:     .amdhsa_user_sgpr_private_segment_buffer 1
; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
; RW-FLAT:     .amdhsa_user_sgpr_flat_scratch_init 1
; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
; RW-FLAT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
; RW-FLAT-NOT: .amdhsa_enable_private_segment
; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
; RO-FLAT:     .amdhsa_enable_private_segment 1
; RW-FLAT:     .amdhsa_reserve_flat_scratch 0
; RO-FLAT-NOT: .amdhsa_reserve_flat_scratch
; GCN:         COMPUTE_PGM_RSRC2:SCRATCH_EN: 1
; RW-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 6
; RO-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 0
define amdgpu_kernel void @stack_object_in_kernel_no_calls() {
  %alloca = alloca i32, addrspace(5)
  ; volatile keeps the store (and therefore the stack object) from being removed
  store volatile i32 0, i32 addrspace(5)* %alloca
  ret void
}

; Kernel with an empty body: no stack objects and no calls, so the checks
; below expect scratch to be disabled.
; GCN-LABEL: {{^}}kernel_no_calls_no_stack:
; GCN-NOT:    flat_scratch
; RW-FLAT:     .amdhsa_user_sgpr_private_segment_buffer 1
; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
; RW-FLAT:     .amdhsa_user_sgpr_flat_scratch_init 0
; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
; RW-FLAT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 0
; RW-FLAT-NOT: .amdhsa_enable_private_segment
; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
; RO-FLAT:     .amdhsa_enable_private_segment 0
; RW-FLAT:     .amdhsa_reserve_flat_scratch 0
; RO-FLAT-NOT: .amdhsa_reserve_flat_scratch 0
; GCN:         COMPUTE_PGM_RSRC2:SCRATCH_EN: 0
; RW-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 4
; RO-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 0
define amdgpu_kernel void @kernel_no_calls_no_stack() {
  ret void
}
