1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3
; Objects shared by the alloc_lds_gds* kernels: one [4 x i32] array in
; addrspace(2) (accessed with the `gds` modifier in the expected output) and
; two [4 x i32] arrays in addrspace(3) whose 128- and 256-byte alignments
; exceed their 16-byte size.
4@gds0 = internal addrspace(2) global [4 x i32] undef, align 4
5@lds0 = internal addrspace(3) global [4 x i32] undef, align 128
6@lds1 = internal addrspace(3) global [4 x i32] undef, align 256
7
8; These two objects should be allocated at the same constant offsets
9; from the base.
; Atomically adds 5 (acq_rel) to element 3 of @gds0 and then of @lds0. Index 3
; of a [4 x i32] is byte 12, and the expected output uses offset:12 for both
; the `gds` access and the plain DS access. %out is not used by the body.
; NOTE(review): m0 is expected to be 16, which equals the size of @gds0 -
; presumably the GDS allocation size; confirm.
; NOTE(review): attribute group #1 is referenced here, but only #0 is defined
; in the visible part of the file - confirm this is intentional.
10define amdgpu_kernel void @alloc_lds_gds(i32 addrspace(1)* %out) #1 {
11; GCN-LABEL: alloc_lds_gds:
12; GCN:       ; %bb.0:
13; GCN-NEXT:    v_mov_b32_e32 v0, 5
14; GCN-NEXT:    v_mov_b32_e32 v1, 0
15; GCN-NEXT:    s_mov_b32 m0, 16
16; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
17; GCN-NEXT:    ds_add_u32 v1, v0 offset:12 gds
18; GCN-NEXT:    s_waitcnt lgkmcnt(0)
19; GCN-NEXT:    buffer_wbinvl1
20; GCN-NEXT:    s_waitcnt lgkmcnt(0)
21; GCN-NEXT:    ds_add_u32 v1, v0 offset:12
22; GCN-NEXT:    s_waitcnt lgkmcnt(0)
23; GCN-NEXT:    s_endpgm
24  %gep.gds = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds0, i32 0, i32 3
25  %val0 = atomicrmw add i32 addrspace(2)* %gep.gds, i32 5 acq_rel
26  %gep.lds = getelementptr [4 x i32], [4 x i32] addrspace(3)* @lds0, i32 0, i32 3
27  %val1 = atomicrmw add i32 addrspace(3)* %gep.lds, i32 5 acq_rel
28  ret void
29}
30
31; The LDS alignment shouldn't change offset of GDS.
; Atomically adds 5 (acq_rel) to element 3 of @gds0, @lds0 (align 128) and
; @lds1 (align 256), in that order. The expected output keeps the `gds` access
; at offset:12 and m0 at 16, the same values as in alloc_lds_gds, while the two
; addrspace(3) accesses use offset:140 and offset:12 respectively. That is
; consistent with @lds1 being placed at 0 and @lds0 at 128.
; NOTE(review): attribute group #1 is referenced here, but only #0 is defined
; in the visible part of the file - confirm this is intentional.
32define amdgpu_kernel void @alloc_lds_gds_align(i32 addrspace(1)* %out) #1 {
33; GCN-LABEL: alloc_lds_gds_align:
34; GCN:       ; %bb.0:
35; GCN-NEXT:    v_mov_b32_e32 v0, 5
36; GCN-NEXT:    v_mov_b32_e32 v1, 0
37; GCN-NEXT:    s_mov_b32 m0, 16
38; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
39; GCN-NEXT:    ds_add_u32 v1, v0 offset:12 gds
40; GCN-NEXT:    s_waitcnt lgkmcnt(0)
41; GCN-NEXT:    buffer_wbinvl1
42; GCN-NEXT:    s_waitcnt lgkmcnt(0)
43; GCN-NEXT:    ds_add_u32 v1, v0 offset:140
44; GCN-NEXT:    s_waitcnt lgkmcnt(0)
45; GCN-NEXT:    ds_add_u32 v1, v0 offset:12
46; GCN-NEXT:    s_waitcnt lgkmcnt(0)
47; GCN-NEXT:    s_endpgm
48  %gep.gds = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds0, i32 0, i32 3
49  %val0 = atomicrmw add i32 addrspace(2)* %gep.gds, i32 5 acq_rel
50
51  %gep.lds0 = getelementptr [4 x i32], [4 x i32] addrspace(3)* @lds0, i32 0, i32 3
52  %val1 = atomicrmw add i32 addrspace(3)* %gep.lds0, i32 5 acq_rel
53
54  %gep.lds1 = getelementptr [4 x i32], [4 x i32] addrspace(3)* @lds1, i32 0, i32 3
55  %val2 = atomicrmw add i32 addrspace(3)* %gep.lds1, i32 5 acq_rel
56  ret void
57}
58
; Two 16-byte [4 x i32] arrays in addrspace(2) that differ only in alignment
; (8 vs. 32). Both are used by the gds_global_align* kernels.
59@gds_align8 = internal addrspace(2) global [4 x i32] undef, align 8
60@gds_align32 = internal addrspace(2) global [4 x i32] undef, align 32
61
; Atomically adds 5 (acq_rel) to element 3 of @gds_align8 and then of
; @gds_align32. This kernel carries no attribute group. The expected output
; uses offset:28 for the first access, offset:12 for the second, and m0 = 32
; (the combined size of the two arrays). That is consistent with @gds_align32
; being placed at 0 and @gds_align8 at 16. %out is not used by the body.
62define amdgpu_kernel void @gds_global_align(i32 addrspace(1)* %out) {
63; GCN-LABEL: gds_global_align:
64; GCN:       ; %bb.0:
65; GCN-NEXT:    v_mov_b32_e32 v0, 5
66; GCN-NEXT:    v_mov_b32_e32 v1, 0
67; GCN-NEXT:    s_mov_b32 m0, 32
68; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
69; GCN-NEXT:    ds_add_u32 v1, v0 offset:28 gds
70; GCN-NEXT:    s_waitcnt lgkmcnt(0)
71; GCN-NEXT:    buffer_wbinvl1
72; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
73; GCN-NEXT:    ds_add_u32 v1, v0 offset:12 gds
74; GCN-NEXT:    s_waitcnt lgkmcnt(0)
75; GCN-NEXT:    buffer_wbinvl1
76; GCN-NEXT:    s_endpgm
77  %gep.gds0 = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds_align8, i32 0, i32 3
78  %val0 = atomicrmw add i32 addrspace(2)* %gep.gds0, i32 5 acq_rel
79  %gep.gds1 = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds_align32, i32 0, i32 3
80  %val1 = atomicrmw add i32 addrspace(2)* %gep.gds1, i32 5 acq_rel
81  ret void
82}
83
; Same IR body as gds_global_align, but with attribute group #0
; ("amdgpu-gds-size"="1024"). Every expected value is 1024 higher than in that
; kernel: offset:1052 (1024 + 28), offset:1036 (1024 + 12) and m0 = 0x420
; (1024 + 32).
84define amdgpu_kernel void @gds_global_align_plus_attr(i32 addrspace(1)* %out) #0 {
85; GCN-LABEL: gds_global_align_plus_attr:
86; GCN:       ; %bb.0:
87; GCN-NEXT:    v_mov_b32_e32 v0, 5
88; GCN-NEXT:    v_mov_b32_e32 v1, 0
89; GCN-NEXT:    s_movk_i32 m0, 0x420
90; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
91; GCN-NEXT:    ds_add_u32 v1, v0 offset:1052 gds
92; GCN-NEXT:    s_waitcnt lgkmcnt(0)
93; GCN-NEXT:    buffer_wbinvl1
94; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
95; GCN-NEXT:    ds_add_u32 v1, v0 offset:1036 gds
96; GCN-NEXT:    s_waitcnt lgkmcnt(0)
97; GCN-NEXT:    buffer_wbinvl1
98; GCN-NEXT:    s_endpgm
99  %gep.gds0 = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds_align8, i32 0, i32 3
100  %val0 = atomicrmw add i32 addrspace(2)* %gep.gds0, i32 5 acq_rel
101  %gep.gds1 = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds_align32, i32 0, i32 3
102  %val1 = atomicrmw add i32 addrspace(2)* %gep.gds1, i32 5 acq_rel
103  ret void
104}
105
; @small.gds is a single byte in addrspace(2); gds_extern_align passes its
; address to inline asm.
; NOTE(review): despite its name, @gds.external is declared in addrspace(3),
; and no kernel in the visible part of the file references it - confirm
; whether addrspace(2) was intended or the declaration can be dropped.
106@small.gds = internal addrspace(2) global i8 undef, align 1
107@gds.external = external unnamed_addr addrspace(3) global [0 x i32], align 4
108
; Uses attribute group #0 ("amdgpu-gds-size"="1024"). The body first hands the
; address of @small.gds to a side-effecting inline asm through an "s"
; constraint, then atomically adds 5 (acq_rel) to element 3 of the
; addrspace(2) pointer argument %gds.arg. In the expected output the asm
; operand is s1 = 0x400 (1024) and m0 = 0x401 (1024 + the 1-byte @small.gds).
; The atomic is a `gds` ds_add_u32 at offset:12 whose address register is
; loaded from the kernel arguments. %out is not used by the body.
109define amdgpu_kernel void @gds_extern_align(i32 addrspace(1)* %out, [4 x i32] addrspace(2)* %gds.arg) #0 {
110; GCN-LABEL: gds_extern_align:
111; GCN:       ; %bb.0:
112; GCN-NEXT:    s_load_dword s0, s[0:1], 0x8
113; GCN-NEXT:    v_mov_b32_e32 v0, 5
114; GCN-NEXT:    s_movk_i32 m0, 0x401
115; GCN-NEXT:    s_movk_i32 s1, 0x400
116; GCN-NEXT:    ;;#ASMSTART
117; GCN-NEXT:    ; use s1
118; GCN-NEXT:    ;;#ASMEND
119; GCN-NEXT:    s_waitcnt lgkmcnt(0)
120; GCN-NEXT:    v_mov_b32_e32 v1, s0
121; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
122; GCN-NEXT:    ds_add_u32 v1, v0 offset:12 gds
123; GCN-NEXT:    s_waitcnt lgkmcnt(0)
124; GCN-NEXT:    buffer_wbinvl1
125; GCN-NEXT:    s_endpgm
126  call void asm sideeffect "; use $0","s"(i8 addrspace(2)* @small.gds)
127  %gep.gds0 = getelementptr [4 x i32], [4 x i32] addrspace(2)* %gds.arg, i32 0, i32 3
128  %val0 = atomicrmw add i32 addrspace(2)* %gep.gds0, i32 5 acq_rel
129  ret void
130}
131
; Attribute group #0 sets "amdgpu-gds-size" to "1024". It is applied to
; gds_global_align_plus_attr and gds_extern_align, whose expected offsets and
; m0 values are all at or above 1024.
132attributes #0 = { "amdgpu-gds-size"="1024" }
133