; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; Module-level objects shared by the first two kernels: one 16-byte array in
; addrspace(2) and two 16-byte arrays in addrspace(3) that differ only in
; their requested alignment (128 vs. 256).
@gds0 = internal addrspace(2) global [4 x i32] undef, align 4
@lds0 = internal addrspace(3) global [4 x i32] undef, align 128
@lds1 = internal addrspace(3) global [4 x i32] undef, align 256

; These two objects should be allocated at the same constant offsets
; from the base.
;
; The kernel performs one atomic add of 5 on element 3 of @gds0 and one on
; element 3 of @lds0; the expected output uses offset:12 for both accesses.
; NOTE(review): attribute group #1 is referenced here but is not defined in
; the visible part of this file - confirm that this is intentional.
define amdgpu_kernel void @alloc_lds_gds(i32 addrspace(1)* %out) #1 {
; GCN-LABEL: alloc_lds_gds:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, 5
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_mov_b32 m0, 16
; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_endpgm
  %gep.gds = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds0, i32 0, i32 3
  %val0 = atomicrmw add i32 addrspace(2)* %gep.gds, i32 5 acq_rel
  %gep.lds = getelementptr [4 x i32], [4 x i32] addrspace(3)* @lds0, i32 0, i32 3
  %val1 = atomicrmw add i32 addrspace(3)* %gep.lds, i32 5 acq_rel
  ret void
}

; The LDS alignment shouldn't change offset of GDS.
; This kernel touches element 3 of @gds0, @lds0 (align 128) and @lds1
; (align 256). The expected output keeps the addrspace(2) access at offset:12
; with m0 set to 16, while the two addrspace(3) accesses use offsets 140 and 12.
; NOTE(review): attribute group #1 is referenced here but is not defined in
; the visible part of this file - confirm that this is intentional.
define amdgpu_kernel void @alloc_lds_gds_align(i32 addrspace(1)* %out) #1 {
; GCN-LABEL: alloc_lds_gds_align:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, 5
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_mov_b32 m0, 16
; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:140
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_endpgm
  %gep.gds = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds0, i32 0, i32 3
  %val0 = atomicrmw add i32 addrspace(2)* %gep.gds, i32 5 acq_rel

  %gep.lds0 = getelementptr [4 x i32], [4 x i32] addrspace(3)* @lds0, i32 0, i32 3
  %val1 = atomicrmw add i32 addrspace(3)* %gep.lds0, i32 5 acq_rel

  %gep.lds1 = getelementptr [4 x i32], [4 x i32] addrspace(3)* @lds1, i32 0, i32 3
  %val2 = atomicrmw add i32 addrspace(3)* %gep.lds1, i32 5 acq_rel
  ret void
}

; Two 16-byte addrspace(2) arrays that differ only in requested alignment.
@gds_align8 = internal addrspace(2) global [4 x i32] undef, align 8
@gds_align32 = internal addrspace(2) global [4 x i32] undef, align 32

; Atomic add on element 3 of each aligned addrspace(2) array, with no function
; attributes. The expected output addresses @gds_align8 at offset:28 and
; @gds_align32 at offset:12, with m0 set to 32.
define amdgpu_kernel void @gds_global_align(i32 addrspace(1)* %out) {
; GCN-LABEL: gds_global_align:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, 5
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_mov_b32 m0, 32
; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:28 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    s_endpgm
  %gep.gds0 = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds_align8, i32 0, i32 3
  %val0 = atomicrmw add i32 addrspace(2)* %gep.gds0, i32 5 acq_rel
  %gep.gds1 = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds_align32, i32 0, i32 3
  %val1 = atomicrmw add i32 addrspace(2)* %gep.gds1, i32 5 acq_rel
  ret void
}

; Same body as @gds_global_align, but carrying attribute group #0
; ("amdgpu-gds-size"="1024"). Every expected value is 1024 higher than in the
; attribute-free kernel: offsets 1052 and 1036, and m0 set to 0x420.
define amdgpu_kernel void @gds_global_align_plus_attr(i32 addrspace(1)* %out) #0 {
; GCN-LABEL: gds_global_align_plus_attr:
; GCN:       ; %bb.0:
; GCN-NEXT:    v_mov_b32_e32 v0, 5
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    s_movk_i32 m0, 0x420
; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:1052 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:1036 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    s_endpgm
  %gep.gds0 = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds_align8, i32 0, i32 3
  %val0 = atomicrmw add i32 addrspace(2)* %gep.gds0, i32 5 acq_rel
  %gep.gds1 = getelementptr [4 x i32], [4 x i32] addrspace(2)* @gds_align32, i32 0, i32 3
  %val1 = atomicrmw add i32 addrspace(2)* %gep.gds1, i32 5 acq_rel
  ret void
}

; A single-byte addrspace(2) object, plus an external zero-length array.
; NOTE(review): @gds.external is declared in addrspace(3) although its name
; says "gds", and no function visible here references it - confirm intent.
@small.gds = internal addrspace(2) global i8 undef, align 1
@gds.external = external unnamed_addr addrspace(3) global [0 x i32], align 4

; Uses attribute group #0 together with @small.gds. The inline asm takes the
; address of @small.gds as an "s" operand (expected to materialize as 0x400),
; m0 is expected to be 0x401, and the atomic add goes through the
; addrspace(2) pointer passed as the second kernel argument, element 3.
define amdgpu_kernel void @gds_extern_align(i32 addrspace(1)* %out, [4 x i32] addrspace(2)* %gds.arg) #0 {
; GCN-LABEL: gds_extern_align:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s0, s[0:1], 0x8
; GCN-NEXT:    v_mov_b32_e32 v0, 5
; GCN-NEXT:    s_movk_i32 m0, 0x401
; GCN-NEXT:    s_movk_i32 s1, 0x400
; GCN-NEXT:    ;;#ASMSTART
; GCN-NEXT:    ; use s1
; GCN-NEXT:    ;;#ASMEND
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s0
; GCN-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT:    ds_add_u32 v1, v0 offset:12 gds
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_wbinvl1
; GCN-NEXT:    s_endpgm
  call void asm sideeffect "; use $0","s"(i8 addrspace(2)* @small.gds)
  %gep.gds0 = getelementptr [4 x i32], [4 x i32] addrspace(2)* %gds.arg, i32 0, i32 3
  %val0 = atomicrmw add i32 addrspace(2)* %gep.gds0, i32 5 acq_rel
  ret void
}

attributes #0 = { "amdgpu-gds-size"="1024" }