; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck %s

; Statically sized addrspace(3) arrays. Their byte sizes determine the offsets
; the tests below expect for the dynamic arrays:
;   @lds0: 512 x float  = 2048  (0x800)
;   @lds1: 256 x float  = 1024  (0x400)
;   @lds2: 4096 x float = 16384 (0x4000)
;   @lds3: 67 x i8      = 67    (0x43, deliberately not a multiple of 4)
@lds0 = addrspace(3) global [512 x float] undef
@lds1 = addrspace(3) global [256 x float] undef
@lds2 = addrspace(3) global [4096 x float] undef
@lds3 = addrspace(3) global [67 x i8] undef

; External zero-length addrspace(3) arrays modelling dynamically sized shared
; memory. They differ only in element type and alignment:
;   @dynamic_shared0: float,  no explicit alignment
;   @dynamic_shared1: double, no explicit alignment
;   @dynamic_shared2: double, explicit align 4
;   @dynamic_shared3: double, explicit align 16
@dynamic_shared0 = external addrspace(3) global [0 x float]
@dynamic_shared1 = external addrspace(3) global [0 x double]
@dynamic_shared2 = external addrspace(3) global [0 x double], align 4
@dynamic_shared3 = external addrspace(3) global [0 x double], align 16

; Copies one float per work-item from @lds0 into @dynamic_shared0.
; @lds0 is the only static array used, so the dynamic array is expected at
; byte offset 0x800, i.e. the size of @lds0 (512 x float = 2048 bytes).
; CHECK-LABEL: {{^}}dynamic_shared_array_0:
; CHECK: v_add_u32_e32 v{{[0-9]+}}, 0x800, v{{[0-9]+}}
define amdgpu_kernel void @dynamic_shared_array_0(float addrspace(1)* %out) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %tid.x
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val0, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; Reads from @lds0 or @lds1 depending on %cond, then stores the selected value
; into @dynamic_shared0. Both static arrays are used by the kernel, so the
; dynamic array is expected at 0xc00 = 2048 (@lds0) + 1024 (@lds1) bytes.
; CHECK-LABEL: {{^}}dynamic_shared_array_1:
; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0xc00
; CHECK: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 2, [[DYNLDS]]
define amdgpu_kernel void @dynamic_shared_array_1(float addrspace(1)* %out, i32 %cond) {
entry:
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %idx.0 = add nsw i32 %tid.x, 64
  %tmp = icmp eq i32 %cond, 0
  br i1 %tmp, label %if, label %else

if:                                               ; preds = %entry
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  br label %endif

else:                                             ; preds = %entry
  %arrayidx1 = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  br label %endif

endif:                                            ; preds = %else, %if
  %val = phi float [ %val0, %if ], [ %val1, %else ]
  %arrayidx = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val, float addrspace(3)* %arrayidx, align 4
  ret void
}

; Same shape as the first test but with the larger @lds2 as the only static
; array: the dynamic array is expected at 0x4000, the size of @lds2
; (4096 x float = 16384 bytes). The load index is %tid.x + %idx.
; CHECK-LABEL: {{^}}dynamic_shared_array_2:
; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x4000
; CHECK: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 2, [[DYNLDS]]
define amdgpu_kernel void @dynamic_shared_array_2(i32 %idx) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %vidx = add i32 %tid.x, %idx
  %arrayidx0 = getelementptr inbounds [4096 x float], [4096 x float] addrspace(3)* @lds2, i32 0, i32 %vidx
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val0, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; The offset of the dynamic shared memory array should be aligned to the
; alignment of its element type. @lds3 occupies 67 bytes, so the [0 x float]
; array is expected at 0x44 (68), i.e. 67 rounded up to a multiple of 4.
; CHECK-LABEL: {{^}}dynamic_shared_array_3:
; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x44
; CHECK: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 2, [[DYNLDS]]
define amdgpu_kernel void @dynamic_shared_array_3(i32 %idx) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %vidx = add i32 %tid.x, %idx
  %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx
  %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4
  %val1 = uitofp i8 %val0 to float
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; When a kernel uses several dynamic shared memory arrays, their common offset
; should be aligned to the largest alignment among them. Here a float array
; and a double array (neither with an explicit alignment) are used after the
; 67-byte @lds3; the expected offset 0x48 (72) is 67 rounded up to a multiple
; of 8. Both arrays are addressed from the same base register, with shift
; amounts 2 (float) and 3 (double).
; CHECK-LABEL: {{^}}dynamic_shared_array_4:
; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x48
; CHECK-DAG: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 2, [[DYNLDS]]
; CHECK-DAG: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 3, [[DYNLDS]]
define amdgpu_kernel void @dynamic_shared_array_4(i32 %idx) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %vidx = add i32 %tid.x, %idx
  %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx
  %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4
  %val1 = uitofp i8 %val0 to float
  %val2 = uitofp i8 %val0 to double
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared1, i32 0, i32 %tid.x
  store double %val2, double addrspace(3)* %arrayidx2, align 4
  ret void
}

; Honor the explicit alignment from the specified variable.
; @dynamic_shared2 is a double array declared with align 4, so the largest
; alignment among the dynamic arrays used here is 4 and the expected offset
; after the 67-byte @lds3 is 0x44 (68) rather than 0x48.
; CHECK-LABEL: {{^}}dynamic_shared_array_5:
; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x44
; CHECK-DAG: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 2, [[DYNLDS]]
; CHECK-DAG: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 3, [[DYNLDS]]
define amdgpu_kernel void @dynamic_shared_array_5(i32 %idx) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %vidx = add i32 %tid.x, %idx
  %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx
  %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4
  %val1 = uitofp i8 %val0 to float
  %val2 = uitofp i8 %val0 to double
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared2, i32 0, i32 %tid.x
  store double %val2, double addrspace(3)* %arrayidx2, align 4
  ret void
}

; Honor the explicit alignment from the specified variable.
; @dynamic_shared3 is declared with align 16, so the expected offset after the
; 67-byte @lds3 is 0x50 (80), i.e. 67 rounded up to a multiple of 16.
; CHECK-LABEL: {{^}}dynamic_shared_array_6:
; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x50
; CHECK-DAG: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 2, [[DYNLDS]]
; CHECK-DAG: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 3, [[DYNLDS]]
define amdgpu_kernel void @dynamic_shared_array_6(i32 %idx) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %vidx = add i32 %tid.x, %idx
  %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx
  %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4
  %val1 = uitofp i8 %val0 to float
  %val2 = uitofp i8 %val0 to double
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared3, i32 0, i32 %tid.x
  store double %val2, double addrspace(3)* %arrayidx2, align 4
  ret void
}

; The kernel touches @dynamic_shared0 only indirectly, through the call to
; @store_value. The generated code must not contain s_swappc_b64 (the
; instruction used to perform a call), i.e. no real call may remain in the
; kernel.
; CHECK-LABEL: {{^}}dynamic_shared_array_with_call:
; CHECK-NOT: s_swappc_b64
define amdgpu_kernel void @dynamic_shared_array_with_call(float addrspace(1)* nocapture readnone %out) local_unnamed_addr {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %1 = sext i32 %tid.x to i64
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i64 0, i64 %1
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  tail call void @store_value(float %val0)
  ret void
}

; Helper called by @dynamic_shared_array_with_call: stores %val1 into
; @dynamic_shared0 at the index given by the work-item id. The pattern below
; requires that its name does not appear in the remaining output; presumably
; the helper is inlined into its caller and then dropped (it is linkonce_odr
; and hidden) -- confirm against the pass that performs the inlining.
; CHECK-NOT: store_value
define linkonce_odr hidden void @store_value(float %val1) local_unnamed_addr {
entry:
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %0 = sext i32 %tid.x to i64
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i64 0, i64 %0
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; Intrinsic used by every function in this file to obtain the work-item id in
; the x dimension.
declare i32 @llvm.amdgcn.workitem.id.x()