; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx900 -verify-machineinstrs -o - %s | FileCheck %s

; Statically sized LDS (addrspace(3)) arrays referenced by the kernels below.
; Their byte sizes determine the constants expected by the checks:
;   @lds0: 512 x float  = 2048 bytes (0x800)
;   @lds1: 256 x float  = 1024 bytes (0x400)
;   @lds2: 4096 x float = 16384 bytes (0x4000)
;   @lds3: 67 x i8      = 67 bytes (0x43)
@lds0 = addrspace(3) global [512 x float] undef
@lds1 = addrspace(3) global [256 x float] undef
@lds2 = addrspace(3) global [4096 x float] undef
@lds3 = addrspace(3) global [67 x i8] undef

; Zero-length external LDS arrays (the "dynamic shared" arrays under test).
; They differ only in element type and explicit alignment.
@dynamic_shared0 = external addrspace(3) global [0 x float]
@dynamic_shared1 = external addrspace(3) global [0 x double]
@dynamic_shared2 = external addrspace(3) global [0 x double], align 4
@dynamic_shared3 = external addrspace(3) global [0 x double], align 16

; The kernel reads @lds0 and writes @dynamic_shared0. The expected constant
; 0x800 equals the size of @lds0 (512 * 4 bytes).
; CHECK-LABEL: {{^}}dynamic_shared_array_0:
; CHECK: v_add_u32_e32 v{{[0-9]+}}, 0x800, v{{[0-9]+}}
define amdgpu_kernel void @dynamic_shared_array_0(float addrspace(1)* %out) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %tid.x
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val0, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; The kernel reads either @lds0 or @lds1 depending on %cond, then writes
; @dynamic_shared0. The expected constant 0xc00 equals the combined size of
; @lds0 and @lds1 (2048 + 1024 bytes).
; CHECK-LABEL: {{^}}dynamic_shared_array_1:
; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0xc00
; CHECK: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 2, [[DYNLDS]]
define amdgpu_kernel void @dynamic_shared_array_1(float addrspace(1)* %out, i32 %cond) {
entry:
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %idx.0 = add nsw i32 %tid.x, 64
  %tmp = icmp eq i32 %cond, 0
  br i1 %tmp, label %if, label %else

if:                                               ; preds = %entry
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i32 0, i32 %idx.0
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  br label %endif

else:                                             ; preds = %entry
  %arrayidx1 = getelementptr inbounds [256 x float], [256 x float] addrspace(3)* @lds1, i32 0, i32 %idx.0
  %val1 = load float, float addrspace(3)* %arrayidx1, align 4
  br label %endif

endif:                                            ; preds = %else, %if
  %val = phi float [ %val0, %if ], [ %val1, %else ]
  %arrayidx = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val, float addrspace(3)* %arrayidx, align 4
  ret void
}

; The kernel reads @lds2 and writes @dynamic_shared0. The expected constant
; 0x4000 equals the size of @lds2 (4096 * 4 bytes).
; CHECK-LABEL: {{^}}dynamic_shared_array_2:
; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x4000
; CHECK: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 2, [[DYNLDS]]
define amdgpu_kernel void @dynamic_shared_array_2(i32 %idx) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %vidx = add i32 %tid.x, %idx
  %arrayidx0 = getelementptr inbounds [4096 x float], [4096 x float] addrspace(3)* @lds2, i32 0, i32 %vidx
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val0, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; The offset to the dynamic shared memory array should be aligned on the type
; specified.
; Here @lds3 occupies 67 bytes; rounded up to a multiple of 4 (the float
; element of @dynamic_shared0) that gives 68 = 0x44.
; CHECK-LABEL: {{^}}dynamic_shared_array_3:
; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x44
; CHECK: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 2, [[DYNLDS]]
define amdgpu_kernel void @dynamic_shared_array_3(i32 %idx) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %vidx = add i32 %tid.x, %idx
  %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx
  %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4
  %val1 = uitofp i8 %val0 to float
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

; The offset to the dynamic shared memory array should be aligned on the
; maximal one.
; Both @dynamic_shared0 (float) and @dynamic_shared1 (double) are used and are
; expected to share one base: 67 bytes rounded up to a multiple of 8 is
; 72 = 0x48.
; CHECK-LABEL: {{^}}dynamic_shared_array_4:
; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x48
; CHECK-DAG: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 2, [[DYNLDS]]
; CHECK-DAG: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 3, [[DYNLDS]]
define amdgpu_kernel void @dynamic_shared_array_4(i32 %idx) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %vidx = add i32 %tid.x, %idx
  %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx
  %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4
  %val1 = uitofp i8 %val0 to float
  %val2 = uitofp i8 %val0 to double
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared1, i32 0, i32 %tid.x
  store double %val2, double addrspace(3)* %arrayidx2, align 4
  ret void
}

; Honor the explicit alignment from the specified variable.
; @dynamic_shared2 is declared with align 4, so the expected base is 67 bytes
; rounded up to a multiple of 4: 68 = 0x44.
; CHECK-LABEL: {{^}}dynamic_shared_array_5:
; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x44
; CHECK-DAG: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 2, [[DYNLDS]]
; CHECK-DAG: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 3, [[DYNLDS]]
define amdgpu_kernel void @dynamic_shared_array_5(i32 %idx) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %vidx = add i32 %tid.x, %idx
  %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx
  %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4
  %val1 = uitofp i8 %val0 to float
  %val2 = uitofp i8 %val0 to double
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared2, i32 0, i32 %tid.x
  store double %val2, double addrspace(3)* %arrayidx2, align 4
  ret void
}

; Honor the explicit alignment from the specified variable.
; @dynamic_shared3 is declared with align 16, so the expected base is 67 bytes
; rounded up to a multiple of 16: 80 = 0x50.
; CHECK-LABEL: {{^}}dynamic_shared_array_6:
; CHECK: v_mov_b32_e32 [[DYNLDS:v[0-9]+]], 0x50
; CHECK-DAG: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 2, [[DYNLDS]]
; CHECK-DAG: v_lshl_add_u32 {{v[0-9]+}}, {{v[0-9]+}}, 3, [[DYNLDS]]
define amdgpu_kernel void @dynamic_shared_array_6(i32 %idx) {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %vidx = add i32 %tid.x, %idx
  %arrayidx0 = getelementptr inbounds [67 x i8], [67 x i8] addrspace(3)* @lds3, i32 0, i32 %vidx
  %val0 = load i8, i8 addrspace(3)* %arrayidx0, align 4
  %val1 = uitofp i8 %val0 to float
  %val2 = uitofp i8 %val0 to double
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i32 0, i32 %tid.x
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  %arrayidx2 = getelementptr inbounds [0 x double], [0 x double] addrspace(3)* @dynamic_shared3, i32 0, i32 %tid.x
  store double %val2, double addrspace(3)* %arrayidx2, align 4
  ret void
}

; The kernel reads @lds0 and passes the value to @store_value, which is the
; only user of @dynamic_shared0 here. The checks require that no s_swappc_b64
; (call instruction) is emitted for the kernel.
; NOTE(review): presumably this relies on the callee being inlined into the
; kernel before instruction selection -- confirm against the backend pipeline.
; CHECK-LABEL: {{^}}dynamic_shared_array_with_call:
; CHECK-NOT: s_swappc_b64
define amdgpu_kernel void @dynamic_shared_array_with_call(float addrspace(1)* nocapture readnone %out) local_unnamed_addr {
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %1 = sext i32 %tid.x to i64
  %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds0, i64 0, i64 %1
  %val0 = load float, float addrspace(3)* %arrayidx0, align 4
  tail call void @store_value(float %val0)
  ret void
}

; The callee itself must not appear anywhere in the remaining output.
; CHECK-NOT: store_value
define linkonce_odr hidden void @store_value(float %val1) local_unnamed_addr {
entry:
  %tid.x = tail call i32 @llvm.amdgcn.workitem.id.x()
  %0 = sext i32 %tid.x to i64
  %arrayidx1 = getelementptr inbounds [0 x float], [0 x float] addrspace(3)* @dynamic_shared0, i64 0, i64 %0
  store float %val1, float addrspace(3)* %arrayidx1, align 4
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x()