; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s

; DESCRIPTION:
; There is one LDS global defined here, and this LDS is used within a single non-kernel
; function, as an operand of a nested constant expression, and this non-kernel function is
; reachable from a kernel. Hence the nested constant expression should be converted into a
; series of instructions and pointer replacement should take place. But, an important note
; is - only constant expression operands which use LDS should be converted into
; instructions; other constant expression operands which do not use LDS should be left
; untouched.
;

; Original LDS should exist.
; CHECK: @lds_used_within_function = internal addrspace(3) global [4 x i32] undef, align 4
@lds_used_within_function = internal addrspace(3) global [4 x i32] undef, align 4

; Non-LDS global should exist as it is.
; CHECK: @global_var = internal addrspace(1) global [4 x i32] undef, align 4
@global_var = internal addrspace(1) global [4 x i32] undef, align 4

; Pointer should be created.
; CHECK: @lds_used_within_function.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2

; Pointer replacement code should be added.
; Non-kernel function that uses the LDS global only through a nested constant
; expression. The expectations below require the LDS-dependent operand chain
; (getelementptr -> addrspacecast -> ptrtoint) to be expanded into instructions
; fed from the .ptr global, while the @global_var operand of the final add
; remains a constant expression.
define internal void @function() {
; CHECK-LABEL: entry:
; CHECK: %0 = load i16, i16 addrspace(3)* @lds_used_within_function.ptr, align 2
; CHECK: %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
; CHECK: %2 = bitcast i8 addrspace(3)* %1 to [4 x i32] addrspace(3)*
; CHECK: %3 = getelementptr inbounds [4 x i32], [4 x i32] addrspace(3)* %2, i32 0, i32 2
; CHECK: %4 = addrspacecast i32 addrspace(3)* %3 to i32*
; CHECK: %5 = ptrtoint i32* %4 to i32
; CHECK: %6 = add i32 %5, ptrtoint (i32 addrspace(1)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(1)* @global_var, i32 0, i32 2) to i32)
; CHECK: ret void
entry:
  ; Left unnamed on purpose: the expected output numbering (%0 .. %6) relies on
  ; this being an unnamed value.
  %0 = add i32 ptrtoint (i32* addrspacecast (i32 addrspace(3)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(3)* @lds_used_within_function, i32 0, i32 2) to i32*) to i32), ptrtoint (i32 addrspace(1)* getelementptr inbounds ([4 x i32], [4 x i32] addrspace(1)* @global_var, i32 0, i32 2) to i32)
  ret void
}

; Pointer initialization code should be added.
; The expected kernel output guards the store of the LDS address into the .ptr
; global behind a comparison of the mbcnt.lo result with 0, and places a wave
; barrier before the original call to @function.
define protected amdgpu_kernel void @kernel() {
; CHECK-LABEL: entry:
; CHECK: %0 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK: %1 = icmp eq i32 %0, 0
; CHECK: br i1 %1, label %2, label %3
;
; CHECK-LABEL: 2:
; CHECK: store i16 ptrtoint ([4 x i32] addrspace(3)* @lds_used_within_function to i16), i16 addrspace(3)* @lds_used_within_function.ptr, align 2
; CHECK: br label %3
;
; CHECK-LABEL: 3:
; CHECK: call void @llvm.amdgcn.wave.barrier()
; CHECK: call void @function()
; CHECK: ret void
entry:
  call void @function()
  ret void
}