; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s
; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s

; Runs the amdgpu-lower-module-lds pass through both opt invocation styles
; above and verifies the resulting module against the same set of checks.

; Padding to meet alignment, so references to @var1 replaced with gep ptr, 0, 2
; No i64 member, as addrspace(3) variables with initializers are ignored. Likewise no addrspace(4).
; CHECK: %llvm.amdgcn.module.lds.t = type { float, [4 x i8], i32 }

; Variables removed by pass
; CHECK-NOT: @var0
; CHECK-NOT: @var1

@var0 = addrspace(3) global float undef, align 8
@var1 = addrspace(3) global i32 undef, align 8

@ptr = addrspace(1) global i32 addrspace(3)* @var1, align 4

; A variable that is unchanged by pass
; CHECK: @with_init = addrspace(3) global i64 0
@with_init = addrspace(3) global i64 0

; Instance of new type, aligned to max of element alignment
; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 8

; Use in func rewritten to access struct at address zero
; CHECK-LABEL: @func()
; CHECK: %dec = atomicrmw fsub float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0), float 1.0
; CHECK: %val0 = load i32, i32 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 2), align 8
; CHECK: %val1 = add i32 %val0, 4
; CHECK: store i32 %val1, i32 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 2), align 8
; CHECK: %unused0 = atomicrmw add i64 addrspace(3)* @with_init, i64 1 monotonic
define void @func() {
  %dec = atomicrmw fsub float addrspace(3)* @var0, float 1.0 monotonic
  %val0 = load i32, i32 addrspace(3)* @var1, align 4
  %val1 = add i32 %val0, 4
  store i32 %val1, i32 addrspace(3)* @var1, align 4
  %unused0 = atomicrmw add i64 addrspace(3)* @with_init, i64 1 monotonic
  ret void
}

; This kernel calls a function that uses LDS so needs the block
; CHECK-LABEL: @kern_call()
; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ]
; CHECK: call void @func()
; CHECK: %dec = atomicrmw fsub float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0), float 2.000000e+00 monotonic, align 8
define amdgpu_kernel void @kern_call() {
  call void @func()
  %dec = atomicrmw fsub float addrspace(3)* @var0, float 2.0 monotonic
  ret void
}

; This kernel does not alloc the LDS block as it makes no calls; it carries
; attribute group #0 ("amdgpu-elide-module-lds") and no llvm.donothing marker
; call is expected in its body.
; CHECK-LABEL: @kern_empty()
; CHECK-NOT: call void @llvm.donothing()
define spir_kernel void @kern_empty() #0 {
  ret void
}

; Make sure we don't crash trying to insert code into a kernel
; declaration.
declare amdgpu_kernel void @kernel_declaration()

attributes #0 = { "amdgpu-elide-module-lds" }
; CHECK: attributes #0 = { "amdgpu-elide-module-lds" }