1; RUN: opt -S -mtriple=amdgcn-- -amdgpu-lower-module-lds < %s | FileCheck %s
2; RUN: opt -S -mtriple=amdgcn-- -passes=amdgpu-lower-module-lds < %s | FileCheck %s
3
4; Padding to meet alignment, so references to @var1 replaced with gep ptr, 0, 2
5; The struct has no i64 member because addrspace(3) variables with initializers are ignored. Likewise no addrspace(4).
6; CHECK: %llvm.amdgcn.module.lds.t = type { float, [4 x i8], i32 }
7
8; Variables removed by pass
9; CHECK-NOT: @var0
10; CHECK-NOT: @var1
11
; Two addrspace(3) globals with undef initializers, each requesting 8-byte
; alignment. The expected struct type above is { float, [4 x i8], i32 }, and
; the expected accesses in @func address @var0 as field 0 and @var1 as field 2.
12@var0 = addrspace(3) global float undef, align 8
13@var1 = addrspace(3) global i32 undef, align 8
14
; An addrspace(1) global whose initializer is the address of @var1, i.e. a
; reference to @var1 from a global initializer rather than from an instruction.
15@ptr =  addrspace(1) global i32 addrspace(3)* @var1, align 4
16
17; A variable that is unchanged by pass
18; CHECK: @with_init = addrspace(3) global i64 0
; Unlike @var0 and @var1 (both undef), this addrspace(3) global has an
; explicit initializer (i64 0).
19@with_init = addrspace(3) global i64 0
20
21; Instance of new type, aligned to max of element alignment
22; CHECK: @llvm.amdgcn.module.lds = internal addrspace(3) global %llvm.amdgcn.module.lds.t undef, align 8
23
24; Use in func rewritten to access struct at address zero
25; CHECK-LABEL: @func()
26; CHECK: %dec = atomicrmw fsub float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0), float 1.0
27; CHECK: %val0 = load i32, i32 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 2), align 8
28; CHECK: %val1 = add i32 %val0, 4
29; CHECK: store i32 %val1, i32 addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 2), align 8
30; CHECK: %unused0 = atomicrmw add i64 addrspace(3)* @with_init, i64 1 monotonic
; Non-kernel function (default calling convention) that accesses all three
; addrspace(3) globals directly.
31define void @func() {
  ; Atomically subtract 1.0 from @var0.
32  %dec = atomicrmw fsub float addrspace(3)* @var0, float 1.0 monotonic
  ; Load @var1, add 4, and store the result back. Both accesses are written
  ; with align 4 here; the expected output above lists them with align 8.
33  %val0 = load i32, i32 addrspace(3)* @var1, align 4
34  %val1 = add i32 %val0, 4
35  store i32 %val1, i32 addrspace(3)* @var1, align 4
  ; Atomic add on @with_init; the expected output keeps this line unchanged.
36  %unused0 = atomicrmw add i64 addrspace(3)* @with_init, i64 1 monotonic
37  ret void
38}
39
40; This kernel calls a function that uses LDS so needs the block
41; CHECK-LABEL: @kern_call()
42; CHECK: call void @llvm.donothing() [ "ExplicitUse"(%llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds) ]
43; CHECK: call void @func()
44; CHECK: %dec = atomicrmw fsub float addrspace(3)* getelementptr inbounds (%llvm.amdgcn.module.lds.t, %llvm.amdgcn.module.lds.t addrspace(3)* @llvm.amdgcn.module.lds, i32 0, i32 0), float 2.000000e+00 monotonic, align 8
; amdgpu_kernel entry point that both calls @func and accesses @var0 itself.
45define amdgpu_kernel void @kern_call() {
46  call void @func()
  ; Direct use of @var0 from the kernel: atomically subtract 2.0.
47  %dec = atomicrmw fsub float addrspace(3)* @var0, float 2.0 monotonic
48  ret void
49}
50
51; This kernel does not need to alloc the LDS block as it makes no calls
52; CHECK-LABEL: @kern_empty()
53; CHECK-NOT: call void @llvm.donothing()
; spir_kernel entry point with an empty body (no calls, no global accesses).
; It carries attribute group #0, defined at the end of this file as
; "amdgpu-elide-module-lds".
54define spir_kernel void @kern_empty() #0{
55  ret void
56}
57
58; Make sure we don't crash trying to insert code into a kernel
59; declaration.
; Declaration only: an amdgpu_kernel function with no body.
60declare amdgpu_kernel void @kernel_declaration()
61
; Attribute group #0, referenced by @kern_empty above.
62attributes #0 = { "amdgpu-elide-module-lds" }
63; CHECK: attributes #0 = { "amdgpu-elide-module-lds" }
64