; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -infer-address-spaces %s | FileCheck %s

; Trivial optimization of generic addressing

; CHECK-LABEL: @load_global_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(1)* %tmp0
; CHECK-NEXT: ret float %tmp1
define float @load_global_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
  %tmp1 = load float, float addrspace(1)* %tmp0
  ret float %tmp1
}

; CHECK-LABEL: @load_constant_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(4)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(4)* %tmp0
; CHECK-NEXT: ret float %tmp1
define float @load_constant_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(4)*
  %tmp1 = load float, float addrspace(4)* %tmp0
  ret float %tmp1
}

; CHECK-LABEL: @load_group_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(3)* %tmp0
; CHECK-NEXT: ret float %tmp1
define float @load_group_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
  %tmp1 = load float, float addrspace(3)* %tmp0
  ret float %tmp1
}

; CHECK-LABEL: @load_private_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
; CHECK-NEXT: %tmp1 = load float, float addrspace(5)* %tmp0
; CHECK-NEXT: ret float %tmp1
define float @load_private_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
  %tmp1 = load float, float addrspace(5)* %tmp0
  ret float %tmp1
}

; CHECK-LABEL: @store_global_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
; CHECK-NEXT: store float 0.000000e+00, float addrspace(1)* %tmp0
define amdgpu_kernel void @store_global_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(1)*
  store float 0.0, float addrspace(1)* %tmp0
  ret void
}

; CHECK-LABEL: @store_group_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
; CHECK-NEXT: store float 0.000000e+00, float addrspace(3)* %tmp0
define amdgpu_kernel void @store_group_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(3)*
  store float 0.0, float addrspace(3)* %tmp0
  ret void
}

; CHECK-LABEL: @store_private_from_flat(
; CHECK-NEXT: %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
; CHECK-NEXT: store float 0.000000e+00, float addrspace(5)* %tmp0
define amdgpu_kernel void @store_private_from_flat(float* %generic_scalar) #0 {
  %tmp0 = addrspacecast float* %generic_scalar to float addrspace(5)*
  store float 0.0, float addrspace(5)* %tmp0
  ret void
}

; optimized to global load/store.
; CHECK-LABEL: @load_store_global(
; CHECK-NEXT: %val = load i32, i32 addrspace(1)* %input, align 4
; CHECK-NEXT: store i32 %val, i32 addrspace(1)* %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_global(i32 addrspace(1)* nocapture %input, i32 addrspace(1)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(1)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(1)* %output to i32*
  %val = load i32, i32* %tmp0, align 4
  store i32 %val, i32* %tmp1, align 4
  ret void
}

; Optimized to group load/store.
; CHECK-LABEL: @load_store_group(
; CHECK-NEXT: %val = load i32, i32 addrspace(3)* %input, align 4
; CHECK-NEXT: store i32 %val, i32 addrspace(3)* %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_group(i32 addrspace(3)* nocapture %input, i32 addrspace(3)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(3)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(3)* %output to i32*
  %val = load i32, i32* %tmp0, align 4
  store i32 %val, i32* %tmp1, align 4
  ret void
}

; Optimized to private load/store.
; CHECK-LABEL: @load_store_private(
; CHECK-NEXT: %val = load i32, i32 addrspace(5)* %input, align 4
; CHECK-NEXT: store i32 %val, i32 addrspace(5)* %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_private(i32 addrspace(5)* nocapture %input, i32 addrspace(5)* nocapture %output) #0 {
  %tmp0 = addrspacecast i32 addrspace(5)* %input to i32*
  %tmp1 = addrspacecast i32 addrspace(5)* %output to i32*
  %val = load i32, i32* %tmp0, align 4
  store i32 %val, i32* %tmp1, align 4
  ret void
}

; No optimization. flat load/store.
; CHECK-LABEL: @load_store_flat(
; CHECK-NEXT: %val = load i32, i32* %input, align 4
; CHECK-NEXT: store i32 %val, i32* %output, align 4
; CHECK-NEXT: ret void
define amdgpu_kernel void @load_store_flat(i32* nocapture %input, i32* nocapture %output) #0 {
  %val = load i32, i32* %input, align 4
  store i32 %val, i32* %output, align 4
  ret void
}

; CHECK-LABEL: @store_addrspacecast_ptr_value(
; CHECK: %cast = addrspacecast i32 addrspace(1)* %input to i32*
; CHECK-NEXT: store i32* %cast, i32* addrspace(1)* %output, align 4
define amdgpu_kernel void @store_addrspacecast_ptr_value(i32 addrspace(1)* nocapture %input, i32* addrspace(1)* nocapture %output) #0 {
  %cast = addrspacecast i32 addrspace(1)* %input to i32*
  store i32* %cast, i32* addrspace(1)* %output, align 4
  ret void
}

; CHECK-LABEL: @atomicrmw_add_global_to_flat(
; CHECK-NEXT: %ret = atomicrmw add i32 addrspace(1)* %global.ptr, i32 %y seq_cst
define i32 @atomicrmw_add_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %y) #0 {
  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
  %ret = atomicrmw add i32* %cast, i32 %y seq_cst
  ret i32 %ret
}

; CHECK-LABEL: @atomicrmw_add_group_to_flat(
; CHECK-NEXT: %ret = atomicrmw add i32 addrspace(3)* %group.ptr, i32 %y seq_cst
define i32 @atomicrmw_add_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %y) #0 {
  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
  %ret = atomicrmw add i32* %cast, i32 %y seq_cst
  ret i32 %ret
}

; CHECK-LABEL: @cmpxchg_global_to_flat(
; CHECK: %ret = cmpxchg i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val seq_cst monotonic
define { i32, i1 } @cmpxchg_global_to_flat(i32 addrspace(1)* %global.ptr, i32 %cmp, i32 %val) #0 {
  %cast = addrspacecast i32 addrspace(1)* %global.ptr to i32*
  %ret = cmpxchg i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
  ret { i32, i1 } %ret
}

; CHECK-LABEL: @cmpxchg_group_to_flat(
; CHECK: %ret = cmpxchg i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val seq_cst monotonic
define { i32, i1 } @cmpxchg_group_to_flat(i32 addrspace(3)* %group.ptr, i32 %cmp, i32 %val) #0 {
  %cast = addrspacecast i32 addrspace(3)* %group.ptr to i32*
  %ret = cmpxchg i32* %cast, i32 %cmp, i32 %val seq_cst monotonic
  ret { i32, i1 } %ret
}

; Not pointer operand
; CHECK-LABEL: @cmpxchg_group_to_flat_wrong_operand(
; CHECK: %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32*
; CHECK: %ret = cmpxchg i32* addrspace(3)* %cas.ptr, i32* %cast.cmp, i32* %val seq_cst monotonic
define { i32*, i1 } @cmpxchg_group_to_flat_wrong_operand(i32* addrspace(3)* %cas.ptr, i32 addrspace(3)* %cmp.ptr, i32* %val) #0 {
  %cast.cmp = addrspacecast i32 addrspace(3)* %cmp.ptr to i32*
  %ret = cmpxchg i32* addrspace(3)* %cas.ptr, i32* %cast.cmp, i32* %val seq_cst monotonic
  ret { i32*, i1 } %ret
}

; Null pointer in local addr space
; CHECK-LABEL: @local_nullptr
; CHECK: icmp ne i8 addrspace(3)* %a, addrspacecast (i8 addrspace(5)* null to i8 addrspace(3)*)
; CHECK-NOT: i8 addrspace(3)* null
define void @local_nullptr(i32 addrspace(1)* nocapture %results, i8 addrspace(3)* %a) {
entry:
  %tobool = icmp ne i8 addrspace(3)* %a, addrspacecast (i8 addrspace(5)* null to i8 addrspace(3)*)
  %conv = zext i1 %tobool to i32
  store i32 %conv, i32 addrspace(1)* %results, align 4
  ret void
}

attributes #0 = { nounwind }