// REQUIRES: x86-registered-target
// REQUIRES: amdgpu-registered-target

// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s
// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=COMMON,OPT
// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s

#include "Inputs/cuda.h"

// Coerced struct from `struct S` without all generic pointers lowered into
// global ones.

// In the host-side compilation, generic pointers won't be coerced.
// HOST-NOT: %struct.S.coerce
// HOST-NOT: %struct.T.coerce

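// A rough sketch (pseudo-code in a comment, not part of the compiled test) of
// the device-side lowering checked below: the generic pointer argument arrives
// as a global pointer and is cast back to the generic address space on entry.
//
//   amdgpu_kernel void kernel1(global int *x_coerce) {
//     int *x = addrspacecast global-to-generic (x_coerce); // the CHECKed cast
//     x[0]++;
//   }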
// HOST: define{{.*}} void @_Z22__device_stub__kernel1Pi(i32* noundef %x)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel1Pi(i32 addrspace(1)*{{.*}} %x.coerce)
// CHECK:     ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4, !amdgpu.noclobber !2
// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
// OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4
// OPT: ret void
__global__ void kernel1(int *x) {
  x[0]++;
}

// HOST: define{{.*}} void @_Z22__device_stub__kernel2Ri(i32* noundef nonnull align 4 dereferenceable(4) %x)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel2Ri(i32 addrspace(1)*{{.*}} nonnull align 4 dereferenceable(4) %x.coerce)
// CHECK:     ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4, !amdgpu.noclobber !2
// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
// OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4
// OPT: ret void
__global__ void kernel2(int &x) {
  x++;
}

// HOST: define{{.*}} void @_Z22__device_stub__kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* noundef %x, i32 addrspace(1)* noundef %y)
// CHECK-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)*{{.*}} %x, i32 addrspace(1)*{{.*}} %y)
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__global__ void kernel3(__attribute__((address_space(2))) int *x,
                        __attribute__((address_space(1))) int *y) {
  y[0] = x[0];
}

// COMMON-LABEL: define{{.*}} void @_Z4funcPi(i32*{{.*}} %x)
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__device__ void func(int *x) {
  x[0]++;
}

struct S {
  int *x;
  float *y;
};
// A `by-val` struct is passed by-indirect-alias (a mix of by-ref and indirect
// by-val). However, the enhanced address inferring pass should be able to
// assume its pointer members are global pointers.
//
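// A rough sketch (pseudo-code in a comment, not part of the compiled test) of
// the lowering checked below: the struct is passed byref in the constant
// address space (4), each pointer field is loaded as a generic pointer, and
// the inferring pass casts it to the global address space.
//
//   amdgpu_kernel void kernel4(constant struct S &s) {
//     global int   *gx = addrspacecast generic-to-global (s.x);
//     global float *gy = addrspacecast generic-to-global (s.y);
//     gx[0]++;
//     gy[0] += 1.f;
//   }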
// HOST: define{{.*}} void @_Z22__device_stub__kernel41S(i32* %s.coerce0, float* %s.coerce1)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel41S(%struct.S addrspace(4)*{{.*}} byref(%struct.S) align 8 %0)
// OPT: [[R0:%.*]] = getelementptr inbounds %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 0
// OPT: [[P0:%.*]] = load i32*, i32* addrspace(4)* [[R0]], align 8
// OPT: [[G0:%.*]] ={{.*}} addrspacecast i32* [[P0]] to i32 addrspace(1)*
// OPT: [[R1:%.*]] = getelementptr inbounds %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 1
// OPT: [[P1:%.*]] = load float*, float* addrspace(4)* [[R1]], align 8
// OPT: [[G1:%.*]] ={{.*}} addrspacecast float* [[P1]] to float addrspace(1)*
// OPT: [[V0:%.*]] = load i32, i32 addrspace(1)* [[G0]], align 4, !amdgpu.noclobber !2
// OPT: [[INC:%.*]] = add nsw i32 [[V0]], 1
// OPT: store i32 [[INC]], i32 addrspace(1)* [[G0]], align 4
// OPT: [[V1:%.*]] = load float, float addrspace(1)* [[G1]], align 4
// OPT: [[ADD:%.*]] = fadd contract float [[V1]], 1.000000e+00
// OPT: store float [[ADD]], float addrspace(1)* [[G1]], align 4
// OPT: ret void
__global__ void kernel4(struct S s) {
  s.x[0]++;
  s.y[0] += 1.f;
}

// If a pointer to a struct is passed, only the pointer itself is coerced into a global one.
// HOST: define{{.*}} void @_Z22__device_stub__kernel5P1S(%struct.S* noundef %s)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel5P1S(%struct.S addrspace(1)*{{.*}} %s.coerce)
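// A rough sketch (pseudo-code in a comment, not part of the compiled test):
// only the outer pointer is promoted; the pointers loaded through it keep the
// generic address space in the unoptimized output.
//
//   amdgpu_kernel void kernel5(global struct S *s_coerce) {
//     struct S *s = addrspacecast global-to-generic (s_coerce);
//     s->x[0]++;      // s->x is still a generic int *
//     s->y[0] += 1.f; // s->y is still a generic float *
//   }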
__global__ void kernel5(struct S *s) {
  s->x[0]++;
  s->y[0] += 1.f;
}

struct T {
  float *x[2];
};
// A `by-val` array is passed by-indirect-alias (a mix of by-ref and indirect
// by-val). However, the enhanced address inferring pass should be able to
// assume its pointer elements are global pointers.
//
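// A rough sketch (pseudo-code in a comment, not part of the compiled test),
// mirroring kernel4 but indexing through the pointer-array member:
//
//   amdgpu_kernel void kernel6(constant struct T &t) {
//     global float *g0 = addrspacecast generic-to-global (t.x[0]);
//     global float *g1 = addrspacecast generic-to-global (t.x[1]);
//     g0[0] += 1.f;
//     g1[0] += 2.f;
//   }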
// HOST: define{{.*}} void @_Z22__device_stub__kernel61T(float* %t.coerce0, float* %t.coerce1)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel61T(%struct.T addrspace(4)*{{.*}} byref(%struct.T) align 8 %0)
// OPT: [[R0:%.*]] = getelementptr inbounds %struct.T, %struct.T addrspace(4)* %0, i64 0, i32 0, i64 0
// OPT: [[P0:%.*]] = load float*, float* addrspace(4)* [[R0]], align 8
// OPT: [[G0:%.*]] ={{.*}} addrspacecast float* [[P0]] to float addrspace(1)*
// OPT: [[R1:%.*]] = getelementptr inbounds %struct.T, %struct.T addrspace(4)* %0, i64 0, i32 0, i64 1
// OPT: [[P1:%.*]] = load float*, float* addrspace(4)* [[R1]], align 8
// OPT: [[G1:%.*]] ={{.*}} addrspacecast float* [[P1]] to float addrspace(1)*
// OPT: [[V0:%.*]] = load float, float addrspace(1)* [[G0]], align 4, !amdgpu.noclobber !2
// OPT: [[ADD0:%.*]] = fadd contract float [[V0]], 1.000000e+00
// OPT: store float [[ADD0]], float addrspace(1)* [[G0]], align 4
// OPT: [[V1:%.*]] = load float, float addrspace(1)* [[G1]], align 4
// OPT: [[ADD1:%.*]] = fadd contract float [[V1]], 2.000000e+00
// OPT: store float [[ADD1]], float addrspace(1)* [[G1]], align 4
// OPT: ret void
__global__ void kernel6(struct T t) {
  t.x[0][0] += 1.f;
  t.x[1][0] += 2.f;
}

// Check that coerced pointers retain the noalias attribute when qualified with __restrict.
// HOST: define{{.*}} void @_Z22__device_stub__kernel7Pi(i32* noalias noundef %x)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel7Pi(i32 addrspace(1)* noalias{{.*}} %x.coerce)
__global__ void kernel7(int *__restrict x) {
  x[0]++;
}

// Single-element struct.
struct SS {
  float *x;
};
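// A rough sketch (pseudo-code in a comment, not part of the compiled test): a
// single-element struct is flattened, so the kernel effectively receives the
// lone pointer member directly, already promoted to the global address space.
//
//   amdgpu_kernel void kernel8(global float *a_coerce) {
//     struct SS a = { addrspacecast global-to-generic (a_coerce) };
//     *a.x += 3.f;
//   }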
// HOST: define{{.*}} void @_Z22__device_stub__kernel82SS(float* %a.coerce)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel82SS(float addrspace(1)*{{.*}} %a.coerce)
// CHECK:     ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// OPT: [[VAL:%.*]] = load float, float addrspace(1)* %a.coerce, align 4, !amdgpu.noclobber !2
// OPT: [[INC:%.*]] = fadd contract float [[VAL]], 3.000000e+00
// OPT: store float [[INC]], float addrspace(1)* %a.coerce, align 4
// OPT: ret void
__global__ void kernel8(struct SS a) {
  *a.x += 3.f;
}