// REQUIRES: amdgpu-registered-target

// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s

// Derived from CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu by deleting references to HOST
// The original test passes the result through opt O2, but that seems to introduce invalid
// addrspace casts which are not being fixed as part of the present change.

// kernel1: kernel entry point taking a plain `int *` and incrementing the
// first element it points to.
// The label directive below expects the parameter to be emitted as an
// unqualified `i32*`; the negative directive rejects any addrspacecast from an
// addrspace(1) pointer to an unqualified pointer of the same pointee type
// between this label and the next one.
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel1Pi(i32* {{.*}} %x)
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__attribute__((amdgpu_kernel)) void kernel1(int *x) {
  x[0]++;
}

// kernel2: kernel entry point taking an `int &` and incrementing the referenced
// object.
// The label directive below expects the reference to be emitted as an
// unqualified `i32*` carrying `nonnull align 4 dereferenceable(4)`; the
// negative directive rejects any addrspacecast from an addrspace(1) pointer to
// an unqualified pointer of the same pointee type before the next label.
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel2Ri(i32* {{.*}} nonnull align 4 dereferenceable(4) %x)
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__attribute__((amdgpu_kernel)) void kernel2(int &x) {
  x++;
}

// kernel3: kernel entry point whose parameters carry explicit address_space(2)
// and address_space(1) qualifiers; it copies the first element of `x` into the
// first element of `y`.
// The label directive below expects both parameters to keep their declared
// address spaces in the emitted signature; the negative directive rejects any
// addrspacecast from an addrspace(1) pointer to an unqualified pointer of the
// same pointee type before the next label.
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)*{{.*}} %x, i32 addrspace(1)*{{.*}} %y)
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__attribute__((amdgpu_kernel)) void kernel3(__attribute__((address_space(2))) int *x,
                                            __attribute__((address_space(1))) int *y) {
  y[0] = x[0];
}

// func: takes a plain `int *` and increments the first element it points to.
// The label directive below only requires `define ... void @_Z4funcPi` with an
// unqualified `i32*` parameter; the negative directive rejects any
// addrspacecast from an addrspace(1) pointer to an unqualified pointer of the
// same pointee type before the next label.
// NOTE(review): the definition carries the amdgpu_kernel attribute although the
// label pattern does not require it -- confirm that this is intended.
// COMMON-LABEL: define{{.*}} void @_Z4funcPi(i32*{{.*}} %x)
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__attribute__((amdgpu_kernel)) void func(int *x) {
  x[0]++;
}

// Aggregate holding two pointers; passed to kernel4 by value and to kernel5 by
// pointer.
struct S {
  int *x;
  float *y;
};
// `by-val` struct is passed by-indirect-alias (a mix of by-ref and indirect
// by-val). However, the enhanced address inferring pass should be able to
// assume they are global pointers.
//
// NOTE(review): the RUN line of this test only emits IR and does not run any
// optimization pass, so the label directive below verifies just the
// `byref(%struct.S)` addrspace(4) parameter in the emitted signature.

// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel41S(%struct.S addrspace(4)*{{.*}} byref(%struct.S) align 8 %0)
__attribute__((amdgpu_kernel)) void kernel4(struct S s) {
  s.x[0]++;
  s.y[0] += 1.f;
}

// kernel5: same updates as kernel4, but the aggregate is reached through a
// pointer. The label directive below expects an unqualified `%struct.S*`
// parameter.
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel5P1S(%struct.S* {{.*}} %s)
__attribute__((amdgpu_kernel)) void kernel5(struct S *s) {
  s->x[0]++;
  s->y[0] += 1.f;
}

// Aggregate wrapping an array of two float pointers; passed to kernel6 by
// value.
struct T {
  float *x[2];
};
// `by-val` array is passed by-indirect-alias (a mix of by-ref and indirect
// by-val). However, the enhanced address inferring pass should be able to
// assume they are global pointers.
//
// NOTE(review): the RUN line of this test only emits IR and does not run any
// optimization pass, so the label directive below verifies just the
// `byref(%struct.T)` addrspace(4) parameter in the emitted signature.
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel61T(%struct.T addrspace(4)*{{.*}} byref(%struct.T) align 8 %0)
__attribute__((amdgpu_kernel)) void kernel6(struct T t) {
  t.x[0][0] += 1.f;
  t.x[1][0] += 2.f;
}

// Check that coerced pointers retain the noalias attribute when qualified with __restrict.
// kernel7 increments the first element behind a `__restrict`-qualified pointer;
// the label directive below expects `noalias` on the emitted `i32*` parameter.
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel7Pi(i32* noalias{{.*}} %x)
__attribute__((amdgpu_kernel)) void kernel7(int *__restrict x) {
  x[0]++;
}

// Single element struct.
struct SS {
  float *x;
};
// kernel8: takes the single-pointer struct by value and adds 3 to the float it
// points to. The label directive below expects the struct to be passed as a
// bare `float*` named `%a.coerce`; the negative directive rejects any
// addrspacecast from an addrspace(1) pointer to an unqualified pointer of the
// same pointee type after the label.
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel82SS(float* %a.coerce)
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__attribute__((amdgpu_kernel)) void kernel8(struct SS a) {
  *a.x += 3.f;
}
