// REQUIRES: amdgpu-registered-target

// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s

// Derived from CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu by deleting references to HOST
// The original test passes the result through opt O2, but that seems to introduce invalid
// addrspace casts which are not being fixed as part of the present change.

// Layout of this file: every function below is preceded by the patterns that
// describe the IR signature expected for it. Where a negative pattern follows,
// it rejects an addrspacecast from addrspace(1) to a generic pointer in the
// region after that signature.

// Plain pointer parameter.
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel1Pi(i32* {{.*}} %x)
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__attribute__((amdgpu_kernel)) void kernel1(int *x) {
  x[0]++;
}

// Reference parameter; the expected signature carries the
// nonnull/align/dereferenceable attributes.
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel2Ri(i32* {{.*}} nonnull align 4 dereferenceable(4) %x)
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__attribute__((amdgpu_kernel)) void kernel2(int &x) {
  x++;
}

// Parameters with explicit address spaces 2 and 1; the expected signature
// keeps both address spaces.
// CHECK-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)*{{.*}} %x, i32 addrspace(1)*{{.*}} %y)
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__attribute__((amdgpu_kernel)) void kernel3(__attribute__((address_space(2))) int *x,
                                            __attribute__((address_space(1))) int *y) {
  y[0] = x[0];
}

// NOTE(review): func is declared with the kernel attribute even though the
// expected-signature pattern below does not spell out the calling convention
// (its leading wildcard still accepts it). Presumably a leftover from the
// conversion of the CUDA test -- confirm before changing.
// COMMON-LABEL: define{{.*}} void @_Z4funcPi(i32*{{.*}} %x)
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__attribute__((amdgpu_kernel)) void func(int *x) {
  x[0]++;
}

// Aggregate with two pointer members, used by kernel4 and kernel5.
struct S {
  int *x;
  float *y;
};
// `by-val` struct is passed by-indirect-alias (a mix of by-ref and indirect
// by-val). However, the enhanced address inferring pass should be able to
// assume they are global pointers.
//

// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel41S(%struct.S addrspace(4)*{{.*}} byref(%struct.S) align 8 %0)
__attribute__((amdgpu_kernel)) void kernel4(struct S s) {
  s.x[0]++;
  s.y[0] += 1.f;
}

// Pointer-to-struct parameter.
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel5P1S(%struct.S* {{.*}} %s)
__attribute__((amdgpu_kernel)) void kernel5(struct S *s) {
  s->x[0]++;
  s->y[0] += 1.f;
}

// Aggregate holding an array of two pointers, used by kernel6.
struct T {
  float *x[2];
};
// `by-val` array is passed by-indirect-alias (a mix of by-ref and indirect
// by-val). However, the enhanced address inferring pass should be able to
// assume they are global pointers.
//
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel61T(%struct.T addrspace(4)*{{.*}} byref(%struct.T) align 8 %0)
__attribute__((amdgpu_kernel)) void kernel6(struct T t) {
  t.x[0][0] += 1.f;
  t.x[1][0] += 2.f;
}

// Check that coerced pointers retain the noalias attribute when qualified with __restrict.
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel7Pi(i32* noalias{{.*}} %x)
__attribute__((amdgpu_kernel)) void kernel7(int *__restrict x) {
  x[0]++;
}

// Single element struct.
struct SS {
  float *x;
};
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel82SS(float* %a.coerce)
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__attribute__((amdgpu_kernel)) void kernel8(struct SS a) {
  *a.x += 3.f;
}