1 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s 2 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s 3 4 #include "Inputs/cuda.h" 5 6 // Coerced struct from `struct S` without all generic pointers lowered into 7 // global ones. 8 // CHECK: %struct.S.coerce = type { i32 addrspace(1)*, float addrspace(1)* } 9 // CHECK: %struct.T.coerce = type { [2 x float addrspace(1)*] } 10 11 // On the host-side compilation, generic pointer won't be coerced. 12 // HOST-NOT: %struct.S.coerce 13 // HOST-NOT: %struct.T.coerce 14 15 // CHECK: define amdgpu_kernel void @_Z7kernel1Pi(i32 addrspace(1)* %x.coerce) 16 // HOST: define void @_Z22__device_stub__kernel1Pi(i32* %x) 17 __global__ void kernel1(int *x) { 18 x[0]++; 19 } 20 21 // CHECK: define amdgpu_kernel void @_Z7kernel2Ri(i32 addrspace(1)* dereferenceable(4) %x.coerce) 22 // HOST: define void @_Z22__device_stub__kernel2Ri(i32* dereferenceable(4) %x) 23 __global__ void kernel2(int &x) { 24 x++; 25 } 26 27 // CHECK: define amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y) 28 // HOST: define void @_Z22__device_stub__kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y) 29 __global__ void kernel3(__attribute__((address_space(2))) int *x, 30 __attribute__((address_space(1))) int *y) { 31 y[0] = x[0]; 32 } 33 34 // CHECK: define void @_Z4funcPi(i32* %x) 35 __device__ void func(int *x) { 36 x[0]++; 37 } 38 39 struct S { 40 int *x; 41 float *y; 42 }; 43 // `by-val` struct will be coerced into a similar struct with all generic 44 // pointers lowerd into global ones. 45 // CHECK: define amdgpu_kernel void @_Z7kernel41S(%struct.S.coerce %s.coerce) 46 // HOST: define void @_Z22__device_stub__kernel41S(i32* %s.coerce0, float* %s.coerce1) 47 __global__ void kernel4(struct S s) { 48 s.x[0]++; 49 s.y[0] += 1.f; 50 } 51 52 // If a pointer to struct is passed, only the pointer itself is coerced into the global one. 53 // CHECK: define amdgpu_kernel void @_Z7kernel5P1S(%struct.S addrspace(1)* %s.coerce) 54 // HOST: define void @_Z22__device_stub__kernel5P1S(%struct.S* %s) 55 __global__ void kernel5(struct S *s) { 56 s->x[0]++; 57 s->y[0] += 1.f; 58 } 59 60 struct T { 61 float *x[2]; 62 }; 63 // `by-val` array is also coerced. 64 // CHECK: define amdgpu_kernel void @_Z7kernel61T(%struct.T.coerce %t.coerce) 65 // HOST: define void @_Z22__device_stub__kernel61T(float* %t.coerce0, float* %t.coerce1) 66 __global__ void kernel6(struct T t) { 67 t.x[0][0] += 1.f; 68 t.x[1][0] += 2.f; 69 } 70 71 // Check that coerced pointers retain the noalias attribute when qualified with __restrict. 72 // CHECK: define amdgpu_kernel void @_Z7kernel7Pi(i32 addrspace(1)* noalias %x.coerce) 73 // HOST: define void @_Z22__device_stub__kernel7Pi(i32* noalias %x) 74 __global__ void kernel7(int *__restrict x) { 75 x[0]++; 76 } 77