1 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck %s
2 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s
3 
4 #include "Inputs/cuda.h"
5 
6 // Coerced struct from `struct S` without all generic pointers lowered into
7 // global ones.
8 // CHECK: %struct.S.coerce = type { i32 addrspace(1)*, float addrspace(1)* }
9 // CHECK: %struct.T.coerce = type { [2 x float addrspace(1)*] }
10 
11 // On the host-side compilation, generic pointer won't be coerced.
12 // HOST-NOT: %struct.S.coerce
13 // HOST-NOT: %struct.T.coerce
14 
15 // CHECK: define amdgpu_kernel void  @_Z7kernel1Pi(i32 addrspace(1)* %x.coerce)
16 // HOST: define void @_Z22__device_stub__kernel1Pi(i32* %x)
17 __global__ void kernel1(int *x) {
18   x[0]++;
19 }
20 
21 // CHECK: define amdgpu_kernel void  @_Z7kernel2Ri(i32 addrspace(1)* dereferenceable(4) %x.coerce)
22 // HOST: define void @_Z22__device_stub__kernel2Ri(i32* dereferenceable(4) %x)
23 __global__ void kernel2(int &x) {
24   x++;
25 }
26 
27 // CHECK: define amdgpu_kernel void  @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
28 // HOST: define void @_Z22__device_stub__kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
29 __global__ void kernel3(__attribute__((address_space(2))) int *x,
30                         __attribute__((address_space(1))) int *y) {
31   y[0] = x[0];
32 }
33 
34 // CHECK: define void @_Z4funcPi(i32* %x)
35 __device__ void func(int *x) {
36   x[0]++;
37 }
38 
39 struct S {
40   int *x;
41   float *y;
42 };
43 // `by-val` struct will be coerced into a similar struct with all generic
44 // pointers lowerd into global ones.
45 // CHECK: define amdgpu_kernel void @_Z7kernel41S(%struct.S.coerce %s.coerce)
46 // HOST: define void @_Z22__device_stub__kernel41S(i32* %s.coerce0, float* %s.coerce1)
47 __global__ void kernel4(struct S s) {
48   s.x[0]++;
49   s.y[0] += 1.f;
50 }
51 
52 // If a pointer to struct is passed, only the pointer itself is coerced into the global one.
53 // CHECK: define amdgpu_kernel void @_Z7kernel5P1S(%struct.S addrspace(1)* %s.coerce)
54 // HOST: define void @_Z22__device_stub__kernel5P1S(%struct.S* %s)
55 __global__ void kernel5(struct S *s) {
56   s->x[0]++;
57   s->y[0] += 1.f;
58 }
59 
60 struct T {
61   float *x[2];
62 };
63 // `by-val` array is also coerced.
64 // CHECK: define amdgpu_kernel void @_Z7kernel61T(%struct.T.coerce %t.coerce)
65 // HOST: define void @_Z22__device_stub__kernel61T(float* %t.coerce0, float* %t.coerce1)
66 __global__ void kernel6(struct T t) {
67   t.x[0][0] += 1.f;
68   t.x[1][0] += 2.f;
69 }
70 
71 // Check that coerced pointers retain the noalias attribute when qualified with __restrict.
72 // CHECK: define amdgpu_kernel void @_Z7kernel7Pi(i32 addrspace(1)* noalias %x.coerce)
73 // HOST: define void @_Z22__device_stub__kernel7Pi(i32* noalias %x)
74 __global__ void kernel7(int *__restrict x) {
75   x[0]++;
76 }
77