1 // REQUIRES: x86-registered-target
2 // REQUIRES: amdgpu-registered-target
3 
4 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s
5 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=COMMON,OPT
6 // RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s
7 
8 #include "Inputs/cuda.h"
9 
10 // Coerced struct from `struct S` with all generic pointers lowered into
11 // global ones.
12 // COMMON: %struct.S.coerce = type { i32 addrspace(1)*, float addrspace(1)* }
13 // COMMON: %struct.T.coerce = type { [2 x float addrspace(1)*] }
14 
15 // On the host-side compilation, generic pointers won't be coerced.
16 // HOST-NOT: %struct.S.coerce
17 // HOST-NOT: %struct.T.coerce
18 
// kernel1: a generic `int *` kernel argument. On amdgcn the ABI coerces it to
// an addrspace(1) (global) pointer parameter; the unoptimized body then casts
// it back to generic exactly once — the CHECK/CHECK-NOT pair below asserts
// one addrspacecast and no second one. After -O2 the round-trip cast folds
// away, so the OPT lines match a load/store directly on addrspace(1).
// The HOST line checks the stub keeps the uncoerced generic pointer.
19 // HOST: define void @_Z22__device_stub__kernel1Pi(i32* %x)
20 // COMMON-LABEL: define amdgpu_kernel void @_Z7kernel1Pi(i32 addrspace(1)*{{.*}} %x.coerce)
21 // CHECK:     = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
22 // CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
23 // OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4
24 // OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
25 // OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4
26 // OPT: ret void
27 __global__ void kernel1(int *x) {
28   x[0]++;
29 }
30 
// kernel2: same coercion applies to a reference parameter (`int &`) — it is
// lowered like a pointer, coerced to addrspace(1), with the reference
// attributes (nonnull/align/dereferenceable) preserved on the coerced
// parameter. As in kernel1, exactly one addrspacecast appears unoptimized
// and the OPT run shows direct addrspace(1) access after -O2.
31 // HOST: define void @_Z22__device_stub__kernel2Ri(i32* nonnull align 4 dereferenceable(4) %x)
32 // COMMON-LABEL: define amdgpu_kernel void @_Z7kernel2Ri(i32 addrspace(1)*{{.*}} nonnull align 4 dereferenceable(4) %x.coerce)
33 // CHECK:     = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
34 // CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
35 // OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4
36 // OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
37 // OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4
38 // OPT: ret void
39 __global__ void kernel2(int &x) {
40   x++;
41 }
42 
// kernel3: pointers already qualified with an explicit address space are NOT
// coerced — the kernel signature keeps addrspace(2)/addrspace(1) as written,
// and CHECK-NOT asserts no addrspacecast back to generic is emitted.
43 // HOST: define void @_Z22__device_stub__kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
44 // CHECK-LABEL: define amdgpu_kernel void  @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)*{{.*}} %x, i32 addrspace(1)*{{.*}} %y)
45 // CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
46 __global__ void kernel3(__attribute__((address_space(2))) int *x,
47                         __attribute__((address_space(1))) int *y) {
48   y[0] = x[0];
49 }
50 
// func: coercion only applies to kernel (`amdgpu_kernel`) entry points. A
// plain `__device__` function keeps its generic `i32*` parameter and emits
// no addrspacecast from addrspace(1).
51 // COMMON-LABEL: define void @_Z4funcPi(i32*{{.*}} %x)
52 // CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
53 __device__ void func(int *x) {
54   x[0]++;
55 }
56 
// A struct holding two generic pointers. Used below to test kernel argument
// coercion when passed by value (kernel4) and by pointer (kernel5); the
// by-value case produces the %struct.S.coerce type checked at the top.
57 struct S {
58   int *x;
59   float *y;
60 };
61 // `by-val` struct will be coerced into a similar struct with all generic
62 // pointers lowerd into global ones.
// kernel4: a by-value struct argument is coerced member-wise — the kernel
// receives %struct.S.coerce, whose pointer fields are addrspace(1). The OPT
// lines extract both fields from the coerced aggregate and verify the
// increments operate directly on global-address-space pointers after -O2.
// On the host the struct is expanded into its two (uncoerced) fields.
63 // HOST: define void @_Z22__device_stub__kernel41S(i32* %s.coerce0, float* %s.coerce1)
64 // COMMON-LABEL: define amdgpu_kernel void @_Z7kernel41S(%struct.S.coerce %s.coerce)
65 // OPT: [[P0:%.*]] = extractvalue %struct.S.coerce %s.coerce, 0
66 // OPT: [[P1:%.*]] = extractvalue %struct.S.coerce %s.coerce, 1
67 // OPT: [[V0:%.*]] = load i32, i32 addrspace(1)* [[P0]], align 4
68 // OPT: [[INC:%.*]] = add nsw i32 [[V0]], 1
69 // OPT: store i32 [[INC]], i32 addrspace(1)* [[P0]], align 4
70 // OPT: [[V1:%.*]] = load float, float addrspace(1)* [[P1]], align 4
71 // OPT: [[ADD:%.*]] = fadd contract float [[V1]], 1.000000e+00
72 // OPT: store float [[ADD]], float addrspace(1)* [[P1]], align 4
73 // OPT: ret void
74 __global__ void kernel4(struct S s) {
75   s.x[0]++;
76   s.y[0] += 1.f;
77 }
78 
// kernel5: when the struct is passed by pointer, only that top-level pointer
// is coerced to addrspace(1); the generic pointers stored inside the struct
// are left as-is (contrast with the member-wise coercion in kernel4).
79 // If a pointer to struct is passed, only the pointer itself is coerced into the global one.
80 // HOST: define void @_Z22__device_stub__kernel5P1S(%struct.S* %s)
81 // COMMON-LABEL: define amdgpu_kernel void @_Z7kernel5P1S(%struct.S addrspace(1)*{{.*}} %s.coerce)
82 __global__ void kernel5(struct S *s) {
83   s->x[0]++;
84   s->y[0] += 1.f;
85 }
86 
// A struct wrapping an array of generic pointers; used by kernel6 to show
// that pointers inside a by-value array member are coerced element-wise
// (see %struct.T.coerce checked at the top of the file).
87 struct T {
88   float *x[2];
89 };
// kernel6: by-value aggregate coercion recurses into array members — each
// element of T::x becomes float addrspace(1)*. The OPT lines extract the
// coerced [2 x float addrspace(1)*] array, then both elements, and verify
// that the two accumulations (+1.f and +2.f) store straight to addrspace(1).
// The host stub again expands the struct into plain uncoerced pointers.
90 // `by-val` array is also coerced.
91 // HOST: define void @_Z22__device_stub__kernel61T(float* %t.coerce0, float* %t.coerce1)
92 // COMMON-LABEL: define amdgpu_kernel void @_Z7kernel61T(%struct.T.coerce %t.coerce)
93 // OPT: [[ARR:%.*]] = extractvalue %struct.T.coerce %t.coerce, 0
94 // OPT: [[P0:%.*]] = extractvalue [2 x float addrspace(1)*] [[ARR]], 0
95 // OPT: [[P1:%.*]] = extractvalue [2 x float addrspace(1)*] [[ARR]], 1
96 // OPT: [[V0:%.*]] = load float, float addrspace(1)* [[P0]], align 4
97 // OPT: [[ADD0:%.*]] = fadd contract float [[V0]], 1.000000e+00
98 // OPT: store float [[ADD0]], float addrspace(1)* [[P0]], align 4
99 // OPT: [[V1:%.*]] = load float, float addrspace(1)* [[P1]], align 4
100 // OPT: [[ADD1:%.*]] = fadd contract float [[V1]], 2.000000e+00
101 // OPT: store float [[ADD1]], float addrspace(1)* [[P1]], align 4
102 // OPT: ret void
103 __global__ void kernel6(struct T t) {
104   t.x[0][0] += 1.f;
105   t.x[1][0] += 2.f;
106 }
107 
// kernel7: coercion must not drop parameter attributes — a `__restrict`
// pointer keeps `noalias` both on the host stub and on the coerced
// addrspace(1) kernel parameter.
108 // Check that coerced pointers retain the noalias attribute when qualified with __restrict.
109 // HOST: define void @_Z22__device_stub__kernel7Pi(i32* noalias %x)
110 // COMMON-LABEL: define amdgpu_kernel void @_Z7kernel7Pi(i32 addrspace(1)* noalias{{.*}} %x.coerce)
111 __global__ void kernel7(int *__restrict x) {
112   x[0]++;
113 }
114