// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=COMMON,OPT
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s

#include "Inputs/cuda.h"

// This test verifies that, for amdgcn device compilation, generic (address
// space 0) pointer kernel arguments are coerced to global (address space 1)
// pointers -- directly, by reference, inside by-value structs/arrays, and
// never for explicitly address-space-qualified or non-kernel arguments.

// Coerced struct from `struct S` without all generic pointers lowered into
// global ones.
// COMMON: %struct.S.coerce = type { i32 addrspace(1)*, float addrspace(1)* }
// COMMON: %struct.T.coerce = type { [2 x float addrspace(1)*] }

// On the host-side compilation, generic pointer won't be coerced.
// HOST-NOT: %struct.S.coerce
// HOST-NOT: %struct.T.coerce

// HOST: define void @_Z22__device_stub__kernel1Pi(i32* %x)
// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel1Pi(i32 addrspace(1)*{{.*}} %x.coerce)
// CHECK: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4
// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
// OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4
// OPT: ret void
__global__ void kernel1(int *x) {
  x[0]++;
}

// A reference argument is lowered like a pointer and is coerced the same way.
// HOST: define void @_Z22__device_stub__kernel2Ri(i32* nonnull align 4 dereferenceable(4) %x)
// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel2Ri(i32 addrspace(1)*{{.*}} nonnull align 4 dereferenceable(4) %x.coerce)
// CHECK: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4
// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
// OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4
// OPT: ret void
__global__ void kernel2(int &x) {
  x++;
}

// Pointers already carrying an explicit address space are left untouched:
// no coercion and no addrspacecast back to generic.
// HOST: define void @_Z22__device_stub__kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* %x, i32 addrspace(1)* %y)
// CHECK-LABEL: define amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)*{{.*}} %x, i32 addrspace(1)*{{.*}} %y)
// CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__global__ void kernel3(__attribute__((address_space(2))) int *x,
                        __attribute__((address_space(1))) int *y) {
  y[0] = x[0];
}

// Only kernel (amdgpu_kernel) arguments are coerced; plain __device__
// functions keep generic pointer parameters as-is.
// COMMON-LABEL: define void @_Z4funcPi(i32*{{.*}} %x)
// CHECK-NOT: = addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__device__ void func(int *x) {
  x[0]++;
}

struct S {
  int *x;
  float *y;
};
// `by-val` struct will be coerced into a similar struct with all generic
// pointers lowered into global ones.
// HOST: define void @_Z22__device_stub__kernel41S(i32* %s.coerce0, float* %s.coerce1)
// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel41S(%struct.S.coerce %s.coerce)
// OPT: [[P0:%.*]] = extractvalue %struct.S.coerce %s.coerce, 0
// OPT: [[P1:%.*]] = extractvalue %struct.S.coerce %s.coerce, 1
// OPT: [[V0:%.*]] = load i32, i32 addrspace(1)* [[P0]], align 4
// OPT: [[INC:%.*]] = add nsw i32 [[V0]], 1
// OPT: store i32 [[INC]], i32 addrspace(1)* [[P0]], align 4
// OPT: [[V1:%.*]] = load float, float addrspace(1)* [[P1]], align 4
// OPT: [[ADD:%.*]] = fadd contract float [[V1]], 1.000000e+00
// OPT: store float [[ADD]], float addrspace(1)* [[P1]], align 4
// OPT: ret void
__global__ void kernel4(struct S s) {
  s.x[0]++;
  s.y[0] += 1.f;
}

// If a pointer to struct is passed, only the pointer itself is coerced into the global one.
// HOST: define void @_Z22__device_stub__kernel5P1S(%struct.S* %s)
// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel5P1S(%struct.S addrspace(1)*{{.*}} %s.coerce)
__global__ void kernel5(struct S *s) {
  s->x[0]++;
  s->y[0] += 1.f;
}

struct T {
  float *x[2];
};
// `by-val` array is also coerced.
// HOST: define void @_Z22__device_stub__kernel61T(float* %t.coerce0, float* %t.coerce1)
// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel61T(%struct.T.coerce %t.coerce)
// OPT: [[ARR:%.*]] = extractvalue %struct.T.coerce %t.coerce, 0
// OPT: [[P0:%.*]] = extractvalue [2 x float addrspace(1)*] [[ARR]], 0
// OPT: [[P1:%.*]] = extractvalue [2 x float addrspace(1)*] [[ARR]], 1
// OPT: [[V0:%.*]] = load float, float addrspace(1)* [[P0]], align 4
// OPT: [[ADD0:%.*]] = fadd contract float [[V0]], 1.000000e+00
// OPT: store float [[ADD0]], float addrspace(1)* [[P0]], align 4
// OPT: [[V1:%.*]] = load float, float addrspace(1)* [[P1]], align 4
// OPT: [[ADD1:%.*]] = fadd contract float [[V1]], 2.000000e+00
// OPT: store float [[ADD1]], float addrspace(1)* [[P1]], align 4
// OPT: ret void
__global__ void kernel6(struct T t) {
  t.x[0][0] += 1.f;
  t.x[1][0] += 2.f;
}

// Check that coerced pointers retain the noalias attribute when qualified with __restrict.
// HOST: define void @_Z22__device_stub__kernel7Pi(i32* noalias %x)
// COMMON-LABEL: define amdgpu_kernel void @_Z7kernel7Pi(i32 addrspace(1)* noalias{{.*}} %x.coerce)
__global__ void kernel7(int *__restrict x) {
  x[0]++;
}