// REQUIRES: x86-registered-target
// REQUIRES: amdgpu-registered-target

// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -o - | FileCheck --check-prefixes=COMMON,CHECK %s
// RUN: %clang_cc1 -no-opaque-pointers -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -x hip %s -disable-O0-optnone -o - | opt -S -O2 | FileCheck %s --check-prefixes=COMMON,OPT
// RUN: %clang_cc1 -no-opaque-pointers -triple x86_64-unknown-linux-gnu -emit-llvm -x hip %s -o - | FileCheck -check-prefix=HOST %s

#include "Inputs/cuda.h"

// The struct coerced from `struct S` does not have all of its generic
// pointers lowered into global ones.

// On the host-side compilation, generic pointers won't be coerced.
// HOST-NOT: %struct.S.coerce
// HOST-NOT: %struct.T.coerce

// HOST: define{{.*}} void @_Z22__device_stub__kernel1Pi(i32* noundef %x)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel1Pi(i32 addrspace(1)*{{.*}} %x.coerce)
// CHECK: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4, !amdgpu.noclobber !2
// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
// OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4
// OPT: ret void
__global__ void kernel1(int *x) {
  x[0]++;
}

// HOST: define{{.*}} void @_Z22__device_stub__kernel2Ri(i32* noundef nonnull align 4 dereferenceable(4) %x)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel2Ri(i32 addrspace(1)*{{.*}} nonnull align 4 dereferenceable(4) %x.coerce)
// CHECK: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// OPT: [[VAL:%.*]] = load i32, i32 addrspace(1)* %x.coerce, align 4, !amdgpu.noclobber !2
// OPT: [[INC:%.*]] = add nsw i32 [[VAL]], 1
// OPT: store i32 [[INC]], i32 addrspace(1)* %x.coerce, align 4
// OPT: ret void
__global__ void kernel2(int &x) {
  x++;
}

// HOST: define{{.*}} void @_Z22__device_stub__kernel3PU3AS2iPU3AS1i(i32 addrspace(2)* noundef %x, i32 addrspace(1)* noundef %y)
// CHECK-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel3PU3AS2iPU3AS1i(i32 addrspace(2)*{{.*}} %x, i32 addrspace(1)*{{.*}} %y)
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__global__ void kernel3(__attribute__((address_space(2))) int *x,
                        __attribute__((address_space(1))) int *y) {
  y[0] = x[0];
}

// COMMON-LABEL: define{{.*}} void @_Z4funcPi(i32*{{.*}} %x)
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
__device__ void func(int *x) {
  x[0]++;
}
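
// A conceptual sketch of the coercion the kernel1/kernel2 checks above
// verify (illustrative pseudo-source, not what clang literally emits;
// `x_coerce` is a hypothetical name mirroring `%x.coerce` in the IR): the
// generic pointer parameter is rewritten to a global one, and the kernel
// prologue casts it back to the generic address space.
//
//   __global__ void kernel1(__attribute__((address_space(1))) int *x_coerce) {
//     int *x = (int *)x_coerce; // the addrspacecast matched by CHECK above
//     x[0]++;
//   }
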
struct S {
  int *x;
  float *y;
};
// A `by-val` struct is passed by-indirect-alias (a mix of by-ref and
// indirect by-val). However, the enhanced address-inferring pass should
// still be able to assume that its pointer members are global pointers (see
// the conceptual sketch after kernel7 below).
//
// HOST: define{{.*}} void @_Z22__device_stub__kernel41S(i32* %s.coerce0, float* %s.coerce1)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel41S(%struct.S addrspace(4)*{{.*}} byref(%struct.S) align 8 %0)
// OPT: [[R0:%.*]] = getelementptr inbounds %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 0
// OPT: [[P0:%.*]] = load i32*, i32* addrspace(4)* [[R0]], align 8
// OPT: [[G0:%.*]] ={{.*}} addrspacecast i32* [[P0]] to i32 addrspace(1)*
// OPT: [[R1:%.*]] = getelementptr inbounds %struct.S, %struct.S addrspace(4)* %0, i64 0, i32 1
// OPT: [[P1:%.*]] = load float*, float* addrspace(4)* [[R1]], align 8
// OPT: [[G1:%.*]] ={{.*}} addrspacecast float* [[P1]] to float addrspace(1)*
// OPT: [[V0:%.*]] = load i32, i32 addrspace(1)* [[G0]], align 4, !amdgpu.noclobber !2
// OPT: [[INC:%.*]] = add nsw i32 [[V0]], 1
// OPT: store i32 [[INC]], i32 addrspace(1)* [[G0]], align 4
// OPT: [[V1:%.*]] = load float, float addrspace(1)* [[G1]], align 4
// OPT: [[ADD:%.*]] = fadd contract float [[V1]], 1.000000e+00
// OPT: store float [[ADD]], float addrspace(1)* [[G1]], align 4
// OPT: ret void
__global__ void kernel4(struct S s) {
  s.x[0]++;
  s.y[0] += 1.f;
}

// If a pointer to a struct is passed, only the pointer itself is coerced
// into a global one.
// HOST: define{{.*}} void @_Z22__device_stub__kernel5P1S(%struct.S* noundef %s)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel5P1S(%struct.S addrspace(1)*{{.*}} %s.coerce)
__global__ void kernel5(struct S *s) {
  s->x[0]++;
  s->y[0] += 1.f;
}

struct T {
  float *x[2];
};
// A `by-val` array is passed by-indirect-alias (a mix of by-ref and indirect
// by-val). However, the enhanced address-inferring pass should still be able
// to assume that its elements are global pointers.
//
// HOST: define{{.*}} void @_Z22__device_stub__kernel61T(float* %t.coerce0, float* %t.coerce1)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel61T(%struct.T addrspace(4)*{{.*}} byref(%struct.T) align 8 %0)
// OPT: [[R0:%.*]] = getelementptr inbounds %struct.T, %struct.T addrspace(4)* %0, i64 0, i32 0, i64 0
// OPT: [[P0:%.*]] = load float*, float* addrspace(4)* [[R0]], align 8
// OPT: [[G0:%.*]] ={{.*}} addrspacecast float* [[P0]] to float addrspace(1)*
// OPT: [[R1:%.*]] = getelementptr inbounds %struct.T, %struct.T addrspace(4)* %0, i64 0, i32 0, i64 1
// OPT: [[P1:%.*]] = load float*, float* addrspace(4)* [[R1]], align 8
// OPT: [[G1:%.*]] ={{.*}} addrspacecast float* [[P1]] to float addrspace(1)*
// OPT: [[V0:%.*]] = load float, float addrspace(1)* [[G0]], align 4, !amdgpu.noclobber !2
// OPT: [[ADD0:%.*]] = fadd contract float [[V0]], 1.000000e+00
// OPT: store float [[ADD0]], float addrspace(1)* [[G0]], align 4
// OPT: [[V1:%.*]] = load float, float addrspace(1)* [[G1]], align 4
// OPT: [[ADD1:%.*]] = fadd contract float [[V1]], 2.000000e+00
// OPT: store float [[ADD1]], float addrspace(1)* [[G1]], align 4
// OPT: ret void
__global__ void kernel6(struct T t) {
  t.x[0][0] += 1.f;
  t.x[1][0] += 2.f;
}

// Check that coerced pointers retain the noalias attribute when qualified
// with __restrict.
// HOST: define{{.*}} void @_Z22__device_stub__kernel7Pi(i32* noalias noundef %x)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel7Pi(i32 addrspace(1)* noalias{{.*}} %x.coerce)
__global__ void kernel7(int *__restrict x) {
  x[0]++;
}
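
// A conceptual sketch of the `byref` passing verified for kernel4/kernel6
// above (illustrative pseudo-source only; `s_byref`, `gx`, and `gy` are
// hypothetical names): the struct itself sits in the constant (addrspace(4))
// kernarg segment, and each pointer member loaded from it is addrspacecast
// to the global (addrspace(1)) space before use.
//
//   __global__ void kernel4(const S &s_byref) { // byref addrspace(4) kernarg
//     __attribute__((address_space(1))) int *gx =
//         (__attribute__((address_space(1))) int *)s_byref.x;
//     __attribute__((address_space(1))) float *gy =
//         (__attribute__((address_space(1))) float *)s_byref.y;
//     gx[0]++;       // all accesses go through global pointers
//     gy[0] += 1.f;
//   }
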
// Single element struct.
struct SS {
  float *x;
};
// HOST: define{{.*}} void @_Z22__device_stub__kernel82SS(float* %a.coerce)
// COMMON-LABEL: define{{.*}} amdgpu_kernel void @_Z7kernel82SS(float addrspace(1)*{{.*}} %a.coerce)
// CHECK: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// CHECK-NOT: ={{.*}} addrspacecast [[TYPE:.*]] addrspace(1)* %{{.*}} to [[TYPE]]*
// OPT: [[VAL:%.*]] = load float, float addrspace(1)* %a.coerce, align 4, !amdgpu.noclobber !2
// OPT: [[INC:%.*]] = fadd contract float [[VAL]], 3.000000e+00
// OPT: store float [[INC]], float addrspace(1)* %a.coerce, align 4
// OPT: ret void
__global__ void kernel8(struct SS a) {
  *a.x += 3.f;
}
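
// For context, a host-side launch of kernel1 with the real HIP runtime might
// look like the sketch below. This is an assumption for illustration only:
// the test itself compiles against the minimal `Inputs/cuda.h` stubs, which
// do not declare hipMalloc/hipFree/hipDeviceSynchronize.
//
//   #include <hip/hip_runtime.h>
//
//   int main() {
//     int *dx = nullptr;
//     hipMalloc(&dx, sizeof(int)); // device memory lives in the global AS,
//     kernel1<<<1, 1>>>(dx);       // so the coerced addrspace(1) argument
//     hipDeviceSynchronize();      // is valid for this allocation
//     hipFree(dx);
//     return 0;
//   }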