1 // Test for linking with CUDA's libdevice as outlined in 2 // http://llvm.org/docs/NVPTXUsage.html#linking-with-libdevice 3 // 4 // REQUIRES: nvptx-registered-target 5 // 6 // Prepare bitcode file to link with 7 // RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \ 8 // RUN: -disable-llvm-passes -o %t.bc %S/Inputs/device-code.ll 9 // RUN: %clang_cc1 -triple nvptx-unknown-cuda -emit-llvm-bc \ 10 // RUN: -disable-llvm-passes -o %t-2.bc %S/Inputs/device-code-2.ll 11 // 12 // Make sure function in device-code gets linked in and internalized. 13 // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ 14 // RUN: -mlink-builtin-bitcode %t.bc -emit-llvm \ 15 // RUN: -disable-llvm-passes -o - %s \ 16 // RUN: | FileCheck %s -check-prefix CHECK-IR 17 18 // Make sure legacy flag name works 19 // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ 20 // RUN: -mlink-cuda-bitcode %t.bc -emit-llvm \ 21 // RUN: -disable-llvm-passes -o - %s \ 22 // RUN: | FileCheck %s -check-prefix CHECK-IR 23 // 24 // Make sure we can link two bitcode files. 25 // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ 26 // RUN: -mlink-builtin-bitcode %t.bc -mlink-builtin-bitcode %t-2.bc \ 27 // RUN: -emit-llvm -disable-llvm-passes -o - %s \ 28 // RUN: | FileCheck %s -check-prefix CHECK-IR -check-prefix CHECK-IR-2 29 // 30 // Make sure function in device-code gets linked but is not internalized 31 // without -fcuda-uses-libdevice 32 // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ 33 // RUN: -mlink-bitcode-file %t.bc -emit-llvm \ 34 // RUN: -disable-llvm-passes -o - %s \ 35 // RUN: | FileCheck %s -check-prefix CHECK-IR-NLD 36 // 37 // Make sure NVVMReflect pass is enabled in NVPTX back-end. 38 // RUN: %clang_cc1 -triple nvptx-unknown-cuda -fcuda-is-device \ 39 // RUN: -mlink-builtin-bitcode %t.bc -S -o /dev/null %s \ 40 // RUN: -mllvm -debug-pass=Structure 2>&1 \ 41 // RUN: | FileCheck %s -check-prefix CHECK-REFLECT 42 43 #include "Inputs/cuda.h" 44 45 __device__ float device_mul_or_add(float a, float b); 46 extern "C" __device__ double __nv_sin(double x); 47 extern "C" __device__ double __nv_exp(double x); 48 49 // CHECK-IR-LABEL: define{{.*}} void @_Z26should_not_be_internalizedPf( 50 // CHECK-PTX-LABEL: .visible .func _Z26should_not_be_internalizedPf( 51 __device__ void should_not_be_internalized(float *data) {} 52 53 // Make sure kernel call has not been internalized. 54 // CHECK-IR-LABEL: define{{.*}} void @_Z6kernelPfS_ 55 // CHECK-PTX-LABEL: .visible .entry _Z6kernelPfS_( 56 __global__ __attribute__((used)) void kernel(float *out, float *in) { 57 *out = device_mul_or_add(in[0], in[1]); 58 *out += __nv_exp(__nv_sin(*out)); 59 should_not_be_internalized(out); 60 } 61 62 // Make sure device_mul_or_add() is present in IR, is internal and 63 // calls __nvvm_reflect(). 64 // CHECK-IR-LABEL: define internal float @_Z17device_mul_or_addff( 65 // CHECK-IR-NLD-LABEL: define{{.*}} float @_Z17device_mul_or_addff( 66 // CHECK-IR: call i32 @__nvvm_reflect 67 // CHECK-IR: ret float 68 69 // Make sure we've linked in and internalized only needed functions 70 // from the second bitcode file. 71 // CHECK-IR-2-LABEL: define internal double @__nv_sin 72 // CHECK-IR-2-LABEL: define internal double @__nv_exp 73 // CHECK-IR-2-NOT: double @__unused 74 75 // Verify that NVVMReflect pass is among the passes run by NVPTX back-end. 76 // CHECK-REFLECT: Replace occurrences of __nvvm_reflect() calls with 0/1 77