// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
// RUN:   -triple x86_64-linux-gnu \
// RUN:   | FileCheck -check-prefix=HOST %s
// RUN: %clang_cc1 -x hip -emit-llvm -std=c++11 %s -o - \
// RUN:   -triple amdgcn-amd-amdhsa -fcuda-is-device \
// RUN:   | FileCheck -check-prefix=DEV %s

#include "Inputs/cuda.h"

// Device side kernel name.
// HOST: @[[KERN_CAPTURE:[0-9]+]] = {{.*}} c"_Z1gIZ12test_capturevEUlvE_EvT_\00"
// HOST: @[[KERN_RESOLVE:[0-9]+]] = {{.*}} c"_Z1gIZ12test_resolvevEUlvE_EvT_\00"

// Check functions emitted for test_capture in host compilation.
// Check lambda is not emitted in host compilation.
// HOST-LABEL: define void @_Z12test_capturev
// HOST: call void @_Z19test_capture_helperIZ12test_capturevEUlvE_EvT_
// HOST-LABEL: define internal void @_Z19test_capture_helperIZ12test_capturevEUlvE_EvT_
// HOST: call void @_Z16__device_stub__gIZ12test_capturevEUlvE_EvT_
// HOST-NOT: define{{.*}}@_ZZ12test_capturevENKUlvE_clEv

// Check functions emitted for test_resolve in host compilation.
// Check host version of template function 'overloaded' is emitted and called
// by the lambda function.
// HOST-LABEL: define void @_Z12test_resolvev
// HOST: call void @_Z19test_resolve_helperIZ12test_resolvevEUlvE_EvT_()
// HOST-LABEL: define internal void @_Z19test_resolve_helperIZ12test_resolvevEUlvE_EvT_
// HOST: call void @_Z16__device_stub__gIZ12test_resolvevEUlvE_EvT_
// HOST: call void @_ZZ12test_resolvevENKUlvE_clEv
// HOST-LABEL: define internal void @_ZZ12test_resolvevENKUlvE_clEv
// HOST: call i32 @_Z10overloadedIiET_v
// HOST-LABEL: define linkonce_odr i32 @_Z10overloadedIiET_v
// HOST: ret i32 2

// Check kernel is registered with correct device side kernel name.
// HOST: @__hipRegisterFunction({{.*}}@[[KERN_CAPTURE]]
// HOST: @__hipRegisterFunction({{.*}}@[[KERN_RESOLVE]]

// DEV: @a = addrspace(1) externally_initialized global i32 0

// Device compilation of test_capture: the kernel instantiation must call the
// lambda, and the lambda body must be emitted and store to the device
// variable 'a'.
// DEV-LABEL: define amdgpu_kernel void @_Z1gIZ12test_capturevEUlvE_EvT_
// DEV: call void @_ZZ12test_capturevENKUlvE_clEv
// DEV-LABEL: define internal void @_ZZ12test_capturevENKUlvE_clEv
// DEV: store i32 1, i32* addrspacecast (i32 addrspace(1)* @a to i32*)

// Device compilation of test_resolve: the lambda must call the device
// overload of 'overloaded', which is the one returning 1.
// DEV-LABEL: define amdgpu_kernel void @_Z1gIZ12test_resolvevEUlvE_EvT_
// DEV: call void @_ZZ12test_resolvevENKUlvE_clEv
// DEV-LABEL: define internal void @_ZZ12test_resolvevENKUlvE_clEv
// DEV: call i32 @_Z10overloadedIiET_v
// DEV-LABEL: define linkonce_odr i32 @_Z10overloadedIiET_v
// DEV: ret i32 1

// Device variable assigned by the lambda in test_capture.
__device__ int a;

// Device overload: returns 1 so it can be told apart from the host overload
// in the emitted IR.
template <typename T>
__device__ T overloaded() {
  return 1;
}

// Host overload: returns 2.
template <typename T>
__host__ T overloaded() {
  return 2;
}

// Kernel that simply invokes the callable passed to it.
template <typename Fn>
__global__ void g(Fn fn) {
  fn();
}

// Launches kernel g with the callable; the callable is not invoked on the
// host side.
template <typename Fn>
void test_capture_helper(Fn fn) {
  g<<<1, 1>>>(fn);
}

// Launches kernel g with the callable and then also invokes the same
// callable directly from host code.
template <typename Fn>
void test_resolve_helper(Fn fn) {
  g<<<1, 1>>>(fn);
  fn();
}

// Test capture of device variable in lambda function.
void test_capture() {
  test_capture_helper([] { a = 1; });
}

// Test resolving host/device function in lambda function.
// The callee should resolve to the host or device overload according to
// where the lambda is called from, not where it is defined.
void test_resolve() {
  test_resolve_helper([] { overloaded<int>(); });
}