// REQUIRES: nvptx-registered-target

// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm -target-cpu sm_30 %s -o - | FileCheck %s --check-prefix=NO_SYNC
// RUN: %clang_cc1 -triple nvptx64-nvidia-cuda -fcuda-is-device -emit-llvm -target-cpu sm_30 -target-feature +ptx70 -DSYNC -DCUDA_VERSION=9000 %s -o - | FileCheck %s --check-prefix=SYNC

// Verifies the lowering of the 64-bit overloads of __shfl (default build) and
// __shfl_sync (build with -DSYNC). The directives below expect this chain in
// the emitted IR:
//   unsigned long long overload -> long long overload
//   long long overload          -> two calls to the int overload
//   int overload                -> the llvm.nvvm.shfl[.sync].idx.i32 intrinsic

#include "Inputs/cuda.h"

// NOTE(review): presumably declared here because the intrinsics header calls
// memcpy in device code and the stub cuda.h does not provide it -- confirm.
__device__ void *memcpy(void *dest, const void *src, size_t n);

// NOTE(review): presumably referenced by the intrinsics header (e.g. as a
// default shuffle width) -- confirm against the header.
#define warpSize 32
#include <__clang_cuda_intrinsics.h>

// Declared but never defined: gives the shuffled values an external use in
// test_long_long.
__device__ void use(unsigned long long, long long);

// Test function: one shuffle per 64-bit type, i.e. two calls per configuration.
// NO_SYNC: define{{.*}} @_Z14test_long_longv
// NO_SYNC: call noundef i64 @_Z6__shflyii(
// NO_SYNC: call noundef i64 @_Z6__shflxii(

// SYNC: define{{.*}} @_Z14test_long_longv
// SYNC: call noundef i64 @_Z11__shfl_syncjyii(
// SYNC: call noundef i64 @_Z11__shfl_syncjxii(

// unsigned long long -> long long
// NO_SYNC: define{{.*}} @_Z6__shflyii
// NO_SYNC: call noundef i64 @_Z6__shflxii(

// long long -> int + int
// NO_SYNC: define{{.*}} @_Z6__shflxii
// NO_SYNC: call noundef i32 @_Z6__shfliii(
// NO_SYNC: call noundef i32 @_Z6__shfliii(

// NO_SYNC: define{{.*}} @_Z6__shfliii
// NO_SYNC: call i32 @llvm.nvvm.shfl.idx.i32

// unsigned long long -> long long
// Anchored on the definition (like every other overload here) so the next
// directive is matched inside this function's body rather than after an
// arbitrary mention of the symbol.
// SYNC: define{{.*}} @_Z11__shfl_syncjyii
// SYNC: call noundef i64 @_Z11__shfl_syncjxii(

// long long -> int + int
// SYNC: define{{.*}} @_Z11__shfl_syncjxii
// SYNC: call noundef i32 @_Z11__shfl_syncjiii(
// SYNC: call noundef i32 @_Z11__shfl_syncjiii(

// SYNC: define{{.*}} @_Z11__shfl_syncjiii
// SYNC: call i32 @llvm.nvvm.shfl.sync.idx.i32

// Shuffles one unsigned long long and one long long value, using the legacy
// __shfl by default or __shfl_sync when SYNC is defined, then passes both
// results to use().
__device__ void test_long_long() {
  unsigned long long ull = 13;
  long long ll = 17;
#ifndef SYNC
  ull = __shfl(ull, 7, 32);
  ll = __shfl(ll, 7, 32);
#else
  ull = __shfl_sync(0x11, ull, 7, 32);
  ll = __shfl_sync(0x11, ll, 7, 32);
#endif
  // Identical in both configurations, so it lives outside the conditional.
  use(ull, ll);
}
