// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
// RUN:  -triple powerpc64le-unknown-unknown -DCUDA \
// RUN:  -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o \
// RUN:  %t-ppc-host.bc

// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
// RUN:  -triple nvptx64-unknown-unknown -DCUDA \
// RUN:  -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s \
// RUN:  -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc \
// RUN:  -o - | FileCheck %s --check-prefix CHECK

// RUN: %clang_cc1 -verify -fopenmp -x c++ \
// RUN:  -triple powerpc64le-unknown-unknown -DDIAG \
// RUN:  -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm \
// RUN:  %s -o - | FileCheck %s \
// RUN:  --check-prefix=CHECK1

// RUN: %clang_cc1 -verify -fopenmp -x c++ \
// RUN:  -triple i386-unknown-unknown \
// RUN:  -fopenmp-targets=i386-pc-linux-gnu -emit-llvm \
// RUN:  %s -o - | FileCheck %s \
// RUN:  --check-prefix=CHECK2


#if defined(CUDA)
// expected-no-diagnostics

int foo(int n) {
  double *e;
  // no error and no implicit map is generated for e[:1]
#pragma omp target parallel reduction(+: e[:1])
  *e = 10;
  return 0;
}
// CHECK-NOT: @.offload_maptypes
// CHECK: call void @__kmpc_nvptx_end_reduce_nowait(
#elif defined(DIAG)
class S2 {
  mutable int a;

public:
  S2() : a(0) {}
  S2(S2 &s2) : a(s2.a) {}
  S2 &operator+(S2 &s);
};
int bar() {
  S2 o[5];
  // warning "copyable and not guaranteed to be mapped correctly" is emitted
  // and an implicit map is generated.
#pragma omp target parallel reduction(+:o[0]) //expected-warning {{Type 'S2' is not trivially copyable and not guaranteed to be mapped correctly}}
  for (int i = 0; i < 10; i++)
    ;
  double b[10][10][10];
  // no error and no implicit map is generated for b[0:2][2:4][1]; the map for
  // b itself is generated.
#pragma omp target parallel for reduction(task, +: b[0:2][2:4][1])
  for (long long i = 0; i < 10; ++i)
    ;
  return 0;
}
// map for variable o
// CHECK1: @.offload_sizes = private unnamed_addr constant [1 x i64] [i64 4]
// CHECK1: @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 547]
// map for b:
// CHECK1: @.offload_sizes{{.*}} = private unnamed_addr constant [1 x i64] [i64 8000]
// CHECK1: @.offload_maptypes{{.*}} = private unnamed_addr constant [1 x i64] [i64 547]
#else
// expected-no-diagnostics

// An implicit map is generated for array elements or array sections that
// appear in a reduction clause. In the following case, the implicit map is
// generated for output[0] with map size 4 and for output[:3] with map size 12.
void sum(int* input, int size, int* output)
{
#pragma omp target teams distribute parallel for reduction(+: output[0]) \
    map(to: input[0:size])
  for (int i = 0; i < size; i++)
    output[0] += input[i];
#pragma omp target teams distribute parallel for reduction(+: output[:3]) \
    map(to: input[0:size])
  for (int i = 0; i < size; i++)
    output[0] += input[i];
  int a[10];
  // implicit maps are also generated for the array section a[:2] (8 bytes)
  // and for the array element a[3] (4 bytes).
#pragma omp target parallel reduction(+: a[:2])
  for (int i = 0; i < size; i++)
    ;
#pragma omp target parallel reduction(+: a[3])
  for (int i = 0; i < size; i++)
    ;
}
// CHECK2: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 4, i64 8]
// CHECK2: @.offload_maptypes.10 = private unnamed_addr constant [2 x i64] [i64 800, i64 547]
// CHECK2: @.offload_sizes.13 = private unnamed_addr constant [2 x i64] [i64 4, i64 4]
// CHECK2: @.offload_maptypes.14 = private unnamed_addr constant [2 x i64] [i64 800, i64 547]
// CHECK2: define dso_local void @_Z3sumPiiS_
// CHECK2-NEXT: entry
// CHECK2-NEXT: [[INP:%.*]] = alloca i32*
// CHECK2-NEXT: [[SIZE:%.*]] = alloca i32
// CHECK2-NEXT: [[OUTP:%.*]] = alloca i32*
// CHECK2: [[OFFSIZE:%.*]] = alloca [3 x i64]
// CHECK2: [[OFFSIZE10:%.*]] = alloca [3 x i64]
// CHECK2: [[T15:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[OFFSIZE]], i32 0, i32 0
// CHECK2-NEXT: store i64 4, i64* [[T15]]
// CHECK2: [[T21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[OFFSIZE]], i32 0, i32 1
// CHECK2-NEXT: store i64 4, i64* [[T21]]
// CHECK2: [[T53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[OFFSIZE10]], i32 0, i32 0
// CHECK2-NEXT: store i64 4, i64* [[T53]]
// CHECK2: [[T59:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[OFFSIZE10]], i32 0, i32 1
// CHECK2-NEXT: store i64 12, i64* [[T59]]
#endif
int main()
{
#if defined(CUDA)
  int a = foo(10);
#elif defined(DIAG)
  int a = bar();
#else
  const int size = 100;
  int *array = new int[size];
  int result = 0;
  sum(array, size, &result);
#endif
  return 0;
}
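
// Note (illustrative only, not checked by any RUN line above): the implicit
// maps verified by the CHECK prefixes correspond, roughly, to spelling the
// map out by hand. Assuming the same sum() signature, the first loop behaves
// approximately as if the reduced element also carried an explicit tofrom
// map, e.g.:
//
//   #pragma omp target teams distribute parallel for reduction(+: output[0]) \
//       map(to: input[0:size]) map(tofrom: output[0])
//   for (int i = 0; i < size; i++)
//     output[0] += input[i];
//
// which is why the CHECK2 lines above look for a 4-byte entry (output[0]) and
// a 12-byte entry (output[:3]) in the offload size arrays.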