// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
// RUN:  -triple powerpc64le-unknown-unknown -DCUDA \
// RUN:  -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o \
// RUN:  %t-ppc-host.bc

// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ \
// RUN:  -triple nvptx64-unknown-unknown -DCUDA \
// RUN:  -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s \
// RUN:  -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc \
// RUN:  -o - | FileCheck %s --check-prefix CHECK

// RUN: %clang_cc1 -verify -fopenmp -x c++ \
// RUN:  -triple powerpc64le-unknown-unknown -DDIAG \
// RUN:  -fopenmp-targets=powerpc64le-ibm-linux-gnu -emit-llvm \
// RUN:  %s -o - | FileCheck %s \
// RUN:  --check-prefix=CHECK1

// RUN: %clang_cc1 -verify -fopenmp -x c++ \
// RUN:  -triple i386-unknown-unknown \
// RUN:  -fopenmp-targets=i386-pc-linux-gnu -emit-llvm \
// RUN:  %s -o - | FileCheck %s \
// RUN:  --check-prefix=CHECK2

// Exercises the interaction between reduction clauses that name array
// elements or array sections and the data mapping of target constructs.
// The file is compiled in three configurations selected by macros:
//   * CUDA    - NVPTX offload; no diagnostics and no map for e[:1].
//   * DIAG    - PowerPC host; a warning for the non-trivially-copyable S2
//               and maps for o and b.
//   * neither - i386 host; implicit maps for output[0] and output[:3].

#if defined(CUDA)
// expected-no-diagnostics

// Reduction over a one-element section of a pointer inside a target region.
int foo(int n) {
  double *e;
  // No error and no implicit map generated for e[:1].
#pragma omp target parallel reduction(+: e[:1])
  *e = 10;
  return 0;
}
// CHECK-NOT: @.offload_maptypes
// CHECK: call void @__kmpc_nvptx_end_reduce_nowait(
#elif defined(DIAG)

// Class with user-provided constructors, used to trigger the mapping warning
// quoted in bar().
class S2 {
  mutable int a;

public:
  S2() : a(0) {}
  S2(S2 &s2) : a(s2.a) {}
  S2 &operator+(S2 &s);
};

// Reductions over an element of an S2 array and over a multi-dimensional
// section of a double array.
int bar() {
  S2 o[5];
  // Warning "copyable and not guaranteed to be mapped correctly" and
  // implicit map generated.
#pragma omp target parallel reduction(+:o[0]) //expected-warning {{Type 'S2' is not trivially copyable and not guaranteed to be mapped correctly}}
  for (int i = 0; i < 10; i++)
    ;
  double b[10][10][10];
  // No error and no implicit map generated; the map for b is generated but
  // not for b[0:2][2:4][1].
#pragma omp target parallel for reduction(task, +: b[0:2][2:4][1])
  for (long long i = 0; i < 10; ++i)
    ;
  return 0;
}
// map for variable o
// CHECK1: offload_sizes = private unnamed_addr constant [1 x i64] [i64 4]
// CHECK1: offload_maptypes = private unnamed_addr constant [1 x i64] [i64 547]
// map for b:
// CHECK1: @.offload_sizes{{.*}} = private unnamed_addr constant [1 x i64] [i64 8000]
// CHECK1: @.offload_maptypes{{.*}} = private unnamed_addr constant [1 x i64] [i64 547]
#else
// expected-no-diagnostics

// Generate implicit maps for array elements or array sections in a reduction
// clause. In the following case the implicit map is generated for output[0]
// with map size 4 and for output[:3] with map size 12.
void sum(int* input, int size, int* output)
{
#pragma omp target teams distribute parallel for reduction(+: output[0]) \
                                                 map(to: input [0:size])
  for (int i = 0; i < size; i++)
    output[0] += input[i];
#pragma omp target teams distribute parallel for reduction(+: output[:3])  \
                                                 map(to: input [0:size])
  for (int i = 0; i < size; i++)
    output[0] += input[i];
  int a[10];
#pragma omp target parallel reduction(+: a[:2])
  for (int i = 0; i < size; i++)
    ;
#pragma omp target parallel reduction(+: a[3])
  for (int i = 0; i < size; i++)
    ;
}
//CHECK2: @.offload_sizes = private unnamed_addr constant [2 x i64] [i64 4, i64 8]
//CHECK2: @.offload_maptypes.10 = private unnamed_addr constant [2 x i64] [i64 800, i64 547]
//CHECK2: @.offload_sizes.13 = private unnamed_addr constant [2 x i64] [i64 4, i64 4]
//CHECK2: @.offload_maptypes.14 = private unnamed_addr constant [2 x i64] [i64 800, i64 547]
//CHECK2: define dso_local void @_Z3sumPiiS_
//CHECK2-NEXT: entry
//CHECK2-NEXT: [[INP:%.*]] = alloca i32*
//CHECK2-NEXT: [[SIZE:%.*]] = alloca i32
//CHECK2-NEXT: [[OUTP:%.*]] = alloca i32*
//CHECK2: [[OFFSIZE:%.*]] = alloca [3 x i64]
//CHECK2: [[OFFSIZE10:%.*]] = alloca [3 x i64]
//CHECK2: [[T15:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[OFFSIZE]], i32 0, i32 0
//CHECK2-NEXT: store i64 4, i64* [[T15]]
//CHECK2: [[T21:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[OFFSIZE]], i32 0, i32 1
//CHECK2-NEXT: store i64 4, i64* [[T21]]
//CHECK2: [[T53:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[OFFSIZE10]], i32 0, i32 0
//CHECK2-NEXT: store i64 4, i64* [[T53]]
//CHECK2: [[T59:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[OFFSIZE10]], i32 0, i32 1
//CHECK2-NEXT: store i64 12, i64* [[T59]]
#endif

// Calls whichever test function the active configuration defines so that it
// is emitted.
int main()
{
#if defined(CUDA)
  int a = foo(10);
#elif defined(DIAG)
  int a = bar();
#else
  const int size = 100;
  int *array = new int[size];
  int result = 0;
  sum(array, size, &result);
#endif
  return 0;
}