1 // RUN: %libomptarget-compilexx-aarch64-unknown-linux-gnu -O3 && %libomptarget-run-aarch64-unknown-linux-gnu
2 // RUN: %libomptarget-compilexx-powerpc64-ibm-linux-gnu -O3 && %libomptarget-run-powerpc64-ibm-linux-gnu
3 // RUN: %libomptarget-compilexx-powerpc64le-ibm-linux-gnu -O3 && %libomptarget-run-powerpc64le-ibm-linux-gnu
4 // RUN: %libomptarget-compilexx-x86_64-pc-linux-gnu -O3 && %libomptarget-run-x86_64-pc-linux-gnu
5 // RUN: %libomptarget-compilexx-nvptx64-nvidia-cuda -O3 && %libomptarget-run-nvptx64-nvidia-cuda
6 
7 #include <iostream>
8 
// Checks that a value of type T assigned inside an OpenMP target region is
// visible on the host afterwards through a `map(from : ...)` clause.
//
// `a` is initialised on the host from 0.2 and is read inside the region;
// `a_check` is the only variable named in a map clause and receives the copy.
// The two are compared with `!=`, i.e. the value is expected to come back
// unchanged.
//
// Returns 0 when `a_check` equals `a` after the region, 1 otherwise.
template <typename T> int test_map() {
  std::cout << "map(complex<>)" << std::endl;
  T a(0.2), a_check;
#pragma omp target map(from : a_check)
  { a_check = a; }

  if (a_check != a) {
    // Print both values and terminate the line (as test_reduction does) so
    // the diagnostic does not run into whatever is printed next.
    std::cout << " wrong results " << a_check << " host " << a << std::endl;
    return 1;
  }

  return 0;
}
22 
// Checks that `+` reductions over an array of T computed in OpenMP target
// regions agree with a sum accumulated sequentially on the host.
//
// The input is a 100-element array holding the values 0..99. Two variants are
// exercised on it:
//   * "flat parallelism": one combined `target teams distribute parallel for`
//     with `reduction(+ : sum)`;
//   * "hierarchical parallelism": `target teams distribute` over 10 blocks,
//     each block reducing its 10 elements with a nested
//     `parallel for reduction(+ : partial_sum)` and storing the result in
//     `block_sum`; the per-block sums are then added up on the host.
//
// Results are compared against the host sum with `!=`.
//
// Returns 0 if both variants match the host sum, 1 if either one differs.
// Both variants are always run, so a failure in the first does not hide the
// outcome of the second.
template <typename T> int test_reduction() {
  int ret = 0;

  std::cout << "flat parallelism" << std::endl;
  T sum(0), sum_host(0);
  const int size = 100;
  T array[size];
  for (int i = 0; i < size; i++) {
    array[i] = i;
    sum_host += array[i];
  }

#pragma omp target teams distribute parallel for map(to: array[:size])         \
                                                 reduction(+ : sum)
  for (int i = 0; i < size; i++)
    sum += array[i];

  if (sum != sum_host) {
    std::cout << " wrong results " << sum << " host " << sum_host << std::endl;
    // Remember the failure: `sum` is reused below, so without this the flat
    // variant could be wrong and the function would still report success.
    ret = 1;
  }

  std::cout << "hierarchical parallelism" << std::endl;
  const int nblock(10), block_size(10);
  T block_sum[nblock];
#pragma omp target teams distribute map(to                                     \
                                        : array[:size])                        \
    map(from                                                                   \
        : block_sum[:nblock])
  for (int ib = 0; ib < nblock; ib++) {
    T partial_sum = 0;
    const int istart = ib * block_size;
    const int iend = (ib + 1) * block_size;
#pragma omp parallel for reduction(+ : partial_sum)
    for (int i = istart; i < iend; i++)
      partial_sum += array[i];
    block_sum[ib] = partial_sum;
  }

  // Combine the per-block results on the host.
  sum = 0;
  for (int ib = 0; ib < nblock; ib++) {
    sum += block_sum[ib];
  }

  if (sum != sum_host) {
    std::cout << " wrong results " << sum << " host " << sum_host << std::endl;
    ret = 1;
  }

  return ret;
}
70 
71 template <typename T> int test_complex() {
72   int ret = 0;
73   ret |= test_map<T>();
74   ret |= test_reduction<T>();
75   return ret;
76 }
77 
78 int main() {
79   int ret = 0;
80   std::cout << "Testing float" << std::endl;
81   ret |= test_complex<float>();
82   std::cout << "Testing double" << std::endl;
83   ret |= test_complex<double>();
84   return ret;
85 }
86