// RUN: %libomptarget-compilexx-generic -O3 && %libomptarget-run-generic
20f0ce3c1SShilei Tian 
// This test hangs on the AMDGPU targets listed below, so it is disabled there.
// UNSUPPORTED: amdgcn-amd-amdhsa
// UNSUPPORTED: amdgcn-amd-amdhsa-oldDriver
// UNSUPPORTED: amdgcn-amd-amdhsa-LTO
78b7e99c4SJon Chesterfield 
80f0ce3c1SShilei Tian #include <iostream>
90f0ce3c1SShilei Tian 
// Checks that a value assigned inside a target region is copied back to the
// host through a map(from) clause.
//
// T is the arithmetic type under test. `a` has no explicit map clause and is
// only read inside the region; `a_check` is written there and mapped back.
//
// Returns 0 when the value read back equals the original, 1 otherwise.
template <typename T> int test_map() {
  // Banner text is kept verbatim so the test's output stays recognizable.
  std::cout << "map(complex<>)" << std::endl;
  T a(0.2), a_check;
#pragma omp target map(from : a_check)
  { a_check = a; }

  if (a_check != a) {
    // Terminate the line so the following output does not run into this
    // message.
    std::cout << " wrong results" << std::endl;
    return 1;
  }

  return 0;
}
230f0ce3c1SShilei Tian 
// Sums the values 0..size-1 on the device in two ways and compares each result
// against a sum accumulated on the host:
//   1. "flat": a single combined `target teams distribute parallel for` with a
//      reduction on `sum`;
//   2. "hierarchical": `target teams distribute` over blocks, each block
//      reduced by a nested `parallel for`, and the per-block sums added up on
//      the host afterwards.
//
// Returns 0 when both device sums equal the host sum, 1 if either one differs.
template <typename T> int test_reduction() {
  // Overall status; set to 1 by whichever stage detects a mismatch so that a
  // failure in the flat stage is not lost when the hierarchical stage passes.
  int ret = 0;

  std::cout << "flat parallelism" << std::endl;
  T sum(0), sum_host(0);
  const int size = 100;
  T array[size];
  for (int i = 0; i < size; i++) {
    array[i] = i;
    sum_host += array[i];
  }

#pragma omp target teams distribute parallel for map(to : array[:size])        \
    reduction(+ : sum)
  for (int i = 0; i < size; i++)
    sum += array[i];

  if (sum != sum_host) {
    std::cout << " wrong results " << sum << " host " << sum_host << std::endl;
    // Record the failure but keep going so the hierarchical stage still runs.
    ret = 1;
  }

  std::cout << "hierarchical parallelism" << std::endl;
  const int nblock(10), block_size(10);
  T block_sum[nblock];
#pragma omp target teams distribute map(to : array[:size])                     \
    map(from : block_sum[:nblock])
  for (int ib = 0; ib < nblock; ib++) {
    T partial_sum = 0;
    // Half-open index range [istart, iend) handled by this block.
    const int istart = ib * block_size;
    const int iend = (ib + 1) * block_size;
#pragma omp parallel for reduction(+ : partial_sum)
    for (int i = istart; i < iend; i++)
      partial_sum += array[i];
    block_sum[ib] = partial_sum;
  }

  // Combine the per-block partial sums on the host.
  sum = 0;
  for (int ib = 0; ib < nblock; ib++) {
    sum += block_sum[ib];
  }

  if (sum != sum_host) {
    std::cout << " wrong results " << sum << " host " << sum_host << std::endl;
    ret = 1;
  }

  return ret;
}
710f0ce3c1SShilei Tian 
test_complex()720f0ce3c1SShilei Tian template <typename T> int test_complex() {
730f0ce3c1SShilei Tian   int ret = 0;
740f0ce3c1SShilei Tian   ret |= test_map<T>();
750f0ce3c1SShilei Tian   ret |= test_reduction<T>();
760f0ce3c1SShilei Tian   return ret;
770f0ce3c1SShilei Tian }
780f0ce3c1SShilei Tian 
main()790f0ce3c1SShilei Tian int main() {
800f0ce3c1SShilei Tian   int ret = 0;
810f0ce3c1SShilei Tian   std::cout << "Testing float" << std::endl;
820f0ce3c1SShilei Tian   ret |= test_complex<float>();
830f0ce3c1SShilei Tian   std::cout << "Testing double" << std::endl;
840f0ce3c1SShilei Tian   ret |= test_complex<double>();
850f0ce3c1SShilei Tian   return ret;
860f0ce3c1SShilei Tian }
87