124f836e8SJoachim Protze // RUN: %libomptarget-compilexx-generic -O3 && %libomptarget-run-generic 20f0ce3c1SShilei Tian 38b7e99c4SJon Chesterfield // Hangs 48b7e99c4SJon Chesterfield // UNSUPPORTED: amdgcn-amd-amdhsa 5ae23be84SJoseph Huber // UNSUPPORTED: amdgcn-amd-amdhsa-oldDriver 6*d5d83663SJoseph Huber // UNSUPPORTED: amdgcn-amd-amdhsa-LTO 78b7e99c4SJon Chesterfield 80f0ce3c1SShilei Tian #include <iostream> 90f0ce3c1SShilei Tian test_map()100f0ce3c1SShilei Tiantemplate <typename T> int test_map() { 110f0ce3c1SShilei Tian std::cout << "map(complex<>)" << std::endl; 120f0ce3c1SShilei Tian T a(0.2), a_check; 130f0ce3c1SShilei Tian #pragma omp target map(from : a_check) 140f0ce3c1SShilei Tian { a_check = a; } 150f0ce3c1SShilei Tian 160f0ce3c1SShilei Tian if (a_check != a) { 170f0ce3c1SShilei Tian std::cout << " wrong results"; 180f0ce3c1SShilei Tian return 1; 190f0ce3c1SShilei Tian } 200f0ce3c1SShilei Tian 210f0ce3c1SShilei Tian return 0; 220f0ce3c1SShilei Tian } 230f0ce3c1SShilei Tian test_reduction()240f0ce3c1SShilei Tiantemplate <typename T> int test_reduction() { 250f0ce3c1SShilei Tian std::cout << "flat parallelism" << std::endl; 260f0ce3c1SShilei Tian T sum(0), sum_host(0); 270f0ce3c1SShilei Tian const int size = 100; 280f0ce3c1SShilei Tian T array[size]; 290f0ce3c1SShilei Tian for (int i = 0; i < size; i++) { 300f0ce3c1SShilei Tian array[i] = i; 310f0ce3c1SShilei Tian sum_host += array[i]; 320f0ce3c1SShilei Tian } 330f0ce3c1SShilei Tian 340f0ce3c1SShilei Tian #pragma omp target teams distribute parallel for map(to: array[:size]) \ 350f0ce3c1SShilei Tian reduction(+ : sum) 360f0ce3c1SShilei Tian for (int i = 0; i < size; i++) 370f0ce3c1SShilei Tian sum += array[i]; 380f0ce3c1SShilei Tian 390f0ce3c1SShilei Tian if (sum != sum_host) 400f0ce3c1SShilei Tian std::cout << " wrong results " << sum << " host " << sum_host << std::endl; 410f0ce3c1SShilei Tian 420f0ce3c1SShilei Tian std::cout << "hierarchical parallelism" << std::endl; 430f0ce3c1SShilei Tian const int nblock(10), block_size(10); 440f0ce3c1SShilei Tian T block_sum[nblock]; 450f0ce3c1SShilei Tian #pragma omp target teams distribute map(to \ 460f0ce3c1SShilei Tian : array[:size]) \ 470f0ce3c1SShilei Tian map(from \ 480f0ce3c1SShilei Tian : block_sum[:nblock]) 490f0ce3c1SShilei Tian for (int ib = 0; ib < nblock; ib++) { 500f0ce3c1SShilei Tian T partial_sum = 0; 510f0ce3c1SShilei Tian const int istart = ib * block_size; 520f0ce3c1SShilei Tian const int iend = (ib + 1) * block_size; 530f0ce3c1SShilei Tian #pragma omp parallel for reduction(+ : partial_sum) 540f0ce3c1SShilei Tian for (int i = istart; i < iend; i++) 550f0ce3c1SShilei Tian partial_sum += array[i]; 560f0ce3c1SShilei Tian block_sum[ib] = partial_sum; 570f0ce3c1SShilei Tian } 580f0ce3c1SShilei Tian 590f0ce3c1SShilei Tian sum = 0; 600f0ce3c1SShilei Tian for (int ib = 0; ib < nblock; ib++) { 610f0ce3c1SShilei Tian sum += block_sum[ib]; 620f0ce3c1SShilei Tian } 630f0ce3c1SShilei Tian 640f0ce3c1SShilei Tian if (sum != sum_host) { 650f0ce3c1SShilei Tian std::cout << " wrong results " << sum << " host " << sum_host << std::endl; 660f0ce3c1SShilei Tian return 1; 670f0ce3c1SShilei Tian } 680f0ce3c1SShilei Tian 690f0ce3c1SShilei Tian return 0; 700f0ce3c1SShilei Tian } 710f0ce3c1SShilei Tian test_complex()720f0ce3c1SShilei Tiantemplate <typename T> int test_complex() { 730f0ce3c1SShilei Tian int ret = 0; 740f0ce3c1SShilei Tian ret |= test_map<T>(); 750f0ce3c1SShilei Tian ret |= test_reduction<T>(); 760f0ce3c1SShilei Tian return ret; 770f0ce3c1SShilei Tian } 780f0ce3c1SShilei Tian main()790f0ce3c1SShilei Tianint main() { 800f0ce3c1SShilei Tian int ret = 0; 810f0ce3c1SShilei Tian std::cout << "Testing float" << std::endl; 820f0ce3c1SShilei Tian ret |= test_complex<float>(); 830f0ce3c1SShilei Tian std::cout << "Testing double" << std::endl; 840f0ce3c1SShilei Tian ret |= test_complex<double>(); 850f0ce3c1SShilei Tian return ret; 860f0ce3c1SShilei Tian } 87