1 /* 2 Copyright (c) 2005-2020 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 #include "common/test.h" 18 #include "common/utils.h" 19 #include "common/utils_env.h" 20 #include "tbb/global_control.h" 21 #include "tbb/blocked_range.h" 22 #include "tbb/parallel_for.h" 23 #include "tbb/parallel_reduce.h" 24 25 // Test mixing OpenMP and TBB 26 #include <omp.h> 27 28 //! \file test_openmp.cpp 29 //! \brief Test for [internal] functionality 30 31 using data_type = short; 32 33 void SerialConvolve( data_type c[], const data_type a[], int m, const data_type b[], int n ) { 34 for (int i = 0; i < m + n - 1; ++i) { 35 int start = i < n ? 0 : i - n + 1; 36 int finish = i < m ? i + 1 : m; 37 data_type sum = 0; 38 for (int j = start; j < finish; ++j) 39 sum += a[j] * b[i - j]; 40 c[i] = sum; 41 } 42 } 43 44 #if _MSC_VER && !defined(__INTEL_COMPILER) 45 // Suppress overzealous warning about short+=short 46 #pragma warning( push ) 47 #pragma warning( disable: 4244 ) 48 #endif 49 50 class InnerBody: utils::NoAssign { 51 const data_type* my_a; 52 const data_type* my_b; 53 const int i; 54 public: 55 data_type sum; 56 InnerBody( data_type /*c*/[], const data_type a[], const data_type b[], int ii ) : 57 my_a(a), my_b(b), i(ii), sum(0) 58 {} 59 InnerBody( InnerBody& x, tbb::split ) : 60 my_a(x.my_a), my_b(x.my_b), i(x.i), sum(0) 61 { 62 } 63 void join( InnerBody& x ) { sum += x.sum; } 64 void operator()( const tbb::blocked_range<int>& range ) { 65 for (int j = range.begin(); j != range.end(); ++j) 66 sum += my_a[j] * my_b[i - j]; 67 } 68 }; 69 70 #if _MSC_VER && !defined(__INTEL_COMPILER) 71 #pragma warning( pop ) 72 #endif 73 74 //! Test OpenMMP loop around TBB loop 75 void OpenMP_TBB_Convolve( data_type c[], const data_type a[], int m, const data_type b[], int n ) { 76 #pragma omp parallel 77 { 78 #pragma omp for 79 for (int i = 0; i < m + n - 1; ++i) { 80 int start = i < n ? 0 : i - n + 1; 81 int finish = i < m ? i + 1 : m; 82 InnerBody body(c, a, b, i); 83 tbb::parallel_reduce(tbb::blocked_range<int>(start, finish, 10), body); 84 c[i] = body.sum; 85 } 86 } 87 } 88 89 class OuterBody: utils::NoAssign { 90 const data_type* my_a; 91 const data_type* my_b; 92 data_type* my_c; 93 const int m; 94 const int n; 95 public: 96 OuterBody( data_type c[], const data_type a[], int m_, const data_type b[], int n_ ) : 97 my_a(a), my_b(b), my_c(c), m(m_), n(n_) 98 {} 99 void operator()( const tbb::blocked_range<int>& range ) const { 100 for (int i = range.begin(); i != range.end(); ++i) { 101 int start = i < n ? 0 : i - n + 1; 102 int finish = i < m ? i + 1 : m; 103 data_type sum = 0; 104 #pragma omp parallel for reduction(+:sum) 105 for (int j = start; j < finish; ++j) 106 sum += my_a[j] * my_b[i - j]; 107 my_c[i] = sum; 108 } 109 } 110 }; 111 112 //! Test TBB loop around OpenMP loop 113 void TBB_OpenMP_Convolve( data_type c[], const data_type a[], int m, const data_type b[], int n ) { 114 tbb::parallel_for(tbb::blocked_range<int>(0, m + n - 1, 10), OuterBody(c, a, m, b, n)); 115 } 116 117 #if __INTEL_COMPILER 118 void TestNumThreads() { 119 utils::SetEnv("KMP_AFFINITY", "compact"); 120 // Make an OpenMP call before initializing TBB 121 int omp_nthreads = omp_get_max_threads(); 122 #pragma omp parallel 123 {} 124 int tbb_nthreads = tbb::this_task_arena::max_concurrency(); 125 // For the purpose of testing, assume that OpenMP and TBB should utilize the same # of threads. 126 // If it's not true on some platforms, the test will need to be adjusted. 127 REQUIRE_MESSAGE(tbb_nthreads == omp_nthreads, "Initialization of TBB is possibly affected by OpenMP"); 128 } 129 #endif // __INTEL_COMPILER 130 131 const int M = 17 * 17; 132 const int N = 13 * 13; 133 data_type A[M], B[N]; 134 data_type expected[M+N], actual[M+N]; 135 136 template <class Func> 137 void RunTest( Func F, int m, int n, std::size_t p) { 138 tbb::global_control limit(tbb::global_control::max_allowed_parallelism, p); 139 memset(actual, -1, (m + n) * sizeof(data_type)); 140 F(actual, A, m, B, n); 141 CHECK(memcmp(actual, expected, (m + n - 1) * sizeof(data_type)) == 0); 142 } 143 144 //! \brief \ref error_guessing 145 TEST_CASE("Testing oneTBB with OpenMP") { 146 #if __INTEL_COMPILER 147 TestNumThreads(); // Testing initialization-related behavior; must be the first 148 #endif // __INTEL_COMPILER 149 150 for (std::size_t p = utils::MinThread; p <= utils::MaxThread; ++p) { 151 for (std::size_t m = 1; m <= M; m *= 17) { 152 for (std::size_t n = 1; n <= N; n *= 13) { 153 for (std::size_t i = 0; i < m; ++i) A[i] = data_type(1 + i / 5); 154 for (std::size_t i = 0; i < n; ++i) B[i] = data_type(1 + i / 7); 155 SerialConvolve( expected, A, m, B, n ); 156 RunTest( OpenMP_TBB_Convolve, m, n, p ); 157 RunTest( TBB_OpenMP_Convolve, m, n, p ); 158 } 159 } 160 } 161 } 162