1 /* 2 Copyright (c) 2005-2021 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 #include "common/test.h" 18 #include "common/utils.h" 19 #include "common/utils_env.h" 20 #include "tbb/global_control.h" 21 #include "tbb/blocked_range.h" 22 #include "tbb/parallel_for.h" 23 #include "tbb/parallel_reduce.h" 24 25 // Test mixing OpenMP and TBB 26 #include <omp.h> 27 28 //! \file test_openmp.cpp 29 //! \brief Test for [internal] functionality 30 31 using data_type = short; 32 33 void SerialConvolve( data_type c[], const data_type a[], int m, const data_type b[], int n ) { 34 for (int i = 0; i < m + n - 1; ++i) { 35 int start = i < n ? 0 : i - n + 1; 36 int finish = i < m ? i + 1 : m; 37 data_type sum = 0; 38 for (int j = start; j < finish; ++j) 39 sum += a[j] * b[i - j]; 40 c[i] = sum; 41 } 42 } 43 44 #if _MSC_VER && !defined(__INTEL_COMPILER) 45 // Suppress overzealous warning about short+=short 46 #pragma warning( push ) 47 #pragma warning( disable: 4244 ) 48 #endif 49 50 class InnerBody: utils::NoAssign { 51 const data_type* my_a; 52 const data_type* my_b; 53 const int i; 54 public: 55 data_type sum; 56 InnerBody( data_type /*c*/[], const data_type a[], const data_type b[], int ii ) : 57 my_a(a), my_b(b), i(ii), sum(0) 58 {} 59 InnerBody( InnerBody& x, tbb::split ) : 60 my_a(x.my_a), my_b(x.my_b), i(x.i), sum(0) 61 { 62 } 63 void join( InnerBody& x ) { sum += x.sum; } 64 void operator()( const tbb::blocked_range<int>& range ) { 65 for (int j = range.begin(); j != range.end(); ++j) 66 sum += my_a[j] * my_b[i - j]; 67 } 68 }; 69 70 #if _MSC_VER && !defined(__INTEL_COMPILER) 71 #pragma warning( pop ) 72 #endif 73 74 //! Test OpenMP loop around TBB loop 75 void OpenMP_TBB_Convolve( data_type c[], const data_type a[], int m, const data_type b[], int n, std::size_t p ) { 76 utils::suppress_unused_warning(p); 77 #pragma omp parallel num_threads(p) 78 { 79 #pragma omp for 80 for (int i = 0; i < m + n - 1; ++i) { 81 int start = i < n ? 0 : i - n + 1; 82 int finish = i < m ? i + 1 : m; 83 InnerBody body(c, a, b, i); 84 tbb::parallel_reduce(tbb::blocked_range<int>(start, finish, 10), body); 85 c[i] = body.sum; 86 } 87 } 88 } 89 90 class OuterBody: utils::NoAssign { 91 const data_type* my_a; 92 const data_type* my_b; 93 data_type* my_c; 94 const int m; 95 const int n; 96 const std::size_t p; 97 public: 98 OuterBody( data_type c[], const data_type a[], int m_, const data_type b[], int n_, std::size_t p_ ) : 99 my_a(a), my_b(b), my_c(c), m(m_), n(n_), p(p_) 100 {} 101 void operator()( const tbb::blocked_range<int>& range ) const { 102 for (int i = range.begin(); i != range.end(); ++i) { 103 int start = i < n ? 0 : i - n + 1; 104 int finish = i < m ? i + 1 : m; 105 data_type sum = 0; 106 #pragma omp parallel for reduction(+:sum) num_threads(p) 107 for (int j = start; j < finish; ++j) 108 sum += my_a[j] * my_b[i - j]; 109 my_c[i] = sum; 110 } 111 } 112 }; 113 114 //! Test TBB loop around OpenMP loop 115 void TBB_OpenMP_Convolve( data_type c[], const data_type a[], int m, const data_type b[], int n, std::size_t p ) { 116 tbb::parallel_for(tbb::blocked_range<int>(0, m + n - 1, 10), OuterBody(c, a, m, b, n, p)); 117 } 118 119 #if __INTEL_COMPILER 120 void TestNumThreads() { 121 utils::SetEnv("KMP_AFFINITY", "compact"); 122 // Make an OpenMP call before initializing TBB 123 int omp_nthreads = omp_get_max_threads(); 124 #pragma omp parallel 125 {} 126 int tbb_nthreads = tbb::this_task_arena::max_concurrency(); 127 // For the purpose of testing, assume that OpenMP and TBB should utilize the same # of threads. 128 // If it's not true on some platforms, the test will need to be adjusted. 129 REQUIRE_MESSAGE(tbb_nthreads == omp_nthreads, "Initialization of TBB is possibly affected by OpenMP"); 130 } 131 #endif // __INTEL_COMPILER 132 133 const int M = 17 * 17; 134 const int N = 13 * 13; 135 data_type A[M], B[N]; 136 data_type expected[M+N], actual[M+N]; 137 138 template <class Func> 139 void RunTest( Func F, int m, int n, std::size_t p) { 140 tbb::global_control limit(tbb::global_control::max_allowed_parallelism, p); 141 memset(actual, -1, (m + n) * sizeof(data_type)); 142 F(actual, A, m, B, n, p); 143 CHECK(memcmp(actual, expected, (m + n - 1) * sizeof(data_type)) == 0); 144 } 145 146 //! \brief \ref error_guessing 147 TEST_CASE("Testing oneTBB with OpenMP") { 148 #if __INTEL_COMPILER 149 TestNumThreads(); // Testing initialization-related behavior; must be the first 150 #endif // __INTEL_COMPILER 151 for (std::size_t p = utils::MinThread; p <= utils::MaxThread; ++p) { 152 for (std::size_t m = 1; m <= M; m *= 17) { 153 for (std::size_t n = 1; n <= N; n *= 13) { 154 for (std::size_t i = 0; i < m; ++i) A[i] = data_type(1 + i / 5); 155 for (std::size_t i = 0; i < n; ++i) B[i] = data_type(1 + i / 7); 156 SerialConvolve( expected, A, m, B, n ); 157 RunTest( OpenMP_TBB_Convolve, m, n, p ); 158 RunTest( TBB_OpenMP_Convolve, m, n, p ); 159 } 160 } 161 } 162 } 163