1 /* 2 Copyright (c) 2005-2021 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 //! \file test_openmp.cpp 18 //! \brief Test for [internal] functionality 19 20 #if _WIN32 || _WIN64 21 #define _CRT_SECURE_NO_WARNINGS 22 #endif 23 24 #include "common/test.h" 25 #include "common/utils.h" 26 #include "common/utils_env.h" 27 #include "tbb/global_control.h" 28 #include "tbb/blocked_range.h" 29 #include "tbb/parallel_for.h" 30 #include "tbb/parallel_reduce.h" 31 32 // Test mixing OpenMP and TBB 33 #include <omp.h> 34 35 using data_type = short; 36 37 void SerialConvolve( data_type c[], const data_type a[], int m, const data_type b[], int n ) { 38 for (int i = 0; i < m + n - 1; ++i) { 39 int start = i < n ? 0 : i - n + 1; 40 int finish = i < m ? i + 1 : m; 41 data_type sum = 0; 42 for (int j = start; j < finish; ++j) 43 sum += a[j] * b[i - j]; 44 c[i] = sum; 45 } 46 } 47 48 #if _MSC_VER && !defined(__INTEL_COMPILER) 49 // Suppress overzealous warning about short+=short 50 #pragma warning( push ) 51 #pragma warning( disable: 4244 ) 52 #endif 53 54 class InnerBody: utils::NoAssign { 55 const data_type* my_a; 56 const data_type* my_b; 57 const int i; 58 public: 59 data_type sum; 60 InnerBody( data_type /*c*/[], const data_type a[], const data_type b[], int ii ) : 61 my_a(a), my_b(b), i(ii), sum(0) 62 {} 63 InnerBody( InnerBody& x, tbb::split ) : 64 my_a(x.my_a), my_b(x.my_b), i(x.i), sum(0) 65 { 66 } 67 void join( InnerBody& x ) { sum += x.sum; } 68 void operator()( const tbb::blocked_range<int>& range ) { 69 for (int j = range.begin(); j != range.end(); ++j) 70 sum += my_a[j] * my_b[i - j]; 71 } 72 }; 73 74 #if _MSC_VER && !defined(__INTEL_COMPILER) 75 #pragma warning( pop ) 76 #endif 77 78 //! Test OpenMP loop around TBB loop 79 void OpenMP_TBB_Convolve( data_type c[], const data_type a[], int m, const data_type b[], int n, std::size_t p ) { 80 utils::suppress_unused_warning(p); 81 #pragma omp parallel num_threads(p) 82 { 83 #pragma omp for 84 for (int i = 0; i < m + n - 1; ++i) { 85 int start = i < n ? 0 : i - n + 1; 86 int finish = i < m ? i + 1 : m; 87 InnerBody body(c, a, b, i); 88 tbb::parallel_reduce(tbb::blocked_range<int>(start, finish, 10), body); 89 c[i] = body.sum; 90 } 91 } 92 } 93 94 class OuterBody: utils::NoAssign { 95 const data_type* my_a; 96 const data_type* my_b; 97 data_type* my_c; 98 const int m; 99 const int n; 100 const std::size_t p; 101 public: 102 OuterBody( data_type c[], const data_type a[], int m_, const data_type b[], int n_, std::size_t p_ ) : 103 my_a(a), my_b(b), my_c(c), m(m_), n(n_), p(p_) 104 {} 105 void operator()( const tbb::blocked_range<int>& range ) const { 106 for (int i = range.begin(); i != range.end(); ++i) { 107 int start = i < n ? 0 : i - n + 1; 108 int finish = i < m ? i + 1 : m; 109 data_type sum = 0; 110 #pragma omp parallel for reduction(+:sum) num_threads(p) 111 for (int j = start; j < finish; ++j) 112 sum += my_a[j] * my_b[i - j]; 113 my_c[i] = sum; 114 } 115 } 116 }; 117 118 //! Test TBB loop around OpenMP loop 119 void TBB_OpenMP_Convolve( data_type c[], const data_type a[], int m, const data_type b[], int n, std::size_t p ) { 120 tbb::parallel_for(tbb::blocked_range<int>(0, m + n - 1, 10), OuterBody(c, a, m, b, n, p)); 121 } 122 123 #if __INTEL_COMPILER 124 void TestNumThreads() { 125 utils::SetEnv("KMP_AFFINITY", "compact"); 126 // Make an OpenMP call before initializing TBB 127 int omp_nthreads = omp_get_max_threads(); 128 #pragma omp parallel 129 {} 130 int tbb_nthreads = tbb::this_task_arena::max_concurrency(); 131 // For the purpose of testing, assume that OpenMP and TBB should utilize the same # of threads. 132 // If it's not true on some platforms, the test will need to be adjusted. 133 REQUIRE_MESSAGE(tbb_nthreads == omp_nthreads, "Initialization of TBB is possibly affected by OpenMP"); 134 } 135 #endif // __INTEL_COMPILER 136 137 const int M = 17 * 17; 138 const int N = 13 * 13; 139 data_type A[M], B[N]; 140 data_type expected[M+N], actual[M+N]; 141 142 template <class Func> 143 void RunTest( Func F, int m, int n, std::size_t p) { 144 tbb::global_control limit(tbb::global_control::max_allowed_parallelism, p); 145 memset(actual, -1, (m + n) * sizeof(data_type)); 146 F(actual, A, m, B, n, p); 147 CHECK(memcmp(actual, expected, (m + n - 1) * sizeof(data_type)) == 0); 148 } 149 150 //! \brief \ref error_guessing 151 TEST_CASE("Testing oneTBB with OpenMP") { 152 #if __INTEL_COMPILER 153 TestNumThreads(); // Testing initialization-related behavior; must be the first 154 #endif // __INTEL_COMPILER 155 for (std::size_t p = utils::MinThread; p <= utils::MaxThread; ++p) { 156 for (std::size_t m = 1; m <= M; m *= 17) { 157 for (std::size_t n = 1; n <= N; n *= 13) { 158 for (std::size_t i = 0; i < m; ++i) A[i] = data_type(1 + i / 5); 159 for (std::size_t i = 0; i < n; ++i) B[i] = data_type(1 + i / 7); 160 SerialConvolve( expected, A, m, B, n ); 161 RunTest( OpenMP_TBB_Convolve, m, n, p ); 162 RunTest( TBB_OpenMP_Convolve, m, n, p ); 163 } 164 } 165 } 166 } 167