xref: /oneTBB/test/tbb/test_openmp.cpp (revision dbccbee9)
1 /*
2     Copyright (c) 2005-2021 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 //! \file test_openmp.cpp
18 //! \brief Test for [internal] functionality
19 
20 #if _WIN32 || _WIN64
21 #define _CRT_SECURE_NO_WARNINGS
22 #endif
23 
#include "common/test.h"
#include "common/utils.h"
#include "common/utils_env.h"
#include "tbb/global_control.h"
#include "tbb/blocked_range.h"
#include "tbb/parallel_for.h"
#include "tbb/parallel_reduce.h"

#include <cstring>

// Test mixing OpenMP and TBB
#include <omp.h>
34 
35 using data_type = short;
36 
SerialConvolve(data_type c[],const data_type a[],int m,const data_type b[],int n)37 void SerialConvolve( data_type c[], const data_type a[], int m, const data_type b[], int n ) {
38     for (int i = 0; i < m + n - 1; ++i) {
39         int start = i < n ? 0 : i - n + 1;
40         int finish = i < m ? i + 1 : m;
41         data_type sum = 0;
42         for (int j = start; j < finish; ++j)
43             sum += a[j] * b[i - j];
44         c[i] = sum;
45     }
46 }
47 
48 #if _MSC_VER && !defined(__INTEL_COMPILER)
49     // Suppress overzealous warning about short+=short
50     #pragma warning( push )
51     #pragma warning( disable: 4244 )
52 #endif
53 
54 class InnerBody: utils::NoAssign {
55     const data_type* my_a;
56     const data_type* my_b;
57     const int i;
58 public:
59     data_type sum;
InnerBody(data_type[],const data_type a[],const data_type b[],int ii)60     InnerBody( data_type /*c*/[], const data_type a[], const data_type b[], int ii ) :
61         my_a(a), my_b(b), i(ii), sum(0)
62     {}
InnerBody(InnerBody & x,tbb::split)63     InnerBody( InnerBody& x, tbb::split ) :
64         my_a(x.my_a), my_b(x.my_b), i(x.i), sum(0)
65     {
66     }
join(InnerBody & x)67     void join( InnerBody& x ) { sum += x.sum; }
operator ()(const tbb::blocked_range<int> & range)68     void operator()( const tbb::blocked_range<int>& range ) {
69         for (int j = range.begin(); j != range.end(); ++j)
70             sum += my_a[j] * my_b[i - j];
71     }
72 };
73 
74 #if _MSC_VER && !defined(__INTEL_COMPILER)
75     #pragma warning( pop )
76 #endif
77 
78 //! Test OpenMP loop around TBB loop
OpenMP_TBB_Convolve(data_type c[],const data_type a[],int m,const data_type b[],int n,int p)79 void OpenMP_TBB_Convolve( data_type c[], const data_type a[], int m, const data_type b[], int n, int p ) {
80     utils::suppress_unused_warning(p);
81 #pragma omp parallel num_threads(p)
82     {
83 #pragma omp for
84         for (int i = 0; i < m + n - 1; ++i) {
85             int start = i < n ? 0 : i - n + 1;
86             int finish = i < m ? i + 1 : m;
87             InnerBody body(c, a, b, i);
88             tbb::parallel_reduce(tbb::blocked_range<int>(start, finish, 10), body);
89             c[i] = body.sum;
90         }
91     }
92 }
93 
94 class OuterBody: utils::NoAssign {
95     const data_type* my_a;
96     const data_type* my_b;
97     data_type* my_c;
98     const int m;
99     const int n;
100 #if __clang__ && !__INTEL_COMPILER
101     #pragma clang diagnostic push
102     #pragma clang diagnostic ignored "-Wunused-private-field"
103 #endif
104     const int p;
105 #if __clang__ && !__INTEL_COMPILER
106     #pragma clang diagnostic pop // "-Wunused-private-field"
107 #endif
108 public:
OuterBody(data_type c[],const data_type a[],int m_,const data_type b[],int n_,int p_)109     OuterBody( data_type c[], const data_type a[], int m_, const data_type b[], int n_, int p_ ) :
110         my_a(a), my_b(b), my_c(c), m(m_), n(n_), p(p_)
111     {}
operator ()(const tbb::blocked_range<int> & range) const112     void operator()( const tbb::blocked_range<int>& range ) const {
113         for (int i = range.begin(); i != range.end(); ++i) {
114             int start = i < n ? 0 : i - n + 1;
115             int finish = i < m ? i + 1 : m;
116             data_type sum = 0;
117 #pragma omp parallel for reduction(+:sum) num_threads(p)
118             for (int j = start; j < finish; ++j)
119                 sum += my_a[j] * my_b[i - j];
120             my_c[i] = sum;
121         }
122     }
123 };
124 
125 //! Test TBB loop around OpenMP loop
TBB_OpenMP_Convolve(data_type c[],const data_type a[],int m,const data_type b[],int n,int p)126 void TBB_OpenMP_Convolve( data_type c[], const data_type a[], int m, const data_type b[], int n, int p ) {
127     tbb::parallel_for(tbb::blocked_range<int>(0, m + n - 1, 10), OuterBody(c, a, m, b, n, p));
128 }
129 
130 #if __INTEL_COMPILER
TestNumThreads()131 void TestNumThreads() {
132     utils::SetEnv("KMP_AFFINITY", "compact");
133     // Make an OpenMP call before initializing TBB
134     int omp_nthreads = omp_get_max_threads();
135     #pragma omp parallel
136     {}
137     int tbb_nthreads = tbb::this_task_arena::max_concurrency();
138     // For the purpose of testing, assume that OpenMP and TBB should utilize the same # of threads.
139     // If it's not true on some platforms, the test will need to be adjusted.
140     REQUIRE_MESSAGE(tbb_nthreads == omp_nthreads, "Initialization of TBB is possibly affected by OpenMP");
141 }
142 #endif // __INTEL_COMPILER
143 
144 const int M = 17 * 17;
145 const int N = 13 * 13;
146 data_type A[M], B[N];
147 data_type expected[M+N], actual[M+N];
148 
149 template <class Func>
RunTest(Func F,int m,int n,int p)150 void RunTest( Func F, int m, int n, int p) {
151     tbb::global_control limit(tbb::global_control::max_allowed_parallelism, p);
152     memset(actual, -1, (m + n) * sizeof(data_type));
153     F(actual, A, m, B, n, p);
154     CHECK(memcmp(actual, expected, (m + n - 1) * sizeof(data_type)) == 0);
155 }
156 
// Disabled under ThreadSanitizer because OpenMP isn't instrumented, which leads to false positives
158 #if !__TBB_USE_THREAD_SANITIZER
159 //! \brief \ref error_guessing
160 TEST_CASE("Testing oneTBB with OpenMP") {
161 #if __INTEL_COMPILER
162     TestNumThreads(); // Testing initialization-related behavior; must be the first
163 #endif // __INTEL_COMPILER
164     for (int p = static_cast<int>(utils::MinThread); p <= static_cast<int>(utils::MaxThread); ++p) {
165         for (int m = 1; m <= M; m *= 17) {
166             for (int n = 1; n <= N; n *= 13) {
167                 for (int i = 0; i < m; ++i) A[i] = data_type(1 + i / 5);
168                 for (int i = 0; i < n; ++i) B[i] = data_type(1 + i / 7);
169                 SerialConvolve( expected, A, m, B, n );
170                 RunTest( OpenMP_TBB_Convolve, m, n, p );
171                 RunTest( TBB_OpenMP_Convolve, m, n, p );
172             }
173         }
174     }
175 }
176 #endif
177