// xref: /oneTBB/test/tbb/test_openmp.cpp (revision dbccbee9)
/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

//! \file test_openmp.cpp
//! \brief Test for [internal] functionality

#if _WIN32 || _WIN64
#define _CRT_SECURE_NO_WARNINGS
#endif

#include "common/test.h"
#include "common/utils.h"
#include "common/utils_env.h"
#include "tbb/global_control.h"
#include "tbb/blocked_range.h"
#include "tbb/parallel_for.h"
#include "tbb/parallel_reduce.h"

#include <cstring>

// Test mixing OpenMP and TBB
#include <omp.h>

// Element type shared by all convolution routines in this file.
using data_type = short;

//! Serial reference convolution of a[0..m) with b[0..n).
/** Writes m + n - 1 results to c: c[i] is the sum of a[j] * b[i - j] over
    every j for which both indices are in range.
    \param c output array; must hold at least m + n - 1 elements
    \param a first input, m elements
    \param m length of a
    \param b second input, n elements
    \param n length of b
    The running sum is kept in data_type, so each partial result is narrowed
    back to data_type after every accumulation step. */
void SerialConvolve( data_type c[], const data_type a[], int m, const data_type b[], int n ) {
    const int out_len = m + n - 1;
    for (int i = 0; i < out_len; ++i) {
        // Clamp j so that 0 <= j < m and 0 <= i - j < n.
        const int start = i < n ? 0 : i - n + 1;
        const int finish = i < m ? i + 1 : m;
        data_type sum = 0;
        for (int j = start; j < finish; ++j) {
            // The product is computed as int; narrow explicitly (same result as `sum += ...`).
            sum = static_cast<data_type>(sum + a[j] * b[i - j]);
        }
        c[i] = sum;
    }
}

4851c0b2f7Stbbdev #if _MSC_VER && !defined(__INTEL_COMPILER)
4951c0b2f7Stbbdev     // Suppress overzealous warning about short+=short
5051c0b2f7Stbbdev     #pragma warning( push )
5151c0b2f7Stbbdev     #pragma warning( disable: 4244 )
5251c0b2f7Stbbdev #endif
5351c0b2f7Stbbdev 
5451c0b2f7Stbbdev class InnerBody: utils::NoAssign {
5551c0b2f7Stbbdev     const data_type* my_a;
5651c0b2f7Stbbdev     const data_type* my_b;
5751c0b2f7Stbbdev     const int i;
5851c0b2f7Stbbdev public:
5951c0b2f7Stbbdev     data_type sum;
InnerBody(data_type[],const data_type a[],const data_type b[],int ii)6051c0b2f7Stbbdev     InnerBody( data_type /*c*/[], const data_type a[], const data_type b[], int ii ) :
6151c0b2f7Stbbdev         my_a(a), my_b(b), i(ii), sum(0)
6251c0b2f7Stbbdev     {}
InnerBody(InnerBody & x,tbb::split)6351c0b2f7Stbbdev     InnerBody( InnerBody& x, tbb::split ) :
6451c0b2f7Stbbdev         my_a(x.my_a), my_b(x.my_b), i(x.i), sum(0)
6551c0b2f7Stbbdev     {
6651c0b2f7Stbbdev     }
join(InnerBody & x)6751c0b2f7Stbbdev     void join( InnerBody& x ) { sum += x.sum; }
operator ()(const tbb::blocked_range<int> & range)6851c0b2f7Stbbdev     void operator()( const tbb::blocked_range<int>& range ) {
6951c0b2f7Stbbdev         for (int j = range.begin(); j != range.end(); ++j)
7051c0b2f7Stbbdev             sum += my_a[j] * my_b[i - j];
7151c0b2f7Stbbdev     }
7251c0b2f7Stbbdev };
7351c0b2f7Stbbdev 
7451c0b2f7Stbbdev #if _MSC_VER && !defined(__INTEL_COMPILER)
7551c0b2f7Stbbdev     #pragma warning( pop )
7651c0b2f7Stbbdev #endif
7751c0b2f7Stbbdev 
78b15aabb3Stbbdev //! Test OpenMP loop around TBB loop
OpenMP_TBB_Convolve(data_type c[],const data_type a[],int m,const data_type b[],int n,int p)7955f9b178SIvan Kochin void OpenMP_TBB_Convolve( data_type c[], const data_type a[], int m, const data_type b[], int n, int p ) {
80b15aabb3Stbbdev     utils::suppress_unused_warning(p);
81b15aabb3Stbbdev #pragma omp parallel num_threads(p)
8251c0b2f7Stbbdev     {
8351c0b2f7Stbbdev #pragma omp for
8451c0b2f7Stbbdev         for (int i = 0; i < m + n - 1; ++i) {
8551c0b2f7Stbbdev             int start = i < n ? 0 : i - n + 1;
8651c0b2f7Stbbdev             int finish = i < m ? i + 1 : m;
8751c0b2f7Stbbdev             InnerBody body(c, a, b, i);
8851c0b2f7Stbbdev             tbb::parallel_reduce(tbb::blocked_range<int>(start, finish, 10), body);
8951c0b2f7Stbbdev             c[i] = body.sum;
9051c0b2f7Stbbdev         }
9151c0b2f7Stbbdev     }
9251c0b2f7Stbbdev }
9351c0b2f7Stbbdev 
9451c0b2f7Stbbdev class OuterBody: utils::NoAssign {
9551c0b2f7Stbbdev     const data_type* my_a;
9651c0b2f7Stbbdev     const data_type* my_b;
9751c0b2f7Stbbdev     data_type* my_c;
9851c0b2f7Stbbdev     const int m;
9951c0b2f7Stbbdev     const int n;
100*dbccbee9SIlya Mishin #if __clang__ && !__INTEL_COMPILER
101*dbccbee9SIlya Mishin     #pragma clang diagnostic push
102*dbccbee9SIlya Mishin     #pragma clang diagnostic ignored "-Wunused-private-field"
103*dbccbee9SIlya Mishin #endif
10455f9b178SIvan Kochin     const int p;
105*dbccbee9SIlya Mishin #if __clang__ && !__INTEL_COMPILER
106*dbccbee9SIlya Mishin     #pragma clang diagnostic pop // "-Wunused-private-field"
107*dbccbee9SIlya Mishin #endif
10851c0b2f7Stbbdev public:
OuterBody(data_type c[],const data_type a[],int m_,const data_type b[],int n_,int p_)10955f9b178SIvan Kochin     OuterBody( data_type c[], const data_type a[], int m_, const data_type b[], int n_, int p_ ) :
110b15aabb3Stbbdev         my_a(a), my_b(b), my_c(c), m(m_), n(n_), p(p_)
11151c0b2f7Stbbdev     {}
operator ()(const tbb::blocked_range<int> & range) const11251c0b2f7Stbbdev     void operator()( const tbb::blocked_range<int>& range ) const {
11351c0b2f7Stbbdev         for (int i = range.begin(); i != range.end(); ++i) {
11451c0b2f7Stbbdev             int start = i < n ? 0 : i - n + 1;
11551c0b2f7Stbbdev             int finish = i < m ? i + 1 : m;
11651c0b2f7Stbbdev             data_type sum = 0;
117b15aabb3Stbbdev #pragma omp parallel for reduction(+:sum) num_threads(p)
11851c0b2f7Stbbdev             for (int j = start; j < finish; ++j)
11951c0b2f7Stbbdev                 sum += my_a[j] * my_b[i - j];
12051c0b2f7Stbbdev             my_c[i] = sum;
12151c0b2f7Stbbdev         }
12251c0b2f7Stbbdev     }
12351c0b2f7Stbbdev };
12451c0b2f7Stbbdev 
12551c0b2f7Stbbdev //! Test TBB loop around OpenMP loop
TBB_OpenMP_Convolve(data_type c[],const data_type a[],int m,const data_type b[],int n,int p)12655f9b178SIvan Kochin void TBB_OpenMP_Convolve( data_type c[], const data_type a[], int m, const data_type b[], int n, int p ) {
127b15aabb3Stbbdev     tbb::parallel_for(tbb::blocked_range<int>(0, m + n - 1, 10), OuterBody(c, a, m, b, n, p));
12851c0b2f7Stbbdev }
12951c0b2f7Stbbdev 
#if __INTEL_COMPILER
//! Checks that using OpenMP first does not change the concurrency TBB reports.
/** Compiled only when __INTEL_COMPILER is set. Sets KMP_AFFINITY to "compact",
    queries omp_get_max_threads(), runs an empty OpenMP parallel region, and then
    requires tbb::this_task_arena::max_concurrency() to equal the OpenMP value. */
void TestNumThreads() {
    utils::SetEnv("KMP_AFFINITY", "compact");
    // Make an OpenMP call before initializing TBB
    int omp_nthreads = omp_get_max_threads();
    #pragma omp parallel
    {}
    int tbb_nthreads = tbb::this_task_arena::max_concurrency();
    // For the purpose of testing, assume that OpenMP and TBB should utilize the same # of threads.
    // If it's not true on some platforms, the test will need to be adjusted.
    REQUIRE_MESSAGE(tbb_nthreads == omp_nthreads, "Initialization of TBB is possibly affected by OpenMP");
}
#endif // __INTEL_COMPILER

14451c0b2f7Stbbdev const int M = 17 * 17;
14551c0b2f7Stbbdev const int N = 13 * 13;
14651c0b2f7Stbbdev data_type A[M], B[N];
14751c0b2f7Stbbdev data_type expected[M+N], actual[M+N];
14851c0b2f7Stbbdev 
14951c0b2f7Stbbdev template <class Func>
RunTest(Func F,int m,int n,int p)15055f9b178SIvan Kochin void RunTest( Func F, int m, int n, int p) {
15151c0b2f7Stbbdev     tbb::global_control limit(tbb::global_control::max_allowed_parallelism, p);
15251c0b2f7Stbbdev     memset(actual, -1, (m + n) * sizeof(data_type));
153b15aabb3Stbbdev     F(actual, A, m, B, n, p);
15451c0b2f7Stbbdev     CHECK(memcmp(actual, expected, (m + n - 1) * sizeof(data_type)) == 0);
15551c0b2f7Stbbdev }
15651c0b2f7Stbbdev 
157a080baf9SAlex // Disable it because OpenMP isn't instrumented that leads to false positive
158a080baf9SAlex #if !__TBB_USE_THREAD_SANITIZER
15951c0b2f7Stbbdev //! \brief \ref error_guessing
16051c0b2f7Stbbdev TEST_CASE("Testing oneTBB with OpenMP") {
16151c0b2f7Stbbdev #if __INTEL_COMPILER
16251c0b2f7Stbbdev     TestNumThreads(); // Testing initialization-related behavior; must be the first
16351c0b2f7Stbbdev #endif // __INTEL_COMPILER
16455f9b178SIvan Kochin     for (int p = static_cast<int>(utils::MinThread); p <= static_cast<int>(utils::MaxThread); ++p) {
16555f9b178SIvan Kochin         for (int m = 1; m <= M; m *= 17) {
16655f9b178SIvan Kochin             for (int n = 1; n <= N; n *= 13) {
16755f9b178SIvan Kochin                 for (int i = 0; i < m; ++i) A[i] = data_type(1 + i / 5);
16855f9b178SIvan Kochin                 for (int i = 0; i < n; ++i) B[i] = data_type(1 + i / 7);
16951c0b2f7Stbbdev                 SerialConvolve( expected, A, m, B, n );
17051c0b2f7Stbbdev                 RunTest( OpenMP_TBB_Convolve, m, n, p );
17151c0b2f7Stbbdev                 RunTest( TBB_OpenMP_Convolve, m, n, p );
17251c0b2f7Stbbdev             }
17351c0b2f7Stbbdev         }
17451c0b2f7Stbbdev     }
17551c0b2f7Stbbdev }
176a080baf9SAlex #endif
177