/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/
1651c0b2f7Stbbdev
//! \file test_openmp.cpp
//! \brief Test for [internal] functionality
19478de5b1Stbbdev
#if _WIN32 || _WIN64
#define _CRT_SECURE_NO_WARNINGS
#endif

#include "common/test.h"
#include "common/utils.h"
#include "common/utils_env.h"
#include "tbb/global_control.h"
#include "tbb/blocked_range.h"
#include "tbb/parallel_for.h"
#include "tbb/parallel_reduce.h"

#include <cstring>

// Test mixing OpenMP and TBB
#include <omp.h>
3451c0b2f7Stbbdev
//! Element type shared by the convolution inputs and outputs.
using data_type = short;

//! Single-threaded convolution of a[0..m) with b[0..n).
/** Writes m + n - 1 elements to c: c[i] is the sum of a[j] * b[i - j] over every j
    for which both j and i - j are valid indices. The running sum is kept in data_type,
    so it is narrowed back to data_type after each addition. */
void SerialConvolve( data_type c[], const data_type a[], int m, const data_type b[], int n ) {
    for (int i = 0; i < m + n - 1; ++i) {
        // Restrict j so that 0 <= j < m and 0 <= i - j < n.
        int start = i < n ? 0 : i - n + 1;
        int finish = i < m ? i + 1 : m;
        data_type sum = 0;
        for (int j = start; j < finish; ++j)
            sum += a[j] * b[i - j];
        c[i] = sum;
    }
}
4751c0b2f7Stbbdev
4851c0b2f7Stbbdev #if _MSC_VER && !defined(__INTEL_COMPILER)
4951c0b2f7Stbbdev // Suppress overzealous warning about short+=short
5051c0b2f7Stbbdev #pragma warning( push )
5151c0b2f7Stbbdev #pragma warning( disable: 4244 )
5251c0b2f7Stbbdev #endif
5351c0b2f7Stbbdev
5451c0b2f7Stbbdev class InnerBody: utils::NoAssign {
5551c0b2f7Stbbdev const data_type* my_a;
5651c0b2f7Stbbdev const data_type* my_b;
5751c0b2f7Stbbdev const int i;
5851c0b2f7Stbbdev public:
5951c0b2f7Stbbdev data_type sum;
InnerBody(data_type[],const data_type a[],const data_type b[],int ii)6051c0b2f7Stbbdev InnerBody( data_type /*c*/[], const data_type a[], const data_type b[], int ii ) :
6151c0b2f7Stbbdev my_a(a), my_b(b), i(ii), sum(0)
6251c0b2f7Stbbdev {}
InnerBody(InnerBody & x,tbb::split)6351c0b2f7Stbbdev InnerBody( InnerBody& x, tbb::split ) :
6451c0b2f7Stbbdev my_a(x.my_a), my_b(x.my_b), i(x.i), sum(0)
6551c0b2f7Stbbdev {
6651c0b2f7Stbbdev }
join(InnerBody & x)6751c0b2f7Stbbdev void join( InnerBody& x ) { sum += x.sum; }
operator ()(const tbb::blocked_range<int> & range)6851c0b2f7Stbbdev void operator()( const tbb::blocked_range<int>& range ) {
6951c0b2f7Stbbdev for (int j = range.begin(); j != range.end(); ++j)
7051c0b2f7Stbbdev sum += my_a[j] * my_b[i - j];
7151c0b2f7Stbbdev }
7251c0b2f7Stbbdev };
7351c0b2f7Stbbdev
7451c0b2f7Stbbdev #if _MSC_VER && !defined(__INTEL_COMPILER)
7551c0b2f7Stbbdev #pragma warning( pop )
7651c0b2f7Stbbdev #endif
7751c0b2f7Stbbdev
78b15aabb3Stbbdev //! Test OpenMP loop around TBB loop
OpenMP_TBB_Convolve(data_type c[],const data_type a[],int m,const data_type b[],int n,int p)7955f9b178SIvan Kochin void OpenMP_TBB_Convolve( data_type c[], const data_type a[], int m, const data_type b[], int n, int p ) {
80b15aabb3Stbbdev utils::suppress_unused_warning(p);
81b15aabb3Stbbdev #pragma omp parallel num_threads(p)
8251c0b2f7Stbbdev {
8351c0b2f7Stbbdev #pragma omp for
8451c0b2f7Stbbdev for (int i = 0; i < m + n - 1; ++i) {
8551c0b2f7Stbbdev int start = i < n ? 0 : i - n + 1;
8651c0b2f7Stbbdev int finish = i < m ? i + 1 : m;
8751c0b2f7Stbbdev InnerBody body(c, a, b, i);
8851c0b2f7Stbbdev tbb::parallel_reduce(tbb::blocked_range<int>(start, finish, 10), body);
8951c0b2f7Stbbdev c[i] = body.sum;
9051c0b2f7Stbbdev }
9151c0b2f7Stbbdev }
9251c0b2f7Stbbdev }
9351c0b2f7Stbbdev
9451c0b2f7Stbbdev class OuterBody: utils::NoAssign {
9551c0b2f7Stbbdev const data_type* my_a;
9651c0b2f7Stbbdev const data_type* my_b;
9751c0b2f7Stbbdev data_type* my_c;
9851c0b2f7Stbbdev const int m;
9951c0b2f7Stbbdev const int n;
100*dbccbee9SIlya Mishin #if __clang__ && !__INTEL_COMPILER
101*dbccbee9SIlya Mishin #pragma clang diagnostic push
102*dbccbee9SIlya Mishin #pragma clang diagnostic ignored "-Wunused-private-field"
103*dbccbee9SIlya Mishin #endif
10455f9b178SIvan Kochin const int p;
105*dbccbee9SIlya Mishin #if __clang__ && !__INTEL_COMPILER
106*dbccbee9SIlya Mishin #pragma clang diagnostic pop // "-Wunused-private-field"
107*dbccbee9SIlya Mishin #endif
10851c0b2f7Stbbdev public:
OuterBody(data_type c[],const data_type a[],int m_,const data_type b[],int n_,int p_)10955f9b178SIvan Kochin OuterBody( data_type c[], const data_type a[], int m_, const data_type b[], int n_, int p_ ) :
110b15aabb3Stbbdev my_a(a), my_b(b), my_c(c), m(m_), n(n_), p(p_)
11151c0b2f7Stbbdev {}
operator ()(const tbb::blocked_range<int> & range) const11251c0b2f7Stbbdev void operator()( const tbb::blocked_range<int>& range ) const {
11351c0b2f7Stbbdev for (int i = range.begin(); i != range.end(); ++i) {
11451c0b2f7Stbbdev int start = i < n ? 0 : i - n + 1;
11551c0b2f7Stbbdev int finish = i < m ? i + 1 : m;
11651c0b2f7Stbbdev data_type sum = 0;
117b15aabb3Stbbdev #pragma omp parallel for reduction(+:sum) num_threads(p)
11851c0b2f7Stbbdev for (int j = start; j < finish; ++j)
11951c0b2f7Stbbdev sum += my_a[j] * my_b[i - j];
12051c0b2f7Stbbdev my_c[i] = sum;
12151c0b2f7Stbbdev }
12251c0b2f7Stbbdev }
12351c0b2f7Stbbdev };
12451c0b2f7Stbbdev
12551c0b2f7Stbbdev //! Test TBB loop around OpenMP loop
TBB_OpenMP_Convolve(data_type c[],const data_type a[],int m,const data_type b[],int n,int p)12655f9b178SIvan Kochin void TBB_OpenMP_Convolve( data_type c[], const data_type a[], int m, const data_type b[], int n, int p ) {
127b15aabb3Stbbdev tbb::parallel_for(tbb::blocked_range<int>(0, m + n - 1, 10), OuterBody(c, a, m, b, n, p));
12851c0b2f7Stbbdev }
12951c0b2f7Stbbdev
#if __INTEL_COMPILER
//! Checks that TBB reports the same concurrency as OpenMP when OpenMP is used first.
void TestNumThreads() {
    utils::SetEnv("KMP_AFFINITY", "compact");
    // Make an OpenMP call before initializing TBB
    int omp_nthreads = omp_get_max_threads();
    // Run an empty parallel region before querying TBB.
#pragma omp parallel
    {}
    int tbb_nthreads = tbb::this_task_arena::max_concurrency();
    // For the purpose of testing, assume that OpenMP and TBB should utilize the same # of threads.
    // If it's not true on some platforms, the test will need to be adjusted.
    REQUIRE_MESSAGE(tbb_nthreads == omp_nthreads, "Initialization of TBB is possibly affected by OpenMP");
}
#endif // __INTEL_COMPILER
14351c0b2f7Stbbdev
14451c0b2f7Stbbdev const int M = 17 * 17;
14551c0b2f7Stbbdev const int N = 13 * 13;
14651c0b2f7Stbbdev data_type A[M], B[N];
14751c0b2f7Stbbdev data_type expected[M+N], actual[M+N];
14851c0b2f7Stbbdev
14951c0b2f7Stbbdev template <class Func>
RunTest(Func F,int m,int n,int p)15055f9b178SIvan Kochin void RunTest( Func F, int m, int n, int p) {
15151c0b2f7Stbbdev tbb::global_control limit(tbb::global_control::max_allowed_parallelism, p);
15251c0b2f7Stbbdev memset(actual, -1, (m + n) * sizeof(data_type));
153b15aabb3Stbbdev F(actual, A, m, B, n, p);
15451c0b2f7Stbbdev CHECK(memcmp(actual, expected, (m + n - 1) * sizeof(data_type)) == 0);
15551c0b2f7Stbbdev }
15651c0b2f7Stbbdev
157a080baf9SAlex // Disable it because OpenMP isn't instrumented that leads to false positive
158a080baf9SAlex #if !__TBB_USE_THREAD_SANITIZER
15951c0b2f7Stbbdev //! \brief \ref error_guessing
16051c0b2f7Stbbdev TEST_CASE("Testing oneTBB with OpenMP") {
16151c0b2f7Stbbdev #if __INTEL_COMPILER
16251c0b2f7Stbbdev TestNumThreads(); // Testing initialization-related behavior; must be the first
16351c0b2f7Stbbdev #endif // __INTEL_COMPILER
16455f9b178SIvan Kochin for (int p = static_cast<int>(utils::MinThread); p <= static_cast<int>(utils::MaxThread); ++p) {
16555f9b178SIvan Kochin for (int m = 1; m <= M; m *= 17) {
16655f9b178SIvan Kochin for (int n = 1; n <= N; n *= 13) {
16755f9b178SIvan Kochin for (int i = 0; i < m; ++i) A[i] = data_type(1 + i / 5);
16855f9b178SIvan Kochin for (int i = 0; i < n; ++i) B[i] = data_type(1 + i / 7);
16951c0b2f7Stbbdev SerialConvolve( expected, A, m, B, n );
17051c0b2f7Stbbdev RunTest( OpenMP_TBB_Convolve, m, n, p );
17151c0b2f7Stbbdev RunTest( TBB_OpenMP_Convolve, m, n, p );
17251c0b2f7Stbbdev }
17351c0b2f7Stbbdev }
17451c0b2f7Stbbdev }
17551c0b2f7Stbbdev }
176a080baf9SAlex #endif
177