/*
    Copyright (c) 2021-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "common/test.h"

#include "tbb/parallel_for.h"
#include "tbb/task_arena.h"
#include "tbb/global_control.h"
#include "oneapi/tbb/mutex.h"

#include "common/utils.h"
#include "common/utils_concurrency_limit.h"
#include "common/dummy_body.h"
#include "common/spin_barrier.h"

#include <cstddef>
#include <utility>
#include <vector>
#include <algorithm> // std::min_element

//! \file test_partitioner.cpp
//! \brief Test for [internal] functionality

namespace task_affinity_retention {

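// Helper for the affinity retention tests below. It runs the same statically
// partitioned parallel_for `repeats` times inside an oversubscribed arena,
// records which subrange each thread executed on every repetition, and returns
// the observed number of subrange switches normalized by repeats * num_threads
// (0.0 means every thread kept its subrange for the whole run).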
template <typename PerBodyFunc> float test(PerBodyFunc&& body) {
    const std::size_t num_threads = 2 * utils::get_platform_max_threads();
    tbb::global_control concurrency(tbb::global_control::max_allowed_parallelism, num_threads);
    tbb::task_arena big_arena(static_cast<int>(num_threads));

#if __TBB_USE_THREAD_SANITIZER
    // Reduce execution time under Thread Sanitizer
    const std::size_t repeats = 50;
#elif EMSCRIPTEN
    // Reduce execution time for Emscripten
    const std::size_t repeats = 10;
#else
    const std::size_t repeats = 100;
#endif
    const std::size_t per_thread_iters = 1000;

    using range = std::pair<std::size_t, std::size_t>;
    using execution_trace = std::vector< std::vector<range> >;

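    // trace[thread_id] accumulates, in execution order, the [begin, end)
    // subrange that the thread executed on each repetition.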
    execution_trace trace(num_threads);
    for (auto& v : trace)
        v.reserve(repeats);

    for (std::size_t repeat = 0; repeat < repeats; ++repeat) {
        big_arena.execute([&] {
            tbb::parallel_for(
                tbb::blocked_range<std::size_t>(0, per_thread_iters * num_threads),
                [&](const tbb::blocked_range<std::size_t>& r) {
                    int thread_id = tbb::this_task_arena::current_thread_index();
                    trace[thread_id].emplace_back(r.begin(), r.end());

                    const bool is_uniform_split = r.size() == per_thread_iters;
                    CHECK_MESSAGE(is_uniform_split, "static partitioner split the range incorrectly.");

                    std::this_thread::yield();

                    std::forward<PerBodyFunc>(body)();
                },
                tbb::static_partitioner()
            );
        });
        // TODO:
        //   - Consider introducing an observer to guarantee the threads left the arena.
    }

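    // Count how often a thread's subrange differs from the one it executed on
    // the previous repetition. Comparison starts from the second recorded
    // invocation, so the very first assignment is never counted as a shift.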
    std::size_t range_shifts = 0;
    for (std::size_t thread_id = 0; thread_id < num_threads; ++thread_id) {
        auto trace_size = trace[thread_id].size();
        if (trace_size > 1) {
            auto previous_call_range = trace[thread_id][1];

            for (std::size_t invocation = 2; invocation < trace_size; ++invocation) {
                const auto& current_call_range = trace[thread_id][invocation];

                const bool is_range_changed = previous_call_range != current_call_range;
                if (is_range_changed) {
                    previous_call_range = current_call_range;
                    // count how many times the thread changed its execution strategy
                    ++range_shifts;
                }
            }
        }

#if TBB_USE_DEBUG
        WARN_MESSAGE(
            trace_size <= repeats,
            "Thread " << thread_id << " executed extra " << trace_size - repeats
            << " ranges assigned to other threads."
        );
        WARN_MESSAGE(
            trace_size >= repeats,
            "Thread " << thread_id << " executed " << repeats - trace_size
            << " fewer ranges than expected."
        );
#endif
    }

#if TBB_USE_DEBUG
    WARN_MESSAGE(
        range_shifts == 0,
        "Threads changed subranges " << range_shifts << " times out of "
        << num_threads * repeats - num_threads << " possible."
    );
#endif

    return float(range_shifts) / float(repeats * num_threads);
}

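// Relaxed check: tolerate some subrange migration, but warn if threads switched
// subranges in more than half of the possible cases.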
void relaxed_test() {
    float range_shifts_part = test(/*per body invocation call*/[]{});
    const float require_tolerance = 0.5f;
    // TODO: investigate why switching could happen in more than half of the cases
    WARN_MESSAGE(
        (0 <= range_shifts_part && range_shifts_part <= require_tolerance),
        "Task affinitization was not respected in " << range_shifts_part * 100 << "% of the cases."
    );
}

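// Strict check: with all threads synchronized by a barrier inside the body,
// repeat the measurement until a run with (almost) no subrange switches is
// observed.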
void strict_test() {
    utils::SpinBarrier barrier(2 * utils::get_platform_max_threads());
    const float tolerance = 1e-5f;
    while (test(/*per body invocation call*/[&barrier] { barrier.wait(); }) > tolerance);
}

} // namespace task_affinity_retention

//! Testing that affinitized tasks are not stolen
//! \brief \ref error_guessing
TEST_CASE("Threads respect task affinity") {
    task_affinity_retention::relaxed_test();
    task_affinity_retention::strict_test();
}

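// Regression helper: runs a statically partitioned parallel_for over a custom
// range type and records the size of every chunk that gets executed. Each trial
// must produce exactly utils::get_platform_max_threads() chunks, and no chunk
// may exceed diff_mult times the smallest chunk by more than a slack of 2.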
template <typename Range>
void test_custom_range(int diff_mult) {
    int num_trials = 100;

    std::vector<std::vector<std::size_t>> results(num_trials);
    oneapi::tbb::mutex results_mutex;

    for (int i = 0; i < num_trials; ++i) {
        oneapi::tbb::parallel_for(Range(0, int(100 * utils::get_platform_max_threads()), 1), [&] (const Range& r) {
            oneapi::tbb::mutex::scoped_lock lock(results_mutex);
            results[i].push_back(r.size());
        }, oneapi::tbb::static_partitioner{});
    }

    for (auto& res : results) {
        REQUIRE(res.size() == utils::get_platform_max_threads());

        std::size_t min_size = *std::min_element(res.begin(), res.end());
        for (auto elem : res) {
            REQUIRE(min_size * diff_mult + 2 >= elem);
        }
    }
}

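// The two custom ranges below differ only in whether they provide a
// proportional-split constructor: without it, chunk sizes are allowed to spread
// up to roughly a factor of two, while with it the chunks are expected to be
// near-equal (hence diff_mult of 2 vs. 1 passed to test_custom_range).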
//! \brief \ref regression
TEST_CASE("Test partitioned tasks count and size for static_partitioner") {
    class custom_range : public oneapi::tbb::blocked_range<int> {
        using base_type = oneapi::tbb::blocked_range<int>;
    public:
        custom_range(int l, int r, int g) : base_type(l, r, g) {}
        custom_range(const custom_range& r) : base_type(r) {}

        custom_range(custom_range& r, tbb::split) : base_type(r, tbb::split()) {}
    };

    test_custom_range<custom_range>(2);

    class custom_range_with_psplit : public oneapi::tbb::blocked_range<int> {
        using base_type = oneapi::tbb::blocked_range<int>;
    public:
        custom_range_with_psplit(int l, int r, int g) : base_type(l, r, g) {}
        custom_range_with_psplit(const custom_range_with_psplit& r) : base_type(r) {}

        custom_range_with_psplit(custom_range_with_psplit& r, tbb::split) : base_type(r, tbb::split()) {}
        custom_range_with_psplit(custom_range_with_psplit& r, tbb::proportional_split& p) : base_type(r, p) {}
    };

    test_custom_range<custom_range_with_psplit>(1);
}