1478de5b1Stbbdev /*
2*7cee2251SJhaShweta1 Copyright (c) 2021-2023 Intel Corporation
3478de5b1Stbbdev
4478de5b1Stbbdev Licensed under the Apache License, Version 2.0 (the "License");
5478de5b1Stbbdev you may not use this file except in compliance with the License.
6478de5b1Stbbdev You may obtain a copy of the License at
7478de5b1Stbbdev
8478de5b1Stbbdev http://www.apache.org/licenses/LICENSE-2.0
9478de5b1Stbbdev
10478de5b1Stbbdev Unless required by applicable law or agreed to in writing, software
11478de5b1Stbbdev distributed under the License is distributed on an "AS IS" BASIS,
12478de5b1Stbbdev WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13478de5b1Stbbdev See the License for the specific language governing permissions and
14478de5b1Stbbdev limitations under the License.
15478de5b1Stbbdev */
16478de5b1Stbbdev
17478de5b1Stbbdev #include "common/test.h"
18478de5b1Stbbdev
19478de5b1Stbbdev #include "tbb/parallel_for.h"
20478de5b1Stbbdev #include "tbb/task_arena.h"
21478de5b1Stbbdev #include "tbb/global_control.h"
22cc3a9aa9SPavel Kumbrasev #include "oneapi/tbb/mutex.h"
23478de5b1Stbbdev
24478de5b1Stbbdev #include "common/utils.h"
25478de5b1Stbbdev #include "common/utils_concurrency_limit.h"
26478de5b1Stbbdev #include "common/dummy_body.h"
27478de5b1Stbbdev #include "common/spin_barrier.h"
28478de5b1Stbbdev
29478de5b1Stbbdev #include <cstddef>
30478de5b1Stbbdev #include <utility>
31478de5b1Stbbdev #include <vector>
32cc3a9aa9SPavel Kumbrasev #include <algorithm> // std::min_element
33478de5b1Stbbdev
34478de5b1Stbbdev //! \file test_partitioner.cpp
35478de5b1Stbbdev //! \brief Test for [internal] functionality
36478de5b1Stbbdev
37478de5b1Stbbdev namespace task_affinity_retention {
38478de5b1Stbbdev
// Runs repeated parallel_for invocations with static_partitioner in an
// oversubscribed arena and measures how often a thread's assigned subrange
// changes between repeats. Returns the fraction of observed subrange shifts
// relative to the total number of body invocations (num_threads * repeats);
// 0.0 means affinity was perfectly retained. `body` is invoked once per
// parallel_for body execution (e.g. to add a barrier in the strict test).
template <typename PerBodyFunc> float test(PerBodyFunc&& body) {
    // Oversubscribe: request twice the HW concurrency so extra threads exist
    // that could steal affinitized tasks if affinity were not respected.
    const std::size_t num_threads = 2 * utils::get_platform_max_threads();
    tbb::global_control concurrency(tbb::global_control::max_allowed_parallelism, num_threads);
    tbb::task_arena big_arena(static_cast<int>(num_threads));

#if __TBB_USE_THREAD_SANITIZER
    // Reduce execution time under Thread Sanitizer
    const std::size_t repeats = 50;
#elif EMSCRIPTEN
    // Reduce execution time for emscripten
    const std::size_t repeats = 10;
#else
    const std::size_t repeats = 100;
#endif
    const std::size_t per_thread_iters = 1000;

    // Per-thread log of [begin, end) subranges, one entry appended per body
    // invocation; with uniform splitting each thread gets one entry per repeat.
    using range = std::pair<std::size_t, std::size_t>;
    using execution_trace = std::vector< std::vector<range> >;

    execution_trace trace(num_threads);
    for (auto& v : trace)
        v.reserve(repeats);

    for (std::size_t repeat = 0; repeat < repeats; ++repeat) {
        big_arena.execute([&] {
            tbb::parallel_for(
                tbb::blocked_range<std::size_t>(0, per_thread_iters * num_threads),
                [&](const tbb::blocked_range<std::size_t>& r) {
                    // Record which subrange this arena slot executed.
                    int thread_id = tbb::this_task_arena::current_thread_index();
                    trace[thread_id].emplace_back(r.begin(), r.end());

                    // static_partitioner is expected to split the whole range
                    // into exactly num_threads equal chunks.
                    const bool is_uniform_split = r.size() == per_thread_iters;
                    CHECK_MESSAGE(is_uniform_split, "static partitioner split the range incorrectly.");

                    // Give other threads a chance to run, increasing the odds
                    // of exposing unwanted stealing.
                    std::this_thread::yield();

                    std::forward<PerBodyFunc>(body)();
                },
                tbb::static_partitioner()
            );
        });
        // TODO:
        // - Consider introducing an observer to guarantee the threads left the arena.
    }

    // Count how many times a thread observed a different subrange than on its
    // previous invocation; zero means perfect affinity retention.
    std::size_t range_shifts = 0;
    for (std::size_t thread_id = 0; thread_id < num_threads; ++thread_id) {
        auto trace_size = trace[thread_id].size();
        if (trace_size > 1) {
            // NOTE(review): the baseline is the SECOND recorded invocation
            // (index 1), so the 0 -> 1 transition is never counted — presumably
            // the first repeat is treated as a warm-up. Confirm this is
            // intentional: the "possible" count reported below assumes
            // (repeats - 1) comparisons per thread.
            auto previous_call_range = trace[thread_id][1];

            for (std::size_t invocation = 2; invocation < trace_size; ++invocation) {
                const auto& current_call_range = trace[thread_id][invocation];

                const bool is_range_changed = previous_call_range != current_call_range;
                if (is_range_changed) {
                    previous_call_range = current_call_range;
                    // count thread changes its execution strategy
                    ++range_shifts;
                }
            }
        }

#if TBB_USE_DEBUG
        // These WARNs fire only when the condition is false, so the unsigned
        // subtraction in each message cannot underflow when printed.
        WARN_MESSAGE(
            trace_size <= repeats,
            "Thread " << thread_id << " executed extra " << trace_size - repeats
            << " ranges assigned to other threads."
        );
        WARN_MESSAGE(
            trace_size >= repeats,
            "Thread " << thread_id << " executed " << repeats - trace_size
            << " fewer ranges than expected."
        );
#endif
    }

#if TBB_USE_DEBUG
    WARN_MESSAGE(
        range_shifts == 0,
        "Threads change subranges " << range_shifts << " times out of "
        << num_threads * repeats - num_threads << " possible."
    );
#endif

    return float(range_shifts) / float(repeats * num_threads);
}
126478de5b1Stbbdev
relaxed_test()127478de5b1Stbbdev void relaxed_test() {
128478de5b1Stbbdev float range_shifts_part = test(/*per body invocation call*/[]{});
129478de5b1Stbbdev const float require_tolerance = 0.5f;
130478de5b1Stbbdev // TODO: investigate why switching could happen in more than half of the cases
131478de5b1Stbbdev WARN_MESSAGE(
132478de5b1Stbbdev (0 <= range_shifts_part && range_shifts_part <= require_tolerance),
133478de5b1Stbbdev "Tasks affinitization was not respected in " << range_shifts_part * 100 << "% of the cases."
134478de5b1Stbbdev );
135478de5b1Stbbdev }
136478de5b1Stbbdev
strict_test()137478de5b1Stbbdev void strict_test() {
138478de5b1Stbbdev utils::SpinBarrier barrier(2 * utils::get_platform_max_threads());
139478de5b1Stbbdev const float tolerance = 1e-5f;
140478de5b1Stbbdev while (test(/*per body invocation call*/[&barrier] { barrier.wait(); }) > tolerance);
141478de5b1Stbbdev }
142478de5b1Stbbdev
143478de5b1Stbbdev } // namespace task_affinity_retention
144478de5b1Stbbdev
145478de5b1Stbbdev //! Testing affinitized tasks are not stolen
146478de5b1Stbbdev //! \brief \ref error_guessing
TEST_CASE("Threads respect task affinity") {
    // First the tolerant check (warns past a 50% threshold), then the strict
    // one that loops until a run with zero migrations is observed.
    task_affinity_retention::relaxed_test();
    task_affinity_retention::strict_test();
}
151cc3a9aa9SPavel Kumbrasev
152cc3a9aa9SPavel Kumbrasev template <typename Range>
test_custom_range(int diff_mult)153cc3a9aa9SPavel Kumbrasev void test_custom_range(int diff_mult) {
154cc3a9aa9SPavel Kumbrasev int num_trials = 100;
155cc3a9aa9SPavel Kumbrasev
156cc3a9aa9SPavel Kumbrasev std::vector<std::vector<std::size_t>> results(num_trials);
157cc3a9aa9SPavel Kumbrasev oneapi::tbb::mutex results_mutex;
158cc3a9aa9SPavel Kumbrasev
159cc3a9aa9SPavel Kumbrasev for (int i = 0; i < num_trials; ++i) {
160cc3a9aa9SPavel Kumbrasev oneapi::tbb::parallel_for(Range(0, int(100 * utils::get_platform_max_threads()), 1), [&] (const Range& r) {
161cc3a9aa9SPavel Kumbrasev oneapi::tbb::mutex::scoped_lock lock(results_mutex);
162cc3a9aa9SPavel Kumbrasev results[i].push_back(r.size());
163cc3a9aa9SPavel Kumbrasev }, oneapi::tbb::static_partitioner{});
164cc3a9aa9SPavel Kumbrasev }
165cc3a9aa9SPavel Kumbrasev
166cc3a9aa9SPavel Kumbrasev for (auto& res : results) {
167cc3a9aa9SPavel Kumbrasev REQUIRE(res.size() == utils::get_platform_max_threads());
168cc3a9aa9SPavel Kumbrasev
169cc3a9aa9SPavel Kumbrasev std::size_t min_size = *std::min_element(res.begin(), res.end());
170cc3a9aa9SPavel Kumbrasev for (auto elem : res) {
171cc3a9aa9SPavel Kumbrasev REQUIRE(min_size * diff_mult + 2 >= elem);
172cc3a9aa9SPavel Kumbrasev }
173cc3a9aa9SPavel Kumbrasev }
174cc3a9aa9SPavel Kumbrasev }
175cc3a9aa9SPavel Kumbrasev
176cc3a9aa9SPavel Kumbrasev //! \brief \ref regression
177cc3a9aa9SPavel Kumbrasev TEST_CASE("Test partitioned tasks count and size for static_partitioner") {
178cc3a9aa9SPavel Kumbrasev class custom_range : public oneapi::tbb::blocked_range<int> {
179cc3a9aa9SPavel Kumbrasev using base_type = oneapi::tbb::blocked_range<int>;
180cc3a9aa9SPavel Kumbrasev public:
custom_range(int l,int r,int g)181cc3a9aa9SPavel Kumbrasev custom_range(int l, int r, int g) : base_type(l, r, g) {}
custom_range(const custom_range & r)182cc3a9aa9SPavel Kumbrasev custom_range(const custom_range& r) : base_type(r) {}
183cc3a9aa9SPavel Kumbrasev
custom_range(custom_range & r,tbb::split)184cc3a9aa9SPavel Kumbrasev custom_range(custom_range& r, tbb::split) : base_type(r, tbb::split()) {}
185cc3a9aa9SPavel Kumbrasev };
186cc3a9aa9SPavel Kumbrasev
187cc3a9aa9SPavel Kumbrasev test_custom_range<custom_range>(2);
188cc3a9aa9SPavel Kumbrasev
189cc3a9aa9SPavel Kumbrasev class custom_range_with_psplit : public oneapi::tbb::blocked_range<int> {
190cc3a9aa9SPavel Kumbrasev using base_type = oneapi::tbb::blocked_range<int>;
191cc3a9aa9SPavel Kumbrasev public:
custom_range_with_psplit(int l,int r,int g)192cc3a9aa9SPavel Kumbrasev custom_range_with_psplit(int l, int r, int g) : base_type(l, r, g) {}
custom_range_with_psplit(const custom_range_with_psplit & r)193cc3a9aa9SPavel Kumbrasev custom_range_with_psplit(const custom_range_with_psplit& r) : base_type(r) {}
194cc3a9aa9SPavel Kumbrasev
custom_range_with_psplit(custom_range_with_psplit & r,tbb::split)195cc3a9aa9SPavel Kumbrasev custom_range_with_psplit(custom_range_with_psplit& r, tbb::split) : base_type(r, tbb::split()) {}
custom_range_with_psplit(custom_range_with_psplit & r,tbb::proportional_split & p)196cc3a9aa9SPavel Kumbrasev custom_range_with_psplit(custom_range_with_psplit& r, tbb::proportional_split& p) : base_type(r, p) {}
197cc3a9aa9SPavel Kumbrasev };
198cc3a9aa9SPavel Kumbrasev
199cc3a9aa9SPavel Kumbrasev test_custom_range<custom_range_with_psplit>(1);
200cc3a9aa9SPavel Kumbrasev }
201