/*
    Copyright (c) 2005-2023 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#ifndef __TBB_partitioner_H
#define __TBB_partitioner_H

#ifndef __TBB_INITIAL_CHUNKS
// initial task divisions per thread
#define __TBB_INITIAL_CHUNKS 2
#endif
#ifndef __TBB_RANGE_POOL_CAPACITY
// maximum number of elements in range pool
#define __TBB_RANGE_POOL_CAPACITY 8
#endif
#ifndef __TBB_INIT_DEPTH
// initial value for depth of range pool
#define __TBB_INIT_DEPTH 5
#endif
#ifndef __TBB_DEMAND_DEPTH_ADD
// when an imbalance is detected, the range is split this many more times
#define __TBB_DEMAND_DEPTH_ADD 1
#endif
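// Each knob above is wrapped in #ifndef, so it can be tuned by defining the corresponding
// macro before this header is included.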

#include "detail/_config.h"
#include "detail/_namespace_injection.h"
#include "detail/_aligned_space.h"
#include "detail/_utils.h"
#include "detail/_template_helpers.h"
#include "detail/_range_common.h"
#include "detail/_task.h"
#include "detail/_small_object_pool.h"

#include "cache_aligned_allocator.h"
#include "task_group.h" // task_group_context
#include "task_arena.h"

#include <algorithm>
#include <atomic>
#include <type_traits>

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    // Workaround for overzealous compiler warnings
    #pragma warning (push)
    #pragma warning (disable: 4244)
#endif

namespace tbb {
namespace detail {

namespace d1 {
class auto_partitioner;
class simple_partitioner;
class static_partitioner;
class affinity_partitioner;
class affinity_partition_type;
class affinity_partitioner_base;

inline std::size_t get_initial_auto_partitioner_divisor() {
    const std::size_t factor = 4;
    return factor * static_cast<std::size_t>(max_concurrency());
}
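// Worked example (illustrative): on a machine where max_concurrency() reports 8, this
// returns 4 * 8 = 32. adaptive_mode below then takes this value / 4 * factor as its
// my_divisor, and auto_partition_type additionally scales that by __TBB_INITIAL_CHUNKS.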

//! Defines entry point for affinity partitioner into oneTBB run-time library.
class affinity_partitioner_base: no_copy {
    friend class affinity_partitioner;
    friend class affinity_partition_type;
    //! Array that remembers affinities of tree positions to affinity_id.
    /** nullptr if my_size==0. */
    slot_id* my_array;
    //! Number of elements in my_array.
    std::size_t my_size;
    //! Zeros the fields.
    affinity_partitioner_base() : my_array(nullptr), my_size(0) {}
    //! Deallocates my_array.
    ~affinity_partitioner_base() { resize(0); }
    //! Resize my_array.
    /** Retains values if resulting size is the same. */
    void resize(unsigned factor) {
        // Check factor to avoid asking for number of workers while there might be no arena.
        unsigned max_threads_in_arena = static_cast<unsigned>(max_concurrency());
        std::size_t new_size = factor ? factor * max_threads_in_arena : 0;
        if (new_size != my_size) {
            if (my_array) {
                r1::cache_aligned_deallocate(my_array);
                // Following two assignments must be done here for sake of exception safety.
                my_array = nullptr;
                my_size = 0;
            }
            if (new_size) {
                my_array = static_cast<slot_id*>(r1::cache_aligned_allocate(new_size * sizeof(slot_id)));
                std::fill_n(my_array, new_size, no_slot);
                my_size = new_size;
            }
        }
    }
};

template<typename Range, typename Body, typename Partitioner> struct start_for;
template<typename Range, typename Body, typename Partitioner> struct start_scan;
template<typename Range, typename Body, typename Partitioner> struct start_reduce;
template<typename Range, typename Body, typename Partitioner> struct start_deterministic_reduce;

struct node {
    node* my_parent{};
    std::atomic<int> m_ref_count{};

    node() = default;
    node(node* parent, int ref_count) :
        my_parent{parent}, m_ref_count{ref_count} {
        __TBB_ASSERT(ref_count > 0, "The ref count must be positive");
    }
};

struct wait_node : node {
    wait_node() : node{ nullptr, 1 } {}
    wait_context m_wait{1};
};

//! Join task node that contains shared flag for stealing feedback
struct tree_node : public node {
    small_object_allocator m_allocator;
    std::atomic<bool> m_child_stolen{false};

    tree_node(node* parent, int ref_count, small_object_allocator& alloc)
        : node{parent, ref_count}
        , m_allocator{alloc} {}

    void join(task_group_context*) {/*dummy, required only for reduction algorithms*/};

    template <typename Task>
    static void mark_task_stolen(Task &t) {
        std::atomic<bool> &flag = static_cast<tree_node*>(t.my_parent)->m_child_stolen;
#if TBB_USE_PROFILING_TOOLS
        // Threading tools respect lock prefix but report false-positive data-race via plain store
        flag.exchange(true);
#else
        flag.store(true, std::memory_order_relaxed);
#endif // TBB_USE_PROFILING_TOOLS
    }
    template <typename Task>
    static bool is_peer_stolen(Task &t) {
        return static_cast<tree_node*>(t.my_parent)->m_child_stolen.load(std::memory_order_relaxed);
    }
};

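// fold_tree() below walks from a leaf node toward the root, decrementing each node's
// reference count. Only the call that drops a count to zero continues upward: it joins and
// recycles that tree_node, and the call that finally reaches the root releases the
// wait_node's wait_context, signalling completion of the whole tree.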
// Context used to check cancellation state during reduction join process
template<typename TreeNodeType>
void fold_tree(node* n, const execution_data& ed) {
    for (;;) {
        __TBB_ASSERT(n, nullptr);
        __TBB_ASSERT(n->m_ref_count.load(std::memory_order_relaxed) > 0, "The refcount must be positive.");
        call_itt_task_notify(releasing, n);
        if (--n->m_ref_count > 0) {
            return;
        }
        node* parent = n->my_parent;
        if (!parent) {
            break;
        };

        call_itt_task_notify(acquired, n);
        TreeNodeType* self = static_cast<TreeNodeType*>(n);
        self->join(ed.context);
        self->m_allocator.delete_object(self, ed);
        n = parent;
    }
    // Finish parallel for execution when the root (last node) is reached
    static_cast<wait_node*>(n)->m_wait.release();
}

//! Depth is a relative depth of recursive division inside a range pool. Relative depth allows
//! infinite absolute depth of the recursion for heavily unbalanced workloads with a range
//! represented by a number that cannot fit into a machine word.
typedef unsigned char depth_t;

//! Range pool stores ranges of type T in a circular buffer with MaxCapacity
template <typename T, depth_t MaxCapacity>
class range_vector {
    depth_t my_head;
    depth_t my_tail;
    depth_t my_size;
    depth_t my_depth[MaxCapacity]; // relative depths of stored ranges
    tbb::detail::aligned_space<T, MaxCapacity> my_pool;

public:
    //! initialize via first range in pool
    range_vector(const T& elem) : my_head(0), my_tail(0), my_size(1) {
        my_depth[0] = 0;
        new( static_cast<void *>(my_pool.begin()) ) T(elem);//TODO: std::move?
    }
    ~range_vector() {
        while( !empty() ) pop_back();
    }
    bool empty() const { return my_size == 0; }
    depth_t size() const { return my_size; }
    //! Populates the range pool with ranges up to the max depth or while they are divisible
    //! max_depth starts from 0, e.g. value 2 makes 3 ranges in the pool up to two 1/4 pieces
    void split_to_fill(depth_t max_depth) {
        while( my_size < MaxCapacity && is_divisible(max_depth) ) {
            depth_t prev = my_head;
            my_head = (my_head + 1) % MaxCapacity;
            new(my_pool.begin()+my_head) T(my_pool.begin()[prev]); // copy TODO: std::move?
            my_pool.begin()[prev].~T(); // instead of assignment
            new(my_pool.begin()+prev) T(my_pool.begin()[my_head], detail::split()); // do 'inverse' split
            my_depth[my_head] = ++my_depth[prev];
            my_size++;
        }
    }
    void pop_back() {
        __TBB_ASSERT(my_size > 0, "range_vector::pop_back() with empty size");
        my_pool.begin()[my_head].~T();
        my_size--;
        my_head = (my_head + MaxCapacity - 1) % MaxCapacity;
    }
    void pop_front() {
        __TBB_ASSERT(my_size > 0, "range_vector::pop_front() with empty size");
        my_pool.begin()[my_tail].~T();
        my_size--;
        my_tail = (my_tail + 1) % MaxCapacity;
    }
    T& back() {
        __TBB_ASSERT(my_size > 0, "range_vector::back() with empty size");
        return my_pool.begin()[my_head];
    }
    T& front() {
        __TBB_ASSERT(my_size > 0, "range_vector::front() with empty size");
        return my_pool.begin()[my_tail];
    }
    //! similarly to front(), returns depth of the first range in the pool
    depth_t front_depth() {
        __TBB_ASSERT(my_size > 0, "range_vector::front_depth() with empty size");
        return my_depth[my_tail];
    }
    depth_t back_depth() {
        __TBB_ASSERT(my_size > 0, "range_vector::back_depth() with empty size");
        return my_depth[my_head];
    }
    bool is_divisible(depth_t max_depth) {
        return back_depth() < max_depth && back().is_divisible();
    }
};
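
// Illustrative behavior (a sketch, assuming an always-divisible range that splits in halves
// and MaxCapacity >= 3): starting from one range at depth 0, split_to_fill(2) first splits it
// into two halves at depth 1, then splits the piece at my_head again, leaving one half at
// depth 1 (returned by front()) and two quarters at depth 2 (back() returns the most recently
// produced quarter). This matches the "value 2 makes 3 ranges" note above.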

//! Provides default methods for partition objects and common algorithm blocks.
template <typename Partition>
struct partition_type_base {
    typedef detail::split split_type;
    // decision makers
    void note_affinity( slot_id ) {}
    template <typename Task>
    bool check_being_stolen(Task&, const execution_data&) { return false; } // part of old should_execute_range()
    template <typename Range> split_type get_split() { return split(); }
    Partition& self() { return *static_cast<Partition*>(this); } // CRTP helper

    template<typename StartType, typename Range>
    void work_balance(StartType &start, Range &range, const execution_data&) {
        start.run_body( range ); // static partitioner goes here
    }

    template<typename StartType, typename Range>
    void execute(StartType &start, Range &range, execution_data& ed) {
        // The algorithm in a few words ([]-denotes calls to decision methods of partitioner):
        // [If this task is stolen, adjust depth and divisions if necessary, set flag].
        // If range is divisible {
        //    Spread the work while [initial divisions left];
        //    Create trap task [if necessary];
        // }
        // If not divisible or [max depth is reached], execute, else do the range pool part
        if ( range.is_divisible() ) {
            if ( self().is_divisible() ) {
                do { // split while both the range and the partition are divisible
                    typename Partition::split_type split_obj = self().template get_split<Range>();
                    start.offer_work( split_obj, ed );
                } while ( range.is_divisible() && self().is_divisible() );
            }
        }
        self().work_balance(start, range, ed);
    }
};

//! Provides default splitting strategy for partition objects.
template <typename Partition>
struct adaptive_mode : partition_type_base<Partition> {
    typedef Partition my_partition;
    std::size_t my_divisor;
    // For affinity_partitioner, my_divisor indicates the number of affinity array indices the task reserves.
    // A task which has only one index must produce the right split without a reserved index, so that
    // the index is not overwritten in note_affinity() of the created (right) task.
    // I.e. a task created deeper than the affinity array can remember must not save its affinity (LIFO order)
    static const unsigned factor = 1;
    adaptive_mode() : my_divisor(get_initial_auto_partitioner_divisor() / 4 * my_partition::factor) {}
    adaptive_mode(adaptive_mode &src, split) : my_divisor(do_split(src, split())) {}
    adaptive_mode(adaptive_mode&, const proportional_split&) : my_divisor(0)
    {
        // left blank as my_divisor gets overridden in the successors' constructors
    }
    /*! Override do_split methods in order to specify splitting strategy */
    std::size_t do_split(adaptive_mode &src, split) {
        return src.my_divisor /= 2u;
    }
};


//! Provides proportional splitting strategy for partition objects
template <typename Partition>
struct proportional_mode : adaptive_mode<Partition> {
    typedef Partition my_partition;
    using partition_type_base<Partition>::self; // CRTP helper to get access to derived classes

    proportional_mode() : adaptive_mode<Partition>() {}
    proportional_mode(proportional_mode &src, split) : adaptive_mode<Partition>(src, split()) {}
    proportional_mode(proportional_mode &src, const proportional_split& split_obj)
        : adaptive_mode<Partition>(src, split_obj)
    {
        self().my_divisor = do_split(src, split_obj);
    }
    std::size_t do_split(proportional_mode &src, const proportional_split& split_obj) {
        std::size_t portion = split_obj.right() * my_partition::factor;
        portion = (portion + my_partition::factor/2) & (0ul - my_partition::factor);
        src.my_divisor -= portion;
        return portion;
    }
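    // Worked example (illustrative): with factor == 16, as in affinity_partition_type below,
    // a proportional_split whose right() part is 3 gives portion = 3 * 16 = 48; adding
    // factor/2 and masking with (0ul - factor) rounds to the nearest multiple of 16 (still 48),
    // which is subtracted from src.my_divisor and becomes the new (right) task's divisor.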
    bool is_divisible() { // part of old should_execute_range()
        return self().my_divisor > my_partition::factor;
    }
    template <typename Range>
    proportional_split get_split() {
        // Create the proportion from partitioner internal resources (threads) that would be used:
        // - into proportional_mode constructor to split the partitioner
        // - if Range supports the proportional_split constructor it would use the proposed proportion,
        //   otherwise the tbb::proportional_split object will be implicitly (for the Range implementer)
        //   cast to tbb::split

        std::size_t n = self().my_divisor / my_partition::factor;
        std::size_t right = n / 2;
        std::size_t left = n - right;
        return proportional_split(left, right);
    }
};

static std::size_t get_initial_partition_head() {
    int current_index = tbb::this_task_arena::current_thread_index();
    if (current_index == tbb::task_arena::not_initialized)
        current_index = 0;
    return size_t(current_index);
}

//! Provides default linear indexing of partitioner's sequence
template <typename Partition>
struct linear_affinity_mode : proportional_mode<Partition> {
    std::size_t my_head;
    std::size_t my_max_affinity;
    using proportional_mode<Partition>::self;
    linear_affinity_mode() : proportional_mode<Partition>(), my_head(get_initial_partition_head()),
                             my_max_affinity(self().my_divisor) {}
    linear_affinity_mode(linear_affinity_mode &src, split) : proportional_mode<Partition>(src, split())
        , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {}
    linear_affinity_mode(linear_affinity_mode &src, const proportional_split& split_obj) : proportional_mode<Partition>(src, split_obj)
        , my_head((src.my_head + src.my_divisor) % src.my_max_affinity), my_max_affinity(src.my_max_affinity) {}
    void spawn_task(task& t, task_group_context& ctx) {
        if (self().my_divisor) {
            spawn(t, ctx, slot_id(my_head));
        } else {
            spawn(t, ctx);
        }
    }
};

static bool is_stolen_task(const execution_data& ed) {
    return execution_slot(ed) != original_slot(ed);
}

/*! Determine work-balance phase implementing splitting & stealing actions */
template<class Mode>
struct dynamic_grainsize_mode : Mode {
    using Mode::self;
    enum {
        begin = 0,
        run,
        pass
    } my_delay;
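    // Note: my_delay implements a one-call delay. A task that starts in the 'begin' state
    // skips its first check_for_demand() call (merely advancing to 'pass'), so it executes at
    // least one piece from its own range pool before it starts offering work to thieves.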
    depth_t my_max_depth;
    static const unsigned range_pool_size = __TBB_RANGE_POOL_CAPACITY;
    dynamic_grainsize_mode(): Mode()
        , my_delay(begin)
        , my_max_depth(__TBB_INIT_DEPTH) {}
    dynamic_grainsize_mode(dynamic_grainsize_mode& p, split)
        : Mode(p, split())
        , my_delay(pass)
        , my_max_depth(p.my_max_depth) {}
    dynamic_grainsize_mode(dynamic_grainsize_mode& p, const proportional_split& split_obj)
        : Mode(p, split_obj)
        , my_delay(begin)
        , my_max_depth(p.my_max_depth) {}
    template <typename Task>
    bool check_being_stolen(Task &t, const execution_data& ed) { // part of old should_execute_range()
        if( !(self().my_divisor / Mode::my_partition::factor) ) { // if not from the top P tasks of binary tree
            self().my_divisor = 1; // TODO: replace by on-stack flag (partition_state's member)?
            if( is_stolen_task(ed) && t.my_parent->m_ref_count >= 2 ) { // runs concurrently with the left task
#if __TBB_USE_OPTIONAL_RTTI
                // RTTI is available, check whether the cast is valid
                // TODO: TBB_REVAMP_TODO __TBB_ASSERT(dynamic_cast<tree_node*>(t.m_parent), 0);
                // correctness of the cast relies on avoiding the root task for which:
                //  - initial value of my_divisor != 0 (protected by separate assertion)
                //  - is_stolen_task() always returns false for the root task.
#endif
                tree_node::mark_task_stolen(t);
                if( !my_max_depth ) my_max_depth++;
                my_max_depth += __TBB_DEMAND_DEPTH_ADD;
                return true;
            }
        }
        return false;
    }
    depth_t max_depth() { return my_max_depth; }
    void align_depth(depth_t base) {
        __TBB_ASSERT(base <= my_max_depth, nullptr);
        my_max_depth -= base;
    }
    template<typename StartType, typename Range>
    void work_balance(StartType &start, Range &range, execution_data& ed) {
        if( !range.is_divisible() || !self().max_depth() ) {
            start.run_body( range );
        }
        else { // do range pool
            range_vector<Range, range_pool_size> range_pool(range);
            do {
                range_pool.split_to_fill(self().max_depth()); // fill range pool
                if( self().check_for_demand( start ) ) {
                    if( range_pool.size() > 1 ) {
                        start.offer_work( range_pool.front(), range_pool.front_depth(), ed );
                        range_pool.pop_front();
                        continue;
                    }
                    if( range_pool.is_divisible(self().max_depth()) ) // was not enough depth to fork a task
                        continue; // note: next split_to_fill() should split the range at least once
                }
                start.run_body( range_pool.back() );
                range_pool.pop_back();
            } while( !range_pool.empty() && !ed.context->is_group_execution_cancelled() );
        }
    }
    template <typename Task>
    bool check_for_demand(Task& t) {
        if ( pass == my_delay ) {
            if ( self().my_divisor > 1 ) // produce affinitized tasks while they have a slot in the array
                return true; // do not do my_max_depth++ here, but be sure range_pool is splittable once more
            else if ( self().my_divisor && my_max_depth ) { // make balancing task
                self().my_divisor = 0; // once for each task; depth will be decreased in align_depth()
                return true;
            }
            else if ( tree_node::is_peer_stolen(t) ) {
                my_max_depth += __TBB_DEMAND_DEPTH_ADD;
                return true;
            }
        } else if( begin == my_delay ) {
            my_delay = pass;
        }
        return false;
    }
};

class auto_partition_type: public dynamic_grainsize_mode<adaptive_mode<auto_partition_type> > {
public:
    auto_partition_type( const auto_partitioner& ) {
        my_divisor *= __TBB_INITIAL_CHUNKS;
    }
    auto_partition_type( auto_partition_type& src, split)
        : dynamic_grainsize_mode<adaptive_mode<auto_partition_type> >(src, split()) {}
    bool is_divisible() { // part of old should_execute_range()
        if( my_divisor > 1 ) return true;
        if( my_divisor && my_max_depth ) { // can split the task. TODO: on-stack flag instead
            // keep same fragmentation while splitting for the local task pool
            my_max_depth--;
            my_divisor = 0; // decrease max_depth once per task
            return true;
        } else return false;
    }
    template <typename Task>
    bool check_for_demand(Task& t) {
        if (tree_node::is_peer_stolen(t)) {
            my_max_depth += __TBB_DEMAND_DEPTH_ADD;
            return true;
        } else return false;
    }
    void spawn_task(task& t, task_group_context& ctx) {
        spawn(t, ctx);
    }
};

class simple_partition_type: public partition_type_base<simple_partition_type> {
public:
    simple_partition_type( const simple_partitioner& ) {}
    simple_partition_type( const simple_partition_type&, split ) {}
    //! simplified algorithm
    template<typename StartType, typename Range>
    void execute(StartType &start, Range &range, execution_data& ed) {
        split_type split_obj = split(); // start.offer_work accepts split_type as reference
        while( range.is_divisible() )
            start.offer_work( split_obj, ed );
        start.run_body( range );
    }
    void spawn_task(task& t, task_group_context& ctx) {
        spawn(t, ctx);
    }
};

class static_partition_type : public linear_affinity_mode<static_partition_type> {
public:
    typedef detail::proportional_split split_type;
    static_partition_type( const static_partitioner& ) {}
    static_partition_type( static_partition_type& p, const proportional_split& split_obj )
        : linear_affinity_mode<static_partition_type>(p, split_obj) {}
};

class affinity_partition_type : public dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> > {
    static const unsigned factor_power = 4; // TODO: get a unified formula based on number of computing units
    slot_id* my_array;
public:
    static const unsigned factor = 1 << factor_power; // number of slots in affinity array per task
    typedef detail::proportional_split split_type;
    affinity_partition_type( affinity_partitioner_base& ap ) {
        __TBB_ASSERT( (factor&(factor-1))==0, "factor must be power of two" );
        ap.resize(factor);
        my_array = ap.my_array;
        my_max_depth = factor_power + 1;
        __TBB_ASSERT( my_max_depth < __TBB_RANGE_POOL_CAPACITY, nullptr );
    }
    affinity_partition_type(affinity_partition_type& p, split)
        : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split())
        , my_array(p.my_array) {}
    affinity_partition_type(affinity_partition_type& p, const proportional_split& split_obj)
        : dynamic_grainsize_mode<linear_affinity_mode<affinity_partition_type> >(p, split_obj)
        , my_array(p.my_array) {}
    void note_affinity(slot_id id) {
        if( my_divisor )
            my_array[my_head] = id;
    }
    void spawn_task(task& t, task_group_context& ctx) {
        if (my_divisor) {
            if (!my_array[my_head]) {
                // TODO: consider new ideas with my_array for both affinity and static partitioner's, then code reuse
                spawn(t, ctx, slot_id(my_head / factor));
            } else {
                spawn(t, ctx, my_array[my_head]);
            }
        } else {
            spawn(t, ctx);
        }
    }
};

//! A simple partitioner
/** Divides the range until it is no longer divisible.
    @ingroup algorithms */
class simple_partitioner {
public:
    simple_partitioner() {}
private:
    template<typename Range, typename Body, typename Partitioner> friend struct start_for;
    template<typename Range, typename Body, typename Partitioner> friend struct start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_scan;
    // new implementation just extends existing interface
    typedef simple_partition_type task_partition_type;
    // TODO: consider to make split_type public
    typedef simple_partition_type::split_type split_type;

    // for parallel_scan only
    class partition_type {
    public:
        bool should_execute_range(const execution_data& ) {return false;}
        partition_type( const simple_partitioner& ) {}
        partition_type( const partition_type&, split ) {}
    };
};
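
// Illustrative usage (a sketch, not part of this header): simple_partitioner splits down to
// the range's grainsize, so chunk size is controlled entirely by the range. The bounds and
// grainsize below are arbitrary example values.
//
//     tbb::parallel_for(tbb::blocked_range<std::size_t>(0, 1000, /*grainsize*/ 64),
//                       [](const tbb::blocked_range<std::size_t>& r) {
//                           for (std::size_t i = r.begin(); i != r.end(); ++i) { /* work */ }
//                       },
//                       tbb::simple_partitioner{});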

//! An auto partitioner
/** The range is initially divided into several large chunks.
    Chunks are further subdivided into smaller pieces if demand is detected and they are divisible.
    @ingroup algorithms */
class auto_partitioner {
public:
    auto_partitioner() {}

private:
    template<typename Range, typename Body, typename Partitioner> friend struct start_for;
    template<typename Range, typename Body, typename Partitioner> friend struct start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_scan;
    // new implementation just extends existing interface
    typedef auto_partition_type task_partition_type;
    // TODO: consider to make split_type public
    typedef auto_partition_type::split_type split_type;

    //! Backward-compatible partition for auto and affinity partition objects.
    class partition_type {
        size_t num_chunks;
        static const size_t VICTIM_CHUNKS = 4;
    public:
        bool should_execute_range(const execution_data& ed) {
            if( num_chunks<VICTIM_CHUNKS && is_stolen_task(ed) )
                num_chunks = VICTIM_CHUNKS;
            return num_chunks==1;
        }
        partition_type( const auto_partitioner& )
            : num_chunks(get_initial_auto_partitioner_divisor()*__TBB_INITIAL_CHUNKS/4) {}
        partition_type( partition_type& pt, split ) {
            num_chunks = pt.num_chunks = (pt.num_chunks+1u) / 2u;
        }
    };
};
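
// Illustrative usage (a sketch, not part of this header): auto_partitioner is the default
// partitioner for most of the parallel loop algorithms, so passing it explicitly is equivalent
// to omitting the argument; the range below uses the default grainsize of 1.
//
//     tbb::parallel_for(tbb::blocked_range<int>(0, 100000),
//                       [](const tbb::blocked_range<int>& r) {
//                           for (int i = r.begin(); i != r.end(); ++i) { /* work */ }
//                       },
//                       tbb::auto_partitioner{});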

//! A static partitioner
class static_partitioner {
public:
    static_partitioner() {}
private:
    template<typename Range, typename Body, typename Partitioner> friend struct start_for;
    template<typename Range, typename Body, typename Partitioner> friend struct start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_scan;
    // new implementation just extends existing interface
    typedef static_partition_type task_partition_type;
    // TODO: consider to make split_type public
    typedef static_partition_type::split_type split_type;
};
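
// Illustrative usage (a sketch, not part of this header): static_partitioner deals the
// iteration space out across the available threads up front, with no further demand-driven
// splitting, which suits uniform per-iteration work.
//
//     tbb::parallel_for(tbb::blocked_range<int>(0, n),   // n is a hypothetical problem size
//                       body,                            // a callable taking the blocked_range
//                       tbb::static_partitioner{});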

//! An affinity partitioner
class affinity_partitioner : affinity_partitioner_base {
public:
    affinity_partitioner() {}

private:
    template<typename Range, typename Body, typename Partitioner> friend struct start_for;
    template<typename Range, typename Body, typename Partitioner> friend struct start_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_deterministic_reduce;
    template<typename Range, typename Body, typename Partitioner> friend struct start_scan;
    // new implementation just extends existing interface
    typedef affinity_partition_type task_partition_type;
    // TODO: consider to make split_type public
    typedef affinity_partition_type::split_type split_type;
};
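
// Illustrative usage (a sketch, not part of this header): the benefit of affinity_partitioner
// comes from reusing the same object across repeated runs over the same data, so that it can
// replay the slot affinities recorded in my_array by note_affinity().
//
//     tbb::affinity_partitioner ap;                       // must outlive the loop below
//     for (int step = 0; step < num_steps; ++step)        // num_steps is a hypothetical count
//         tbb::parallel_for(tbb::blocked_range<int>(0, n), body, ap);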

} // namespace d1
} // namespace detail

inline namespace v1 {
// Partitioners
using detail::d1::auto_partitioner;
using detail::d1::simple_partitioner;
using detail::d1::static_partitioner;
using detail::d1::affinity_partitioner;
// Split types
using detail::split;
using detail::proportional_split;
} // namespace v1

} // namespace tbb

#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
    #pragma warning (pop)
#endif // warning 4244 is back

#undef __TBB_INITIAL_CHUNKS
#undef __TBB_RANGE_POOL_CAPACITY
#undef __TBB_INIT_DEPTH

#endif /* __TBB_partitioner_H */
