| /oneTBB/examples/parallel_for/tachyon/src/ |
| H A D | vector.cpp | 55 return (a->x * b->x + a->y * b->y + a->z * b->z); in VDot() 59 c->x = (a->y * b->z) - (a->z * b->y); in VCross() 60 c->y = (a->z * b->x) - (a->x * b->z); in VCross() 61 c->z = (a->x * b->y) - (a->y * b->x); in VCross() 65 return (flt)sqrt((a->x * a->x) + (a->y * a->y) + (a->z * a->z)); in VLength() 71 len = sqrt((a->x * a->x) + (a->y * a->y) + (a->z * a->z)); in VNorm() 100 temp.x = a->o.x + (a->d.x * t); in Raypnt() 101 temp.y = a->o.y + (a->d.y * t); in Raypnt() 102 temp.z = a->o.z + (a->d.z * t); in Raypnt() 108 a->x *= s; in VScale() [all …]
|
| H A D | macros.hpp | 53 #define MYMAX(a, b) ((a) > (b) ? (a) : (b)) argument 54 #define MYMIN(a, b) ((a) < (b) ? (a) : (b)) argument 56 #define VDOT(return, a, b) return = (a.x * b.x + a.y * b.y + a.z * b.z); argument 59 c.x = a.o.x + (a.d.x * b); \ 60 c.y = a.o.y + (a.d.y * b); \ 61 c.z = a.o.z + (a.d.z * b); 63 #define VSUB(a, b, c) \ argument 66 c.z = (a.z - b.z); 69 c->x = (a->y * b->z) - (a->z * b->y); \ 70 c->y = (a->z * b->x) - (a->x * b->z); \ [all …]
|
| H A D | box.cpp | 100 a = tx1; in box_intersect() 102 tx2 = a; in box_intersect() 122 a = ty1; in box_intersect() 124 ty2 = a; in box_intersect() 144 a = tz1; in box_intersect() 146 tz2 = a; in box_intersect() 163 vector a, b, c; in box_normal() local 181 t = MYMAX(a.x, MYMAX(a.y, a.z)); in box_normal() 183 if (t == a.x) in box_normal() 186 if (t == a.y) in box_normal() [all …]
|
| /oneTBB/test/common/ |
| H A D | allocator_stl_test_common.h | 27 Container c(a); in TestSequence() 41 void TestSet(const typename Set::allocator_type &a) { in TestSet() argument 42 Set s(typename Set::key_compare(), a); in TestSet() 52 void TestMap(const typename Map::allocator_type &a) { in TestMap() argument 53 Map m(typename Map::key_compare(), a); in TestMap() 106 TestSequence<std::deque <int,Ai> >(a); 107 TestSequence<std::list <int,Ai> >(a); 108 TestSequence<std::vector<int,Ai> >(a); 124 TestSequence<std::deque <const int,Aci> >(a); 126 TestSequence<std::list <const int,Aci> >(a); [all …]
|
| H A D | allocator_test_common.h | 147 a.construct(p, cx); in TestBrokenAllocator() 151 a.destroy(p); in TestBrokenAllocator() 153 a.deallocate(p, 1); in TestBrokenAllocator() 209 a.deallocate(p1, too_big); in TestAllocatorExceptions() 226 A &a; member 227 Body(A &a_) : a(a_) {} in Body() 270 void TestThreadSafety(A &a) { in TestThreadSafety() argument 296 Allocator a_cpy(a); 297 AllocatorFooChar a1(a); 298 AllocatorFooDouble a2(a); [all …]
|
| /oneTBB/test/tbbmalloc/ |
| H A D | test_malloc_regression.cpp | 31 tbb::scalable_allocator<char> a; in operator ()() local 32 char* str = a.allocate( size ); in operator ()() 33 a.deallocate( str, size ); in operator ()() 56 tbb::scalable_allocator<char> a; in TestBootstrapLeak() local 59 array[i] = a.allocate( alloc_size ); in TestBootstrapLeak() 88 a.deallocate( array[i], alloc_size ); in TestBootstrapLeak() 128 for (int a=0; align[a]; a++) in TestAlignedMsize() local 180 a = b; 182 sum = a+b; 184 for (size_t a=2; a<=64*1024; a*=2) { variable [all …]
|
| /oneTBB/doc/main/tbb_userguide/ |
| H A D | work_isolation.rst | 10 In |full_name|, a thread waiting for a 12 particular, when a parallel construct calls another parallel 13 construct, a thread can obtain a task from the outer-level construct 36 *unsequenced* even within a single thread. In most cases, this 42 errors. For example, a thread-local variable might unexpectedly 43 change its value after a nested parallel construct: 51 // Set a thread specific value 77 // Set a thread specific value 88 However, using a separate arena for work isolation is not always 96 When entered a task waiting call or a blocking parallel construct [all …]
|
| H A D | parallel_for_os.rst | 7 Suppose you want to apply a function ``Foo`` to each element of an 15 void SerialApplyFoo( float a[], size_t n ) { 17 Foo(a[i]); 39 float *a = my_a; 41 Foo(a[i]); 43 ApplyFoo( float a[] ) : 44 my_a(a) 68 object have a copy constructor, which is invoked to create a separate 79 whether ``operator()`` is acting on the original or a copy. As a 84 The example ``operator()`` loads ``my_a`` into a local variable ``a``. [all …]
|
| H A D | Flow_Graph_Buffering_in_Nodes.rst | 8 to communicate data and to enforce dependencies. If a node passes a 11 Broadcast-push, a message may be passed to one or to multiple 16 There are times when a node cannot successfully push a message to any 25 If a node discards messages that are not forwarded, and this behavior is 26 not desired, the node should be connected to a buffering node that does 30 If a message has been stored by a node, there are two ways it can be 39 If a ``try_get()`` successfully forwards a message, it is removed from 40 the node that stored it. If a node is connected using ``make_edge`` the 41 node will attempt to push a stored message to the new successor.
|
| H A D | Floating_Point_Settings.rst | 8 * When a ``task_arena`` or a task scheduler for a given application thread is initialized, they cap… 9 * The ``task_group_context`` class has a method to capture the current floating-point settings. 11 By default, worker threads use floating-point settings obtained during the initialization of a ``ta… 14 …r floating point behavior, a thread may capture the current settings in a task group context. Do i… 33 …aptured to a task group context prevail over the settings captured during task scheduler initializ… 34 Otherwise, if floating-point settings are not captured to the context, or a context is not explicit… 36 In a nested call to a parallel algorithm that does not use the context of a task group with explici… 41 * Floating-point settings are applied to all tasks executed within a task arena, if they are captur… 43 * To a task group context. 46 * A call to a oneTBB parallel algorithm does not change the floating-point settings of the calling … [all …]
|
| H A D | use_nested_algorithms.rst | 7 One powerful way to increase the scalability of a flow graph is to nest 8 other parallel algorithms inside of node bodies. Doing so, you can use a 9 flow graph as a coordination language, expressing the most 15 ``matrix_source``, that reads a sequence of matrices from a file, two 17 new matrices by applying a function to each element, and two final 21 expressions for ``n1`` and ``n2``, a ``parallel_for`` is used to apply the functions 32 double *a = read_next_matrix(); 33 if ( a ) { 34 return a; 43 b[i] = f1(a[i]); [all …]
|
| H A D | Nodes.rst | 7 A node is a class that inherits from oneapi::tbb::flow::graph_node and also 16 types are used to construct a graph. 20 represents a simple function with one input and one output. The 21 constructor for a ``function_node`` takes three arguments: 50 Below is code for creating a simple graph that contains a single 52 graph g, and has a second argument of 1, which allows at most 1 53 invocation of the node to occur concurrently. The body is a lambda 95 In the above example code, the function_node n was created with a 101 try_put do not block until a task is spawned; if a node cannot 124 to spawn a task as soon as a message arrives, regardless of how many [all …]
|
| H A D | Dependence_Graph.rst | 7 In a dependence graph, the nodes invoke body objects to perform 8 computations and the edges create a partial ordering of these 12 application that could be expressed using a dependence graph. 19 Dependence Graph for Making a Sandwich 30 general data flow graph, nodes in a dependence graph do not spawn a task 71 The first argument is the graph it belongs to and the second is a 72 function object or lambda expression. Unlike a function_node, a 74 immediately spawn a task whenever its dependencies are met. 90 node_t A(g, [](msg_t){ a(); } ); 117 Execution Timeline for a Dependence Graph [all …]
|
| H A D | Predefined_Node_Types.rst | 10 flow_graph.h. Below is a table that lists all of the predefined types 11 with a basic description. See the Developer Reference for a more 25 - A single-output node, with a generic output type. 26 …When activated, it executes a user body to generate its output. Its body is invoked if downstream … 31 … successors. It has a single input that requires 1 or more inputs of type continue_msg and has a… 33 …A single-input multi-output node. It has a generic input type and several generic output types.… 39 …ypes and the output type is a tuple of these generic types. The node combines one message from e… 41 …a tuple of generic types and there is one output port for each of the types in the tuple. The… 47 …essage to all of its successors. The input type is a list of generic types and the output type i… 49 …th input and output. The composite_node packages a group of other nodes together and maintain… [all …]
|
| H A D | Controlling_Chunking_os.rst | 7 Chunking is controlled by a *partitioner* and a *grainsize.*\ To gain 35 void ParallelApplyFoo( float a[], size_t n ) { 36 parallel_for(blocked_range<size_t>(0,n,G), ApplyFoo(a), 41 The grainsize sets a minimum threshold for parallelization. The 44 iterations in a chunk. Using ``simple_partitioner`` guarantees that 84 shows how too small a grainsize leads to a relatively high proportion of 89 number of processors when setting a grainsize. 103 step 3 will guide you to a much smaller value. 117 on the side of being a little too high instead of a little too low, 128 versus grainsize, based on the floating point ``a[i]=b[i]*c`` [all …]
|
| H A D | parallel_reduce.rst | 16 sum += Foo(a[i]); 29 SumFoo sf(a); 48 float *a = my_a; 52 sum += Foo(a[i]); 63 SumFoo(float a[] ) : 64 my_a(a), my_sum(0) 71 SumFoo::my_sum. Second, ``SumFoo`` has a *splitting constructor* and a 76 a copy constructor. 98 happens when a worker is available: 147 sum += Foo(a[i]); [all …]
|
| H A D | estimate_flow_graph_performance.rst | 7 The performance or scalability of a flow graph is not easy to predict. 8 However there are a few key points that can guide you in estimating the 15 .. rubric:: The Critical Path Limits the Scalability in a Dependence 19 A critical path is the most time consuming path from a node with no 20 predecessors to a node with no successors. In a dependence graph, the 21 execution of the nodes along a path cannot be overlapped since they 22 have a strict ordering. Therefore, for a dependence graph, the 29 this path cannot be overlapped even in a parallel execution. 39 .. rubric:: There is Overhead in Spawning a Node's Body as a Task 45 scheduling when estimating the time it takes for a node to execute
|
| H A D | appendix_A.rst | 8 threads. Each logical thread is serviced for a *time slice* by a 9 physical thread. If a thread runs longer than a time slice, as most do, 25 reality of set-associative caches is a bit more complicated, but this is 26 not a cache primer.) When a logical thread gets its time slice, as it 27 references a piece of data for the first time, this data will be pulled 30 cache, and only take a few cycles. Such data is called "hot in cache". 36 worse yet, the next time slice for thread A may be on a different 37 physical thread that has a different cache altogether. 40 Another cost is *lock preemption.* This happens if a thread acquires a 41 lock on a resource, and its time slice runs out before it releases the [all …]
|
| H A D | Throughput_of_pipeline.rst | 7 The throughput of a pipeline is the rate at which tokens flow through 8 it, and is limited by two constraints. First, if a pipeline is run with 11 experimentation. Too low a value limits parallelism; too high a value 13 throughput of a pipeline is limited by the throughput of the slowest 14 sequential filter. This is true even for a pipeline with no parallel 23 with files that are on a local disk, you are unlikely to see a speedup 24 much more than 2. To really benefit from a pipeline, the parallel 32 cache. A good guideline is to try for a large window size that still 33 fits in cache. You may have to experiment a bit to find a good window
|
| H A D | Reader_Writer_Mutexes.rst | 7 Mutual exclusion is necessary when at least one thread *writes* to a 8 shared variable. But it does no harm to permit multiple readers into a 12 lock on a given mutex. 15 Requests for a reader lock are distinguished from requests for a writer 17 ``scoped_lock``. The parameter is false to request a reader lock and 18 true to request a writer lock. It defaults to ``true`` so that when 19 omitted, a ``spin_rw_mutex`` or ``queuing_rw_mutex`` behaves like its
|
| /oneTBB/examples/parallel_pipeline/square/ |
| H A D | gen_input.cpp | 42 int a = 0; in gen_input() local 45 fprintf(fptr, "%u\n", a); in gen_input() 46 b += a; in gen_input() 47 a = (b - a) % 10000; in gen_input() 48 if (a < 0) in gen_input() 49 a = -a; in gen_input()
|
| /oneTBB/src/tbb/ |
| H A D | arena.cpp | 456 a.my_tc_client = control->create_client(a); in create() 459 return a; in create() 571 assert_pointer_valid(a); in terminate() 581 arena* a = td->my_arena; in attach() local 601 arena* a = ta ? in enqueue() local 611 a->enqueue_task(t, *ctx, *td); in enqueue() 812 if (a->my_max_num_workers != 0) { in wait() 813 while (a->num_workers_active() || !a->is_empty()) { in wait() 820 arena* a = nullptr; in max_concurrency() local 829 if (a->is_arena_workerless() && a->my_num_reserved_slots == 1) { in max_concurrency() [all …]
|
| H A D | task_dispatcher.cpp | 26 a->advertise_new_work<arena::work_spawned>(); in spawn_and_notify() 33 arena* a = tls->my_arena; in spawn() local 39 spawn_and_notify(t, slot, a); in spawn() 45 arena* a = tls->my_arena; in spawn() local 65 proxy->outbox = &a->mailbox(id); in spawn() 71 spawn_and_notify(*proxy, slot, a); in spawn() 73 spawn_and_notify(t, slot, a); in spawn() 79 assert_pointer_valid(a); in submit() 90 if ( tls.is_attached_to(a) ) { in submit() 215 arena* a = m_thread_data->my_arena; in co_local_wait_for_all() local [all …]
|
| /oneTBB/ |
| H A D | third-party-programs.txt | 168 …a work means to copy from or adapt all or part of the work in a fashion requiring copyright permis… 174 …a work means any kind of propagation that enables other parties to make or receive copies. Mere in… 179 …ource code" for a work means the preferred form of the work for making modifications to it. "Objec… 211 a) The work must carry prominent notices stating that you modified it, and giving a relevant date. 220 …a) Convey the object code in, or embodied in, a physical product (including a physical distributio… 221 …a physical product (including a physical distribution medium), accompanied by a written offer, val… 250 …a notice stating that it is governed by this License along with a term that is a further restricti… 252 …a covered work in accord with this section, you must place, in the relevant source files, a statem… 282 …a "patent license" is any express agreement or commitment, however denominated, not to enforce a p… 323 <one line to give the program's name and a brief idea of what it does.> [all …]
|
| /oneTBB/doc/main/tbb_userguide/design_patterns/ |
| H A D | Fenced_Data_Transfer.rst | 13 Write a message to memory and have another processor read it on 14 hardware that does not have a sequentially consistent memory model. 24 concurrently act on a memory location, or are using reads and writes 29 Modern hardware and compilers can reorder memory operations in a way 30 that preserves the order of a thread's operation from its viewpoint, 32 write a message and mark it as ready to ready as shown in the 130 semantics and release of a lock has *release* semantics. Thus a 131 thread that acquires a lock on a mutex always sees any memory writes 132 done by another thread before it released a lock on that mutex. 158 Similarly, it is a mistake to assume that a processor cannot read the [all …]
|