1d86ed7fbStbbdev /*
2*c21e688aSSergey Zheltov Copyright (c) 2005-2022 Intel Corporation
3d86ed7fbStbbdev
4d86ed7fbStbbdev Licensed under the Apache License, Version 2.0 (the "License");
5d86ed7fbStbbdev you may not use this file except in compliance with the License.
6d86ed7fbStbbdev You may obtain a copy of the License at
7d86ed7fbStbbdev
8d86ed7fbStbbdev http://www.apache.org/licenses/LICENSE-2.0
9d86ed7fbStbbdev
10d86ed7fbStbbdev Unless required by applicable law or agreed to in writing, software
11d86ed7fbStbbdev distributed under the License is distributed on an "AS IS" BASIS,
12d86ed7fbStbbdev WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13d86ed7fbStbbdev See the License for the specific language governing permissions and
14d86ed7fbStbbdev limitations under the License.
15d86ed7fbStbbdev */
16d86ed7fbStbbdev
17d86ed7fbStbbdev /* Example program that computes Fibonacci numbers in different ways.
18d86ed7fbStbbdev Arguments are: [ Number [Threads [Repeats]]]
19d86ed7fbStbbdev The defaults are Number=500 Threads=1:4 Repeats=1.
20d86ed7fbStbbdev
21d86ed7fbStbbdev The point of this program is to check that the library is working properly.
22d86ed7fbStbbdev Most of the computations are deliberately silly and not expected to
23d86ed7fbStbbdev show any speedup on multiprocessors.
24d86ed7fbStbbdev */
25d86ed7fbStbbdev
26d86ed7fbStbbdev // enable assertions
27d86ed7fbStbbdev #ifdef NDEBUG
28d86ed7fbStbbdev #undef NDEBUG
29d86ed7fbStbbdev #endif
30d86ed7fbStbbdev
31d86ed7fbStbbdev #include <cstdio>
32d86ed7fbStbbdev #include <cstdlib>
33d86ed7fbStbbdev #include <cassert>
34d86ed7fbStbbdev
35d86ed7fbStbbdev #include <utility>
36d86ed7fbStbbdev #include <thread>
37d86ed7fbStbbdev #include <atomic>
38d86ed7fbStbbdev #include <mutex>
39d86ed7fbStbbdev
40d86ed7fbStbbdev #include "oneapi/tbb/tick_count.h"
41d86ed7fbStbbdev #include "oneapi/tbb/blocked_range.h"
42d86ed7fbStbbdev #include "oneapi/tbb/concurrent_vector.h"
43d86ed7fbStbbdev #include "oneapi/tbb/concurrent_queue.h"
44d86ed7fbStbbdev #include "oneapi/tbb/concurrent_hash_map.h"
45d86ed7fbStbbdev #include "oneapi/tbb/parallel_for.h"
46d86ed7fbStbbdev #include "oneapi/tbb/parallel_reduce.h"
47d86ed7fbStbbdev #include "oneapi/tbb/parallel_scan.h"
48d86ed7fbStbbdev #include "oneapi/tbb/parallel_pipeline.h"
49d86ed7fbStbbdev #include "oneapi/tbb/spin_mutex.h"
50d86ed7fbStbbdev #include "oneapi/tbb/queuing_mutex.h"
51d86ed7fbStbbdev #include "oneapi/tbb/global_control.h"
52d86ed7fbStbbdev
53d86ed7fbStbbdev //! type used for Fibonacci number computations
54d86ed7fbStbbdev typedef long long value;
55d86ed7fbStbbdev
56d86ed7fbStbbdev //! Matrix 2x2 class
57d86ed7fbStbbdev struct Matrix2x2 {
58d86ed7fbStbbdev //! Array of values
59d86ed7fbStbbdev value v[2][2];
Matrix2x2Matrix2x260d86ed7fbStbbdev Matrix2x2() {}
Matrix2x2Matrix2x261d86ed7fbStbbdev Matrix2x2(value v00, value v01, value v10, value v11) {
62d86ed7fbStbbdev v[0][0] = v00;
63d86ed7fbStbbdev v[0][1] = v01;
64d86ed7fbStbbdev v[1][0] = v10;
65d86ed7fbStbbdev v[1][1] = v11;
66d86ed7fbStbbdev }
67d86ed7fbStbbdev Matrix2x2 operator*(const Matrix2x2 &to) const; //< Multiply two Matrices
68d86ed7fbStbbdev };
69d86ed7fbStbbdev //! Identity matrix
70d86ed7fbStbbdev static const Matrix2x2 MatrixIdentity(1, 0, 0, 1);
71d86ed7fbStbbdev //! Default matrix to multiply
72d86ed7fbStbbdev static const Matrix2x2 Matrix1110(1, 1, 1, 0);
73d86ed7fbStbbdev //! Raw arrays matrices multiply
74d86ed7fbStbbdev void Matrix2x2Multiply(const value a[2][2], const value b[2][2], value c[2][2]);
75d86ed7fbStbbdev
76d86ed7fbStbbdev /////////////////////// Serial methods ////////////////////////
77d86ed7fbStbbdev
78d86ed7fbStbbdev //! Plain serial sum
SerialFib(int n)79d86ed7fbStbbdev value SerialFib(int n) {
80d86ed7fbStbbdev if (n < 2)
81d86ed7fbStbbdev return n;
82d86ed7fbStbbdev value a = 0, b = 1, sum;
83d86ed7fbStbbdev int i;
84d86ed7fbStbbdev for (i = 2; i <= n; i++) { // n is really index of Fibonacci number
85d86ed7fbStbbdev sum = a + b;
86d86ed7fbStbbdev a = b;
87d86ed7fbStbbdev b = sum;
88d86ed7fbStbbdev }
89d86ed7fbStbbdev return sum;
90d86ed7fbStbbdev }
91d86ed7fbStbbdev //! Serial n-1 matrices multiplication
SerialMatrixFib(int n)92d86ed7fbStbbdev value SerialMatrixFib(int n) {
93d86ed7fbStbbdev value c[2][2], a[2][2] = { { 1, 1 }, { 1, 0 } }, b[2][2] = { { 1, 1 }, { 1, 0 } };
94d86ed7fbStbbdev int i;
95d86ed7fbStbbdev for (i = 2; i < n; i++) { // Using condition to prevent copying of values
96d86ed7fbStbbdev if (i & 1)
97d86ed7fbStbbdev Matrix2x2Multiply(a, c, b);
98d86ed7fbStbbdev else
99d86ed7fbStbbdev Matrix2x2Multiply(a, b, c);
100d86ed7fbStbbdev }
101d86ed7fbStbbdev return (i & 1) ? c[0][0] : b[0][0]; // get result from upper left cell
102d86ed7fbStbbdev }
103d86ed7fbStbbdev //! Recursive summing. Just for complete list of serial algorithms, not used
SerialRecursiveFib(int n)104d86ed7fbStbbdev value SerialRecursiveFib(int n) {
105d86ed7fbStbbdev value result;
106d86ed7fbStbbdev if (n < 2)
107d86ed7fbStbbdev result = n;
108d86ed7fbStbbdev else
109d86ed7fbStbbdev result = SerialRecursiveFib(n - 1) + SerialRecursiveFib(n - 2);
110d86ed7fbStbbdev return result;
111d86ed7fbStbbdev }
112b15aabb3Stbbdev
113b15aabb3Stbbdev // GCC 4.8 C++ standard library implements std::this_thread::yield as no-op.
114b15aabb3Stbbdev #if __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN
yield()115b15aabb3Stbbdev static inline void yield() {
116b15aabb3Stbbdev sched_yield();
117b15aabb3Stbbdev }
118b15aabb3Stbbdev #else
119b15aabb3Stbbdev using std::this_thread::yield;
120b15aabb3Stbbdev #endif
121b15aabb3Stbbdev
122d86ed7fbStbbdev //! Introducing of queue method in serial
SerialQueueFib(int n)123d86ed7fbStbbdev value SerialQueueFib(int n) {
124d86ed7fbStbbdev oneapi::tbb::concurrent_queue<Matrix2x2> Q;
125d86ed7fbStbbdev for (int i = 1; i < n; i++)
126d86ed7fbStbbdev Q.push(Matrix1110);
127d86ed7fbStbbdev Matrix2x2 A, B;
128d86ed7fbStbbdev while (true) {
129d86ed7fbStbbdev while (!Q.try_pop(A))
130b15aabb3Stbbdev yield();
131d86ed7fbStbbdev if (Q.empty())
132d86ed7fbStbbdev break;
133d86ed7fbStbbdev while (!Q.try_pop(B))
134b15aabb3Stbbdev yield();
135d86ed7fbStbbdev Q.push(A * B);
136d86ed7fbStbbdev }
137d86ed7fbStbbdev return A.v[0][0];
138d86ed7fbStbbdev }
139d86ed7fbStbbdev //! Trying to use concurrent_vector
SerialVectorFib(int n)140d86ed7fbStbbdev value SerialVectorFib(int n) {
141d86ed7fbStbbdev oneapi::tbb::concurrent_vector<value> A;
142d86ed7fbStbbdev A.grow_by(2);
143d86ed7fbStbbdev A[0] = 0;
144d86ed7fbStbbdev A[1] = 1;
145d86ed7fbStbbdev for (int i = 2; i <= n; i++) {
146d86ed7fbStbbdev A.grow_to_at_least(i + 1);
147d86ed7fbStbbdev A[i] = A[i - 1] + A[i - 2];
148d86ed7fbStbbdev }
149d86ed7fbStbbdev return A[n];
150d86ed7fbStbbdev }
151d86ed7fbStbbdev
152d86ed7fbStbbdev ///////////////////// Parallel methods ////////////////////////
153d86ed7fbStbbdev
154d86ed7fbStbbdev // *** Serial shared by mutexes *** //
155d86ed7fbStbbdev
156d86ed7fbStbbdev //! Shared glabals
157d86ed7fbStbbdev value SharedA = 0, SharedB = 1;
158d86ed7fbStbbdev int SharedI = 1, SharedN;
159d86ed7fbStbbdev
160d86ed7fbStbbdev //! Template task class which computes Fibonacci numbers with shared globals
161d86ed7fbStbbdev template <typename M>
162d86ed7fbStbbdev class SharedSerialFibBody {
163d86ed7fbStbbdev M &mutex;
164d86ed7fbStbbdev
165d86ed7fbStbbdev public:
SharedSerialFibBody(M & m)166d86ed7fbStbbdev SharedSerialFibBody(M &m) : mutex(m) {}
167d86ed7fbStbbdev //! main loop
operator ()(const oneapi::tbb::blocked_range<int> & range) const168d86ed7fbStbbdev void operator()(const oneapi::tbb::blocked_range<int> &range) const {
169d86ed7fbStbbdev for (;;) {
170d86ed7fbStbbdev typename M::scoped_lock lock(mutex);
171d86ed7fbStbbdev if (SharedI >= SharedN)
172d86ed7fbStbbdev break;
173d86ed7fbStbbdev value sum = SharedA + SharedB;
174d86ed7fbStbbdev SharedA = SharedB;
175d86ed7fbStbbdev SharedB = sum;
176d86ed7fbStbbdev ++SharedI;
177d86ed7fbStbbdev }
178d86ed7fbStbbdev }
179d86ed7fbStbbdev };
180d86ed7fbStbbdev
181d86ed7fbStbbdev template <>
operator ()(const oneapi::tbb::blocked_range<int> & range) const182d86ed7fbStbbdev void SharedSerialFibBody<std::mutex>::operator()(
183d86ed7fbStbbdev const oneapi::tbb::blocked_range<int> &range) const {
184d86ed7fbStbbdev for (;;) {
185d86ed7fbStbbdev std::lock_guard<std::mutex> lock(mutex);
186d86ed7fbStbbdev if (SharedI >= SharedN)
187d86ed7fbStbbdev break;
188d86ed7fbStbbdev value sum = SharedA + SharedB;
189d86ed7fbStbbdev SharedA = SharedB;
190d86ed7fbStbbdev SharedB = sum;
191d86ed7fbStbbdev ++SharedI;
192d86ed7fbStbbdev }
193d86ed7fbStbbdev }
194d86ed7fbStbbdev
195d86ed7fbStbbdev //! Root function
196d86ed7fbStbbdev template <class M>
SharedSerialFib(int n)197d86ed7fbStbbdev value SharedSerialFib(int n) {
198d86ed7fbStbbdev SharedA = 0;
199d86ed7fbStbbdev SharedB = 1;
200d86ed7fbStbbdev SharedI = 1;
201d86ed7fbStbbdev SharedN = n;
202d86ed7fbStbbdev M mutex;
203d86ed7fbStbbdev parallel_for(oneapi::tbb::blocked_range<int>(0, 4, 1), SharedSerialFibBody<M>(mutex));
204d86ed7fbStbbdev return SharedB;
205d86ed7fbStbbdev }
206d86ed7fbStbbdev
207d86ed7fbStbbdev // *** Serial shared by concurrent hash map *** //
208d86ed7fbStbbdev
209d86ed7fbStbbdev //! Hash comparer
210d86ed7fbStbbdev struct IntHashCompare {
equalIntHashCompare211d86ed7fbStbbdev bool equal(const int j, const int k) const {
212d86ed7fbStbbdev return j == k;
213d86ed7fbStbbdev }
hashIntHashCompare2144a23d002Skboyarinov std::size_t hash(const int k) const {
2154a23d002Skboyarinov return (std::size_t)k;
216d86ed7fbStbbdev }
217d86ed7fbStbbdev };
218d86ed7fbStbbdev //! NumbersTable type based on concurrent_hash_map
219d86ed7fbStbbdev typedef oneapi::tbb::concurrent_hash_map<int, value, IntHashCompare> NumbersTable;
220d86ed7fbStbbdev //! task for serial method using shared concurrent_hash_map
221d86ed7fbStbbdev class ConcurrentHashSerialFibTask {
222d86ed7fbStbbdev NumbersTable &Fib;
223d86ed7fbStbbdev int my_n;
224d86ed7fbStbbdev
225d86ed7fbStbbdev public:
226d86ed7fbStbbdev //! constructor
ConcurrentHashSerialFibTask(NumbersTable & cht,int n)227d86ed7fbStbbdev ConcurrentHashSerialFibTask(NumbersTable &cht, int n) : Fib(cht), my_n(n) {}
228d86ed7fbStbbdev //! executing task
operator ()() const229d86ed7fbStbbdev void operator()() const {
230d86ed7fbStbbdev for (int i = 2; i <= my_n; ++i) { // there is no difference in to recycle or to make loop
231d86ed7fbStbbdev NumbersTable::const_accessor f1, f2; // same as iterators
232d86ed7fbStbbdev if (!Fib.find(f1, i - 1) || !Fib.find(f2, i - 2)) {
233d86ed7fbStbbdev // Something is seriously wrong, because i-1 and i-2 must have been inserted
234d86ed7fbStbbdev // earlier by this thread or another thread.
235d86ed7fbStbbdev assert(0);
236d86ed7fbStbbdev }
237d86ed7fbStbbdev value sum = f1->second + f2->second;
238d86ed7fbStbbdev NumbersTable::const_accessor fsum;
239d86ed7fbStbbdev Fib.insert(fsum, std::make_pair(i, sum)); // inserting
240d86ed7fbStbbdev assert(fsum->second == sum); // check value
241d86ed7fbStbbdev }
242d86ed7fbStbbdev }
243d86ed7fbStbbdev };
244d86ed7fbStbbdev
245d86ed7fbStbbdev //! Root function
ConcurrentHashSerialFib(int n)246d86ed7fbStbbdev value ConcurrentHashSerialFib(int n) {
247d86ed7fbStbbdev NumbersTable Fib;
248d86ed7fbStbbdev bool okay;
249d86ed7fbStbbdev okay = Fib.insert(std::make_pair(0, 0));
250d86ed7fbStbbdev assert(okay); // assign initial values
251d86ed7fbStbbdev okay = Fib.insert(std::make_pair(1, 1));
252d86ed7fbStbbdev assert(okay);
253d86ed7fbStbbdev
254d86ed7fbStbbdev // task_list list;
255d86ed7fbStbbdev oneapi::tbb::task_group tg;
256d86ed7fbStbbdev // allocate tasks
257d86ed7fbStbbdev tg.run(ConcurrentHashSerialFibTask(Fib, n));
258d86ed7fbStbbdev tg.run(ConcurrentHashSerialFibTask(Fib, n));
259d86ed7fbStbbdev tg.wait();
260d86ed7fbStbbdev NumbersTable::const_accessor fresult;
261d86ed7fbStbbdev okay = Fib.find(fresult, n);
262d86ed7fbStbbdev assert(okay);
263d86ed7fbStbbdev return fresult->second;
264d86ed7fbStbbdev }
265d86ed7fbStbbdev
266d86ed7fbStbbdev // *** Queue with parallel_pipeline *** //
267d86ed7fbStbbdev
268d86ed7fbStbbdev typedef oneapi::tbb::concurrent_queue<Matrix2x2> queue_t;
269d86ed7fbStbbdev namespace parallel_pipeline_ns {
270d86ed7fbStbbdev std::atomic<int> N; //< index of Fibonacci number minus 1
271d86ed7fbStbbdev queue_t Queue;
272d86ed7fbStbbdev } // namespace parallel_pipeline_ns
273d86ed7fbStbbdev
274d86ed7fbStbbdev //! functor to fills queue
275d86ed7fbStbbdev struct InputFunc {
InputFuncInputFunc276d86ed7fbStbbdev InputFunc() {}
operator ()InputFunc277d86ed7fbStbbdev queue_t *operator()(oneapi::tbb::flow_control &fc) const {
278d86ed7fbStbbdev using namespace parallel_pipeline_ns;
279d86ed7fbStbbdev
280d86ed7fbStbbdev int n = --N;
281d86ed7fbStbbdev if (n <= 0) {
282d86ed7fbStbbdev fc.stop();
283d86ed7fbStbbdev return nullptr;
284d86ed7fbStbbdev }
285d86ed7fbStbbdev Queue.push(Matrix1110);
286d86ed7fbStbbdev return &Queue;
287d86ed7fbStbbdev }
288d86ed7fbStbbdev };
289d86ed7fbStbbdev //! functor to process queue
290d86ed7fbStbbdev struct MultiplyFunc {
MultiplyFuncMultiplyFunc291d86ed7fbStbbdev MultiplyFunc() {}
operator ()MultiplyFunc292d86ed7fbStbbdev void operator()(queue_t *queue) const {
293d86ed7fbStbbdev //concurrent_queue<Matrix2x2> &Queue = *static_cast<concurrent_queue<Matrix2x2> *>(p);
294d86ed7fbStbbdev Matrix2x2 m1, m2;
295d86ed7fbStbbdev // get two elements
296d86ed7fbStbbdev while (!queue->try_pop(m1))
297b15aabb3Stbbdev yield();
298d86ed7fbStbbdev while (!queue->try_pop(m2))
299b15aabb3Stbbdev yield();
300d86ed7fbStbbdev m1 = m1 * m2; // process them
301d86ed7fbStbbdev queue->push(m1); // and push back
302d86ed7fbStbbdev }
303d86ed7fbStbbdev };
304d86ed7fbStbbdev //! Root function
ParallelPipeFib(int n)305d86ed7fbStbbdev value ParallelPipeFib(int n) {
306d86ed7fbStbbdev using namespace parallel_pipeline_ns;
307d86ed7fbStbbdev
308d86ed7fbStbbdev N = n - 1;
309d86ed7fbStbbdev Queue.push(Matrix1110);
310d86ed7fbStbbdev
311d86ed7fbStbbdev oneapi::tbb::parallel_pipeline(
312d86ed7fbStbbdev n,
313d86ed7fbStbbdev oneapi::tbb::make_filter<void, queue_t *>(oneapi::tbb::filter_mode::parallel, InputFunc()) &
314d86ed7fbStbbdev oneapi::tbb::make_filter<queue_t *, void>(oneapi::tbb::filter_mode::parallel,
315d86ed7fbStbbdev MultiplyFunc()));
316d86ed7fbStbbdev
317d86ed7fbStbbdev assert(Queue.unsafe_size() == 1);
318d86ed7fbStbbdev Matrix2x2 M;
319d86ed7fbStbbdev bool result = Queue.try_pop(M); // get last element
320d86ed7fbStbbdev assert(result);
321d86ed7fbStbbdev value res = M.v[0][0]; // get value
322d86ed7fbStbbdev Queue.clear();
323d86ed7fbStbbdev return res;
324d86ed7fbStbbdev }
325d86ed7fbStbbdev
326d86ed7fbStbbdev // *** parallel_reduce *** //
327d86ed7fbStbbdev
328d86ed7fbStbbdev //! Functor for parallel_reduce
329d86ed7fbStbbdev struct parallel_reduceFibBody {
330d86ed7fbStbbdev Matrix2x2 sum;
331d86ed7fbStbbdev int split_flag; //< flag to make one less operation for split bodies
332d86ed7fbStbbdev //! Constructor fills sum with initial matrix
parallel_reduceFibBodyparallel_reduceFibBody333d86ed7fbStbbdev parallel_reduceFibBody() : sum(Matrix1110), split_flag(0) {}
334d86ed7fbStbbdev //! Splitting constructor
parallel_reduceFibBodyparallel_reduceFibBody335d86ed7fbStbbdev parallel_reduceFibBody(parallel_reduceFibBody &other, oneapi::tbb::split)
336d86ed7fbStbbdev : sum(Matrix1110),
337d86ed7fbStbbdev split_flag(1 /*note that it is split*/) {}
338d86ed7fbStbbdev //! Join point
joinparallel_reduceFibBody339d86ed7fbStbbdev void join(parallel_reduceFibBody &s) {
340d86ed7fbStbbdev sum = sum * s.sum;
341d86ed7fbStbbdev }
342d86ed7fbStbbdev //! Process multiplications
operator ()parallel_reduceFibBody343d86ed7fbStbbdev void operator()(const oneapi::tbb::blocked_range<int> &r) {
344d86ed7fbStbbdev for (int k = r.begin() + split_flag; k < r.end(); ++k)
345d86ed7fbStbbdev sum = sum * Matrix1110;
346d86ed7fbStbbdev split_flag = 0; // reset flag, because this method can be reused for next range
347d86ed7fbStbbdev }
348d86ed7fbStbbdev };
349d86ed7fbStbbdev //! Root function
parallel_reduceFib(int n)350d86ed7fbStbbdev value parallel_reduceFib(int n) {
351d86ed7fbStbbdev parallel_reduceFibBody b;
352d86ed7fbStbbdev oneapi::tbb::parallel_reduce(oneapi::tbb::blocked_range<int>(2, n, 3),
353d86ed7fbStbbdev b); // do parallel reduce on range [2, n) for b
354d86ed7fbStbbdev return b.sum.v[0][0];
355d86ed7fbStbbdev }
356d86ed7fbStbbdev
357d86ed7fbStbbdev // *** parallel_scan *** //
358d86ed7fbStbbdev
359d86ed7fbStbbdev //! Functor for parallel_scan
360d86ed7fbStbbdev struct parallel_scanFibBody {
361d86ed7fbStbbdev /** Though parallel_scan is usually used to accumulate running sums,
362d86ed7fbStbbdev it can be used to accumulate running products too. */
363d86ed7fbStbbdev Matrix2x2 product;
364d86ed7fbStbbdev /** Pointer to output sequence */
365d86ed7fbStbbdev value *const output;
366d86ed7fbStbbdev //! Constructor sets product to identity matrix
parallel_scanFibBodyparallel_scanFibBody367d86ed7fbStbbdev parallel_scanFibBody(value *output_) : product(MatrixIdentity), output(output_) {}
368d86ed7fbStbbdev //! Splitting constructor
parallel_scanFibBodyparallel_scanFibBody369d86ed7fbStbbdev parallel_scanFibBody(parallel_scanFibBody &b, oneapi::tbb::split)
370d86ed7fbStbbdev : product(MatrixIdentity),
371d86ed7fbStbbdev output(b.output) {}
372d86ed7fbStbbdev //! Method for merging summary information from a, which was split off from *this, into *this.
reverse_joinparallel_scanFibBody373d86ed7fbStbbdev void reverse_join(parallel_scanFibBody &a) {
374d86ed7fbStbbdev // When using non-commutative reduction operation, reverse_join
375d86ed7fbStbbdev // should put argument "a" on the left side of the operation.
376d86ed7fbStbbdev // The reversal from the argument order is why the method is
377d86ed7fbStbbdev // called "reverse_join" instead of "join".
378d86ed7fbStbbdev product = a.product * product;
379d86ed7fbStbbdev }
380d86ed7fbStbbdev //! Method for assigning final result back to original body.
assignparallel_scanFibBody381d86ed7fbStbbdev void assign(parallel_scanFibBody &b) {
382d86ed7fbStbbdev product = b.product;
383d86ed7fbStbbdev }
384d86ed7fbStbbdev //! Compute matrix running product.
385d86ed7fbStbbdev /** Tag indicates whether is is the final scan over the range, or
386d86ed7fbStbbdev just a helper "prescan" that is computing a partial reduction. */
387d86ed7fbStbbdev template <typename Tag>
operator ()parallel_scanFibBody388d86ed7fbStbbdev void operator()(const oneapi::tbb::blocked_range<int> &r, Tag tag) {
389d86ed7fbStbbdev for (int k = r.begin(); k < r.end(); ++k) {
390d86ed7fbStbbdev // Code performs an "exclusive" scan, which outputs a value *before* updating the product.
391d86ed7fbStbbdev // For an "inclusive" scan, output the value after the update.
392d86ed7fbStbbdev if (tag.is_final_scan())
393d86ed7fbStbbdev output[k] = product.v[0][1];
394d86ed7fbStbbdev product = product * Matrix1110;
395d86ed7fbStbbdev }
396d86ed7fbStbbdev }
397d86ed7fbStbbdev };
398d86ed7fbStbbdev //! Root function
parallel_scanFib(int n)399d86ed7fbStbbdev value parallel_scanFib(int n) {
400d86ed7fbStbbdev value *output = new value[n];
401d86ed7fbStbbdev parallel_scanFibBody b(output);
402d86ed7fbStbbdev oneapi::tbb::parallel_scan(oneapi::tbb::blocked_range<int>(0, n, 3), b);
403d86ed7fbStbbdev // output[0..n-1] now contains the Fibonacci sequence (modulo integer wrap-around).
404d86ed7fbStbbdev // Check the last two values for correctness.
405d86ed7fbStbbdev assert(n < 2 || output[n - 2] + output[n - 1] == b.product.v[0][1]);
406d86ed7fbStbbdev delete[] output;
407d86ed7fbStbbdev return b.product.v[0][1];
408d86ed7fbStbbdev }
409d86ed7fbStbbdev
410d86ed7fbStbbdev /////////////////////////// Main ////////////////////////////////////////////////////
411d86ed7fbStbbdev
412d86ed7fbStbbdev //! A closed range of int.
413d86ed7fbStbbdev struct IntRange {
414d86ed7fbStbbdev int low;
415d86ed7fbStbbdev int high;
416d86ed7fbStbbdev void set_from_string(const char *s);
IntRangeIntRange417d86ed7fbStbbdev IntRange(int low_, int high_) : low(low_), high(high_) {}
418d86ed7fbStbbdev };
419d86ed7fbStbbdev
set_from_string(const char * s)420d86ed7fbStbbdev void IntRange::set_from_string(const char *s) {
421d86ed7fbStbbdev char *end;
422d86ed7fbStbbdev high = low = strtol(s, &end, 0);
423d86ed7fbStbbdev switch (*end) {
42457f524caSIlya Isaev case ':': high = strtol(end + 1, nullptr, 0); break;
425d86ed7fbStbbdev case '\0': break;
426d86ed7fbStbbdev default: printf("unexpected character = %c\n", *end);
427d86ed7fbStbbdev }
428d86ed7fbStbbdev }
429d86ed7fbStbbdev
430d86ed7fbStbbdev //! Tick count for start
431d86ed7fbStbbdev static oneapi::tbb::tick_count t0;
432d86ed7fbStbbdev
433d86ed7fbStbbdev //! Verbose output flag
434d86ed7fbStbbdev static bool Verbose = false;
435d86ed7fbStbbdev
436d86ed7fbStbbdev typedef value (*MeasureFunc)(int);
437d86ed7fbStbbdev //! Measure ticks count in loop [2..n]
Measure(const char * name,MeasureFunc func,int n)438d86ed7fbStbbdev value Measure(const char *name, MeasureFunc func, int n) {
439d86ed7fbStbbdev value result;
440d86ed7fbStbbdev if (Verbose)
441d86ed7fbStbbdev printf("%s", name);
442d86ed7fbStbbdev t0 = oneapi::tbb::tick_count::now();
443d86ed7fbStbbdev for (int number = 2; number <= n; number++)
444d86ed7fbStbbdev result = func(number);
445d86ed7fbStbbdev if (Verbose)
446d86ed7fbStbbdev printf("\t- in %f msec\n", (oneapi::tbb::tick_count::now() - t0).seconds() * 1000);
447d86ed7fbStbbdev return result;
448d86ed7fbStbbdev }
449d86ed7fbStbbdev
450d86ed7fbStbbdev //! program entry
main(int argc,char * argv[])451d86ed7fbStbbdev int main(int argc, char *argv[]) {
452d86ed7fbStbbdev if (argc > 1)
453d86ed7fbStbbdev Verbose = true;
45457f524caSIlya Isaev int NumbersCount = argc > 1 ? strtol(argv[1], nullptr, 0) : 500;
455d86ed7fbStbbdev IntRange NThread(1, 4); // Number of threads to use.
456d86ed7fbStbbdev if (argc > 2)
457d86ed7fbStbbdev NThread.set_from_string(argv[2]);
45857f524caSIlya Isaev unsigned long ntrial = argc > 3 ? (unsigned long)strtoul(argv[3], nullptr, 0) : 1;
459d86ed7fbStbbdev value result, sum;
460d86ed7fbStbbdev
461d86ed7fbStbbdev if (Verbose)
462d86ed7fbStbbdev printf("Fibonacci numbers example. Generating %d numbers..\n", NumbersCount);
463d86ed7fbStbbdev
464d86ed7fbStbbdev result = Measure("Serial loop", SerialFib, NumbersCount);
465d86ed7fbStbbdev sum = Measure("Serial matrix", SerialMatrixFib, NumbersCount);
466d86ed7fbStbbdev assert(result == sum);
467d86ed7fbStbbdev sum = Measure("Serial vector", SerialVectorFib, NumbersCount);
468d86ed7fbStbbdev assert(result == sum);
469d86ed7fbStbbdev sum = Measure("Serial queue", SerialQueueFib, NumbersCount);
470d86ed7fbStbbdev assert(result == sum);
471d86ed7fbStbbdev // now in parallel
472d86ed7fbStbbdev for (unsigned long i = 0; i < ntrial; ++i) {
473d86ed7fbStbbdev for (int threads = NThread.low; threads <= NThread.high; threads *= 2) {
474d86ed7fbStbbdev oneapi::tbb::global_control c(oneapi::tbb::global_control::max_allowed_parallelism,
475d86ed7fbStbbdev threads);
476d86ed7fbStbbdev if (Verbose)
477d86ed7fbStbbdev printf("\nThreads number is %d\n", threads);
478d86ed7fbStbbdev
479d86ed7fbStbbdev sum = Measure("Shared serial (mutex)\t", SharedSerialFib<std::mutex>, NumbersCount);
480d86ed7fbStbbdev assert(result == sum);
481d86ed7fbStbbdev sum = Measure("Shared serial (spin_mutex)",
482d86ed7fbStbbdev SharedSerialFib<oneapi::tbb::spin_mutex>,
483d86ed7fbStbbdev NumbersCount);
484d86ed7fbStbbdev assert(result == sum);
485d86ed7fbStbbdev sum = Measure("Shared serial (queuing_mutex)",
486d86ed7fbStbbdev SharedSerialFib<oneapi::tbb::queuing_mutex>,
487d86ed7fbStbbdev NumbersCount);
488d86ed7fbStbbdev assert(result == sum);
489d86ed7fbStbbdev sum = Measure("Shared serial (Conc.HashTable)", ConcurrentHashSerialFib, NumbersCount);
490d86ed7fbStbbdev assert(result == sum);
491d86ed7fbStbbdev sum = Measure("Parallel pipe/queue\t", ParallelPipeFib, NumbersCount);
492d86ed7fbStbbdev assert(result == sum);
493d86ed7fbStbbdev sum = Measure("Parallel reduce\t\t", parallel_reduceFib, NumbersCount);
494d86ed7fbStbbdev assert(result == sum);
495d86ed7fbStbbdev sum = Measure("Parallel scan\t\t", parallel_scanFib, NumbersCount);
496d86ed7fbStbbdev assert(result == sum);
497d86ed7fbStbbdev }
498d86ed7fbStbbdev
499d86ed7fbStbbdev #ifdef __GNUC__
500d86ed7fbStbbdev if (Verbose)
501d86ed7fbStbbdev printf("Fibonacci number #%d modulo 2^64 is %lld\n\n", NumbersCount, result);
502d86ed7fbStbbdev #else
503d86ed7fbStbbdev if (Verbose)
504d86ed7fbStbbdev printf("Fibonacci number #%d modulo 2^64 is %I64d\n\n", NumbersCount, result);
505d86ed7fbStbbdev #endif
506d86ed7fbStbbdev }
507d86ed7fbStbbdev if (!Verbose)
508d86ed7fbStbbdev printf("TEST PASSED\n");
5094523a761Stbbdev // flush to prevent bufferization on exit
5104523a761Stbbdev fflush(stdout);
511d86ed7fbStbbdev return 0;
512d86ed7fbStbbdev }
513d86ed7fbStbbdev
514d86ed7fbStbbdev // Utils
515d86ed7fbStbbdev
Matrix2x2Multiply(const value a[2][2],const value b[2][2],value c[2][2])516d86ed7fbStbbdev void Matrix2x2Multiply(const value a[2][2], const value b[2][2], value c[2][2]) {
517d86ed7fbStbbdev for (int i = 0; i <= 1; i++)
518d86ed7fbStbbdev for (int j = 0; j <= 1; j++)
519d86ed7fbStbbdev c[i][j] = a[i][0] * b[0][j] + a[i][1] * b[1][j];
520d86ed7fbStbbdev }
521d86ed7fbStbbdev
operator *(const Matrix2x2 & to) const522d86ed7fbStbbdev Matrix2x2 Matrix2x2::operator*(const Matrix2x2 &to) const {
523d86ed7fbStbbdev Matrix2x2 result;
524d86ed7fbStbbdev Matrix2x2Multiply(v, to.v, result.v);
525d86ed7fbStbbdev return result;
526d86ed7fbStbbdev }
527