1 /*
2 Copyright (c) 2005-2022 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 */
16
17 /* Example program that computes Fibonacci numbers in different ways.
18 Arguments are: [ Number [Threads [Repeats]]]
19 The defaults are Number=500 Threads=1:4 Repeats=1.
20
21 The point of this program is to check that the library is working properly.
22 Most of the computations are deliberately silly and not expected to
23 show any speedup on multiprocessors.
24 */
25
26 // enable assertions
27 #ifdef NDEBUG
28 #undef NDEBUG
29 #endif
30
31 #include <cstdio>
32 #include <cstdlib>
33 #include <cassert>
34
35 #include <utility>
36 #include <thread>
37 #include <atomic>
38 #include <mutex>
39
40 #include "oneapi/tbb/tick_count.h"
41 #include "oneapi/tbb/blocked_range.h"
42 #include "oneapi/tbb/concurrent_vector.h"
43 #include "oneapi/tbb/concurrent_queue.h"
44 #include "oneapi/tbb/concurrent_hash_map.h"
45 #include "oneapi/tbb/parallel_for.h"
46 #include "oneapi/tbb/parallel_reduce.h"
47 #include "oneapi/tbb/parallel_scan.h"
48 #include "oneapi/tbb/parallel_pipeline.h"
49 #include "oneapi/tbb/spin_mutex.h"
50 #include "oneapi/tbb/queuing_mutex.h"
51 #include "oneapi/tbb/global_control.h"
52
53 //! type used for Fibonacci number computations
54 typedef long long value;
55
56 //! Matrix 2x2 class
57 struct Matrix2x2 {
58 //! Array of values
59 value v[2][2];
Matrix2x2Matrix2x260 Matrix2x2() {}
Matrix2x2Matrix2x261 Matrix2x2(value v00, value v01, value v10, value v11) {
62 v[0][0] = v00;
63 v[0][1] = v01;
64 v[1][0] = v10;
65 v[1][1] = v11;
66 }
67 Matrix2x2 operator*(const Matrix2x2 &to) const; //< Multiply two Matrices
68 };
69 //! Identity matrix
70 static const Matrix2x2 MatrixIdentity(1, 0, 0, 1);
71 //! Default matrix to multiply
72 static const Matrix2x2 Matrix1110(1, 1, 1, 0);
73 //! Raw arrays matrices multiply
74 void Matrix2x2Multiply(const value a[2][2], const value b[2][2], value c[2][2]);
75
76 /////////////////////// Serial methods ////////////////////////
77
78 //! Plain serial sum
SerialFib(int n)79 value SerialFib(int n) {
80 if (n < 2)
81 return n;
82 value a = 0, b = 1, sum;
83 int i;
84 for (i = 2; i <= n; i++) { // n is really index of Fibonacci number
85 sum = a + b;
86 a = b;
87 b = sum;
88 }
89 return sum;
90 }
91 //! Serial n-1 matrices multiplication
SerialMatrixFib(int n)92 value SerialMatrixFib(int n) {
93 value c[2][2], a[2][2] = { { 1, 1 }, { 1, 0 } }, b[2][2] = { { 1, 1 }, { 1, 0 } };
94 int i;
95 for (i = 2; i < n; i++) { // Using condition to prevent copying of values
96 if (i & 1)
97 Matrix2x2Multiply(a, c, b);
98 else
99 Matrix2x2Multiply(a, b, c);
100 }
101 return (i & 1) ? c[0][0] : b[0][0]; // get result from upper left cell
102 }
103 //! Recursive summing. Just for complete list of serial algorithms, not used
SerialRecursiveFib(int n)104 value SerialRecursiveFib(int n) {
105 value result;
106 if (n < 2)
107 result = n;
108 else
109 result = SerialRecursiveFib(n - 1) + SerialRecursiveFib(n - 2);
110 return result;
111 }
112
113 // GCC 4.8 C++ standard library implements std::this_thread::yield as no-op.
114 #if __TBB_GLIBCXX_THIS_THREAD_YIELD_BROKEN
yield()115 static inline void yield() {
116 sched_yield();
117 }
118 #else
119 using std::this_thread::yield;
120 #endif
121
122 //! Introducing of queue method in serial
SerialQueueFib(int n)123 value SerialQueueFib(int n) {
124 oneapi::tbb::concurrent_queue<Matrix2x2> Q;
125 for (int i = 1; i < n; i++)
126 Q.push(Matrix1110);
127 Matrix2x2 A, B;
128 while (true) {
129 while (!Q.try_pop(A))
130 yield();
131 if (Q.empty())
132 break;
133 while (!Q.try_pop(B))
134 yield();
135 Q.push(A * B);
136 }
137 return A.v[0][0];
138 }
139 //! Trying to use concurrent_vector
SerialVectorFib(int n)140 value SerialVectorFib(int n) {
141 oneapi::tbb::concurrent_vector<value> A;
142 A.grow_by(2);
143 A[0] = 0;
144 A[1] = 1;
145 for (int i = 2; i <= n; i++) {
146 A.grow_to_at_least(i + 1);
147 A[i] = A[i - 1] + A[i - 2];
148 }
149 return A[n];
150 }
151
152 ///////////////////// Parallel methods ////////////////////////
153
154 // *** Serial shared by mutexes *** //
155
156 //! Shared glabals
157 value SharedA = 0, SharedB = 1;
158 int SharedI = 1, SharedN;
159
160 //! Template task class which computes Fibonacci numbers with shared globals
161 template <typename M>
162 class SharedSerialFibBody {
163 M &mutex;
164
165 public:
SharedSerialFibBody(M & m)166 SharedSerialFibBody(M &m) : mutex(m) {}
167 //! main loop
operator ()(const oneapi::tbb::blocked_range<int> & range) const168 void operator()(const oneapi::tbb::blocked_range<int> &range) const {
169 for (;;) {
170 typename M::scoped_lock lock(mutex);
171 if (SharedI >= SharedN)
172 break;
173 value sum = SharedA + SharedB;
174 SharedA = SharedB;
175 SharedB = sum;
176 ++SharedI;
177 }
178 }
179 };
180
181 template <>
operator ()(const oneapi::tbb::blocked_range<int> & range) const182 void SharedSerialFibBody<std::mutex>::operator()(
183 const oneapi::tbb::blocked_range<int> &range) const {
184 for (;;) {
185 std::lock_guard<std::mutex> lock(mutex);
186 if (SharedI >= SharedN)
187 break;
188 value sum = SharedA + SharedB;
189 SharedA = SharedB;
190 SharedB = sum;
191 ++SharedI;
192 }
193 }
194
195 //! Root function
196 template <class M>
SharedSerialFib(int n)197 value SharedSerialFib(int n) {
198 SharedA = 0;
199 SharedB = 1;
200 SharedI = 1;
201 SharedN = n;
202 M mutex;
203 parallel_for(oneapi::tbb::blocked_range<int>(0, 4, 1), SharedSerialFibBody<M>(mutex));
204 return SharedB;
205 }
206
207 // *** Serial shared by concurrent hash map *** //
208
209 //! Hash comparer
210 struct IntHashCompare {
equalIntHashCompare211 bool equal(const int j, const int k) const {
212 return j == k;
213 }
hashIntHashCompare214 std::size_t hash(const int k) const {
215 return (std::size_t)k;
216 }
217 };
218 //! NumbersTable type based on concurrent_hash_map
219 typedef oneapi::tbb::concurrent_hash_map<int, value, IntHashCompare> NumbersTable;
220 //! task for serial method using shared concurrent_hash_map
221 class ConcurrentHashSerialFibTask {
222 NumbersTable &Fib;
223 int my_n;
224
225 public:
226 //! constructor
ConcurrentHashSerialFibTask(NumbersTable & cht,int n)227 ConcurrentHashSerialFibTask(NumbersTable &cht, int n) : Fib(cht), my_n(n) {}
228 //! executing task
operator ()() const229 void operator()() const {
230 for (int i = 2; i <= my_n; ++i) { // there is no difference in to recycle or to make loop
231 NumbersTable::const_accessor f1, f2; // same as iterators
232 if (!Fib.find(f1, i - 1) || !Fib.find(f2, i - 2)) {
233 // Something is seriously wrong, because i-1 and i-2 must have been inserted
234 // earlier by this thread or another thread.
235 assert(0);
236 }
237 value sum = f1->second + f2->second;
238 NumbersTable::const_accessor fsum;
239 Fib.insert(fsum, std::make_pair(i, sum)); // inserting
240 assert(fsum->second == sum); // check value
241 }
242 }
243 };
244
245 //! Root function
ConcurrentHashSerialFib(int n)246 value ConcurrentHashSerialFib(int n) {
247 NumbersTable Fib;
248 bool okay;
249 okay = Fib.insert(std::make_pair(0, 0));
250 assert(okay); // assign initial values
251 okay = Fib.insert(std::make_pair(1, 1));
252 assert(okay);
253
254 // task_list list;
255 oneapi::tbb::task_group tg;
256 // allocate tasks
257 tg.run(ConcurrentHashSerialFibTask(Fib, n));
258 tg.run(ConcurrentHashSerialFibTask(Fib, n));
259 tg.wait();
260 NumbersTable::const_accessor fresult;
261 okay = Fib.find(fresult, n);
262 assert(okay);
263 return fresult->second;
264 }
265
266 // *** Queue with parallel_pipeline *** //
267
268 typedef oneapi::tbb::concurrent_queue<Matrix2x2> queue_t;
269 namespace parallel_pipeline_ns {
270 std::atomic<int> N; //< index of Fibonacci number minus 1
271 queue_t Queue;
272 } // namespace parallel_pipeline_ns
273
274 //! functor to fills queue
275 struct InputFunc {
InputFuncInputFunc276 InputFunc() {}
operator ()InputFunc277 queue_t *operator()(oneapi::tbb::flow_control &fc) const {
278 using namespace parallel_pipeline_ns;
279
280 int n = --N;
281 if (n <= 0) {
282 fc.stop();
283 return nullptr;
284 }
285 Queue.push(Matrix1110);
286 return &Queue;
287 }
288 };
289 //! functor to process queue
290 struct MultiplyFunc {
MultiplyFuncMultiplyFunc291 MultiplyFunc() {}
operator ()MultiplyFunc292 void operator()(queue_t *queue) const {
293 //concurrent_queue<Matrix2x2> &Queue = *static_cast<concurrent_queue<Matrix2x2> *>(p);
294 Matrix2x2 m1, m2;
295 // get two elements
296 while (!queue->try_pop(m1))
297 yield();
298 while (!queue->try_pop(m2))
299 yield();
300 m1 = m1 * m2; // process them
301 queue->push(m1); // and push back
302 }
303 };
304 //! Root function
ParallelPipeFib(int n)305 value ParallelPipeFib(int n) {
306 using namespace parallel_pipeline_ns;
307
308 N = n - 1;
309 Queue.push(Matrix1110);
310
311 oneapi::tbb::parallel_pipeline(
312 n,
313 oneapi::tbb::make_filter<void, queue_t *>(oneapi::tbb::filter_mode::parallel, InputFunc()) &
314 oneapi::tbb::make_filter<queue_t *, void>(oneapi::tbb::filter_mode::parallel,
315 MultiplyFunc()));
316
317 assert(Queue.unsafe_size() == 1);
318 Matrix2x2 M;
319 bool result = Queue.try_pop(M); // get last element
320 assert(result);
321 value res = M.v[0][0]; // get value
322 Queue.clear();
323 return res;
324 }
325
326 // *** parallel_reduce *** //
327
328 //! Functor for parallel_reduce
329 struct parallel_reduceFibBody {
330 Matrix2x2 sum;
331 int split_flag; //< flag to make one less operation for split bodies
332 //! Constructor fills sum with initial matrix
parallel_reduceFibBodyparallel_reduceFibBody333 parallel_reduceFibBody() : sum(Matrix1110), split_flag(0) {}
334 //! Splitting constructor
parallel_reduceFibBodyparallel_reduceFibBody335 parallel_reduceFibBody(parallel_reduceFibBody &other, oneapi::tbb::split)
336 : sum(Matrix1110),
337 split_flag(1 /*note that it is split*/) {}
338 //! Join point
joinparallel_reduceFibBody339 void join(parallel_reduceFibBody &s) {
340 sum = sum * s.sum;
341 }
342 //! Process multiplications
operator ()parallel_reduceFibBody343 void operator()(const oneapi::tbb::blocked_range<int> &r) {
344 for (int k = r.begin() + split_flag; k < r.end(); ++k)
345 sum = sum * Matrix1110;
346 split_flag = 0; // reset flag, because this method can be reused for next range
347 }
348 };
349 //! Root function
parallel_reduceFib(int n)350 value parallel_reduceFib(int n) {
351 parallel_reduceFibBody b;
352 oneapi::tbb::parallel_reduce(oneapi::tbb::blocked_range<int>(2, n, 3),
353 b); // do parallel reduce on range [2, n) for b
354 return b.sum.v[0][0];
355 }
356
357 // *** parallel_scan *** //
358
359 //! Functor for parallel_scan
360 struct parallel_scanFibBody {
361 /** Though parallel_scan is usually used to accumulate running sums,
362 it can be used to accumulate running products too. */
363 Matrix2x2 product;
364 /** Pointer to output sequence */
365 value *const output;
366 //! Constructor sets product to identity matrix
parallel_scanFibBodyparallel_scanFibBody367 parallel_scanFibBody(value *output_) : product(MatrixIdentity), output(output_) {}
368 //! Splitting constructor
parallel_scanFibBodyparallel_scanFibBody369 parallel_scanFibBody(parallel_scanFibBody &b, oneapi::tbb::split)
370 : product(MatrixIdentity),
371 output(b.output) {}
372 //! Method for merging summary information from a, which was split off from *this, into *this.
reverse_joinparallel_scanFibBody373 void reverse_join(parallel_scanFibBody &a) {
374 // When using non-commutative reduction operation, reverse_join
375 // should put argument "a" on the left side of the operation.
376 // The reversal from the argument order is why the method is
377 // called "reverse_join" instead of "join".
378 product = a.product * product;
379 }
380 //! Method for assigning final result back to original body.
assignparallel_scanFibBody381 void assign(parallel_scanFibBody &b) {
382 product = b.product;
383 }
384 //! Compute matrix running product.
385 /** Tag indicates whether is is the final scan over the range, or
386 just a helper "prescan" that is computing a partial reduction. */
387 template <typename Tag>
operator ()parallel_scanFibBody388 void operator()(const oneapi::tbb::blocked_range<int> &r, Tag tag) {
389 for (int k = r.begin(); k < r.end(); ++k) {
390 // Code performs an "exclusive" scan, which outputs a value *before* updating the product.
391 // For an "inclusive" scan, output the value after the update.
392 if (tag.is_final_scan())
393 output[k] = product.v[0][1];
394 product = product * Matrix1110;
395 }
396 }
397 };
398 //! Root function
parallel_scanFib(int n)399 value parallel_scanFib(int n) {
400 value *output = new value[n];
401 parallel_scanFibBody b(output);
402 oneapi::tbb::parallel_scan(oneapi::tbb::blocked_range<int>(0, n, 3), b);
403 // output[0..n-1] now contains the Fibonacci sequence (modulo integer wrap-around).
404 // Check the last two values for correctness.
405 assert(n < 2 || output[n - 2] + output[n - 1] == b.product.v[0][1]);
406 delete[] output;
407 return b.product.v[0][1];
408 }
409
410 /////////////////////////// Main ////////////////////////////////////////////////////
411
412 //! A closed range of int.
413 struct IntRange {
414 int low;
415 int high;
416 void set_from_string(const char *s);
IntRangeIntRange417 IntRange(int low_, int high_) : low(low_), high(high_) {}
418 };
419
set_from_string(const char * s)420 void IntRange::set_from_string(const char *s) {
421 char *end;
422 high = low = strtol(s, &end, 0);
423 switch (*end) {
424 case ':': high = strtol(end + 1, nullptr, 0); break;
425 case '\0': break;
426 default: printf("unexpected character = %c\n", *end);
427 }
428 }
429
430 //! Tick count for start
431 static oneapi::tbb::tick_count t0;
432
433 //! Verbose output flag
434 static bool Verbose = false;
435
436 typedef value (*MeasureFunc)(int);
437 //! Measure ticks count in loop [2..n]
Measure(const char * name,MeasureFunc func,int n)438 value Measure(const char *name, MeasureFunc func, int n) {
439 value result;
440 if (Verbose)
441 printf("%s", name);
442 t0 = oneapi::tbb::tick_count::now();
443 for (int number = 2; number <= n; number++)
444 result = func(number);
445 if (Verbose)
446 printf("\t- in %f msec\n", (oneapi::tbb::tick_count::now() - t0).seconds() * 1000);
447 return result;
448 }
449
450 //! program entry
main(int argc,char * argv[])451 int main(int argc, char *argv[]) {
452 if (argc > 1)
453 Verbose = true;
454 int NumbersCount = argc > 1 ? strtol(argv[1], nullptr, 0) : 500;
455 IntRange NThread(1, 4); // Number of threads to use.
456 if (argc > 2)
457 NThread.set_from_string(argv[2]);
458 unsigned long ntrial = argc > 3 ? (unsigned long)strtoul(argv[3], nullptr, 0) : 1;
459 value result, sum;
460
461 if (Verbose)
462 printf("Fibonacci numbers example. Generating %d numbers..\n", NumbersCount);
463
464 result = Measure("Serial loop", SerialFib, NumbersCount);
465 sum = Measure("Serial matrix", SerialMatrixFib, NumbersCount);
466 assert(result == sum);
467 sum = Measure("Serial vector", SerialVectorFib, NumbersCount);
468 assert(result == sum);
469 sum = Measure("Serial queue", SerialQueueFib, NumbersCount);
470 assert(result == sum);
471 // now in parallel
472 for (unsigned long i = 0; i < ntrial; ++i) {
473 for (int threads = NThread.low; threads <= NThread.high; threads *= 2) {
474 oneapi::tbb::global_control c(oneapi::tbb::global_control::max_allowed_parallelism,
475 threads);
476 if (Verbose)
477 printf("\nThreads number is %d\n", threads);
478
479 sum = Measure("Shared serial (mutex)\t", SharedSerialFib<std::mutex>, NumbersCount);
480 assert(result == sum);
481 sum = Measure("Shared serial (spin_mutex)",
482 SharedSerialFib<oneapi::tbb::spin_mutex>,
483 NumbersCount);
484 assert(result == sum);
485 sum = Measure("Shared serial (queuing_mutex)",
486 SharedSerialFib<oneapi::tbb::queuing_mutex>,
487 NumbersCount);
488 assert(result == sum);
489 sum = Measure("Shared serial (Conc.HashTable)", ConcurrentHashSerialFib, NumbersCount);
490 assert(result == sum);
491 sum = Measure("Parallel pipe/queue\t", ParallelPipeFib, NumbersCount);
492 assert(result == sum);
493 sum = Measure("Parallel reduce\t\t", parallel_reduceFib, NumbersCount);
494 assert(result == sum);
495 sum = Measure("Parallel scan\t\t", parallel_scanFib, NumbersCount);
496 assert(result == sum);
497 }
498
499 #ifdef __GNUC__
500 if (Verbose)
501 printf("Fibonacci number #%d modulo 2^64 is %lld\n\n", NumbersCount, result);
502 #else
503 if (Verbose)
504 printf("Fibonacci number #%d modulo 2^64 is %I64d\n\n", NumbersCount, result);
505 #endif
506 }
507 if (!Verbose)
508 printf("TEST PASSED\n");
509 // flush to prevent bufferization on exit
510 fflush(stdout);
511 return 0;
512 }
513
514 // Utils
515
Matrix2x2Multiply(const value a[2][2],const value b[2][2],value c[2][2])516 void Matrix2x2Multiply(const value a[2][2], const value b[2][2], value c[2][2]) {
517 for (int i = 0; i <= 1; i++)
518 for (int j = 0; j <= 1; j++)
519 c[i][j] = a[i][0] * b[0][j] + a[i][1] * b[1][j];
520 }
521
operator *(const Matrix2x2 & to) const522 Matrix2x2 Matrix2x2::operator*(const Matrix2x2 &to) const {
523 Matrix2x2 result;
524 Matrix2x2Multiply(v, to.v, result.v);
525 return result;
526 }
527