1d86ed7fbStbbdev /*
2b15aabb3Stbbdev Copyright (c) 2005-2021 Intel Corporation
3d86ed7fbStbbdev
4d86ed7fbStbbdev Licensed under the Apache License, Version 2.0 (the "License");
5d86ed7fbStbbdev you may not use this file except in compliance with the License.
6d86ed7fbStbbdev You may obtain a copy of the License at
7d86ed7fbStbbdev
8d86ed7fbStbbdev http://www.apache.org/licenses/LICENSE-2.0
9d86ed7fbStbbdev
10d86ed7fbStbbdev Unless required by applicable law or agreed to in writing, software
11d86ed7fbStbbdev distributed under the License is distributed on an "AS IS" BASIS,
12d86ed7fbStbbdev WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13d86ed7fbStbbdev See the License for the specific language governing permissions and
14d86ed7fbStbbdev limitations under the License.
15d86ed7fbStbbdev */
16d86ed7fbStbbdev
17d86ed7fbStbbdev /*
18d86ed7fbStbbdev Evolution.cpp: implementation file for evolution classes; evolution
19d86ed7fbStbbdev classes do looped evolution of patterns in a defined
20d86ed7fbStbbdev 2 dimensional space
21d86ed7fbStbbdev */
22d86ed7fbStbbdev
23d86ed7fbStbbdev #include "common/utility/get_default_num_threads.hpp"
24d86ed7fbStbbdev
25d86ed7fbStbbdev #include "Evolution.hpp"
26d86ed7fbStbbdev #include "Board.hpp"
27d86ed7fbStbbdev
//! GRAIN_SIZE is passed as the grain size of the blocked_range built in
//! ParallelEvolution::Step(). The SSE build iterates over m_matrix->height
//! and uses a much smaller grain than the non-SSE build, which iterates over
//! m_size - 1.
#if defined(USE_SSE)
#define GRAIN_SIZE 14
#else
#define GRAIN_SIZE 4000
#endif
//! NOTE(review): TIME_SLICE is not referenced by the code visible in this
//! file section; its unit is not stated here - confirm before relying on it.
#define TIME_SLICE 330
34d86ed7fbStbbdev
35d86ed7fbStbbdev /*
36d86ed7fbStbbdev Evolution
37d86ed7fbStbbdev */
38d86ed7fbStbbdev
39d86ed7fbStbbdev /**
40d86ed7fbStbbdev Evolution::UpdateMatrix() - moves the calculated destination data
41d86ed7fbStbbdev to the source data block. No destination zeroing is required since it will
42d86ed7fbStbbdev be completely overwritten during the next calculation cycle.
43d86ed7fbStbbdev **/
UpdateMatrix()44d86ed7fbStbbdev void Evolution::UpdateMatrix() {
45d86ed7fbStbbdev memcpy(m_matrix->data, m_dest, m_size);
46d86ed7fbStbbdev }
47d86ed7fbStbbdev
48d86ed7fbStbbdev /*
49d86ed7fbStbbdev SequentialEvolution
50d86ed7fbStbbdev */
51d86ed7fbStbbdev
52d86ed7fbStbbdev //! SequentialEvolution::Run - begins looped evolution
Run(double execution_time,int nthread)53d86ed7fbStbbdev void SequentialEvolution::Run(double execution_time, int nthread) {
54d86ed7fbStbbdev printf("Starting game (Sequential evolution)\n");
55d86ed7fbStbbdev
56d86ed7fbStbbdev m_nIteration = 0;
57d86ed7fbStbbdev m_serial_time = 0;
58d86ed7fbStbbdev oneapi::tbb::tick_count t0 = oneapi::tbb::tick_count::now();
59d86ed7fbStbbdev while (!m_done) {
60d86ed7fbStbbdev if (!is_paused) {
61d86ed7fbStbbdev oneapi::tbb::tick_count t = oneapi::tbb::tick_count::now();
62d86ed7fbStbbdev Step();
63d86ed7fbStbbdev oneapi::tbb::tick_count t1 = oneapi::tbb::tick_count::now();
64d86ed7fbStbbdev ++m_nIteration;
65d86ed7fbStbbdev double work_time = (t1 - t0).seconds();
66d86ed7fbStbbdev m_serial_time += work_time;
67d86ed7fbStbbdev }
68d86ed7fbStbbdev //! Let the parallel algorithm work uncontended almost the same time
69d86ed7fbStbbdev //! as the serial one. See ParallelEvolution::Run() as well.
70d86ed7fbStbbdev t0 = oneapi::tbb::tick_count::now();
71d86ed7fbStbbdev if (m_serial_time > execution_time) {
72d86ed7fbStbbdev printf("iterations count = %d time = %g\n", m_nIteration, m_serial_time);
73d86ed7fbStbbdev break;
74d86ed7fbStbbdev }
75d86ed7fbStbbdev }
76d86ed7fbStbbdev }
77d86ed7fbStbbdev
78d86ed7fbStbbdev //! SequentialEvolution::Step() - override of step method
Step()79d86ed7fbStbbdev void SequentialEvolution::Step() {
80d86ed7fbStbbdev if (!is_paused) {
81d86ed7fbStbbdev #ifdef USE_SSE
82d86ed7fbStbbdev UpdateState(m_matrix, m_matrix->data, 0, m_matrix->height);
83d86ed7fbStbbdev #else
84d86ed7fbStbbdev UpdateState(m_matrix, m_dest, 0, (m_matrix->width * m_matrix->height) - 1);
85d86ed7fbStbbdev UpdateMatrix();
86d86ed7fbStbbdev #endif
87d86ed7fbStbbdev }
88d86ed7fbStbbdev }
89d86ed7fbStbbdev
90d86ed7fbStbbdev /*
91d86ed7fbStbbdev ParallelEvolution
92d86ed7fbStbbdev */
93d86ed7fbStbbdev
94d86ed7fbStbbdev //! SequentialEvolution::Run - begins looped evolution
Run(double execution_time,int nthread)95d86ed7fbStbbdev void ParallelEvolution::Run(double execution_time, int nthread) {
96d86ed7fbStbbdev if (nthread == utility::get_default_num_threads())
97d86ed7fbStbbdev printf("Starting game (Parallel evolution for automatic number of thread(s))\n");
98d86ed7fbStbbdev else
99d86ed7fbStbbdev printf("Starting game (Parallel evolution for %d thread(s))\n", nthread);
100d86ed7fbStbbdev
101d86ed7fbStbbdev m_nIteration = 0;
102d86ed7fbStbbdev m_parallel_time = 0;
103d86ed7fbStbbdev
104d86ed7fbStbbdev oneapi::tbb::global_control* pGlobControl = new oneapi::tbb::global_control(
105d86ed7fbStbbdev oneapi::tbb::global_control::max_allowed_parallelism, nthread);
106d86ed7fbStbbdev
107d86ed7fbStbbdev double work_time = m_serial_time;
108d86ed7fbStbbdev oneapi::tbb::tick_count t0 = oneapi::tbb::tick_count::now();
109d86ed7fbStbbdev
110d86ed7fbStbbdev while (!m_done) {
111d86ed7fbStbbdev if (!is_paused) {
112d86ed7fbStbbdev oneapi::tbb::tick_count t = oneapi::tbb::tick_count::now();
113d86ed7fbStbbdev Step();
114d86ed7fbStbbdev oneapi::tbb::tick_count t1 = oneapi::tbb::tick_count::now();
115d86ed7fbStbbdev ++m_nIteration;
116d86ed7fbStbbdev double real_work_time = (t1 - t0).seconds();
117d86ed7fbStbbdev m_parallel_time += real_work_time;
118d86ed7fbStbbdev }
119d86ed7fbStbbdev //! Let the serial algorithm work the same time as the parallel one.
120d86ed7fbStbbdev t0 = oneapi::tbb::tick_count::now();
121d86ed7fbStbbdev if (m_parallel_time > execution_time) {
122d86ed7fbStbbdev printf("iterations count = %d time = %g\n", m_nIteration, m_parallel_time);
123d86ed7fbStbbdev delete pGlobControl;
124d86ed7fbStbbdev pGlobControl = nullptr;
125d86ed7fbStbbdev break;
126d86ed7fbStbbdev }
127d86ed7fbStbbdev }
128d86ed7fbStbbdev delete pGlobControl;
129*ba947f18SIlya Isaev pGlobControl = nullptr;
130d86ed7fbStbbdev }
131d86ed7fbStbbdev
132d86ed7fbStbbdev /**
133d86ed7fbStbbdev class tbb_parallel_task
134d86ed7fbStbbdev
135d86ed7fbStbbdev TBB requires a class for parallel loop implementations. The actual
136d86ed7fbStbbdev loop "chunks" are performed using the () operator of the class.
137d86ed7fbStbbdev The blocked_range contains the range to calculate. Please see the
138d86ed7fbStbbdev TBB documentation for more information.
139d86ed7fbStbbdev **/
140d86ed7fbStbbdev class tbb_parallel_task {
141d86ed7fbStbbdev public:
set_values(Matrix * source,char * dest)142d86ed7fbStbbdev static void set_values(Matrix* source, char* dest) {
143d86ed7fbStbbdev m_source = source;
144d86ed7fbStbbdev m_dest = dest;
145d86ed7fbStbbdev return;
146d86ed7fbStbbdev }
147d86ed7fbStbbdev
operator ()(const oneapi::tbb::blocked_range<std::size_t> & r) const148d86ed7fbStbbdev void operator()(const oneapi::tbb::blocked_range<std::size_t>& r) const {
149d86ed7fbStbbdev int begin = (int)r.begin(); //! capture lower range number for this chunk
150d86ed7fbStbbdev int end = (int)r.end(); //! capture upper range number for this chunk
151d86ed7fbStbbdev UpdateState(m_source, m_dest, begin, end);
152d86ed7fbStbbdev }
153d86ed7fbStbbdev
tbb_parallel_task()154d86ed7fbStbbdev tbb_parallel_task() {}
155d86ed7fbStbbdev
156d86ed7fbStbbdev private:
157d86ed7fbStbbdev static Matrix* m_source;
158d86ed7fbStbbdev static char* m_dest;
159d86ed7fbStbbdev };
160d86ed7fbStbbdev
161d86ed7fbStbbdev Matrix* tbb_parallel_task::m_source;
162d86ed7fbStbbdev char* tbb_parallel_task::m_dest;
163d86ed7fbStbbdev
164d86ed7fbStbbdev //! ParallelEvolution::Step() - override of Step method
Step()165d86ed7fbStbbdev void ParallelEvolution::Step() {
166d86ed7fbStbbdev std::size_t begin = 0; //! beginning cell position
167d86ed7fbStbbdev #ifdef USE_SSE
168d86ed7fbStbbdev std::size_t end = m_matrix->height; //! ending cell position
169d86ed7fbStbbdev #else
170d86ed7fbStbbdev std::size_t end = m_size - 1; //! ending cell position
171d86ed7fbStbbdev #endif
172d86ed7fbStbbdev
173d86ed7fbStbbdev //! set matrix pointers
174d86ed7fbStbbdev tbb_parallel_task::set_values(m_matrix, m_dest);
175d86ed7fbStbbdev
176d86ed7fbStbbdev //! do calculation loop
177d86ed7fbStbbdev parallel_for(oneapi::tbb::blocked_range<std::size_t>(begin, end, GRAIN_SIZE),
178d86ed7fbStbbdev tbb_parallel_task());
179d86ed7fbStbbdev UpdateMatrix();
180d86ed7fbStbbdev }
181