1d86ed7fbStbbdev /*
2*b15aabb3Stbbdev     Copyright (c) 2005-2021 Intel Corporation
3d86ed7fbStbbdev 
4d86ed7fbStbbdev     Licensed under the Apache License, Version 2.0 (the "License");
5d86ed7fbStbbdev     you may not use this file except in compliance with the License.
6d86ed7fbStbbdev     You may obtain a copy of the License at
7d86ed7fbStbbdev 
8d86ed7fbStbbdev         http://www.apache.org/licenses/LICENSE-2.0
9d86ed7fbStbbdev 
10d86ed7fbStbbdev     Unless required by applicable law or agreed to in writing, software
11d86ed7fbStbbdev     distributed under the License is distributed on an "AS IS" BASIS,
12d86ed7fbStbbdev     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13d86ed7fbStbbdev     See the License for the specific language governing permissions and
14d86ed7fbStbbdev     limitations under the License.
15d86ed7fbStbbdev */
16d86ed7fbStbbdev 
17d86ed7fbStbbdev #include "Evolution.hpp"
18d86ed7fbStbbdev 
19d86ed7fbStbbdev #ifdef USE_SSE
20d86ed7fbStbbdev /* Update states with SSE */
21d86ed7fbStbbdev 
22d86ed7fbStbbdev #include <xmmintrin.h>
23d86ed7fbStbbdev #include <emmintrin.h>
24d86ed7fbStbbdev 
/*
 * Packs one row of cells (one char per cell, expected to hold 0 or 1) into a
 * bit record, adding the two wrap-around neighbours of the row:
 *
 *   bit 0            <- src[width - 1]   (left neighbour of the first cell)
 *   bit (col + 1)    <- src[col]         for col in [0, width)
 *   bit (width + 1)  <- src[0]           (right neighbour of the last cell)
 *
 * Bits are only OR-ed into dst, so the caller must zero the destination words
 * first. dst must provide at least (width + 1) / 32 + 1 words.
 * A zero width leaves dst untouched.
 */
inline void create_record(char* src, unsigned* dst, unsigned width) {
    if (width == 0)
        return;

    // left wrap-around bit
    dst[0] |= (unsigned)src[width - 1];

    // the cells themselves; shifting an unsigned value keeps bit 31 well defined
    for (unsigned col = 0; col < width; ++col)
        dst[(col + 1) / 32u] |= (unsigned)src[col] << ((col + 1) % 32u);

    // right wrap-around bit, placed directly after the last cell
    dst[(width + 1) / 32u] |= (unsigned)src[0] << ((width + 1) % 32u);
}
34d86ed7fbStbbdev 
/*
 * Adds the bits of X[shift .. shift + size_sse_ar) into a bit-sliced counter
 * kept in A (ones), B (twos) and C. C is only ever OR-ed, so it is a sticky
 * flag that is set once a bit position has been incremented four times.
 * Each of the 128 bit positions of every vector is counted independently.
 */
inline void sum_offset(__m128i* X,
                       __m128i* A,
                       __m128i* B,
                       __m128i* C,
                       unsigned size_sse_ar,
                       unsigned shift) {
    const __m128i* addend = X + shift;
    for (unsigned k = 0; k != size_sse_ar; ++k) {
        const __m128i in = addend[k];
        // carry out of the ones plane
        const __m128i carry = _mm_and_si128(A[k], in);
        A[k] = _mm_xor_si128(A[k], in);
        // a carry out of the twos plane marks the position as overflowed
        C[k] = _mm_or_si128(C[k], _mm_and_si128(B[k], carry));
        B[k] = _mm_xor_si128(B[k], carry);
    }
}
48d86ed7fbStbbdev 
/*
 * Moves every packed row of X one bit towards bit 0 (a logical right shift of
 * the row's bit string), wrapping the bit that falls off position 0 around to
 * position 301.
 *
 *   X            - packed rows, size_sse_row vectors apart
 *   height       - number of rows to process
 *   size_sse_row - stride between rows, in __m128i units
 *
 * The routine always processes exactly three vectors per row and uses the
 * fixed wrap position 301; that is the index of the right wrap-around bit in
 * the record layout built by create_record for a 300-cell-wide row.
 *
 * The 32-bit words are accessed through an unsigned pointer instead of the
 * MSVC-only __m128i::m128i_u32 member so the code also builds with compilers
 * where __m128i is a plain vector type.
 */
inline void shift_left2D(__m128i* X, unsigned height, unsigned size_sse_row) {
    const unsigned vectors_per_row = 3u;
    const unsigned wrap_bit = 301u;

    for (unsigned row = 0; row < height; ++row) {
        __m128i* vec = &X[row * size_sse_row];
        unsigned* words = (unsigned*)vec; // four 32-bit words per vector

        // bit 0 of the row leaves on this shift; remember it for the wrap-around
        const unsigned wrapped = words[0] & 1u;

        for (unsigned v = 0; v < vectors_per_row; ++v) {
            // lowest bit of this vector has to move into the previous vector
            const unsigned low = words[4u * v] & 1u;

            // 128-bit shift right by one: shift each 16-bit lane, then bring in
            // the lowest bit of the next-higher lane as the new top bit
            vec[v] = _mm_or_si128(_mm_srli_epi16(vec[v], 1),
                                  _mm_slli_epi16(_mm_srli_si128(vec[v], 2), 15));

            if (v != 0)
                words[4u * v - 1u] |= low << 31; // top bit of the previous vector
        }

        words[wrap_bit / 32u] |= wrapped << (wrap_bit % 32u);
    }
}
71d86ed7fbStbbdev 
/*
 * Moves every packed row of X one bit away from bit 0 (a logical left shift of
 * the row's bit string) and copies the bit found at position 301 before the
 * shift into position 0.
 *
 *   X            - packed rows, size_sse_row vectors apart
 *   height       - number of rows to process
 *   size_sse_row - stride between rows, in __m128i units
 *
 * The routine always processes exactly three vectors per row and uses the
 * fixed position 301; that is the index of the right wrap-around bit in the
 * record layout built by create_record for a 300-cell-wide row.
 *
 * The 32-bit words are accessed through an unsigned pointer instead of the
 * MSVC-only __m128i::m128i_u32 member so the code also builds with compilers
 * where __m128i is a plain vector type.
 */
inline void shift_right2D(__m128i* X, unsigned height, unsigned size_sse_row) {
    const unsigned vectors_per_row = 3u;
    const unsigned wrap_bit = 301u;

    for (unsigned row = 0; row < height; ++row) {
        __m128i* vec = &X[row * size_sse_row];
        unsigned* words = (unsigned*)vec; // four 32-bit words per vector

        // sample the wrap-around bit before any vector of the row is shifted
        const unsigned wrapped = (words[wrap_bit / 32u] >> (wrap_bit % 32u)) & 1u;

        unsigned carry = 0u; // top bit of the previous vector
        for (unsigned v = 0; v < vectors_per_row; ++v) {
            const unsigned top = words[4u * v + 3u] >> 31;

            // 128-bit shift left by one: shift each 16-bit lane, then bring in
            // the top bit of the next-lower lane as the new lowest bit
            vec[v] = _mm_or_si128(_mm_slli_epi16(vec[v], 1),
                                  _mm_srli_epi16(_mm_slli_si128(vec[v], 2), 15));

            words[4u * v] |= carry;
            carry = top;
        }

        words[0] |= wrapped;
    }
}
96d86ed7fbStbbdev 
UpdateState(Matrix * m_matrix,char * dest,int begin,int end)97d86ed7fbStbbdev void UpdateState(Matrix* m_matrix, char* dest, int begin, int end) {
98d86ed7fbStbbdev     //300/128 + 1 =3, 3*300=900
99d86ed7fbStbbdev     unsigned size_sse_row = m_matrix->width / 128 + 1; //3
100d86ed7fbStbbdev     unsigned size_sse_ar = size_sse_row * (end - begin);
101d86ed7fbStbbdev     __m128i X[906], A[900], B[900], C[900];
102d86ed7fbStbbdev     char* mas = m_matrix->data;
103d86ed7fbStbbdev 
104d86ed7fbStbbdev     for (unsigned i = 0; i < size_sse_ar; ++i) {
105d86ed7fbStbbdev         A[i].m128i_u32[0] = 0;
106d86ed7fbStbbdev         A[i].m128i_u32[1] = 0;
107d86ed7fbStbbdev         A[i].m128i_u32[2] = 0;
108d86ed7fbStbbdev         A[i].m128i_u32[3] = 0;
109d86ed7fbStbbdev         B[i].m128i_u32[0] = 0;
110d86ed7fbStbbdev         B[i].m128i_u32[1] = 0;
111d86ed7fbStbbdev         B[i].m128i_u32[2] = 0;
112d86ed7fbStbbdev         B[i].m128i_u32[3] = 0;
113d86ed7fbStbbdev         C[i].m128i_u32[0] = 0;
114d86ed7fbStbbdev         C[i].m128i_u32[1] = 0;
115d86ed7fbStbbdev         C[i].m128i_u32[2] = 0;
116d86ed7fbStbbdev         C[i].m128i_u32[3] = 0;
117d86ed7fbStbbdev     }
118d86ed7fbStbbdev 
119d86ed7fbStbbdev     for (unsigned i = 0; i < size_sse_ar + 6; ++i) {
120d86ed7fbStbbdev         X[i].m128i_u32[0] = 0;
121d86ed7fbStbbdev         X[i].m128i_u32[1] = 0;
122d86ed7fbStbbdev         X[i].m128i_u32[2] = 0;
123d86ed7fbStbbdev         X[i].m128i_u32[3] = 0;
124d86ed7fbStbbdev     }
125d86ed7fbStbbdev 
126d86ed7fbStbbdev     // create X[] with bounds
127d86ed7fbStbbdev     unsigned height = end - begin;
128d86ed7fbStbbdev     unsigned width = m_matrix->width;
129d86ed7fbStbbdev     for (unsigned row = 0; row < height; ++row) {
130d86ed7fbStbbdev         char* src = &mas[(row + begin) * width];
131d86ed7fbStbbdev         unsigned* dst = (unsigned*)&X[(row + 1) * size_sse_row];
132d86ed7fbStbbdev         create_record(src, dst, width);
133d86ed7fbStbbdev     }
134d86ed7fbStbbdev     // create high row in X[]
135d86ed7fbStbbdev     char* src;
136d86ed7fbStbbdev     if (begin == 0) {
137d86ed7fbStbbdev         src = &mas[(m_matrix->height - 1) * width];
138d86ed7fbStbbdev     }
139d86ed7fbStbbdev     else {
140d86ed7fbStbbdev         src = &mas[(begin - 1) * width];
141d86ed7fbStbbdev     }
142d86ed7fbStbbdev     unsigned* dst = (unsigned*)X;
143d86ed7fbStbbdev     create_record(src, dst, width);
144d86ed7fbStbbdev 
145d86ed7fbStbbdev     //create lower row in X[]
146d86ed7fbStbbdev     if (end == m_matrix->height) {
147d86ed7fbStbbdev         src = mas;
148d86ed7fbStbbdev     }
149d86ed7fbStbbdev     else {
150d86ed7fbStbbdev         src = &mas[end * width];
151d86ed7fbStbbdev     }
152d86ed7fbStbbdev     dst = (unsigned*)&X[(height + 1) * size_sse_row];
153d86ed7fbStbbdev     create_record(src, dst, width);
154d86ed7fbStbbdev 
155d86ed7fbStbbdev     //sum( C, B, A, X+offset_for_upwards ); high-left friend
156d86ed7fbStbbdev     sum_offset(X, A, B, C, size_sse_ar, 0);
157d86ed7fbStbbdev 
158d86ed7fbStbbdev     //sum( C, B, A, X+offset_for_no_vertical_shift );
159d86ed7fbStbbdev     sum_offset(X, A, B, C, size_sse_ar, size_sse_row);
160d86ed7fbStbbdev 
161d86ed7fbStbbdev     //sum( C, B, A, X+offset_for_downwards );
162d86ed7fbStbbdev     sum_offset(X, A, B, C, size_sse_ar, 2 * size_sse_row);
163d86ed7fbStbbdev 
164d86ed7fbStbbdev     //shift_left( X ); (when view 2D) in our logic it is in right
165d86ed7fbStbbdev     height = end - begin + 2;
166d86ed7fbStbbdev     shift_left2D(X, height, size_sse_row);
167d86ed7fbStbbdev 
168d86ed7fbStbbdev     //sum( C, B, A, X+offset_for_upwards ); high-left friend
169d86ed7fbStbbdev     sum_offset(X, A, B, C, size_sse_ar, 0);
170d86ed7fbStbbdev 
171d86ed7fbStbbdev     //sum( C, B, A, X+offset_for_downwards );
172d86ed7fbStbbdev     sum_offset(X, A, B, C, size_sse_ar, 2 * size_sse_row);
173d86ed7fbStbbdev 
174d86ed7fbStbbdev     //shift_left( X ); (view in 2D) in our logic it is right shift
175d86ed7fbStbbdev     height = end - begin + 2;
176d86ed7fbStbbdev     shift_left2D(X, height, size_sse_row);
177d86ed7fbStbbdev 
178d86ed7fbStbbdev     //sum( C, B, A, X+offset_for_upwards ); high-right friend
179d86ed7fbStbbdev     sum_offset(X, A, B, C, size_sse_ar, 0);
180d86ed7fbStbbdev 
181d86ed7fbStbbdev     //sum( C, B, A, X+offset_for_no_vertical_shift ); right friend
182d86ed7fbStbbdev     sum_offset(X, A, B, C, size_sse_ar, size_sse_row);
183d86ed7fbStbbdev 
184d86ed7fbStbbdev     //sum( C, B, A, X+offset_for_downwards ); right down friend
185d86ed7fbStbbdev     sum_offset(X, A, B, C, size_sse_ar, 2 * size_sse_row);
186d86ed7fbStbbdev 
187d86ed7fbStbbdev     //shift_right( X ); (when view in 2D) in our case it left shift.
188d86ed7fbStbbdev     height = end - begin + 2;
189d86ed7fbStbbdev     shift_right2D(X, height, size_sse_row);
190d86ed7fbStbbdev 
191d86ed7fbStbbdev     //X = (X|A)&B&~C (done bitwise over the arrays)
192d86ed7fbStbbdev     unsigned shift = size_sse_row;
193d86ed7fbStbbdev     for (unsigned i = 0; i < size_sse_ar; ++i) {
194d86ed7fbStbbdev         C[i].m128i_u32[0] = ~C[i].m128i_u32[0];
195d86ed7fbStbbdev         C[i].m128i_u32[1] = ~C[i].m128i_u32[1];
196d86ed7fbStbbdev         C[i].m128i_u32[2] = ~C[i].m128i_u32[2];
197d86ed7fbStbbdev         C[i].m128i_u32[3] = ~C[i].m128i_u32[3];
198d86ed7fbStbbdev         X[shift + i] = _mm_and_si128(_mm_and_si128(_mm_or_si128(X[shift + i], A[i]), B[i]), C[i]);
199d86ed7fbStbbdev     }
200d86ed7fbStbbdev 
201d86ed7fbStbbdev     height = end - begin;
202d86ed7fbStbbdev     width = m_matrix->width;
203d86ed7fbStbbdev     for (unsigned row = 0; row < height; ++row) {
204d86ed7fbStbbdev         char* dst = &dest[(row + begin) * width];
205d86ed7fbStbbdev         unsigned* src = (unsigned*)&X[(row + 1) * size_sse_row];
206d86ed7fbStbbdev         for (unsigned col = 0; col < width; ++col) {
207d86ed7fbStbbdev             unsigned c = src[col / 32u] & 1 << (col % 32u);
208d86ed7fbStbbdev             dst[col] = c >> (col % 32u);
209d86ed7fbStbbdev         }
210d86ed7fbStbbdev     }
211d86ed7fbStbbdev }
212d86ed7fbStbbdev #else
213d86ed7fbStbbdev /* end SSE block */
214d86ed7fbStbbdev 
// ----------------------------------------------------------------------
// GetAdjacentCellState() - returns the state (value) of the specified
// adjacent cell of the current cell "cellNumber". The field wraps around
// at all four edges.
char GetAdjacentCellState(char* source, // pointer to source data block
                          int x, // logical width of field
                          int y, // logical height of field
                          int cellNumber, // number of cell position to examine
                          int cp // which adjacent position
) {
    /*
    Neighbour numbering implemented by the switch below ("x" is the cell
    itself), running clockwise from the upper-left corner:

        1 2 3
        8 x 4
        7 6 5

    Any other cp value returns 0.
    */
    const int cellCount = x * y;
    const int lastRowStart = cellCount - x; // index of the first cell of the bottom row

    // boundary flags that trigger the field-wrap logic
    const bool onTopRow = cellNumber < x;
    const bool onBottomRow = cellCount - cellNumber <= x;
    const bool onLeftColumn = cellNumber % x == 0;
    const bool onRightColumn = (cellNumber + 1) % x == 0;

    int neighbour;
    switch (cp) {
        case 1: // upper left
            if (onTopRow) {
                neighbour = onLeftColumn ? cellCount - 1 : lastRowStart + (cellNumber - 1);
            }
            else {
                neighbour = onLeftColumn ? cellNumber - 1 : cellNumber - (x + 1);
            }
            break;

        case 2: // straight up
            neighbour = onTopRow ? lastRowStart + cellNumber : cellNumber - x;
            break;

        case 3: // upper right
            if (onTopRow) {
                neighbour = onRightColumn ? lastRowStart : lastRowStart + (cellNumber + 1);
            }
            else {
                neighbour = onRightColumn ? (cellNumber - (x * 2)) + 1 : cellNumber - (x - 1);
            }
            break;

        case 4: // right
            neighbour = onRightColumn ? cellNumber - (x - 1) : cellNumber + 1;
            break;

        case 5: // lower right
            if (onBottomRow) {
                neighbour = onRightColumn ? 0 : (cellNumber - lastRowStart) + 1;
            }
            else {
                neighbour = onRightColumn ? cellNumber + 1 : (cellNumber + x) + 1;
            }
            break;

        case 6: // straight down
            neighbour = onBottomRow ? cellNumber - lastRowStart : cellNumber + x;
            break;

        case 7: // lower left
            if (onBottomRow) {
                neighbour = onLeftColumn ? x - 1 : cellNumber - lastRowStart - 1;
            }
            else {
                neighbour = onLeftColumn ? cellNumber + ((x * 2) - 1) : cellNumber + (x - 1);
            }
            break;

        case 8: // left
            neighbour = onLeftColumn ? cellNumber + (x - 1) : cellNumber - 1;
            break;

        default: // unknown position
            return 0;
    }
    return source[neighbour];
}
330d86ed7fbStbbdev 
CheckCell(Matrix * m_matrix,int cellNumber)331d86ed7fbStbbdev char CheckCell(Matrix* m_matrix, int cellNumber) {
332d86ed7fbStbbdev     char total = 0;
333d86ed7fbStbbdev     char* source = m_matrix->data;
334d86ed7fbStbbdev     //look around to find cell's with status "alive"
335d86ed7fbStbbdev     for (int i = 1; i < 9; i++) {
336d86ed7fbStbbdev         total += GetAdjacentCellState(source, m_matrix->width, m_matrix->height, cellNumber, i);
337d86ed7fbStbbdev     }
338d86ed7fbStbbdev     // if the number of adjacent live cells is < 2 or > 3, the result is a dead
339d86ed7fbStbbdev     // cell regardless of its current state. (A live cell dies of loneliness if it
340d86ed7fbStbbdev     // has less than 2 neighbors, and of overcrowding if it has more than 3; a new
341d86ed7fbStbbdev     // cell is born in an empty spot only if it has exactly 3 neighbors.
342d86ed7fbStbbdev     if (total < 2 || total > 3) {
343d86ed7fbStbbdev         return 0;
344d86ed7fbStbbdev     }
345d86ed7fbStbbdev 
346d86ed7fbStbbdev     // if we get here and the cell position holds a living cell, it stays alive
347d86ed7fbStbbdev     if (*(source + cellNumber)) {
348d86ed7fbStbbdev         return 1;
349d86ed7fbStbbdev     }
350d86ed7fbStbbdev 
351d86ed7fbStbbdev     // we have an empty position. If there are only 2 neighbors, the position stays
352d86ed7fbStbbdev     // empty.
353d86ed7fbStbbdev     if (total == 2) {
354d86ed7fbStbbdev         return 0;
355d86ed7fbStbbdev     }
356d86ed7fbStbbdev 
357d86ed7fbStbbdev     // we have an empty position and exactly 3 neighbors. A cell is born.
358d86ed7fbStbbdev     return 1;
359d86ed7fbStbbdev }
360d86ed7fbStbbdev 
UpdateState(Matrix * m_matrix,char * dest,int begin,int end)361d86ed7fbStbbdev void UpdateState(Matrix* m_matrix, char* dest, int begin, int end) {
362d86ed7fbStbbdev     for (int i = begin; i <= end; i++) {
363d86ed7fbStbbdev         *(dest + i) = CheckCell(m_matrix, i);
364d86ed7fbStbbdev     }
365d86ed7fbStbbdev }
366d86ed7fbStbbdev 
367d86ed7fbStbbdev #endif
368d86ed7fbStbbdev /* end non-SSE block */
369