/*
    Copyright (c) 2005-2021 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/
16d86ed7fbStbbdev
17d86ed7fbStbbdev #include "Evolution.hpp"
18d86ed7fbStbbdev
19d86ed7fbStbbdev #ifdef USE_SSE
20d86ed7fbStbbdev /* Update states with SSE */
21d86ed7fbStbbdev
22d86ed7fbStbbdev #include <xmmintrin.h>
23d86ed7fbStbbdev #include <emmintrin.h>
24d86ed7fbStbbdev
// Packs one row of cells into a bit record with a one-bit halo on each side.
//
//   src   - `width` cells, one per char; each cell is expected to be 0 or 1
//   dst   - destination words; bits are OR-ed in, so the caller must have
//           zeroed them, and there must be room for (width + 2) bits
//   width - number of cells in the row
//
// Resulting layout (bit n lives in dst[n / 32], position n % 32):
//   bit 0          = src[width - 1]   (left halo: the row wraps around)
//   bit k + 1      = src[k]           for k in [0, width)
//   bit width + 1  = src[0]           (right halo)
static inline void create_record(char* src, unsigned* dst, unsigned width) {
    if (width == 0) {
        return; // nothing to pack, and src[width - 1] would be out of range
    }

    // Left halo. Cells are converted to unsigned before shifting so that a
    // cell landing on bit 31 never shifts into the sign bit of an int.
    dst[0] |= (unsigned)src[width - 1];

    // `col` is declared outside the loop on purpose: the halo write below
    // needs the value it has when the loop ends (col == width).
    unsigned col = 0;
    for (; col < width; ++col) {
        dst[(col + 1) / 32u] |= (unsigned)src[col] << ((col + 1) % 32u);
    }

    // Right halo at bit (width + 1).
    dst[(col + 1) / 32u] |= (unsigned)src[0] << ((col + 1) % 32u);
}
34d86ed7fbStbbdev
// Adds one bit-plane into a bit-sliced counter, 128 positions at a time.
//
// For every bit position, (B, A) is a two-bit counter (A is the low bit) and C
// is a sticky overflow flag. This call adds the bits of X[shift .. shift +
// size_sse_ar) to that counter:
//   A <- A xor X                  low bit of the sum
//   C <- C or (B and (A and X))   set once the carry leaves the second bit
//   B <- B xor (A and X)          carry out of the low bit goes into B
//
//   X           - input bit-planes
//   A, B, C     - counter planes, updated in place
//   size_sse_ar - number of 128-bit elements to process
//   shift       - element offset into X of the first plane to add
inline void sum_offset(__m128i* X,
                       __m128i* A,
                       __m128i* B,
                       __m128i* C,
                       unsigned size_sse_ar,
                       unsigned shift) {
    const __m128i* plane = X + shift;
    for (unsigned k = 0; k != size_sse_ar; ++k) {
        const __m128i addend = plane[k];
        const __m128i carry_low = _mm_and_si128(A[k], addend);
        A[k] = _mm_xor_si128(A[k], addend);
        // C must be updated before B: it needs B's value from before the carry.
        const __m128i carry_high = _mm_and_si128(B[k], carry_low);
        C[k] = _mm_or_si128(C[k], carry_high);
        B[k] = _mm_xor_si128(B[k], carry_low);
    }
}
48d86ed7fbStbbdev
shift_left2D(__m128i * X,unsigned height,unsigned size_sse_row)49d86ed7fbStbbdev inline void shift_left2D(__m128i* X, unsigned height, unsigned size_sse_row) {
50d86ed7fbStbbdev for (unsigned row = 0; row < height; ++row) {
51d86ed7fbStbbdev unsigned ind = row * size_sse_row;
52d86ed7fbStbbdev unsigned x0 = X[ind].m128i_u32[0] & 1;
53d86ed7fbStbbdev
54d86ed7fbStbbdev X[ind] =
55d86ed7fbStbbdev _mm_or_si128(_mm_srli_epi16(X[ind], 1), _mm_slli_epi16(_mm_srli_si128(X[ind], 2), 15));
56d86ed7fbStbbdev
57d86ed7fbStbbdev unsigned x1 = X[ind + 1].m128i_u32[0] & 1;
58d86ed7fbStbbdev X[ind + 1] = _mm_or_si128(_mm_srli_epi16(X[ind + 1], 1),
59d86ed7fbStbbdev _mm_slli_epi16(_mm_srli_si128(X[ind + 1], 2), 15));
60d86ed7fbStbbdev X[ind].m128i_u32[3] |= x1 << 31;
61d86ed7fbStbbdev
62d86ed7fbStbbdev unsigned x2 = X[ind + 2].m128i_u32[0] & 1;
63d86ed7fbStbbdev X[ind + 2] = _mm_or_si128(_mm_srli_epi16(X[ind + 2], 1),
64d86ed7fbStbbdev _mm_slli_epi16(_mm_srli_si128(X[ind + 2], 2), 15));
65d86ed7fbStbbdev X[ind + 1].m128i_u32[3] |= x2 << 31;
66d86ed7fbStbbdev
67d86ed7fbStbbdev unsigned* dst = (unsigned*)&X[ind];
68d86ed7fbStbbdev dst[301 / 32u] |= x0 << (301 % 32u);
69d86ed7fbStbbdev }
70d86ed7fbStbbdev }
71d86ed7fbStbbdev
shift_right2D(__m128i * X,unsigned height,unsigned size_sse_row)72d86ed7fbStbbdev inline void shift_right2D(__m128i* X, unsigned height, unsigned size_sse_row) {
73d86ed7fbStbbdev for (unsigned row = 0; row < height; ++row) {
74d86ed7fbStbbdev unsigned ind = row * size_sse_row;
75d86ed7fbStbbdev
76d86ed7fbStbbdev unsigned x0 = X[ind].m128i_u32[3];
77d86ed7fbStbbdev x0 >>= 31;
78d86ed7fbStbbdev X[ind] =
79d86ed7fbStbbdev _mm_or_si128(_mm_slli_epi16(X[ind], 1), _mm_srli_epi16(_mm_slli_si128(X[ind], 2), 15));
80d86ed7fbStbbdev
81d86ed7fbStbbdev unsigned x1 = X[ind + 1].m128i_u32[3];
82d86ed7fbStbbdev x1 >>= 31;
83d86ed7fbStbbdev X[ind + 1] = _mm_or_si128(_mm_slli_epi16(X[ind + 1], 1),
84d86ed7fbStbbdev _mm_srli_epi16(_mm_slli_si128(X[ind + 1], 2), 15));
85d86ed7fbStbbdev X[ind + 1].m128i_u32[0] |= x0;
86d86ed7fbStbbdev
87d86ed7fbStbbdev unsigned* dst = (unsigned*)&X[ind];
88d86ed7fbStbbdev unsigned x2 = dst[301 / 32u] & (1 << (301 % 32u));
89d86ed7fbStbbdev x2 >>= (301 % 32u);
90d86ed7fbStbbdev X[ind + 2] = _mm_or_si128(_mm_slli_epi16(X[ind + 2], 1),
91d86ed7fbStbbdev _mm_srli_epi16(_mm_slli_si128(X[ind + 2], 2), 15));
92d86ed7fbStbbdev X[ind + 2].m128i_u32[0] |= x1;
93d86ed7fbStbbdev X[ind].m128i_u32[0] |= x2;
94d86ed7fbStbbdev }
95d86ed7fbStbbdev }
96d86ed7fbStbbdev
UpdateState(Matrix * m_matrix,char * dest,int begin,int end)97d86ed7fbStbbdev void UpdateState(Matrix* m_matrix, char* dest, int begin, int end) {
98d86ed7fbStbbdev //300/128 + 1 =3, 3*300=900
99d86ed7fbStbbdev unsigned size_sse_row = m_matrix->width / 128 + 1; //3
100d86ed7fbStbbdev unsigned size_sse_ar = size_sse_row * (end - begin);
101d86ed7fbStbbdev __m128i X[906], A[900], B[900], C[900];
102d86ed7fbStbbdev char* mas = m_matrix->data;
103d86ed7fbStbbdev
104d86ed7fbStbbdev for (unsigned i = 0; i < size_sse_ar; ++i) {
105d86ed7fbStbbdev A[i].m128i_u32[0] = 0;
106d86ed7fbStbbdev A[i].m128i_u32[1] = 0;
107d86ed7fbStbbdev A[i].m128i_u32[2] = 0;
108d86ed7fbStbbdev A[i].m128i_u32[3] = 0;
109d86ed7fbStbbdev B[i].m128i_u32[0] = 0;
110d86ed7fbStbbdev B[i].m128i_u32[1] = 0;
111d86ed7fbStbbdev B[i].m128i_u32[2] = 0;
112d86ed7fbStbbdev B[i].m128i_u32[3] = 0;
113d86ed7fbStbbdev C[i].m128i_u32[0] = 0;
114d86ed7fbStbbdev C[i].m128i_u32[1] = 0;
115d86ed7fbStbbdev C[i].m128i_u32[2] = 0;
116d86ed7fbStbbdev C[i].m128i_u32[3] = 0;
117d86ed7fbStbbdev }
118d86ed7fbStbbdev
119d86ed7fbStbbdev for (unsigned i = 0; i < size_sse_ar + 6; ++i) {
120d86ed7fbStbbdev X[i].m128i_u32[0] = 0;
121d86ed7fbStbbdev X[i].m128i_u32[1] = 0;
122d86ed7fbStbbdev X[i].m128i_u32[2] = 0;
123d86ed7fbStbbdev X[i].m128i_u32[3] = 0;
124d86ed7fbStbbdev }
125d86ed7fbStbbdev
126d86ed7fbStbbdev // create X[] with bounds
127d86ed7fbStbbdev unsigned height = end - begin;
128d86ed7fbStbbdev unsigned width = m_matrix->width;
129d86ed7fbStbbdev for (unsigned row = 0; row < height; ++row) {
130d86ed7fbStbbdev char* src = &mas[(row + begin) * width];
131d86ed7fbStbbdev unsigned* dst = (unsigned*)&X[(row + 1) * size_sse_row];
132d86ed7fbStbbdev create_record(src, dst, width);
133d86ed7fbStbbdev }
134d86ed7fbStbbdev // create high row in X[]
135d86ed7fbStbbdev char* src;
136d86ed7fbStbbdev if (begin == 0) {
137d86ed7fbStbbdev src = &mas[(m_matrix->height - 1) * width];
138d86ed7fbStbbdev }
139d86ed7fbStbbdev else {
140d86ed7fbStbbdev src = &mas[(begin - 1) * width];
141d86ed7fbStbbdev }
142d86ed7fbStbbdev unsigned* dst = (unsigned*)X;
143d86ed7fbStbbdev create_record(src, dst, width);
144d86ed7fbStbbdev
145d86ed7fbStbbdev //create lower row in X[]
146d86ed7fbStbbdev if (end == m_matrix->height) {
147d86ed7fbStbbdev src = mas;
148d86ed7fbStbbdev }
149d86ed7fbStbbdev else {
150d86ed7fbStbbdev src = &mas[end * width];
151d86ed7fbStbbdev }
152d86ed7fbStbbdev dst = (unsigned*)&X[(height + 1) * size_sse_row];
153d86ed7fbStbbdev create_record(src, dst, width);
154d86ed7fbStbbdev
155d86ed7fbStbbdev //sum( C, B, A, X+offset_for_upwards ); high-left friend
156d86ed7fbStbbdev sum_offset(X, A, B, C, size_sse_ar, 0);
157d86ed7fbStbbdev
158d86ed7fbStbbdev //sum( C, B, A, X+offset_for_no_vertical_shift );
159d86ed7fbStbbdev sum_offset(X, A, B, C, size_sse_ar, size_sse_row);
160d86ed7fbStbbdev
161d86ed7fbStbbdev //sum( C, B, A, X+offset_for_downwards );
162d86ed7fbStbbdev sum_offset(X, A, B, C, size_sse_ar, 2 * size_sse_row);
163d86ed7fbStbbdev
164d86ed7fbStbbdev //shift_left( X ); (when view 2D) in our logic it is in right
165d86ed7fbStbbdev height = end - begin + 2;
166d86ed7fbStbbdev shift_left2D(X, height, size_sse_row);
167d86ed7fbStbbdev
168d86ed7fbStbbdev //sum( C, B, A, X+offset_for_upwards ); high-left friend
169d86ed7fbStbbdev sum_offset(X, A, B, C, size_sse_ar, 0);
170d86ed7fbStbbdev
171d86ed7fbStbbdev //sum( C, B, A, X+offset_for_downwards );
172d86ed7fbStbbdev sum_offset(X, A, B, C, size_sse_ar, 2 * size_sse_row);
173d86ed7fbStbbdev
174d86ed7fbStbbdev //shift_left( X ); (view in 2D) in our logic it is right shift
175d86ed7fbStbbdev height = end - begin + 2;
176d86ed7fbStbbdev shift_left2D(X, height, size_sse_row);
177d86ed7fbStbbdev
178d86ed7fbStbbdev //sum( C, B, A, X+offset_for_upwards ); high-right friend
179d86ed7fbStbbdev sum_offset(X, A, B, C, size_sse_ar, 0);
180d86ed7fbStbbdev
181d86ed7fbStbbdev //sum( C, B, A, X+offset_for_no_vertical_shift ); right friend
182d86ed7fbStbbdev sum_offset(X, A, B, C, size_sse_ar, size_sse_row);
183d86ed7fbStbbdev
184d86ed7fbStbbdev //sum( C, B, A, X+offset_for_downwards ); right down friend
185d86ed7fbStbbdev sum_offset(X, A, B, C, size_sse_ar, 2 * size_sse_row);
186d86ed7fbStbbdev
187d86ed7fbStbbdev //shift_right( X ); (when view in 2D) in our case it left shift.
188d86ed7fbStbbdev height = end - begin + 2;
189d86ed7fbStbbdev shift_right2D(X, height, size_sse_row);
190d86ed7fbStbbdev
191d86ed7fbStbbdev //X = (X|A)&B&~C (done bitwise over the arrays)
192d86ed7fbStbbdev unsigned shift = size_sse_row;
193d86ed7fbStbbdev for (unsigned i = 0; i < size_sse_ar; ++i) {
194d86ed7fbStbbdev C[i].m128i_u32[0] = ~C[i].m128i_u32[0];
195d86ed7fbStbbdev C[i].m128i_u32[1] = ~C[i].m128i_u32[1];
196d86ed7fbStbbdev C[i].m128i_u32[2] = ~C[i].m128i_u32[2];
197d86ed7fbStbbdev C[i].m128i_u32[3] = ~C[i].m128i_u32[3];
198d86ed7fbStbbdev X[shift + i] = _mm_and_si128(_mm_and_si128(_mm_or_si128(X[shift + i], A[i]), B[i]), C[i]);
199d86ed7fbStbbdev }
200d86ed7fbStbbdev
201d86ed7fbStbbdev height = end - begin;
202d86ed7fbStbbdev width = m_matrix->width;
203d86ed7fbStbbdev for (unsigned row = 0; row < height; ++row) {
204d86ed7fbStbbdev char* dst = &dest[(row + begin) * width];
205d86ed7fbStbbdev unsigned* src = (unsigned*)&X[(row + 1) * size_sse_row];
206d86ed7fbStbbdev for (unsigned col = 0; col < width; ++col) {
207d86ed7fbStbbdev unsigned c = src[col / 32u] & 1 << (col % 32u);
208d86ed7fbStbbdev dst[col] = c >> (col % 32u);
209d86ed7fbStbbdev }
210d86ed7fbStbbdev }
211d86ed7fbStbbdev }
212d86ed7fbStbbdev #else
213d86ed7fbStbbdev /* end SSE block */
214d86ed7fbStbbdev
// ----------------------------------------------------------------------
// GetAdjacentCellState() - returns the state (value) of the specified
// adjacent cell of the current cell "cellNumber"
//
// The field is a row-major grid of x columns by y rows that wraps around at
// every edge: the neighbour "above" the top row is in the bottom row, the
// neighbour "left" of the first column is in the last column, and so on.
//
// Returns 0 when cp is not in 1..8, when the field is empty (x <= 0 or
// y <= 0), or when cellNumber does not lie inside the field.
char GetAdjacentCellState(char* source, // pointer to source data block
                          int x, // logical width of field
                          int y, // logical height of field
                          int cellNumber, // number of cell position to examine
                          int cp // which adjacent position
) {
    /*
       cp is numbered clockwise, starting at the upper-left neighbour:

            1 2 3
            8 x 4
            7 6 5
    */
    static const int rowStep[8] = { -1, -1, -1, 0, 1, 1, 1, 0 };
    static const int colStep[8] = { -1, 0, 1, 1, 1, 0, -1, -1 };

    if (cp < 1 || cp > 8) {
        return 0;
    }
    // An empty field has no neighbours; this also keeps the division and
    // modulo below well defined.
    if (x <= 0 || y <= 0 || cellNumber < 0) {
        return 0;
    }

    const int row = cellNumber / x;
    const int col = cellNumber % x;
    if (row >= y) {
        return 0; // cellNumber is past the end of the field
    }

    // Adding the dimension before taking the remainder keeps the operand
    // non-negative when stepping off the top row or the left column.
    const int neighbourRow = (row + rowStep[cp - 1] + y) % y;
    const int neighbourCol = (col + colStep[cp - 1] + x) % x;

    return source[neighbourRow * x + neighbourCol];
}
330d86ed7fbStbbdev
CheckCell(Matrix * m_matrix,int cellNumber)331d86ed7fbStbbdev char CheckCell(Matrix* m_matrix, int cellNumber) {
332d86ed7fbStbbdev char total = 0;
333d86ed7fbStbbdev char* source = m_matrix->data;
334d86ed7fbStbbdev //look around to find cell's with status "alive"
335d86ed7fbStbbdev for (int i = 1; i < 9; i++) {
336d86ed7fbStbbdev total += GetAdjacentCellState(source, m_matrix->width, m_matrix->height, cellNumber, i);
337d86ed7fbStbbdev }
338d86ed7fbStbbdev // if the number of adjacent live cells is < 2 or > 3, the result is a dead
339d86ed7fbStbbdev // cell regardless of its current state. (A live cell dies of loneliness if it
340d86ed7fbStbbdev // has less than 2 neighbors, and of overcrowding if it has more than 3; a new
341d86ed7fbStbbdev // cell is born in an empty spot only if it has exactly 3 neighbors.
342d86ed7fbStbbdev if (total < 2 || total > 3) {
343d86ed7fbStbbdev return 0;
344d86ed7fbStbbdev }
345d86ed7fbStbbdev
346d86ed7fbStbbdev // if we get here and the cell position holds a living cell, it stays alive
347d86ed7fbStbbdev if (*(source + cellNumber)) {
348d86ed7fbStbbdev return 1;
349d86ed7fbStbbdev }
350d86ed7fbStbbdev
351d86ed7fbStbbdev // we have an empty position. If there are only 2 neighbors, the position stays
352d86ed7fbStbbdev // empty.
353d86ed7fbStbbdev if (total == 2) {
354d86ed7fbStbbdev return 0;
355d86ed7fbStbbdev }
356d86ed7fbStbbdev
357d86ed7fbStbbdev // we have an empty position and exactly 3 neighbors. A cell is born.
358d86ed7fbStbbdev return 1;
359d86ed7fbStbbdev }
360d86ed7fbStbbdev
UpdateState(Matrix * m_matrix,char * dest,int begin,int end)361d86ed7fbStbbdev void UpdateState(Matrix* m_matrix, char* dest, int begin, int end) {
362d86ed7fbStbbdev for (int i = begin; i <= end; i++) {
363d86ed7fbStbbdev *(dest + i) = CheckCell(m_matrix, i);
364d86ed7fbStbbdev }
365d86ed7fbStbbdev }
366d86ed7fbStbbdev
367d86ed7fbStbbdev #endif
368d86ed7fbStbbdev /* end non-SSE block */
369