1 /*
2 * kmp_atomic.cpp -- ATOMIC implementation routines
3 */
4
5 //===----------------------------------------------------------------------===//
6 //
7 // The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "kmp_atomic.h"
15 #include "kmp.h" // TRUE, asm routines prototypes
16
17 typedef unsigned char uchar;
18 typedef unsigned short ushort;
19
20 /*!
21 @defgroup ATOMIC_OPS Atomic Operations
22 These functions are used for implementing the many different varieties of atomic
23 operations.
24
25 The compiler is at liberty to inline atomic operations that are naturally
26 supported by the target architecture. For instance on IA-32 architecture an
27 atomic like this can be inlined
28 @code
29 static int s = 0;
30 #pragma omp atomic
31 s++;
32 @endcode
33 using the single instruction: `lock; incl s`
34
35 However the runtime does provide entrypoints for these operations to support
36 compilers that choose not to inline them. (For instance,
37 `__kmpc_atomic_fixed4_add` could be used to perform the increment above.)
38
39 The names of the functions are encoded by using the data type name and the
40 operation name, as in these tables.
41
42 Data Type | Data type encoding
43 -----------|---------------
44 int8_t | `fixed1`
45 uint8_t | `fixed1u`
46 int16_t | `fixed2`
47 uint16_t | `fixed2u`
48 int32_t | `fixed4`
49 uint32_t | `fixed4u`
50 int32_t | `fixed8`
51 uint32_t | `fixed8u`
52 float | `float4`
53 double | `float8`
54 float 10 (8087 eighty bit float) | `float10`
55 complex<float> | `cmplx4`
56 complex<double> | `cmplx8`
57 complex<float10> | `cmplx10`
58 <br>
59
60 Operation | Operation encoding
61 ----------|-------------------
62 + | add
63 - | sub
64 \* | mul
65 / | div
66 & | andb
67 << | shl
68 \>\> | shr
69 \| | orb
70 ^ | xor
71 && | andl
72 \|\| | orl
73 maximum | max
74 minimum | min
75 .eqv. | eqv
76 .neqv. | neqv
77
78 <br>
79 For non-commutative operations, `_rev` can also be added for the reversed
80 operation. For the functions that capture the result, the suffix `_cpt` is
81 added.
82
83 Update Functions
84 ================
85 The general form of an atomic function that just performs an update (without a
86 `capture`)
87 @code
88 void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
89 lhs, TYPE rhs );
90 @endcode
91 @param ident_t a pointer to source location
92 @param gtid the global thread id
93 @param lhs a pointer to the left operand
94 @param rhs the right operand
95
96 `capture` functions
97 ===================
98 The capture functions perform an atomic update and return a result, which is
99 either the value before the capture, or that after. They take an additional
100 argument to determine which result is returned.
101 Their general form is therefore
102 @code
103 TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
104 lhs, TYPE rhs, int flag );
105 @endcode
106 @param ident_t a pointer to source location
107 @param gtid the global thread id
108 @param lhs a pointer to the left operand
109 @param rhs the right operand
110 @param flag one if the result is to be captured *after* the operation, zero if
111 captured *before*.
112
113 The one set of exceptions to this is the `complex<float>` type where the value
114 is not returned, rather an extra argument pointer is passed.
115
116 They look like
117 @code
118 void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 *
119 lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
120 @endcode
121
122 Read and Write Operations
123 =========================
124 The OpenMP<sup>*</sup> standard now supports atomic operations that simply
125 ensure that the value is read or written atomically, with no modification
126 performed. In many cases on IA-32 architecture these operations can be inlined
127 since the architecture guarantees that no tearing occurs on aligned objects
128 accessed with a single memory operation of up to 64 bits in size.
129
130 The general form of the read operations is
131 @code
132 TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
133 @endcode
134
135 For the write operations the form is
136 @code
137 void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs
138 );
139 @endcode
140
141 Full list of functions
142 ======================
143 This leads to the generation of 376 atomic functions, as follows.
144
145 Functons for integers
146 ---------------------
147 There are versions here for integers of size 1,2,4 and 8 bytes both signed and
148 unsigned (where that matters).
149 @code
150 __kmpc_atomic_fixed1_add
151 __kmpc_atomic_fixed1_add_cpt
152 __kmpc_atomic_fixed1_add_fp
153 __kmpc_atomic_fixed1_andb
154 __kmpc_atomic_fixed1_andb_cpt
155 __kmpc_atomic_fixed1_andl
156 __kmpc_atomic_fixed1_andl_cpt
157 __kmpc_atomic_fixed1_div
158 __kmpc_atomic_fixed1_div_cpt
159 __kmpc_atomic_fixed1_div_cpt_rev
160 __kmpc_atomic_fixed1_div_float8
161 __kmpc_atomic_fixed1_div_fp
162 __kmpc_atomic_fixed1_div_rev
163 __kmpc_atomic_fixed1_eqv
164 __kmpc_atomic_fixed1_eqv_cpt
165 __kmpc_atomic_fixed1_max
166 __kmpc_atomic_fixed1_max_cpt
167 __kmpc_atomic_fixed1_min
168 __kmpc_atomic_fixed1_min_cpt
169 __kmpc_atomic_fixed1_mul
170 __kmpc_atomic_fixed1_mul_cpt
171 __kmpc_atomic_fixed1_mul_float8
172 __kmpc_atomic_fixed1_mul_fp
173 __kmpc_atomic_fixed1_neqv
174 __kmpc_atomic_fixed1_neqv_cpt
175 __kmpc_atomic_fixed1_orb
176 __kmpc_atomic_fixed1_orb_cpt
177 __kmpc_atomic_fixed1_orl
178 __kmpc_atomic_fixed1_orl_cpt
179 __kmpc_atomic_fixed1_rd
180 __kmpc_atomic_fixed1_shl
181 __kmpc_atomic_fixed1_shl_cpt
182 __kmpc_atomic_fixed1_shl_cpt_rev
183 __kmpc_atomic_fixed1_shl_rev
184 __kmpc_atomic_fixed1_shr
185 __kmpc_atomic_fixed1_shr_cpt
186 __kmpc_atomic_fixed1_shr_cpt_rev
187 __kmpc_atomic_fixed1_shr_rev
188 __kmpc_atomic_fixed1_sub
189 __kmpc_atomic_fixed1_sub_cpt
190 __kmpc_atomic_fixed1_sub_cpt_rev
191 __kmpc_atomic_fixed1_sub_fp
192 __kmpc_atomic_fixed1_sub_rev
193 __kmpc_atomic_fixed1_swp
194 __kmpc_atomic_fixed1_wr
195 __kmpc_atomic_fixed1_xor
196 __kmpc_atomic_fixed1_xor_cpt
197 __kmpc_atomic_fixed1u_add_fp
198 __kmpc_atomic_fixed1u_sub_fp
199 __kmpc_atomic_fixed1u_mul_fp
200 __kmpc_atomic_fixed1u_div
201 __kmpc_atomic_fixed1u_div_cpt
202 __kmpc_atomic_fixed1u_div_cpt_rev
203 __kmpc_atomic_fixed1u_div_fp
204 __kmpc_atomic_fixed1u_div_rev
205 __kmpc_atomic_fixed1u_shr
206 __kmpc_atomic_fixed1u_shr_cpt
207 __kmpc_atomic_fixed1u_shr_cpt_rev
208 __kmpc_atomic_fixed1u_shr_rev
209 __kmpc_atomic_fixed2_add
210 __kmpc_atomic_fixed2_add_cpt
211 __kmpc_atomic_fixed2_add_fp
212 __kmpc_atomic_fixed2_andb
213 __kmpc_atomic_fixed2_andb_cpt
214 __kmpc_atomic_fixed2_andl
215 __kmpc_atomic_fixed2_andl_cpt
216 __kmpc_atomic_fixed2_div
217 __kmpc_atomic_fixed2_div_cpt
218 __kmpc_atomic_fixed2_div_cpt_rev
219 __kmpc_atomic_fixed2_div_float8
220 __kmpc_atomic_fixed2_div_fp
221 __kmpc_atomic_fixed2_div_rev
222 __kmpc_atomic_fixed2_eqv
223 __kmpc_atomic_fixed2_eqv_cpt
224 __kmpc_atomic_fixed2_max
225 __kmpc_atomic_fixed2_max_cpt
226 __kmpc_atomic_fixed2_min
227 __kmpc_atomic_fixed2_min_cpt
228 __kmpc_atomic_fixed2_mul
229 __kmpc_atomic_fixed2_mul_cpt
230 __kmpc_atomic_fixed2_mul_float8
231 __kmpc_atomic_fixed2_mul_fp
232 __kmpc_atomic_fixed2_neqv
233 __kmpc_atomic_fixed2_neqv_cpt
234 __kmpc_atomic_fixed2_orb
235 __kmpc_atomic_fixed2_orb_cpt
236 __kmpc_atomic_fixed2_orl
237 __kmpc_atomic_fixed2_orl_cpt
238 __kmpc_atomic_fixed2_rd
239 __kmpc_atomic_fixed2_shl
240 __kmpc_atomic_fixed2_shl_cpt
241 __kmpc_atomic_fixed2_shl_cpt_rev
242 __kmpc_atomic_fixed2_shl_rev
243 __kmpc_atomic_fixed2_shr
244 __kmpc_atomic_fixed2_shr_cpt
245 __kmpc_atomic_fixed2_shr_cpt_rev
246 __kmpc_atomic_fixed2_shr_rev
247 __kmpc_atomic_fixed2_sub
248 __kmpc_atomic_fixed2_sub_cpt
249 __kmpc_atomic_fixed2_sub_cpt_rev
250 __kmpc_atomic_fixed2_sub_fp
251 __kmpc_atomic_fixed2_sub_rev
252 __kmpc_atomic_fixed2_swp
253 __kmpc_atomic_fixed2_wr
254 __kmpc_atomic_fixed2_xor
255 __kmpc_atomic_fixed2_xor_cpt
256 __kmpc_atomic_fixed2u_add_fp
257 __kmpc_atomic_fixed2u_sub_fp
258 __kmpc_atomic_fixed2u_mul_fp
259 __kmpc_atomic_fixed2u_div
260 __kmpc_atomic_fixed2u_div_cpt
261 __kmpc_atomic_fixed2u_div_cpt_rev
262 __kmpc_atomic_fixed2u_div_fp
263 __kmpc_atomic_fixed2u_div_rev
264 __kmpc_atomic_fixed2u_shr
265 __kmpc_atomic_fixed2u_shr_cpt
266 __kmpc_atomic_fixed2u_shr_cpt_rev
267 __kmpc_atomic_fixed2u_shr_rev
268 __kmpc_atomic_fixed4_add
269 __kmpc_atomic_fixed4_add_cpt
270 __kmpc_atomic_fixed4_add_fp
271 __kmpc_atomic_fixed4_andb
272 __kmpc_atomic_fixed4_andb_cpt
273 __kmpc_atomic_fixed4_andl
274 __kmpc_atomic_fixed4_andl_cpt
275 __kmpc_atomic_fixed4_div
276 __kmpc_atomic_fixed4_div_cpt
277 __kmpc_atomic_fixed4_div_cpt_rev
278 __kmpc_atomic_fixed4_div_float8
279 __kmpc_atomic_fixed4_div_fp
280 __kmpc_atomic_fixed4_div_rev
281 __kmpc_atomic_fixed4_eqv
282 __kmpc_atomic_fixed4_eqv_cpt
283 __kmpc_atomic_fixed4_max
284 __kmpc_atomic_fixed4_max_cpt
285 __kmpc_atomic_fixed4_min
286 __kmpc_atomic_fixed4_min_cpt
287 __kmpc_atomic_fixed4_mul
288 __kmpc_atomic_fixed4_mul_cpt
289 __kmpc_atomic_fixed4_mul_float8
290 __kmpc_atomic_fixed4_mul_fp
291 __kmpc_atomic_fixed4_neqv
292 __kmpc_atomic_fixed4_neqv_cpt
293 __kmpc_atomic_fixed4_orb
294 __kmpc_atomic_fixed4_orb_cpt
295 __kmpc_atomic_fixed4_orl
296 __kmpc_atomic_fixed4_orl_cpt
297 __kmpc_atomic_fixed4_rd
298 __kmpc_atomic_fixed4_shl
299 __kmpc_atomic_fixed4_shl_cpt
300 __kmpc_atomic_fixed4_shl_cpt_rev
301 __kmpc_atomic_fixed4_shl_rev
302 __kmpc_atomic_fixed4_shr
303 __kmpc_atomic_fixed4_shr_cpt
304 __kmpc_atomic_fixed4_shr_cpt_rev
305 __kmpc_atomic_fixed4_shr_rev
306 __kmpc_atomic_fixed4_sub
307 __kmpc_atomic_fixed4_sub_cpt
308 __kmpc_atomic_fixed4_sub_cpt_rev
309 __kmpc_atomic_fixed4_sub_fp
310 __kmpc_atomic_fixed4_sub_rev
311 __kmpc_atomic_fixed4_swp
312 __kmpc_atomic_fixed4_wr
313 __kmpc_atomic_fixed4_xor
314 __kmpc_atomic_fixed4_xor_cpt
315 __kmpc_atomic_fixed4u_add_fp
316 __kmpc_atomic_fixed4u_sub_fp
317 __kmpc_atomic_fixed4u_mul_fp
318 __kmpc_atomic_fixed4u_div
319 __kmpc_atomic_fixed4u_div_cpt
320 __kmpc_atomic_fixed4u_div_cpt_rev
321 __kmpc_atomic_fixed4u_div_fp
322 __kmpc_atomic_fixed4u_div_rev
323 __kmpc_atomic_fixed4u_shr
324 __kmpc_atomic_fixed4u_shr_cpt
325 __kmpc_atomic_fixed4u_shr_cpt_rev
326 __kmpc_atomic_fixed4u_shr_rev
327 __kmpc_atomic_fixed8_add
328 __kmpc_atomic_fixed8_add_cpt
329 __kmpc_atomic_fixed8_add_fp
330 __kmpc_atomic_fixed8_andb
331 __kmpc_atomic_fixed8_andb_cpt
332 __kmpc_atomic_fixed8_andl
333 __kmpc_atomic_fixed8_andl_cpt
334 __kmpc_atomic_fixed8_div
335 __kmpc_atomic_fixed8_div_cpt
336 __kmpc_atomic_fixed8_div_cpt_rev
337 __kmpc_atomic_fixed8_div_float8
338 __kmpc_atomic_fixed8_div_fp
339 __kmpc_atomic_fixed8_div_rev
340 __kmpc_atomic_fixed8_eqv
341 __kmpc_atomic_fixed8_eqv_cpt
342 __kmpc_atomic_fixed8_max
343 __kmpc_atomic_fixed8_max_cpt
344 __kmpc_atomic_fixed8_min
345 __kmpc_atomic_fixed8_min_cpt
346 __kmpc_atomic_fixed8_mul
347 __kmpc_atomic_fixed8_mul_cpt
348 __kmpc_atomic_fixed8_mul_float8
349 __kmpc_atomic_fixed8_mul_fp
350 __kmpc_atomic_fixed8_neqv
351 __kmpc_atomic_fixed8_neqv_cpt
352 __kmpc_atomic_fixed8_orb
353 __kmpc_atomic_fixed8_orb_cpt
354 __kmpc_atomic_fixed8_orl
355 __kmpc_atomic_fixed8_orl_cpt
356 __kmpc_atomic_fixed8_rd
357 __kmpc_atomic_fixed8_shl
358 __kmpc_atomic_fixed8_shl_cpt
359 __kmpc_atomic_fixed8_shl_cpt_rev
360 __kmpc_atomic_fixed8_shl_rev
361 __kmpc_atomic_fixed8_shr
362 __kmpc_atomic_fixed8_shr_cpt
363 __kmpc_atomic_fixed8_shr_cpt_rev
364 __kmpc_atomic_fixed8_shr_rev
365 __kmpc_atomic_fixed8_sub
366 __kmpc_atomic_fixed8_sub_cpt
367 __kmpc_atomic_fixed8_sub_cpt_rev
368 __kmpc_atomic_fixed8_sub_fp
369 __kmpc_atomic_fixed8_sub_rev
370 __kmpc_atomic_fixed8_swp
371 __kmpc_atomic_fixed8_wr
372 __kmpc_atomic_fixed8_xor
373 __kmpc_atomic_fixed8_xor_cpt
374 __kmpc_atomic_fixed8u_add_fp
375 __kmpc_atomic_fixed8u_sub_fp
376 __kmpc_atomic_fixed8u_mul_fp
377 __kmpc_atomic_fixed8u_div
378 __kmpc_atomic_fixed8u_div_cpt
379 __kmpc_atomic_fixed8u_div_cpt_rev
380 __kmpc_atomic_fixed8u_div_fp
381 __kmpc_atomic_fixed8u_div_rev
382 __kmpc_atomic_fixed8u_shr
383 __kmpc_atomic_fixed8u_shr_cpt
384 __kmpc_atomic_fixed8u_shr_cpt_rev
385 __kmpc_atomic_fixed8u_shr_rev
386 @endcode
387
388 Functions for floating point
389 ----------------------------
390 There are versions here for floating point numbers of size 4, 8, 10 and 16
391 bytes. (Ten byte floats are used by X87, but are now rare).
392 @code
393 __kmpc_atomic_float4_add
394 __kmpc_atomic_float4_add_cpt
395 __kmpc_atomic_float4_add_float8
396 __kmpc_atomic_float4_add_fp
397 __kmpc_atomic_float4_div
398 __kmpc_atomic_float4_div_cpt
399 __kmpc_atomic_float4_div_cpt_rev
400 __kmpc_atomic_float4_div_float8
401 __kmpc_atomic_float4_div_fp
402 __kmpc_atomic_float4_div_rev
403 __kmpc_atomic_float4_max
404 __kmpc_atomic_float4_max_cpt
405 __kmpc_atomic_float4_min
406 __kmpc_atomic_float4_min_cpt
407 __kmpc_atomic_float4_mul
408 __kmpc_atomic_float4_mul_cpt
409 __kmpc_atomic_float4_mul_float8
410 __kmpc_atomic_float4_mul_fp
411 __kmpc_atomic_float4_rd
412 __kmpc_atomic_float4_sub
413 __kmpc_atomic_float4_sub_cpt
414 __kmpc_atomic_float4_sub_cpt_rev
415 __kmpc_atomic_float4_sub_float8
416 __kmpc_atomic_float4_sub_fp
417 __kmpc_atomic_float4_sub_rev
418 __kmpc_atomic_float4_swp
419 __kmpc_atomic_float4_wr
420 __kmpc_atomic_float8_add
421 __kmpc_atomic_float8_add_cpt
422 __kmpc_atomic_float8_add_fp
423 __kmpc_atomic_float8_div
424 __kmpc_atomic_float8_div_cpt
425 __kmpc_atomic_float8_div_cpt_rev
426 __kmpc_atomic_float8_div_fp
427 __kmpc_atomic_float8_div_rev
428 __kmpc_atomic_float8_max
429 __kmpc_atomic_float8_max_cpt
430 __kmpc_atomic_float8_min
431 __kmpc_atomic_float8_min_cpt
432 __kmpc_atomic_float8_mul
433 __kmpc_atomic_float8_mul_cpt
434 __kmpc_atomic_float8_mul_fp
435 __kmpc_atomic_float8_rd
436 __kmpc_atomic_float8_sub
437 __kmpc_atomic_float8_sub_cpt
438 __kmpc_atomic_float8_sub_cpt_rev
439 __kmpc_atomic_float8_sub_fp
440 __kmpc_atomic_float8_sub_rev
441 __kmpc_atomic_float8_swp
442 __kmpc_atomic_float8_wr
443 __kmpc_atomic_float10_add
444 __kmpc_atomic_float10_add_cpt
445 __kmpc_atomic_float10_add_fp
446 __kmpc_atomic_float10_div
447 __kmpc_atomic_float10_div_cpt
448 __kmpc_atomic_float10_div_cpt_rev
449 __kmpc_atomic_float10_div_fp
450 __kmpc_atomic_float10_div_rev
451 __kmpc_atomic_float10_mul
452 __kmpc_atomic_float10_mul_cpt
453 __kmpc_atomic_float10_mul_fp
454 __kmpc_atomic_float10_rd
455 __kmpc_atomic_float10_sub
456 __kmpc_atomic_float10_sub_cpt
457 __kmpc_atomic_float10_sub_cpt_rev
458 __kmpc_atomic_float10_sub_fp
459 __kmpc_atomic_float10_sub_rev
460 __kmpc_atomic_float10_swp
461 __kmpc_atomic_float10_wr
462 __kmpc_atomic_float16_add
463 __kmpc_atomic_float16_add_cpt
464 __kmpc_atomic_float16_div
465 __kmpc_atomic_float16_div_cpt
466 __kmpc_atomic_float16_div_cpt_rev
467 __kmpc_atomic_float16_div_rev
468 __kmpc_atomic_float16_max
469 __kmpc_atomic_float16_max_cpt
470 __kmpc_atomic_float16_min
471 __kmpc_atomic_float16_min_cpt
472 __kmpc_atomic_float16_mul
473 __kmpc_atomic_float16_mul_cpt
474 __kmpc_atomic_float16_rd
475 __kmpc_atomic_float16_sub
476 __kmpc_atomic_float16_sub_cpt
477 __kmpc_atomic_float16_sub_cpt_rev
478 __kmpc_atomic_float16_sub_rev
479 __kmpc_atomic_float16_swp
480 __kmpc_atomic_float16_wr
481 @endcode
482
483 Functions for Complex types
484 ---------------------------
485 Functions for complex types whose component floating point variables are of size
486 4,8,10 or 16 bytes. The names here are based on the size of the component float,
487 *not* the size of the complex type. So `__kmpc_atomc_cmplx8_add` is an operation
488 on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`.
489
490 @code
491 __kmpc_atomic_cmplx4_add
492 __kmpc_atomic_cmplx4_add_cmplx8
493 __kmpc_atomic_cmplx4_add_cpt
494 __kmpc_atomic_cmplx4_div
495 __kmpc_atomic_cmplx4_div_cmplx8
496 __kmpc_atomic_cmplx4_div_cpt
497 __kmpc_atomic_cmplx4_div_cpt_rev
498 __kmpc_atomic_cmplx4_div_rev
499 __kmpc_atomic_cmplx4_mul
500 __kmpc_atomic_cmplx4_mul_cmplx8
501 __kmpc_atomic_cmplx4_mul_cpt
502 __kmpc_atomic_cmplx4_rd
503 __kmpc_atomic_cmplx4_sub
504 __kmpc_atomic_cmplx4_sub_cmplx8
505 __kmpc_atomic_cmplx4_sub_cpt
506 __kmpc_atomic_cmplx4_sub_cpt_rev
507 __kmpc_atomic_cmplx4_sub_rev
508 __kmpc_atomic_cmplx4_swp
509 __kmpc_atomic_cmplx4_wr
510 __kmpc_atomic_cmplx8_add
511 __kmpc_atomic_cmplx8_add_cpt
512 __kmpc_atomic_cmplx8_div
513 __kmpc_atomic_cmplx8_div_cpt
514 __kmpc_atomic_cmplx8_div_cpt_rev
515 __kmpc_atomic_cmplx8_div_rev
516 __kmpc_atomic_cmplx8_mul
517 __kmpc_atomic_cmplx8_mul_cpt
518 __kmpc_atomic_cmplx8_rd
519 __kmpc_atomic_cmplx8_sub
520 __kmpc_atomic_cmplx8_sub_cpt
521 __kmpc_atomic_cmplx8_sub_cpt_rev
522 __kmpc_atomic_cmplx8_sub_rev
523 __kmpc_atomic_cmplx8_swp
524 __kmpc_atomic_cmplx8_wr
525 __kmpc_atomic_cmplx10_add
526 __kmpc_atomic_cmplx10_add_cpt
527 __kmpc_atomic_cmplx10_div
528 __kmpc_atomic_cmplx10_div_cpt
529 __kmpc_atomic_cmplx10_div_cpt_rev
530 __kmpc_atomic_cmplx10_div_rev
531 __kmpc_atomic_cmplx10_mul
532 __kmpc_atomic_cmplx10_mul_cpt
533 __kmpc_atomic_cmplx10_rd
534 __kmpc_atomic_cmplx10_sub
535 __kmpc_atomic_cmplx10_sub_cpt
536 __kmpc_atomic_cmplx10_sub_cpt_rev
537 __kmpc_atomic_cmplx10_sub_rev
538 __kmpc_atomic_cmplx10_swp
539 __kmpc_atomic_cmplx10_wr
540 __kmpc_atomic_cmplx16_add
541 __kmpc_atomic_cmplx16_add_cpt
542 __kmpc_atomic_cmplx16_div
543 __kmpc_atomic_cmplx16_div_cpt
544 __kmpc_atomic_cmplx16_div_cpt_rev
545 __kmpc_atomic_cmplx16_div_rev
546 __kmpc_atomic_cmplx16_mul
547 __kmpc_atomic_cmplx16_mul_cpt
548 __kmpc_atomic_cmplx16_rd
549 __kmpc_atomic_cmplx16_sub
550 __kmpc_atomic_cmplx16_sub_cpt
551 __kmpc_atomic_cmplx16_sub_cpt_rev
552 __kmpc_atomic_cmplx16_swp
553 __kmpc_atomic_cmplx16_wr
554 @endcode
555 */
556
557 /*!
558 @ingroup ATOMIC_OPS
559 @{
560 */
561
562 /*
563 * Global vars
564 */
565
566 #ifndef KMP_GOMP_COMPAT
567 int __kmp_atomic_mode = 1; // Intel perf
568 #else
569 int __kmp_atomic_mode = 2; // GOMP compatibility
570 #endif /* KMP_GOMP_COMPAT */
571
572 KMP_ALIGN(128)
573
574 // Control access to all user coded atomics in Gnu compat mode
575 kmp_atomic_lock_t __kmp_atomic_lock;
576 // Control access to all user coded atomics for 1-byte fixed data types
577 kmp_atomic_lock_t __kmp_atomic_lock_1i;
578 // Control access to all user coded atomics for 2-byte fixed data types
579 kmp_atomic_lock_t __kmp_atomic_lock_2i;
580 // Control access to all user coded atomics for 4-byte fixed data types
581 kmp_atomic_lock_t __kmp_atomic_lock_4i;
582 // Control access to all user coded atomics for kmp_real32 data type
583 kmp_atomic_lock_t __kmp_atomic_lock_4r;
584 // Control access to all user coded atomics for 8-byte fixed data types
585 kmp_atomic_lock_t __kmp_atomic_lock_8i;
586 // Control access to all user coded atomics for kmp_real64 data type
587 kmp_atomic_lock_t __kmp_atomic_lock_8r;
588 // Control access to all user coded atomics for complex byte data type
589 kmp_atomic_lock_t __kmp_atomic_lock_8c;
590 // Control access to all user coded atomics for long double data type
591 kmp_atomic_lock_t __kmp_atomic_lock_10r;
592 // Control access to all user coded atomics for _Quad data type
593 kmp_atomic_lock_t __kmp_atomic_lock_16r;
594 // Control access to all user coded atomics for double complex data type
595 kmp_atomic_lock_t __kmp_atomic_lock_16c;
596 // Control access to all user coded atomics for long double complex type
597 kmp_atomic_lock_t __kmp_atomic_lock_20c;
598 // Control access to all user coded atomics for _Quad complex data type
599 kmp_atomic_lock_t __kmp_atomic_lock_32c;
600
601 /* 2007-03-02:
602 Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
603 on *_32 and *_32e. This is just a temporary workaround for the problem. It
604 seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
605 in assembler language. */
606 #define KMP_ATOMIC_VOLATILE volatile
607
608 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
609
operator +=(Quad_a4_t & lhs,Quad_a4_t & rhs)610 static inline void operator+=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
611 lhs.q += rhs.q;
612 }
operator -=(Quad_a4_t & lhs,Quad_a4_t & rhs)613 static inline void operator-=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
614 lhs.q -= rhs.q;
615 }
operator *=(Quad_a4_t & lhs,Quad_a4_t & rhs)616 static inline void operator*=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
617 lhs.q *= rhs.q;
618 }
operator /=(Quad_a4_t & lhs,Quad_a4_t & rhs)619 static inline void operator/=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
620 lhs.q /= rhs.q;
621 }
operator <(Quad_a4_t & lhs,Quad_a4_t & rhs)622 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
623 return lhs.q < rhs.q;
624 }
operator >(Quad_a4_t & lhs,Quad_a4_t & rhs)625 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
626 return lhs.q > rhs.q;
627 }
628
operator +=(Quad_a16_t & lhs,Quad_a16_t & rhs)629 static inline void operator+=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
630 lhs.q += rhs.q;
631 }
operator -=(Quad_a16_t & lhs,Quad_a16_t & rhs)632 static inline void operator-=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
633 lhs.q -= rhs.q;
634 }
operator *=(Quad_a16_t & lhs,Quad_a16_t & rhs)635 static inline void operator*=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
636 lhs.q *= rhs.q;
637 }
operator /=(Quad_a16_t & lhs,Quad_a16_t & rhs)638 static inline void operator/=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
639 lhs.q /= rhs.q;
640 }
operator <(Quad_a16_t & lhs,Quad_a16_t & rhs)641 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
642 return lhs.q < rhs.q;
643 }
operator >(Quad_a16_t & lhs,Quad_a16_t & rhs)644 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
645 return lhs.q > rhs.q;
646 }
647
operator +=(kmp_cmplx128_a4_t & lhs,kmp_cmplx128_a4_t & rhs)648 static inline void operator+=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
649 lhs.q += rhs.q;
650 }
operator -=(kmp_cmplx128_a4_t & lhs,kmp_cmplx128_a4_t & rhs)651 static inline void operator-=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
652 lhs.q -= rhs.q;
653 }
operator *=(kmp_cmplx128_a4_t & lhs,kmp_cmplx128_a4_t & rhs)654 static inline void operator*=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
655 lhs.q *= rhs.q;
656 }
operator /=(kmp_cmplx128_a4_t & lhs,kmp_cmplx128_a4_t & rhs)657 static inline void operator/=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
658 lhs.q /= rhs.q;
659 }
660
operator +=(kmp_cmplx128_a16_t & lhs,kmp_cmplx128_a16_t & rhs)661 static inline void operator+=(kmp_cmplx128_a16_t &lhs,
662 kmp_cmplx128_a16_t &rhs) {
663 lhs.q += rhs.q;
664 }
operator -=(kmp_cmplx128_a16_t & lhs,kmp_cmplx128_a16_t & rhs)665 static inline void operator-=(kmp_cmplx128_a16_t &lhs,
666 kmp_cmplx128_a16_t &rhs) {
667 lhs.q -= rhs.q;
668 }
operator *=(kmp_cmplx128_a16_t & lhs,kmp_cmplx128_a16_t & rhs)669 static inline void operator*=(kmp_cmplx128_a16_t &lhs,
670 kmp_cmplx128_a16_t &rhs) {
671 lhs.q *= rhs.q;
672 }
operator /=(kmp_cmplx128_a16_t & lhs,kmp_cmplx128_a16_t & rhs)673 static inline void operator/=(kmp_cmplx128_a16_t &lhs,
674 kmp_cmplx128_a16_t &rhs) {
675 lhs.q /= rhs.q;
676 }
677
678 #endif
679
680 // ATOMIC implementation routines -----------------------------------------
681 // One routine for each operation and operand type.
682 // All routines declarations looks like
683 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
684
685 #define KMP_CHECK_GTID \
686 if (gtid == KMP_GTID_UNKNOWN) { \
687 gtid = __kmp_entry_gtid(); \
688 } // check and get gtid when needed
689
690 // Beginning of a definition (provides name, parameters, gebug trace)
691 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
692 // fixed)
693 // OP_ID - operation identifier (add, sub, mul, ...)
694 // TYPE - operands' type
695 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
696 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
697 TYPE *lhs, TYPE rhs) { \
698 KMP_DEBUG_ASSERT(__kmp_init_serial); \
699 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
700
701 // ------------------------------------------------------------------------
702 // Lock variables used for critical sections for various size operands
703 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
704 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
705 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
706 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
707 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
708 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
709 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
710 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
711 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
712 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
713 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
714 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
715 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
716
717 // ------------------------------------------------------------------------
718 // Operation on *lhs, rhs bound by critical section
719 // OP - operator (it's supposed to contain an assignment)
720 // LCK_ID - lock identifier
721 // Note: don't check gtid as it should always be valid
722 // 1, 2-byte - expect valid parameter, other - check before this macro
723 #define OP_CRITICAL(OP, LCK_ID) \
724 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
725 \
726 (*lhs) OP(rhs); \
727 \
728 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
729
730 // ------------------------------------------------------------------------
731 // For GNU compatibility, we may need to use a critical section,
732 // even though it is not required by the ISA.
733 //
734 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
735 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
736 // critical section. On Intel(R) 64, all atomic operations are done with fetch
737 // and add or compare and exchange. Therefore, the FLAG parameter to this
738 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extension which
739 // require a critical section, where we predict that they will be implemented
740 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
741 //
742 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
743 // the FLAG parameter should always be 1. If we know that we will be using
744 // a critical section, then we want to make certain that we use the generic
745 // lock __kmp_atomic_lock to protect the atomic update, and not of of the
746 // locks that are specialized based upon the size or type of the data.
747 //
748 // If FLAG is 0, then we are relying on dead code elimination by the build
749 // compiler to get rid of the useless block of code, and save a needless
750 // branch at runtime.
751
752 #ifdef KMP_GOMP_COMPAT
753 #define OP_GOMP_CRITICAL(OP, FLAG) \
754 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
755 KMP_CHECK_GTID; \
756 OP_CRITICAL(OP, 0); \
757 return; \
758 }
759 #else
760 #define OP_GOMP_CRITICAL(OP, FLAG)
761 #endif /* KMP_GOMP_COMPAT */
762
763 #if KMP_MIC
764 #define KMP_DO_PAUSE _mm_delay_32(1)
765 #else
766 #define KMP_DO_PAUSE KMP_CPU_PAUSE()
767 #endif /* KMP_MIC */
768
769 // ------------------------------------------------------------------------
770 // Operation on *lhs, rhs using "compare_and_store" routine
771 // TYPE - operands' type
772 // BITS - size in bits, used to distinguish low level calls
773 // OP - operator
774 #define OP_CMPXCHG(TYPE, BITS, OP) \
775 { \
776 TYPE old_value, new_value; \
777 old_value = *(TYPE volatile *)lhs; \
778 new_value = old_value OP rhs; \
779 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
780 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
781 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
782 KMP_DO_PAUSE; \
783 \
784 old_value = *(TYPE volatile *)lhs; \
785 new_value = old_value OP rhs; \
786 } \
787 }
788
789 #if USE_CMPXCHG_FIX
790 // 2007-06-25:
791 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
792 // and win_32e are affected (I verified the asm). Compiler ignores the volatile
793 // qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
794 // compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
795 // the workaround.
796 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
797 { \
798 struct _sss { \
799 TYPE cmp; \
800 kmp_int##BITS *vvv; \
801 }; \
802 struct _sss old_value, new_value; \
803 old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
804 new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
805 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
806 new_value.cmp = old_value.cmp OP rhs; \
807 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
808 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
809 *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
810 KMP_DO_PAUSE; \
811 \
812 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
813 new_value.cmp = old_value.cmp OP rhs; \
814 } \
815 }
816 // end of the first part of the workaround for C78287
817 #endif // USE_CMPXCHG_FIX
818
819 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
820
821 // ------------------------------------------------------------------------
822 // X86 or X86_64: no alignment problems ====================================
823 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
824 GOMP_FLAG) \
825 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
826 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
827 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
828 KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
829 }
830 // -------------------------------------------------------------------------
831 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
832 GOMP_FLAG) \
833 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
834 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
835 OP_CMPXCHG(TYPE, BITS, OP) \
836 }
837 #if USE_CMPXCHG_FIX
838 // -------------------------------------------------------------------------
839 // workaround for C78287 (complex(kind=4) data type)
840 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
841 MASK, GOMP_FLAG) \
842 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
843 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
844 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
845 }
846 // end of the second part of the workaround for C78287
847 #endif
848
849 #else
850 // -------------------------------------------------------------------------
851 // Code for other architectures that don't handle unaligned accesses.
852 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
853 GOMP_FLAG) \
854 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
855 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
856 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
857 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
858 KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
859 } else { \
860 KMP_CHECK_GTID; \
861 OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
862 } \
863 }
864 // -------------------------------------------------------------------------
865 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
866 GOMP_FLAG) \
867 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
868 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
869 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
870 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
871 } else { \
872 KMP_CHECK_GTID; \
873 OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
874 } \
875 }
876 #if USE_CMPXCHG_FIX
877 // -------------------------------------------------------------------------
878 // workaround for C78287 (complex(kind=4) data type)
879 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
880 MASK, GOMP_FLAG) \
881 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
882 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
883 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
884 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
885 } else { \
886 KMP_CHECK_GTID; \
887 OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
888 } \
889 }
890 // end of the second part of the workaround for C78287
891 #endif // USE_CMPXCHG_FIX
892 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
893
894 // Routines for ATOMIC 4-byte operands addition and subtraction
895 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
896 0) // __kmpc_atomic_fixed4_add
897 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
898 0) // __kmpc_atomic_fixed4_sub
899
900 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
901 KMP_ARCH_X86) // __kmpc_atomic_float4_add
902 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
903 KMP_ARCH_X86) // __kmpc_atomic_float4_sub
904
905 // Routines for ATOMIC 8-byte operands addition and subtraction
906 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
907 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
908 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
909 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
910
911 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
912 KMP_ARCH_X86) // __kmpc_atomic_float8_add
913 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
914 KMP_ARCH_X86) // __kmpc_atomic_float8_sub
915
916 // ------------------------------------------------------------------------
917 // Entries definition for integer operands
918 // TYPE_ID - operands type and size (fixed4, float4)
919 // OP_ID - operation identifier (add, sub, mul, ...)
920 // TYPE - operand type
921 // BITS - size in bits, used to distinguish low level calls
922 // OP - operator (used in critical section)
923 // LCK_ID - lock identifier, used to possibly distinguish lock variable
924 // MASK - used for alignment check
925
926 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
927 // ------------------------------------------------------------------------
928 // Routines for ATOMIC integer operands, other operators
929 // ------------------------------------------------------------------------
930 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
931 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
932 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
933 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
934 0) // __kmpc_atomic_fixed1_andb
935 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
936 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
937 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
938 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
939 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
940 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
941 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
942 0) // __kmpc_atomic_fixed1_orb
943 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
944 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
945 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
946 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
947 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
948 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
949 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
950 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
951 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
952 0) // __kmpc_atomic_fixed1_xor
953 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
954 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
955 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
956 0) // __kmpc_atomic_fixed2_andb
957 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
958 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
959 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
960 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
961 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
962 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
963 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
964 0) // __kmpc_atomic_fixed2_orb
965 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
966 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
967 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
968 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
969 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
970 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
971 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
972 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
973 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
974 0) // __kmpc_atomic_fixed2_xor
975 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
976 0) // __kmpc_atomic_fixed4_andb
977 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
978 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
979 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
980 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
981 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
982 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
983 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
984 0) // __kmpc_atomic_fixed4_orb
985 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
986 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
987 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
988 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
989 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
990 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
991 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
992 0) // __kmpc_atomic_fixed4_xor
993 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
994 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
995 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
996 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
997 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
998 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
999 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
1000 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1001 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1002 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1003 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1004 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1005 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1006 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1007 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1008 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1009 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1010 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1011 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1012 KMP_ARCH_X86) // __kmpc_atomic_float4_div
1013 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1014 KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1015 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1016 KMP_ARCH_X86) // __kmpc_atomic_float8_div
1017 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1018 KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1019 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
1020
1021 /* ------------------------------------------------------------------------ */
1022 /* Routines for C/C++ Reduction operators && and || */
1023
1024 // ------------------------------------------------------------------------
1025 // Need separate macros for &&, || because there is no combined assignment
1026 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
1027 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1028 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1029 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1030 OP_CRITICAL(= *lhs OP, LCK_ID) \
1031 }
1032
1033 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1034
1035 // ------------------------------------------------------------------------
1036 // X86 or X86_64: no alignment problems ===================================
1037 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1038 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1039 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1040 OP_CMPXCHG(TYPE, BITS, OP) \
1041 }
1042
1043 #else
1044 // ------------------------------------------------------------------------
1045 // Code for other architectures that don't handle unaligned accesses.
1046 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1047 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1048 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
1049 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1050 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1051 } else { \
1052 KMP_CHECK_GTID; \
1053 OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
1054 } \
1055 }
1056 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1057
1058 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1059 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1060 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1061 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1062 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1063 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1064 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1065 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1066 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1067 0) // __kmpc_atomic_fixed4_andl
1068 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1069 0) // __kmpc_atomic_fixed4_orl
1070 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1071 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1072 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1073 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1074
1075 /* ------------------------------------------------------------------------- */
1076 /* Routines for Fortran operators that matched no one in C: */
1077 /* MAX, MIN, .EQV., .NEQV. */
1078 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */
1079 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */
1080
1081 // -------------------------------------------------------------------------
1082 // MIN and MAX need separate macros
1083 // OP - operator to check if we need any actions?
1084 #define MIN_MAX_CRITSECT(OP, LCK_ID) \
1085 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1086 \
1087 if (*lhs OP rhs) { /* still need actions? */ \
1088 *lhs = rhs; \
1089 } \
1090 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1091
1092 // -------------------------------------------------------------------------
1093 #ifdef KMP_GOMP_COMPAT
1094 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
1095 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1096 KMP_CHECK_GTID; \
1097 MIN_MAX_CRITSECT(OP, 0); \
1098 return; \
1099 }
1100 #else
1101 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1102 #endif /* KMP_GOMP_COMPAT */
1103
1104 // -------------------------------------------------------------------------
1105 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1106 { \
1107 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1108 TYPE old_value; \
1109 temp_val = *lhs; \
1110 old_value = temp_val; \
1111 while (old_value OP rhs && /* still need actions? */ \
1112 !KMP_COMPARE_AND_STORE_ACQ##BITS( \
1113 (kmp_int##BITS *)lhs, \
1114 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1115 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
1116 KMP_CPU_PAUSE(); \
1117 temp_val = *lhs; \
1118 old_value = temp_val; \
1119 } \
1120 }
1121
1122 // -------------------------------------------------------------------------
1123 // 1-byte, 2-byte operands - use critical section
1124 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1125 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1126 if (*lhs OP rhs) { /* need actions? */ \
1127 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1128 MIN_MAX_CRITSECT(OP, LCK_ID) \
1129 } \
1130 }
1131
1132 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1133
1134 // -------------------------------------------------------------------------
1135 // X86 or X86_64: no alignment problems ====================================
1136 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1137 GOMP_FLAG) \
1138 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1139 if (*lhs OP rhs) { \
1140 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1141 MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
1142 } \
1143 }
1144
1145 #else
1146 // -------------------------------------------------------------------------
1147 // Code for other architectures that don't handle unaligned accesses.
1148 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1149 GOMP_FLAG) \
1150 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1151 if (*lhs OP rhs) { \
1152 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
1153 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1154 MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1155 } else { \
1156 KMP_CHECK_GTID; \
1157 MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
1158 } \
1159 } \
1160 }
1161 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1162
1163 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1164 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1165 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1166 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1167 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1168 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1169 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1170 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1171 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1172 0) // __kmpc_atomic_fixed4_max
1173 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1174 0) // __kmpc_atomic_fixed4_min
1175 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1176 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1177 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1178 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1179 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1180 KMP_ARCH_X86) // __kmpc_atomic_float4_max
1181 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1182 KMP_ARCH_X86) // __kmpc_atomic_float4_min
1183 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1184 KMP_ARCH_X86) // __kmpc_atomic_float8_max
1185 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1186 KMP_ARCH_X86) // __kmpc_atomic_float8_min
1187 #if KMP_HAVE_QUAD
1188 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1189 1) // __kmpc_atomic_float16_max
1190 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1191 1) // __kmpc_atomic_float16_min
1192 #if (KMP_ARCH_X86)
1193 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1194 1) // __kmpc_atomic_float16_max_a16
1195 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1196 1) // __kmpc_atomic_float16_min_a16
1197 #endif
1198 #endif
1199 // ------------------------------------------------------------------------
1200 // Need separate macros for .EQV. because of the need of complement (~)
1201 // OP ignored for critical sections, ^=~ used instead
1202 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1203 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1204 OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \
1205 OP_CRITICAL(^= ~, LCK_ID) /* send assignment and complement */ \
1206 }
1207
1208 // ------------------------------------------------------------------------
1209 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1210 // ------------------------------------------------------------------------
1211 // X86 or X86_64: no alignment problems ===================================
1212 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1213 GOMP_FLAG) \
1214 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1215 OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \
1216 OP_CMPXCHG(TYPE, BITS, OP) \
1217 }
1218 // ------------------------------------------------------------------------
1219 #else
1220 // ------------------------------------------------------------------------
1221 // Code for other architectures that don't handle unaligned accesses.
1222 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
1223 GOMP_FLAG) \
1224 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1225 OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) \
1226 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1227 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1228 } else { \
1229 KMP_CHECK_GTID; \
1230 OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */ \
1231 } \
1232 }
1233 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1234
1235 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1236 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1237 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1238 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1239 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1240 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1241 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1242 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1243 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1244 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1245 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1246 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1247 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1248 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1249 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1250 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1251
1252 // ------------------------------------------------------------------------
1253 // Routines for Extended types: long double, _Quad, complex flavours (use
1254 // critical section)
1255 // TYPE_ID, OP_ID, TYPE - detailed above
1256 // OP - operator
1257 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1258 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1259 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
1260 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
1261 OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
1262 }
1263
1264 /* ------------------------------------------------------------------------- */
1265 // routines for long double type
1266 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1267 1) // __kmpc_atomic_float10_add
1268 ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1269 1) // __kmpc_atomic_float10_sub
1270 ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1271 1) // __kmpc_atomic_float10_mul
1272 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1273 1) // __kmpc_atomic_float10_div
1274 #if KMP_HAVE_QUAD
1275 // routines for _Quad type
1276 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1277 1) // __kmpc_atomic_float16_add
1278 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1279 1) // __kmpc_atomic_float16_sub
1280 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1281 1) // __kmpc_atomic_float16_mul
1282 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1283 1) // __kmpc_atomic_float16_div
1284 #if (KMP_ARCH_X86)
1285 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1286 1) // __kmpc_atomic_float16_add_a16
1287 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1288 1) // __kmpc_atomic_float16_sub_a16
1289 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1290 1) // __kmpc_atomic_float16_mul_a16
1291 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1292 1) // __kmpc_atomic_float16_div_a16
1293 #endif
1294 #endif
1295 // routines for complex types
1296
1297 #if USE_CMPXCHG_FIX
1298 // workaround for C78287 (complex(kind=4) data type)
1299 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1300 1) // __kmpc_atomic_cmplx4_add
1301 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1302 1) // __kmpc_atomic_cmplx4_sub
1303 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1304 1) // __kmpc_atomic_cmplx4_mul
1305 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1306 1) // __kmpc_atomic_cmplx4_div
1307 // end of the workaround for C78287
1308 #else
1309 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1310 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1311 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1312 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1313 #endif // USE_CMPXCHG_FIX
1314
1315 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1316 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1317 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1318 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1319 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1320 1) // __kmpc_atomic_cmplx10_add
1321 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1322 1) // __kmpc_atomic_cmplx10_sub
1323 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1324 1) // __kmpc_atomic_cmplx10_mul
1325 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1326 1) // __kmpc_atomic_cmplx10_div
1327 #if KMP_HAVE_QUAD
1328 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1329 1) // __kmpc_atomic_cmplx16_add
1330 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1331 1) // __kmpc_atomic_cmplx16_sub
1332 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1333 1) // __kmpc_atomic_cmplx16_mul
1334 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1335 1) // __kmpc_atomic_cmplx16_div
1336 #if (KMP_ARCH_X86)
1337 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1338 1) // __kmpc_atomic_cmplx16_add_a16
1339 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1340 1) // __kmpc_atomic_cmplx16_sub_a16
1341 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1342 1) // __kmpc_atomic_cmplx16_mul_a16
1343 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1344 1) // __kmpc_atomic_cmplx16_div_a16
1345 #endif
1346 #endif
1347
1348 #if OMP_40_ENABLED
1349
1350 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1351 // Supported only on IA-32 architecture and Intel(R) 64
1352 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1353
1354 // ------------------------------------------------------------------------
1355 // Operation on *lhs, rhs bound by critical section
1356 // OP - operator (it's supposed to contain an assignment)
1357 // LCK_ID - lock identifier
1358 // Note: don't check gtid as it should always be valid
1359 // 1, 2-byte - expect valid parameter, other - check before this macro
1360 #define OP_CRITICAL_REV(OP, LCK_ID) \
1361 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1362 \
1363 (*lhs) = (rhs)OP(*lhs); \
1364 \
1365 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1366
1367 #ifdef KMP_GOMP_COMPAT
1368 #define OP_GOMP_CRITICAL_REV(OP, FLAG) \
1369 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1370 KMP_CHECK_GTID; \
1371 OP_CRITICAL_REV(OP, 0); \
1372 return; \
1373 }
1374 #else
1375 #define OP_GOMP_CRITICAL_REV(OP, FLAG)
1376 #endif /* KMP_GOMP_COMPAT */
1377
1378 // Beginning of a definition (provides name, parameters, gebug trace)
1379 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1380 // fixed)
1381 // OP_ID - operation identifier (add, sub, mul, ...)
1382 // TYPE - operands' type
1383 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1384 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
1385 TYPE *lhs, TYPE rhs) { \
1386 KMP_DEBUG_ASSERT(__kmp_init_serial); \
1387 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1388
1389 // ------------------------------------------------------------------------
1390 // Operation on *lhs, rhs using "compare_and_store" routine
1391 // TYPE - operands' type
1392 // BITS - size in bits, used to distinguish low level calls
1393 // OP - operator
1394 // Note: temp_val introduced in order to force the compiler to read
1395 // *lhs only once (w/o it the compiler reads *lhs twice)
1396 #define OP_CMPXCHG_REV(TYPE, BITS, OP) \
1397 { \
1398 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1399 TYPE old_value, new_value; \
1400 temp_val = *lhs; \
1401 old_value = temp_val; \
1402 new_value = rhs OP old_value; \
1403 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
1404 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
1405 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
1406 KMP_DO_PAUSE; \
1407 \
1408 temp_val = *lhs; \
1409 old_value = temp_val; \
1410 new_value = rhs OP old_value; \
1411 } \
1412 }
1413
1414 // -------------------------------------------------------------------------
1415 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
1416 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1417 OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
1418 OP_CMPXCHG_REV(TYPE, BITS, OP) \
1419 }
1420
1421 // ------------------------------------------------------------------------
1422 // Entries definition for integer operands
1423 // TYPE_ID - operands type and size (fixed4, float4)
1424 // OP_ID - operation identifier (add, sub, mul, ...)
1425 // TYPE - operand type
1426 // BITS - size in bits, used to distinguish low level calls
1427 // OP - operator (used in critical section)
1428 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1429
1430 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
1431 // ------------------------------------------------------------------------
1432 // Routines for ATOMIC integer operands, other operators
1433 // ------------------------------------------------------------------------
1434 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1435 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1436 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1437 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1438 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1439 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1440 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1441 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1442 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1443 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1444 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1445 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1446 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1447
1448 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1449 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1450 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1451 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1452 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1453 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1454 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1455 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1456 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1457 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1458 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1459 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1460
1461 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1462 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1463 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1464 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1465 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1466 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1467 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1468 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1469 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1470 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1471 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1472 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1473
1474 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1475 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1476 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1477 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1478 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1479 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1480 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1481 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1482 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1483 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1484 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1485 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1486
1487 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1488 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1489 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1490 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1491
1492 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1493 KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1494 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1495 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1496 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG
1497
1498 // ------------------------------------------------------------------------
1499 // Routines for Extended types: long double, _Quad, complex flavours (use
1500 // critical section)
1501 // TYPE_ID, OP_ID, TYPE - detailed above
1502 // OP - operator
1503 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1504 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1505 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
1506 OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
1507 OP_CRITICAL_REV(OP, LCK_ID) \
1508 }
1509
1510 /* ------------------------------------------------------------------------- */
1511 // routines for long double type
1512 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1513 1) // __kmpc_atomic_float10_sub_rev
1514 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1515 1) // __kmpc_atomic_float10_div_rev
1516 #if KMP_HAVE_QUAD
1517 // routines for _Quad type
1518 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1519 1) // __kmpc_atomic_float16_sub_rev
1520 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1521 1) // __kmpc_atomic_float16_div_rev
1522 #if (KMP_ARCH_X86)
1523 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1524 1) // __kmpc_atomic_float16_sub_a16_rev
1525 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1526 1) // __kmpc_atomic_float16_div_a16_rev
1527 #endif
1528 #endif
1529
1530 // routines for complex types
1531 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1532 1) // __kmpc_atomic_cmplx4_sub_rev
1533 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1534 1) // __kmpc_atomic_cmplx4_div_rev
1535 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1536 1) // __kmpc_atomic_cmplx8_sub_rev
1537 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1538 1) // __kmpc_atomic_cmplx8_div_rev
1539 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1540 1) // __kmpc_atomic_cmplx10_sub_rev
1541 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1542 1) // __kmpc_atomic_cmplx10_div_rev
1543 #if KMP_HAVE_QUAD
1544 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1545 1) // __kmpc_atomic_cmplx16_sub_rev
1546 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1547 1) // __kmpc_atomic_cmplx16_div_rev
1548 #if (KMP_ARCH_X86)
1549 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1550 1) // __kmpc_atomic_cmplx16_sub_a16_rev
1551 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1552 1) // __kmpc_atomic_cmplx16_div_a16_rev
1553 #endif
1554 #endif
1555
1556 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1557 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1558
1559 #endif // OMP_40_ENABLED
1560
/* ------------------------------------------------------------------------ */
/* Routines for mixed types of LHS and RHS, when RHS is "larger"             */
/* Note: in order to reduce the total number of type combinations            */
/*       it is assumed that the compiler converts RHS to the longest         */
/*       floating type, that is _Quad, before calling any of these routines. */
/* The conversion to _Quad is done by the compiler during the calculation,   */
/* and the conversion back to TYPE - before the assignment, like:            */
/*     *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                 */
/* A performance penalty is expected because of software emulation.          */
/* ------------------------------------------------------------------------ */
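// Illustrative example (a hedged sketch of the expected compiler lowering,
// not a statement about any particular compiler): for
//   float x;  double d;
//   #pragma omp atomic
//   x = x / d;
// the compiler may promote the RHS to _Quad and call
//   __kmpc_atomic_float4_div_fp(id_ref, gtid, &x, (_Quad)d);
// so that the routine computes x = (float)((_Quad)x / rhs) atomically, or it
// may keep the RHS as double and call __kmpc_atomic_float4_div_float8 instead.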
1571
1572 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1573 void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
1574 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
1575 KMP_DEBUG_ASSERT(__kmp_init_serial); \
1576 KA_TRACE(100, \
1577 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
1578 gtid));
1579
1580 // -------------------------------------------------------------------------
1581 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
1582 GOMP_FLAG) \
1583 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1584 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
1585 OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
1586 }
1587
1588 // -------------------------------------------------------------------------
1589 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1590 // -------------------------------------------------------------------------
1591 // X86 or X86_64: no alignment problems ====================================
1592 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1593 LCK_ID, MASK, GOMP_FLAG) \
1594 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1595 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1596 OP_CMPXCHG(TYPE, BITS, OP) \
1597 }
1598 // -------------------------------------------------------------------------
1599 #else
1600 // ------------------------------------------------------------------------
1601 // Code for other architectures that don't handle unaligned accesses.
1602 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1603 LCK_ID, MASK, GOMP_FLAG) \
1604 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1605 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1606 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1607 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1608 } else { \
1609 KMP_CHECK_GTID; \
1610 OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
1611 } \
1612 }
1613 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
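// Note on MASK: it is the hexadecimal low-bit mask used in the alignment test
// above, derived from the operand size; e.g. MASK=3 checks 4-byte alignment:
// ((kmp_uintptr_t)lhs & 0x3) == 0 selects the cmpxchg path, otherwise the
// critical-section path is taken. On X86/X86_64 the MASK argument is ignored.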
1614
1615 // -------------------------------------------------------------------------
1616 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1617 // -------------------------------------------------------------------------
1618 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
1619 RTYPE, LCK_ID, MASK, GOMP_FLAG) \
1620 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1621 OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
1622 OP_CMPXCHG_REV(TYPE, BITS, OP) \
1623 }
1624 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
1625 LCK_ID, GOMP_FLAG) \
1626 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1627 OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
1628 OP_CRITICAL_REV(OP, LCK_ID) \
1629 }
1630 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1631
1632 // RHS=float8
1633 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1634 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1635 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1636 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1637 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1638 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1639 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1640 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1641 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1642 0) // __kmpc_atomic_fixed4_mul_float8
1643 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1644 0) // __kmpc_atomic_fixed4_div_float8
1645 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1646 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1647 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1648 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1649 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1650 KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1651 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1652 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1653 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1654 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1655 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1656 KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1657
1658 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not
1659 // use them)
1660 #if KMP_HAVE_QUAD
1661 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1662 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1663 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1664 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1665 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1666 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1667 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1668 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1669 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1670 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1671 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1672 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1673 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1674 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1675 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1676 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1677
1678 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1679 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1680 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1681 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1682 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1683 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1684 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1685 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1686 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1687 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1688 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1689 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1690 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1691 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1692 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1693 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1694
1695 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1696 0) // __kmpc_atomic_fixed4_add_fp
1697 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1698 0) // __kmpc_atomic_fixed4u_add_fp
1699 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1700 0) // __kmpc_atomic_fixed4_sub_fp
1701 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1702 0) // __kmpc_atomic_fixed4u_sub_fp
1703 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1704 0) // __kmpc_atomic_fixed4_mul_fp
1705 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1706 0) // __kmpc_atomic_fixed4u_mul_fp
1707 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1708 0) // __kmpc_atomic_fixed4_div_fp
1709 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1710 0) // __kmpc_atomic_fixed4u_div_fp
1711
1712 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1713 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1714 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1715 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1716 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1717 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1718 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1719 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1720 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1721 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1722 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1723 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1724 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1725 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1726 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1727 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1728
1729 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1730 KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1731 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1732 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1733 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1734 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1735 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1736 KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1737
1738 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1739 KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1740 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1741 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1742 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1743 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1744 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1745 KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1746
1747 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1748 1) // __kmpc_atomic_float10_add_fp
1749 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1750 1) // __kmpc_atomic_float10_sub_fp
1751 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1752 1) // __kmpc_atomic_float10_mul_fp
1753 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1754 1) // __kmpc_atomic_float10_div_fp
1755
1756 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1757 // Reverse operations
1758 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1759 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1760 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1761 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1762 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1763 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1764 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1765 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1766
1767 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1768 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1769 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1770 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1771 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1772 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1773 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1774 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1775
1776 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1777 0) // __kmpc_atomic_fixed4_sub_rev_fp
1778 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1779 0) // __kmpc_atomic_fixed4u_sub_rev_fp
1780 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1781 0) // __kmpc_atomic_fixed4_div_rev_fp
1782 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1783 0) // __kmpc_atomic_fixed4u_div_rev_fp
1784
1785 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1786 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1787 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1788 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1789 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1790 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1791 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1792 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1793
1794 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1795 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1796 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1797 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1798
1799 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1800 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1801 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1802 KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1803
1804 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1805 1) // __kmpc_atomic_float10_sub_rev_fp
1806 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1807 1) // __kmpc_atomic_float10_div_rev_fp
1808 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1809
1810 #endif
1811
1812 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1813 // ------------------------------------------------------------------------
1814 // X86 or X86_64: no alignment problems ====================================
1815 #if USE_CMPXCHG_FIX
1816 // workaround for C78287 (complex(kind=4) data type)
1817 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1818 LCK_ID, MASK, GOMP_FLAG) \
1819 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1820 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1821 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
1822 }
1823 // end of the second part of the workaround for C78287
1824 #else
1825 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1826 LCK_ID, MASK, GOMP_FLAG) \
1827 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1828 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1829 OP_CMPXCHG(TYPE, BITS, OP) \
1830 }
1831 #endif // USE_CMPXCHG_FIX
1832 #else
1833 // ------------------------------------------------------------------------
1834 // Code for other architectures that don't handle unaligned accesses.
1835 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1836 LCK_ID, MASK, GOMP_FLAG) \
1837 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1838 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
1839 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1840 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1841 } else { \
1842 KMP_CHECK_GTID; \
1843 OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
1844 } \
1845 }
1846 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1847
1848 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1849 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1850 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1851 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1852 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1853 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1854 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1855 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1856
1857 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1858 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1859
1860 // ------------------------------------------------------------------------
1861 // Atomic READ routines
1862
1863 // ------------------------------------------------------------------------
// Beginning of a definition (provides name, parameters, debug trace)
1865 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1866 // fixed)
1867 // OP_ID - operation identifier (add, sub, mul, ...)
1868 // TYPE - operands' type
1869 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1870 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
1871 TYPE *loc) { \
1872 KMP_DEBUG_ASSERT(__kmp_init_serial); \
1873 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1874
1875 // ------------------------------------------------------------------------
// Operation on *loc using "compare_and_store_ret" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *loc only once (w/o it the compiler reads *loc twice)
// TODO: check if it is still necessary
// Return old value regardless of the result of "compare & swap" operation
1884 #define OP_CMPXCHG_READ(TYPE, BITS, OP) \
1885 { \
1886 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1887 union f_i_union { \
1888 TYPE f_val; \
1889 kmp_int##BITS i_val; \
1890 }; \
1891 union f_i_union old_value; \
1892 temp_val = *loc; \
1893 old_value.f_val = temp_val; \
1894 old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
1895 (kmp_int##BITS *)loc, \
1896 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
1897 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
1898 new_value = old_value.f_val; \
1899 return new_value; \
1900 }
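// Illustrative note: the compare-and-store above passes the same value as both
// the expected and the new operand, so memory is never modified; the call is
// made only for its return value, which is the current contents of *loc read
// atomically. The union then reinterprets that kmp_int##BITS result as TYPE.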
1901
1902 // -------------------------------------------------------------------------
// Operation on *loc bound by critical section
// OP - operator (not used by this macro)
1905 // LCK_ID - lock identifier
1906 // Note: don't check gtid as it should always be valid
1907 // 1, 2-byte - expect valid parameter, other - check before this macro
1908 #define OP_CRITICAL_READ(OP, LCK_ID) \
1909 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1910 \
1911 new_value = (*loc); \
1912 \
1913 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1914
1915 // -------------------------------------------------------------------------
1916 #ifdef KMP_GOMP_COMPAT
1917 #define OP_GOMP_CRITICAL_READ(OP, FLAG) \
1918 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1919 KMP_CHECK_GTID; \
1920 OP_CRITICAL_READ(OP, 0); \
1921 return new_value; \
1922 }
1923 #else
1924 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1925 #endif /* KMP_GOMP_COMPAT */
1926
1927 // -------------------------------------------------------------------------
1928 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1929 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1930 TYPE new_value; \
1931 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1932 new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
1933 return new_value; \
1934 }
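// Illustrative expansion (sketch): ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32,
// +, 0) performs the read as
//   new_value = KMP_TEST_THEN_ADD32(loc, +0);
// i.e. an atomic fetch-and-add of zero, which returns the current value
// without changing it.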
1935 // -------------------------------------------------------------------------
1936 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1937 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1938 TYPE new_value; \
1939 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1940 OP_CMPXCHG_READ(TYPE, BITS, OP) \
1941 }
1942 // ------------------------------------------------------------------------
1943 // Routines for Extended types: long double, _Quad, complex flavours (use
1944 // critical section)
1945 // TYPE_ID, OP_ID, TYPE - detailed above
1946 // OP - operator
1947 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1948 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1949 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1950 TYPE new_value; \
1951 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
1952 OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
1953 return new_value; \
1954 }
1955
1956 // ------------------------------------------------------------------------
1957 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return
1958 // value doesn't work.
1959 // Let's return the read value through the additional parameter.
1960 #if (KMP_OS_WINDOWS)
1961
1962 #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
1963 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1964 \
1965 (*out) = (*loc); \
1966 \
1967 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1968 // ------------------------------------------------------------------------
1969 #ifdef KMP_GOMP_COMPAT
1970 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
1971 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1972 KMP_CHECK_GTID; \
1973 OP_CRITICAL_READ_WRK(OP, 0); \
1974 }
1975 #else
1976 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
1977 #endif /* KMP_GOMP_COMPAT */
1978 // ------------------------------------------------------------------------
1979 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
1980 void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
1981 TYPE *loc) { \
1982 KMP_DEBUG_ASSERT(__kmp_init_serial); \
1983 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1984
1985 // ------------------------------------------------------------------------
1986 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1987 ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
1988 OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
1989 OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
1990 }
1991
1992 #endif // KMP_OS_WINDOWS
1993
1994 // ------------------------------------------------------------------------
1995 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
1996 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
1997 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
1998 KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
1999 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2000 KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2001 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2002 KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2003
2004 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2005 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2006 KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2007 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2008 KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2009
2010 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2011 1) // __kmpc_atomic_float10_rd
2012 #if KMP_HAVE_QUAD
2013 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2014 1) // __kmpc_atomic_float16_rd
2015 #endif // KMP_HAVE_QUAD
2016
2017 // Fix for CQ220361 on Windows* OS
2018 #if (KMP_OS_WINDOWS)
2019 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2020 1) // __kmpc_atomic_cmplx4_rd
2021 #else
2022 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2023 1) // __kmpc_atomic_cmplx4_rd
2024 #endif
2025 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2026 1) // __kmpc_atomic_cmplx8_rd
2027 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2028 1) // __kmpc_atomic_cmplx10_rd
2029 #if KMP_HAVE_QUAD
2030 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2031 1) // __kmpc_atomic_cmplx16_rd
2032 #if (KMP_ARCH_X86)
2033 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2034 1) // __kmpc_atomic_float16_a16_rd
2035 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2036 1) // __kmpc_atomic_cmplx16_a16_rd
2037 #endif
2038 #endif
2039
2040 // ------------------------------------------------------------------------
2041 // Atomic WRITE routines
2042
2043 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2044 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2045 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2046 KMP_XCHG_FIXED##BITS(lhs, rhs); \
2047 }
2048 // ------------------------------------------------------------------------
2049 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2050 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2051 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2052 KMP_XCHG_REAL##BITS(lhs, rhs); \
2053 }
2054
2055 // ------------------------------------------------------------------------
2056 // Operation on *lhs, rhs using "compare_and_store" routine
2057 // TYPE - operands' type
2058 // BITS - size in bits, used to distinguish low level calls
2059 // OP - operator
2060 // Note: temp_val introduced in order to force the compiler to read
2061 // *lhs only once (w/o it the compiler reads *lhs twice)
2062 #define OP_CMPXCHG_WR(TYPE, BITS, OP) \
2063 { \
2064 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2065 TYPE old_value, new_value; \
2066 temp_val = *lhs; \
2067 old_value = temp_val; \
2068 new_value = rhs; \
2069 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2070 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2071 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2072 KMP_CPU_PAUSE(); \
2073 \
2074 temp_val = *lhs; \
2075 old_value = temp_val; \
2076 new_value = rhs; \
2077 } \
2078 }
2079
2080 // -------------------------------------------------------------------------
2081 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2082 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2083 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
2084 OP_CMPXCHG_WR(TYPE, BITS, OP) \
2085 }
2086
2087 // ------------------------------------------------------------------------
2088 // Routines for Extended types: long double, _Quad, complex flavours (use
2089 // critical section)
2090 // TYPE_ID, OP_ID, TYPE - detailed above
2091 // OP - operator
2092 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2093 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2094 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2095 OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
2096 OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
2097 }
2098 // -------------------------------------------------------------------------
2099
2100 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2101 KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2102 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2103 KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2104 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2105 KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2106 #if (KMP_ARCH_X86)
2107 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2108 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2109 #else
2110 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2111 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2112 #endif
2113
2114 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2115 KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2116 #if (KMP_ARCH_X86)
2117 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2118 KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2119 #else
2120 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2121 KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2122 #endif
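// Note (an assumption about the rationale, not stated in the source): on IA-32
// the 8-byte writes above use the compare-and-store based ATOMIC_CMPXCHG_WR
// rather than a plain exchange, presumably because a native 64-bit XCHG is not
// available on 32-bit x86.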
2123
2124 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2125 1) // __kmpc_atomic_float10_wr
2126 #if KMP_HAVE_QUAD
2127 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2128 1) // __kmpc_atomic_float16_wr
2129 #endif
2130 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2131 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2132 1) // __kmpc_atomic_cmplx8_wr
2133 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2134 1) // __kmpc_atomic_cmplx10_wr
2135 #if KMP_HAVE_QUAD
2136 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2137 1) // __kmpc_atomic_cmplx16_wr
2138 #if (KMP_ARCH_X86)
2139 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2140 1) // __kmpc_atomic_float16_a16_wr
2141 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2142 1) // __kmpc_atomic_cmplx16_a16_wr
2143 #endif
2144 #endif
2145
2146 // ------------------------------------------------------------------------
2147 // Atomic CAPTURE routines
2148
// Beginning of a definition (provides name, parameters, debug trace)
2150 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2151 // fixed)
2152 // OP_ID - operation identifier (add, sub, mul, ...)
2153 // TYPE - operands' type
2154 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
2155 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
2156 TYPE *lhs, TYPE rhs, int flag) { \
2157 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2158 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2159
2160 // -------------------------------------------------------------------------
2161 // Operation on *lhs, rhs bound by critical section
2162 // OP - operator (it's supposed to contain an assignment)
2163 // LCK_ID - lock identifier
2164 // Note: don't check gtid as it should always be valid
2165 // 1, 2-byte - expect valid parameter, other - check before this macro
2166 #define OP_CRITICAL_CPT(OP, LCK_ID) \
2167 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2168 \
2169 if (flag) { \
2170 (*lhs) OP rhs; \
2171 new_value = (*lhs); \
2172 } else { \
2173 new_value = (*lhs); \
2174 (*lhs) OP rhs; \
2175 } \
2176 \
2177 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2178 return new_value;
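// For example (illustrative), with OP '+=' the critical section above yields
//   flag != 0:  new_value = (*lhs += rhs);      // capture *after* the update
//   flag == 0:  new_value = *lhs; *lhs += rhs;  // capture *before* the update
// matching the two OpenMP capture forms.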
2179
2180 // ------------------------------------------------------------------------
2181 #ifdef KMP_GOMP_COMPAT
2182 #define OP_GOMP_CRITICAL_CPT(OP, FLAG) \
2183 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2184 KMP_CHECK_GTID; \
2185 OP_CRITICAL_CPT(OP## =, 0); \
2186 }
2187 #else
2188 #define OP_GOMP_CRITICAL_CPT(OP, FLAG)
2189 #endif /* KMP_GOMP_COMPAT */
2190
2191 // ------------------------------------------------------------------------
2192 // Operation on *lhs, rhs using "compare_and_store" routine
2193 // TYPE - operands' type
2194 // BITS - size in bits, used to distinguish low level calls
2195 // OP - operator
2196 // Note: temp_val introduced in order to force the compiler to read
2197 // *lhs only once (w/o it the compiler reads *lhs twice)
2198 #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2199 { \
2200 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2201 TYPE old_value, new_value; \
2202 temp_val = *lhs; \
2203 old_value = temp_val; \
2204 new_value = old_value OP rhs; \
2205 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2206 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2207 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2208 KMP_CPU_PAUSE(); \
2209 \
2210 temp_val = *lhs; \
2211 old_value = temp_val; \
2212 new_value = old_value OP rhs; \
2213 } \
2214 if (flag) { \
2215 return new_value; \
2216 } else \
2217 return old_value; \
2218 }
2219
2220 // -------------------------------------------------------------------------
2221 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2222 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2223 TYPE new_value; \
2224 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
2225 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2226 }
2227
2228 // -------------------------------------------------------------------------
2229 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2230 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2231 TYPE old_value, new_value; \
2232 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
2233 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
2234 old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
2235 if (flag) { \
2236 return old_value OP rhs; \
2237 } else \
2238 return old_value; \
2239 }
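// Illustrative expansion (sketch): ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt,
// kmp_int32, 32, -, 0) performs
//   old_value = KMP_TEST_THEN_ADD32(lhs, -rhs);   // fetch-and-add of -rhs
// and returns old_value - rhs when 'flag' is set (value after the update) or
// old_value otherwise (value before the update).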
2240 // -------------------------------------------------------------------------
2241
2242 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2243 0) // __kmpc_atomic_fixed4_add_cpt
2244 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2245 0) // __kmpc_atomic_fixed4_sub_cpt
2246 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2247 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2248 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2249 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2250
2251 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2252 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2253 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2254 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2255 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2256 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2257 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2258 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2259
2260 // ------------------------------------------------------------------------
2261 // Entries definition for integer operands
2262 // TYPE_ID - operands type and size (fixed4, float4)
2263 // OP_ID - operation identifier (add, sub, mul, ...)
2264 // TYPE - operand type
2265 // BITS - size in bits, used to distinguish low level calls
2266 // OP - operator (used in critical section)
2267 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
2268 // ------------------------------------------------------------------------
2269 // Routines for ATOMIC integer operands, other operators
2270 // ------------------------------------------------------------------------
2271 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2272 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2273 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2274 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2275 0) // __kmpc_atomic_fixed1_andb_cpt
2276 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2277 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2278 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2279 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2280 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2281 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2282 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2283 0) // __kmpc_atomic_fixed1_orb_cpt
2284 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2285 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2286 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2287 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2288 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2289 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2290 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2291 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2292 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2293 0) // __kmpc_atomic_fixed1_xor_cpt
2294 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2295 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2296 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2297 0) // __kmpc_atomic_fixed2_andb_cpt
2298 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2299 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2300 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2301 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2302 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2303 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2304 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2305 0) // __kmpc_atomic_fixed2_orb_cpt
2306 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2307 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2308 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2309 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2310 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2311 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2312 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2313 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2314 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2315 0) // __kmpc_atomic_fixed2_xor_cpt
2316 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2317 0) // __kmpc_atomic_fixed4_andb_cpt
2318 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2319 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2320 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2321 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2322 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2323 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2324 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2325 0) // __kmpc_atomic_fixed4_orb_cpt
2326 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2327 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2328 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2329 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2330 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2331 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2332 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2333 0) // __kmpc_atomic_fixed4_xor_cpt
2334 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2335 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2336 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2337 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2338 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2339 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2340 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2341 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2342 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2343 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2344 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2345 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2346 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2347 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2348 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2349 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2350 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2351 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2352 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2353 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2354 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2355 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2356 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2357 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2358 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2359 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2360 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2361
2362 // CAPTURE routines for mixed types RHS=float16
2363 #if KMP_HAVE_QUAD
2364
// Beginning of a definition (provides name, parameters, debug trace)
2366 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2367 // fixed)
2368 // OP_ID - operation identifier (add, sub, mul, ...)
2369 // TYPE - operands' type
2370 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2371 TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
2372 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
2373 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2374 KA_TRACE(100, \
2375 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
2376 gtid));
2377
2378 // -------------------------------------------------------------------------
2379 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
2380 RTYPE, LCK_ID, MASK, GOMP_FLAG) \
2381 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2382 TYPE new_value; \
2383 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
2384 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2385 }
2386
2387 // -------------------------------------------------------------------------
2388 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
2389 LCK_ID, GOMP_FLAG) \
2390 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
2391 TYPE new_value; \
2392 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \
2393 OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \
2394 }
2395
2396 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2397 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2398 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2399 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2400 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2401 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2402 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2403 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2404 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2405 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2406 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2407 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2408 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2409 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2410 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2411 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2412
2413 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2414 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2415 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2416 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2417 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2418 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2419 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2420 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2421 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2422 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2423 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2424 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2425 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2426 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2427 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2428 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2429
2430 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2431 0) // __kmpc_atomic_fixed4_add_cpt_fp
2432 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2433 0) // __kmpc_atomic_fixed4u_add_cpt_fp
2434 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2435 0) // __kmpc_atomic_fixed4_sub_cpt_fp
2436 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2437 0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2438 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2439 0) // __kmpc_atomic_fixed4_mul_cpt_fp
2440 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2441 0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2442 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2443 0) // __kmpc_atomic_fixed4_div_cpt_fp
2444 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2445 0) // __kmpc_atomic_fixed4u_div_cpt_fp
2446
2447 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2448 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2449 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2450 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2451 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2452 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2453 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2454 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2455 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2456 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2457 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2458 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2459 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2460 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2461 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2462 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2463
2464 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2465 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2466 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2467 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2468 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2469 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2470 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2471 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2472
2473 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2474 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2475 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2476 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2477 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2478 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2479 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2480 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2481
2482 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2483 1) // __kmpc_atomic_float10_add_cpt_fp
2484 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2485 1) // __kmpc_atomic_float10_sub_cpt_fp
2486 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2487 1) // __kmpc_atomic_float10_mul_cpt_fp
2488 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2489 1) // __kmpc_atomic_float10_div_cpt_fp
2490
2491 #endif // KMP_HAVE_QUAD
2492
2493 // ------------------------------------------------------------------------
2494 // Routines for C/C++ Reduction operators && and ||
2495
2496 // -------------------------------------------------------------------------
2497 // Operation on *lhs, rhs bound by critical section
2498 // OP - operator (it's supposed to contain an assignment)
2499 // LCK_ID - lock identifier
2500 // Note: don't check gtid as it should always be valid
2501 // 1, 2-byte - expect valid parameter, other - check before this macro
2502 #define OP_CRITICAL_L_CPT(OP, LCK_ID) \
2503 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2504 \
2505 if (flag) { \
2506 new_value OP rhs; \
2507 } else \
2508 new_value = (*lhs); \
2509 \
2510 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2511
2512 // ------------------------------------------------------------------------
2513 #ifdef KMP_GOMP_COMPAT
2514 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
2515 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2516 KMP_CHECK_GTID; \
2517 OP_CRITICAL_L_CPT(OP, 0); \
2518 return new_value; \
2519 }
2520 #else
2521 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2522 #endif /* KMP_GOMP_COMPAT */
2523
2524 // ------------------------------------------------------------------------
2525 // Need separate macros for &&, || because there is no combined assignment
2526 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2527 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2528 TYPE new_value; \
2529 OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
2530 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2531 }
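// Illustrative note: for '&&' and '||' the update is x = x && rhs (or
// x = x || rhs); the cmpxchg loop in OP_CMPXCHG_CPT computes
// new_value = old_value && rhs and, as in the other capture routines, returns
// new_value when 'flag' is set and old_value otherwise.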
2532
2533 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2534 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2535 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2536 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2537 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2538 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2539 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2540 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2541 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2542 0) // __kmpc_atomic_fixed4_andl_cpt
2543 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2544 0) // __kmpc_atomic_fixed4_orl_cpt
2545 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2546 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2547 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2548 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2549
2550 // -------------------------------------------------------------------------
// Routines for Fortran operators that have no C counterpart:
2552 // MAX, MIN, .EQV., .NEQV.
2553 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2554 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2555
2556 // -------------------------------------------------------------------------
2557 // MIN and MAX need separate macros
// OP - comparison operator used to check whether any action is still needed
2559 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2560 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2561 \
2562 if (*lhs OP rhs) { /* still need actions? */ \
2563 old_value = *lhs; \
2564 *lhs = rhs; \
2565 if (flag) \
2566 new_value = rhs; \
2567 else \
2568 new_value = old_value; \
2569 } \
2570 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2571 return new_value;
2572
2573 // -------------------------------------------------------------------------
2574 #ifdef KMP_GOMP_COMPAT
2575 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
2576 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2577 KMP_CHECK_GTID; \
2578 MIN_MAX_CRITSECT_CPT(OP, 0); \
2579 }
2580 #else
2581 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2582 #endif /* KMP_GOMP_COMPAT */
2583
2584 // -------------------------------------------------------------------------
2585 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2586 { \
2587 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2588 /*TYPE old_value; */ \
2589 temp_val = *lhs; \
2590 old_value = temp_val; \
2591 while (old_value OP rhs && /* still need actions? */ \
2592 !KMP_COMPARE_AND_STORE_ACQ##BITS( \
2593 (kmp_int##BITS *)lhs, \
2594 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2595 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
2596 KMP_CPU_PAUSE(); \
2597 temp_val = *lhs; \
2598 old_value = temp_val; \
2599 } \
2600 if (flag) \
2601 return rhs; \
2602 else \
2603 return old_value; \
2604 }
2605
2606 // -------------------------------------------------------------------------
2607 // 1-byte, 2-byte operands - use critical section
2608 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2609 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2610 TYPE new_value, old_value; \
2611 if (*lhs OP rhs) { /* need actions? */ \
2612 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2613 MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
2614 } \
2615 return *lhs; \
2616 }
2617
2618 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2619 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2620 TYPE new_value, old_value; \
2621 if (*lhs OP rhs) { \
2622 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
2623 MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
2624 } \
2625 return *lhs; \
2626 }
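// Note on the OP argument below: it is the comparison that tests whether an
// update is still needed, so max_cpt passes '<' (update when *lhs < rhs) and
// min_cpt passes '>' (update when *lhs > rhs).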
2627
2628 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2629 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2630 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2631 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2632 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2633 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2634 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2635 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2636 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2637 0) // __kmpc_atomic_fixed4_max_cpt
2638 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2639 0) // __kmpc_atomic_fixed4_min_cpt
2640 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2641 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2642 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2643 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2644 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2645 KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2646 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2647 KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2648 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2649 KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2650 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2651 KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2652 #if KMP_HAVE_QUAD
2653 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2654 1) // __kmpc_atomic_float16_max_cpt
2655 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2656 1) // __kmpc_atomic_float16_min_cpt
2657 #if (KMP_ARCH_X86)
2658 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2659 1) // __kmpc_atomic_float16_max_a16_cpt
2660 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2661 1) // __kmpc_atomic_float16_min_a16_cpt
2662 #endif
2663 #endif
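// ------------------------------------------------------------------------
// Illustrative sketch (kept out of the build): how a compiler targeting this
// runtime might call one of the captured min/max entry points generated
// above. 'loc' and 'gtid' stand in for the source location and global thread
// id that the compiler and runtime normally supply.
#if 0
static kmp_int32 sketch_use_fixed4_max_cpt(ident_t *loc, int gtid,
                                           kmp_int32 *x, kmp_int32 e) {
  // Atomically x = max(x, e); flag == 1 captures the value *after* the
  // operation, flag == 0 the value observed *before* it.
  return __kmpc_atomic_fixed4_max_cpt(loc, gtid, x, e, 1);
}
#endif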
2664
2665 // ------------------------------------------------------------------------
2666 #ifdef KMP_GOMP_COMPAT
2667 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
2668 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2669 KMP_CHECK_GTID; \
2670 OP_CRITICAL_CPT(OP, 0); \
2671 }
2672 #else
2673 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2674 #endif /* KMP_GOMP_COMPAT */
2675 // ------------------------------------------------------------------------
2676 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2677 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2678 TYPE new_value; \
2679 OP_GOMP_CRITICAL_EQV_CPT(^= ~, GOMP_FLAG) /* send assignment */ \
2680 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2681 }
2682
2683 // ------------------------------------------------------------------------
2684
2685 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2686 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2687 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2688 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2689 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2690 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2691 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2692 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2693 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2694 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2695 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2696 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2697 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2698 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2699 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2700 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
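// ------------------------------------------------------------------------
// Illustrative sketch (kept out of the build): the eqv routines above reuse
// the xor machinery by pasting the "^ ~" token pair, so the value stored is
// the exclusive-or of one operand with the complement of the other, which is
// the bitwise equivalence ~(a ^ b). A quick check of that identity:
#if 0
#include <assert.h>
static void sketch_eqv_identity(void) {
  kmp_int8 a = 0x5A, b = 0x33;
  // a EQV b  ==  ~(a ^ b)  ==  a ^ ~b (the form the macro produces)
  assert((kmp_int8)(a ^ ~b) == (kmp_int8)~(a ^ b));
}
#endif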
2701
2702 // ------------------------------------------------------------------------
2703 // Routines for Extended types: long double, _Quad, complex flavours (use
2704 // critical section)
2705 // TYPE_ID, OP_ID, TYPE - detailed above
2706 // OP - operator
2707 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2708 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2709 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2710 TYPE new_value; \
2711 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \
2712 OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \
2713 }
2714
2715 // ------------------------------------------------------------------------
2716 // Workaround for cmplx4. Regular routines with return value don't work
2717 // on Win_32e. Let's return captured values through the additional parameter.
2718 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
2719 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2720 \
2721 if (flag) { \
2722 (*lhs) OP rhs; \
2723 (*out) = (*lhs); \
2724 } else { \
2725 (*out) = (*lhs); \
2726 (*lhs) OP rhs; \
2727 } \
2728 \
2729 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2730 return;
2731 // ------------------------------------------------------------------------
2732
2733 #ifdef KMP_GOMP_COMPAT
2734 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
2735 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2736 KMP_CHECK_GTID; \
2737 OP_CRITICAL_CPT_WRK(OP## =, 0); \
2738 }
2739 #else
2740 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2741 #endif /* KMP_GOMP_COMPAT */
2742 // ------------------------------------------------------------------------
2743
2744 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2745 void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2746 TYPE rhs, TYPE *out, int flag) { \
2747 KMP_DEBUG_ASSERT(__kmp_init_serial); \
2748 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2749 // ------------------------------------------------------------------------
2750
2751 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2752 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
2753 OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
2754 OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
2755 }
2756 // The end of workaround for cmplx4
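// ------------------------------------------------------------------------
// Illustrative sketch (kept out of the build): the cmplx4 capture entry
// points generated from ATOMIC_CRITICAL_CPT_WRK return the captured value
// through the extra 'out' parameter instead of a return value (see the
// Win_32e workaround above). 'loc' and 'gtid' are placeholders.
#if 0
static void sketch_use_cmplx4_add_cpt(ident_t *loc, int gtid, kmp_cmplx32 *x,
                                      kmp_cmplx32 e) {
  kmp_cmplx32 v;
  // flag == 0: 'v' receives the value of *x before the atomic addition.
  __kmpc_atomic_cmplx4_add_cpt(loc, gtid, x, e, &v, 0);
}
#endif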
2757
2758 /* ------------------------------------------------------------------------- */
2759 // routines for long double type
2760 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2761 1) // __kmpc_atomic_float10_add_cpt
2762 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2763 1) // __kmpc_atomic_float10_sub_cpt
2764 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2765 1) // __kmpc_atomic_float10_mul_cpt
2766 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2767 1) // __kmpc_atomic_float10_div_cpt
2768 #if KMP_HAVE_QUAD
2769 // routines for _Quad type
2770 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2771 1) // __kmpc_atomic_float16_add_cpt
2772 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2773 1) // __kmpc_atomic_float16_sub_cpt
2774 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2775 1) // __kmpc_atomic_float16_mul_cpt
2776 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2777 1) // __kmpc_atomic_float16_div_cpt
2778 #if (KMP_ARCH_X86)
2779 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2780 1) // __kmpc_atomic_float16_add_a16_cpt
2781 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2782 1) // __kmpc_atomic_float16_sub_a16_cpt
2783 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2784 1) // __kmpc_atomic_float16_mul_a16_cpt
2785 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2786 1) // __kmpc_atomic_float16_div_a16_cpt
2787 #endif
2788 #endif
2789
2790 // routines for complex types
2791
2792 // cmplx4 routines to return void
2793 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2794 1) // __kmpc_atomic_cmplx4_add_cpt
2795 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2796 1) // __kmpc_atomic_cmplx4_sub_cpt
2797 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2798 1) // __kmpc_atomic_cmplx4_mul_cpt
2799 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2800 1) // __kmpc_atomic_cmplx4_div_cpt
2801
2802 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2803 1) // __kmpc_atomic_cmplx8_add_cpt
2804 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2805 1) // __kmpc_atomic_cmplx8_sub_cpt
2806 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2807 1) // __kmpc_atomic_cmplx8_mul_cpt
2808 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2809 1) // __kmpc_atomic_cmplx8_div_cpt
2810 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2811 1) // __kmpc_atomic_cmplx10_add_cpt
2812 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2813 1) // __kmpc_atomic_cmplx10_sub_cpt
2814 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2815 1) // __kmpc_atomic_cmplx10_mul_cpt
2816 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2817 1) // __kmpc_atomic_cmplx10_div_cpt
2818 #if KMP_HAVE_QUAD
2819 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2820 1) // __kmpc_atomic_cmplx16_add_cpt
2821 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2822 1) // __kmpc_atomic_cmplx16_sub_cpt
2823 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2824 1) // __kmpc_atomic_cmplx16_mul_cpt
2825 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2826 1) // __kmpc_atomic_cmplx16_div_cpt
2827 #if (KMP_ARCH_X86)
2828 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2829 1) // __kmpc_atomic_cmplx16_add_a16_cpt
2830 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2831 1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2832 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2833 1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2834 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2835 1) // __kmpc_atomic_cmplx16_div_a16_cpt
2836 #endif
2837 #endif
2838
2839 #if OMP_40_ENABLED
2840
2841 // OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr
2842 // binop x; v = x; } for non-commutative operations.
2843 // Supported only on IA-32 architecture and Intel(R) 64
2844
2845 // -------------------------------------------------------------------------
2846 // Operation on *lhs, rhs bound by critical section
2847 // OP - operator (it's supposed to contain an assignment)
2848 // LCK_ID - lock identifier
2849 // Note: gtid is not checked here; it is expected to always be valid
2850 // 1-, 2-byte operands: expect valid parameters; other sizes: check before this macro
2851 #define OP_CRITICAL_CPT_REV(OP, LCK_ID) \
2852 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2853 \
2854 if (flag) { \
2855 /*temp_val = (*lhs);*/ \
2856 (*lhs) = (rhs)OP(*lhs); \
2857 new_value = (*lhs); \
2858 } else { \
2859 new_value = (*lhs); \
2860 (*lhs) = (rhs)OP(*lhs); \
2861 } \
2862 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
2863 return new_value;
2864
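// ------------------------------------------------------------------------
// Illustrative sketch (kept out of the build): the sequential equivalent of
// what OP_CRITICAL_CPT_REV computes for a reversed subtraction, spelling out
// the "capture after" vs. "capture before" flavours that 'flag' selects.
#if 0
static void sketch_capture_rev_semantics(double *x, double e, double *v,
                                         int flag) {
  if (flag) { // capture after:  { x = e - x; v = x; }
    *x = e - *x;
    *v = *x;
  } else { // capture before: { v = x; x = e - x; }
    *v = *x;
    *x = e - *x;
  }
}
#endif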
2865 // ------------------------------------------------------------------------
2866 #ifdef KMP_GOMP_COMPAT
2867 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG) \
2868 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2869 KMP_CHECK_GTID; \
2870 OP_CRITICAL_CPT_REV(OP, 0); \
2871 }
2872 #else
2873 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)
2874 #endif /* KMP_GOMP_COMPAT */
2875
2876 // ------------------------------------------------------------------------
2877 // Operation on *lhs, rhs using "compare_and_store" routine
2878 // TYPE - operands' type
2879 // BITS - size in bits, used to distinguish low level calls
2880 // OP - operator
2881 // Note: temp_val introduced in order to force the compiler to read
2882 // *lhs only once (w/o it the compiler reads *lhs twice)
2883 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2884 { \
2885 TYPE KMP_ATOMIC_VOLATILE temp_val; \
2886 TYPE old_value, new_value; \
2887 temp_val = *lhs; \
2888 old_value = temp_val; \
2889 new_value = rhs OP old_value; \
2890 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
2891 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
2892 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
2893 KMP_CPU_PAUSE(); \
2894 \
2895 temp_val = *lhs; \
2896 old_value = temp_val; \
2897 new_value = rhs OP old_value; \
2898 } \
2899 if (flag) { \
2900 return new_value; \
2901 } else \
2902 return old_value; \
2903 }
2904
2905 // -------------------------------------------------------------------------
2906 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2907 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2908 TYPE new_value; \
2909 OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
2910 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
2911 }
2912
2913 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2914 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2915 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2916 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2917 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2918 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2919 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
2920 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
2921 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
2922 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
2923 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
2924 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
2925 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
2926 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
2927 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
2928 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
2929 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
2930 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
2931 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
2932 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
2933 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
2934 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
2935 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
2936 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
2937 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
2938 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
2939 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
2940 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
2941 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
2942 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
2943 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
2944 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
2945 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
2946 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
2947 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
2948 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
2949 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
2950 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
2951 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
2952 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
2953 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
2954 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
2955 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
2956 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
2957 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
2958 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
2959 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
2960 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
2961 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
2962 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
2963 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
2964 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
2965 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
2966 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
2967 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
2968 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
2969 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
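// ------------------------------------------------------------------------
// Illustrative sketch (kept out of the build): calling one of the reversed
// capture entry points generated above, e.g. for { v = x; x = e / x; } on a
// double. 'loc' and 'gtid' are placeholders supplied by compiler and runtime.
#if 0
static kmp_real64 sketch_use_float8_div_cpt_rev(ident_t *loc, int gtid,
                                                kmp_real64 *x, kmp_real64 e) {
  // flag == 0: return the value of *x observed before the update.
  return __kmpc_atomic_float8_div_cpt_rev(loc, gtid, x, e, 0);
}
#endif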
2970
2971 // ------------------------------------------------------------------------
2972 // Routines for Extended types: long double, _Quad, complex flavours (use
2973 // critical section)
2974 // TYPE_ID, OP_ID, TYPE - detailed above
2975 // OP - operator
2976 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2977 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2978 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2979 TYPE new_value; \
2980 /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
2981 OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
2982 OP_CRITICAL_CPT_REV(OP, LCK_ID) \
2983 }
2984
2985 /* ------------------------------------------------------------------------- */
2986 // routines for long double type
2987 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
2988 1) // __kmpc_atomic_float10_sub_cpt_rev
2989 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
2990 1) // __kmpc_atomic_float10_div_cpt_rev
2991 #if KMP_HAVE_QUAD
2992 // routines for _Quad type
2993 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
2994 1) // __kmpc_atomic_float16_sub_cpt_rev
2995 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
2996 1) // __kmpc_atomic_float16_div_cpt_rev
2997 #if (KMP_ARCH_X86)
2998 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
2999 1) // __kmpc_atomic_float16_sub_a16_cpt_rev
3000 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
3001 1) // __kmpc_atomic_float16_div_a16_cpt_rev
3002 #endif
3003 #endif
3004
3005 // routines for complex types
3006
3007 // ------------------------------------------------------------------------
3008 // Workaround for cmplx4. Regular routines with return value don't work
3009 // on Win_32e. Let's return captured values through the additional parameter.
3010 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3011 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3012 \
3013 if (flag) { \
3014 (*lhs) = (rhs)OP(*lhs); \
3015 (*out) = (*lhs); \
3016 } else { \
3017 (*out) = (*lhs); \
3018 (*lhs) = (rhs)OP(*lhs); \
3019 } \
3020 \
3021 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3022 return;
3023 // ------------------------------------------------------------------------
3024
3025 #ifdef KMP_GOMP_COMPAT
3026 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
3027 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3028 KMP_CHECK_GTID; \
3029 OP_CRITICAL_CPT_REV_WRK(OP, 0); \
3030 }
3031 #else
3032 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3033 #endif /* KMP_GOMP_COMPAT */
3034 // ------------------------------------------------------------------------
3035
3036 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
3037 GOMP_FLAG) \
3038 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
3039 OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
3040 OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
3041 }
3042 // The end of workaround for cmplx4
3043
3044 // !!! TODO: check if we need to return void for cmplx4 routines
3045 // cmplx4 routines to return void
3046 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3047 1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3048 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3049 1) // __kmpc_atomic_cmplx4_div_cpt_rev
3050
3051 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3052 1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3053 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3054 1) // __kmpc_atomic_cmplx8_div_cpt_rev
3055 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3056 1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3057 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3058 1) // __kmpc_atomic_cmplx10_div_cpt_rev
3059 #if KMP_HAVE_QUAD
3060 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3061 1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3062 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3063 1) // __kmpc_atomic_cmplx16_div_cpt_rev
3064 #if (KMP_ARCH_X86)
3065 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3066 1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3067 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3068 1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3069 #endif
3070 #endif
3071
3072 // Capture reverse for mixed type: RHS=float16
3073 #if KMP_HAVE_QUAD
3074
3075 // Beginning of a definition (provides name, parameters, debug trace)
3076 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
3077 // fixed)
3078 // OP_ID - operation identifier (add, sub, mul, ...)
3079 // TYPE - operands' type
3080 // -------------------------------------------------------------------------
3081 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
3082 RTYPE, LCK_ID, MASK, GOMP_FLAG) \
3083 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3084 TYPE new_value; \
3085 OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
3086 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
3087 }
3088
3089 // -------------------------------------------------------------------------
3090 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3091 LCK_ID, GOMP_FLAG) \
3092 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
3093 TYPE new_value; \
3094 OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) /* send assignment */ \
3095 OP_CRITICAL_CPT_REV(OP, LCK_ID) /* send assignment */ \
3096 }
3097
3098 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3099 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3100 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3101 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3102 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3103 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3104 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3105 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3106
3107 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3108 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3109 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3110 1,
3111 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3112 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3113 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3114 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3115 1,
3116 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3117
3118 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3119 3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3120 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3121 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3122 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3123 3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3124 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3125 4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3126
3127 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3128 7,
3129 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3130 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3131 8i, 7,
3132 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3133 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3134 7,
3135 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3136 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3137 8i, 7,
3138 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3139
3140 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3141 4r, 3,
3142 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3143 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3144 4r, 3,
3145 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3146
3147 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3148 8r, 7,
3149 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3150 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3151 8r, 7,
3152 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3153
3154 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3155 10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3156 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3157 10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3158
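// ------------------------------------------------------------------------
// Illustrative sketch (kept out of the build): the *_fp entry points above
// keep the LHS in its own type while taking the RHS as _Quad. The exact
// prototype comes from ATOMIC_BEGIN_CPT_MIX (defined earlier in this file);
// the call below assumes the usual (loc, gtid, lhs, rhs, flag) shape.
#if 0
static kmp_real64 sketch_use_float8_sub_cpt_rev_fp(ident_t *loc, int gtid,
                                                   kmp_real64 *x, _Quad e) {
  return __kmpc_atomic_float8_sub_cpt_rev_fp(loc, gtid, x, e, 0);
}
#endif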
3159 #endif // KMP_HAVE_QUAD
3160
3161 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
3162
3163 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3164 TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3165 TYPE rhs) { \
3166 KMP_DEBUG_ASSERT(__kmp_init_serial); \
3167 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3168
3169 #define CRITICAL_SWP(LCK_ID) \
3170 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3171 \
3172 old_value = (*lhs); \
3173 (*lhs) = rhs; \
3174 \
3175 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3176 return old_value;
3177
3178 // ------------------------------------------------------------------------
3179 #ifdef KMP_GOMP_COMPAT
3180 #define GOMP_CRITICAL_SWP(FLAG) \
3181 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3182 KMP_CHECK_GTID; \
3183 CRITICAL_SWP(0); \
3184 }
3185 #else
3186 #define GOMP_CRITICAL_SWP(FLAG)
3187 #endif /* KMP_GOMP_COMPAT */
3188
3189 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3190 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3191 TYPE old_value; \
3192 GOMP_CRITICAL_SWP(GOMP_FLAG) \
3193 old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
3194 return old_value; \
3195 }
3196 // ------------------------------------------------------------------------
3197 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3198 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3199 TYPE old_value; \
3200 GOMP_CRITICAL_SWP(GOMP_FLAG) \
3201 old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
3202 return old_value; \
3203 }
3204
3205 // ------------------------------------------------------------------------
3206 #define CMPXCHG_SWP(TYPE, BITS) \
3207 { \
3208 TYPE KMP_ATOMIC_VOLATILE temp_val; \
3209 TYPE old_value, new_value; \
3210 temp_val = *lhs; \
3211 old_value = temp_val; \
3212 new_value = rhs; \
3213 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
3214 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
3215 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
3216 KMP_CPU_PAUSE(); \
3217 \
3218 temp_val = *lhs; \
3219 old_value = temp_val; \
3220 new_value = rhs; \
3221 } \
3222 return old_value; \
3223 }
3224
3225 // -------------------------------------------------------------------------
3226 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
3227 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3228 TYPE old_value; \
3229 GOMP_CRITICAL_SWP(GOMP_FLAG) \
3230 CMPXCHG_SWP(TYPE, BITS) \
3231 }
3232
3233 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3234 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3235 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3236
3237 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3238 KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3239
3240 #if (KMP_ARCH_X86)
3241 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3242 KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3243 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3244 KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3245 #else
3246 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3247 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3248 KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3249 #endif
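// ------------------------------------------------------------------------
// Illustrative sketch (kept out of the build): the swap entry points above
// implement the capture-write form { v = x; x = e; } and return the old
// value. 'loc' and 'gtid' are placeholders.
#if 0
static kmp_int32 sketch_use_fixed4_swp(ident_t *loc, int gtid, kmp_int32 *x,
                                       kmp_int32 e) {
  return __kmpc_atomic_fixed4_swp(loc, gtid, x, e); // returns the old *x
}
#endif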
3250
3251 // ------------------------------------------------------------------------
3252 // Routines for Extended types: long double, _Quad, complex flavours (use
3253 // critical section)
3254 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3255 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
3256 TYPE old_value; \
3257 GOMP_CRITICAL_SWP(GOMP_FLAG) \
3258 CRITICAL_SWP(LCK_ID) \
3259 }
3260
3261 // ------------------------------------------------------------------------
3262 // !!! TODO: check if we need to return void for cmplx4 routines
3263 // Workaround for cmplx4. Regular routines with return value don't work
3264 // on Win_32e. Let's return captured values through the additional parameter.
3265
3266 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3267 void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
3268 TYPE rhs, TYPE *out) { \
3269 KMP_DEBUG_ASSERT(__kmp_init_serial); \
3270 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3271
3272 #define CRITICAL_SWP_WRK(LCK_ID) \
3273 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3274 \
3275 tmp = (*lhs); \
3276 (*lhs) = (rhs); \
3277 (*out) = tmp; \
3278 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
3279 return;
3280 // ------------------------------------------------------------------------
3281
3282 #ifdef KMP_GOMP_COMPAT
3283 #define GOMP_CRITICAL_SWP_WRK(FLAG) \
3284 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
3285 KMP_CHECK_GTID; \
3286 CRITICAL_SWP_WRK(0); \
3287 }
3288 #else
3289 #define GOMP_CRITICAL_SWP_WRK(FLAG)
3290 #endif /* KMP_GOMP_COMPAT */
3291 // ------------------------------------------------------------------------
3292
3293 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
3294 ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
3295 TYPE tmp; \
3296 GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
3297 CRITICAL_SWP_WRK(LCK_ID) \
3298 }
3299 // The end of workaround for cmplx4
3300
3301 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3302 #if KMP_HAVE_QUAD
3303 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3304 #endif
3305 // cmplx4 routine to return void
3306 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
3307
3308 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
3309 // __kmpc_atomic_cmplx4_swp
3310
3311 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3312 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3313 #if KMP_HAVE_QUAD
3314 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3315 #if (KMP_ARCH_X86)
3316 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3317 1) // __kmpc_atomic_float16_a16_swp
3318 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3319 1) // __kmpc_atomic_cmplx16_a16_swp
3320 #endif
3321 #endif
3322
3323 // End of OpenMP 4.0 Capture
3324
3325 #endif // OMP_40_ENABLED
3326
3327 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3328
3329 #undef OP_CRITICAL
3330
3331 /* ------------------------------------------------------------------------ */
3332 /* Generic atomic routines */
3333
3334 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3335 void (*f)(void *, void *, void *)) {
3336 KMP_DEBUG_ASSERT(__kmp_init_serial);
3337
3338 if (
3339 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3340 FALSE /* must use lock */
3341 #else
3342 TRUE
3343 #endif
3344 ) {
3345 kmp_int8 old_value, new_value;
3346
3347 old_value = *(kmp_int8 *)lhs;
3348 (*f)(&new_value, &old_value, rhs);
3349
3350 /* TODO: Should this be acquire or release? */
3351 while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3352 *(kmp_int8 *)&new_value)) {
3353 KMP_CPU_PAUSE();
3354
3355 old_value = *(kmp_int8 *)lhs;
3356 (*f)(&new_value, &old_value, rhs);
3357 }
3358
3359 return;
3360 } else {
3361 // All 1-byte data is of integer data type.
3362
3363 #ifdef KMP_GOMP_COMPAT
3364 if (__kmp_atomic_mode == 2) {
3365 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3366 } else
3367 #endif /* KMP_GOMP_COMPAT */
3368 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3369
3370 (*f)(lhs, lhs, rhs);
3371
3372 #ifdef KMP_GOMP_COMPAT
3373 if (__kmp_atomic_mode == 2) {
3374 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3375 } else
3376 #endif /* KMP_GOMP_COMPAT */
3377 __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3378 }
3379 }
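// ------------------------------------------------------------------------
// Illustrative sketch (kept out of the build): the generic entry points take
// a callback with the contract f(result, a, b) == "*result = *a <op> *b" for
// one element of the given size; both the lock-free and the lock-based paths
// above rely on it. The saturating-add operator below is hypothetical.
#if 0
static void sketch_sat_add_i8(void *result, void *a, void *b) {
  int sum = *(kmp_int8 *)a + *(kmp_int8 *)b;
  *(kmp_int8 *)result =
      (kmp_int8)(sum > 127 ? 127 : (sum < -128 ? -128 : sum));
}
static void sketch_use_atomic_1(ident_t *loc, int gtid, kmp_int8 *x,
                                kmp_int8 e) {
  __kmpc_atomic_1(loc, gtid, x, &e, sketch_sat_add_i8);
}
#endif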
3380
3381 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3382 void (*f)(void *, void *, void *)) {
3383 if (
3384 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3385 FALSE /* must use lock */
3386 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3387 TRUE /* no alignment problems */
3388 #else
3389 !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3390 #endif
3391 ) {
3392 kmp_int16 old_value, new_value;
3393
3394 old_value = *(kmp_int16 *)lhs;
3395 (*f)(&new_value, &old_value, rhs);
3396
3397 /* TODO: Should this be acquire or release? */
3398 while (!KMP_COMPARE_AND_STORE_ACQ16(
3399 (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3400 KMP_CPU_PAUSE();
3401
3402 old_value = *(kmp_int16 *)lhs;
3403 (*f)(&new_value, &old_value, rhs);
3404 }
3405
3406 return;
3407 } else {
3408 // All 2-byte data is of integer data type.
3409
3410 #ifdef KMP_GOMP_COMPAT
3411 if (__kmp_atomic_mode == 2) {
3412 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3413 } else
3414 #endif /* KMP_GOMP_COMPAT */
3415 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3416
3417 (*f)(lhs, lhs, rhs);
3418
3419 #ifdef KMP_GOMP_COMPAT
3420 if (__kmp_atomic_mode == 2) {
3421 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3422 } else
3423 #endif /* KMP_GOMP_COMPAT */
3424 __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3425 }
3426 }
3427
3428 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3429 void (*f)(void *, void *, void *)) {
3430 KMP_DEBUG_ASSERT(__kmp_init_serial);
3431
3432 if (
3433 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3434 // Gomp compatibility is broken if this routine is called for floats.
3435 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3436 TRUE /* no alignment problems */
3437 #else
3438 !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3439 #endif
3440 ) {
3441 kmp_int32 old_value, new_value;
3442
3443 old_value = *(kmp_int32 *)lhs;
3444 (*f)(&new_value, &old_value, rhs);
3445
3446 /* TODO: Should this be acquire or release? */
3447 while (!KMP_COMPARE_AND_STORE_ACQ32(
3448 (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3449 KMP_CPU_PAUSE();
3450
3451 old_value = *(kmp_int32 *)lhs;
3452 (*f)(&new_value, &old_value, rhs);
3453 }
3454
3455 return;
3456 } else {
3457 // Use __kmp_atomic_lock_4i for all 4-byte data,
3458 // even if it isn't of integer data type.
3459
3460 #ifdef KMP_GOMP_COMPAT
3461 if (__kmp_atomic_mode == 2) {
3462 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3463 } else
3464 #endif /* KMP_GOMP_COMPAT */
3465 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3466
3467 (*f)(lhs, lhs, rhs);
3468
3469 #ifdef KMP_GOMP_COMPAT
3470 if (__kmp_atomic_mode == 2) {
3471 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3472 } else
3473 #endif /* KMP_GOMP_COMPAT */
3474 __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3475 }
3476 }
3477
3478 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3479 void (*f)(void *, void *, void *)) {
3480 KMP_DEBUG_ASSERT(__kmp_init_serial);
3481 if (
3482
3483 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3484 FALSE /* must use lock */
3485 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3486 TRUE /* no alignment problems */
3487 #else
3488 !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3489 #endif
3490 ) {
3491 kmp_int64 old_value, new_value;
3492
3493 old_value = *(kmp_int64 *)lhs;
3494 (*f)(&new_value, &old_value, rhs);
3495 /* TODO: Should this be acquire or release? */
3496 while (!KMP_COMPARE_AND_STORE_ACQ64(
3497 (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3498 KMP_CPU_PAUSE();
3499
3500 old_value = *(kmp_int64 *)lhs;
3501 (*f)(&new_value, &old_value, rhs);
3502 }
3503
3504 return;
3505 } else {
3506 // Use __kmp_atomic_lock_8i for all 8-byte data,
3507 // even if it isn't of integer data type.
3508
3509 #ifdef KMP_GOMP_COMPAT
3510 if (__kmp_atomic_mode == 2) {
3511 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3512 } else
3513 #endif /* KMP_GOMP_COMPAT */
3514 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3515
3516 (*f)(lhs, lhs, rhs);
3517
3518 #ifdef KMP_GOMP_COMPAT
3519 if (__kmp_atomic_mode == 2) {
3520 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3521 } else
3522 #endif /* KMP_GOMP_COMPAT */
3523 __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3524 }
3525 }
3526
3527 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3528 void (*f)(void *, void *, void *)) {
3529 KMP_DEBUG_ASSERT(__kmp_init_serial);
3530
3531 #ifdef KMP_GOMP_COMPAT
3532 if (__kmp_atomic_mode == 2) {
3533 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3534 } else
3535 #endif /* KMP_GOMP_COMPAT */
3536 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3537
3538 (*f)(lhs, lhs, rhs);
3539
3540 #ifdef KMP_GOMP_COMPAT
3541 if (__kmp_atomic_mode == 2) {
3542 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3543 } else
3544 #endif /* KMP_GOMP_COMPAT */
3545 __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3546 }
3547
3548 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3549 void (*f)(void *, void *, void *)) {
3550 KMP_DEBUG_ASSERT(__kmp_init_serial);
3551
3552 #ifdef KMP_GOMP_COMPAT
3553 if (__kmp_atomic_mode == 2) {
3554 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3555 } else
3556 #endif /* KMP_GOMP_COMPAT */
3557 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3558
3559 (*f)(lhs, lhs, rhs);
3560
3561 #ifdef KMP_GOMP_COMPAT
3562 if (__kmp_atomic_mode == 2) {
3563 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3564 } else
3565 #endif /* KMP_GOMP_COMPAT */
3566 __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3567 }
3568
3569 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3570 void (*f)(void *, void *, void *)) {
3571 KMP_DEBUG_ASSERT(__kmp_init_serial);
3572
3573 #ifdef KMP_GOMP_COMPAT
3574 if (__kmp_atomic_mode == 2) {
3575 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3576 } else
3577 #endif /* KMP_GOMP_COMPAT */
3578 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3579
3580 (*f)(lhs, lhs, rhs);
3581
3582 #ifdef KMP_GOMP_COMPAT
3583 if (__kmp_atomic_mode == 2) {
3584 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3585 } else
3586 #endif /* KMP_GOMP_COMPAT */
3587 __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3588 }
3589
3590 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3591 void (*f)(void *, void *, void *)) {
3592 KMP_DEBUG_ASSERT(__kmp_init_serial);
3593
3594 #ifdef KMP_GOMP_COMPAT
3595 if (__kmp_atomic_mode == 2) {
3596 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3597 } else
3598 #endif /* KMP_GOMP_COMPAT */
3599 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3600
3601 (*f)(lhs, lhs, rhs);
3602
3603 #ifdef KMP_GOMP_COMPAT
3604 if (__kmp_atomic_mode == 2) {
3605 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3606 } else
3607 #endif /* KMP_GOMP_COMPAT */
3608 __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3609 }
3610
3611 // AC: same two routines as GOMP_atomic_start/end, but will be called by our
3612 // compiler; duplicated so as not to use third-party names in pure Intel code
3613 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3614 void __kmpc_atomic_start(void) {
3615 int gtid = __kmp_entry_gtid();
3616 KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3617 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3618 }
3619
3620 void __kmpc_atomic_end(void) {
3621 int gtid = __kmp_get_gtid();
3622 KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3623 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3624 }
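// ------------------------------------------------------------------------
// Illustrative sketch (kept out of the build): a compiler can bracket an
// operation it cannot lower to any typed entry point with the global atomic
// lock via the pair above.
#if 0
static void sketch_use_atomic_start_end(long double *x, long double e) {
  __kmpc_atomic_start();
  *x += e; // performed while holding __kmp_atomic_lock
  __kmpc_atomic_end();
}
#endif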
3625
3626 /*!
3627 @}
3628 */
3629
3630 // end of file
3631