1 /*
2  * kmp_atomic.cpp -- ATOMIC implementation routines
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 //                     The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "kmp_atomic.h"
15 #include "kmp.h" // TRUE, asm routines prototypes
16 
17 typedef unsigned char uchar;
18 typedef unsigned short ushort;
19 
20 /*!
21 @defgroup ATOMIC_OPS Atomic Operations
22 These functions are used for implementing the many different varieties of atomic
23 operations.
24 
25 The compiler is at liberty to inline atomic operations that are naturally
26 supported by the target architecture. For instance on IA-32 architecture an
27 atomic like this can be inlined
28 @code
29 static int s = 0;
30 #pragma omp atomic
31     s++;
32 @endcode
33 using the single instruction: `lock; incl s`
34 
35 However the runtime does provide entrypoints for these operations to support
36 compilers that choose not to inline them. (For instance,
37 `__kmpc_atomic_fixed4_add` could be used to perform the increment above.)
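
For illustration, a compiler that chooses not to inline the increment above
could instead emit a call like the following sketch (here `loc` and `gtid` are
placeholders for the source location descriptor and global thread id that the
compiler would supply):
@code
__kmpc_atomic_fixed4_add(&loc, gtid, &s, 1);
@endcode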
38 
39 The names of the functions are encoded by using the data type name and the
40 operation name, as in these tables.
41 
42 Data Type  | Data type encoding
43 -----------|---------------
44 int8_t     | `fixed1`
45 uint8_t    | `fixed1u`
46 int16_t    | `fixed2`
47 uint16_t   | `fixed2u`
48 int32_t    | `fixed4`
49 uint32_t   | `fixed4u`
50 int64_t    | `fixed8`
51 uint64_t   | `fixed8u`
52 float      | `float4`
53 double     | `float8`
54 long double (80 bit x87 extended precision)  | `float10`
_Quad (128 bit quad precision)    | `float16`
55 complex<float>   |  `cmplx4`
56 complex<double>  | `cmplx8`
57 complex<float10> | `cmplx10`
complex<_Quad>   | `cmplx16`
58 <br>
59 
60 Operation | Operation encoding
61 ----------|-------------------
62 + | add
63 - | sub
64 \* | mul
65 / | div
66 & | andb
67 << | shl
68 \>\> | shr
69 \| | orb
70 ^  | xor
71 && | andl
72 \|\| | orl
73 maximum | max
74 minimum | min
75 .eqv.   | eqv
76 .neqv.  | neqv
77 
78 <br>
79 For non-commutative operations, `_rev` can also be added for the reversed
80 operation. For the functions that capture the result, the suffix `_cpt` is
81 added.
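
For example, applying these encodings gives names such as
@code
__kmpc_atomic_fixed4_sub_rev  // int32_t reversed subtraction: *lhs = rhs - *lhs
__kmpc_atomic_fixed4_sub_cpt  // int32_t subtraction that also captures the result
@endcode
both of which appear in the full list below.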
82 
83 Update Functions
84 ================
85 The general form of an atomic function that just performs an update (without a
86 `capture`) is
87 @code
88 void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid,
89                                             TYPE *lhs, TYPE rhs );
90 @endcode
91 @param id_ref  a pointer to the source location
92 @param gtid  the global thread id
93 @param lhs   a pointer to the left operand
94 @param rhs   the right operand
95 
96 `capture` functions
97 ===================
98 The capture functions perform an atomic update and return a result, which is
99 either the value before the update or the value after it. They take an
100 additional argument to determine which result is returned.
101 Their general form is therefore
102 @code
103 TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid,
104                                                 TYPE *lhs, TYPE rhs, int flag );
105 @endcode
106 @param id_ref  a pointer to the source location
107 @param gtid  the global thread id
108 @param lhs   a pointer to the left operand
109 @param rhs   the right operand
110 @param flag  one if the result is to be captured *after* the operation, zero if
111 captured *before*.
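
Informally, and leaving aside how atomicity is achieved, a capture function
behaves like this sketch:
@code
TYPE old = *lhs;
*lhs = old <op> rhs;       // the update, performed atomically
return flag ? *lhs : old;  // new value if flag != 0, old value otherwise
@endcode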
112 
113 The one exception to this is the `complex<float>` type, where the value is not
114 returned; instead an extra output pointer argument is passed.
115 
116 They look like
117 @code
118 void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
119                                      kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag );
120 @endcode
121 
122 Read and Write Operations
123 =========================
124 The OpenMP<sup>*</sup> standard now supports atomic operations that simply
125 ensure that the value is read or written atomically, with no modification
126 performed. In many cases on IA-32 architecture these operations can be inlined
127 since the architecture guarantees that no tearing occurs on aligned objects
128 accessed with a single memory operation of up to 64 bits in size.
129 
130 The general form of the read operations is
131 @code
132 TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
133 @endcode
134 
135 For the write operations the form is
136 @code
137 void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE *lhs,
138                                 TYPE rhs );
139 @endcode
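
For example, a compiler lowering
@code
double d, v;
#pragma omp atomic read
v = d;
#pragma omp atomic write
d = v;
@endcode
without inlining the accesses could call `__kmpc_atomic_float8_rd` for the read
and `__kmpc_atomic_float8_wr` for the write.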
140 
141 Full list of functions
142 ======================
143 This leads to the generation of 376 atomic functions, as follows.
144 
145 Functions for integers
146 ----------------------
147 There are versions here for integers of size 1, 2, 4 and 8 bytes, both signed
148 and unsigned (where that matters).
149 @code
150     __kmpc_atomic_fixed1_add
151     __kmpc_atomic_fixed1_add_cpt
152     __kmpc_atomic_fixed1_add_fp
153     __kmpc_atomic_fixed1_andb
154     __kmpc_atomic_fixed1_andb_cpt
155     __kmpc_atomic_fixed1_andl
156     __kmpc_atomic_fixed1_andl_cpt
157     __kmpc_atomic_fixed1_div
158     __kmpc_atomic_fixed1_div_cpt
159     __kmpc_atomic_fixed1_div_cpt_rev
160     __kmpc_atomic_fixed1_div_float8
161     __kmpc_atomic_fixed1_div_fp
162     __kmpc_atomic_fixed1_div_rev
163     __kmpc_atomic_fixed1_eqv
164     __kmpc_atomic_fixed1_eqv_cpt
165     __kmpc_atomic_fixed1_max
166     __kmpc_atomic_fixed1_max_cpt
167     __kmpc_atomic_fixed1_min
168     __kmpc_atomic_fixed1_min_cpt
169     __kmpc_atomic_fixed1_mul
170     __kmpc_atomic_fixed1_mul_cpt
171     __kmpc_atomic_fixed1_mul_float8
172     __kmpc_atomic_fixed1_mul_fp
173     __kmpc_atomic_fixed1_neqv
174     __kmpc_atomic_fixed1_neqv_cpt
175     __kmpc_atomic_fixed1_orb
176     __kmpc_atomic_fixed1_orb_cpt
177     __kmpc_atomic_fixed1_orl
178     __kmpc_atomic_fixed1_orl_cpt
179     __kmpc_atomic_fixed1_rd
180     __kmpc_atomic_fixed1_shl
181     __kmpc_atomic_fixed1_shl_cpt
182     __kmpc_atomic_fixed1_shl_cpt_rev
183     __kmpc_atomic_fixed1_shl_rev
184     __kmpc_atomic_fixed1_shr
185     __kmpc_atomic_fixed1_shr_cpt
186     __kmpc_atomic_fixed1_shr_cpt_rev
187     __kmpc_atomic_fixed1_shr_rev
188     __kmpc_atomic_fixed1_sub
189     __kmpc_atomic_fixed1_sub_cpt
190     __kmpc_atomic_fixed1_sub_cpt_rev
191     __kmpc_atomic_fixed1_sub_fp
192     __kmpc_atomic_fixed1_sub_rev
193     __kmpc_atomic_fixed1_swp
194     __kmpc_atomic_fixed1_wr
195     __kmpc_atomic_fixed1_xor
196     __kmpc_atomic_fixed1_xor_cpt
197     __kmpc_atomic_fixed1u_add_fp
198     __kmpc_atomic_fixed1u_sub_fp
199     __kmpc_atomic_fixed1u_mul_fp
200     __kmpc_atomic_fixed1u_div
201     __kmpc_atomic_fixed1u_div_cpt
202     __kmpc_atomic_fixed1u_div_cpt_rev
203     __kmpc_atomic_fixed1u_div_fp
204     __kmpc_atomic_fixed1u_div_rev
205     __kmpc_atomic_fixed1u_shr
206     __kmpc_atomic_fixed1u_shr_cpt
207     __kmpc_atomic_fixed1u_shr_cpt_rev
208     __kmpc_atomic_fixed1u_shr_rev
209     __kmpc_atomic_fixed2_add
210     __kmpc_atomic_fixed2_add_cpt
211     __kmpc_atomic_fixed2_add_fp
212     __kmpc_atomic_fixed2_andb
213     __kmpc_atomic_fixed2_andb_cpt
214     __kmpc_atomic_fixed2_andl
215     __kmpc_atomic_fixed2_andl_cpt
216     __kmpc_atomic_fixed2_div
217     __kmpc_atomic_fixed2_div_cpt
218     __kmpc_atomic_fixed2_div_cpt_rev
219     __kmpc_atomic_fixed2_div_float8
220     __kmpc_atomic_fixed2_div_fp
221     __kmpc_atomic_fixed2_div_rev
222     __kmpc_atomic_fixed2_eqv
223     __kmpc_atomic_fixed2_eqv_cpt
224     __kmpc_atomic_fixed2_max
225     __kmpc_atomic_fixed2_max_cpt
226     __kmpc_atomic_fixed2_min
227     __kmpc_atomic_fixed2_min_cpt
228     __kmpc_atomic_fixed2_mul
229     __kmpc_atomic_fixed2_mul_cpt
230     __kmpc_atomic_fixed2_mul_float8
231     __kmpc_atomic_fixed2_mul_fp
232     __kmpc_atomic_fixed2_neqv
233     __kmpc_atomic_fixed2_neqv_cpt
234     __kmpc_atomic_fixed2_orb
235     __kmpc_atomic_fixed2_orb_cpt
236     __kmpc_atomic_fixed2_orl
237     __kmpc_atomic_fixed2_orl_cpt
238     __kmpc_atomic_fixed2_rd
239     __kmpc_atomic_fixed2_shl
240     __kmpc_atomic_fixed2_shl_cpt
241     __kmpc_atomic_fixed2_shl_cpt_rev
242     __kmpc_atomic_fixed2_shl_rev
243     __kmpc_atomic_fixed2_shr
244     __kmpc_atomic_fixed2_shr_cpt
245     __kmpc_atomic_fixed2_shr_cpt_rev
246     __kmpc_atomic_fixed2_shr_rev
247     __kmpc_atomic_fixed2_sub
248     __kmpc_atomic_fixed2_sub_cpt
249     __kmpc_atomic_fixed2_sub_cpt_rev
250     __kmpc_atomic_fixed2_sub_fp
251     __kmpc_atomic_fixed2_sub_rev
252     __kmpc_atomic_fixed2_swp
253     __kmpc_atomic_fixed2_wr
254     __kmpc_atomic_fixed2_xor
255     __kmpc_atomic_fixed2_xor_cpt
256     __kmpc_atomic_fixed2u_add_fp
257     __kmpc_atomic_fixed2u_sub_fp
258     __kmpc_atomic_fixed2u_mul_fp
259     __kmpc_atomic_fixed2u_div
260     __kmpc_atomic_fixed2u_div_cpt
261     __kmpc_atomic_fixed2u_div_cpt_rev
262     __kmpc_atomic_fixed2u_div_fp
263     __kmpc_atomic_fixed2u_div_rev
264     __kmpc_atomic_fixed2u_shr
265     __kmpc_atomic_fixed2u_shr_cpt
266     __kmpc_atomic_fixed2u_shr_cpt_rev
267     __kmpc_atomic_fixed2u_shr_rev
268     __kmpc_atomic_fixed4_add
269     __kmpc_atomic_fixed4_add_cpt
270     __kmpc_atomic_fixed4_add_fp
271     __kmpc_atomic_fixed4_andb
272     __kmpc_atomic_fixed4_andb_cpt
273     __kmpc_atomic_fixed4_andl
274     __kmpc_atomic_fixed4_andl_cpt
275     __kmpc_atomic_fixed4_div
276     __kmpc_atomic_fixed4_div_cpt
277     __kmpc_atomic_fixed4_div_cpt_rev
278     __kmpc_atomic_fixed4_div_float8
279     __kmpc_atomic_fixed4_div_fp
280     __kmpc_atomic_fixed4_div_rev
281     __kmpc_atomic_fixed4_eqv
282     __kmpc_atomic_fixed4_eqv_cpt
283     __kmpc_atomic_fixed4_max
284     __kmpc_atomic_fixed4_max_cpt
285     __kmpc_atomic_fixed4_min
286     __kmpc_atomic_fixed4_min_cpt
287     __kmpc_atomic_fixed4_mul
288     __kmpc_atomic_fixed4_mul_cpt
289     __kmpc_atomic_fixed4_mul_float8
290     __kmpc_atomic_fixed4_mul_fp
291     __kmpc_atomic_fixed4_neqv
292     __kmpc_atomic_fixed4_neqv_cpt
293     __kmpc_atomic_fixed4_orb
294     __kmpc_atomic_fixed4_orb_cpt
295     __kmpc_atomic_fixed4_orl
296     __kmpc_atomic_fixed4_orl_cpt
297     __kmpc_atomic_fixed4_rd
298     __kmpc_atomic_fixed4_shl
299     __kmpc_atomic_fixed4_shl_cpt
300     __kmpc_atomic_fixed4_shl_cpt_rev
301     __kmpc_atomic_fixed4_shl_rev
302     __kmpc_atomic_fixed4_shr
303     __kmpc_atomic_fixed4_shr_cpt
304     __kmpc_atomic_fixed4_shr_cpt_rev
305     __kmpc_atomic_fixed4_shr_rev
306     __kmpc_atomic_fixed4_sub
307     __kmpc_atomic_fixed4_sub_cpt
308     __kmpc_atomic_fixed4_sub_cpt_rev
309     __kmpc_atomic_fixed4_sub_fp
310     __kmpc_atomic_fixed4_sub_rev
311     __kmpc_atomic_fixed4_swp
312     __kmpc_atomic_fixed4_wr
313     __kmpc_atomic_fixed4_xor
314     __kmpc_atomic_fixed4_xor_cpt
315     __kmpc_atomic_fixed4u_add_fp
316     __kmpc_atomic_fixed4u_sub_fp
317     __kmpc_atomic_fixed4u_mul_fp
318     __kmpc_atomic_fixed4u_div
319     __kmpc_atomic_fixed4u_div_cpt
320     __kmpc_atomic_fixed4u_div_cpt_rev
321     __kmpc_atomic_fixed4u_div_fp
322     __kmpc_atomic_fixed4u_div_rev
323     __kmpc_atomic_fixed4u_shr
324     __kmpc_atomic_fixed4u_shr_cpt
325     __kmpc_atomic_fixed4u_shr_cpt_rev
326     __kmpc_atomic_fixed4u_shr_rev
327     __kmpc_atomic_fixed8_add
328     __kmpc_atomic_fixed8_add_cpt
329     __kmpc_atomic_fixed8_add_fp
330     __kmpc_atomic_fixed8_andb
331     __kmpc_atomic_fixed8_andb_cpt
332     __kmpc_atomic_fixed8_andl
333     __kmpc_atomic_fixed8_andl_cpt
334     __kmpc_atomic_fixed8_div
335     __kmpc_atomic_fixed8_div_cpt
336     __kmpc_atomic_fixed8_div_cpt_rev
337     __kmpc_atomic_fixed8_div_float8
338     __kmpc_atomic_fixed8_div_fp
339     __kmpc_atomic_fixed8_div_rev
340     __kmpc_atomic_fixed8_eqv
341     __kmpc_atomic_fixed8_eqv_cpt
342     __kmpc_atomic_fixed8_max
343     __kmpc_atomic_fixed8_max_cpt
344     __kmpc_atomic_fixed8_min
345     __kmpc_atomic_fixed8_min_cpt
346     __kmpc_atomic_fixed8_mul
347     __kmpc_atomic_fixed8_mul_cpt
348     __kmpc_atomic_fixed8_mul_float8
349     __kmpc_atomic_fixed8_mul_fp
350     __kmpc_atomic_fixed8_neqv
351     __kmpc_atomic_fixed8_neqv_cpt
352     __kmpc_atomic_fixed8_orb
353     __kmpc_atomic_fixed8_orb_cpt
354     __kmpc_atomic_fixed8_orl
355     __kmpc_atomic_fixed8_orl_cpt
356     __kmpc_atomic_fixed8_rd
357     __kmpc_atomic_fixed8_shl
358     __kmpc_atomic_fixed8_shl_cpt
359     __kmpc_atomic_fixed8_shl_cpt_rev
360     __kmpc_atomic_fixed8_shl_rev
361     __kmpc_atomic_fixed8_shr
362     __kmpc_atomic_fixed8_shr_cpt
363     __kmpc_atomic_fixed8_shr_cpt_rev
364     __kmpc_atomic_fixed8_shr_rev
365     __kmpc_atomic_fixed8_sub
366     __kmpc_atomic_fixed8_sub_cpt
367     __kmpc_atomic_fixed8_sub_cpt_rev
368     __kmpc_atomic_fixed8_sub_fp
369     __kmpc_atomic_fixed8_sub_rev
370     __kmpc_atomic_fixed8_swp
371     __kmpc_atomic_fixed8_wr
372     __kmpc_atomic_fixed8_xor
373     __kmpc_atomic_fixed8_xor_cpt
374     __kmpc_atomic_fixed8u_add_fp
375     __kmpc_atomic_fixed8u_sub_fp
376     __kmpc_atomic_fixed8u_mul_fp
377     __kmpc_atomic_fixed8u_div
378     __kmpc_atomic_fixed8u_div_cpt
379     __kmpc_atomic_fixed8u_div_cpt_rev
380     __kmpc_atomic_fixed8u_div_fp
381     __kmpc_atomic_fixed8u_div_rev
382     __kmpc_atomic_fixed8u_shr
383     __kmpc_atomic_fixed8u_shr_cpt
384     __kmpc_atomic_fixed8u_shr_cpt_rev
385     __kmpc_atomic_fixed8u_shr_rev
386 @endcode
387 
388 Functions for floating point
389 ----------------------------
390 There are versions here for floating point numbers of size 4, 8, 10 and 16
391 bytes. (Ten byte floats are used by X87, but are now rare).
392 @code
393     __kmpc_atomic_float4_add
394     __kmpc_atomic_float4_add_cpt
395     __kmpc_atomic_float4_add_float8
396     __kmpc_atomic_float4_add_fp
397     __kmpc_atomic_float4_div
398     __kmpc_atomic_float4_div_cpt
399     __kmpc_atomic_float4_div_cpt_rev
400     __kmpc_atomic_float4_div_float8
401     __kmpc_atomic_float4_div_fp
402     __kmpc_atomic_float4_div_rev
403     __kmpc_atomic_float4_max
404     __kmpc_atomic_float4_max_cpt
405     __kmpc_atomic_float4_min
406     __kmpc_atomic_float4_min_cpt
407     __kmpc_atomic_float4_mul
408     __kmpc_atomic_float4_mul_cpt
409     __kmpc_atomic_float4_mul_float8
410     __kmpc_atomic_float4_mul_fp
411     __kmpc_atomic_float4_rd
412     __kmpc_atomic_float4_sub
413     __kmpc_atomic_float4_sub_cpt
414     __kmpc_atomic_float4_sub_cpt_rev
415     __kmpc_atomic_float4_sub_float8
416     __kmpc_atomic_float4_sub_fp
417     __kmpc_atomic_float4_sub_rev
418     __kmpc_atomic_float4_swp
419     __kmpc_atomic_float4_wr
420     __kmpc_atomic_float8_add
421     __kmpc_atomic_float8_add_cpt
422     __kmpc_atomic_float8_add_fp
423     __kmpc_atomic_float8_div
424     __kmpc_atomic_float8_div_cpt
425     __kmpc_atomic_float8_div_cpt_rev
426     __kmpc_atomic_float8_div_fp
427     __kmpc_atomic_float8_div_rev
428     __kmpc_atomic_float8_max
429     __kmpc_atomic_float8_max_cpt
430     __kmpc_atomic_float8_min
431     __kmpc_atomic_float8_min_cpt
432     __kmpc_atomic_float8_mul
433     __kmpc_atomic_float8_mul_cpt
434     __kmpc_atomic_float8_mul_fp
435     __kmpc_atomic_float8_rd
436     __kmpc_atomic_float8_sub
437     __kmpc_atomic_float8_sub_cpt
438     __kmpc_atomic_float8_sub_cpt_rev
439     __kmpc_atomic_float8_sub_fp
440     __kmpc_atomic_float8_sub_rev
441     __kmpc_atomic_float8_swp
442     __kmpc_atomic_float8_wr
443     __kmpc_atomic_float10_add
444     __kmpc_atomic_float10_add_cpt
445     __kmpc_atomic_float10_add_fp
446     __kmpc_atomic_float10_div
447     __kmpc_atomic_float10_div_cpt
448     __kmpc_atomic_float10_div_cpt_rev
449     __kmpc_atomic_float10_div_fp
450     __kmpc_atomic_float10_div_rev
451     __kmpc_atomic_float10_mul
452     __kmpc_atomic_float10_mul_cpt
453     __kmpc_atomic_float10_mul_fp
454     __kmpc_atomic_float10_rd
455     __kmpc_atomic_float10_sub
456     __kmpc_atomic_float10_sub_cpt
457     __kmpc_atomic_float10_sub_cpt_rev
458     __kmpc_atomic_float10_sub_fp
459     __kmpc_atomic_float10_sub_rev
460     __kmpc_atomic_float10_swp
461     __kmpc_atomic_float10_wr
462     __kmpc_atomic_float16_add
463     __kmpc_atomic_float16_add_cpt
464     __kmpc_atomic_float16_div
465     __kmpc_atomic_float16_div_cpt
466     __kmpc_atomic_float16_div_cpt_rev
467     __kmpc_atomic_float16_div_rev
468     __kmpc_atomic_float16_max
469     __kmpc_atomic_float16_max_cpt
470     __kmpc_atomic_float16_min
471     __kmpc_atomic_float16_min_cpt
472     __kmpc_atomic_float16_mul
473     __kmpc_atomic_float16_mul_cpt
474     __kmpc_atomic_float16_rd
475     __kmpc_atomic_float16_sub
476     __kmpc_atomic_float16_sub_cpt
477     __kmpc_atomic_float16_sub_cpt_rev
478     __kmpc_atomic_float16_sub_rev
479     __kmpc_atomic_float16_swp
480     __kmpc_atomic_float16_wr
481 @endcode
482 
483 Functions for Complex types
484 ---------------------------
485 Functions for complex types whose component floating point variables are of size
486 4, 8, 10 or 16 bytes. The names here are based on the size of the component float,
487 *not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an operation
488 on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`.
489 
490 @code
491     __kmpc_atomic_cmplx4_add
492     __kmpc_atomic_cmplx4_add_cmplx8
493     __kmpc_atomic_cmplx4_add_cpt
494     __kmpc_atomic_cmplx4_div
495     __kmpc_atomic_cmplx4_div_cmplx8
496     __kmpc_atomic_cmplx4_div_cpt
497     __kmpc_atomic_cmplx4_div_cpt_rev
498     __kmpc_atomic_cmplx4_div_rev
499     __kmpc_atomic_cmplx4_mul
500     __kmpc_atomic_cmplx4_mul_cmplx8
501     __kmpc_atomic_cmplx4_mul_cpt
502     __kmpc_atomic_cmplx4_rd
503     __kmpc_atomic_cmplx4_sub
504     __kmpc_atomic_cmplx4_sub_cmplx8
505     __kmpc_atomic_cmplx4_sub_cpt
506     __kmpc_atomic_cmplx4_sub_cpt_rev
507     __kmpc_atomic_cmplx4_sub_rev
508     __kmpc_atomic_cmplx4_swp
509     __kmpc_atomic_cmplx4_wr
510     __kmpc_atomic_cmplx8_add
511     __kmpc_atomic_cmplx8_add_cpt
512     __kmpc_atomic_cmplx8_div
513     __kmpc_atomic_cmplx8_div_cpt
514     __kmpc_atomic_cmplx8_div_cpt_rev
515     __kmpc_atomic_cmplx8_div_rev
516     __kmpc_atomic_cmplx8_mul
517     __kmpc_atomic_cmplx8_mul_cpt
518     __kmpc_atomic_cmplx8_rd
519     __kmpc_atomic_cmplx8_sub
520     __kmpc_atomic_cmplx8_sub_cpt
521     __kmpc_atomic_cmplx8_sub_cpt_rev
522     __kmpc_atomic_cmplx8_sub_rev
523     __kmpc_atomic_cmplx8_swp
524     __kmpc_atomic_cmplx8_wr
525     __kmpc_atomic_cmplx10_add
526     __kmpc_atomic_cmplx10_add_cpt
527     __kmpc_atomic_cmplx10_div
528     __kmpc_atomic_cmplx10_div_cpt
529     __kmpc_atomic_cmplx10_div_cpt_rev
530     __kmpc_atomic_cmplx10_div_rev
531     __kmpc_atomic_cmplx10_mul
532     __kmpc_atomic_cmplx10_mul_cpt
533     __kmpc_atomic_cmplx10_rd
534     __kmpc_atomic_cmplx10_sub
535     __kmpc_atomic_cmplx10_sub_cpt
536     __kmpc_atomic_cmplx10_sub_cpt_rev
537     __kmpc_atomic_cmplx10_sub_rev
538     __kmpc_atomic_cmplx10_swp
539     __kmpc_atomic_cmplx10_wr
540     __kmpc_atomic_cmplx16_add
541     __kmpc_atomic_cmplx16_add_cpt
542     __kmpc_atomic_cmplx16_div
543     __kmpc_atomic_cmplx16_div_cpt
544     __kmpc_atomic_cmplx16_div_cpt_rev
545     __kmpc_atomic_cmplx16_div_rev
546     __kmpc_atomic_cmplx16_mul
547     __kmpc_atomic_cmplx16_mul_cpt
548     __kmpc_atomic_cmplx16_rd
549     __kmpc_atomic_cmplx16_sub
550     __kmpc_atomic_cmplx16_sub_cpt
551     __kmpc_atomic_cmplx16_sub_cpt_rev
552     __kmpc_atomic_cmplx16_swp
553     __kmpc_atomic_cmplx16_wr
554 @endcode
555 */
556 
557 /*!
558 @ingroup ATOMIC_OPS
559 @{
560 */
561 
562 /*
563  * Global vars
564  */
565 
566 #ifndef KMP_GOMP_COMPAT
567 int __kmp_atomic_mode = 1; // Intel perf
568 #else
569 int __kmp_atomic_mode = 2; // GOMP compatibility
570 #endif /* KMP_GOMP_COMPAT */
571 
572 KMP_ALIGN(128)
573 
574 // Control access to all user coded atomics in Gnu compat mode
575 kmp_atomic_lock_t __kmp_atomic_lock;
576 // Control access to all user coded atomics for 1-byte fixed data types
577 kmp_atomic_lock_t __kmp_atomic_lock_1i;
578 // Control access to all user coded atomics for 2-byte fixed data types
579 kmp_atomic_lock_t __kmp_atomic_lock_2i;
580 // Control access to all user coded atomics for 4-byte fixed data types
581 kmp_atomic_lock_t __kmp_atomic_lock_4i;
582 // Control access to all user coded atomics for kmp_real32 data type
583 kmp_atomic_lock_t __kmp_atomic_lock_4r;
584 // Control access to all user coded atomics for 8-byte fixed data types
585 kmp_atomic_lock_t __kmp_atomic_lock_8i;
586 // Control access to all user coded atomics for kmp_real64 data type
587 kmp_atomic_lock_t __kmp_atomic_lock_8r;
588 // Control access to all user coded atomics for complex byte data type
589 kmp_atomic_lock_t __kmp_atomic_lock_8c;
590 // Control access to all user coded atomics for long double data type
591 kmp_atomic_lock_t __kmp_atomic_lock_10r;
592 // Control access to all user coded atomics for _Quad data type
593 kmp_atomic_lock_t __kmp_atomic_lock_16r;
594 // Control access to all user coded atomics for double complex data type
595 kmp_atomic_lock_t __kmp_atomic_lock_16c;
596 // Control access to all user coded atomics for long double complex type
597 kmp_atomic_lock_t __kmp_atomic_lock_20c;
598 // Control access to all user coded atomics for _Quad complex data type
599 kmp_atomic_lock_t __kmp_atomic_lock_32c;
600 
601 /* 2007-03-02:
602    Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
603    on *_32 and *_32e. This is just a temporary workaround for the problem. It
604    seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
605    in assembler language. */
606 #define KMP_ATOMIC_VOLATILE volatile
607 
608 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
609 
610 static inline void operator+=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
611   lhs.q += rhs.q;
612 }
613 static inline void operator-=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
614   lhs.q -= rhs.q;
615 }
616 static inline void operator*=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
617   lhs.q *= rhs.q;
618 }
619 static inline void operator/=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
620   lhs.q /= rhs.q;
621 }
622 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
623   return lhs.q < rhs.q;
624 }
625 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
626   return lhs.q > rhs.q;
627 }
628 
629 static inline void operator+=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
630   lhs.q += rhs.q;
631 }
632 static inline void operator-=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
633   lhs.q -= rhs.q;
634 }
635 static inline void operator*=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
636   lhs.q *= rhs.q;
637 }
638 static inline void operator/=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
639   lhs.q /= rhs.q;
640 }
641 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
642   return lhs.q < rhs.q;
643 }
644 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
645   return lhs.q > rhs.q;
646 }
647 
648 static inline void operator+=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
649   lhs.q += rhs.q;
650 }
651 static inline void operator-=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
652   lhs.q -= rhs.q;
653 }
654 static inline void operator*=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
655   lhs.q *= rhs.q;
656 }
657 static inline void operator/=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
658   lhs.q /= rhs.q;
659 }
660 
661 static inline void operator+=(kmp_cmplx128_a16_t &lhs,
662                               kmp_cmplx128_a16_t &rhs) {
663   lhs.q += rhs.q;
664 }
665 static inline void operator-=(kmp_cmplx128_a16_t &lhs,
666                               kmp_cmplx128_a16_t &rhs) {
667   lhs.q -= rhs.q;
668 }
669 static inline void operator*=(kmp_cmplx128_a16_t &lhs,
670                               kmp_cmplx128_a16_t &rhs) {
671   lhs.q *= rhs.q;
672 }
673 static inline void operator/=(kmp_cmplx128_a16_t &lhs,
674                               kmp_cmplx128_a16_t &rhs) {
675   lhs.q /= rhs.q;
676 }
677 
678 #endif
679 
680 // ATOMIC implementation routines -----------------------------------------
681 // One routine for each operation and operand type.
682 // All routine declarations look like
683 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
684 
685 #define KMP_CHECK_GTID                                                         \
686   if (gtid == KMP_GTID_UNKNOWN) {                                              \
687     gtid = __kmp_entry_gtid();                                                 \
688   } // check and get gtid when needed
689 
690 // Beginning of a definition (provides name, parameters, debug trace)
691 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
692 //     fixed)
693 //     OP_ID   - operation identifier (add, sub, mul, ...)
694 //     TYPE    - operands' type
695 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE)                           \
696   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
697                                              TYPE *lhs, TYPE rhs) {            \
698     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
699     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
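
// For example (an illustrative expansion, not reproduced verbatim),
//   ATOMIC_BEGIN(fixed4, add, kmp_int32, void)
// opens the definition of
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                 kmp_int32 rhs)
// with the debug assertion and trace call already emitted; the closing brace
// is supplied by the macro that uses ATOMIC_BEGIN.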
700 
701 // ------------------------------------------------------------------------
702 // Lock variables used for critical sections for various size operands
703 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
704 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
705 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
706 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
707 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
708 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
709 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
710 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
711 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
712 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
713 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
714 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
715 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
716 
717 // ------------------------------------------------------------------------
718 // Operation on *lhs, rhs bound by critical section
719 //     OP     - operator (it's supposed to contain an assignment)
720 //     LCK_ID - lock identifier
721 // Note: don't check gtid as it should always be valid
722 // 1-, 2-byte operands - expect a valid gtid; others - check before this macro
723 #define OP_CRITICAL(OP, LCK_ID)                                                \
724   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
725                                                                                \
726   (*lhs) OP(rhs);                                                              \
727                                                                                \
728   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
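
// For example, OP_CRITICAL(+=, 4i) roughly expands to
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
//   (*lhs) += (rhs);
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);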
729 
730 // ------------------------------------------------------------------------
731 // For GNU compatibility, we may need to use a critical section,
732 // even though it is not required by the ISA.
733 //
734 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
735 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
736 // critical section.  On Intel(R) 64, all atomic operations are done with fetch
737 // and add or compare and exchange.  Therefore, the FLAG parameter to this
738 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
739 // require a critical section, where we predict that they will be implemented
740 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
741 //
742 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
743 // the FLAG parameter should always be 1.  If we know that we will be using
744 // a critical section, then we want to make certain that we use the generic
745 // lock __kmp_atomic_lock to protect the atomic update, and not one of the
746 // locks that are specialized based upon the size or type of the data.
747 //
748 // If FLAG is 0, then we are relying on dead code elimination by the build
749 // compiler to get rid of the useless block of code, and save a needless
750 // branch at runtime.
751 
752 #ifdef KMP_GOMP_COMPAT
753 #define OP_GOMP_CRITICAL(OP, FLAG)                                             \
754   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
755     KMP_CHECK_GTID;                                                            \
756     OP_CRITICAL(OP, 0);                                                        \
757     return;                                                                    \
758   }
759 #else
760 #define OP_GOMP_CRITICAL(OP, FLAG)
761 #endif /* KMP_GOMP_COMPAT */
762 
763 #if KMP_MIC
764 #define KMP_DO_PAUSE _mm_delay_32(1)
765 #else
766 #define KMP_DO_PAUSE KMP_CPU_PAUSE()
767 #endif /* KMP_MIC */
768 
769 // ------------------------------------------------------------------------
770 // Operation on *lhs, rhs using "compare_and_store" routine
771 //     TYPE    - operands' type
772 //     BITS    - size in bits, used to distinguish low level calls
773 //     OP      - operator
774 #define OP_CMPXCHG(TYPE, BITS, OP)                                             \
775   {                                                                            \
776     TYPE old_value, new_value;                                                 \
777     old_value = *(TYPE volatile *)lhs;                                         \
778     new_value = old_value OP rhs;                                              \
779     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
780         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
781         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
782       KMP_DO_PAUSE;                                                            \
783                                                                                \
784       old_value = *(TYPE volatile *)lhs;                                       \
785       new_value = old_value OP rhs;                                            \
786     }                                                                          \
787   }
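
// For instance, OP_CMPXCHG(kmp_real32, 32, +) implements *lhs = *lhs + rhs as a
// classic CAS retry loop: read *lhs, compute the new value, and attempt a
// 32-bit compare-and-store; if another thread changed *lhs in the meantime, the
// store fails and we pause briefly, re-read, and retry.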
788 
789 #if USE_CMPXCHG_FIX
790 // 2007-06-25:
791 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
792 // and win_32e are affected (I verified the asm). Compiler ignores the volatile
793 // qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
794 // compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
795 // the workaround.
796 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                  \
797   {                                                                            \
798     struct _sss {                                                              \
799       TYPE cmp;                                                                \
800       kmp_int##BITS *vvv;                                                      \
801     };                                                                         \
802     struct _sss old_value, new_value;                                          \
803     old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                           \
804     new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                           \
805     *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                           \
806     new_value.cmp = old_value.cmp OP rhs;                                      \
807     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
808         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,   \
809         *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                      \
810       KMP_DO_PAUSE;                                                            \
811                                                                                \
812       *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                         \
813       new_value.cmp = old_value.cmp OP rhs;                                    \
814     }                                                                          \
815   }
816 // end of the first part of the workaround for C78287
817 #endif // USE_CMPXCHG_FIX
818 
819 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
820 
821 // ------------------------------------------------------------------------
822 // X86 or X86_64: no alignment problems ====================================
823 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
824                          GOMP_FLAG)                                            \
825   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
826   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
827   /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
828   KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                        \
829   }
830 // -------------------------------------------------------------------------
831 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
832                        GOMP_FLAG)                                              \
833   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
834   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
835   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
836   }
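
// As an illustration, the instantiation used further below,
//   ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86)
// defines __kmpc_atomic_float4_add: when GOMP compatibility mode is active (and
// the GOMP_FLAG argument is non-zero) it takes the generic __kmp_atomic_lock
// critical section, otherwise it performs the update with the 32-bit CAS loop
// from OP_CMPXCHG.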
837 #if USE_CMPXCHG_FIX
838 // -------------------------------------------------------------------------
839 // workaround for C78287 (complex(kind=4) data type)
840 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
841                                   MASK, GOMP_FLAG)                             \
842   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
843   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
844   OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
845   }
846 // end of the second part of the workaround for C78287
847 #endif
848 
849 #else
850 // -------------------------------------------------------------------------
851 // Code for other architectures that don't handle unaligned accesses.
852 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
853                          GOMP_FLAG)                                            \
854   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
855   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
856   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
857     /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */          \
858     KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                      \
859   } else {                                                                     \
860     KMP_CHECK_GTID;                                                            \
861     OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
862   }                                                                            \
863   }
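
// The MASK parameter used in these macros is a hexadecimal mask of the low
// address bits checked for alignment: e.g. MASK 3 rejects addresses that are
// not 4-byte aligned, MASK 7 those that are not 8-byte aligned.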
864 // -------------------------------------------------------------------------
865 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
866                        GOMP_FLAG)                                              \
867   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
868   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
869   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
870     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
871   } else {                                                                     \
872     KMP_CHECK_GTID;                                                            \
873     OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
874   }                                                                            \
875   }
876 #if USE_CMPXCHG_FIX
877 // -------------------------------------------------------------------------
878 // workaround for C78287 (complex(kind=4) data type)
879 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
880                                   MASK, GOMP_FLAG)                             \
881   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
882   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
883   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
884     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
885   } else {                                                                     \
886     KMP_CHECK_GTID;                                                            \
887     OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
888   }                                                                            \
889   }
890 // end of the second part of the workaround for C78287
891 #endif // USE_CMPXCHG_FIX
892 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
893 
894 // Routines for ATOMIC 4-byte operands addition and subtraction
895 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
896                  0) // __kmpc_atomic_fixed4_add
897 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
898                  0) // __kmpc_atomic_fixed4_sub
899 
900 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
901                KMP_ARCH_X86) // __kmpc_atomic_float4_add
902 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
903                KMP_ARCH_X86) // __kmpc_atomic_float4_sub
904 
905 // Routines for ATOMIC 8-byte operands addition and subtraction
906 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
907                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
908 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
909                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
910 
911 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
912                KMP_ARCH_X86) // __kmpc_atomic_float8_add
913 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
914                KMP_ARCH_X86) // __kmpc_atomic_float8_sub
915 
916 // ------------------------------------------------------------------------
917 // Entry definitions for integer operands
918 //     TYPE_ID - operands type and size (fixed4, float4)
919 //     OP_ID   - operation identifier (add, sub, mul, ...)
920 //     TYPE    - operand type
921 //     BITS    - size in bits, used to distinguish low level calls
922 //     OP      - operator (used in critical section)
923 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
924 //     MASK    - used for alignment check
925 
926 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,MASK,GOMP_FLAG
927 // ------------------------------------------------------------------------
928 // Routines for ATOMIC integer operands, other operators
929 // ------------------------------------------------------------------------
930 //              TYPE_ID,OP_ID, TYPE,          OP, LCK_ID, GOMP_FLAG
931 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
932                KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
933 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
934                0) // __kmpc_atomic_fixed1_andb
935 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
936                KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
937 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
938                KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
939 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
940                KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
941 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
942                0) // __kmpc_atomic_fixed1_orb
943 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
944                KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
945 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
946                KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
947 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
948                KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
949 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
950                KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
951 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
952                0) // __kmpc_atomic_fixed1_xor
953 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
954                KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
955 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
956                0) // __kmpc_atomic_fixed2_andb
957 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
958                KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
959 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
960                KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
961 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
962                KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
963 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
964                0) // __kmpc_atomic_fixed2_orb
965 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
966                KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
967 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
968                KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
969 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
970                KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
971 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
972                KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
973 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
974                0) // __kmpc_atomic_fixed2_xor
975 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
976                0) // __kmpc_atomic_fixed4_andb
977 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
978                KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
979 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
980                KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
981 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
982                KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
983 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
984                0) // __kmpc_atomic_fixed4_orb
985 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
986                KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
987 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
988                KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
989 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
990                KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
991 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
992                0) // __kmpc_atomic_fixed4_xor
993 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
994                KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
995 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
996                KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
997 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
998                KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
999 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
1000                KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1001 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1002                KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1003 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1004                KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1005 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1006                KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1007 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1008                KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1009 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1010                KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1011 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1012                KMP_ARCH_X86) // __kmpc_atomic_float4_div
1013 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1014                KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1015 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1016                KMP_ARCH_X86) // __kmpc_atomic_float8_div
1017 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1018                KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1019 //              TYPE_ID,OP_ID, TYPE,          OP, LCK_ID, GOMP_FLAG
1020 
1021 /* ------------------------------------------------------------------------ */
1022 /* Routines for C/C++ Reduction operators && and ||                         */
1023 
1024 // ------------------------------------------------------------------------
1025 // Need separate macros for &&, || because there is no combined assignment
1026 //   TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
1027 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)             \
1028   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1029   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1030   OP_CRITICAL(= *lhs OP, LCK_ID)                                               \
1031   }
1032 
1033 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1034 
1035 // ------------------------------------------------------------------------
1036 // X86 or X86_64: no alignment problems ===================================
1037 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1038   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1039   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1040   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1041   }
1042 
1043 #else
1044 // ------------------------------------------------------------------------
1045 // Code for other architectures that don't handle unaligned accesses.
1046 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1047   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1048   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1049   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1050     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1051   } else {                                                                     \
1052     KMP_CHECK_GTID;                                                            \
1053     OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */              \
1054   }                                                                            \
1055   }
1056 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1057 
1058 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1059               KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1060 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1061               KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1062 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1063               KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1064 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1065               KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1066 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1067               0) // __kmpc_atomic_fixed4_andl
1068 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1069               0) // __kmpc_atomic_fixed4_orl
1070 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1071               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1072 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1073               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1074 
1075 /* ------------------------------------------------------------------------- */
1076 /* Routines for Fortran operators that have no C counterpart:                */
1077 /* MAX, MIN, .EQV., .NEQV.                                                   */
1078 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}           */
1079 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}  */
1080 
1081 // -------------------------------------------------------------------------
1082 // MIN and MAX need separate macros
1083 // OP - operator used to check whether any action is still needed
1084 #define MIN_MAX_CRITSECT(OP, LCK_ID)                                           \
1085   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1086                                                                                \
1087   if (*lhs OP rhs) { /* still need actions? */                                 \
1088     *lhs = rhs;                                                                \
1089   }                                                                            \
1090   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1091 
1092 // -------------------------------------------------------------------------
1093 #ifdef KMP_GOMP_COMPAT
1094 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)                                        \
1095   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1096     KMP_CHECK_GTID;                                                            \
1097     MIN_MAX_CRITSECT(OP, 0);                                                   \
1098     return;                                                                    \
1099   }
1100 #else
1101 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1102 #endif /* KMP_GOMP_COMPAT */
1103 
1104 // -------------------------------------------------------------------------
1105 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                        \
1106   {                                                                            \
1107     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1108     TYPE old_value;                                                            \
1109     temp_val = *lhs;                                                           \
1110     old_value = temp_val;                                                      \
1111     while (old_value OP rhs && /* still need actions? */                       \
1112            !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
1113                (kmp_int##BITS *)lhs,                                           \
1114                *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
1115                *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
1116       KMP_CPU_PAUSE();                                                         \
1117       temp_val = *lhs;                                                         \
1118       old_value = temp_val;                                                    \
1119     }                                                                          \
1120   }
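
// For example, with OP '<' (used for max), MIN_MAX_CMPXCHG keeps trying to
// install rhs into *lhs with a compare-and-store for as long as the freshly
// re-read *lhs is still smaller than rhs; once *lhs is no longer smaller
// (e.g. another thread already stored a larger value) the loop exits without
// writing.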
1121 
1122 // -------------------------------------------------------------------------
1123 // 1-byte, 2-byte operands - use critical section
1124 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)          \
1125   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1126   if (*lhs OP rhs) { /* need actions? */                                       \
1127     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1128     MIN_MAX_CRITSECT(OP, LCK_ID)                                               \
1129   }                                                                            \
1130   }
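
// Note: the unguarded "if (*lhs OP rhs)" test in MIN_MAX_CRITICAL above (and in
// MIN_MAX_COMPXCHG below) is only a fast-path filter; the decision is re-made
// under the lock or inside the CAS loop, so a stale first read cannot cause a
// wrong update.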
1131 
1132 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1133 
1134 // -------------------------------------------------------------------------
1135 // X86 or X86_64: no alignment problems ====================================
1136 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
1137                          GOMP_FLAG)                                            \
1138   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1139   if (*lhs OP rhs) {                                                           \
1140     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1141     MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                            \
1142   }                                                                            \
1143   }
1144 
1145 #else
1146 // -------------------------------------------------------------------------
1147 // Code for other architectures that don't handle unaligned accesses.
1148 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
1149                          GOMP_FLAG)                                            \
1150   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1151   if (*lhs OP rhs) {                                                           \
1152     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1153     if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                    \
1154       MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */                    \
1155     } else {                                                                   \
1156       KMP_CHECK_GTID;                                                          \
1157       MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */                     \
1158     }                                                                          \
1159   }                                                                            \
1160   }
1161 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1162 
1163 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1164                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1165 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1166                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1167 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1168                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1169 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1170                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1171 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1172                  0) // __kmpc_atomic_fixed4_max
1173 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1174                  0) // __kmpc_atomic_fixed4_min
1175 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1176                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1177 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1178                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1179 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1180                  KMP_ARCH_X86) // __kmpc_atomic_float4_max
1181 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1182                  KMP_ARCH_X86) // __kmpc_atomic_float4_min
1183 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1184                  KMP_ARCH_X86) // __kmpc_atomic_float8_max
1185 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1186                  KMP_ARCH_X86) // __kmpc_atomic_float8_min
1187 #if KMP_HAVE_QUAD
1188 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1189                  1) // __kmpc_atomic_float16_max
1190 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1191                  1) // __kmpc_atomic_float16_min
1192 #if (KMP_ARCH_X86)
1193 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1194                  1) // __kmpc_atomic_float16_max_a16
1195 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1196                  1) // __kmpc_atomic_float16_min_a16
1197 #endif
1198 #endif
1199 // ------------------------------------------------------------------------
// Separate macros are needed for .EQV. because of the required complement (~)
// OP is ignored for critical sections; "^= ~" is used instead
1202 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1203   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1204   OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */                      \
1205   OP_CRITICAL(^= ~, LCK_ID) /* send assignment and complement */               \
1206   }
1207 
1208 // ------------------------------------------------------------------------
1209 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1210 // ------------------------------------------------------------------------
1211 // X86 or X86_64: no alignment problems ===================================
1212 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1213                         GOMP_FLAG)                                             \
1214   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1215   OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */                      \
1216   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1217   }
1218 // ------------------------------------------------------------------------
1219 #else
1220 // ------------------------------------------------------------------------
1221 // Code for other architectures that don't handle unaligned accesses.
1222 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1223                         GOMP_FLAG)                                             \
1224   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1225   OP_GOMP_CRITICAL(^= ~, GOMP_FLAG)                                            \
1226   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1227     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1228   } else {                                                                     \
1229     KMP_CHECK_GTID;                                                            \
1230     OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */           \
1231   }                                                                            \
1232   }
1233 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1234 
1235 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1236                KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1237 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1238                KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1239 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1240                KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1241 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1242                KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1243 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1244                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1245 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1246                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1247 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1248                 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1249 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1250                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1251 
1252 // ------------------------------------------------------------------------
1253 // Routines for Extended types: long double, _Quad, complex flavours (use
1254 // critical section)
1255 //     TYPE_ID, OP_ID, TYPE - detailed above
1256 //     OP      - operator
1257 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1258 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1259   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1260   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */                    \
1261   OP_CRITICAL(OP## =, LCK_ID) /* send assignment */                            \
1262   }
1263 
1264 /* ------------------------------------------------------------------------- */
1265 // routines for long double type
1266 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1267                 1) // __kmpc_atomic_float10_add
1268 ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1269                 1) // __kmpc_atomic_float10_sub
1270 ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1271                 1) // __kmpc_atomic_float10_mul
1272 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1273                 1) // __kmpc_atomic_float10_div
1274 #if KMP_HAVE_QUAD
1275 // routines for _Quad type
1276 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1277                 1) // __kmpc_atomic_float16_add
1278 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1279                 1) // __kmpc_atomic_float16_sub
1280 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1281                 1) // __kmpc_atomic_float16_mul
1282 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1283                 1) // __kmpc_atomic_float16_div
1284 #if (KMP_ARCH_X86)
1285 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1286                 1) // __kmpc_atomic_float16_add_a16
1287 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1288                 1) // __kmpc_atomic_float16_sub_a16
1289 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1290                 1) // __kmpc_atomic_float16_mul_a16
1291 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1292                 1) // __kmpc_atomic_float16_div_a16
1293 #endif
1294 #endif
1295 // routines for complex types
1296 
1297 #if USE_CMPXCHG_FIX
1298 // workaround for C78287 (complex(kind=4) data type)
1299 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1300                           1) // __kmpc_atomic_cmplx4_add
1301 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1302                           1) // __kmpc_atomic_cmplx4_sub
1303 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1304                           1) // __kmpc_atomic_cmplx4_mul
1305 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1306                           1) // __kmpc_atomic_cmplx4_div
1307 // end of the workaround for C78287
1308 #else
1309 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1310 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1311 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1312 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1313 #endif // USE_CMPXCHG_FIX
1314 
1315 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1316 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1317 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1318 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1319 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1320                 1) // __kmpc_atomic_cmplx10_add
1321 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1322                 1) // __kmpc_atomic_cmplx10_sub
1323 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1324                 1) // __kmpc_atomic_cmplx10_mul
1325 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1326                 1) // __kmpc_atomic_cmplx10_div
1327 #if KMP_HAVE_QUAD
1328 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1329                 1) // __kmpc_atomic_cmplx16_add
1330 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1331                 1) // __kmpc_atomic_cmplx16_sub
1332 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1333                 1) // __kmpc_atomic_cmplx16_mul
1334 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1335                 1) // __kmpc_atomic_cmplx16_div
1336 #if (KMP_ARCH_X86)
1337 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1338                 1) // __kmpc_atomic_cmplx16_add_a16
1339 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1340                 1) // __kmpc_atomic_cmplx16_sub_a16
1341 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1342                 1) // __kmpc_atomic_cmplx16_mul_a16
1343 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1344                 1) // __kmpc_atomic_cmplx16_div_a16
1345 #endif
1346 #endif
1347 
1348 #if OMP_40_ENABLED
1349 
1350 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1351 // Supported only on IA-32 architecture and Intel(R) 64
1352 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1353 
1354 // ------------------------------------------------------------------------
1355 // Operation on *lhs, rhs bound by critical section
1356 //     OP     - operator (it's supposed to contain an assignment)
1357 //     LCK_ID - lock identifier
1358 // Note: don't check gtid as it should always be valid
// 1- and 2-byte operands: a valid gtid is expected; other sizes are checked
// before this macro
1360 #define OP_CRITICAL_REV(OP, LCK_ID)                                            \
1361   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1362                                                                                \
1363   (*lhs) = (rhs)OP(*lhs);                                                      \
1364                                                                                \
1365   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
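// Note the operand order: the reversed forms compute "*lhs = rhs OP *lhs".
// Illustration (semantics only): __kmpc_atomic_float8_sub_rev implements
//   #pragma omp atomic
//   x = expr - x;             // non-commutative, operands reversed
// i.e. *lhs = rhs - (*lhs), not *lhs = (*lhs) - rhs.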
1366 
1367 #ifdef KMP_GOMP_COMPAT
1368 #define OP_GOMP_CRITICAL_REV(OP, FLAG)                                         \
1369   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1370     KMP_CHECK_GTID;                                                            \
1371     OP_CRITICAL_REV(OP, 0);                                                    \
1372     return;                                                                    \
1373   }
1374 #else
1375 #define OP_GOMP_CRITICAL_REV(OP, FLAG)
1376 #endif /* KMP_GOMP_COMPAT */
1377 
// Beginning of a definition (provides name, parameters, debug trace)
1379 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1380 //     fixed)
1381 //     OP_ID   - operation identifier (add, sub, mul, ...)
1382 //     TYPE    - operands' type
1383 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
1384   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid,  \
1385                                                    TYPE *lhs, TYPE rhs) {      \
1386     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1387     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1388 
1389 // ------------------------------------------------------------------------
1390 // Operation on *lhs, rhs using "compare_and_store" routine
1391 //     TYPE    - operands' type
1392 //     BITS    - size in bits, used to distinguish low level calls
1393 //     OP      - operator
1394 // Note: temp_val introduced in order to force the compiler to read
1395 //       *lhs only once (w/o it the compiler reads *lhs twice)
1396 #define OP_CMPXCHG_REV(TYPE, BITS, OP)                                         \
1397   {                                                                            \
1398     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1399     TYPE old_value, new_value;                                                 \
1400     temp_val = *lhs;                                                           \
1401     old_value = temp_val;                                                      \
1402     new_value = rhs OP old_value;                                              \
1403     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
1404         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
1405         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
1406       KMP_DO_PAUSE;                                                            \
1407                                                                                \
1408       temp_val = *lhs;                                                         \
1409       old_value = temp_val;                                                    \
1410       new_value = rhs OP old_value;                                            \
1411     }                                                                          \
1412   }
1413 
1414 // -------------------------------------------------------------------------
1415 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG)  \
1416   ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
1417   OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
1418   OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
1419   }
1420 
1421 // ------------------------------------------------------------------------
1422 // Entries definition for integer operands
1423 //     TYPE_ID - operands type and size (fixed4, float4)
1424 //     OP_ID   - operation identifier (add, sub, mul, ...)
1425 //     TYPE    - operand type
1426 //     BITS    - size in bits, used to distinguish low level calls
1427 //     OP      - operator (used in critical section)
1428 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1429 
1430 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,GOMP_FLAG
1431 // ------------------------------------------------------------------------
1432 // Routines for ATOMIC integer operands, other operators
1433 // ------------------------------------------------------------------------
1434 //                  TYPE_ID,OP_ID, TYPE,    BITS, OP, LCK_ID, GOMP_FLAG
1435 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1436                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1437 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1438                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1439 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1440                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1441 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1442                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1443 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1444                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1445 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1446                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1447 
1448 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1449                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1450 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1451                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1452 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1453                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1454 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1455                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1456 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1457                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1458 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1459                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1460 
1461 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1462                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1463 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1464                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1465 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1466                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1467 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1468                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1469 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1470                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1471 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1472                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1473 
1474 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1475                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1476 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1477                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1478 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1479                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1480 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1481                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1482 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1483                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1484 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1485                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1486 
1487 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1488                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1489 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1490                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1491 
1492 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1493                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1494 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1495                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1496 //                  TYPE_ID,OP_ID, TYPE,     BITS,OP,LCK_ID, GOMP_FLAG
1497 
1498 // ------------------------------------------------------------------------
1499 // Routines for Extended types: long double, _Quad, complex flavours (use
1500 // critical section)
1501 //     TYPE_ID, OP_ID, TYPE - detailed above
1502 //     OP      - operator
1503 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1504 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
1505   ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
1506   OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
1507   OP_CRITICAL_REV(OP, LCK_ID)                                                  \
1508   }
1509 
1510 /* ------------------------------------------------------------------------- */
1511 // routines for long double type
1512 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1513                     1) // __kmpc_atomic_float10_sub_rev
1514 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1515                     1) // __kmpc_atomic_float10_div_rev
1516 #if KMP_HAVE_QUAD
1517 // routines for _Quad type
1518 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1519                     1) // __kmpc_atomic_float16_sub_rev
1520 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1521                     1) // __kmpc_atomic_float16_div_rev
1522 #if (KMP_ARCH_X86)
1523 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1524                     1) // __kmpc_atomic_float16_sub_a16_rev
1525 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1526                     1) // __kmpc_atomic_float16_div_a16_rev
1527 #endif
1528 #endif
1529 
1530 // routines for complex types
1531 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1532                     1) // __kmpc_atomic_cmplx4_sub_rev
1533 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1534                     1) // __kmpc_atomic_cmplx4_div_rev
1535 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1536                     1) // __kmpc_atomic_cmplx8_sub_rev
1537 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1538                     1) // __kmpc_atomic_cmplx8_div_rev
1539 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1540                     1) // __kmpc_atomic_cmplx10_sub_rev
1541 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1542                     1) // __kmpc_atomic_cmplx10_div_rev
1543 #if KMP_HAVE_QUAD
1544 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1545                     1) // __kmpc_atomic_cmplx16_sub_rev
1546 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1547                     1) // __kmpc_atomic_cmplx16_div_rev
1548 #if (KMP_ARCH_X86)
1549 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1550                     1) // __kmpc_atomic_cmplx16_sub_a16_rev
1551 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1552                     1) // __kmpc_atomic_cmplx16_div_a16_rev
1553 #endif
1554 #endif
1555 
1556 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1557 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1558 
1559 #endif // OMP_40_ENABLED
1560 
1561 /* ------------------------------------------------------------------------ */
1562 /* Routines for mixed types of LHS and RHS, when RHS is "larger"            */
/* Note: in order to reduce the total number of type combinations, it is      */
/*       assumed that the compiler converts RHS to the longest floating type, */
/*       that is _Quad, before calling any of these routines.                 */
/* The conversion to _Quad is done by the compiler during the calculation;    */
/*    the conversion back to TYPE happens just before the assignment, e.g.:   */
/*    *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                   */
/* A performance penalty is expected because of software emulation of _Quad.  */
1570 /* ------------------------------------------------------------------------ */
1571 
1572 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                \
1573   void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
1574       ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) {                       \
1575     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1576     KA_TRACE(100,                                                              \
1577              ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
1578               gtid));
1579 
1580 // -------------------------------------------------------------------------
1581 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID,  \
1582                            GOMP_FLAG)                                          \
1583   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1584   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */                    \
1585   OP_CRITICAL(OP## =, LCK_ID) /* send assignment */                            \
1586   }
1587 
1588 // -------------------------------------------------------------------------
1589 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1590 // -------------------------------------------------------------------------
1591 // X86 or X86_64: no alignment problems ====================================
1592 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
1593                            LCK_ID, MASK, GOMP_FLAG)                            \
1594   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1595   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1596   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1597   }
1598 // -------------------------------------------------------------------------
1599 #else
1600 // ------------------------------------------------------------------------
1601 // Code for other architectures that don't handle unaligned accesses.
1602 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
1603                            LCK_ID, MASK, GOMP_FLAG)                            \
1604   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1605   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1606   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1607     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1608   } else {                                                                     \
1609     KMP_CHECK_GTID;                                                            \
1610     OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
1611   }                                                                            \
1612   }
1613 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1614 
1615 // -------------------------------------------------------------------------
1616 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1617 // -------------------------------------------------------------------------
1618 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
1619                                RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
1620   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1621   OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
1622   OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
1623   }
1624 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,      \
1625                                LCK_ID, GOMP_FLAG)                              \
1626   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1627   OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
1628   OP_CRITICAL_REV(OP, LCK_ID)                                                  \
1629   }
1630 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1631 
1632 // RHS=float8
1633 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1634                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1635 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1636                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1637 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1638                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1639 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1640                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1641 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1642                    0) // __kmpc_atomic_fixed4_mul_float8
1643 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1644                    0) // __kmpc_atomic_fixed4_div_float8
1645 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1646                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1647 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1648                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1649 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1650                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1651 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1652                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1653 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1654                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1655 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1656                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1657 
1658 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not
1659 // use them)
1660 #if KMP_HAVE_QUAD
1661 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1662                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1663 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1664                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1665 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1666                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1667 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1668                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1669 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1670                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1671 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1672                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1673 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1674                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1675 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1676                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1677 
1678 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1679                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1680 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1681                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1682 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1683                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1684 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1685                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1686 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1687                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1688 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1689                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1690 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1691                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1692 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1693                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1694 
1695 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1696                    0) // __kmpc_atomic_fixed4_add_fp
1697 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1698                    0) // __kmpc_atomic_fixed4u_add_fp
1699 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1700                    0) // __kmpc_atomic_fixed4_sub_fp
1701 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1702                    0) // __kmpc_atomic_fixed4u_sub_fp
1703 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1704                    0) // __kmpc_atomic_fixed4_mul_fp
1705 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1706                    0) // __kmpc_atomic_fixed4u_mul_fp
1707 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1708                    0) // __kmpc_atomic_fixed4_div_fp
1709 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1710                    0) // __kmpc_atomic_fixed4u_div_fp
1711 
1712 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1713                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1714 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1715                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1716 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1717                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1718 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1719                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1720 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1721                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1722 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1723                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1724 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1725                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1726 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1727                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1728 
1729 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1730                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1731 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1732                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1733 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1734                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1735 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1736                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1737 
1738 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1739                    KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1740 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1741                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1742 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1743                    KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1744 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1745                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1746 
1747 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1748                    1) // __kmpc_atomic_float10_add_fp
1749 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1750                    1) // __kmpc_atomic_float10_sub_fp
1751 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1752                    1) // __kmpc_atomic_float10_mul_fp
1753 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1754                    1) // __kmpc_atomic_float10_div_fp
1755 
1756 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1757 // Reverse operations
1758 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1759                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1760 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1761                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1762 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1763                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1764 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1765                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1766 
1767 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1768                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1769 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1770                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1771 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1772                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1773 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1774                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1775 
1776 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1777                        0) // __kmpc_atomic_fixed4_sub_rev_fp
1778 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1779                        0) // __kmpc_atomic_fixed4u_sub_rev_fp
1780 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1781                        0) // __kmpc_atomic_fixed4_div_rev_fp
1782 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1783                        0) // __kmpc_atomic_fixed4u_div_rev_fp
1784 
1785 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1786                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1787 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1788                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1789 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1790                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1791 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1792                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1793 
1794 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1795                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1796 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1797                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1798 
1799 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1800                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1801 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1802                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1803 
1804 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1805                        1) // __kmpc_atomic_float10_sub_rev_fp
1806 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1807                        1) // __kmpc_atomic_float10_div_rev_fp
1808 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1809 
1810 #endif
1811 
1812 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1813 // ------------------------------------------------------------------------
1814 // X86 or X86_64: no alignment problems ====================================
1815 #if USE_CMPXCHG_FIX
1816 // workaround for C78287 (complex(kind=4) data type)
1817 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1818                              LCK_ID, MASK, GOMP_FLAG)                          \
1819   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1820   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1821   OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
1822   }
1823 // end of the second part of the workaround for C78287
1824 #else
1825 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1826                              LCK_ID, MASK, GOMP_FLAG)                          \
1827   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1828   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1829   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1830   }
1831 #endif // USE_CMPXCHG_FIX
1832 #else
1833 // ------------------------------------------------------------------------
1834 // Code for other architectures that don't handle unaligned accesses.
1835 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1836                              LCK_ID, MASK, GOMP_FLAG)                          \
1837   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1838   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1839   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1840     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1841   } else {                                                                     \
1842     KMP_CHECK_GTID;                                                            \
1843     OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
1844   }                                                                            \
1845   }
1846 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
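// The cmplx4 entries below rely on kmp_cmplx32 (two 32-bit floats) occupying
// exactly 64 bits, so the whole complex value is updated with one 8-byte
// compare-and-store even though the arithmetic itself is done in kmp_cmplx64.
// In other words (illustration only):
//   static_assert(sizeof(kmp_cmplx32) == sizeof(kmp_int64),
//                 "cmplx4 must fit in a single 64-bit CAS");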
1847 
1848 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1849                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1850 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1851                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1852 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1853                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1854 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1855                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1856 
1857 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1858 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1859 
1860 // ------------------------------------------------------------------------
1861 // Atomic READ routines
1862 
1863 // ------------------------------------------------------------------------
// Beginning of a definition (provides name, parameters, debug trace)
1865 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1866 //     fixed)
1867 //     OP_ID   - operation identifier (add, sub, mul, ...)
1868 //     TYPE    - operands' type
1869 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE)                      \
1870   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
1871                                              TYPE *loc) {                      \
1872     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1873     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1874 
1875 // ------------------------------------------------------------------------
1876 // Operation on *lhs, rhs using "compare_and_store_ret" routine
1877 //     TYPE    - operands' type
1878 //     BITS    - size in bits, used to distinguish low level calls
1879 //     OP      - operator
1880 // Note: temp_val introduced in order to force the compiler to read
1881 //       *lhs only once (w/o it the compiler reads *lhs twice)
1882 // TODO: check if it is still necessary
// Return the old value regardless of the result of the "compare & swap"
// operation
1884 #define OP_CMPXCHG_READ(TYPE, BITS, OP)                                        \
1885   {                                                                            \
1886     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1887     union f_i_union {                                                          \
1888       TYPE f_val;                                                              \
1889       kmp_int##BITS i_val;                                                     \
1890     };                                                                         \
1891     union f_i_union old_value;                                                 \
1892     temp_val = *loc;                                                           \
1893     old_value.f_val = temp_val;                                                \
1894     old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS(                         \
1895         (kmp_int##BITS *)loc,                                                  \
1896         *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val,                     \
1897         *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val);                    \
1898     new_value = old_value.f_val;                                               \
1899     return new_value;                                                          \
1900   }
1901 
1902 // -------------------------------------------------------------------------
1903 // Operation on *lhs, rhs bound by critical section
1904 //     OP     - operator (it's supposed to contain an assignment)
1905 //     LCK_ID - lock identifier
1906 // Note: don't check gtid as it should always be valid
// 1- and 2-byte operands: a valid gtid is expected; other sizes are checked
// before this macro
1908 #define OP_CRITICAL_READ(OP, LCK_ID)                                           \
1909   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1910                                                                                \
1911   new_value = (*loc);                                                          \
1912                                                                                \
1913   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1914 
1915 // -------------------------------------------------------------------------
1916 #ifdef KMP_GOMP_COMPAT
1917 #define OP_GOMP_CRITICAL_READ(OP, FLAG)                                        \
1918   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1919     KMP_CHECK_GTID;                                                            \
1920     OP_CRITICAL_READ(OP, 0);                                                   \
1921     return new_value;                                                          \
1922   }
1923 #else
1924 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1925 #endif /* KMP_GOMP_COMPAT */
1926 
1927 // -------------------------------------------------------------------------
1928 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
1929   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1930   TYPE new_value;                                                              \
1931   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
1932   new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0);                              \
1933   return new_value;                                                            \
1934   }
1935 // -------------------------------------------------------------------------
1936 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
1937   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1938   TYPE new_value;                                                              \
1939   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
1940   OP_CMPXCHG_READ(TYPE, BITS, OP)                                              \
1941   }
1942 // ------------------------------------------------------------------------
1943 // Routines for Extended types: long double, _Quad, complex flavours (use
1944 // critical section)
1945 //     TYPE_ID, OP_ID, TYPE - detailed above
1946 //     OP      - operator
1947 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1948 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
1949   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1950   TYPE new_value;                                                              \
1951   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */               \
1952   OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */                           \
1953   return new_value;                                                            \
1954   }
1955 
1956 // ------------------------------------------------------------------------
// Fix for cmplx4 read (CQ220361) on Windows* OS. A regular routine with a
// return value doesn't work, so the read value is returned through an
// additional output parameter instead.
1960 #if (KMP_OS_WINDOWS)
1961 
1962 #define OP_CRITICAL_READ_WRK(OP, LCK_ID)                                       \
1963   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1964                                                                                \
1965   (*out) = (*loc);                                                             \
1966                                                                                \
1967   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1968 // ------------------------------------------------------------------------
1969 #ifdef KMP_GOMP_COMPAT
1970 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)                                    \
1971   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1972     KMP_CHECK_GTID;                                                            \
1973     OP_CRITICAL_READ_WRK(OP, 0);                                               \
1974   }
1975 #else
1976 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
1977 #endif /* KMP_GOMP_COMPAT */
1978 // ------------------------------------------------------------------------
1979 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                            \
1980   void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
1981                                          TYPE *loc) {                          \
1982     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1983     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1984 
1985 // ------------------------------------------------------------------------
1986 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)  \
1987   ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                                  \
1988   OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */           \
1989   OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */                       \
1990   }
1991 
1992 #endif // KMP_OS_WINDOWS
1993 
1994 // ------------------------------------------------------------------------
1995 //                  TYPE_ID,OP_ID, TYPE,      OP, GOMP_FLAG
1996 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
1997 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
1998                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
1999 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2000                     KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2001 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2002                     KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2003 
2004 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2005 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2006                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2007 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2008                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2009 
2010 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2011                      1) // __kmpc_atomic_float10_rd
2012 #if KMP_HAVE_QUAD
2013 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2014                      1) // __kmpc_atomic_float16_rd
2015 #endif // KMP_HAVE_QUAD
2016 
2017 // Fix for CQ220361 on Windows* OS
2018 #if (KMP_OS_WINDOWS)
2019 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2020                          1) // __kmpc_atomic_cmplx4_rd
2021 #else
2022 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2023                      1) // __kmpc_atomic_cmplx4_rd
2024 #endif
2025 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2026                      1) // __kmpc_atomic_cmplx8_rd
2027 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2028                      1) // __kmpc_atomic_cmplx10_rd
2029 #if KMP_HAVE_QUAD
2030 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2031                      1) // __kmpc_atomic_cmplx16_rd
2032 #if (KMP_ARCH_X86)
2033 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2034                      1) // __kmpc_atomic_float16_a16_rd
2035 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2036                      1) // __kmpc_atomic_cmplx16_a16_rd
2037 #endif
2038 #endif
2039 
2040 // ------------------------------------------------------------------------
2041 // Atomic WRITE routines
2042 
2043 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)              \
2044   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2045   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2046   KMP_XCHG_FIXED##BITS(lhs, rhs);                                              \
2047   }
2048 // ------------------------------------------------------------------------
2049 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2050   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2051   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2052   KMP_XCHG_REAL##BITS(lhs, rhs);                                               \
2053   }
2054 
2055 // ------------------------------------------------------------------------
2056 // Operation on *lhs, rhs using "compare_and_store" routine
2057 //     TYPE    - operands' type
2058 //     BITS    - size in bits, used to distinguish low level calls
2059 //     OP      - operator
2060 // Note: temp_val introduced in order to force the compiler to read
2061 //       *lhs only once (w/o it the compiler reads *lhs twice)
2062 #define OP_CMPXCHG_WR(TYPE, BITS, OP)                                          \
2063   {                                                                            \
2064     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2065     TYPE old_value, new_value;                                                 \
2066     temp_val = *lhs;                                                           \
2067     old_value = temp_val;                                                      \
2068     new_value = rhs;                                                           \
2069     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2070         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2071         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2072       KMP_CPU_PAUSE();                                                         \
2073                                                                                \
2074       temp_val = *lhs;                                                         \
2075       old_value = temp_val;                                                    \
2076       new_value = rhs;                                                         \
2077     }                                                                          \
2078   }
2079 
2080 // -------------------------------------------------------------------------
2081 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
2082   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2083   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2084   OP_CMPXCHG_WR(TYPE, BITS, OP)                                                \
2085   }
2086 
2087 // ------------------------------------------------------------------------
2088 // Routines for Extended types: long double, _Quad, complex flavours (use
2089 // critical section)
2090 //     TYPE_ID, OP_ID, TYPE - detailed above
2091 //     OP      - operator
2092 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
2093 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)        \
2094   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2095   OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */                        \
2096   OP_CRITICAL(OP, LCK_ID) /* send assignment */                                \
2097   }
2098 // -------------------------------------------------------------------------
2099 
2100 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2101                KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2102 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2103                KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2104 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2105                KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2106 #if (KMP_ARCH_X86)
2107 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2108                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2109 #else
2110 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2111                KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2112 #endif
2113 
2114 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2115                      KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2116 #if (KMP_ARCH_X86)
2117 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2118                   KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2119 #else
2120 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2121                      KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2122 #endif
2123 
2124 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2125                    1) // __kmpc_atomic_float10_wr
2126 #if KMP_HAVE_QUAD
2127 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2128                    1) // __kmpc_atomic_float16_wr
2129 #endif
2130 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2131 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2132                    1) // __kmpc_atomic_cmplx8_wr
2133 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2134                    1) // __kmpc_atomic_cmplx10_wr
2135 #if KMP_HAVE_QUAD
2136 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2137                    1) // __kmpc_atomic_cmplx16_wr
2138 #if (KMP_ARCH_X86)
2139 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2140                    1) // __kmpc_atomic_float16_a16_wr
2141 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2142                    1) // __kmpc_atomic_cmplx16_a16_wr
2143 #endif
2144 #endif
2145 
2146 // ------------------------------------------------------------------------
2147 // Atomic CAPTURE routines
2148 
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operand type and size (fixed* for signed, fixed*u for
//     unsigned fixed)
2152 //     OP_ID   - operation identifier (add, sub, mul, ...)
2153 //     TYPE    - operands' type
2154 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
2155   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
2156                                              TYPE *lhs, TYPE rhs, int flag) {  \
2157     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2158     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
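// For example, ATOMIC_BEGIN_CPT(fixed4, add_cpt, kmp_int32, kmp_int32) opens
// a definition of the form (sketch):
//   kmp_int32 __kmpc_atomic_fixed4_add_cpt(ident_t *id_ref, int gtid,
//                                          kmp_int32 *lhs, kmp_int32 rhs,
//                                          int flag) {
// and one of the ATOMIC_*_CPT body macros plus its closing brace completes
// the routine.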
2159 
2160 // -------------------------------------------------------------------------
2161 // Operation on *lhs, rhs bound by critical section
2162 //     OP     - operator (it's supposed to contain an assignment)
2163 //     LCK_ID - lock identifier
2164 // Note: don't check gtid as it should always be valid
2165 // 1, 2-byte - expect valid parameter, other - check before this macro
2166 #define OP_CRITICAL_CPT(OP, LCK_ID)                                            \
2167   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2168                                                                                \
2169   if (flag) {                                                                  \
2170     (*lhs) OP rhs;                                                             \
2171     new_value = (*lhs);                                                        \
2172   } else {                                                                     \
2173     new_value = (*lhs);                                                        \
2174     (*lhs) OP rhs;                                                             \
2175   }                                                                            \
2176                                                                                \
2177   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2178   return new_value;
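// Illustrative expansion (sketch) of OP_CRITICAL_CPT(+=, 10r): the order of
// the update and the capture depends on `flag`:
//   __kmp_acquire_atomic_lock(&ATOMIC_LOCK10r, gtid);
//   if (flag) { (*lhs) += rhs; new_value = (*lhs); } // capture after
//   else      { new_value = (*lhs); (*lhs) += rhs; } // capture before
//   __kmp_release_atomic_lock(&ATOMIC_LOCK10r, gtid);
//   return new_value;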
2179 
2180 // ------------------------------------------------------------------------
2181 #ifdef KMP_GOMP_COMPAT
2182 #define OP_GOMP_CRITICAL_CPT(OP, FLAG)                                         \
2183   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2184     KMP_CHECK_GTID;                                                            \
2185     OP_CRITICAL_CPT(OP## =, 0);                                                \
2186   }
2187 #else
2188 #define OP_GOMP_CRITICAL_CPT(OP, FLAG)
2189 #endif /* KMP_GOMP_COMPAT */
2190 
2191 // ------------------------------------------------------------------------
2192 // Operation on *lhs, rhs using "compare_and_store" routine
2193 //     TYPE    - operands' type
2194 //     BITS    - size in bits, used to distinguish low level calls
2195 //     OP      - operator
2196 // Note: temp_val introduced in order to force the compiler to read
2197 //       *lhs only once (w/o it the compiler reads *lhs twice)
2198 #define OP_CMPXCHG_CPT(TYPE, BITS, OP)                                         \
2199   {                                                                            \
2200     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2201     TYPE old_value, new_value;                                                 \
2202     temp_val = *lhs;                                                           \
2203     old_value = temp_val;                                                      \
2204     new_value = old_value OP rhs;                                              \
2205     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2206         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2207         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2208       KMP_CPU_PAUSE();                                                         \
2209                                                                                \
2210       temp_val = *lhs;                                                         \
2211       old_value = temp_val;                                                    \
2212       new_value = old_value OP rhs;                                            \
2213     }                                                                          \
2214     if (flag) {                                                                \
2215       return new_value;                                                        \
2216     } else                                                                     \
2217       return old_value;                                                        \
2218   }
2219 
2220 // -------------------------------------------------------------------------
2221 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)          \
2222   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2223   TYPE new_value;                                                              \
2224   OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG)                                          \
2225   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2226   }
2227 
2228 // -------------------------------------------------------------------------
2229 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2230   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2231   TYPE old_value, new_value;                                                   \
2232   OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG)                                          \
2233   /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
2234   old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                            \
2235   if (flag) {                                                                  \
2236     return old_value OP rhs;                                                   \
2237   } else                                                                       \
2238     return old_value;                                                          \
2239   }
2240 // -------------------------------------------------------------------------
2241 
2242 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2243                      0) // __kmpc_atomic_fixed4_add_cpt
2244 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2245                      0) // __kmpc_atomic_fixed4_sub_cpt
2246 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2247                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2248 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2249                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2250 
2251 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2252                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2253 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2254                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2255 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2256                    KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2257 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2258                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2259 
2260 // ------------------------------------------------------------------------
2261 // Entries definition for integer operands
//     TYPE_ID - operand type and size (fixed4, float4)
2263 //     OP_ID   - operation identifier (add, sub, mul, ...)
2264 //     TYPE    - operand type
2265 //     BITS    - size in bits, used to distinguish low level calls
2266 //     OP      - operator (used in critical section)
2267 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,GOMP_FLAG
2268 // ------------------------------------------------------------------------
2269 // Routines for ATOMIC integer operands, other operators
2270 // ------------------------------------------------------------------------
//              TYPE_ID,OP_ID,  TYPE,   BITS, OP,  GOMP_FLAG
2272 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2273                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2274 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2275                    0) // __kmpc_atomic_fixed1_andb_cpt
2276 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2277                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2278 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2279                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2280 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2281                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2282 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2283                    0) // __kmpc_atomic_fixed1_orb_cpt
2284 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2285                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2286 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2287                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2288 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2289                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2290 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2291                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2292 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2293                    0) // __kmpc_atomic_fixed1_xor_cpt
2294 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2295                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2296 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2297                    0) // __kmpc_atomic_fixed2_andb_cpt
2298 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2299                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2300 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2301                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2302 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2303                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2304 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2305                    0) // __kmpc_atomic_fixed2_orb_cpt
2306 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2307                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2308 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2309                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2310 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2311                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2312 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2313                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2314 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2315                    0) // __kmpc_atomic_fixed2_xor_cpt
2316 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2317                    0) // __kmpc_atomic_fixed4_andb_cpt
2318 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2319                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2320 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2321                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2322 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2323                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2324 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2325                    0) // __kmpc_atomic_fixed4_orb_cpt
2326 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2327                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2328 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2329                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2330 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2331                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2332 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2333                    0) // __kmpc_atomic_fixed4_xor_cpt
2334 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2335                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2336 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2337                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2338 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2339                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2340 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2341                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2342 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2343                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2344 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2345                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2346 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2347                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2348 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2349                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2350 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2351                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2352 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2353                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2354 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2355                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2356 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2357                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2358 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2359                    KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
//              TYPE_ID,OP_ID,  TYPE,   BITS, OP,  GOMP_FLAG
2361 
2362 // CAPTURE routines for mixed types RHS=float16
2363 #if KMP_HAVE_QUAD
2364 
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operand type and size (fixed* for signed, fixed*u for
//     unsigned fixed)
2368 //     OP_ID   - operation identifier (add, sub, mul, ...)
2369 //     TYPE    - operands' type
2370 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)            \
2371   TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
2372       ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) {             \
2373     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2374     KA_TRACE(100,                                                              \
2375              ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
2376               gtid));
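// E.g. (sketch) ATOMIC_BEGIN_CPT_MIX(fixed4, add_cpt, kmp_int32, fp, _Quad)
// opens a definition of the form:
//   kmp_int32 __kmpc_atomic_fixed4_add_cpt_fp(ident_t *id_ref, int gtid,
//                                             kmp_int32 *lhs, _Quad rhs,
//                                             int flag) {
// i.e. the right-hand side arrives as _Quad while the result keeps the
// left-hand side's type.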
2377 
2378 // -------------------------------------------------------------------------
2379 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
2380                                RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
2381   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
2382   TYPE new_value;                                                              \
2383   OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG)                                          \
2384   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2385   }
2386 
2387 // -------------------------------------------------------------------------
2388 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,     \
2389                                 LCK_ID, GOMP_FLAG)                             \
2390   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
2391   TYPE new_value;                                                              \
2392   OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */                    \
2393   OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */                        \
2394   }
2395 
2396 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2397                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2398 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2399                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2400 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2401                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2402 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2403                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2404 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2405                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2406 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2407                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2408 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2409                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2410 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2411                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2412 
2413 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2414                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2415 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2416                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2417 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2418                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2419 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2420                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2421 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2422                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2423 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2424                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2425 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2426                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2427 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2428                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2429 
2430 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2431                        0) // __kmpc_atomic_fixed4_add_cpt_fp
2432 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2433                        0) // __kmpc_atomic_fixed4u_add_cpt_fp
2434 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2435                        0) // __kmpc_atomic_fixed4_sub_cpt_fp
2436 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2437                        0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2438 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2439                        0) // __kmpc_atomic_fixed4_mul_cpt_fp
2440 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2441                        0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2442 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2443                        0) // __kmpc_atomic_fixed4_div_cpt_fp
2444 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2445                        0) // __kmpc_atomic_fixed4u_div_cpt_fp
2446 
2447 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2448                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2449 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2450                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2451 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2452                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2453 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2454                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2455 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2456                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2457 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2458                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2459 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2460                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2461 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2462                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2463 
2464 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2465                        KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2466 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2467                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2468 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2469                        KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2470 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2471                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2472 
2473 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2474                        KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2475 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2476                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2477 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2478                        KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2479 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2480                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2481 
2482 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2483                         1) // __kmpc_atomic_float10_add_cpt_fp
2484 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2485                         1) // __kmpc_atomic_float10_sub_cpt_fp
2486 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2487                         1) // __kmpc_atomic_float10_mul_cpt_fp
2488 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2489                         1) // __kmpc_atomic_float10_div_cpt_fp
2490 
2491 #endif // KMP_HAVE_QUAD
2492 
2493 // ------------------------------------------------------------------------
2494 // Routines for C/C++ Reduction operators && and ||
2495 
2496 // -------------------------------------------------------------------------
2497 // Operation on *lhs, rhs bound by critical section
2498 //     OP     - operator (it's supposed to contain an assignment)
2499 //     LCK_ID - lock identifier
2500 // Note: don't check gtid as it should always be valid
2501 // 1, 2-byte - expect valid parameter, other - check before this macro
2502 #define OP_CRITICAL_L_CPT(OP, LCK_ID)                                          \
2503   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2504                                                                                \
2505   if (flag) {                                                                  \
2506     new_value OP rhs;                                                          \
2507   } else                                                                       \
2508     new_value = (*lhs);                                                        \
2509                                                                                \
2510   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2511 
2512 // ------------------------------------------------------------------------
2513 #ifdef KMP_GOMP_COMPAT
2514 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)                                       \
2515   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2516     KMP_CHECK_GTID;                                                            \
2517     OP_CRITICAL_L_CPT(OP, 0);                                                  \
2518     return new_value;                                                          \
2519   }
2520 #else
2521 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2522 #endif /* KMP_GOMP_COMPAT */
2523 
2524 // ------------------------------------------------------------------------
2525 // Need separate macros for &&, || because there is no combined assignment
2526 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
2527   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2528   TYPE new_value;                                                              \
2529   OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG)                                 \
2530   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2531   }
2532 
2533 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2534                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2535 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2536                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2537 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2538                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2539 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2540                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2541 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2542                   0) // __kmpc_atomic_fixed4_andl_cpt
2543 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2544                   0) // __kmpc_atomic_fixed4_orl_cpt
2545 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2546                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2547 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2548                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2549 
2550 // -------------------------------------------------------------------------
// Routines for Fortran operators that have no direct C counterpart:
2552 // MAX, MIN, .EQV., .NEQV.
2553 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2554 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2555 
2556 // -------------------------------------------------------------------------
2557 // MIN and MAX need separate macros
// OP - comparison operator used to check whether any action is needed
2559 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                       \
2560   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2561                                                                                \
2562   if (*lhs OP rhs) { /* still need actions? */                                 \
2563     old_value = *lhs;                                                          \
2564     *lhs = rhs;                                                                \
2565     if (flag)                                                                  \
2566       new_value = rhs;                                                         \
2567     else                                                                       \
2568       new_value = old_value;                                                   \
2569   }                                                                            \
2570   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2571   return new_value;
2572 
2573 // -------------------------------------------------------------------------
2574 #ifdef KMP_GOMP_COMPAT
2575 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)                                    \
2576   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2577     KMP_CHECK_GTID;                                                            \
2578     MIN_MAX_CRITSECT_CPT(OP, 0);                                               \
2579   }
2580 #else
2581 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2582 #endif /* KMP_GOMP_COMPAT */
2583 
2584 // -------------------------------------------------------------------------
2585 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                    \
2586   {                                                                            \
2587     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2588     /*TYPE old_value; */                                                       \
2589     temp_val = *lhs;                                                           \
2590     old_value = temp_val;                                                      \
2591     while (old_value OP rhs && /* still need actions? */                       \
2592            !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2593                (kmp_int##BITS *)lhs,                                           \
2594                *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
2595                *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
2596       KMP_CPU_PAUSE();                                                         \
2597       temp_val = *lhs;                                                         \
2598       old_value = temp_val;                                                    \
2599     }                                                                          \
2600     if (flag)                                                                  \
2601       return rhs;                                                              \
2602     else                                                                       \
2603       return old_value;                                                        \
2604   }
2605 
2606 // -------------------------------------------------------------------------
// Operands too wide for compare-and-store (float16 flavours) - use critical
// section
2608 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
2609   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2610   TYPE new_value, old_value;                                                   \
2611   if (*lhs OP rhs) { /* need actions? */                                       \
2612     GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
2613     MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                           \
2614   }                                                                            \
2615   return *lhs;                                                                 \
2616   }
2617 
2618 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2619   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2620   TYPE new_value, old_value;                                                   \
2621   if (*lhs OP rhs) {                                                           \
2622     GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
2623     MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                        \
2624   }                                                                            \
2625   return *lhs;                                                                 \
2626   }
2627 
2628 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2629                      KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2630 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2631                      KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2632 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2633                      KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2634 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2635                      KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2636 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2637                      0) // __kmpc_atomic_fixed4_max_cpt
2638 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2639                      0) // __kmpc_atomic_fixed4_min_cpt
2640 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2641                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2642 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2643                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2644 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2645                      KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2646 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2647                      KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2648 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2649                      KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2650 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2651                      KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2652 #if KMP_HAVE_QUAD
2653 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2654                      1) // __kmpc_atomic_float16_max_cpt
2655 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2656                      1) // __kmpc_atomic_float16_min_cpt
2657 #if (KMP_ARCH_X86)
2658 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2659                      1) // __kmpc_atomic_float16_max_a16_cpt
2660 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
                     1) // __kmpc_atomic_float16_min_a16_cpt
2662 #endif
2663 #endif
2664 
2665 // ------------------------------------------------------------------------
2666 #ifdef KMP_GOMP_COMPAT
2667 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)                                     \
2668   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2669     KMP_CHECK_GTID;                                                            \
2670     OP_CRITICAL_CPT(OP, 0);                                                    \
2671   }
2672 #else
2673 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2674 #endif /* KMP_GOMP_COMPAT */
2675 // ------------------------------------------------------------------------
2676 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
2677   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2678   TYPE new_value;                                                              \
2679   OP_GOMP_CRITICAL_EQV_CPT(^= ~, GOMP_FLAG) /* send assignment */              \
2680   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2681   }
2682 
2683 // ------------------------------------------------------------------------
2684 
2685 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2686                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2687 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2688                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2689 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2690                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2691 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2692                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2693 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2694                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2695 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2696                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2697 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2698                     KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2699 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2700                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2701 
2702 // ------------------------------------------------------------------------
2703 // Routines for Extended types: long double, _Quad, complex flavours (use
2704 // critical section)
2705 //     TYPE_ID, OP_ID, TYPE - detailed above
2706 //     OP      - operator
2707 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
2708 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
2709   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2710   TYPE new_value;                                                              \
2711   OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */                    \
2712   OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */                        \
2713   }
2714 
2715 // ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines that return the result by value
// don't work on Win_32e, so the captured value is returned through an
// additional output parameter instead.
2718 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID)                                        \
2719   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2720                                                                                \
2721   if (flag) {                                                                  \
2722     (*lhs) OP rhs;                                                             \
2723     (*out) = (*lhs);                                                           \
2724   } else {                                                                     \
2725     (*out) = (*lhs);                                                           \
2726     (*lhs) OP rhs;                                                             \
2727   }                                                                            \
2728                                                                                \
2729   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2730   return;
2731 // ------------------------------------------------------------------------
2732 
2733 #ifdef KMP_GOMP_COMPAT
2734 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)                                     \
2735   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2736     KMP_CHECK_GTID;                                                            \
2737     OP_CRITICAL_CPT_WRK(OP## =, 0);                                            \
2738   }
2739 #else
2740 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2741 #endif /* KMP_GOMP_COMPAT */
2742 // ------------------------------------------------------------------------
2743 
2744 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                 \
2745   void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2746                                          TYPE rhs, TYPE *out, int flag) {      \
2747     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2748     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2749 // ------------------------------------------------------------------------
2750 
2751 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
2752   ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
2753   OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG)                                      \
2754   OP_CRITICAL_CPT_WRK(OP## =, LCK_ID)                                          \
2755   }
2756 // The end of workaround for cmplx4
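// So, for example, __kmpc_atomic_cmplx4_add_cpt has the signature (sketch):
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag);
// with the captured value written to *out rather than returned.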
2757 
2758 /* ------------------------------------------------------------------------- */
2759 // routines for long double type
2760 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2761                     1) // __kmpc_atomic_float10_add_cpt
2762 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2763                     1) // __kmpc_atomic_float10_sub_cpt
2764 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2765                     1) // __kmpc_atomic_float10_mul_cpt
2766 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2767                     1) // __kmpc_atomic_float10_div_cpt
2768 #if KMP_HAVE_QUAD
2769 // routines for _Quad type
2770 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2771                     1) // __kmpc_atomic_float16_add_cpt
2772 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2773                     1) // __kmpc_atomic_float16_sub_cpt
2774 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2775                     1) // __kmpc_atomic_float16_mul_cpt
2776 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2777                     1) // __kmpc_atomic_float16_div_cpt
2778 #if (KMP_ARCH_X86)
2779 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2780                     1) // __kmpc_atomic_float16_add_a16_cpt
2781 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2782                     1) // __kmpc_atomic_float16_sub_a16_cpt
2783 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2784                     1) // __kmpc_atomic_float16_mul_a16_cpt
2785 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2786                     1) // __kmpc_atomic_float16_div_a16_cpt
2787 #endif
2788 #endif
2789 
2790 // routines for complex types
2791 
2792 // cmplx4 routines to return void
2793 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2794                         1) // __kmpc_atomic_cmplx4_add_cpt
2795 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2796                         1) // __kmpc_atomic_cmplx4_sub_cpt
2797 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2798                         1) // __kmpc_atomic_cmplx4_mul_cpt
2799 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2800                         1) // __kmpc_atomic_cmplx4_div_cpt
2801 
2802 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2803                     1) // __kmpc_atomic_cmplx8_add_cpt
2804 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2805                     1) // __kmpc_atomic_cmplx8_sub_cpt
2806 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2807                     1) // __kmpc_atomic_cmplx8_mul_cpt
2808 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2809                     1) // __kmpc_atomic_cmplx8_div_cpt
2810 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2811                     1) // __kmpc_atomic_cmplx10_add_cpt
2812 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2813                     1) // __kmpc_atomic_cmplx10_sub_cpt
2814 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2815                     1) // __kmpc_atomic_cmplx10_mul_cpt
2816 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2817                     1) // __kmpc_atomic_cmplx10_div_cpt
2818 #if KMP_HAVE_QUAD
2819 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2820                     1) // __kmpc_atomic_cmplx16_add_cpt
2821 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2822                     1) // __kmpc_atomic_cmplx16_sub_cpt
2823 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2824                     1) // __kmpc_atomic_cmplx16_mul_cpt
2825 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2826                     1) // __kmpc_atomic_cmplx16_div_cpt
2827 #if (KMP_ARCH_X86)
2828 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2829                     1) // __kmpc_atomic_cmplx16_add_a16_cpt
2830 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2831                     1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2832 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2833                     1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2834 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2835                     1) // __kmpc_atomic_cmplx16_div_a16_cpt
2836 #endif
2837 #endif
2838 
2839 #if OMP_40_ENABLED
2840 
// OpenMP 4.0: capture-reverse forms for non-commutative operations:
//   v = x = expr binop x;
//   { v = x; x = expr binop x; }
//   { x = expr binop x; v = x; }
2843 // Supported only on IA-32 architecture and Intel(R) 64
2844 
2845 // -------------------------------------------------------------------------
2846 // Operation on *lhs, rhs bound by critical section
2847 //     OP     - operator (it's supposed to contain an assignment)
2848 //     LCK_ID - lock identifier
2849 // Note: don't check gtid as it should always be valid
2850 // 1, 2-byte - expect valid parameter, other - check before this macro
2851 #define OP_CRITICAL_CPT_REV(OP, LCK_ID)                                        \
2852   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2853                                                                                \
2854   if (flag) {                                                                  \
2855     /*temp_val = (*lhs);*/                                                     \
2856     (*lhs) = (rhs)OP(*lhs);                                                    \
2857     new_value = (*lhs);                                                        \
2858   } else {                                                                     \
2859     new_value = (*lhs);                                                        \
2860     (*lhs) = (rhs)OP(*lhs);                                                    \
2861   }                                                                            \
2862   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2863   return new_value;
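// Sketch: for __kmpc_atomic_float10_sub_cpt_rev the critical body becomes
//   (*lhs) = rhs - (*lhs);   // reversed operand order
// with the capture taken before or after the update according to `flag`.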
2864 
2865 // ------------------------------------------------------------------------
2866 #ifdef KMP_GOMP_COMPAT
2867 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)                                     \
2868   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2869     KMP_CHECK_GTID;                                                            \
2870     OP_CRITICAL_CPT_REV(OP, 0);                                                \
2871   }
2872 #else
2873 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)
2874 #endif /* KMP_GOMP_COMPAT */
2875 
2876 // ------------------------------------------------------------------------
2877 // Operation on *lhs, rhs using "compare_and_store" routine
2878 //     TYPE    - operands' type
2879 //     BITS    - size in bits, used to distinguish low level calls
2880 //     OP      - operator
2881 // Note: temp_val introduced in order to force the compiler to read
2882 //       *lhs only once (w/o it the compiler reads *lhs twice)
2883 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                     \
2884   {                                                                            \
2885     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2886     TYPE old_value, new_value;                                                 \
2887     temp_val = *lhs;                                                           \
2888     old_value = temp_val;                                                      \
2889     new_value = rhs OP old_value;                                              \
2890     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2891         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2892         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2893       KMP_CPU_PAUSE();                                                         \
2894                                                                                \
2895       temp_val = *lhs;                                                         \
2896       old_value = temp_val;                                                    \
2897       new_value = rhs OP old_value;                                            \
2898     }                                                                          \
2899     if (flag) {                                                                \
2900       return new_value;                                                        \
2901     } else                                                                     \
2902       return old_value;                                                        \
2903   }
2904 
2905 // -------------------------------------------------------------------------
2906 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)      \
2907   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2908   TYPE new_value;                                                              \
2909   OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG)                                      \
2910   OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
2911   }
2912 
2913 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2914                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2915 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2916                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2917 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2918                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2919 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
2920                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
2921 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
2922                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
2923 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
2924                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
2925 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
2926                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
2927 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
2928                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
2929 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
2930                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
2931 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
2932                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
2933 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
2934                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
2935 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
2936                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
2937 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
2938                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
2939 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
2940                        KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
2941 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
2942                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
2943 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
2944                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
2945 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
2946                        KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
2947 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
2948                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
2949 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
2950                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
2951 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
2952                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
2953 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
2954                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
2955 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
2956                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
2957 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
2958                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
2959 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
2960                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
2961 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
2962                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
2963 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
2964                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
2965 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
2966                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
2967 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
2968                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
//              TYPE_ID,OP_ID,  TYPE,   BITS, OP,  GOMP_FLAG
2970 
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP      - operator
//     LCK_ID  - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/                   \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG)                                      \
  OP_CRITICAL_CPT_REV(OP, LCK_ID)                                              \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
                        1) // __kmpc_atomic_float10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
                        1) // __kmpc_atomic_float10_div_cpt_rev
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
                        1) // __kmpc_atomic_float16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
                        1) // __kmpc_atomic_float16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
                        1) // __kmpc_atomic_float16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
                        1) // __kmpc_atomic_float16_div_a16_cpt_rev
#endif
#endif

// routines for complex types

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
#define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                    \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (flag) {                                                                  \
    (*lhs) = (rhs)OP(*lhs);                                                    \
    (*out) = (*lhs);                                                           \
  } else {                                                                     \
    (*out) = (*lhs);                                                           \
    (*lhs) = (rhs)OP(*lhs);                                                    \
  }                                                                            \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)                                 \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_CPT_REV_WRK(OP, 0);                                            \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID,          \
                                    GOMP_FLAG)                                 \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG)                                  \
  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                          \
  }
// The end of workaround for cmplx4

// !!! TODO: check if we need to return void for cmplx4 routines
// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
                            1) // __kmpc_atomic_cmplx4_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
                            1) // __kmpc_atomic_cmplx4_div_cpt_rev
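// Note: with this workaround the cmplx4 capture-reverse entry points return
// void; the captured value is handed back through the extra TYPE *out
// parameter used by OP_CRITICAL_CPT_REV_WRK above rather than as a return
// value.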

ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
                        1) // __kmpc_atomic_cmplx8_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
                        1) // __kmpc_atomic_cmplx8_div_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
                        1) // __kmpc_atomic_cmplx10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
                        1) // __kmpc_atomic_cmplx10_div_cpt_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
#endif
#endif

// Capture reverse for mixed type: RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operands' type
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,   \
                                   RTYPE, LCK_ID, MASK, GOMP_FLAG)             \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG)                                      \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                                    LCK_ID, GOMP_FLAG)                         \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) /* send assignment */                \
  OP_CRITICAL_CPT_REV(OP, LCK_ID) /* send assignment */                        \
  }

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp

ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
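// Illustrative example for the mixed-type ("_fp", RHS = _Quad) routines
// (the exact lowering is compiler dependent; loc and gtid are supplied by the
// compiler/runtime):
//
//   double x, v; _Quad q;
//   #pragma omp atomic capture
//   { x = q / x; v = x; }
//
//   // may become:
//   // v = __kmpc_atomic_float8_div_cpt_rev_fp(&loc, gtid, &x, q, 1);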

#endif // KMP_HAVE_QUAD

//   OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}

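// Illustrative example (lowering is compiler dependent; loc and gtid are
// supplied by the compiler/runtime):
//
//   kmp_int32 x, v, expr;
//   #pragma omp atomic capture
//   { v = x; x = expr; }
//
//   // may become:  v = __kmpc_atomic_fixed4_swp(&loc, gtid, &x, expr);
// The routine stores expr atomically and returns the old value of x.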
#define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                        \
  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
                                     TYPE rhs) {                               \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP(LCK_ID)                                                   \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  old_value = (*lhs);                                                          \
  (*lhs) = rhs;                                                                \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return old_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP(FLAG)                                                \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    CRITICAL_SWP(0);                                                           \
  }
#else
#define GOMP_CRITICAL_SWP(FLAG)
#endif /* KMP_GOMP_COMPAT */

#define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                        \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
  TYPE old_value;                                                              \
  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs);                                  \
  return old_value;                                                            \
  }
// ------------------------------------------------------------------------
#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                  \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
  TYPE old_value;                                                              \
  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
  old_value = KMP_XCHG_REAL##BITS(lhs, rhs);                                   \
  return old_value;                                                            \
  }

// ------------------------------------------------------------------------
#define CMPXCHG_SWP(TYPE, BITS)                                                \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    TYPE old_value, new_value;                                                 \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    new_value = rhs;                                                           \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
      KMP_CPU_PAUSE();                                                         \
                                                                               \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
      new_value = rhs;                                                         \
    }                                                                          \
    return old_value;                                                          \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                     \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
  TYPE old_value;                                                              \
  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
  CMPXCHG_SWP(TYPE, BITS)                                                      \
  }

ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp

ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
                      KMP_ARCH_X86) // __kmpc_atomic_float4_swp

#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#else
ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
                      KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#endif

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
#define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)                  \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
  TYPE old_value;                                                              \
  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
  CRITICAL_SWP(LCK_ID)                                                         \
  }

// ------------------------------------------------------------------------
// !!! TODO: check if we need to return void for cmplx4 routines
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.

#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                    \
  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
                                     TYPE rhs, TYPE *out) {                    \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP_WRK(LCK_ID)                                               \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  tmp = (*lhs);                                                                \
  (*lhs) = (rhs);                                                              \
  (*out) = tmp;                                                                \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP_WRK(FLAG)                                            \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    CRITICAL_SWP_WRK(0);                                                       \
  }
#else
#define GOMP_CRITICAL_SWP_WRK(FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)              \
  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                          \
  TYPE tmp;                                                                    \
  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG)                                             \
  CRITICAL_SWP_WRK(LCK_ID)                                                     \
  }
// The end of workaround for cmplx4

ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
#endif
// cmplx4 routine to return void
ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp

// ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32,  8c,   1 )           //
// __kmpc_atomic_cmplx4_swp

ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
                    1) // __kmpc_atomic_float16_a16_swp
ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
                    1) // __kmpc_atomic_cmplx16_a16_swp
#endif
#endif

// End of OpenMP 4.0 Capture

#endif // OMP_40_ENABLED

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

#undef OP_CRITICAL

/* ------------------------------------------------------------------------ */
/* Generic atomic routines                                                  */
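// These routines take the operation as a callback: f(dst, src, rhs) must
// compute  *dst = *src OP *rhs  for the operand size the routine handles.
// A minimal usage sketch (illustrative only; op_add_4 is a hypothetical
// helper, and loc/gtid come from the compiler/runtime):
//
//   static void op_add_4(void *dst, void *src, void *rhs) {
//     *(kmp_int32 *)dst = *(kmp_int32 *)src + *(kmp_int32 *)rhs;
//   }
//   kmp_int32 x, expr;
//   __kmpc_atomic_4(&loc, gtid, &x, &expr, op_add_4); // atomically: x += expr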
3333 
void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#else
      TRUE
#endif
      ) {
    kmp_int8 old_value, new_value;

    old_value = *(kmp_int8 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
                                       *(kmp_int8 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int8 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
// All 1-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
  }
}

void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
#endif
      ) {
    kmp_int16 old_value, new_value;

    old_value = *(kmp_int16 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ16(
        (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int16 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
// All 2-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
  }
}

void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
// FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
// Gomp compatibility is broken if this routine is called for floats.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
#endif
      ) {
    kmp_int32 old_value, new_value;

    old_value = *(kmp_int32 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ32(
        (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int32 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
// Use __kmp_atomic_lock_4i for all 4-byte data,
// even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
  }
}

void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (

#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
#endif
      ) {
    kmp_int64 old_value, new_value;

    old_value = *(kmp_int64 *)lhs;
    (*f)(&new_value, &old_value, rhs);
    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ64(
        (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int64 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
// Use __kmp_atomic_lock_8i for all 8-byte data,
// even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
  }
}

void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
}

void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
}

void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
}

void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
}

// AC: same two routines as GOMP_atomic_start/end, but will be called by our
// compiler; duplicated in order to not use third-party names in pure Intel
// code
// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
void __kmpc_atomic_start(void) {
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
}

void __kmpc_atomic_end(void) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
}
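
// Illustrative use (sketch only): when no specialized entry point fits, the
// compiler may bracket the update with these calls, e.g.
//
//   __kmpc_atomic_start();
//   x = some_update(x, expr); // hypothetical update with no dedicated routine
//   __kmpc_atomic_end();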

/*!
@}
*/

// end of file