1 /*
2  * kmp_atomic.cpp -- ATOMIC implementation routines
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 //                     The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include "kmp_atomic.h"
17 #include "kmp.h" // TRUE, asm routines prototypes
18 
19 typedef unsigned char uchar;
20 typedef unsigned short ushort;
21 
22 /*!
23 @defgroup ATOMIC_OPS Atomic Operations
24 These functions are used for implementing the many different varieties of atomic
25 operations.
26 
27 The compiler is at liberty to inline atomic operations that are naturally
28 supported by the target architecture. For instance on IA-32 architecture an
29 atomic like this can be inlined
30 @code
31 static int s = 0;
32 #pragma omp atomic
33     s++;
34 @endcode
35 using the single instruction: `lock; incl s`
36 
37 However the runtime does provide entrypoints for these operations to support
38 compilers that choose not to inline them. (For instance,
39 `__kmpc_atomic_fixed4_add` could be used to perform the increment above.)
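
For example (an illustrative sketch; the `ident_t` location descriptor and the
global thread id are normally supplied by the compiler), the increment above
could instead be compiled to a call such as
@code
extern ident_t loc; // source location descriptor (assumed, compiler-generated)
void bump(int gtid) {
  static int s = 0;
  __kmpc_atomic_fixed4_add(&loc, gtid, &s, 1); // atomically performs s += 1
}
@endcode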
40 
41 The names of the functions are encoded by using the data type name and the
42 operation name, as in these tables.
43 
44 Data Type  | Data type encoding
45 -----------|---------------
46 int8_t     | `fixed1`
47 uint8_t    | `fixed1u`
48 int16_t    | `fixed2`
49 uint16_t   | `fixed2u`
50 int32_t    | `fixed4`
51 uint32_t   | `fixed4u`
int64_t    | `fixed8`
uint64_t   | `fixed8u`
54 float      | `float4`
55 double     | `float8`
long double (Intel 80-bit x87 floating point) | `float10`
_Quad      | `float16`
complex<float>   |  `cmplx4`
complex<double>  | `cmplx8`
complex<long double> | `cmplx10`
complex<_Quad>   | `cmplx16`
60 <br>
61 
62 Operation | Operation encoding
63 ----------|-------------------
64 + | add
65 - | sub
66 \* | mul
67 / | div
68 & | andb
69 << | shl
70 \>\> | shr
71 \| | orb
72 ^  | xor
73 && | andl
74 \|\| | orl
75 maximum | max
76 minimum | min
77 .eqv.   | eqv
78 .neqv.  | neqv
79 
80 <br>
81 For non-commutative operations, `_rev` can also be added for the reversed
82 operation. For the functions that capture the result, the suffix `_cpt` is
83 added.
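
For example, composing names according to these tables (all of these appear in
the full list below):
@code
__kmpc_atomic_float8_sub      // double subtraction:          *lhs -= rhs
__kmpc_atomic_float8_sub_rev  // reversed double subtraction: *lhs = rhs - *lhs
__kmpc_atomic_fixed4_mul_cpt  // int32_t multiply, returning a captured value
@endcode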
84 
85 Update Functions
86 ================
The general form of an atomic function that just performs an update (without a
`capture`) is
89 @code
void __kmpc_atomic_<datatype>_<operation>(ident_t *id_ref, int gtid,
                                          TYPE *lhs, TYPE rhs);
92 @endcode
@param id_ref  a pointer to the source location
94 @param gtid  the global thread id
95 @param lhs   a pointer to the left operand
96 @param rhs   the right operand
97 
98 `capture` functions
99 ===================
The capture functions perform an atomic update and return a result, which is
either the value of the target before the update or the value after it. They
take an additional argument to determine which result is returned.
Their general form is therefore
104 @code
TYPE __kmpc_atomic_<datatype>_<operation>_cpt(ident_t *id_ref, int gtid,
                                              TYPE *lhs, TYPE rhs, int flag);
107 @endcode
@param id_ref  a pointer to the source location
109 @param gtid  the global thread id
110 @param lhs   a pointer to the left operand
111 @param rhs   the right operand
112 @param flag  one if the result is to be captured *after* the operation, zero if
113 captured *before*.
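
For example (an illustrative sketch; `loc` and `gtid` stand for the usual
compiler-supplied arguments), with `x` initially 10:
@code
kmp_int32 x = 10;
kmp_int32 before = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &x, 5, 0); // 10
kmp_int32 after  = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &x, 5, 1); // 20
@endcode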
114 
The one exception to this is the `complex<float>` type, where the value is not
returned; instead, an extra argument pointer is passed.

These functions look like
119 @code
void __kmpc_atomic_cmplx4_<op>_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
                                   kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag);
122 @endcode
123 
124 Read and Write Operations
125 =========================
126 The OpenMP<sup>*</sup> standard now supports atomic operations that simply
127 ensure that the value is read or written atomically, with no modification
128 performed. In many cases on IA-32 architecture these operations can be inlined
129 since the architecture guarantees that no tearing occurs on aligned objects
130 accessed with a single memory operation of up to 64 bits in size.
131 
132 The general form of the read operations is
133 @code
134 TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
135 @endcode
136 
137 For the write operations the form is
138 @code
void __kmpc_atomic_<type>_wr(ident_t *id_ref, int gtid, TYPE *lhs, TYPE rhs);
141 @endcode
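
For example (an illustrative sketch of the 8-byte forms; `loc` and `gtid` as
above):
@code
kmp_real64 d = 0.0;
kmp_real64 v = __kmpc_atomic_float8_rd(&loc, gtid, &d); // atomic read of d
__kmpc_atomic_float8_wr(&loc, gtid, &d, 3.5);           // atomic write to d
@endcode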
142 
143 Full list of functions
144 ======================
145 This leads to the generation of 376 atomic functions, as follows.
146 
Functions for integers
148 ---------------------
There are versions here for integers of size 1, 2, 4 and 8 bytes, both signed
and unsigned (where that matters).
151 @code
152     __kmpc_atomic_fixed1_add
153     __kmpc_atomic_fixed1_add_cpt
154     __kmpc_atomic_fixed1_add_fp
155     __kmpc_atomic_fixed1_andb
156     __kmpc_atomic_fixed1_andb_cpt
157     __kmpc_atomic_fixed1_andl
158     __kmpc_atomic_fixed1_andl_cpt
159     __kmpc_atomic_fixed1_div
160     __kmpc_atomic_fixed1_div_cpt
161     __kmpc_atomic_fixed1_div_cpt_rev
162     __kmpc_atomic_fixed1_div_float8
163     __kmpc_atomic_fixed1_div_fp
164     __kmpc_atomic_fixed1_div_rev
165     __kmpc_atomic_fixed1_eqv
166     __kmpc_atomic_fixed1_eqv_cpt
167     __kmpc_atomic_fixed1_max
168     __kmpc_atomic_fixed1_max_cpt
169     __kmpc_atomic_fixed1_min
170     __kmpc_atomic_fixed1_min_cpt
171     __kmpc_atomic_fixed1_mul
172     __kmpc_atomic_fixed1_mul_cpt
173     __kmpc_atomic_fixed1_mul_float8
174     __kmpc_atomic_fixed1_mul_fp
175     __kmpc_atomic_fixed1_neqv
176     __kmpc_atomic_fixed1_neqv_cpt
177     __kmpc_atomic_fixed1_orb
178     __kmpc_atomic_fixed1_orb_cpt
179     __kmpc_atomic_fixed1_orl
180     __kmpc_atomic_fixed1_orl_cpt
181     __kmpc_atomic_fixed1_rd
182     __kmpc_atomic_fixed1_shl
183     __kmpc_atomic_fixed1_shl_cpt
184     __kmpc_atomic_fixed1_shl_cpt_rev
185     __kmpc_atomic_fixed1_shl_rev
186     __kmpc_atomic_fixed1_shr
187     __kmpc_atomic_fixed1_shr_cpt
188     __kmpc_atomic_fixed1_shr_cpt_rev
189     __kmpc_atomic_fixed1_shr_rev
190     __kmpc_atomic_fixed1_sub
191     __kmpc_atomic_fixed1_sub_cpt
192     __kmpc_atomic_fixed1_sub_cpt_rev
193     __kmpc_atomic_fixed1_sub_fp
194     __kmpc_atomic_fixed1_sub_rev
195     __kmpc_atomic_fixed1_swp
196     __kmpc_atomic_fixed1_wr
197     __kmpc_atomic_fixed1_xor
198     __kmpc_atomic_fixed1_xor_cpt
199     __kmpc_atomic_fixed1u_add_fp
200     __kmpc_atomic_fixed1u_sub_fp
201     __kmpc_atomic_fixed1u_mul_fp
202     __kmpc_atomic_fixed1u_div
203     __kmpc_atomic_fixed1u_div_cpt
204     __kmpc_atomic_fixed1u_div_cpt_rev
205     __kmpc_atomic_fixed1u_div_fp
206     __kmpc_atomic_fixed1u_div_rev
207     __kmpc_atomic_fixed1u_shr
208     __kmpc_atomic_fixed1u_shr_cpt
209     __kmpc_atomic_fixed1u_shr_cpt_rev
210     __kmpc_atomic_fixed1u_shr_rev
211     __kmpc_atomic_fixed2_add
212     __kmpc_atomic_fixed2_add_cpt
213     __kmpc_atomic_fixed2_add_fp
214     __kmpc_atomic_fixed2_andb
215     __kmpc_atomic_fixed2_andb_cpt
216     __kmpc_atomic_fixed2_andl
217     __kmpc_atomic_fixed2_andl_cpt
218     __kmpc_atomic_fixed2_div
219     __kmpc_atomic_fixed2_div_cpt
220     __kmpc_atomic_fixed2_div_cpt_rev
221     __kmpc_atomic_fixed2_div_float8
222     __kmpc_atomic_fixed2_div_fp
223     __kmpc_atomic_fixed2_div_rev
224     __kmpc_atomic_fixed2_eqv
225     __kmpc_atomic_fixed2_eqv_cpt
226     __kmpc_atomic_fixed2_max
227     __kmpc_atomic_fixed2_max_cpt
228     __kmpc_atomic_fixed2_min
229     __kmpc_atomic_fixed2_min_cpt
230     __kmpc_atomic_fixed2_mul
231     __kmpc_atomic_fixed2_mul_cpt
232     __kmpc_atomic_fixed2_mul_float8
233     __kmpc_atomic_fixed2_mul_fp
234     __kmpc_atomic_fixed2_neqv
235     __kmpc_atomic_fixed2_neqv_cpt
236     __kmpc_atomic_fixed2_orb
237     __kmpc_atomic_fixed2_orb_cpt
238     __kmpc_atomic_fixed2_orl
239     __kmpc_atomic_fixed2_orl_cpt
240     __kmpc_atomic_fixed2_rd
241     __kmpc_atomic_fixed2_shl
242     __kmpc_atomic_fixed2_shl_cpt
243     __kmpc_atomic_fixed2_shl_cpt_rev
244     __kmpc_atomic_fixed2_shl_rev
245     __kmpc_atomic_fixed2_shr
246     __kmpc_atomic_fixed2_shr_cpt
247     __kmpc_atomic_fixed2_shr_cpt_rev
248     __kmpc_atomic_fixed2_shr_rev
249     __kmpc_atomic_fixed2_sub
250     __kmpc_atomic_fixed2_sub_cpt
251     __kmpc_atomic_fixed2_sub_cpt_rev
252     __kmpc_atomic_fixed2_sub_fp
253     __kmpc_atomic_fixed2_sub_rev
254     __kmpc_atomic_fixed2_swp
255     __kmpc_atomic_fixed2_wr
256     __kmpc_atomic_fixed2_xor
257     __kmpc_atomic_fixed2_xor_cpt
258     __kmpc_atomic_fixed2u_add_fp
259     __kmpc_atomic_fixed2u_sub_fp
260     __kmpc_atomic_fixed2u_mul_fp
261     __kmpc_atomic_fixed2u_div
262     __kmpc_atomic_fixed2u_div_cpt
263     __kmpc_atomic_fixed2u_div_cpt_rev
264     __kmpc_atomic_fixed2u_div_fp
265     __kmpc_atomic_fixed2u_div_rev
266     __kmpc_atomic_fixed2u_shr
267     __kmpc_atomic_fixed2u_shr_cpt
268     __kmpc_atomic_fixed2u_shr_cpt_rev
269     __kmpc_atomic_fixed2u_shr_rev
270     __kmpc_atomic_fixed4_add
271     __kmpc_atomic_fixed4_add_cpt
272     __kmpc_atomic_fixed4_add_fp
273     __kmpc_atomic_fixed4_andb
274     __kmpc_atomic_fixed4_andb_cpt
275     __kmpc_atomic_fixed4_andl
276     __kmpc_atomic_fixed4_andl_cpt
277     __kmpc_atomic_fixed4_div
278     __kmpc_atomic_fixed4_div_cpt
279     __kmpc_atomic_fixed4_div_cpt_rev
280     __kmpc_atomic_fixed4_div_float8
281     __kmpc_atomic_fixed4_div_fp
282     __kmpc_atomic_fixed4_div_rev
283     __kmpc_atomic_fixed4_eqv
284     __kmpc_atomic_fixed4_eqv_cpt
285     __kmpc_atomic_fixed4_max
286     __kmpc_atomic_fixed4_max_cpt
287     __kmpc_atomic_fixed4_min
288     __kmpc_atomic_fixed4_min_cpt
289     __kmpc_atomic_fixed4_mul
290     __kmpc_atomic_fixed4_mul_cpt
291     __kmpc_atomic_fixed4_mul_float8
292     __kmpc_atomic_fixed4_mul_fp
293     __kmpc_atomic_fixed4_neqv
294     __kmpc_atomic_fixed4_neqv_cpt
295     __kmpc_atomic_fixed4_orb
296     __kmpc_atomic_fixed4_orb_cpt
297     __kmpc_atomic_fixed4_orl
298     __kmpc_atomic_fixed4_orl_cpt
299     __kmpc_atomic_fixed4_rd
300     __kmpc_atomic_fixed4_shl
301     __kmpc_atomic_fixed4_shl_cpt
302     __kmpc_atomic_fixed4_shl_cpt_rev
303     __kmpc_atomic_fixed4_shl_rev
304     __kmpc_atomic_fixed4_shr
305     __kmpc_atomic_fixed4_shr_cpt
306     __kmpc_atomic_fixed4_shr_cpt_rev
307     __kmpc_atomic_fixed4_shr_rev
308     __kmpc_atomic_fixed4_sub
309     __kmpc_atomic_fixed4_sub_cpt
310     __kmpc_atomic_fixed4_sub_cpt_rev
311     __kmpc_atomic_fixed4_sub_fp
312     __kmpc_atomic_fixed4_sub_rev
313     __kmpc_atomic_fixed4_swp
314     __kmpc_atomic_fixed4_wr
315     __kmpc_atomic_fixed4_xor
316     __kmpc_atomic_fixed4_xor_cpt
317     __kmpc_atomic_fixed4u_add_fp
318     __kmpc_atomic_fixed4u_sub_fp
319     __kmpc_atomic_fixed4u_mul_fp
320     __kmpc_atomic_fixed4u_div
321     __kmpc_atomic_fixed4u_div_cpt
322     __kmpc_atomic_fixed4u_div_cpt_rev
323     __kmpc_atomic_fixed4u_div_fp
324     __kmpc_atomic_fixed4u_div_rev
325     __kmpc_atomic_fixed4u_shr
326     __kmpc_atomic_fixed4u_shr_cpt
327     __kmpc_atomic_fixed4u_shr_cpt_rev
328     __kmpc_atomic_fixed4u_shr_rev
329     __kmpc_atomic_fixed8_add
330     __kmpc_atomic_fixed8_add_cpt
331     __kmpc_atomic_fixed8_add_fp
332     __kmpc_atomic_fixed8_andb
333     __kmpc_atomic_fixed8_andb_cpt
334     __kmpc_atomic_fixed8_andl
335     __kmpc_atomic_fixed8_andl_cpt
336     __kmpc_atomic_fixed8_div
337     __kmpc_atomic_fixed8_div_cpt
338     __kmpc_atomic_fixed8_div_cpt_rev
339     __kmpc_atomic_fixed8_div_float8
340     __kmpc_atomic_fixed8_div_fp
341     __kmpc_atomic_fixed8_div_rev
342     __kmpc_atomic_fixed8_eqv
343     __kmpc_atomic_fixed8_eqv_cpt
344     __kmpc_atomic_fixed8_max
345     __kmpc_atomic_fixed8_max_cpt
346     __kmpc_atomic_fixed8_min
347     __kmpc_atomic_fixed8_min_cpt
348     __kmpc_atomic_fixed8_mul
349     __kmpc_atomic_fixed8_mul_cpt
350     __kmpc_atomic_fixed8_mul_float8
351     __kmpc_atomic_fixed8_mul_fp
352     __kmpc_atomic_fixed8_neqv
353     __kmpc_atomic_fixed8_neqv_cpt
354     __kmpc_atomic_fixed8_orb
355     __kmpc_atomic_fixed8_orb_cpt
356     __kmpc_atomic_fixed8_orl
357     __kmpc_atomic_fixed8_orl_cpt
358     __kmpc_atomic_fixed8_rd
359     __kmpc_atomic_fixed8_shl
360     __kmpc_atomic_fixed8_shl_cpt
361     __kmpc_atomic_fixed8_shl_cpt_rev
362     __kmpc_atomic_fixed8_shl_rev
363     __kmpc_atomic_fixed8_shr
364     __kmpc_atomic_fixed8_shr_cpt
365     __kmpc_atomic_fixed8_shr_cpt_rev
366     __kmpc_atomic_fixed8_shr_rev
367     __kmpc_atomic_fixed8_sub
368     __kmpc_atomic_fixed8_sub_cpt
369     __kmpc_atomic_fixed8_sub_cpt_rev
370     __kmpc_atomic_fixed8_sub_fp
371     __kmpc_atomic_fixed8_sub_rev
372     __kmpc_atomic_fixed8_swp
373     __kmpc_atomic_fixed8_wr
374     __kmpc_atomic_fixed8_xor
375     __kmpc_atomic_fixed8_xor_cpt
376     __kmpc_atomic_fixed8u_add_fp
377     __kmpc_atomic_fixed8u_sub_fp
378     __kmpc_atomic_fixed8u_mul_fp
379     __kmpc_atomic_fixed8u_div
380     __kmpc_atomic_fixed8u_div_cpt
381     __kmpc_atomic_fixed8u_div_cpt_rev
382     __kmpc_atomic_fixed8u_div_fp
383     __kmpc_atomic_fixed8u_div_rev
384     __kmpc_atomic_fixed8u_shr
385     __kmpc_atomic_fixed8u_shr_cpt
386     __kmpc_atomic_fixed8u_shr_cpt_rev
387     __kmpc_atomic_fixed8u_shr_rev
388 @endcode
389 
390 Functions for floating point
391 ----------------------------
There are versions here for floating point numbers of size 4, 8, 10 and 16
bytes. (Ten-byte floats are used by x87, but are now rare.)
394 @code
395     __kmpc_atomic_float4_add
396     __kmpc_atomic_float4_add_cpt
397     __kmpc_atomic_float4_add_float8
398     __kmpc_atomic_float4_add_fp
399     __kmpc_atomic_float4_div
400     __kmpc_atomic_float4_div_cpt
401     __kmpc_atomic_float4_div_cpt_rev
402     __kmpc_atomic_float4_div_float8
403     __kmpc_atomic_float4_div_fp
404     __kmpc_atomic_float4_div_rev
405     __kmpc_atomic_float4_max
406     __kmpc_atomic_float4_max_cpt
407     __kmpc_atomic_float4_min
408     __kmpc_atomic_float4_min_cpt
409     __kmpc_atomic_float4_mul
410     __kmpc_atomic_float4_mul_cpt
411     __kmpc_atomic_float4_mul_float8
412     __kmpc_atomic_float4_mul_fp
413     __kmpc_atomic_float4_rd
414     __kmpc_atomic_float4_sub
415     __kmpc_atomic_float4_sub_cpt
416     __kmpc_atomic_float4_sub_cpt_rev
417     __kmpc_atomic_float4_sub_float8
418     __kmpc_atomic_float4_sub_fp
419     __kmpc_atomic_float4_sub_rev
420     __kmpc_atomic_float4_swp
421     __kmpc_atomic_float4_wr
422     __kmpc_atomic_float8_add
423     __kmpc_atomic_float8_add_cpt
424     __kmpc_atomic_float8_add_fp
425     __kmpc_atomic_float8_div
426     __kmpc_atomic_float8_div_cpt
427     __kmpc_atomic_float8_div_cpt_rev
428     __kmpc_atomic_float8_div_fp
429     __kmpc_atomic_float8_div_rev
430     __kmpc_atomic_float8_max
431     __kmpc_atomic_float8_max_cpt
432     __kmpc_atomic_float8_min
433     __kmpc_atomic_float8_min_cpt
434     __kmpc_atomic_float8_mul
435     __kmpc_atomic_float8_mul_cpt
436     __kmpc_atomic_float8_mul_fp
437     __kmpc_atomic_float8_rd
438     __kmpc_atomic_float8_sub
439     __kmpc_atomic_float8_sub_cpt
440     __kmpc_atomic_float8_sub_cpt_rev
441     __kmpc_atomic_float8_sub_fp
442     __kmpc_atomic_float8_sub_rev
443     __kmpc_atomic_float8_swp
444     __kmpc_atomic_float8_wr
445     __kmpc_atomic_float10_add
446     __kmpc_atomic_float10_add_cpt
447     __kmpc_atomic_float10_add_fp
448     __kmpc_atomic_float10_div
449     __kmpc_atomic_float10_div_cpt
450     __kmpc_atomic_float10_div_cpt_rev
451     __kmpc_atomic_float10_div_fp
452     __kmpc_atomic_float10_div_rev
453     __kmpc_atomic_float10_mul
454     __kmpc_atomic_float10_mul_cpt
455     __kmpc_atomic_float10_mul_fp
456     __kmpc_atomic_float10_rd
457     __kmpc_atomic_float10_sub
458     __kmpc_atomic_float10_sub_cpt
459     __kmpc_atomic_float10_sub_cpt_rev
460     __kmpc_atomic_float10_sub_fp
461     __kmpc_atomic_float10_sub_rev
462     __kmpc_atomic_float10_swp
463     __kmpc_atomic_float10_wr
464     __kmpc_atomic_float16_add
465     __kmpc_atomic_float16_add_cpt
466     __kmpc_atomic_float16_div
467     __kmpc_atomic_float16_div_cpt
468     __kmpc_atomic_float16_div_cpt_rev
469     __kmpc_atomic_float16_div_rev
470     __kmpc_atomic_float16_max
471     __kmpc_atomic_float16_max_cpt
472     __kmpc_atomic_float16_min
473     __kmpc_atomic_float16_min_cpt
474     __kmpc_atomic_float16_mul
475     __kmpc_atomic_float16_mul_cpt
476     __kmpc_atomic_float16_rd
477     __kmpc_atomic_float16_sub
478     __kmpc_atomic_float16_sub_cpt
479     __kmpc_atomic_float16_sub_cpt_rev
480     __kmpc_atomic_float16_sub_rev
481     __kmpc_atomic_float16_swp
482     __kmpc_atomic_float16_wr
483 @endcode
484 
485 Functions for Complex types
486 ---------------------------
Functions for complex types whose component floating point variables are of
size 4, 8, 10 or 16 bytes. The names here are based on the size of the
component float, *not* the size of the complex type. So
`__kmpc_atomic_cmplx8_add` is an operation on a `complex<double>` or
`complex(kind=8)`, *not* a `complex<float>`.
491 
492 @code
493     __kmpc_atomic_cmplx4_add
494     __kmpc_atomic_cmplx4_add_cmplx8
495     __kmpc_atomic_cmplx4_add_cpt
496     __kmpc_atomic_cmplx4_div
497     __kmpc_atomic_cmplx4_div_cmplx8
498     __kmpc_atomic_cmplx4_div_cpt
499     __kmpc_atomic_cmplx4_div_cpt_rev
500     __kmpc_atomic_cmplx4_div_rev
501     __kmpc_atomic_cmplx4_mul
502     __kmpc_atomic_cmplx4_mul_cmplx8
503     __kmpc_atomic_cmplx4_mul_cpt
504     __kmpc_atomic_cmplx4_rd
505     __kmpc_atomic_cmplx4_sub
506     __kmpc_atomic_cmplx4_sub_cmplx8
507     __kmpc_atomic_cmplx4_sub_cpt
508     __kmpc_atomic_cmplx4_sub_cpt_rev
509     __kmpc_atomic_cmplx4_sub_rev
510     __kmpc_atomic_cmplx4_swp
511     __kmpc_atomic_cmplx4_wr
512     __kmpc_atomic_cmplx8_add
513     __kmpc_atomic_cmplx8_add_cpt
514     __kmpc_atomic_cmplx8_div
515     __kmpc_atomic_cmplx8_div_cpt
516     __kmpc_atomic_cmplx8_div_cpt_rev
517     __kmpc_atomic_cmplx8_div_rev
518     __kmpc_atomic_cmplx8_mul
519     __kmpc_atomic_cmplx8_mul_cpt
520     __kmpc_atomic_cmplx8_rd
521     __kmpc_atomic_cmplx8_sub
522     __kmpc_atomic_cmplx8_sub_cpt
523     __kmpc_atomic_cmplx8_sub_cpt_rev
524     __kmpc_atomic_cmplx8_sub_rev
525     __kmpc_atomic_cmplx8_swp
526     __kmpc_atomic_cmplx8_wr
527     __kmpc_atomic_cmplx10_add
528     __kmpc_atomic_cmplx10_add_cpt
529     __kmpc_atomic_cmplx10_div
530     __kmpc_atomic_cmplx10_div_cpt
531     __kmpc_atomic_cmplx10_div_cpt_rev
532     __kmpc_atomic_cmplx10_div_rev
533     __kmpc_atomic_cmplx10_mul
534     __kmpc_atomic_cmplx10_mul_cpt
535     __kmpc_atomic_cmplx10_rd
536     __kmpc_atomic_cmplx10_sub
537     __kmpc_atomic_cmplx10_sub_cpt
538     __kmpc_atomic_cmplx10_sub_cpt_rev
539     __kmpc_atomic_cmplx10_sub_rev
540     __kmpc_atomic_cmplx10_swp
541     __kmpc_atomic_cmplx10_wr
542     __kmpc_atomic_cmplx16_add
543     __kmpc_atomic_cmplx16_add_cpt
544     __kmpc_atomic_cmplx16_div
545     __kmpc_atomic_cmplx16_div_cpt
546     __kmpc_atomic_cmplx16_div_cpt_rev
547     __kmpc_atomic_cmplx16_div_rev
548     __kmpc_atomic_cmplx16_mul
549     __kmpc_atomic_cmplx16_mul_cpt
550     __kmpc_atomic_cmplx16_rd
551     __kmpc_atomic_cmplx16_sub
552     __kmpc_atomic_cmplx16_sub_cpt
553     __kmpc_atomic_cmplx16_sub_cpt_rev
554     __kmpc_atomic_cmplx16_swp
555     __kmpc_atomic_cmplx16_wr
556 @endcode
557 */
558 
559 /*!
560 @ingroup ATOMIC_OPS
561 @{
562 */
563 
564 /*
565  * Global vars
566  */
567 
568 #ifndef KMP_GOMP_COMPAT
569 int __kmp_atomic_mode = 1; // Intel perf
570 #else
571 int __kmp_atomic_mode = 2; // GOMP compatibility
572 #endif /* KMP_GOMP_COMPAT */
573 
574 KMP_ALIGN(128)
575 
576 // Control access to all user coded atomics in Gnu compat mode
577 kmp_atomic_lock_t __kmp_atomic_lock;
578 // Control access to all user coded atomics for 1-byte fixed data types
579 kmp_atomic_lock_t __kmp_atomic_lock_1i;
580 // Control access to all user coded atomics for 2-byte fixed data types
581 kmp_atomic_lock_t __kmp_atomic_lock_2i;
582 // Control access to all user coded atomics for 4-byte fixed data types
583 kmp_atomic_lock_t __kmp_atomic_lock_4i;
584 // Control access to all user coded atomics for kmp_real32 data type
585 kmp_atomic_lock_t __kmp_atomic_lock_4r;
586 // Control access to all user coded atomics for 8-byte fixed data types
587 kmp_atomic_lock_t __kmp_atomic_lock_8i;
588 // Control access to all user coded atomics for kmp_real64 data type
589 kmp_atomic_lock_t __kmp_atomic_lock_8r;
// Control access to all user coded atomics for float complex data type
591 kmp_atomic_lock_t __kmp_atomic_lock_8c;
592 // Control access to all user coded atomics for long double data type
593 kmp_atomic_lock_t __kmp_atomic_lock_10r;
594 // Control access to all user coded atomics for _Quad data type
595 kmp_atomic_lock_t __kmp_atomic_lock_16r;
596 // Control access to all user coded atomics for double complex data type
597 kmp_atomic_lock_t __kmp_atomic_lock_16c;
598 // Control access to all user coded atomics for long double complex type
599 kmp_atomic_lock_t __kmp_atomic_lock_20c;
600 // Control access to all user coded atomics for _Quad complex data type
601 kmp_atomic_lock_t __kmp_atomic_lock_32c;
602 
603 /* 2007-03-02:
604    Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
605    on *_32 and *_32e. This is just a temporary workaround for the problem. It
606    seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
607    in assembler language. */
608 #define KMP_ATOMIC_VOLATILE volatile
609 
610 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
611 
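// These overloads let the generic operation macros below apply the usual
// compound-assignment and comparison operators to the alignment-wrapper
// types (Quad_a4_t, kmp_cmplx128_a16_t, etc.), which carry the actual value
// in their 'q' member.
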
static inline void operator+=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q /= rhs.q;
}
static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q > rhs.q;
}
630 
static inline void operator+=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q /= rhs.q;
}
static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q > rhs.q;
}
649 
static inline void operator+=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q /= rhs.q;
}
662 
static inline void operator+=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q /= rhs.q;
}
679 
680 #endif
681 
682 // ATOMIC implementation routines -----------------------------------------
683 // One routine for each operation and operand type.
// All routine declarations look like
685 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
686 
687 #define KMP_CHECK_GTID                                                         \
688   if (gtid == KMP_GTID_UNKNOWN) {                                              \
689     gtid = __kmp_entry_gtid();                                                 \
690   } // check and get gtid when needed
691 
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands' type and size (fixed* for signed, fixed*u for
//     unsigned fixed-width integers)
695 //     OP_ID   - operation identifier (add, sub, mul, ...)
696 //     TYPE    - operands' type
697 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE)                           \
698   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
699                                              TYPE *lhs, TYPE rhs) {            \
700     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
701     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
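
// For illustration, ATOMIC_BEGIN(fixed4, add, kmp_int32, void) opens the
// definition below (a sketch of the expansion; the closing brace is supplied
// by whichever macro uses ATOMIC_BEGIN):
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));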
702 
703 // ------------------------------------------------------------------------
704 // Lock variables used for critical sections for various size operands
705 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
706 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
707 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // 4-byte int
#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // 8-byte int
711 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
712 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
713 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
714 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
715 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
716 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
717 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
718 
719 // ------------------------------------------------------------------------
720 // Operation on *lhs, rhs bound by critical section
721 //     OP     - operator (it's supposed to contain an assignment)
722 //     LCK_ID - lock identifier
// Note: gtid is not checked here, as it should always be valid
// For 1- and 2-byte operands a valid gtid is expected; other callers must
// check it before invoking this macro
725 #define OP_CRITICAL(OP, LCK_ID)                                                \
726   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
727                                                                                \
728   (*lhs) OP(rhs);                                                              \
729                                                                                \
730   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
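
// For illustration, OP_CRITICAL(+=, 4i) expands (roughly) to:
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
//   (*lhs) += (rhs);
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);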
731 
732 // ------------------------------------------------------------------------
733 // For GNU compatibility, we may need to use a critical section,
734 // even though it is not required by the ISA.
735 //
736 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
737 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
738 // critical section.  On Intel(R) 64, all atomic operations are done with fetch
739 // and add or compare and exchange.  Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
// require a critical section, where we predict that they will be implemented
// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
743 //
744 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
745 // the FLAG parameter should always be 1.  If we know that we will be using
746 // a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
748 // locks that are specialized based upon the size or type of the data.
749 //
750 // If FLAG is 0, then we are relying on dead code elimination by the build
751 // compiler to get rid of the useless block of code, and save a needless
752 // branch at runtime.
753 
754 #ifdef KMP_GOMP_COMPAT
755 #define OP_GOMP_CRITICAL(OP, FLAG)                                             \
756   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
757     KMP_CHECK_GTID;                                                            \
758     OP_CRITICAL(OP, 0);                                                        \
759     return;                                                                    \
760   }
761 #else
762 #define OP_GOMP_CRITICAL(OP, FLAG)
763 #endif /* KMP_GOMP_COMPAT */
764 
765 #if KMP_MIC
766 #define KMP_DO_PAUSE _mm_delay_32(1)
767 #else
768 #define KMP_DO_PAUSE KMP_CPU_PAUSE()
769 #endif /* KMP_MIC */
770 
771 // ------------------------------------------------------------------------
772 // Operation on *lhs, rhs using "compare_and_store" routine
773 //     TYPE    - operands' type
774 //     BITS    - size in bits, used to distinguish low level calls
775 //     OP      - operator
776 #define OP_CMPXCHG(TYPE, BITS, OP)                                             \
777   {                                                                            \
778     TYPE old_value, new_value;                                                 \
779     old_value = *(TYPE volatile *)lhs;                                         \
780     new_value = old_value OP rhs;                                              \
781     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
782         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
783         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
784       KMP_DO_PAUSE;                                                            \
785                                                                                \
786       old_value = *(TYPE volatile *)lhs;                                       \
787       new_value = old_value OP rhs;                                            \
788     }                                                                          \
789   }
790 
791 #if USE_CMPXCHG_FIX
792 // 2007-06-25:
793 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
794 // and win_32e are affected (I verified the asm). Compiler ignores the volatile
795 // qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
796 // compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
797 // the workaround.
798 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                  \
799   {                                                                            \
800     struct _sss {                                                              \
801       TYPE cmp;                                                                \
802       kmp_int##BITS *vvv;                                                      \
803     };                                                                         \
804     struct _sss old_value, new_value;                                          \
805     old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                           \
806     new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                           \
807     *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                           \
808     new_value.cmp = old_value.cmp OP rhs;                                      \
809     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
810         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,   \
811         *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                      \
812       KMP_DO_PAUSE;                                                            \
813                                                                                \
814       *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                         \
815       new_value.cmp = old_value.cmp OP rhs;                                    \
816     }                                                                          \
817   }
818 // end of the first part of the workaround for C78287
819 #endif // USE_CMPXCHG_FIX
820 
821 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
822 
823 // ------------------------------------------------------------------------
824 // X86 or X86_64: no alignment problems ====================================
825 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
826                          GOMP_FLAG)                                            \
827   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
828   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
829   /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
830   KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                        \
831   }
832 // -------------------------------------------------------------------------
833 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
834                        GOMP_FLAG)                                              \
835   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
836   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
837   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
838   }
839 #if USE_CMPXCHG_FIX
840 // -------------------------------------------------------------------------
841 // workaround for C78287 (complex(kind=4) data type)
842 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
843                                   MASK, GOMP_FLAG)                             \
844   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
845   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
846   OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
847   }
848 // end of the second part of the workaround for C78287
849 #endif
850 
851 #else
852 // -------------------------------------------------------------------------
853 // Code for other architectures that don't handle unaligned accesses.
854 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
855                          GOMP_FLAG)                                            \
856   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
857   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
858   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
859     /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */          \
860     KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                      \
861   } else {                                                                     \
862     KMP_CHECK_GTID;                                                            \
863     OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
864   }                                                                            \
865   }
866 // -------------------------------------------------------------------------
867 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
868                        GOMP_FLAG)                                              \
869   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
870   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
871   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
872     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
873   } else {                                                                     \
874     KMP_CHECK_GTID;                                                            \
875     OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
876   }                                                                            \
877   }
878 #if USE_CMPXCHG_FIX
879 // -------------------------------------------------------------------------
880 // workaround for C78287 (complex(kind=4) data type)
881 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
882                                   MASK, GOMP_FLAG)                             \
883   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
884   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
885   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
886     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
887   } else {                                                                     \
888     KMP_CHECK_GTID;                                                            \
889     OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
890   }                                                                            \
891   }
892 // end of the second part of the workaround for C78287
893 #endif // USE_CMPXCHG_FIX
894 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
895 
896 // Routines for ATOMIC 4-byte operands addition and subtraction
897 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
898                  0) // __kmpc_atomic_fixed4_add
899 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
900                  0) // __kmpc_atomic_fixed4_sub
901 
902 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
903                KMP_ARCH_X86) // __kmpc_atomic_float4_add
904 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
905                KMP_ARCH_X86) // __kmpc_atomic_float4_sub
906 
907 // Routines for ATOMIC 8-byte operands addition and subtraction
908 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
909                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
910 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
911                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
912 
913 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
914                KMP_ARCH_X86) // __kmpc_atomic_float8_add
915 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
916                KMP_ARCH_X86) // __kmpc_atomic_float8_sub
917 
918 // ------------------------------------------------------------------------
919 // Entries definition for integer operands
920 //     TYPE_ID - operands type and size (fixed4, float4)
921 //     OP_ID   - operation identifier (add, sub, mul, ...)
922 //     TYPE    - operand type
923 //     BITS    - size in bits, used to distinguish low level calls
924 //     OP      - operator (used in critical section)
925 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
926 //     MASK    - used for alignment check
927 
928 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,MASK,GOMP_FLAG
929 // ------------------------------------------------------------------------
930 // Routines for ATOMIC integer operands, other operators
931 // ------------------------------------------------------------------------
//              TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
933 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
934                KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
935 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
936                0) // __kmpc_atomic_fixed1_andb
937 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
938                KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
939 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
940                KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
941 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
942                KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
943 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
944                0) // __kmpc_atomic_fixed1_orb
945 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
946                KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
947 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
948                KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
949 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
950                KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
951 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
952                KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
953 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
954                0) // __kmpc_atomic_fixed1_xor
955 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
956                KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
957 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
958                0) // __kmpc_atomic_fixed2_andb
959 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
960                KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
961 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
962                KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
963 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
964                KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
965 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
966                0) // __kmpc_atomic_fixed2_orb
967 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
968                KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
969 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
970                KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
971 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
972                KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
973 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
974                KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
975 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
976                0) // __kmpc_atomic_fixed2_xor
977 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
978                0) // __kmpc_atomic_fixed4_andb
979 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
980                KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
981 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
982                KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
983 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
984                KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
985 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
986                0) // __kmpc_atomic_fixed4_orb
987 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
988                KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
989 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
990                KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
991 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
992                KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
993 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
994                0) // __kmpc_atomic_fixed4_xor
995 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
996                KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
997 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
998                KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
999 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
1000                KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
1001 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
1002                KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1003 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1004                KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1005 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1006                KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1007 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1008                KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1009 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1010                KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1011 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1012                KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1013 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1014                KMP_ARCH_X86) // __kmpc_atomic_float4_div
1015 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1016                KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1017 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1018                KMP_ARCH_X86) // __kmpc_atomic_float8_div
1019 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1020                KMP_ARCH_X86) // __kmpc_atomic_float8_mul
//              TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
1022 
1023 /* ------------------------------------------------------------------------ */
1024 /* Routines for C/C++ Reduction operators && and ||                         */
1025 
1026 // ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no compound assignment form
1028 //   TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
1029 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)             \
1030   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1031   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1032   OP_CRITICAL(= *lhs OP, LCK_ID)                                               \
1033   }
1034 
1035 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1036 
1037 // ------------------------------------------------------------------------
1038 // X86 or X86_64: no alignment problems ===================================
1039 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1040   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1041   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1042   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1043   }
1044 
1045 #else
1046 // ------------------------------------------------------------------------
1047 // Code for other architectures that don't handle unaligned accesses.
1048 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1049   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1050   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1051   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1052     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1053   } else {                                                                     \
1054     KMP_CHECK_GTID;                                                            \
1055     OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */              \
1056   }                                                                            \
1057   }
1058 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1059 
1060 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1061               KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1062 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1063               KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1064 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1065               KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1066 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1067               KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1068 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1069               0) // __kmpc_atomic_fixed4_andl
1070 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1071               0) // __kmpc_atomic_fixed4_orl
1072 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1073               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1074 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1075               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1076 
1077 /* ------------------------------------------------------------------------- */
/* Routines for Fortran operators that have no match in C:                   */
1079 /* MAX, MIN, .EQV., .NEQV.                                                   */
1080 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}           */
1081 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}  */
1082 
1083 // -------------------------------------------------------------------------
1084 // MIN and MAX need separate macros
// OP - comparison operator used to check whether any action is needed
1086 #define MIN_MAX_CRITSECT(OP, LCK_ID)                                           \
1087   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1088                                                                                \
1089   if (*lhs OP rhs) { /* still need actions? */                                 \
1090     *lhs = rhs;                                                                \
1091   }                                                                            \
1092   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1093 
1094 // -------------------------------------------------------------------------
1095 #ifdef KMP_GOMP_COMPAT
1096 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)                                        \
1097   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1098     KMP_CHECK_GTID;                                                            \
1099     MIN_MAX_CRITSECT(OP, 0);                                                   \
1100     return;                                                                    \
1101   }
1102 #else
1103 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1104 #endif /* KMP_GOMP_COMPAT */
1105 
1106 // -------------------------------------------------------------------------
1107 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                        \
1108   {                                                                            \
1109     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1110     TYPE old_value;                                                            \
1111     temp_val = *lhs;                                                           \
1112     old_value = temp_val;                                                      \
1113     while (old_value OP rhs && /* still need actions? */                       \
1114            !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
1115                (kmp_int##BITS *)lhs,                                           \
1116                *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
1117                *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
1118       KMP_CPU_PAUSE();                                                         \
1119       temp_val = *lhs;                                                         \
1120       old_value = temp_val;                                                    \
1121     }                                                                          \
1122   }
1123 
1124 // -------------------------------------------------------------------------
1125 // 1-byte, 2-byte operands - use critical section
1126 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)          \
1127   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1128   if (*lhs OP rhs) { /* need actions? */                                       \
1129     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1130     MIN_MAX_CRITSECT(OP, LCK_ID)                                               \
1131   }                                                                            \
1132   }
1133 
1134 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1135 
1136 // -------------------------------------------------------------------------
1137 // X86 or X86_64: no alignment problems ====================================
1138 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
1139                          GOMP_FLAG)                                            \
1140   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1141   if (*lhs OP rhs) {                                                           \
1142     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1143     MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                            \
1144   }                                                                            \
1145   }
1146 
1147 #else
1148 // -------------------------------------------------------------------------
1149 // Code for other architectures that don't handle unaligned accesses.
1150 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
1151                          GOMP_FLAG)                                            \
1152   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1153   if (*lhs OP rhs) {                                                           \
1154     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1155     if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                    \
1156       MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */                    \
1157     } else {                                                                   \
1158       KMP_CHECK_GTID;                                                          \
1159       MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */                     \
1160     }                                                                          \
1161   }                                                                            \
1162   }
1163 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1164 
1165 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1166                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1167 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1168                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1169 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1170                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1171 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1172                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1173 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1174                  0) // __kmpc_atomic_fixed4_max
1175 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1176                  0) // __kmpc_atomic_fixed4_min
1177 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1178                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1179 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1180                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1181 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1182                  KMP_ARCH_X86) // __kmpc_atomic_float4_max
1183 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1184                  KMP_ARCH_X86) // __kmpc_atomic_float4_min
1185 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1186                  KMP_ARCH_X86) // __kmpc_atomic_float8_max
1187 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1188                  KMP_ARCH_X86) // __kmpc_atomic_float8_min
1189 #if KMP_HAVE_QUAD
1190 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1191                  1) // __kmpc_atomic_float16_max
1192 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1193                  1) // __kmpc_atomic_float16_min
1194 #if (KMP_ARCH_X86)
1195 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1196                  1) // __kmpc_atomic_float16_max_a16
1197 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1198                  1) // __kmpc_atomic_float16_min_a16
1199 #endif
1200 #endif
1201 // ------------------------------------------------------------------------
// Need separate macros for .EQV. because of the need for the complement (~)
// OP is ignored for critical sections; ^=~ is used instead
1204 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1205   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1206   OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */                      \
1207   OP_CRITICAL(^= ~, LCK_ID) /* send assignment and complement */               \
1208   }
1209 
1210 // ------------------------------------------------------------------------
1211 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1212 // ------------------------------------------------------------------------
1213 // X86 or X86_64: no alignment problems ===================================
1214 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1215                         GOMP_FLAG)                                             \
1216   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1217   OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */                      \
1218   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1219   }
1220 // ------------------------------------------------------------------------
1221 #else
1222 // ------------------------------------------------------------------------
1223 // Code for other architectures that don't handle unaligned accesses.
1224 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1225                         GOMP_FLAG)                                             \
1226   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1227   OP_GOMP_CRITICAL(^= ~, GOMP_FLAG)                                            \
1228   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1229     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1230   } else {                                                                     \
1231     KMP_CHECK_GTID;                                                            \
1232     OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */           \
1233   }                                                                            \
1234   }
1235 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1236 
1237 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1238                KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1239 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1240                KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1241 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1242                KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1243 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1244                KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1245 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1246                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1247 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1248                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1249 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1250                 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1251 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1252                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1253 
1254 // ------------------------------------------------------------------------
1255 // Routines for Extended types: long double, _Quad, complex flavours (use
1256 // critical section)
1257 //     TYPE_ID, OP_ID, TYPE - detailed above
1258 //     OP      - operator
1259 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1260 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1261   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1262   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */                    \
1263   OP_CRITICAL(OP## =, LCK_ID) /* send assignment */                            \
1264   }
1265 
1266 /* ------------------------------------------------------------------------- */
1267 // routines for long double type
1268 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1269                 1) // __kmpc_atomic_float10_add
1270 ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1271                 1) // __kmpc_atomic_float10_sub
1272 ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1273                 1) // __kmpc_atomic_float10_mul
1274 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1275                 1) // __kmpc_atomic_float10_div
1276 #if KMP_HAVE_QUAD
1277 // routines for _Quad type
1278 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1279                 1) // __kmpc_atomic_float16_add
1280 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1281                 1) // __kmpc_atomic_float16_sub
1282 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1283                 1) // __kmpc_atomic_float16_mul
1284 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1285                 1) // __kmpc_atomic_float16_div
1286 #if (KMP_ARCH_X86)
1287 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1288                 1) // __kmpc_atomic_float16_add_a16
1289 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1290                 1) // __kmpc_atomic_float16_sub_a16
1291 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1292                 1) // __kmpc_atomic_float16_mul_a16
1293 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1294                 1) // __kmpc_atomic_float16_div_a16
1295 #endif
1296 #endif
1297 // routines for complex types
1298 
1299 #if USE_CMPXCHG_FIX
1300 // workaround for C78287 (complex(kind=4) data type)
1301 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1302                           1) // __kmpc_atomic_cmplx4_add
1303 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1304                           1) // __kmpc_atomic_cmplx4_sub
1305 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1306                           1) // __kmpc_atomic_cmplx4_mul
1307 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1308                           1) // __kmpc_atomic_cmplx4_div
1309 // end of the workaround for C78287
1310 #else
1311 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1312 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1313 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1314 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1315 #endif // USE_CMPXCHG_FIX
1316 
1317 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1318 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1319 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1320 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1321 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1322                 1) // __kmpc_atomic_cmplx10_add
1323 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1324                 1) // __kmpc_atomic_cmplx10_sub
1325 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1326                 1) // __kmpc_atomic_cmplx10_mul
1327 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1328                 1) // __kmpc_atomic_cmplx10_div
1329 #if KMP_HAVE_QUAD
1330 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1331                 1) // __kmpc_atomic_cmplx16_add
1332 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1333                 1) // __kmpc_atomic_cmplx16_sub
1334 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1335                 1) // __kmpc_atomic_cmplx16_mul
1336 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1337                 1) // __kmpc_atomic_cmplx16_div
1338 #if (KMP_ARCH_X86)
1339 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1340                 1) // __kmpc_atomic_cmplx16_add_a16
1341 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1342                 1) // __kmpc_atomic_cmplx16_sub_a16
1343 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1344                 1) // __kmpc_atomic_cmplx16_mul_a16
1345 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1346                 1) // __kmpc_atomic_cmplx16_div_a16
1347 #endif
1348 #endif
1349 
1350 #if OMP_40_ENABLED
1351 
1352 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1353 // Supported only on IA-32 architecture and Intel(R) 64
1354 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1355 
1356 // ------------------------------------------------------------------------
1357 // Operation on *lhs, rhs bound by critical section
1358 //     OP     - operator (it's supposed to contain an assignment)
1359 //     LCK_ID - lock identifier
1360 // Note: don't check gtid as it should always be valid
// 1- and 2-byte cases expect a valid parameter; others check before this macro
1362 #define OP_CRITICAL_REV(OP, LCK_ID)                                            \
1363   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1364                                                                                \
1365   (*lhs) = (rhs)OP(*lhs);                                                      \
1366                                                                                \
1367   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1368 
1369 #ifdef KMP_GOMP_COMPAT
1370 #define OP_GOMP_CRITICAL_REV(OP, FLAG)                                         \
1371   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1372     KMP_CHECK_GTID;                                                            \
1373     OP_CRITICAL_REV(OP, 0);                                                    \
1374     return;                                                                    \
1375   }
1376 #else
1377 #define OP_GOMP_CRITICAL_REV(OP, FLAG)
1378 #endif /* KMP_GOMP_COMPAT */
1379 
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operand type and size (fixed* for signed, fixed*u for
//     unsigned fixed-size integers)
1383 //     OP_ID   - operation identifier (add, sub, mul, ...)
1384 //     TYPE    - operands' type
1385 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
1386   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid,  \
1387                                                    TYPE *lhs, TYPE rhs) {      \
1388     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1389     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1390 
1391 // ------------------------------------------------------------------------
1392 // Operation on *lhs, rhs using "compare_and_store" routine
1393 //     TYPE    - operands' type
1394 //     BITS    - size in bits, used to distinguish low level calls
1395 //     OP      - operator
1396 // Note: temp_val introduced in order to force the compiler to read
1397 //       *lhs only once (w/o it the compiler reads *lhs twice)
1398 #define OP_CMPXCHG_REV(TYPE, BITS, OP)                                         \
1399   {                                                                            \
1400     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1401     TYPE old_value, new_value;                                                 \
1402     temp_val = *lhs;                                                           \
1403     old_value = temp_val;                                                      \
1404     new_value = rhs OP old_value;                                              \
1405     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
1406         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
1407         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
1408       KMP_DO_PAUSE;                                                            \
1409                                                                                \
1410       temp_val = *lhs;                                                         \
1411       old_value = temp_val;                                                    \
1412       new_value = rhs OP old_value;                                            \
1413     }                                                                          \
1414   }
1415 
1416 // -------------------------------------------------------------------------
1417 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG)  \
1418   ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
1419   OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
1420   OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
1421   }
1422 
1423 // ------------------------------------------------------------------------
1424 // Entries definition for integer operands
1425 //     TYPE_ID - operands type and size (fixed4, float4)
1426 //     OP_ID   - operation identifier (add, sub, mul, ...)
1427 //     TYPE    - operand type
1428 //     BITS    - size in bits, used to distinguish low level calls
1429 //     OP      - operator (used in critical section)
1430 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1431 
1433 // ------------------------------------------------------------------------
1434 // Routines for ATOMIC integer operands, other operators
1435 // ------------------------------------------------------------------------
1436 //                  TYPE_ID,OP_ID, TYPE,    BITS, OP, LCK_ID, GOMP_FLAG
1437 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1438                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1439 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1440                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1441 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1442                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1443 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1444                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1445 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1446                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1447 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1448                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1449 
1450 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1451                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1452 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1453                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1454 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1455                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1456 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1457                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1458 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1459                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1460 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1461                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1462 
1463 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1464                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1465 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1466                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1467 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1468                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1469 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1470                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1471 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1472                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1473 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1474                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1475 
1476 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1477                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1478 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1479                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1480 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1481                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1482 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1483                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1484 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1485                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1486 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1487                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1488 
1489 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1490                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1491 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1492                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1493 
1494 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1495                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1496 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1497                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1498 //                  TYPE_ID,OP_ID, TYPE,     BITS,OP,LCK_ID, GOMP_FLAG
1499 
1500 // ------------------------------------------------------------------------
1501 // Routines for Extended types: long double, _Quad, complex flavours (use
1502 // critical section)
1503 //     TYPE_ID, OP_ID, TYPE - detailed above
1504 //     OP      - operator
1505 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1506 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
1507   ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
1508   OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
1509   OP_CRITICAL_REV(OP, LCK_ID)                                                  \
1510   }
1511 
1512 /* ------------------------------------------------------------------------- */
1513 // routines for long double type
1514 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1515                     1) // __kmpc_atomic_float10_sub_rev
1516 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1517                     1) // __kmpc_atomic_float10_div_rev
1518 #if KMP_HAVE_QUAD
1519 // routines for _Quad type
1520 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1521                     1) // __kmpc_atomic_float16_sub_rev
1522 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1523                     1) // __kmpc_atomic_float16_div_rev
1524 #if (KMP_ARCH_X86)
1525 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1526                     1) // __kmpc_atomic_float16_sub_a16_rev
1527 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1528                     1) // __kmpc_atomic_float16_div_a16_rev
1529 #endif
1530 #endif
1531 
1532 // routines for complex types
1533 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1534                     1) // __kmpc_atomic_cmplx4_sub_rev
1535 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1536                     1) // __kmpc_atomic_cmplx4_div_rev
1537 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1538                     1) // __kmpc_atomic_cmplx8_sub_rev
1539 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1540                     1) // __kmpc_atomic_cmplx8_div_rev
1541 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1542                     1) // __kmpc_atomic_cmplx10_sub_rev
1543 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1544                     1) // __kmpc_atomic_cmplx10_div_rev
1545 #if KMP_HAVE_QUAD
1546 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1547                     1) // __kmpc_atomic_cmplx16_sub_rev
1548 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1549                     1) // __kmpc_atomic_cmplx16_div_rev
1550 #if (KMP_ARCH_X86)
1551 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1552                     1) // __kmpc_atomic_cmplx16_sub_a16_rev
1553 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1554                     1) // __kmpc_atomic_cmplx16_div_a16_rev
1555 #endif
1556 #endif
1557 
1558 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1559 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1560 
1561 #endif // OMP_40_ENABLED
1562 
1563 /* ------------------------------------------------------------------------ */
/* Routines for mixed types of LHS and RHS, when RHS is "larger".           */
/* Note: in order to reduce the total number of type combinations it is     */
/*       assumed that the compiler converts RHS to the longest floating     */
/*       type, that is _Quad, before calling any of these routines.         */
/* Conversion to _Quad is done by the compiler during the calculation, and  */
/*       conversion back to TYPE before the assignment, like:               */
/*       *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                              */
/* A performance penalty is expected because software emulation is used.    */
1572 /* ------------------------------------------------------------------------ */
1573 
1574 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                \
1575   void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
1576       ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) {                       \
1577     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1578     KA_TRACE(100,                                                              \
1579              ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
1580               gtid));
1581 
1582 // -------------------------------------------------------------------------
1583 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID,  \
1584                            GOMP_FLAG)                                          \
1585   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1586   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */                    \
1587   OP_CRITICAL(OP## =, LCK_ID) /* send assignment */                            \
1588   }
1589 
1590 // -------------------------------------------------------------------------
1591 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1592 // -------------------------------------------------------------------------
1593 // X86 or X86_64: no alignment problems ====================================
1594 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
1595                            LCK_ID, MASK, GOMP_FLAG)                            \
1596   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1597   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1598   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1599   }
1600 // -------------------------------------------------------------------------
1601 #else
1602 // ------------------------------------------------------------------------
1603 // Code for other architectures that don't handle unaligned accesses.
1604 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
1605                            LCK_ID, MASK, GOMP_FLAG)                            \
1606   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1607   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1608   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1609     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1610   } else {                                                                     \
1611     KMP_CHECK_GTID;                                                            \
1612     OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
1613   }                                                                            \
1614   }
1615 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1616 
1617 // -------------------------------------------------------------------------
1618 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1619 // -------------------------------------------------------------------------
1620 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
1621                                RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
1622   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1623   OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
1624   OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
1625   }
1626 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,      \
1627                                LCK_ID, GOMP_FLAG)                              \
1628   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1629   OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
1630   OP_CRITICAL_REV(OP, LCK_ID)                                                  \
1631   }
1632 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1633 
1634 // RHS=float8
1635 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1636                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1637 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1638                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1639 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1640                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1641 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1642                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1643 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1644                    0) // __kmpc_atomic_fixed4_mul_float8
1645 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1646                    0) // __kmpc_atomic_fixed4_div_float8
1647 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1648                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1649 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1650                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1651 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1652                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1653 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1654                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1655 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1656                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1657 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1658                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1659 
1660 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not
1661 // use them)
1662 #if KMP_HAVE_QUAD
1663 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1664                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1665 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1666                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1667 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1668                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1669 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1670                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1671 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1672                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1673 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1674                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1675 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1676                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1677 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1678                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1679 
1680 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1681                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1682 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1683                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1684 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1685                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1686 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1687                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1688 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1689                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1690 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1691                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1692 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1693                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1694 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1695                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1696 
1697 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1698                    0) // __kmpc_atomic_fixed4_add_fp
1699 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1700                    0) // __kmpc_atomic_fixed4u_add_fp
1701 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1702                    0) // __kmpc_atomic_fixed4_sub_fp
1703 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1704                    0) // __kmpc_atomic_fixed4u_sub_fp
1705 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1706                    0) // __kmpc_atomic_fixed4_mul_fp
1707 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1708                    0) // __kmpc_atomic_fixed4u_mul_fp
1709 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1710                    0) // __kmpc_atomic_fixed4_div_fp
1711 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1712                    0) // __kmpc_atomic_fixed4u_div_fp
1713 
1714 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1715                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1716 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1717                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1718 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1719                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1720 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1721                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1722 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1723                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1724 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1725                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1726 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1727                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1728 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1729                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1730 
1731 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1732                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1733 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1734                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1735 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1736                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1737 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1738                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1739 
1740 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1741                    KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1742 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1743                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1744 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1745                    KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1746 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1747                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1748 
1749 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1750                    1) // __kmpc_atomic_float10_add_fp
1751 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1752                    1) // __kmpc_atomic_float10_sub_fp
1753 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1754                    1) // __kmpc_atomic_float10_mul_fp
1755 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1756                    1) // __kmpc_atomic_float10_div_fp
1757 
1758 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1759 // Reverse operations
1760 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1761                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1762 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1763                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1764 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1765                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1766 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1767                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1768 
1769 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1770                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1771 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1772                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1773 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1774                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1775 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1776                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1777 
1778 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1779                        0) // __kmpc_atomic_fixed4_sub_rev_fp
1780 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1781                        0) // __kmpc_atomic_fixed4u_sub_rev_fp
1782 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1783                        0) // __kmpc_atomic_fixed4_div_rev_fp
1784 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1785                        0) // __kmpc_atomic_fixed4u_div_rev_fp
1786 
1787 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1788                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1789 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1790                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1791 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1792                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1793 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1794                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1795 
1796 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1797                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1798 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1799                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1800 
1801 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1802                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1803 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1804                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1805 
1806 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1807                        1) // __kmpc_atomic_float10_sub_rev_fp
1808 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1809                        1) // __kmpc_atomic_float10_div_rev_fp
1810 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1811 
1812 #endif
1813 
1814 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1815 // ------------------------------------------------------------------------
1816 // X86 or X86_64: no alignment problems ====================================
1817 #if USE_CMPXCHG_FIX
1818 // workaround for C78287 (complex(kind=4) data type)
1819 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1820                              LCK_ID, MASK, GOMP_FLAG)                          \
1821   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1822   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1823   OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
1824   }
1825 // end of the second part of the workaround for C78287
1826 #else
1827 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1828                              LCK_ID, MASK, GOMP_FLAG)                          \
1829   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1830   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1831   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1832   }
1833 #endif // USE_CMPXCHG_FIX
1834 #else
1835 // ------------------------------------------------------------------------
1836 // Code for other architectures that don't handle unaligned accesses.
1837 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1838                              LCK_ID, MASK, GOMP_FLAG)                          \
1839   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1840   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1841   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1842     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1843   } else {                                                                     \
1844     KMP_CHECK_GTID;                                                            \
1845     OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
1846   }                                                                            \
1847   }
1848 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1849 
1850 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1851                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1852 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1853                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1854 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1855                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1856 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1857                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1858 
1859 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1860 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1861 
1862 // ------------------------------------------------------------------------
1863 // Atomic READ routines
1864 
1865 // ------------------------------------------------------------------------
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operand type and size (fixed* for signed, fixed*u for
//     unsigned fixed-size integers)
1869 //     OP_ID   - operation identifier (add, sub, mul, ...)
1870 //     TYPE    - operands' type
1871 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE)                      \
1872   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
1873                                              TYPE *loc) {                      \
1874     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1875     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1876 
1877 // ------------------------------------------------------------------------
// Operation on *loc using "compare_and_store_ret" routine
//     TYPE    - operands' type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator
// Note: temp_val introduced in order to force the compiler to read
//       *loc only once (w/o it the compiler reads *loc twice)
// TODO: check if it is still necessary
// Return the old value regardless of the result of the "compare & swap"
// operation
1886 #define OP_CMPXCHG_READ(TYPE, BITS, OP)                                        \
1887   {                                                                            \
1888     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1889     union f_i_union {                                                          \
1890       TYPE f_val;                                                              \
1891       kmp_int##BITS i_val;                                                     \
1892     };                                                                         \
1893     union f_i_union old_value;                                                 \
1894     temp_val = *loc;                                                           \
1895     old_value.f_val = temp_val;                                                \
1896     old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS(                         \
1897         (kmp_int##BITS *)loc,                                                  \
1898         *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val,                     \
1899         *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val);                    \
1900     new_value = old_value.f_val;                                               \
1901     return new_value;                                                          \
1902   }
1903 
1904 // -------------------------------------------------------------------------
// Operation on *loc bound by critical section
//     OP     - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1- and 2-byte cases expect a valid parameter; others check before this macro
1910 #define OP_CRITICAL_READ(OP, LCK_ID)                                           \
1911   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1912                                                                                \
1913   new_value = (*loc);                                                          \
1914                                                                                \
1915   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1916 
1917 // -------------------------------------------------------------------------
1918 #ifdef KMP_GOMP_COMPAT
1919 #define OP_GOMP_CRITICAL_READ(OP, FLAG)                                        \
1920   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1921     KMP_CHECK_GTID;                                                            \
1922     OP_CRITICAL_READ(OP, 0);                                                   \
1923     return new_value;                                                          \
1924   }
1925 #else
1926 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1927 #endif /* KMP_GOMP_COMPAT */
1928 
1929 // -------------------------------------------------------------------------
1930 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
1931   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1932   TYPE new_value;                                                              \
1933   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
1934   new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0);                              \
1935   return new_value;                                                            \
1936   }
1937 // -------------------------------------------------------------------------
1938 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
1939   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1940   TYPE new_value;                                                              \
1941   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
1942   OP_CMPXCHG_READ(TYPE, BITS, OP)                                              \
1943   }
1944 // ------------------------------------------------------------------------
1945 // Routines for Extended types: long double, _Quad, complex flavours (use
1946 // critical section)
1947 //     TYPE_ID, OP_ID, TYPE - detailed above
1948 //     OP      - operator
1949 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1950 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
1951   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1952   TYPE new_value;                                                              \
1953   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */               \
1954   OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */                           \
1955   return new_value;                                                            \
1956   }
1957 
1958 // ------------------------------------------------------------------------
// Fix for cmplx4 read (CQ220361) on Windows* OS. A regular routine with a
// return value doesn't work there, so the read value is returned through an
// additional parameter instead.
1962 #if (KMP_OS_WINDOWS)
1963 
1964 #define OP_CRITICAL_READ_WRK(OP, LCK_ID)                                       \
1965   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1966                                                                                \
1967   (*out) = (*loc);                                                             \
1968                                                                                \
1969   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1970 // ------------------------------------------------------------------------
1971 #ifdef KMP_GOMP_COMPAT
1972 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)                                    \
1973   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1974     KMP_CHECK_GTID;                                                            \
1975     OP_CRITICAL_READ_WRK(OP, 0);                                               \
1976   }
1977 #else
1978 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
1979 #endif /* KMP_GOMP_COMPAT */
1980 // ------------------------------------------------------------------------
1981 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                            \
1982   void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
1983                                          TYPE *loc) {                          \
1984     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1985     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1986 
1987 // ------------------------------------------------------------------------
1988 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)  \
1989   ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                                  \
1990   OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */           \
1991   OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */                       \
1992   }
1993 
1994 #endif // KMP_OS_WINDOWS
1995 
1996 // ------------------------------------------------------------------------
1997 //                  TYPE_ID,OP_ID, TYPE,      OP, GOMP_FLAG
1998 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
1999 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
2000                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
2001 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2002                     KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2003 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2004                     KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2005 
2006 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2007 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2008                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2009 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2010                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2011 
2012 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2013                      1) // __kmpc_atomic_float10_rd
2014 #if KMP_HAVE_QUAD
2015 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2016                      1) // __kmpc_atomic_float16_rd
2017 #endif // KMP_HAVE_QUAD
2018 
2019 // Fix for CQ220361 on Windows* OS
2020 #if (KMP_OS_WINDOWS)
2021 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2022                          1) // __kmpc_atomic_cmplx4_rd
2023 #else
2024 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2025                      1) // __kmpc_atomic_cmplx4_rd
2026 #endif
2027 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2028                      1) // __kmpc_atomic_cmplx8_rd
2029 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2030                      1) // __kmpc_atomic_cmplx10_rd
2031 #if KMP_HAVE_QUAD
2032 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2033                      1) // __kmpc_atomic_cmplx16_rd
2034 #if (KMP_ARCH_X86)
2035 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2036                      1) // __kmpc_atomic_float16_a16_rd
2037 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2038                      1) // __kmpc_atomic_cmplx16_a16_rd
2039 #endif
2040 #endif
2041 
2042 // ------------------------------------------------------------------------
2043 // Atomic WRITE routines
2044 
2045 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)              \
2046   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2047   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2048   KMP_XCHG_FIXED##BITS(lhs, rhs);                                              \
2049   }
2050 // ------------------------------------------------------------------------
2051 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2052   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2053   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2054   KMP_XCHG_REAL##BITS(lhs, rhs);                                               \
2055   }
2056 
2057 // ------------------------------------------------------------------------
2058 // Operation on *lhs, rhs using "compare_and_store" routine
2059 //     TYPE    - operands' type
2060 //     BITS    - size in bits, used to distinguish low level calls
2061 //     OP      - operator
2062 // Note: temp_val introduced in order to force the compiler to read
2063 //       *lhs only once (w/o it the compiler reads *lhs twice)
2064 #define OP_CMPXCHG_WR(TYPE, BITS, OP)                                          \
2065   {                                                                            \
2066     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2067     TYPE old_value, new_value;                                                 \
2068     temp_val = *lhs;                                                           \
2069     old_value = temp_val;                                                      \
2070     new_value = rhs;                                                           \
2071     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2072         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2073         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2074       KMP_CPU_PAUSE();                                                         \
2075                                                                                \
2076       temp_val = *lhs;                                                         \
2077       old_value = temp_val;                                                    \
2078       new_value = rhs;                                                         \
2079     }                                                                          \
2080   }
2081 
2082 // -------------------------------------------------------------------------
2083 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
2084   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2085   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2086   OP_CMPXCHG_WR(TYPE, BITS, OP)                                                \
2087   }
2088 
2089 // ------------------------------------------------------------------------
2090 // Routines for Extended types: long double, _Quad, complex flavours (use
2091 // critical section)
2092 //     TYPE_ID, OP_ID, TYPE - detailed above
2093 //     OP      - operator
2094 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
2095 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)        \
2096   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2097   OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */                        \
2098   OP_CRITICAL(OP, LCK_ID) /* send assignment */                                \
2099   }
2100 // -------------------------------------------------------------------------
2101 
2102 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2103                KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2104 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2105                KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2106 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2107                KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2108 #if (KMP_ARCH_X86)
2109 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2110                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2111 #else
2112 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2113                KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2114 #endif
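// Note: 32-bit x86 (KMP_ARCH_X86) has no 8-byte atomic exchange
// instruction, so the 64-bit write above falls back to the
// compare-and-store loop (OP_CMPXCHG_WR, typically lowered to cmpxchg8b);
// 64-bit targets use KMP_XCHG_FIXED64 directly.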

ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_wr
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
                  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#else
ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#endif

ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
                   1) // __kmpc_atomic_float10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
                   1) // __kmpc_atomic_float16_wr
#endif
ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
                   1) // __kmpc_atomic_cmplx8_wr
ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
                   1) // __kmpc_atomic_cmplx10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
                   1) // __kmpc_atomic_cmplx16_wr
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
                   1) // __kmpc_atomic_float16_a16_wr
ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
                   1) // __kmpc_atomic_cmplx16_a16_wr
#endif
#endif

// ------------------------------------------------------------------------
// Atomic CAPTURE routines
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operand type and size (fixed* for signed fixed-point,
//     fixed*u for unsigned fixed-point)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operands' type
#define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
                                             TYPE *lhs, TYPE rhs, int flag) {  \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

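// Illustrative sketch (not compiled): for example,
// ATOMIC_BEGIN_CPT(fixed4, add_cpt, kmp_int32, kmp_int32) opens the
// following function definition, which a subsequent body macro then
// completes and closes:
//
//   kmp_int32 __kmpc_atomic_fixed4_add_cpt(ident_t *id_ref, int gtid,
//                                          kmp_int32 *lhs, kmp_int32 rhs,
//                                          int flag) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add_cpt: T#%d\n", gtid));
//     ...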
// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
//     OP     - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// For 1- and 2-byte operands a valid gtid is expected; for other types it
// must be checked before this macro is used
#define OP_CRITICAL_CPT(OP, LCK_ID)                                            \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (flag) {                                                                  \
    (*lhs) OP rhs;                                                             \
    new_value = (*lhs);                                                        \
  } else {                                                                     \
    new_value = (*lhs);                                                        \
    (*lhs) OP rhs;                                                             \
  }                                                                            \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return new_value;

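// Illustrative note (assumed lowering, not mandated by the runtime): a
// compiler may translate the two OpenMP capture forms for kmp_int32 x as
//
//   #pragma omp atomic capture
//   { v = x; x += expr; }   // -> v = __kmpc_atomic_fixed4_add_cpt(
//                           //        &loc, gtid, &x, expr, 0);
//   #pragma omp atomic capture
//   v = x += expr;          // -> v = __kmpc_atomic_fixed4_add_cpt(
//                           //        &loc, gtid, &x, expr, 1);
//
// where &loc stands for the compiler-generated source-location descriptor.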
// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT(OP, FLAG)                                         \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_CPT(OP## =, 0);                                                \
  }
#else
#define OP_GOMP_CRITICAL_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
//     TYPE    - operands' type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (without it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT(TYPE, BITS, OP)                                         \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    TYPE old_value, new_value;                                                 \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    new_value = old_value OP rhs;                                              \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
      KMP_CPU_PAUSE();                                                         \
                                                                               \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
      new_value = old_value OP rhs;                                            \
    }                                                                          \
    if (flag) {                                                                \
      return new_value;                                                        \
    } else                                                                     \
      return old_value;                                                        \
  }

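// Illustrative note: the compare-and-store operates on the raw bits of the
// operand; VOLATILE_CAST reinterprets a TYPE lvalue as a same-width integer.
// For example, OP_CMPXCHG_CPT(kmp_real32, 32, +) retries roughly like this
// sketch (not compiled here):
//
//   kmp_real32 old_value = *lhs, new_value = old_value + rhs;
//   while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs,
//                                       *(kmp_int32 *)&old_value,
//                                       *(kmp_int32 *)&new_value)) {
//     KMP_CPU_PAUSE(); // another thread won the race; re-read and retry
//     old_value = *lhs;
//     new_value = old_value + rhs;
//   }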
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)          \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG)                                          \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
  }

// -------------------------------------------------------------------------
#define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE old_value, new_value;                                                   \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG)                                          \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                            \
  if (flag) {                                                                  \
    return old_value OP rhs;                                                   \
  } else                                                                       \
    return old_value;                                                          \
  }
// -------------------------------------------------------------------------

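// Illustrative note: KMP_TEST_THEN_ADD##BITS is a fetch-and-add returning the
// value held *before* the addition. Subtraction reuses it by negating rhs:
// in ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -, 0) the argument
// `OP rhs` expands to `-rhs`, so the body behaves like this sketch (not
// compiled here):
//
//   old_value = KMP_TEST_THEN_ADD32(lhs, -rhs);
//   return flag ? old_value - rhs : old_value;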
ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
                     0) // __kmpc_atomic_fixed4_add_cpt
ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
                     0) // __kmpc_atomic_fixed4_sub_cpt
ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt

ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt

// ------------------------------------------------------------------------
// Entries definition for integer operands
//     TYPE_ID - operands type and size (fixed4, float4)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operand type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator (used in critical section)
//               TYPE_ID,OP_ID,  TYPE,   BITS,OP,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
//              TYPE_ID,OP_ID, TYPE,    BITS, OP,  GOMP_FLAG
ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
                   0) // __kmpc_atomic_fixed1_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
                   0) // __kmpc_atomic_fixed1_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
                   0) // __kmpc_atomic_fixed1_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
                   0) // __kmpc_atomic_fixed2_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
                   0) // __kmpc_atomic_fixed2_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
                   0) // __kmpc_atomic_fixed2_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
                   0) // __kmpc_atomic_fixed4_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
                   0) // __kmpc_atomic_fixed4_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
                   0) // __kmpc_atomic_fixed4_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
//              TYPE_ID,OP_ID, TYPE,    BITS, OP,  GOMP_FLAG

// CAPTURE routines for mixed types: RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operand type and size (fixed* for signed fixed-point,
//     fixed*u for unsigned fixed-point)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operands' type
#define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)            \
  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) {             \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100,                                                              \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
              gtid));

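// Illustrative sketch (not compiled here): the generated mixed-type entry
// points take the narrow type on the left and a _Quad on the right, e.g.
//
//   char __kmpc_atomic_fixed1_add_cpt_fp(ident_t *id_ref, int gtid,
//                                        char *lhs, _Quad rhs, int flag);
//
// The update `*lhs + rhs` is roughly evaluated in _Quad precision and then
// converted back to the 1-byte type by the compare-and-store body.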
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG)                                          \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,     \
                                LCK_ID, GOMP_FLAG)                             \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */                    \
  OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */                        \
  }

ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp

ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_add_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_sub_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_mul_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_div_cpt_fp

#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
// Routines for C/C++ Reduction operators && and ||

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
//     OP     - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// For 1- and 2-byte operands a valid gtid is expected; for other types it
// must be checked before this macro is used
#define OP_CRITICAL_L_CPT(OP, LCK_ID)                                          \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (flag) {                                                                  \
    new_value OP rhs;                                                          \
    (*lhs) = new_value; /* the atomic location must be updated as well */      \
  } else {                                                                     \
    new_value = (*lhs);                                                        \
    (*lhs) OP rhs; /* i.e. (*lhs) = *lhs OP rhs */                             \
  }                                                                            \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)                                       \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_L_CPT(OP, 0);                                                  \
    return new_value;                                                          \
  }
#else
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Need separate macros for && and || because C has no combined-assignment
// forms of the logical operators (there is no &&= or ||=)
#define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG)                                 \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
  }

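// Illustrative note: since there is no `&&=`, the whole assignment is
// smuggled in through the OP argument. In the GOMP path of
// ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&, ...) the argument
// `= *lhs OP` makes `new_value OP rhs` expand to (sketch, not compiled):
//
//   new_value = *lhs && rhs;
//
// while the lock-free path simply applies the bare `&&` inside
// OP_CMPXCHG_CPT.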
ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
                  0) // __kmpc_atomic_fixed4_andl_cpt
ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
                  0) // __kmpc_atomic_fixed4_orl_cpt
ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt

// -------------------------------------------------------------------------
// Routines for Fortran operators that have no exact match in C:
// MAX, MIN, .EQV., .NEQV.
// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - comparison operator used to check whether any action is needed
#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                       \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (*lhs OP rhs) { /* still need actions? */                                 \
    old_value = *lhs;                                                          \
    *lhs = rhs;                                                                \
    if (flag)                                                                  \
      new_value = rhs;                                                         \
    else                                                                       \
      new_value = old_value;                                                   \
  } else { /* lost the race; do not return an uninitialized value */          \
    new_value = *lhs;                                                          \
  }                                                                            \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return new_value;

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)                                    \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    MIN_MAX_CRITSECT_CPT(OP, 0);                                               \
  }
#else
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                    \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    /* old_value is declared by the enclosing macro */                         \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    while (old_value OP rhs && /* still need actions? */                       \
           !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
               (kmp_int##BITS *)lhs,                                           \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
      KMP_CPU_PAUSE();                                                         \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
    }                                                                          \
    if (flag)                                                                  \
      return rhs;                                                              \
    else                                                                       \
      return old_value;                                                        \
  }

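// Illustrative note: for max the comparison operator is `<`, so
// MIN_MAX_CMPXCHG_CPT(kmp_int32, 32, <) keeps retrying only while
// `old_value < rhs` still holds. If a competing thread installs a value
// >= rhs between the read and the compare-and-store, the loop condition
// fails on the re-read and no store is performed at all.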
// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value, old_value;                                                   \
  if (*lhs OP rhs) { /* need actions? */                                       \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
    MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                           \
  }                                                                            \
  return *lhs;                                                                 \
  }

#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value, old_value;                                                   \
  if (*lhs OP rhs) {                                                           \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
    MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                        \
  }                                                                            \
  return *lhs;                                                                 \
  }

MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
                     0) // __kmpc_atomic_fixed4_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
                     0) // __kmpc_atomic_fixed4_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
                     1) // __kmpc_atomic_float16_max_cpt
MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
                     1) // __kmpc_atomic_float16_min_cpt
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
                     1) // __kmpc_atomic_float16_max_a16_cpt
MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
                     1) // __kmpc_atomic_float16_min_a16_cpt
#endif
#endif

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)                                     \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_CPT(OP, 0);                                                    \
  }
#else
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_EQV_CPT(^= ~, GOMP_FLAG) /* send assignment */              \
  OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
  }

// ------------------------------------------------------------------------

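// Illustrative note: .NEQV. is exactly bitwise XOR, while .EQV. relies on
// the identity a EQV b == ~(a ^ b) == a ^ ~b. Hence the eqv entry points
// pass `^= ~` / `^~` so the update becomes (sketch, not compiled):
//
//   *lhs = *lhs ^ ~rhs;
//
// e.g. 0b00001100 EQV 0b00001010 == 0b11111001 (bits that agree yield 1).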
ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP      - operator
//     LCK_ID  - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */                    \
  OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */                        \
  }

2716 
2717 // ------------------------------------------------------------------------
2718 // Workaround for cmplx4. Regular routines with return value don't work
2719 // on Win_32e. Let's return captured values through the additional parameter.
2720 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID)                                        \
2721   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2722                                                                                \
2723   if (flag) {                                                                  \
2724     (*lhs) OP rhs;                                                             \
2725     (*out) = (*lhs);                                                           \
2726   } else {                                                                     \
2727     (*out) = (*lhs);                                                           \
2728     (*lhs) OP rhs;                                                             \
2729   }                                                                            \
2730                                                                                \
2731   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2732   return;
2733 // ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)                                     \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_CPT_WRK(OP## =, 0);                                            \
  }
#else
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                 \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
                                         TYPE rhs, TYPE *out, int flag) {      \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG)                                      \
  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID)                                          \
  }
// The end of workaround for cmplx4

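// Illustrative sketch (not compiled here): the cmplx4 capture entry points
// generated with ATOMIC_CRITICAL_CPT_WRK (see the cmplx4 routines below)
// therefore return void and deliver the captured value through `out`, e.g.
//
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag);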
/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
                    1) // __kmpc_atomic_float10_add_cpt
ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
                    1) // __kmpc_atomic_float10_sub_cpt
ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
                    1) // __kmpc_atomic_float10_mul_cpt
ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
                    1) // __kmpc_atomic_float10_div_cpt
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
                    1) // __kmpc_atomic_float16_add_cpt
ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
                    1) // __kmpc_atomic_float16_sub_cpt
ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
                    1) // __kmpc_atomic_float16_mul_cpt
ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
                    1) // __kmpc_atomic_float16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
                    1) // __kmpc_atomic_float16_add_a16_cpt
ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
                    1) // __kmpc_atomic_float16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
                    1) // __kmpc_atomic_float16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
                    1) // __kmpc_atomic_float16_div_a16_cpt
#endif
#endif

// routines for complex types

// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
                        1) // __kmpc_atomic_cmplx4_add_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
                        1) // __kmpc_atomic_cmplx4_sub_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
                        1) // __kmpc_atomic_cmplx4_mul_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
                        1) // __kmpc_atomic_cmplx4_div_cpt

ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
                    1) // __kmpc_atomic_cmplx8_add_cpt
ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
                    1) // __kmpc_atomic_cmplx8_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
                    1) // __kmpc_atomic_cmplx8_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
                    1) // __kmpc_atomic_cmplx8_div_cpt
ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
                    1) // __kmpc_atomic_cmplx10_add_cpt
ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
                    1) // __kmpc_atomic_cmplx10_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
                    1) // __kmpc_atomic_cmplx10_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
                    1) // __kmpc_atomic_cmplx10_div_cpt
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_a16_cpt
#endif
#endif

#if OMP_40_ENABLED

// OpenMP 4.0: capture with reversed operands, x = expr binop x, for
// non-commutative operations:
//   v = x = expr binop x;  { v = x; x = expr binop x; }
//   { x = expr binop x; v = x; }
// Supported only on IA-32 architecture and Intel(R) 64

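// Illustrative note: "reverse" means the atomic variable is the *right*
// operand of the operator. Assuming a compiler lowers it this way, e.g. for
// kmp_int32 x:
//
//   #pragma omp atomic capture
//   { x = expr - x; v = x; }   // -> v = __kmpc_atomic_fixed4_sub_cpt_rev(
//                              //        &loc, gtid, &x, expr, 1);
//
// flag selects the captured value exactly as in the forward _cpt routines.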
// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
//     OP     - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// For 1- and 2-byte operands a valid gtid is expected; for other types it
// must be checked before this macro is used
#define OP_CRITICAL_CPT_REV(OP, LCK_ID)                                        \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (flag) {                                                                  \
    (*lhs) = (rhs)OP(*lhs);                                                    \
    new_value = (*lhs);                                                        \
  } else {                                                                     \
    new_value = (*lhs);                                                        \
    (*lhs) = (rhs)OP(*lhs);                                                    \
  }                                                                            \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return new_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)                                     \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_CPT_REV(OP, 0);                                                \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
//     TYPE    - operands' type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (without it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                     \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    TYPE old_value, new_value;                                                 \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    new_value = rhs OP old_value;                                              \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
      KMP_CPU_PAUSE();                                                         \
                                                                               \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
      new_value = rhs OP old_value;                                            \
    }                                                                          \
    if (flag) {                                                                \
      return new_value;                                                        \
    } else                                                                     \
      return old_value;                                                        \
  }

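// Illustrative note: the only difference from OP_CMPXCHG_CPT is the operand
// order, `rhs OP old_value` instead of `old_value OP rhs`. For example,
// div_cpt computes *lhs / rhs while div_cpt_rev computes rhs / *lhs
// (sketch, not compiled):
//
//   new_value = rhs / old_value; // OP_CMPXCHG_CPT_REV(TYPE, BITS, /)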
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)      \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  TYPE KMP_ATOMIC_VOLATILE temp_val;                                           \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG)                                      \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
  }

2915 
2916 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2917                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2918 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2919                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2920 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2921                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2922 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
2923                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
2924 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
2925                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
2926 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
2927                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
2928 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
2929                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
2930 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
2931                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
2932 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
2933                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
2934 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
2935                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
2936 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
2937                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
2938 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
2939                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
2940 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
2941                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
2942 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
2943                        KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
2944 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
2945                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
2946 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
2947                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
2948 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
2949                        KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
2950 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
2951                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
2952 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
2953                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
2954 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
2955                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
2956 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
2957                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
2958 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
2959                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
2960 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
2961                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
2962 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
2963                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
2964 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
2965                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
2966 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
2967                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
2968 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
2969                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
2970 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
2971                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
//              TYPE_ID,OP_ID, TYPE,    BITS, OP,  GOMP_FLAG

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP      - operator
//     LCK_ID  - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  TYPE KMP_ATOMIC_VOLATILE temp_val;                                           \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG)                                      \
  OP_CRITICAL_CPT_REV(OP, LCK_ID)                                              \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
                        1) // __kmpc_atomic_float10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
                        1) // __kmpc_atomic_float10_div_cpt_rev
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
                        1) // __kmpc_atomic_float16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
                        1) // __kmpc_atomic_float16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
                        1) // __kmpc_atomic_float16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
                        1) // __kmpc_atomic_float16_div_a16_cpt_rev
#endif
#endif

// routines for complex types

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines that return the value do not work
// on Win_32e, so captured values are returned through an additional out
// parameter instead.
#define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                    \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (flag) {                                                                  \
    (*lhs) = (rhs)OP(*lhs);                                                    \
    (*out) = (*lhs);                                                           \
  } else {                                                                     \
    (*out) = (*lhs);                                                           \
    (*lhs) = (rhs)OP(*lhs);                                                    \
  }                                                                            \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return;
// ------------------------------------------------------------------------
3028 
3029 #ifdef KMP_GOMP_COMPAT
3030 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)                                 \
3031   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3032     KMP_CHECK_GTID;                                                            \
3033     OP_CRITICAL_CPT_REV_WRK(OP, 0);                                            \
3034   }
3035 #else
3036 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3037 #endif /* KMP_GOMP_COMPAT */
3038 // ------------------------------------------------------------------------
3039 
3040 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID,          \
3041                                     GOMP_FLAG)                                 \
3042   ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
3043   OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG)                                  \
3044   OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                          \
3045   }
3046 // The end of workaround for cmplx4
3047 
3048 // !!! TODO: check if we need to return void for cmplx4 routines
3049 // cmplx4 routines to return void
3050 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3051                             1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3052 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3053                             1) // __kmpc_atomic_cmplx4_div_cpt_rev
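/*
A hypothetical call site for the workaround routines; the exact signature
(out pointer after rhs, flag last) is inferred from the use of `out' and
`flag' in OP_CRITICAL_CPT_REV_WRK above:
@code
kmp_cmplx32 x, expr, v;
// With flag == 1: x = expr - x, then v = x (capture after).
// With flag == 0: v = x, then x = expr - x (capture before).
__kmpc_atomic_cmplx4_sub_cpt_rev(id_ref, gtid, &x, expr, &v, 1);
@endcode
*/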
3054 
3055 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3056                         1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3057 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3058                         1) // __kmpc_atomic_cmplx8_div_cpt_rev
3059 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3060                         1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3061 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3062                         1) // __kmpc_atomic_cmplx10_div_cpt_rev
3063 #if KMP_HAVE_QUAD
3064 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3065                         1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3066 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3067                         1) // __kmpc_atomic_cmplx16_div_cpt_rev
3068 #if (KMP_ARCH_X86)
3069 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3070                         1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3071 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3072                         1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3073 #endif
3074 #endif
3075 
3076 // Capture reverse for mixed type: RHS=float16
3077 #if KMP_HAVE_QUAD
3078 
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
3082 //     OP_ID   - operation identifier (add, sub, mul, ...)
3083 //     TYPE    - operands' type
3084 // -------------------------------------------------------------------------
3085 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,   \
3086                                    RTYPE, LCK_ID, MASK, GOMP_FLAG)             \
3087   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
3088   TYPE new_value;                                                              \
3089   OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG)                                      \
3090   OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
3091   }
3092 
3093 // -------------------------------------------------------------------------
3094 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3095                                     LCK_ID, GOMP_FLAG)                         \
3096   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
3097   TYPE new_value;                                                              \
3098   OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) /* send assignment */                \
3099   OP_CRITICAL_CPT_REV(OP, LCK_ID) /* send assignment */                        \
3100   }
3101 
3102 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3103                            KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3104 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3105                            KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3106 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3107                            KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3108 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3109                            KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3110 
3111 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3112                            KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3113 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3114                            1,
3115                            KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3116 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3117                            KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3118 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3119                            1,
3120                            KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3121 
3122 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3123                            3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3124 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3125                            4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3126 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3127                            3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3128 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3129                            4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3130 
3131 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3132                            7,
3133                            KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3134 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3135                            8i, 7,
3136                            KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3137 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3138                            7,
3139                            KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3140 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3141                            8i, 7,
3142                            KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3143 
3144 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3145                            4r, 3,
3146                            KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3147 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3148                            4r, 3,
3149                            KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3150 
3151 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3152                            8r, 7,
3153                            KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3154 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3155                            8r, 7,
3156                            KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3157 
3158 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3159                             10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3160 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3161                             10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
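/*
Source-level pattern served by the `_fp' entry points: the right-hand side has
_Quad type while the atomic variable does not (illustrative only; _Quad needs
compiler support):
@code
float x, v;
_Quad q;
#pragma omp atomic capture
{ x = q / x; v = x; } // may lower to __kmpc_atomic_float4_div_cpt_rev_fp
@endcode
*/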
3162 
3163 #endif // KMP_HAVE_QUAD
3164 
3165 //   OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
3166 
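/*
At the source level this is the capture-write form, e.g.
@code
int x, v;
#pragma omp atomic capture
{ v = x; x = 42; } // v receives the old value; x is overwritten
@endcode
A compiler that does not inline it may emit a call such as
`v = __kmpc_atomic_fixed4_swp(&loc, gtid, &x, 42);' (hypothetical call site).
*/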
3167 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                        \
3168   TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
3169                                      TYPE rhs) {                               \
3170     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
3171     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3172 
3173 #define CRITICAL_SWP(LCK_ID)                                                   \
3174   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3175                                                                                \
3176   old_value = (*lhs);                                                          \
3177   (*lhs) = rhs;                                                                \
3178                                                                                \
3179   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3180   return old_value;
3181 
3182 // ------------------------------------------------------------------------
3183 #ifdef KMP_GOMP_COMPAT
3184 #define GOMP_CRITICAL_SWP(FLAG)                                                \
3185   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3186     KMP_CHECK_GTID;                                                            \
3187     CRITICAL_SWP(0);                                                           \
3188   }
3189 #else
3190 #define GOMP_CRITICAL_SWP(FLAG)
3191 #endif /* KMP_GOMP_COMPAT */
3192 
3193 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                        \
3194   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3195   TYPE old_value;                                                              \
3196   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3197   old_value = KMP_XCHG_FIXED##BITS(lhs, rhs);                                  \
3198   return old_value;                                                            \
3199   }
3200 // ------------------------------------------------------------------------
3201 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                  \
3202   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3203   TYPE old_value;                                                              \
3204   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3205   old_value = KMP_XCHG_REAL##BITS(lhs, rhs);                                   \
3206   return old_value;                                                            \
3207   }
3208 
3209 // ------------------------------------------------------------------------
3210 #define CMPXCHG_SWP(TYPE, BITS)                                                \
3211   {                                                                            \
3212     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
3213     TYPE old_value, new_value;                                                 \
3214     temp_val = *lhs;                                                           \
3215     old_value = temp_val;                                                      \
3216     new_value = rhs;                                                           \
3217     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
3218         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
3219         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
3220       KMP_CPU_PAUSE();                                                         \
3221                                                                                \
3222       temp_val = *lhs;                                                         \
3223       old_value = temp_val;                                                    \
3224       new_value = rhs;                                                         \
3225     }                                                                          \
3226     return old_value;                                                          \
3227   }
3228 
3229 // -------------------------------------------------------------------------
3230 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                     \
3231   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3232   TYPE old_value;                                                              \
3233   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3234   CMPXCHG_SWP(TYPE, BITS)                                                      \
3235   }
3236 
3237 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3238 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3239 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3240 
3241 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3242                       KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3243 
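// On IA-32 there is no 8-byte atomic exchange instruction (only cmpxchg8b),
// so the 8-byte swaps fall back to the compare-and-swap loop.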
3244 #if (KMP_ARCH_X86)
3245 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3246                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3247 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3248                    KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3249 #else
3250 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3251 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3252                       KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3253 #endif
3254 
3255 // ------------------------------------------------------------------------
3256 // Routines for Extended types: long double, _Quad, complex flavours (use
3257 // critical section)
3258 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)                  \
3259   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3260   TYPE old_value;                                                              \
3261   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3262   CRITICAL_SWP(LCK_ID)                                                         \
3263   }
3264 
3265 // ------------------------------------------------------------------------
3266 // !!! TODO: check if we need to return void for cmplx4 routines
// Workaround for cmplx4. Routines that return the result by value don't work
// on Win_32e, so the captured value is returned through an extra parameter.
3269 
3270 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                    \
3271   void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
3272                                      TYPE rhs, TYPE *out) {                    \
3273     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
3274     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3275 
3276 #define CRITICAL_SWP_WRK(LCK_ID)                                               \
3277   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3278                                                                                \
3279   tmp = (*lhs);                                                                \
3280   (*lhs) = (rhs);                                                              \
3281   (*out) = tmp;                                                                \
3282   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3283   return;
3284 // ------------------------------------------------------------------------
3285 
3286 #ifdef KMP_GOMP_COMPAT
3287 #define GOMP_CRITICAL_SWP_WRK(FLAG)                                            \
3288   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3289     KMP_CHECK_GTID;                                                            \
3290     CRITICAL_SWP_WRK(0);                                                       \
3291   }
3292 #else
3293 #define GOMP_CRITICAL_SWP_WRK(FLAG)
3294 #endif /* KMP_GOMP_COMPAT */
3295 // ------------------------------------------------------------------------
3296 
3297 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)              \
3298   ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                          \
3299   TYPE tmp;                                                                    \
3300   GOMP_CRITICAL_SWP_WRK(GOMP_FLAG)                                             \
3301   CRITICAL_SWP_WRK(LCK_ID)                                                     \
3302   }
3303 // The end of workaround for cmplx4
3304 
3305 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3306 #if KMP_HAVE_QUAD
3307 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3308 #endif
3309 // cmplx4 routine to return void
3310 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
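/*
Rough expansion of the cmplx4 swap above, with the GOMP-compat branch, assert
and trace omitted, and assuming ATOMIC_LOCK8c names __kmp_atomic_lock_8c:
@code
void __kmpc_atomic_cmplx4_swp(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs,
                              kmp_cmplx32 rhs, kmp_cmplx32 *out) {
  kmp_cmplx32 tmp;
  __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8c, gtid);
  tmp = (*lhs); // capture the old value ...
  (*lhs) = rhs; // ... then overwrite it
  (*out) = tmp; // old value is returned through the extra parameter
  __kmp_release_atomic_lock(&__kmp_atomic_lock_8c, gtid);
}
@endcode
*/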
3311 
// ATOMIC_CRITICAL_SWP(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
3314 
3315 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3316 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3317 #if KMP_HAVE_QUAD
3318 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3319 #if (KMP_ARCH_X86)
3320 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3321                     1) // __kmpc_atomic_float16_a16_swp
3322 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3323                     1) // __kmpc_atomic_cmplx16_a16_swp
3324 #endif
3325 #endif
3326 
3327 // End of OpenMP 4.0 Capture
3328 
3329 #endif // OMP_40_ENABLED
3330 
3331 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3332 
3333 #undef OP_CRITICAL
3334 
3335 /* ------------------------------------------------------------------------ */
3336 /* Generic atomic routines                                                  */
3337 
3338 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3339                      void (*f)(void *, void *, void *)) {
3340   KMP_DEBUG_ASSERT(__kmp_init_serial);
3341 
3342   if (
3343 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3344       FALSE /* must use lock */
3345 #else
3346       TRUE
3347 #endif
3348       ) {
3349     kmp_int8 old_value, new_value;
3350 
3351     old_value = *(kmp_int8 *)lhs;
3352     (*f)(&new_value, &old_value, rhs);
3353 
3354     /* TODO: Should this be acquire or release? */
3355     while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3356                                        *(kmp_int8 *)&new_value)) {
3357       KMP_CPU_PAUSE();
3358 
3359       old_value = *(kmp_int8 *)lhs;
3360       (*f)(&new_value, &old_value, rhs);
3361     }
3362 
3363     return;
3364   } else {
3365 // All 1-byte data is of integer data type.
3366 
3367 #ifdef KMP_GOMP_COMPAT
3368     if (__kmp_atomic_mode == 2) {
3369       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3370     } else
3371 #endif /* KMP_GOMP_COMPAT */
3372       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3373 
3374     (*f)(lhs, lhs, rhs);
3375 
3376 #ifdef KMP_GOMP_COMPAT
3377     if (__kmp_atomic_mode == 2) {
3378       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3379     } else
3380 #endif /* KMP_GOMP_COMPAT */
3381       __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3382   }
3383 }
3384 
3385 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3386                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
3388 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3389       FALSE /* must use lock */
3390 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3391       TRUE /* no alignment problems */
3392 #else
3393       !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3394 #endif
3395       ) {
3396     kmp_int16 old_value, new_value;
3397 
3398     old_value = *(kmp_int16 *)lhs;
3399     (*f)(&new_value, &old_value, rhs);
3400 
3401     /* TODO: Should this be acquire or release? */
3402     while (!KMP_COMPARE_AND_STORE_ACQ16(
3403         (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3404       KMP_CPU_PAUSE();
3405 
3406       old_value = *(kmp_int16 *)lhs;
3407       (*f)(&new_value, &old_value, rhs);
3408     }
3409 
3410     return;
3411   } else {
3412 // All 2-byte data is of integer data type.
3413 
3414 #ifdef KMP_GOMP_COMPAT
3415     if (__kmp_atomic_mode == 2) {
3416       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3417     } else
3418 #endif /* KMP_GOMP_COMPAT */
3419       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3420 
3421     (*f)(lhs, lhs, rhs);
3422 
3423 #ifdef KMP_GOMP_COMPAT
3424     if (__kmp_atomic_mode == 2) {
3425       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3426     } else
3427 #endif /* KMP_GOMP_COMPAT */
3428       __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3429   }
3430 }
3431 
3432 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3433                      void (*f)(void *, void *, void *)) {
3434   KMP_DEBUG_ASSERT(__kmp_init_serial);
3435 
3436   if (
// FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
// GOMP compatibility is broken if this routine is called for floats.
3439 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3440       TRUE /* no alignment problems */
3441 #else
3442       !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3443 #endif
3444       ) {
3445     kmp_int32 old_value, new_value;
3446 
3447     old_value = *(kmp_int32 *)lhs;
3448     (*f)(&new_value, &old_value, rhs);
3449 
3450     /* TODO: Should this be acquire or release? */
3451     while (!KMP_COMPARE_AND_STORE_ACQ32(
3452         (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3453       KMP_CPU_PAUSE();
3454 
3455       old_value = *(kmp_int32 *)lhs;
3456       (*f)(&new_value, &old_value, rhs);
3457     }
3458 
3459     return;
3460   } else {
3461 // Use __kmp_atomic_lock_4i for all 4-byte data,
3462 // even if it isn't of integer data type.
3463 
3464 #ifdef KMP_GOMP_COMPAT
3465     if (__kmp_atomic_mode == 2) {
3466       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3467     } else
3468 #endif /* KMP_GOMP_COMPAT */
3469       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3470 
3471     (*f)(lhs, lhs, rhs);
3472 
3473 #ifdef KMP_GOMP_COMPAT
3474     if (__kmp_atomic_mode == 2) {
3475       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3476     } else
3477 #endif /* KMP_GOMP_COMPAT */
3478       __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3479   }
3480 }
3481 
3482 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3483                      void (*f)(void *, void *, void *)) {
3484   KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3488       FALSE /* must use lock */
3489 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3490       TRUE /* no alignment problems */
3491 #else
3492       !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3493 #endif
3494       ) {
3495     kmp_int64 old_value, new_value;
3496 
3497     old_value = *(kmp_int64 *)lhs;
3498     (*f)(&new_value, &old_value, rhs);
3499     /* TODO: Should this be acquire or release? */
3500     while (!KMP_COMPARE_AND_STORE_ACQ64(
3501         (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3502       KMP_CPU_PAUSE();
3503 
3504       old_value = *(kmp_int64 *)lhs;
3505       (*f)(&new_value, &old_value, rhs);
3506     }
3507 
3508     return;
3509   } else {
3510 // Use __kmp_atomic_lock_8i for all 8-byte data,
3511 // even if it isn't of integer data type.
3512 
3513 #ifdef KMP_GOMP_COMPAT
3514     if (__kmp_atomic_mode == 2) {
3515       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3516     } else
3517 #endif /* KMP_GOMP_COMPAT */
3518       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3519 
3520     (*f)(lhs, lhs, rhs);
3521 
3522 #ifdef KMP_GOMP_COMPAT
3523     if (__kmp_atomic_mode == 2) {
3524       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3525     } else
3526 #endif /* KMP_GOMP_COMPAT */
3527       __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3528   }
3529 }
3530 
3531 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3532                       void (*f)(void *, void *, void *)) {
3533   KMP_DEBUG_ASSERT(__kmp_init_serial);
3534 
3535 #ifdef KMP_GOMP_COMPAT
3536   if (__kmp_atomic_mode == 2) {
3537     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3538   } else
3539 #endif /* KMP_GOMP_COMPAT */
3540     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3541 
3542   (*f)(lhs, lhs, rhs);
3543 
3544 #ifdef KMP_GOMP_COMPAT
3545   if (__kmp_atomic_mode == 2) {
3546     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3547   } else
3548 #endif /* KMP_GOMP_COMPAT */
3549     __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3550 }
3551 
3552 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3553                       void (*f)(void *, void *, void *)) {
3554   KMP_DEBUG_ASSERT(__kmp_init_serial);
3555 
3556 #ifdef KMP_GOMP_COMPAT
3557   if (__kmp_atomic_mode == 2) {
3558     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3559   } else
3560 #endif /* KMP_GOMP_COMPAT */
3561     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3562 
3563   (*f)(lhs, lhs, rhs);
3564 
3565 #ifdef KMP_GOMP_COMPAT
3566   if (__kmp_atomic_mode == 2) {
3567     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3568   } else
3569 #endif /* KMP_GOMP_COMPAT */
3570     __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3571 }
3572 
3573 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3574                       void (*f)(void *, void *, void *)) {
3575   KMP_DEBUG_ASSERT(__kmp_init_serial);
3576 
3577 #ifdef KMP_GOMP_COMPAT
3578   if (__kmp_atomic_mode == 2) {
3579     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3580   } else
3581 #endif /* KMP_GOMP_COMPAT */
3582     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3583 
3584   (*f)(lhs, lhs, rhs);
3585 
3586 #ifdef KMP_GOMP_COMPAT
3587   if (__kmp_atomic_mode == 2) {
3588     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3589   } else
3590 #endif /* KMP_GOMP_COMPAT */
3591     __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3592 }
3593 
3594 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3595                       void (*f)(void *, void *, void *)) {
3596   KMP_DEBUG_ASSERT(__kmp_init_serial);
3597 
3598 #ifdef KMP_GOMP_COMPAT
3599   if (__kmp_atomic_mode == 2) {
3600     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3601   } else
3602 #endif /* KMP_GOMP_COMPAT */
3603     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3604 
3605   (*f)(lhs, lhs, rhs);
3606 
3607 #ifdef KMP_GOMP_COMPAT
3608   if (__kmp_atomic_mode == 2) {
3609     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3610   } else
3611 #endif /* KMP_GOMP_COMPAT */
3612     __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3613 }
3614 
// AC: these are the same two routines as GOMP_atomic_start/end, but they are
// called by our compiler; duplicated to avoid third-party names in pure Intel
// code.
// TODO: consider adding a GTID parameter after consultation with
// Ernesto/Xinmin.
3618 void __kmpc_atomic_start(void) {
3619   int gtid = __kmp_entry_gtid();
3620   KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3621   __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3622 }
3623 
3624 void __kmpc_atomic_end(void) {
3625   int gtid = __kmp_get_gtid();
3626   KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3627   __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3628 }
3629 
3630 /*!
3631 @}
3632 */
3633 
3634 // end of file
3635