1 /*
2  * kmp_atomic.cpp -- ATOMIC implementation routines
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp_atomic.h"
14 #include "kmp.h" // TRUE, asm routines prototypes
15 
16 typedef unsigned char uchar;
17 typedef unsigned short ushort;
18 
19 /*!
20 @defgroup ATOMIC_OPS Atomic Operations
21 These functions are used for implementing the many different varieties of atomic
22 operations.
23 
The compiler is at liberty to inline atomic operations that are naturally
supported by the target architecture. For instance, on the IA-32 architecture
an atomic operation like this can be inlined
27 @code
28 static int s = 0;
29 #pragma omp atomic
30     s++;
31 @endcode
32 using the single instruction: `lock; incl s`
33 
However, the runtime does provide entrypoints for these operations to support
35 compilers that choose not to inline them. (For instance,
36 `__kmpc_atomic_fixed4_add` could be used to perform the increment above.)
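
As an illustration (a sketch only; the `loc` and `gtid` arguments shown here
are placeholders for the source-location descriptor and global thread id that
the compiler and runtime normally supply), the increment above might be
lowered to a call such as
@code
// Hypothetical lowering of "#pragma omp atomic  s++;" when not inlined:
__kmpc_atomic_fixed4_add(&loc, gtid, &s, 1);
@endcode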
37 
38 The names of the functions are encoded by using the data type name and the
39 operation name, as in these tables.
40 
41 Data Type  | Data type encoding
42 -----------|---------------
43 int8_t     | `fixed1`
44 uint8_t    | `fixed1u`
45 int16_t    | `fixed2`
46 uint16_t   | `fixed2u`
47 int32_t    | `fixed4`
48 uint32_t   | `fixed4u`
int64_t    | `fixed8`
uint64_t   | `fixed8u`
51 float      | `float4`
52 double     | `float8`
long double (80 bit 8087 float)  | `float10`
_Quad (128 bit float)            | `float16`
complex<float>       | `cmplx4`
complex<double>      | `cmplx8`
complex<long double> | `cmplx10`
complex<_Quad>       | `cmplx16`
57 <br>
58 
59 Operation | Operation encoding
60 ----------|-------------------
61 + | add
62 - | sub
63 \* | mul
64 / | div
65 & | andb
66 << | shl
67 \>\> | shr
68 \| | orb
69 ^  | xor
70 && | andl
71 \|\| | orl
72 maximum | max
73 minimum | min
74 .eqv.   | eqv
75 .neqv.  | neqv
76 
77 <br>
78 For non-commutative operations, `_rev` can also be added for the reversed
79 operation. For the functions that capture the result, the suffix `_cpt` is
80 added.
81 
82 Update Functions
83 ================
The general form of an atomic function that just performs an update (without a
`capture`) is
86 @code
void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid,
                                           TYPE * lhs, TYPE rhs );
89 @endcode
@param id_ref  a pointer to the source location
91 @param gtid  the global thread id
92 @param lhs   a pointer to the left operand
93 @param rhs   the right operand
94 
95 `capture` functions
96 ===================
The capture functions perform an atomic update and return a result, which is
either the value before the update or the value after it. They take an
additional argument to determine which result is returned.
Their general form is therefore
101 @code
TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid,
                                               TYPE * lhs, TYPE rhs, int flag );
104 @endcode
@param id_ref  a pointer to the source location
106 @param gtid  the global thread id
107 @param lhs   a pointer to the left operand
108 @param rhs   the right operand
109 @param flag  one if the result is to be captured *after* the operation, zero if
110 captured *before*.
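
For example (again a sketch with placeholder `loc` and `gtid` arguments),
capturing the value *after* atomically adding one, i.e. `x = ++s`, might be
compiled as
@code
int x = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &s, 1, 1);
@endcode
Passing `flag == 0` instead returns the value *before* the update,
corresponding to `x = s++`.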
111 
The one exception to this is the `complex<float>` type, where the value is not
returned; instead an extra output-pointer argument is passed.

These functions look like
116 @code
void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid,
                                    kmp_cmplx32 * lhs, kmp_cmplx32 rhs,
                                    kmp_cmplx32 * out, int flag );
119 @endcode
120 
121 Read and Write Operations
122 =========================
123 The OpenMP<sup>*</sup> standard now supports atomic operations that simply
124 ensure that the value is read or written atomically, with no modification
125 performed. In many cases on IA-32 architecture these operations can be inlined
126 since the architecture guarantees that no tearing occurs on aligned objects
127 accessed with a single memory operation of up to 64 bits in size.
128 
129 The general form of the read operations is
130 @code
131 TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
132 @endcode
133 
134 For the write operations the form is
135 @code
void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs,
                               TYPE rhs );
138 @endcode
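
For instance (a sketch only, with placeholder `loc`, `gtid` and a hypothetical
shared `double` variable `shared`), an atomic read and an atomic write might be
compiled as
@code
double v = __kmpc_atomic_float8_rd(&loc, gtid, &shared);
__kmpc_atomic_float8_wr(&loc, gtid, &shared, 1.0);
@endcode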
139 
140 Full list of functions
141 ======================
142 This leads to the generation of 376 atomic functions, as follows.
143 
Functions for integers
----------------------
There are versions here for integers of size 1, 2, 4 and 8 bytes, both signed
and unsigned (where that matters).
148 @code
149     __kmpc_atomic_fixed1_add
150     __kmpc_atomic_fixed1_add_cpt
151     __kmpc_atomic_fixed1_add_fp
152     __kmpc_atomic_fixed1_andb
153     __kmpc_atomic_fixed1_andb_cpt
154     __kmpc_atomic_fixed1_andl
155     __kmpc_atomic_fixed1_andl_cpt
156     __kmpc_atomic_fixed1_div
157     __kmpc_atomic_fixed1_div_cpt
158     __kmpc_atomic_fixed1_div_cpt_rev
159     __kmpc_atomic_fixed1_div_float8
160     __kmpc_atomic_fixed1_div_fp
161     __kmpc_atomic_fixed1_div_rev
162     __kmpc_atomic_fixed1_eqv
163     __kmpc_atomic_fixed1_eqv_cpt
164     __kmpc_atomic_fixed1_max
165     __kmpc_atomic_fixed1_max_cpt
166     __kmpc_atomic_fixed1_min
167     __kmpc_atomic_fixed1_min_cpt
168     __kmpc_atomic_fixed1_mul
169     __kmpc_atomic_fixed1_mul_cpt
170     __kmpc_atomic_fixed1_mul_float8
171     __kmpc_atomic_fixed1_mul_fp
172     __kmpc_atomic_fixed1_neqv
173     __kmpc_atomic_fixed1_neqv_cpt
174     __kmpc_atomic_fixed1_orb
175     __kmpc_atomic_fixed1_orb_cpt
176     __kmpc_atomic_fixed1_orl
177     __kmpc_atomic_fixed1_orl_cpt
178     __kmpc_atomic_fixed1_rd
179     __kmpc_atomic_fixed1_shl
180     __kmpc_atomic_fixed1_shl_cpt
181     __kmpc_atomic_fixed1_shl_cpt_rev
182     __kmpc_atomic_fixed1_shl_rev
183     __kmpc_atomic_fixed1_shr
184     __kmpc_atomic_fixed1_shr_cpt
185     __kmpc_atomic_fixed1_shr_cpt_rev
186     __kmpc_atomic_fixed1_shr_rev
187     __kmpc_atomic_fixed1_sub
188     __kmpc_atomic_fixed1_sub_cpt
189     __kmpc_atomic_fixed1_sub_cpt_rev
190     __kmpc_atomic_fixed1_sub_fp
191     __kmpc_atomic_fixed1_sub_rev
192     __kmpc_atomic_fixed1_swp
193     __kmpc_atomic_fixed1_wr
194     __kmpc_atomic_fixed1_xor
195     __kmpc_atomic_fixed1_xor_cpt
196     __kmpc_atomic_fixed1u_add_fp
197     __kmpc_atomic_fixed1u_sub_fp
198     __kmpc_atomic_fixed1u_mul_fp
199     __kmpc_atomic_fixed1u_div
200     __kmpc_atomic_fixed1u_div_cpt
201     __kmpc_atomic_fixed1u_div_cpt_rev
202     __kmpc_atomic_fixed1u_div_fp
203     __kmpc_atomic_fixed1u_div_rev
204     __kmpc_atomic_fixed1u_shr
205     __kmpc_atomic_fixed1u_shr_cpt
206     __kmpc_atomic_fixed1u_shr_cpt_rev
207     __kmpc_atomic_fixed1u_shr_rev
208     __kmpc_atomic_fixed2_add
209     __kmpc_atomic_fixed2_add_cpt
210     __kmpc_atomic_fixed2_add_fp
211     __kmpc_atomic_fixed2_andb
212     __kmpc_atomic_fixed2_andb_cpt
213     __kmpc_atomic_fixed2_andl
214     __kmpc_atomic_fixed2_andl_cpt
215     __kmpc_atomic_fixed2_div
216     __kmpc_atomic_fixed2_div_cpt
217     __kmpc_atomic_fixed2_div_cpt_rev
218     __kmpc_atomic_fixed2_div_float8
219     __kmpc_atomic_fixed2_div_fp
220     __kmpc_atomic_fixed2_div_rev
221     __kmpc_atomic_fixed2_eqv
222     __kmpc_atomic_fixed2_eqv_cpt
223     __kmpc_atomic_fixed2_max
224     __kmpc_atomic_fixed2_max_cpt
225     __kmpc_atomic_fixed2_min
226     __kmpc_atomic_fixed2_min_cpt
227     __kmpc_atomic_fixed2_mul
228     __kmpc_atomic_fixed2_mul_cpt
229     __kmpc_atomic_fixed2_mul_float8
230     __kmpc_atomic_fixed2_mul_fp
231     __kmpc_atomic_fixed2_neqv
232     __kmpc_atomic_fixed2_neqv_cpt
233     __kmpc_atomic_fixed2_orb
234     __kmpc_atomic_fixed2_orb_cpt
235     __kmpc_atomic_fixed2_orl
236     __kmpc_atomic_fixed2_orl_cpt
237     __kmpc_atomic_fixed2_rd
238     __kmpc_atomic_fixed2_shl
239     __kmpc_atomic_fixed2_shl_cpt
240     __kmpc_atomic_fixed2_shl_cpt_rev
241     __kmpc_atomic_fixed2_shl_rev
242     __kmpc_atomic_fixed2_shr
243     __kmpc_atomic_fixed2_shr_cpt
244     __kmpc_atomic_fixed2_shr_cpt_rev
245     __kmpc_atomic_fixed2_shr_rev
246     __kmpc_atomic_fixed2_sub
247     __kmpc_atomic_fixed2_sub_cpt
248     __kmpc_atomic_fixed2_sub_cpt_rev
249     __kmpc_atomic_fixed2_sub_fp
250     __kmpc_atomic_fixed2_sub_rev
251     __kmpc_atomic_fixed2_swp
252     __kmpc_atomic_fixed2_wr
253     __kmpc_atomic_fixed2_xor
254     __kmpc_atomic_fixed2_xor_cpt
255     __kmpc_atomic_fixed2u_add_fp
256     __kmpc_atomic_fixed2u_sub_fp
257     __kmpc_atomic_fixed2u_mul_fp
258     __kmpc_atomic_fixed2u_div
259     __kmpc_atomic_fixed2u_div_cpt
260     __kmpc_atomic_fixed2u_div_cpt_rev
261     __kmpc_atomic_fixed2u_div_fp
262     __kmpc_atomic_fixed2u_div_rev
263     __kmpc_atomic_fixed2u_shr
264     __kmpc_atomic_fixed2u_shr_cpt
265     __kmpc_atomic_fixed2u_shr_cpt_rev
266     __kmpc_atomic_fixed2u_shr_rev
267     __kmpc_atomic_fixed4_add
268     __kmpc_atomic_fixed4_add_cpt
269     __kmpc_atomic_fixed4_add_fp
270     __kmpc_atomic_fixed4_andb
271     __kmpc_atomic_fixed4_andb_cpt
272     __kmpc_atomic_fixed4_andl
273     __kmpc_atomic_fixed4_andl_cpt
274     __kmpc_atomic_fixed4_div
275     __kmpc_atomic_fixed4_div_cpt
276     __kmpc_atomic_fixed4_div_cpt_rev
277     __kmpc_atomic_fixed4_div_float8
278     __kmpc_atomic_fixed4_div_fp
279     __kmpc_atomic_fixed4_div_rev
280     __kmpc_atomic_fixed4_eqv
281     __kmpc_atomic_fixed4_eqv_cpt
282     __kmpc_atomic_fixed4_max
283     __kmpc_atomic_fixed4_max_cpt
284     __kmpc_atomic_fixed4_min
285     __kmpc_atomic_fixed4_min_cpt
286     __kmpc_atomic_fixed4_mul
287     __kmpc_atomic_fixed4_mul_cpt
288     __kmpc_atomic_fixed4_mul_float8
289     __kmpc_atomic_fixed4_mul_fp
290     __kmpc_atomic_fixed4_neqv
291     __kmpc_atomic_fixed4_neqv_cpt
292     __kmpc_atomic_fixed4_orb
293     __kmpc_atomic_fixed4_orb_cpt
294     __kmpc_atomic_fixed4_orl
295     __kmpc_atomic_fixed4_orl_cpt
296     __kmpc_atomic_fixed4_rd
297     __kmpc_atomic_fixed4_shl
298     __kmpc_atomic_fixed4_shl_cpt
299     __kmpc_atomic_fixed4_shl_cpt_rev
300     __kmpc_atomic_fixed4_shl_rev
301     __kmpc_atomic_fixed4_shr
302     __kmpc_atomic_fixed4_shr_cpt
303     __kmpc_atomic_fixed4_shr_cpt_rev
304     __kmpc_atomic_fixed4_shr_rev
305     __kmpc_atomic_fixed4_sub
306     __kmpc_atomic_fixed4_sub_cpt
307     __kmpc_atomic_fixed4_sub_cpt_rev
308     __kmpc_atomic_fixed4_sub_fp
309     __kmpc_atomic_fixed4_sub_rev
310     __kmpc_atomic_fixed4_swp
311     __kmpc_atomic_fixed4_wr
312     __kmpc_atomic_fixed4_xor
313     __kmpc_atomic_fixed4_xor_cpt
314     __kmpc_atomic_fixed4u_add_fp
315     __kmpc_atomic_fixed4u_sub_fp
316     __kmpc_atomic_fixed4u_mul_fp
317     __kmpc_atomic_fixed4u_div
318     __kmpc_atomic_fixed4u_div_cpt
319     __kmpc_atomic_fixed4u_div_cpt_rev
320     __kmpc_atomic_fixed4u_div_fp
321     __kmpc_atomic_fixed4u_div_rev
322     __kmpc_atomic_fixed4u_shr
323     __kmpc_atomic_fixed4u_shr_cpt
324     __kmpc_atomic_fixed4u_shr_cpt_rev
325     __kmpc_atomic_fixed4u_shr_rev
326     __kmpc_atomic_fixed8_add
327     __kmpc_atomic_fixed8_add_cpt
328     __kmpc_atomic_fixed8_add_fp
329     __kmpc_atomic_fixed8_andb
330     __kmpc_atomic_fixed8_andb_cpt
331     __kmpc_atomic_fixed8_andl
332     __kmpc_atomic_fixed8_andl_cpt
333     __kmpc_atomic_fixed8_div
334     __kmpc_atomic_fixed8_div_cpt
335     __kmpc_atomic_fixed8_div_cpt_rev
336     __kmpc_atomic_fixed8_div_float8
337     __kmpc_atomic_fixed8_div_fp
338     __kmpc_atomic_fixed8_div_rev
339     __kmpc_atomic_fixed8_eqv
340     __kmpc_atomic_fixed8_eqv_cpt
341     __kmpc_atomic_fixed8_max
342     __kmpc_atomic_fixed8_max_cpt
343     __kmpc_atomic_fixed8_min
344     __kmpc_atomic_fixed8_min_cpt
345     __kmpc_atomic_fixed8_mul
346     __kmpc_atomic_fixed8_mul_cpt
347     __kmpc_atomic_fixed8_mul_float8
348     __kmpc_atomic_fixed8_mul_fp
349     __kmpc_atomic_fixed8_neqv
350     __kmpc_atomic_fixed8_neqv_cpt
351     __kmpc_atomic_fixed8_orb
352     __kmpc_atomic_fixed8_orb_cpt
353     __kmpc_atomic_fixed8_orl
354     __kmpc_atomic_fixed8_orl_cpt
355     __kmpc_atomic_fixed8_rd
356     __kmpc_atomic_fixed8_shl
357     __kmpc_atomic_fixed8_shl_cpt
358     __kmpc_atomic_fixed8_shl_cpt_rev
359     __kmpc_atomic_fixed8_shl_rev
360     __kmpc_atomic_fixed8_shr
361     __kmpc_atomic_fixed8_shr_cpt
362     __kmpc_atomic_fixed8_shr_cpt_rev
363     __kmpc_atomic_fixed8_shr_rev
364     __kmpc_atomic_fixed8_sub
365     __kmpc_atomic_fixed8_sub_cpt
366     __kmpc_atomic_fixed8_sub_cpt_rev
367     __kmpc_atomic_fixed8_sub_fp
368     __kmpc_atomic_fixed8_sub_rev
369     __kmpc_atomic_fixed8_swp
370     __kmpc_atomic_fixed8_wr
371     __kmpc_atomic_fixed8_xor
372     __kmpc_atomic_fixed8_xor_cpt
373     __kmpc_atomic_fixed8u_add_fp
374     __kmpc_atomic_fixed8u_sub_fp
375     __kmpc_atomic_fixed8u_mul_fp
376     __kmpc_atomic_fixed8u_div
377     __kmpc_atomic_fixed8u_div_cpt
378     __kmpc_atomic_fixed8u_div_cpt_rev
379     __kmpc_atomic_fixed8u_div_fp
380     __kmpc_atomic_fixed8u_div_rev
381     __kmpc_atomic_fixed8u_shr
382     __kmpc_atomic_fixed8u_shr_cpt
383     __kmpc_atomic_fixed8u_shr_cpt_rev
384     __kmpc_atomic_fixed8u_shr_rev
385 @endcode
386 
387 Functions for floating point
388 ----------------------------
There are versions here for floating point numbers of size 4, 8, 10 and 16
bytes. (Ten-byte floats are used by the x87 FPU, but are now rare.)
391 @code
392     __kmpc_atomic_float4_add
393     __kmpc_atomic_float4_add_cpt
394     __kmpc_atomic_float4_add_float8
395     __kmpc_atomic_float4_add_fp
396     __kmpc_atomic_float4_div
397     __kmpc_atomic_float4_div_cpt
398     __kmpc_atomic_float4_div_cpt_rev
399     __kmpc_atomic_float4_div_float8
400     __kmpc_atomic_float4_div_fp
401     __kmpc_atomic_float4_div_rev
402     __kmpc_atomic_float4_max
403     __kmpc_atomic_float4_max_cpt
404     __kmpc_atomic_float4_min
405     __kmpc_atomic_float4_min_cpt
406     __kmpc_atomic_float4_mul
407     __kmpc_atomic_float4_mul_cpt
408     __kmpc_atomic_float4_mul_float8
409     __kmpc_atomic_float4_mul_fp
410     __kmpc_atomic_float4_rd
411     __kmpc_atomic_float4_sub
412     __kmpc_atomic_float4_sub_cpt
413     __kmpc_atomic_float4_sub_cpt_rev
414     __kmpc_atomic_float4_sub_float8
415     __kmpc_atomic_float4_sub_fp
416     __kmpc_atomic_float4_sub_rev
417     __kmpc_atomic_float4_swp
418     __kmpc_atomic_float4_wr
419     __kmpc_atomic_float8_add
420     __kmpc_atomic_float8_add_cpt
421     __kmpc_atomic_float8_add_fp
422     __kmpc_atomic_float8_div
423     __kmpc_atomic_float8_div_cpt
424     __kmpc_atomic_float8_div_cpt_rev
425     __kmpc_atomic_float8_div_fp
426     __kmpc_atomic_float8_div_rev
427     __kmpc_atomic_float8_max
428     __kmpc_atomic_float8_max_cpt
429     __kmpc_atomic_float8_min
430     __kmpc_atomic_float8_min_cpt
431     __kmpc_atomic_float8_mul
432     __kmpc_atomic_float8_mul_cpt
433     __kmpc_atomic_float8_mul_fp
434     __kmpc_atomic_float8_rd
435     __kmpc_atomic_float8_sub
436     __kmpc_atomic_float8_sub_cpt
437     __kmpc_atomic_float8_sub_cpt_rev
438     __kmpc_atomic_float8_sub_fp
439     __kmpc_atomic_float8_sub_rev
440     __kmpc_atomic_float8_swp
441     __kmpc_atomic_float8_wr
442     __kmpc_atomic_float10_add
443     __kmpc_atomic_float10_add_cpt
444     __kmpc_atomic_float10_add_fp
445     __kmpc_atomic_float10_div
446     __kmpc_atomic_float10_div_cpt
447     __kmpc_atomic_float10_div_cpt_rev
448     __kmpc_atomic_float10_div_fp
449     __kmpc_atomic_float10_div_rev
450     __kmpc_atomic_float10_mul
451     __kmpc_atomic_float10_mul_cpt
452     __kmpc_atomic_float10_mul_fp
453     __kmpc_atomic_float10_rd
454     __kmpc_atomic_float10_sub
455     __kmpc_atomic_float10_sub_cpt
456     __kmpc_atomic_float10_sub_cpt_rev
457     __kmpc_atomic_float10_sub_fp
458     __kmpc_atomic_float10_sub_rev
459     __kmpc_atomic_float10_swp
460     __kmpc_atomic_float10_wr
461     __kmpc_atomic_float16_add
462     __kmpc_atomic_float16_add_cpt
463     __kmpc_atomic_float16_div
464     __kmpc_atomic_float16_div_cpt
465     __kmpc_atomic_float16_div_cpt_rev
466     __kmpc_atomic_float16_div_rev
467     __kmpc_atomic_float16_max
468     __kmpc_atomic_float16_max_cpt
469     __kmpc_atomic_float16_min
470     __kmpc_atomic_float16_min_cpt
471     __kmpc_atomic_float16_mul
472     __kmpc_atomic_float16_mul_cpt
473     __kmpc_atomic_float16_rd
474     __kmpc_atomic_float16_sub
475     __kmpc_atomic_float16_sub_cpt
476     __kmpc_atomic_float16_sub_cpt_rev
477     __kmpc_atomic_float16_sub_rev
478     __kmpc_atomic_float16_swp
479     __kmpc_atomic_float16_wr
480 @endcode
481 
482 Functions for Complex types
483 ---------------------------
Functions for complex types whose component floating point variables are of
size 4, 8, 10 or 16 bytes. The names here are based on the size of the
component float, *not* the size of the complex type. So
`__kmpc_atomic_cmplx8_add` is an operation on a `complex<double>` or
`complex(kind=8)`, *not* a `complex<float>`.
488 
489 @code
490     __kmpc_atomic_cmplx4_add
491     __kmpc_atomic_cmplx4_add_cmplx8
492     __kmpc_atomic_cmplx4_add_cpt
493     __kmpc_atomic_cmplx4_div
494     __kmpc_atomic_cmplx4_div_cmplx8
495     __kmpc_atomic_cmplx4_div_cpt
496     __kmpc_atomic_cmplx4_div_cpt_rev
497     __kmpc_atomic_cmplx4_div_rev
498     __kmpc_atomic_cmplx4_mul
499     __kmpc_atomic_cmplx4_mul_cmplx8
500     __kmpc_atomic_cmplx4_mul_cpt
501     __kmpc_atomic_cmplx4_rd
502     __kmpc_atomic_cmplx4_sub
503     __kmpc_atomic_cmplx4_sub_cmplx8
504     __kmpc_atomic_cmplx4_sub_cpt
505     __kmpc_atomic_cmplx4_sub_cpt_rev
506     __kmpc_atomic_cmplx4_sub_rev
507     __kmpc_atomic_cmplx4_swp
508     __kmpc_atomic_cmplx4_wr
509     __kmpc_atomic_cmplx8_add
510     __kmpc_atomic_cmplx8_add_cpt
511     __kmpc_atomic_cmplx8_div
512     __kmpc_atomic_cmplx8_div_cpt
513     __kmpc_atomic_cmplx8_div_cpt_rev
514     __kmpc_atomic_cmplx8_div_rev
515     __kmpc_atomic_cmplx8_mul
516     __kmpc_atomic_cmplx8_mul_cpt
517     __kmpc_atomic_cmplx8_rd
518     __kmpc_atomic_cmplx8_sub
519     __kmpc_atomic_cmplx8_sub_cpt
520     __kmpc_atomic_cmplx8_sub_cpt_rev
521     __kmpc_atomic_cmplx8_sub_rev
522     __kmpc_atomic_cmplx8_swp
523     __kmpc_atomic_cmplx8_wr
524     __kmpc_atomic_cmplx10_add
525     __kmpc_atomic_cmplx10_add_cpt
526     __kmpc_atomic_cmplx10_div
527     __kmpc_atomic_cmplx10_div_cpt
528     __kmpc_atomic_cmplx10_div_cpt_rev
529     __kmpc_atomic_cmplx10_div_rev
530     __kmpc_atomic_cmplx10_mul
531     __kmpc_atomic_cmplx10_mul_cpt
532     __kmpc_atomic_cmplx10_rd
533     __kmpc_atomic_cmplx10_sub
534     __kmpc_atomic_cmplx10_sub_cpt
535     __kmpc_atomic_cmplx10_sub_cpt_rev
536     __kmpc_atomic_cmplx10_sub_rev
537     __kmpc_atomic_cmplx10_swp
538     __kmpc_atomic_cmplx10_wr
539     __kmpc_atomic_cmplx16_add
540     __kmpc_atomic_cmplx16_add_cpt
541     __kmpc_atomic_cmplx16_div
542     __kmpc_atomic_cmplx16_div_cpt
543     __kmpc_atomic_cmplx16_div_cpt_rev
544     __kmpc_atomic_cmplx16_div_rev
545     __kmpc_atomic_cmplx16_mul
546     __kmpc_atomic_cmplx16_mul_cpt
547     __kmpc_atomic_cmplx16_rd
548     __kmpc_atomic_cmplx16_sub
549     __kmpc_atomic_cmplx16_sub_cpt
550     __kmpc_atomic_cmplx16_sub_cpt_rev
551     __kmpc_atomic_cmplx16_swp
552     __kmpc_atomic_cmplx16_wr
553 @endcode
554 */
555 
556 /*!
557 @ingroup ATOMIC_OPS
558 @{
559 */
560 
561 /*
562  * Global vars
563  */
564 
565 #ifndef KMP_GOMP_COMPAT
566 int __kmp_atomic_mode = 1; // Intel perf
567 #else
568 int __kmp_atomic_mode = 2; // GOMP compatibility
569 #endif /* KMP_GOMP_COMPAT */
570 
571 KMP_ALIGN(128)
572 
573 // Control access to all user coded atomics in Gnu compat mode
574 kmp_atomic_lock_t __kmp_atomic_lock;
575 // Control access to all user coded atomics for 1-byte fixed data types
576 kmp_atomic_lock_t __kmp_atomic_lock_1i;
577 // Control access to all user coded atomics for 2-byte fixed data types
578 kmp_atomic_lock_t __kmp_atomic_lock_2i;
579 // Control access to all user coded atomics for 4-byte fixed data types
580 kmp_atomic_lock_t __kmp_atomic_lock_4i;
581 // Control access to all user coded atomics for kmp_real32 data type
582 kmp_atomic_lock_t __kmp_atomic_lock_4r;
583 // Control access to all user coded atomics for 8-byte fixed data types
584 kmp_atomic_lock_t __kmp_atomic_lock_8i;
585 // Control access to all user coded atomics for kmp_real64 data type
586 kmp_atomic_lock_t __kmp_atomic_lock_8r;
587 // Control access to all user coded atomics for complex byte data type
588 kmp_atomic_lock_t __kmp_atomic_lock_8c;
589 // Control access to all user coded atomics for long double data type
590 kmp_atomic_lock_t __kmp_atomic_lock_10r;
591 // Control access to all user coded atomics for _Quad data type
592 kmp_atomic_lock_t __kmp_atomic_lock_16r;
593 // Control access to all user coded atomics for double complex data type
594 kmp_atomic_lock_t __kmp_atomic_lock_16c;
595 // Control access to all user coded atomics for long double complex type
596 kmp_atomic_lock_t __kmp_atomic_lock_20c;
597 // Control access to all user coded atomics for _Quad complex data type
598 kmp_atomic_lock_t __kmp_atomic_lock_32c;
599 
600 /* 2007-03-02:
601    Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
602    on *_32 and *_32e. This is just a temporary workaround for the problem. It
603    seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
604    in assembler language. */
605 #define KMP_ATOMIC_VOLATILE volatile
606 
607 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
608 
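// These operator overloads let the generic macros below (e.g. OP_CRITICAL and
// MIN_MAX_CRITSECT), which apply an operator directly to *lhs and rhs, be used
// with the aligned wrapper structs as well as with built-in types.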
609 static inline void operator+=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
610   lhs.q += rhs.q;
611 }
612 static inline void operator-=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
613   lhs.q -= rhs.q;
614 }
615 static inline void operator*=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
616   lhs.q *= rhs.q;
617 }
618 static inline void operator/=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
619   lhs.q /= rhs.q;
620 }
621 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
622   return lhs.q < rhs.q;
623 }
624 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
625   return lhs.q > rhs.q;
626 }
627 
628 static inline void operator+=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
629   lhs.q += rhs.q;
630 }
631 static inline void operator-=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
632   lhs.q -= rhs.q;
633 }
634 static inline void operator*=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
635   lhs.q *= rhs.q;
636 }
637 static inline void operator/=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
638   lhs.q /= rhs.q;
639 }
640 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
641   return lhs.q < rhs.q;
642 }
643 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
644   return lhs.q > rhs.q;
645 }
646 
647 static inline void operator+=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
648   lhs.q += rhs.q;
649 }
650 static inline void operator-=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
651   lhs.q -= rhs.q;
652 }
653 static inline void operator*=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
654   lhs.q *= rhs.q;
655 }
656 static inline void operator/=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
657   lhs.q /= rhs.q;
658 }
659 
660 static inline void operator+=(kmp_cmplx128_a16_t &lhs,
661                               kmp_cmplx128_a16_t &rhs) {
662   lhs.q += rhs.q;
663 }
664 static inline void operator-=(kmp_cmplx128_a16_t &lhs,
665                               kmp_cmplx128_a16_t &rhs) {
666   lhs.q -= rhs.q;
667 }
668 static inline void operator*=(kmp_cmplx128_a16_t &lhs,
669                               kmp_cmplx128_a16_t &rhs) {
670   lhs.q *= rhs.q;
671 }
672 static inline void operator/=(kmp_cmplx128_a16_t &lhs,
673                               kmp_cmplx128_a16_t &rhs) {
674   lhs.q /= rhs.q;
675 }
676 
677 #endif
678 
679 // ATOMIC implementation routines -----------------------------------------
680 // One routine for each operation and operand type.
// All routine declarations look like
682 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
683 
684 #define KMP_CHECK_GTID                                                         \
685   if (gtid == KMP_GTID_UNKNOWN) {                                              \
686     gtid = __kmp_entry_gtid();                                                 \
687   } // check and get gtid when needed
688 
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands type and size (fixed* for signed, fixed*u for
//     unsigned fixed-size integers)
692 //     OP_ID   - operation identifier (add, sub, mul, ...)
693 //     TYPE    - operands' type
694 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE)                           \
695   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
696                                              TYPE *lhs, TYPE rhs) {            \
697     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
698     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
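
// For illustration only: ATOMIC_BEGIN(fixed4, add, kmp_int32, void) expands
// roughly to the (deliberately unterminated) function header
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
// The macros below supply the body and the closing brace.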
699 
700 // ------------------------------------------------------------------------
701 // Lock variables used for critical sections for various size operands
702 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
703 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
704 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
705 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
706 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
707 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
708 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
709 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
710 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
711 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
712 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
713 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
714 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
715 
716 // ------------------------------------------------------------------------
717 // Operation on *lhs, rhs bound by critical section
718 //     OP     - operator (it's supposed to contain an assignment)
719 //     LCK_ID - lock identifier
720 // Note: don't check gtid as it should always be valid
// For 1- and 2-byte operands a valid gtid is expected; for other sizes, check
// it (KMP_CHECK_GTID) before invoking this macro
722 #define OP_CRITICAL(OP, LCK_ID)                                                \
723   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
724                                                                                \
725   (*lhs) OP(rhs);                                                              \
726                                                                                \
727   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
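
// For example, OP_CRITICAL(+=, 4i) expands to
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
//   (*lhs) += (rhs);
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);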
728 
729 // ------------------------------------------------------------------------
730 // For GNU compatibility, we may need to use a critical section,
731 // even though it is not required by the ISA.
732 //
733 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
734 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
735 // critical section.  On Intel(R) 64, all atomic operations are done with fetch
736 // and add or compare and exchange.  Therefore, the FLAG parameter to this
737 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extension which
738 // require a critical section, where we predict that they will be implemented
739 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
740 //
741 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
742 // the FLAG parameter should always be 1.  If we know that we will be using
743 // a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
745 // locks that are specialized based upon the size or type of the data.
746 //
747 // If FLAG is 0, then we are relying on dead code elimination by the build
748 // compiler to get rid of the useless block of code, and save a needless
749 // branch at runtime.
750 
751 #ifdef KMP_GOMP_COMPAT
752 #define OP_GOMP_CRITICAL(OP, FLAG)                                             \
753   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
754     KMP_CHECK_GTID;                                                            \
755     OP_CRITICAL(OP, 0);                                                        \
756     return;                                                                    \
757   }
758 #else
759 #define OP_GOMP_CRITICAL(OP, FLAG)
760 #endif /* KMP_GOMP_COMPAT */
761 
762 #if KMP_MIC
763 #define KMP_DO_PAUSE _mm_delay_32(1)
764 #else
765 #define KMP_DO_PAUSE KMP_CPU_PAUSE()
766 #endif /* KMP_MIC */
767 
768 // ------------------------------------------------------------------------
769 // Operation on *lhs, rhs using "compare_and_store" routine
770 //     TYPE    - operands' type
771 //     BITS    - size in bits, used to distinguish low level calls
772 //     OP      - operator
773 #define OP_CMPXCHG(TYPE, BITS, OP)                                             \
774   {                                                                            \
775     TYPE old_value, new_value;                                                 \
776     old_value = *(TYPE volatile *)lhs;                                         \
777     new_value = old_value OP rhs;                                              \
778     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
779         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
780         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
781       KMP_DO_PAUSE;                                                            \
782                                                                                \
783       old_value = *(TYPE volatile *)lhs;                                       \
784       new_value = old_value OP rhs;                                            \
785     }                                                                          \
786   }
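
// For example, OP_CMPXCHG(kmp_real32, 32, +) implements "*lhs = *lhs + rhs" by
// re-reading *lhs, computing the new value, and retrying a 32-bit
// compare-and-store until no other thread has modified *lhs in between.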
787 
788 #if USE_CMPXCHG_FIX
789 // 2007-06-25:
790 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
791 // and win_32e are affected (I verified the asm). Compiler ignores the volatile
792 // qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
793 // compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
794 // the workaround.
795 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                  \
796   {                                                                            \
797     struct _sss {                                                              \
798       TYPE cmp;                                                                \
799       kmp_int##BITS *vvv;                                                      \
800     };                                                                         \
801     struct _sss old_value, new_value;                                          \
802     old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                           \
803     new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                           \
804     *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                           \
805     new_value.cmp = old_value.cmp OP rhs;                                      \
806     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
807         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,   \
808         *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                      \
809       KMP_DO_PAUSE;                                                            \
810                                                                                \
811       *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                         \
812       new_value.cmp = old_value.cmp OP rhs;                                    \
813     }                                                                          \
814   }
815 // end of the first part of the workaround for C78287
816 #endif // USE_CMPXCHG_FIX
817 
818 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
819 
820 // ------------------------------------------------------------------------
821 // X86 or X86_64: no alignment problems ====================================
822 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
823                          GOMP_FLAG)                                            \
824   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
825   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
826   /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
827   KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                        \
828   }
829 // -------------------------------------------------------------------------
830 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
831                        GOMP_FLAG)                                              \
832   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
833   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
834   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
835   }
836 #if USE_CMPXCHG_FIX
837 // -------------------------------------------------------------------------
838 // workaround for C78287 (complex(kind=4) data type)
839 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
840                                   MASK, GOMP_FLAG)                             \
841   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
842   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
843   OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
844   }
845 // end of the second part of the workaround for C78287
846 #endif
847 
848 #else
849 // -------------------------------------------------------------------------
850 // Code for other architectures that don't handle unaligned accesses.
851 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
852                          GOMP_FLAG)                                            \
853   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
854   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
855   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
856     /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */          \
857     KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                      \
858   } else {                                                                     \
859     KMP_CHECK_GTID;                                                            \
860     OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
861   }                                                                            \
862   }
863 // -------------------------------------------------------------------------
864 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
865                        GOMP_FLAG)                                              \
866   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
867   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
868   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
869     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
870   } else {                                                                     \
871     KMP_CHECK_GTID;                                                            \
872     OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
873   }                                                                            \
874   }
875 #if USE_CMPXCHG_FIX
876 // -------------------------------------------------------------------------
877 // workaround for C78287 (complex(kind=4) data type)
878 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
879                                   MASK, GOMP_FLAG)                             \
880   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
881   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
882   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
883     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
884   } else {                                                                     \
885     KMP_CHECK_GTID;                                                            \
886     OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
887   }                                                                            \
888   }
889 // end of the second part of the workaround for C78287
890 #endif // USE_CMPXCHG_FIX
891 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
892 
893 // Routines for ATOMIC 4-byte operands addition and subtraction
894 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
895                  0) // __kmpc_atomic_fixed4_add
896 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
897                  0) // __kmpc_atomic_fixed4_sub
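
// Illustrative expansion: on x86/x86_64 the first invocation above generates
// roughly
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
//     // GOMP_FLAG is 0, so the GOMP critical-section branch is dead code
//     KMP_TEST_THEN_ADD32(lhs, +rhs);
//   }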
898 
899 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
900                KMP_ARCH_X86) // __kmpc_atomic_float4_add
901 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
902                KMP_ARCH_X86) // __kmpc_atomic_float4_sub
903 
904 // Routines for ATOMIC 8-byte operands addition and subtraction
905 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
906                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
907 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
908                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
909 
910 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
911                KMP_ARCH_X86) // __kmpc_atomic_float8_add
912 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
913                KMP_ARCH_X86) // __kmpc_atomic_float8_sub
914 
915 // ------------------------------------------------------------------------
// Entry definitions for integer operands
917 //     TYPE_ID - operands type and size (fixed4, float4)
918 //     OP_ID   - operation identifier (add, sub, mul, ...)
919 //     TYPE    - operand type
920 //     BITS    - size in bits, used to distinguish low level calls
921 //     OP      - operator (used in critical section)
922 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
923 //     MASK    - used for alignment check
924 
925 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,MASK,GOMP_FLAG
926 // ------------------------------------------------------------------------
927 // Routines for ATOMIC integer operands, other operators
928 // ------------------------------------------------------------------------
929 //              TYPE_ID,OP_ID, TYPE,          OP, LCK_ID, GOMP_FLAG
930 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
931                KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
932 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
933                0) // __kmpc_atomic_fixed1_andb
934 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
935                KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
936 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
937                KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
938 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
939                KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
940 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
941                0) // __kmpc_atomic_fixed1_orb
942 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
943                KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
944 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
945                KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
946 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
947                KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
948 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
949                KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
950 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
951                0) // __kmpc_atomic_fixed1_xor
952 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
953                KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
954 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
955                0) // __kmpc_atomic_fixed2_andb
956 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
957                KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
958 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
959                KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
960 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
961                KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
962 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
963                0) // __kmpc_atomic_fixed2_orb
964 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
965                KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
966 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
967                KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
968 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
969                KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
970 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
971                KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
972 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
973                0) // __kmpc_atomic_fixed2_xor
974 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
975                0) // __kmpc_atomic_fixed4_andb
976 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
977                KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
978 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
979                KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
980 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
981                KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
982 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
983                0) // __kmpc_atomic_fixed4_orb
984 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
985                KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
986 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
987                KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
988 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
989                KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
990 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
991                0) // __kmpc_atomic_fixed4_xor
992 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
993                KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
994 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
995                KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
996 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
997                KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
998 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
999                KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1000 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1001                KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1002 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1003                KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1004 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1005                KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1006 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1007                KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1008 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1009                KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1010 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1011                KMP_ARCH_X86) // __kmpc_atomic_float4_div
1012 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1013                KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1014 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1015                KMP_ARCH_X86) // __kmpc_atomic_float8_div
1016 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1017                KMP_ARCH_X86) // __kmpc_atomic_float8_mul
1018 //              TYPE_ID,OP_ID, TYPE,          OP, LCK_ID, GOMP_FLAG
1019 
1020 /* ------------------------------------------------------------------------ */
1021 /* Routines for C/C++ Reduction operators && and ||                         */
1022 
1023 // ------------------------------------------------------------------------
1024 // Need separate macros for &&, || because there is no combined assignment
1025 //   TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
1026 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)             \
1027   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1028   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1029   OP_CRITICAL(= *lhs OP, LCK_ID)                                               \
1030   }
1031 
1032 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1033 
1034 // ------------------------------------------------------------------------
1035 // X86 or X86_64: no alignment problems ===================================
1036 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1037   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1038   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1039   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1040   }
1041 
1042 #else
1043 // ------------------------------------------------------------------------
1044 // Code for other architectures that don't handle unaligned accesses.
1045 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1046   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1047   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1048   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1049     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1050   } else {                                                                     \
1051     KMP_CHECK_GTID;                                                            \
1052     OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */              \
1053   }                                                                            \
1054   }
1055 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1056 
1057 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1058               KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1059 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1060               KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1061 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1062               KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1063 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1064               KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1065 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1066               0) // __kmpc_atomic_fixed4_andl
1067 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1068               0) // __kmpc_atomic_fixed4_orl
1069 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1070               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1071 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1072               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1073 
1074 /* ------------------------------------------------------------------------- */
/* Routines for Fortran operators that have no C counterpart:               */
1076 /* MAX, MIN, .EQV., .NEQV.                                                   */
1077 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}           */
1078 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}  */
1079 
1080 // -------------------------------------------------------------------------
1081 // MIN and MAX need separate macros
// OP - operator used to check whether any action is needed
1083 #define MIN_MAX_CRITSECT(OP, LCK_ID)                                           \
1084   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1085                                                                                \
1086   if (*lhs OP rhs) { /* still need actions? */                                 \
1087     *lhs = rhs;                                                                \
1088   }                                                                            \
1089   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1090 
1091 // -------------------------------------------------------------------------
1092 #ifdef KMP_GOMP_COMPAT
1093 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)                                        \
1094   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1095     KMP_CHECK_GTID;                                                            \
1096     MIN_MAX_CRITSECT(OP, 0);                                                   \
1097     return;                                                                    \
1098   }
1099 #else
1100 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1101 #endif /* KMP_GOMP_COMPAT */
1102 
1103 // -------------------------------------------------------------------------
1104 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                        \
1105   {                                                                            \
1106     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1107     TYPE old_value;                                                            \
1108     temp_val = *lhs;                                                           \
1109     old_value = temp_val;                                                      \
1110     while (old_value OP rhs && /* still need actions? */                       \
1111            !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
1112                (kmp_int##BITS *)lhs,                                           \
1113                *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
1114                *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
1115       KMP_CPU_PAUSE();                                                         \
1116       temp_val = *lhs;                                                         \
1117       old_value = temp_val;                                                    \
1118     }                                                                          \
1119   }
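
// For example, MIN_MAX_CMPXCHG(kmp_real32, 32, <), as used for "max", keeps
// trying to store rhs into *lhs for as long as the current value still
// compares less than rhs and no other thread has changed *lhs in the meantime.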
1120 
1121 // -------------------------------------------------------------------------
1122 // 1-byte, 2-byte operands - use critical section
1123 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)          \
1124   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1125   if (*lhs OP rhs) { /* need actions? */                                       \
1126     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1127     MIN_MAX_CRITSECT(OP, LCK_ID)                                               \
1128   }                                                                            \
1129   }
1130 
1131 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1132 
1133 // -------------------------------------------------------------------------
1134 // X86 or X86_64: no alignment problems ====================================
1135 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
1136                          GOMP_FLAG)                                            \
1137   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1138   if (*lhs OP rhs) {                                                           \
1139     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1140     MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                            \
1141   }                                                                            \
1142   }
1143 
1144 #else
1145 // -------------------------------------------------------------------------
1146 // Code for other architectures that don't handle unaligned accesses.
1147 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
1148                          GOMP_FLAG)                                            \
1149   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1150   if (*lhs OP rhs) {                                                           \
1151     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1152     if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                    \
1153       MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */                    \
1154     } else {                                                                   \
1155       KMP_CHECK_GTID;                                                          \
1156       MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */                     \
1157     }                                                                          \
1158   }                                                                            \
1159   }
1160 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1161 
1162 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1163                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1164 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1165                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1166 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1167                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1168 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1169                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1170 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1171                  0) // __kmpc_atomic_fixed4_max
1172 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1173                  0) // __kmpc_atomic_fixed4_min
1174 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1175                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1176 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1177                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1178 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1179                  KMP_ARCH_X86) // __kmpc_atomic_float4_max
1180 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1181                  KMP_ARCH_X86) // __kmpc_atomic_float4_min
1182 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1183                  KMP_ARCH_X86) // __kmpc_atomic_float8_max
1184 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1185                  KMP_ARCH_X86) // __kmpc_atomic_float8_min
1186 #if KMP_HAVE_QUAD
1187 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1188                  1) // __kmpc_atomic_float16_max
1189 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1190                  1) // __kmpc_atomic_float16_min
1191 #if (KMP_ARCH_X86)
1192 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1193                  1) // __kmpc_atomic_float16_max_a16
1194 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1195                  1) // __kmpc_atomic_float16_min_a16
1196 #endif
1197 #endif
1198 // ------------------------------------------------------------------------
// Need separate macros for .EQV. because of the need for complement (~)
1200 // OP ignored for critical sections, ^=~ used instead
1201 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1202   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1203   OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */                      \
1204   OP_CRITICAL(^= ~, LCK_ID) /* send assignment and complement */               \
1205   }
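/* The identity behind the "^= ~" trick above, as a standalone check
   (illustrative only): bitwise equivalence a EQV b is ~(a ^ b), and
   ~(a ^ b) == a ^ (~b), so the update can be written as *lhs ^= ~rhs.
@code
#include <cassert>

static void eqv_identity_check() {
  unsigned a = 0xA5u, b = 0x3Cu;
  assert(~(a ^ b) == (a ^ ~b)); // holds for every bit pattern
}
@endcode
*/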
1206 
1207 // ------------------------------------------------------------------------
1208 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1209 // ------------------------------------------------------------------------
1210 // X86 or X86_64: no alignment problems ===================================
1211 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1212                         GOMP_FLAG)                                             \
1213   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1214   OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */                      \
1215   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1216   }
1217 // ------------------------------------------------------------------------
1218 #else
1219 // ------------------------------------------------------------------------
1220 // Code for other architectures that don't handle unaligned accesses.
1221 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1222                         GOMP_FLAG)                                             \
1223   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1224   OP_GOMP_CRITICAL(^= ~, GOMP_FLAG)                                            \
1225   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1226     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1227   } else {                                                                     \
1228     KMP_CHECK_GTID;                                                            \
1229     OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */           \
1230   }                                                                            \
1231   }
1232 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1233 
1234 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1235                KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1236 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1237                KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1238 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1239                KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1240 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1241                KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1242 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1243                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1244 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1245                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1246 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1247                 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1248 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1249                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1250 
1251 // ------------------------------------------------------------------------
1252 // Routines for Extended types: long double, _Quad, complex flavours (use
1253 // critical section)
1254 //     TYPE_ID, OP_ID, TYPE - detailed above
1255 //     OP      - operator
1256 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1257 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1258   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1259   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */                    \
1260   OP_CRITICAL(OP## =, LCK_ID) /* send assignment */                            \
1261   }
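/* A minimal standalone sketch (illustrative only) of the lock-then-update
   pattern the critical-section entries above expand to, using std::mutex in
   place of the runtime's per-LCK_ID atomic locks. The hypothetical function
   name mirrors what ATOMIC_CRITICAL(float10, add, long double, +, 10r, 1)
   would generate.
@code
#include <mutex>

static std::mutex float10_lock_sketch; // stands in for the 10r lock

static void atomic_float10_add_sketch(long double *lhs, long double rhs) {
  std::lock_guard<std::mutex> guard(float10_lock_sketch);
  *lhs += rhs; // the "OP##=" assignment sent into OP_CRITICAL
}
@endcode
*/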
1262 
1263 /* ------------------------------------------------------------------------- */
1264 // routines for long double type
1265 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1266                 1) // __kmpc_atomic_float10_add
1267 ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1268                 1) // __kmpc_atomic_float10_sub
1269 ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1270                 1) // __kmpc_atomic_float10_mul
1271 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1272                 1) // __kmpc_atomic_float10_div
1273 #if KMP_HAVE_QUAD
1274 // routines for _Quad type
1275 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1276                 1) // __kmpc_atomic_float16_add
1277 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1278                 1) // __kmpc_atomic_float16_sub
1279 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1280                 1) // __kmpc_atomic_float16_mul
1281 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1282                 1) // __kmpc_atomic_float16_div
1283 #if (KMP_ARCH_X86)
1284 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1285                 1) // __kmpc_atomic_float16_add_a16
1286 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1287                 1) // __kmpc_atomic_float16_sub_a16
1288 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1289                 1) // __kmpc_atomic_float16_mul_a16
1290 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1291                 1) // __kmpc_atomic_float16_div_a16
1292 #endif
1293 #endif
1294 // routines for complex types
1295 
1296 #if USE_CMPXCHG_FIX
1297 // workaround for C78287 (complex(kind=4) data type)
1298 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1299                           1) // __kmpc_atomic_cmplx4_add
1300 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1301                           1) // __kmpc_atomic_cmplx4_sub
1302 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1303                           1) // __kmpc_atomic_cmplx4_mul
1304 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1305                           1) // __kmpc_atomic_cmplx4_div
1306 // end of the workaround for C78287
1307 #else
1308 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1309 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1310 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1311 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1312 #endif // USE_CMPXCHG_FIX
1313 
1314 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1315 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1316 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1317 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1318 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1319                 1) // __kmpc_atomic_cmplx10_add
1320 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1321                 1) // __kmpc_atomic_cmplx10_sub
1322 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1323                 1) // __kmpc_atomic_cmplx10_mul
1324 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1325                 1) // __kmpc_atomic_cmplx10_div
1326 #if KMP_HAVE_QUAD
1327 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1328                 1) // __kmpc_atomic_cmplx16_add
1329 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1330                 1) // __kmpc_atomic_cmplx16_sub
1331 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1332                 1) // __kmpc_atomic_cmplx16_mul
1333 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1334                 1) // __kmpc_atomic_cmplx16_div
1335 #if (KMP_ARCH_X86)
1336 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1337                 1) // __kmpc_atomic_cmplx16_add_a16
1338 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1339                 1) // __kmpc_atomic_cmplx16_sub_a16
1340 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1341                 1) // __kmpc_atomic_cmplx16_mul_a16
1342 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1343                 1) // __kmpc_atomic_cmplx16_div_a16
1344 #endif
1345 #endif
1346 
1347 #if OMP_40_ENABLED
1348 
1349 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1350 // Supported only on IA-32 architecture and Intel(R) 64
1351 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1352 
1353 // ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
//     OP     - plain binary operator; the reversed assignment
//              (*lhs) = (rhs) OP (*lhs) is written out in the macro body
//     LCK_ID - lock identifier
// Note: gtid is not checked here, as it should always be valid.
// For 1- and 2-byte operands a valid gtid is expected; for other sizes,
// check it before invoking this macro.
1359 #define OP_CRITICAL_REV(OP, LCK_ID)                                            \
1360   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1361                                                                                \
1362   (*lhs) = (rhs)OP(*lhs);                                                      \
1363                                                                                \
1364   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
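/* The _rev entries compute "*lhs = rhs OP *lhs" (operands swapped) rather
   than "*lhs = *lhs OP rhs". A standalone illustration for subtraction
   (illustrative only):
@code
#include <cassert>

static void sub_rev_semantics_sketch() {
  int lhs = 10, rhs = 3;
  int forward = lhs - rhs;  // what a plain sub update would store:  7
  int reversed = rhs - lhs; // what the sub_rev update stores:      -7
  assert(forward == 7 && reversed == -7);
}
@endcode
*/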
1365 
1366 #ifdef KMP_GOMP_COMPAT
1367 #define OP_GOMP_CRITICAL_REV(OP, FLAG)                                         \
1368   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1369     KMP_CHECK_GTID;                                                            \
1370     OP_CRITICAL_REV(OP, 0);                                                    \
1371     return;                                                                    \
1372   }
1373 #else
1374 #define OP_GOMP_CRITICAL_REV(OP, FLAG)
1375 #endif /* KMP_GOMP_COMPAT */
1376 
// Beginning of a definition (provides name, parameters, debug trace)
1378 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1379 //     fixed)
1380 //     OP_ID   - operation identifier (add, sub, mul, ...)
1381 //     TYPE    - operands' type
1382 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
1383   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid,  \
1384                                                    TYPE *lhs, TYPE rhs) {      \
1385     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1386     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1387 
1388 // ------------------------------------------------------------------------
1389 // Operation on *lhs, rhs using "compare_and_store" routine
1390 //     TYPE    - operands' type
1391 //     BITS    - size in bits, used to distinguish low level calls
1392 //     OP      - operator
1393 // Note: temp_val introduced in order to force the compiler to read
1394 //       *lhs only once (w/o it the compiler reads *lhs twice)
1395 #define OP_CMPXCHG_REV(TYPE, BITS, OP)                                         \
1396   {                                                                            \
1397     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1398     TYPE old_value, new_value;                                                 \
1399     temp_val = *lhs;                                                           \
1400     old_value = temp_val;                                                      \
1401     new_value = rhs OP old_value;                                              \
1402     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
1403         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
1404         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
1405       KMP_DO_PAUSE;                                                            \
1406                                                                                \
1407       temp_val = *lhs;                                                         \
1408       old_value = temp_val;                                                    \
1409       new_value = rhs OP old_value;                                            \
1410     }                                                                          \
1411   }
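/* A standalone sketch (illustrative only) of the retry loop above for a
   reversed subtraction, using std::atomic::compare_exchange_weak in place of
   KMP_COMPARE_AND_STORE_ACQ##BITS.
@code
#include <atomic>

static void sub_rev_cas_sketch(std::atomic<int> &lhs, int rhs) {
  int old_value = lhs.load();
  int new_value = rhs - old_value; // rhs OP old_value, operands reversed
  while (!lhs.compare_exchange_weak(old_value, new_value)) {
    // old_value now holds the current contents; recompute and retry.
    new_value = rhs - old_value;
  }
}
@endcode
*/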
1412 
1413 // -------------------------------------------------------------------------
1414 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG)  \
1415   ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
1416   OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
1417   OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
1418   }
1419 
1420 // ------------------------------------------------------------------------
1421 // Entries definition for integer operands
1422 //     TYPE_ID - operands type and size (fixed4, float4)
1423 //     OP_ID   - operation identifier (add, sub, mul, ...)
1424 //     TYPE    - operand type
1425 //     BITS    - size in bits, used to distinguish low level calls
1426 //     OP      - operator (used in critical section)
1427 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1428 
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
//                  TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
1434 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1435                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1436 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1437                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1438 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1439                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1440 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1441                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1442 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1443                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1444 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1445                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1446 
1447 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1448                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1449 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1450                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1451 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1452                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1453 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1454                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1455 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1456                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1457 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1458                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1459 
1460 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1461                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1462 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1463                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1464 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1465                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1466 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1467                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1468 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1469                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1470 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1471                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1472 
1473 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1474                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1475 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1476                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1477 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1478                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1479 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1480                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1481 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1482                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1483 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1484                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1485 
1486 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1487                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1488 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1489                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1490 
1491 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1492                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1493 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1494                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1495 //                  TYPE_ID,OP_ID, TYPE,     BITS,OP,LCK_ID, GOMP_FLAG
1496 
1497 // ------------------------------------------------------------------------
1498 // Routines for Extended types: long double, _Quad, complex flavours (use
1499 // critical section)
1500 //     TYPE_ID, OP_ID, TYPE - detailed above
1501 //     OP      - operator
1502 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1503 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
1504   ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
1505   OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
1506   OP_CRITICAL_REV(OP, LCK_ID)                                                  \
1507   }
1508 
1509 /* ------------------------------------------------------------------------- */
1510 // routines for long double type
1511 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1512                     1) // __kmpc_atomic_float10_sub_rev
1513 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1514                     1) // __kmpc_atomic_float10_div_rev
1515 #if KMP_HAVE_QUAD
1516 // routines for _Quad type
1517 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1518                     1) // __kmpc_atomic_float16_sub_rev
1519 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1520                     1) // __kmpc_atomic_float16_div_rev
1521 #if (KMP_ARCH_X86)
1522 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1523                     1) // __kmpc_atomic_float16_sub_a16_rev
1524 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1525                     1) // __kmpc_atomic_float16_div_a16_rev
1526 #endif
1527 #endif
1528 
1529 // routines for complex types
1530 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1531                     1) // __kmpc_atomic_cmplx4_sub_rev
1532 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1533                     1) // __kmpc_atomic_cmplx4_div_rev
1534 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1535                     1) // __kmpc_atomic_cmplx8_sub_rev
1536 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1537                     1) // __kmpc_atomic_cmplx8_div_rev
1538 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1539                     1) // __kmpc_atomic_cmplx10_sub_rev
1540 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1541                     1) // __kmpc_atomic_cmplx10_div_rev
1542 #if KMP_HAVE_QUAD
1543 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1544                     1) // __kmpc_atomic_cmplx16_sub_rev
1545 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1546                     1) // __kmpc_atomic_cmplx16_div_rev
1547 #if (KMP_ARCH_X86)
1548 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1549                     1) // __kmpc_atomic_cmplx16_sub_a16_rev
1550 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1551                     1) // __kmpc_atomic_cmplx16_div_a16_rev
1552 #endif
1553 #endif
1554 
1555 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1556 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1557 
1558 #endif // OMP_40_ENABLED
1559 
1560 /* ------------------------------------------------------------------------ */
1561 /* Routines for mixed types of LHS and RHS, when RHS is "larger"            */
/* Note: to reduce the total number of type combinations, it is assumed      */
/*       that the compiler converts the RHS to the longest floating type,    */
/*       that is _Quad, before calling any of these routines.                */
/* The conversion to _Quad is done by the compiler during the calculation,   */
/*       and the conversion back to TYPE before the assignment, like:        */
/*       *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                               */
/* A performance penalty is expected because of software emulation.          */
1569 /* ------------------------------------------------------------------------ */
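/* A standalone sketch (illustrative only) of the mixed-type update described
   above, with long double standing in for _Quad so the example stays
   portable; the compiler is expected to have widened rhs before the call.
@code
static void fixed4_add_fp_sketch(int *lhs, long double rhs) {
  // *lhs = (TYPE)( (WIDE)(*lhs) OP rhs ), here with TYPE=int and OP='+'.
  *lhs = static_cast<int>(static_cast<long double>(*lhs) + rhs);
}
@endcode
*/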
1570 
1571 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                \
1572   void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
1573       ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) {                       \
1574     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1575     KA_TRACE(100,                                                              \
1576              ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
1577               gtid));
1578 
1579 // -------------------------------------------------------------------------
1580 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID,  \
1581                            GOMP_FLAG)                                          \
1582   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1583   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */                    \
1584   OP_CRITICAL(OP## =, LCK_ID) /* send assignment */                            \
1585   }
1586 
1587 // -------------------------------------------------------------------------
1588 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1589 // -------------------------------------------------------------------------
1590 // X86 or X86_64: no alignment problems ====================================
1591 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
1592                            LCK_ID, MASK, GOMP_FLAG)                            \
1593   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1594   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1595   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1596   }
1597 // -------------------------------------------------------------------------
1598 #else
1599 // ------------------------------------------------------------------------
1600 // Code for other architectures that don't handle unaligned accesses.
1601 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
1602                            LCK_ID, MASK, GOMP_FLAG)                            \
1603   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1604   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1605   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1606     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1607   } else {                                                                     \
1608     KMP_CHECK_GTID;                                                            \
1609     OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
1610   }                                                                            \
1611   }
1612 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1613 
1614 // -------------------------------------------------------------------------
1615 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1616 // -------------------------------------------------------------------------
1617 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
1618                                RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
1619   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1620   OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
1621   OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
1622   }
1623 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,      \
1624                                LCK_ID, GOMP_FLAG)                              \
1625   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1626   OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                          \
1627   OP_CRITICAL_REV(OP, LCK_ID)                                                  \
1628   }
1629 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1630 
1631 // RHS=float8
1632 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1633                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1634 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1635                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1636 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1637                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1638 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1639                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1640 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1641                    0) // __kmpc_atomic_fixed4_mul_float8
1642 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1643                    0) // __kmpc_atomic_fixed4_div_float8
1644 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1645                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1646 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1647                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1648 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1649                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1650 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1651                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1652 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1653                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1654 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1655                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1656 
// RHS=float16 (deprecated; to be removed once we are sure the compiler no
// longer uses these entry points)
1659 #if KMP_HAVE_QUAD
1660 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1661                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1662 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1663                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1664 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1665                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1666 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1667                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1668 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1669                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1670 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1671                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1672 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1673                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1674 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1675                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1676 
1677 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1678                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1679 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1680                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1681 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1682                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1683 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1684                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1685 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1686                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1687 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1688                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1689 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1690                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1691 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1692                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1693 
1694 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1695                    0) // __kmpc_atomic_fixed4_add_fp
1696 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1697                    0) // __kmpc_atomic_fixed4u_add_fp
1698 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1699                    0) // __kmpc_atomic_fixed4_sub_fp
1700 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1701                    0) // __kmpc_atomic_fixed4u_sub_fp
1702 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1703                    0) // __kmpc_atomic_fixed4_mul_fp
1704 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1705                    0) // __kmpc_atomic_fixed4u_mul_fp
1706 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1707                    0) // __kmpc_atomic_fixed4_div_fp
1708 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1709                    0) // __kmpc_atomic_fixed4u_div_fp
1710 
1711 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1712                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1713 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1714                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1715 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1716                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1717 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1718                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1719 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1720                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1721 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1722                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1723 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1724                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1725 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1726                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1727 
1728 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1729                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1730 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1731                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1732 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1733                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1734 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1735                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1736 
1737 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1738                    KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1739 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1740                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1741 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1742                    KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1743 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1744                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1745 
1746 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1747                    1) // __kmpc_atomic_float10_add_fp
1748 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1749                    1) // __kmpc_atomic_float10_sub_fp
1750 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1751                    1) // __kmpc_atomic_float10_mul_fp
1752 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1753                    1) // __kmpc_atomic_float10_div_fp
1754 
1755 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1756 // Reverse operations
1757 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1758                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1759 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1760                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1761 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1762                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1763 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1764                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1765 
1766 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1767                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1768 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1769                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1770 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1771                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1772 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1773                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1774 
1775 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1776                        0) // __kmpc_atomic_fixed4_sub_rev_fp
1777 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1778                        0) // __kmpc_atomic_fixed4u_sub_rev_fp
1779 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1780                        0) // __kmpc_atomic_fixed4_div_rev_fp
1781 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1782                        0) // __kmpc_atomic_fixed4u_div_rev_fp
1783 
1784 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1785                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1786 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1787                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1788 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1789                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1790 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1791                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1792 
1793 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1794                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1795 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1796                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1797 
1798 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1799                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1800 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1801                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1802 
1803 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1804                        1) // __kmpc_atomic_float10_sub_rev_fp
1805 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1806                        1) // __kmpc_atomic_float10_div_rev_fp
1807 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1808 
1809 #endif
1810 
1811 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1812 // ------------------------------------------------------------------------
1813 // X86 or X86_64: no alignment problems ====================================
1814 #if USE_CMPXCHG_FIX
1815 // workaround for C78287 (complex(kind=4) data type)
1816 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1817                              LCK_ID, MASK, GOMP_FLAG)                          \
1818   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1819   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1820   OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
1821   }
1822 // end of the second part of the workaround for C78287
1823 #else
1824 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1825                              LCK_ID, MASK, GOMP_FLAG)                          \
1826   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1827   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1828   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1829   }
1830 #endif // USE_CMPXCHG_FIX
1831 #else
1832 // ------------------------------------------------------------------------
1833 // Code for other architectures that don't handle unaligned accesses.
1834 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1835                              LCK_ID, MASK, GOMP_FLAG)                          \
1836   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1837   OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                          \
1838   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1839     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1840   } else {                                                                     \
1841     KMP_CHECK_GTID;                                                            \
1842     OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */         \
1843   }                                                                            \
1844   }
1845 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1846 
1847 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1848                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1849 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1850                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1851 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1852                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1853 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1854                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1855 
1856 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1857 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1858 
1859 // ------------------------------------------------------------------------
1860 // Atomic READ routines
1861 
1862 // ------------------------------------------------------------------------
// Beginning of a definition (provides name, parameters, debug trace)
1864 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1865 //     fixed)
1866 //     OP_ID   - operation identifier (add, sub, mul, ...)
1867 //     TYPE    - operands' type
1868 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE)                      \
1869   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
1870                                              TYPE *loc) {                      \
1871     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1872     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1873 
1874 // ------------------------------------------------------------------------
// Atomic read of *loc using the "compare_and_store_ret" routine
//     TYPE    - operand type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator (unused by the read itself)
// Note: temp_val is introduced to force the compiler to read *loc only once
//       (without it the compiler reads *loc twice)
// TODO: check if this is still necessary
// Return the old value regardless of the result of the "compare & swap"
// operation.
1883 #define OP_CMPXCHG_READ(TYPE, BITS, OP)                                        \
1884   {                                                                            \
1885     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1886     union f_i_union {                                                          \
1887       TYPE f_val;                                                              \
1888       kmp_int##BITS i_val;                                                     \
1889     };                                                                         \
1890     union f_i_union old_value;                                                 \
1891     temp_val = *loc;                                                           \
1892     old_value.f_val = temp_val;                                                \
1893     old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS(                         \
1894         (kmp_int##BITS *)loc,                                                  \
1895         *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val,                     \
1896         *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val);                    \
1897     new_value = old_value.f_val;                                               \
1898     return new_value;                                                          \
1899   }
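/* A standalone sketch (illustrative only) of the read-via-CAS trick above:
   comparing-and-swapping the observed value with itself leaves the location
   unchanged, yet yields an atomically observed snapshot either way, and a
   byte-wise copy reinterprets the integer bits as the floating type, as the
   f_i_union does. std::atomic is shown in place of
   KMP_COMPARE_AND_STORE_RET##BITS.
@code
#include <atomic>
#include <cstdint>
#include <cstring>

static float atomic_float4_rd_sketch(std::atomic<std::uint32_t> &loc) {
  std::uint32_t bits = loc.load();         // initial snapshot of the bits
  loc.compare_exchange_strong(bits, bits); // CAS(x, x): no visible change;
                                           // refreshes bits if it "fails"
  float value;
  std::memcpy(&value, &bits, sizeof value); // reinterpret, like f_i_union
  return value;
}
@endcode
*/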
1900 
1901 // -------------------------------------------------------------------------
// Atomic read of *loc bound by critical section
//     OP     - operator (unused here; the read is a plain assignment)
//     LCK_ID - lock identifier
// Note: gtid is not checked here, as it should always be valid.
// For 1- and 2-byte operands a valid gtid is expected; for other sizes,
// check it before invoking this macro.
1907 #define OP_CRITICAL_READ(OP, LCK_ID)                                           \
1908   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1909                                                                                \
1910   new_value = (*loc);                                                          \
1911                                                                                \
1912   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1913 
1914 // -------------------------------------------------------------------------
1915 #ifdef KMP_GOMP_COMPAT
1916 #define OP_GOMP_CRITICAL_READ(OP, FLAG)                                        \
1917   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1918     KMP_CHECK_GTID;                                                            \
1919     OP_CRITICAL_READ(OP, 0);                                                   \
1920     return new_value;                                                          \
1921   }
1922 #else
1923 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1924 #endif /* KMP_GOMP_COMPAT */
1925 
1926 // -------------------------------------------------------------------------
1927 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
1928   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1929   TYPE new_value;                                                              \
1930   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
1931   new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0);                              \
1932   return new_value;                                                            \
1933   }
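/* A standalone sketch (illustrative only) of the fixed-point read above: an
   atomic fetch-and-add of zero returns the current value without modifying
   it, which is what KMP_TEST_THEN_ADD##BITS(loc, OP 0) does with OP == '+'.
@code
#include <atomic>

static int atomic_fixed4_rd_sketch(std::atomic<int> &loc) {
  return loc.fetch_add(0); // adds 0, returns the value that was there
}
@endcode
*/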
1934 // -------------------------------------------------------------------------
1935 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
1936   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1937   TYPE new_value;                                                              \
1938   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
1939   OP_CMPXCHG_READ(TYPE, BITS, OP)                                              \
1940   }
1941 // ------------------------------------------------------------------------
1942 // Routines for Extended types: long double, _Quad, complex flavours (use
1943 // critical section)
1944 //     TYPE_ID, OP_ID, TYPE - detailed above
1945 //     OP      - operator
1946 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1947 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
1948   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1949   TYPE new_value;                                                              \
1950   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */               \
1951   OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */                           \
1952   return new_value;                                                            \
1953   }
1954 
1955 // ------------------------------------------------------------------------
// Fix for cmplx4 read (CQ220361) on Windows* OS. The regular routine that
// returns the result by value does not work there, so the read value is
// returned through an additional output parameter instead.
1959 #if (KMP_OS_WINDOWS)
1960 
1961 #define OP_CRITICAL_READ_WRK(OP, LCK_ID)                                       \
1962   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1963                                                                                \
1964   (*out) = (*loc);                                                             \
1965                                                                                \
1966   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1967 // ------------------------------------------------------------------------
1968 #ifdef KMP_GOMP_COMPAT
1969 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)                                    \
1970   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1971     KMP_CHECK_GTID;                                                            \
1972     OP_CRITICAL_READ_WRK(OP, 0);                                               \
1973   }
1974 #else
1975 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
1976 #endif /* KMP_GOMP_COMPAT */
1977 // ------------------------------------------------------------------------
1978 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                            \
1979   void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
1980                                          TYPE *loc) {                          \
1981     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1982     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1983 
1984 // ------------------------------------------------------------------------
1985 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)  \
1986   ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                                  \
1987   OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */           \
1988   OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */                       \
1989   }
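/* A standalone sketch (illustrative only) of the out-parameter convention
   used by the Windows cmplx4 read above: the result is stored through the
   leading 'out' argument instead of being returned by value.
   std::complex<float> stands in for kmp_cmplx32 and a std::mutex for the 8c
   lock.
@code
#include <complex>
#include <mutex>

static std::mutex cmplx4_lock_sketch;

static void cmplx4_rd_sketch(std::complex<float> *out,
                             const std::complex<float> *loc) {
  std::lock_guard<std::mutex> guard(cmplx4_lock_sketch);
  *out = *loc; // value handed back through the extra parameter
}
@endcode
*/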
1990 
1991 #endif // KMP_OS_WINDOWS
1992 
1993 // ------------------------------------------------------------------------
//                  TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
1995 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
1996 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
1997                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
1998 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
1999                     KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2000 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2001                     KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2002 
// !!! TODO: Remove lock operations for "char" since a 1-byte access can't be
// non-atomic
2004 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2005                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2006 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2007                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2008 
2009 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2010                      1) // __kmpc_atomic_float10_rd
2011 #if KMP_HAVE_QUAD
2012 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2013                      1) // __kmpc_atomic_float16_rd
2014 #endif // KMP_HAVE_QUAD
2015 
2016 // Fix for CQ220361 on Windows* OS
2017 #if (KMP_OS_WINDOWS)
2018 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2019                          1) // __kmpc_atomic_cmplx4_rd
2020 #else
2021 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2022                      1) // __kmpc_atomic_cmplx4_rd
2023 #endif
2024 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2025                      1) // __kmpc_atomic_cmplx8_rd
2026 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2027                      1) // __kmpc_atomic_cmplx10_rd
2028 #if KMP_HAVE_QUAD
2029 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2030                      1) // __kmpc_atomic_cmplx16_rd
2031 #if (KMP_ARCH_X86)
2032 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2033                      1) // __kmpc_atomic_float16_a16_rd
2034 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2035                      1) // __kmpc_atomic_cmplx16_a16_rd
2036 #endif
2037 #endif
2038 
2039 // ------------------------------------------------------------------------
2040 // Atomic WRITE routines
2041 
2042 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)              \
2043   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2044   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2045   KMP_XCHG_FIXED##BITS(lhs, rhs);                                              \
2046   }
2047 // ------------------------------------------------------------------------
2048 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2049   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2050   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2051   KMP_XCHG_REAL##BITS(lhs, rhs);                                               \
2052   }
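/* A standalone sketch (illustrative only) of the write-via-exchange pattern
   above: the new value is stored with an atomic exchange and the previous
   contents are simply discarded. std::atomic is shown in place of
   KMP_XCHG_FIXED##BITS / KMP_XCHG_REAL##BITS.
@code
#include <atomic>

static void atomic_fixed4_wr_sketch(std::atomic<int> &lhs, int rhs) {
  (void)lhs.exchange(rhs); // unconditionally replace; old value is ignored
}
@endcode
*/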
2053 
2054 // ------------------------------------------------------------------------
2055 // Operation on *lhs, rhs using "compare_and_store" routine
2056 //     TYPE    - operands' type
2057 //     BITS    - size in bits, used to distinguish low level calls
2058 //     OP      - operator
2059 // Note: temp_val introduced in order to force the compiler to read
2060 //       *lhs only once (w/o it the compiler reads *lhs twice)
2061 #define OP_CMPXCHG_WR(TYPE, BITS, OP)                                          \
2062   {                                                                            \
2063     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2064     TYPE old_value, new_value;                                                 \
2065     temp_val = *lhs;                                                           \
2066     old_value = temp_val;                                                      \
2067     new_value = rhs;                                                           \
2068     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2069         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2070         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2071       KMP_CPU_PAUSE();                                                         \
2072                                                                                \
2073       temp_val = *lhs;                                                         \
2074       old_value = temp_val;                                                    \
2075       new_value = rhs;                                                         \
2076     }                                                                          \
2077   }
2078 
2079 // -------------------------------------------------------------------------
2080 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
2081   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2082   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2083   OP_CMPXCHG_WR(TYPE, BITS, OP)                                                \
2084   }
2085 
2086 // ------------------------------------------------------------------------
2087 // Routines for Extended types: long double, _Quad, complex flavours (use
2088 // critical section)
2089 //     TYPE_ID, OP_ID, TYPE - detailed above
2090 //     OP      - operator
2091 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
2092 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)        \
2093   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2094   OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */                        \
2095   OP_CRITICAL(OP, LCK_ID) /* send assignment */                                \
2096   }
2097 // -------------------------------------------------------------------------
2098 
2099 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2100                KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2101 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2102                KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2103 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2104                KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2105 #if (KMP_ARCH_X86)
2106 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2107                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2108 #else
2109 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2110                KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2111 #endif
2112 
2113 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2114                      KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2115 #if (KMP_ARCH_X86)
2116 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2117                   KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2118 #else
2119 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2120                      KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2121 #endif
2122 
2123 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2124                    1) // __kmpc_atomic_float10_wr
2125 #if KMP_HAVE_QUAD
2126 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2127                    1) // __kmpc_atomic_float16_wr
2128 #endif
2129 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2130 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2131                    1) // __kmpc_atomic_cmplx8_wr
2132 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2133                    1) // __kmpc_atomic_cmplx10_wr
2134 #if KMP_HAVE_QUAD
2135 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2136                    1) // __kmpc_atomic_cmplx16_wr
2137 #if (KMP_ARCH_X86)
2138 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2139                    1) // __kmpc_atomic_float16_a16_wr
2140 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2141                    1) // __kmpc_atomic_cmplx16_a16_wr
2142 #endif
2143 #endif
2144 
2145 // ------------------------------------------------------------------------
2146 // Atomic CAPTURE routines
2147 
2148 // Beginning of a definition (provides name, parameters, debug trace)
2149 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2150 //     fixed)
2151 //     OP_ID   - operation identifier (add, sub, mul, ...)
2152 //     TYPE    - operands' type
2153 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
2154   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
2155                                              TYPE *lhs, TYPE rhs, int flag) {  \
2156     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2157     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2158 
2159 // -------------------------------------------------------------------------
2160 // Operation on *lhs, rhs bound by critical section
2161 //     OP     - operator (it's supposed to contain an assignment)
2162 //     LCK_ID - lock identifier
2163 // Note: don't check gtid as it should always be valid
2164 // 1, 2-byte - expect valid parameter, other - check before this macro
2165 #define OP_CRITICAL_CPT(OP, LCK_ID)                                            \
2166   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2167                                                                                \
2168   if (flag) {                                                                  \
2169     (*lhs) OP rhs;                                                             \
2170     new_value = (*lhs);                                                        \
2171   } else {                                                                     \
2172     new_value = (*lhs);                                                        \
2173     (*lhs) OP rhs;                                                             \
2174   }                                                                            \
2175                                                                                \
2176   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2177   return new_value;
2178 
2179 // ------------------------------------------------------------------------
2180 #ifdef KMP_GOMP_COMPAT
2181 #define OP_GOMP_CRITICAL_CPT(OP, FLAG)                                         \
2182   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2183     KMP_CHECK_GTID;                                                            \
2184     OP_CRITICAL_CPT(OP## =, 0);                                                \
2185   }
2186 #else
2187 #define OP_GOMP_CRITICAL_CPT(OP, FLAG)
2188 #endif /* KMP_GOMP_COMPAT */
2189 
2190 // ------------------------------------------------------------------------
2191 // Operation on *lhs, rhs using "compare_and_store" routine
2192 //     TYPE    - operands' type
2193 //     BITS    - size in bits, used to distinguish low level calls
2194 //     OP      - operator
2195 // Note: temp_val introduced in order to force the compiler to read
2196 //       *lhs only once (w/o it the compiler reads *lhs twice)
2197 #define OP_CMPXCHG_CPT(TYPE, BITS, OP)                                         \
2198   {                                                                            \
2199     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2200     TYPE old_value, new_value;                                                 \
2201     temp_val = *lhs;                                                           \
2202     old_value = temp_val;                                                      \
2203     new_value = old_value OP rhs;                                              \
2204     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2205         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2206         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2207       KMP_CPU_PAUSE();                                                         \
2208                                                                                \
2209       temp_val = *lhs;                                                         \
2210       old_value = temp_val;                                                    \
2211       new_value = old_value OP rhs;                                            \
2212     }                                                                          \
2213     if (flag) {                                                                \
2214       return new_value;                                                        \
2215     } else                                                                     \
2216       return old_value;                                                        \
2217   }
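// For illustration only, a simplified sketch of the loop OP_CMPXCHG_CPT
// produces for a 32-bit float add (not generated code; the real expansion
// goes through VOLATILE_CAST and KMP_COMPARE_AND_STORE_ACQ32, here folded
// into a hypothetical compare_and_swap32 helper):
//
//   kmp_real32 old_value, new_value;
//   do {
//     old_value = *lhs;            // snapshot the target once per attempt
//     new_value = old_value + rhs; // compute the updated value
//   } while (!compare_and_swap32(lhs, old_value, new_value));
//   return flag ? new_value : old_value; // capture after or before the op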
2218 
2219 // -------------------------------------------------------------------------
2220 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)          \
2221   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2222   TYPE new_value;                                                              \
2223   OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG)                                          \
2224   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2225   }
2226 
2227 // -------------------------------------------------------------------------
2228 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2229   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2230   TYPE old_value, new_value;                                                   \
2231   OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG)                                          \
2232   /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
2233   old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                            \
2234   if (flag) {                                                                  \
2235     return old_value OP rhs;                                                   \
2236   } else                                                                       \
2237     return old_value;                                                          \
2238   }
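// For illustration, roughly what ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt,
// kmp_int32, 32, +, 0) expands to (simplified; the GOMP compatibility path
// and the debug trace are omitted):
//
//   kmp_int32 __kmpc_atomic_fixed4_add_cpt(ident_t *id_ref, int gtid,
//                                          kmp_int32 *lhs, kmp_int32 rhs,
//                                          int flag) {
//     // single atomic fetch-and-add; the old contents come back directly
//     kmp_int32 old_value = KMP_TEST_THEN_ADD32(lhs, +rhs);
//     return flag ? old_value + rhs // value after the update
//                 : old_value;      // value before the update
//   }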
2239 // -------------------------------------------------------------------------
2240 
2241 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2242                      0) // __kmpc_atomic_fixed4_add_cpt
2243 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2244                      0) // __kmpc_atomic_fixed4_sub_cpt
2245 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2246                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2247 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2248                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2249 
2250 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2251                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2252 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2253                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2254 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2255                    KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2256 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2257                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2258 
2259 // ------------------------------------------------------------------------
2260 // Definitions of entries for integer operands
2261 //     TYPE_ID - operands type and size (fixed4, float4)
2262 //     OP_ID   - operation identifier (add, sub, mul, ...)
2263 //     TYPE    - operand type
2264 //     BITS    - size in bits, used to distinguish low level calls
2265 //     OP      - operator (used in critical section)
2266 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,GOMP_FLAG
2267 // ------------------------------------------------------------------------
2268 // Routines for ATOMIC integer operands, other operators
2269 // ------------------------------------------------------------------------
2270 //              TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
2271 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2272                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2273 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2274                    0) // __kmpc_atomic_fixed1_andb_cpt
2275 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2276                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2277 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2278                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2279 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2280                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2281 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2282                    0) // __kmpc_atomic_fixed1_orb_cpt
2283 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2284                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2285 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2286                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2287 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2288                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2289 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2290                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2291 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2292                    0) // __kmpc_atomic_fixed1_xor_cpt
2293 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2294                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2295 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2296                    0) // __kmpc_atomic_fixed2_andb_cpt
2297 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2298                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2299 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2300                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2301 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2302                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2303 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2304                    0) // __kmpc_atomic_fixed2_orb_cpt
2305 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2306                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2307 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2308                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2309 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2310                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2311 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2312                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2313 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2314                    0) // __kmpc_atomic_fixed2_xor_cpt
2315 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2316                    0) // __kmpc_atomic_fixed4_andb_cpt
2317 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2318                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2319 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2320                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2321 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2322                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2323 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2324                    0) // __kmpc_atomic_fixed4_orb_cpt
2325 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2326                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2327 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2328                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2329 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2330                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2331 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2332                    0) // __kmpc_atomic_fixed4_xor_cpt
2333 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2334                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2335 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2336                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2337 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2338                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2339 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2340                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2341 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2342                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2343 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2344                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2345 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2346                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2347 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2348                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2349 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2350                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2351 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2352                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2353 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2354                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2355 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2356                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2357 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2358                    KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2359 //              TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
2360 
2361 // CAPTURE routines for mixed types RHS=float16
2362 #if KMP_HAVE_QUAD
2363 
2364 // Beginning of a definition (provides name, parameters, debug trace)
2365 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2366 //     fixed)
2367 //     OP_ID   - operation identifier (add, sub, mul, ...)
2368 //     TYPE    - operands' type
2369 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)            \
2370   TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
2371       ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) {             \
2372     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2373     KA_TRACE(100,                                                              \
2374              ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
2375               gtid));
2376 
2377 // -------------------------------------------------------------------------
2378 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
2379                                RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
2380   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
2381   TYPE new_value;                                                              \
2382   OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG)                                          \
2383   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2384   }
2385 
2386 // -------------------------------------------------------------------------
2387 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,     \
2388                                 LCK_ID, GOMP_FLAG)                             \
2389   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
2390   TYPE new_value;                                                              \
2391   OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */                    \
2392   OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */                        \
2393   }
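// For illustration, the kind of entry point these mixed-type macros generate
// (signature only, taken from the first instantiation below): the LHS keeps
// its own type while the RHS is a _Quad, so the arithmetic is done after the
// usual conversions.
//
//   char __kmpc_atomic_fixed1_add_cpt_fp(ident_t *id_ref, int gtid,
//                                        char *lhs, _Quad rhs, int flag);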
2394 
2395 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2396                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2397 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2398                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2399 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2400                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2401 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2402                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2403 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2404                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2405 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2406                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2407 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2408                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2409 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2410                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2411 
2412 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2413                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2414 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2415                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2416 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2417                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2418 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2419                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2420 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2421                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2422 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2423                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2424 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2425                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2426 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2427                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2428 
2429 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2430                        0) // __kmpc_atomic_fixed4_add_cpt_fp
2431 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2432                        0) // __kmpc_atomic_fixed4u_add_cpt_fp
2433 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2434                        0) // __kmpc_atomic_fixed4_sub_cpt_fp
2435 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2436                        0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2437 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2438                        0) // __kmpc_atomic_fixed4_mul_cpt_fp
2439 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2440                        0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2441 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2442                        0) // __kmpc_atomic_fixed4_div_cpt_fp
2443 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2444                        0) // __kmpc_atomic_fixed4u_div_cpt_fp
2445 
2446 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2447                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2448 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2449                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2450 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2451                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2452 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2453                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2454 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2455                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2456 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2457                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2458 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2459                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2460 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2461                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2462 
2463 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2464                        KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2465 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2466                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2467 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2468                        KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2469 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2470                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2471 
2472 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2473                        KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2474 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2475                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2476 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2477                        KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2478 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2479                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2480 
2481 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2482                         1) // __kmpc_atomic_float10_add_cpt_fp
2483 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2484                         1) // __kmpc_atomic_float10_sub_cpt_fp
2485 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2486                         1) // __kmpc_atomic_float10_mul_cpt_fp
2487 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2488                         1) // __kmpc_atomic_float10_div_cpt_fp
2489 
2490 #endif // KMP_HAVE_QUAD
2491 
2492 // ------------------------------------------------------------------------
2493 // Routines for C/C++ Reduction operators && and ||
2494 
2495 // -------------------------------------------------------------------------
2496 // Operation on *lhs, rhs bound by critical section
2497 //     OP     - operator (it's supposed to contain an assignment)
2498 //     LCK_ID - lock identifier
2499 // Note: don't check gtid as it should always be valid
2500 // 1, 2-byte - expect valid parameter, other - check before this macro
2501 #define OP_CRITICAL_L_CPT(OP, LCK_ID)                                          \
2502   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2503                                                                                \
2504   if (flag) {                                                                  \
2505     new_value OP rhs;                                                          \
2506   } else                                                                       \
2507     new_value = (*lhs);                                                        \
2508                                                                                \
2509   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2510 
2511 // ------------------------------------------------------------------------
2512 #ifdef KMP_GOMP_COMPAT
2513 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)                                       \
2514   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2515     KMP_CHECK_GTID;                                                            \
2516     OP_CRITICAL_L_CPT(OP, 0);                                                  \
2517     return new_value;                                                          \
2518   }
2519 #else
2520 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2521 #endif /* KMP_GOMP_COMPAT */
2522 
2523 // ------------------------------------------------------------------------
2524 // Need separate macros for &&, || because there is no combined assignment
2525 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
2526   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2527   TYPE new_value;                                                              \
2528   OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG)                                 \
2529   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2530   }
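// As a usage sketch (loc, gtid, x and expr are assumed to be a suitable
// ident_t, thread id and kmp_int32 values), capturing the value of x
// *before* a logical-AND update could look like:
//
//   kmp_int32 v = __kmpc_atomic_fixed4_andl_cpt(&loc, gtid, &x, expr, 0);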
2531 
2532 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2533                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2534 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2535                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2536 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2537                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2538 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2539                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2540 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2541                   0) // __kmpc_atomic_fixed4_andl_cpt
2542 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2543                   0) // __kmpc_atomic_fixed4_orl_cpt
2544 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2545                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2546 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2547                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2548 
2549 // -------------------------------------------------------------------------
2550 // Routines for Fortran operators that have no C counterpart:
2551 // MAX, MIN, .EQV., .NEQV.
2552 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2553 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2554 
2555 // -------------------------------------------------------------------------
2556 // MIN and MAX need separate macros
2557 // OP - comparison operator used to check whether any action is needed
2558 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                       \
2559   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2560                                                                                \
2561   if (*lhs OP rhs) { /* still need actions? */                                 \
2562     old_value = *lhs;                                                          \
2563     *lhs = rhs;                                                                \
2564     if (flag)                                                                  \
2565       new_value = rhs;                                                         \
2566     else                                                                       \
2567       new_value = old_value;                                                   \
2568   } else {                                                                     \
2569     new_value = *lhs;                                                          \
2570   }                                                                            \
2571   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2572   return new_value;
2573 
2574 // -------------------------------------------------------------------------
2575 #ifdef KMP_GOMP_COMPAT
2576 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)                                    \
2577   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2578     KMP_CHECK_GTID;                                                            \
2579     MIN_MAX_CRITSECT_CPT(OP, 0);                                               \
2580   }
2581 #else
2582 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2583 #endif /* KMP_GOMP_COMPAT */
2584 
2585 // -------------------------------------------------------------------------
2586 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                    \
2587   {                                                                            \
2588     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2589     /*TYPE old_value; */                                                       \
2590     temp_val = *lhs;                                                           \
2591     old_value = temp_val;                                                      \
2592     while (old_value OP rhs && /* still need actions? */                       \
2593            !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2594                (kmp_int##BITS *)lhs,                                           \
2595                *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
2596                *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
2597       KMP_CPU_PAUSE();                                                         \
2598       temp_val = *lhs;                                                         \
2599       old_value = temp_val;                                                    \
2600     }                                                                          \
2601     if (flag)                                                                  \
2602       return rhs;                                                              \
2603     else                                                                       \
2604       return old_value;                                                        \
2605   }
2606 
2607 // -------------------------------------------------------------------------
2608 // 1-byte, 2-byte operands - use critical section
2609 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
2610   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2611   TYPE new_value, old_value;                                                   \
2612   if (*lhs OP rhs) { /* need actions? */                                       \
2613     GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
2614     MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                           \
2615   }                                                                            \
2616   return *lhs;                                                                 \
2617   }
2618 
2619 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2620   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2621   TYPE new_value, old_value;                                                   \
2622   if (*lhs OP rhs) {                                                           \
2623     GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
2624     MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                        \
2625   }                                                                            \
2626   return *lhs;                                                                 \
2627   }
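// For illustration, the behaviour of a max capture routine such as
// __kmpc_atomic_fixed4_max_cpt in simplified pseudocode (the real routines
// use either a critical section or the compare-and-swap loop above):
//
//   if (*lhs < rhs) {                // OP is '<' for max, '>' for min
//     old_value = *lhs;
//     *lhs = rhs;                    // performed atomically
//     return flag ? rhs : old_value; // value after / before the update
//   }
//   return *lhs;                     // no update was needed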
2628 
2629 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2630                      KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2631 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2632                      KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2633 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2634                      KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2635 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2636                      KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2637 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2638                      0) // __kmpc_atomic_fixed4_max_cpt
2639 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2640                      0) // __kmpc_atomic_fixed4_min_cpt
2641 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2642                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2643 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2644                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2645 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2646                      KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2647 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2648                      KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2649 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2650                      KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2651 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2652                      KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2653 #if KMP_HAVE_QUAD
2654 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2655                      1) // __kmpc_atomic_float16_max_cpt
2656 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2657                      1) // __kmpc_atomic_float16_min_cpt
2658 #if (KMP_ARCH_X86)
2659 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2660                      1) // __kmpc_atomic_float16_max_a16_cpt
2661 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2662                      1) // __kmpc_atomic_float16_min_a16_cpt
2663 #endif
2664 #endif
2665 
2666 // ------------------------------------------------------------------------
2667 #ifdef KMP_GOMP_COMPAT
2668 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)                                     \
2669   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2670     KMP_CHECK_GTID;                                                            \
2671     OP_CRITICAL_CPT(OP, 0);                                                    \
2672   }
2673 #else
2674 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2675 #endif /* KMP_GOMP_COMPAT */
2676 // ------------------------------------------------------------------------
2677 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
2678   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2679   TYPE new_value;                                                              \
2680   OP_GOMP_CRITICAL_EQV_CPT(^= ~, GOMP_FLAG) /* send assignment */              \
2681   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2682   }
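// .NEQV. is plain bitwise XOR and reuses ATOMIC_CMPXCHG_CPT with '^', while
// .EQV. (bitwise equivalence) is expressed as x ^= ~rhs. A small worked
// example for one byte (illustration only):
//
//   kmp_int8 x = 0x0C, rhs = 0x0A;
//   kmp_int8 eqv = x ^ ~rhs; // 0x0C ^ 0xF5 == (kmp_int8)0xF9: bits that are
//                            // equal in x and rhs come out as 1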
2683 
2684 // ------------------------------------------------------------------------
2685 
2686 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2687                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2688 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2689                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2690 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2691                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2692 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2693                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2694 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2695                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2696 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2697                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2698 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2699                     KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2700 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2701                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2702 
2703 // ------------------------------------------------------------------------
2704 // Routines for Extended types: long double, _Quad, complex flavours (use
2705 // critical section)
2706 //     TYPE_ID, OP_ID, TYPE - detailed above
2707 //     OP      - operator
2708 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
2709 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
2710   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2711   TYPE new_value;                                                              \
2712   OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */                    \
2713   OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */                        \
2714   }
2715 
2716 // ------------------------------------------------------------------------
2717 // Workaround for cmplx4. Regular routines with a return value don't work on
2718 // Win_32e, so the captured value is returned through the additional parameter.
2719 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID)                                        \
2720   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2721                                                                                \
2722   if (flag) {                                                                  \
2723     (*lhs) OP rhs;                                                             \
2724     (*out) = (*lhs);                                                           \
2725   } else {                                                                     \
2726     (*out) = (*lhs);                                                           \
2727     (*lhs) OP rhs;                                                             \
2728   }                                                                            \
2729                                                                                \
2730   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2731   return;
2732 // ------------------------------------------------------------------------
2733 
2734 #ifdef KMP_GOMP_COMPAT
2735 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)                                     \
2736   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2737     KMP_CHECK_GTID;                                                            \
2738     OP_CRITICAL_CPT_WRK(OP## =, 0);                                            \
2739   }
2740 #else
2741 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2742 #endif /* KMP_GOMP_COMPAT */
2743 // ------------------------------------------------------------------------
2744 
2745 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                 \
2746   void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2747                                          TYPE rhs, TYPE *out, int flag) {      \
2748     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2749     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2750 // ------------------------------------------------------------------------
2751 
2752 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
2753   ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
2754   OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG)                                      \
2755   OP_CRITICAL_CPT_WRK(OP## =, LCK_ID)                                          \
2756   }
2757 // The end of workaround for cmplx4
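// For illustration, the cmplx4 capture entry points instantiated below
// return void and hand the captured value back through the extra 'out'
// parameter (signature sketch, see ATOMIC_BEGIN_WRK above):
//
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag);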
2758 
2759 /* ------------------------------------------------------------------------- */
2760 // routines for long double type
2761 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2762                     1) // __kmpc_atomic_float10_add_cpt
2763 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2764                     1) // __kmpc_atomic_float10_sub_cpt
2765 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2766                     1) // __kmpc_atomic_float10_mul_cpt
2767 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2768                     1) // __kmpc_atomic_float10_div_cpt
2769 #if KMP_HAVE_QUAD
2770 // routines for _Quad type
2771 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2772                     1) // __kmpc_atomic_float16_add_cpt
2773 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2774                     1) // __kmpc_atomic_float16_sub_cpt
2775 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2776                     1) // __kmpc_atomic_float16_mul_cpt
2777 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2778                     1) // __kmpc_atomic_float16_div_cpt
2779 #if (KMP_ARCH_X86)
2780 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2781                     1) // __kmpc_atomic_float16_add_a16_cpt
2782 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2783                     1) // __kmpc_atomic_float16_sub_a16_cpt
2784 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2785                     1) // __kmpc_atomic_float16_mul_a16_cpt
2786 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2787                     1) // __kmpc_atomic_float16_div_a16_cpt
2788 #endif
2789 #endif
2790 
2791 // routines for complex types
2792 
2793 // cmplx4 routines to return void
2794 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2795                         1) // __kmpc_atomic_cmplx4_add_cpt
2796 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2797                         1) // __kmpc_atomic_cmplx4_sub_cpt
2798 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2799                         1) // __kmpc_atomic_cmplx4_mul_cpt
2800 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2801                         1) // __kmpc_atomic_cmplx4_div_cpt
2802 
2803 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2804                     1) // __kmpc_atomic_cmplx8_add_cpt
2805 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2806                     1) // __kmpc_atomic_cmplx8_sub_cpt
2807 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2808                     1) // __kmpc_atomic_cmplx8_mul_cpt
2809 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2810                     1) // __kmpc_atomic_cmplx8_div_cpt
2811 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2812                     1) // __kmpc_atomic_cmplx10_add_cpt
2813 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2814                     1) // __kmpc_atomic_cmplx10_sub_cpt
2815 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2816                     1) // __kmpc_atomic_cmplx10_mul_cpt
2817 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2818                     1) // __kmpc_atomic_cmplx10_div_cpt
2819 #if KMP_HAVE_QUAD
2820 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2821                     1) // __kmpc_atomic_cmplx16_add_cpt
2822 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2823                     1) // __kmpc_atomic_cmplx16_sub_cpt
2824 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2825                     1) // __kmpc_atomic_cmplx16_mul_cpt
2826 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2827                     1) // __kmpc_atomic_cmplx16_div_cpt
2828 #if (KMP_ARCH_X86)
2829 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2830                     1) // __kmpc_atomic_cmplx16_add_a16_cpt
2831 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2832                     1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2833 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2834                     1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2835 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2836                     1) // __kmpc_atomic_cmplx16_div_a16_cpt
2837 #endif
2838 #endif
2839 
2840 #if OMP_40_ENABLED
2841 
2842 // OpenMP 4.0: capture forms for non-commutative operations (reversed operands):
2843 //   v = x = expr binop x;   { v = x; x = expr binop x; }   { x = expr binop x; v = x; }
2844 // Supported only on IA-32 architecture and Intel(R) 64
2845 
2846 // -------------------------------------------------------------------------
2847 // Operation on *lhs, rhs bound by critical section
2848 //     OP     - operator (it's supposed to contain an assignment)
2849 //     LCK_ID - lock identifier
2850 // Note: don't check gtid as it should always be valid
2851 // 1, 2-byte - expect valid parameter, other - check before this macro
2852 #define OP_CRITICAL_CPT_REV(OP, LCK_ID)                                        \
2853   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2854                                                                                \
2855   if (flag) {                                                                  \
2856     /*temp_val = (*lhs);*/                                                     \
2857     (*lhs) = (rhs)OP(*lhs);                                                    \
2858     new_value = (*lhs);                                                        \
2859   } else {                                                                     \
2860     new_value = (*lhs);                                                        \
2861     (*lhs) = (rhs)OP(*lhs);                                                    \
2862   }                                                                            \
2863   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2864   return new_value;
2865 
2866 // ------------------------------------------------------------------------
2867 #ifdef KMP_GOMP_COMPAT
2868 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)                                     \
2869   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2870     KMP_CHECK_GTID;                                                            \
2871     OP_CRITICAL_CPT_REV(OP, 0);                                                \
2872   }
2873 #else
2874 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)
2875 #endif /* KMP_GOMP_COMPAT */
2876 
2877 // ------------------------------------------------------------------------
2878 // Operation on *lhs, rhs using "compare_and_store" routine
2879 //     TYPE    - operands' type
2880 //     BITS    - size in bits, used to distinguish low level calls
2881 //     OP      - operator
2882 // Note: temp_val introduced in order to force the compiler to read
2883 //       *lhs only once (w/o it the compiler reads *lhs twice)
2884 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                     \
2885   {                                                                            \
2886     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2887     TYPE old_value, new_value;                                                 \
2888     temp_val = *lhs;                                                           \
2889     old_value = temp_val;                                                      \
2890     new_value = rhs OP old_value;                                              \
2891     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2892         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2893         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2894       KMP_CPU_PAUSE();                                                         \
2895                                                                                \
2896       temp_val = *lhs;                                                         \
2897       old_value = temp_val;                                                    \
2898       new_value = rhs OP old_value;                                            \
2899     }                                                                          \
2900     if (flag) {                                                                \
2901       return new_value;                                                        \
2902     } else                                                                     \
2903       return old_value;                                                        \
2904   }
2905 
2906 // -------------------------------------------------------------------------
2907 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)      \
2908   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2909   TYPE new_value;                                                              \
2910   OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG)                                      \
2911   OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
2912   }
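// For illustration, the reverse-capture semantics in simplified form (not
// generated code; compare_and_swap32 is a stand-in for the CAS machinery
// above). For __kmpc_atomic_fixed4_sub_cpt_rev the operands are swapped,
// so the update performed is x = rhs - x rather than x = x - rhs:
//
//   kmp_int32 old_value, new_value;
//   do {
//     old_value = *lhs;
//     new_value = rhs - old_value;   // reversed operand order
//   } while (!compare_and_swap32(lhs, old_value, new_value));
//   return flag ? new_value : old_value;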
2913 
2914 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2915                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2916 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
2917                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
2918 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
2919                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
2920 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
2921                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
2922 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
2923                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
2924 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
2925                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
2926 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
2927                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
2928 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
2929                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
2930 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
2931                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
2932 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
2933                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
2934 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
2935                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
2936 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
2937                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
2938 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
2939                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
2940 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
2941                        KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
2942 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
2943                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
2944 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
2945                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
2946 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
2947                        KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
2948 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
2949                        KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
2950 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
2951                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
2952 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
2953                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
2954 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
2955                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
2956 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
2957                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
2958 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
2959                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
2960 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
2961                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
2962 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
2963                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
2964 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
2965                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
2966 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
2967                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
2968 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
2969                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
2970 //              TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
2971 
2972 // ------------------------------------------------------------------------
2973 // Routines for Extended types: long double, _Quad, complex flavours (use
2974 // critical section)
2975 //     TYPE_ID, OP_ID, TYPE - detailed above
2976 //     OP      - operator
2977 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
2978 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
2979   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2980   TYPE new_value;                                                              \
2981   /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/                   \
2982   OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG)                                      \
2983   OP_CRITICAL_CPT_REV(OP, LCK_ID)                                              \
2984   }
2985 
2986 /* ------------------------------------------------------------------------- */
2987 // routines for long double type
2988 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
2989                         1) // __kmpc_atomic_float10_sub_cpt_rev
2990 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
2991                         1) // __kmpc_atomic_float10_div_cpt_rev
2992 #if KMP_HAVE_QUAD
2993 // routines for _Quad type
2994 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
2995                         1) // __kmpc_atomic_float16_sub_cpt_rev
2996 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
2997                         1) // __kmpc_atomic_float16_div_cpt_rev
2998 #if (KMP_ARCH_X86)
2999 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
3000                         1) // __kmpc_atomic_float16_sub_a16_cpt_rev
3001 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
3002                         1) // __kmpc_atomic_float16_div_a16_cpt_rev
3003 #endif
3004 #endif
3005 
3006 // routines for complex types
3007 
3008 // ------------------------------------------------------------------------
3009 // Workaround for cmplx4. Regular routines with a return value don't work on
3010 // Win_32e, so the captured value is returned through the additional parameter.
3011 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                    \
3012   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3013                                                                                \
3014   if (flag) {                                                                  \
3015     (*lhs) = (rhs)OP(*lhs);                                                    \
3016     (*out) = (*lhs);                                                           \
3017   } else {                                                                     \
3018     (*out) = (*lhs);                                                           \
3019     (*lhs) = (rhs)OP(*lhs);                                                    \
3020   }                                                                            \
3021                                                                                \
3022   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3023   return;
3024 // ------------------------------------------------------------------------
3025 
3026 #ifdef KMP_GOMP_COMPAT
3027 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)                                 \
3028   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3029     KMP_CHECK_GTID;                                                            \
3030     OP_CRITICAL_CPT_REV_WRK(OP, 0);                                            \
3031   }
3032 #else
3033 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
3034 #endif /* KMP_GOMP_COMPAT */
3035 // ------------------------------------------------------------------------
3036 
3037 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID,          \
3038                                     GOMP_FLAG)                                 \
3039   ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
3040   OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG)                                  \
3041   OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                          \
3042   }
3043 // The end of workaround for cmplx4
3044 
3045 // !!! TODO: check if we need to return void for cmplx4 routines
3046 // cmplx4 routines to return void
3047 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
3048                             1) // __kmpc_atomic_cmplx4_sub_cpt_rev
3049 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
3050                             1) // __kmpc_atomic_cmplx4_div_cpt_rev
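
// Illustrative call only (`loc` and `gtid` are assumed to be provided by the
// compiler): because of the workaround above, the cmplx4 capture routines
// return the captured value through the extra `out` parameter instead of a
// return value.  For
//   kmp_cmplx32 x, v, expr;
//   { v = x; x = expr - x; }   // reverse subtract, capture-before (flag == 0)
// a non-inlined lowering would look roughly like
//   __kmpc_atomic_cmplx4_sub_cpt_rev(&loc, gtid, &x, expr, &v, 0);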
3051 
3052 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
3053                         1) // __kmpc_atomic_cmplx8_sub_cpt_rev
3054 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
3055                         1) // __kmpc_atomic_cmplx8_div_cpt_rev
3056 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
3057                         1) // __kmpc_atomic_cmplx10_sub_cpt_rev
3058 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
3059                         1) // __kmpc_atomic_cmplx10_div_cpt_rev
3060 #if KMP_HAVE_QUAD
3061 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
3062                         1) // __kmpc_atomic_cmplx16_sub_cpt_rev
3063 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
3064                         1) // __kmpc_atomic_cmplx16_div_cpt_rev
3065 #if (KMP_ARCH_X86)
3066 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
3067                         1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
3068 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
3069                         1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
3070 #endif
3071 #endif
3072 
3073 // Capture reverse for mixed type: RHS=float16
3074 #if KMP_HAVE_QUAD
3075 
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operands' type
//     RTYPE_ID, RTYPE - identifier and type of the RHS operand (_Quad here)
3081 // -------------------------------------------------------------------------
3082 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,   \
3083                                    RTYPE, LCK_ID, MASK, GOMP_FLAG)             \
3084   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
3085   TYPE new_value;                                                              \
3086   OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG)                                      \
3087   OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
3088   }
3089 
3090 // -------------------------------------------------------------------------
3091 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
3092                                     LCK_ID, GOMP_FLAG)                         \
3093   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
3094   TYPE new_value;                                                              \
3095   OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) /* send assignment */                \
3096   OP_CRITICAL_CPT_REV(OP, LCK_ID) /* send assignment */                        \
3097   }
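
// Illustrative signature only (kmp_atomic.h carries the authoritative
// declarations): each instantiation below produces a capture routine whose
// LHS keeps its own type while the RHS is a _Quad, e.g.
//   char __kmpc_atomic_fixed1_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
//                                            char *lhs, _Quad rhs, int flag);
// which supports, for instance,
//   char c, v; _Quad q;
//   #pragma omp atomic capture
//   { c = q - c; v = c; }      // reverse subtract, capture-after (flag == 1)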
3098 
3099 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3100                            KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
3101 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
3102                            KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
3103 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3104                            KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
3105 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
3106                            KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
3107 
3108 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
3109                            KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
3110 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
3111                            1,
3112                            KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
3113 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
3114                            KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
3115 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
3116                            1,
3117                            KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
3118 
3119 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
3120                            3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
3121 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
3122                            4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
3123 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
3124                            3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
3125 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
3126                            4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp
3127 
3128 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
3129                            7,
3130                            KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
3131 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
3132                            8i, 7,
3133                            KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
3134 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
3135                            7,
3136                            KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
3137 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
3138                            8i, 7,
3139                            KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
3140 
3141 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
3142                            4r, 3,
3143                            KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
3144 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
3145                            4r, 3,
3146                            KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp
3147 
3148 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
3149                            8r, 7,
3150                            KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
3151 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
3152                            8r, 7,
3153                            KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp
3154 
3155 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
3156                             10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
3157 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
3158                             10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp
3159 
3160 #endif // KMP_HAVE_QUAD
3161 
3162 //   OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
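//
// Illustrative lowering only (`loc` and `gtid` are assumed to be available;
// the compiler is free to inline the swap instead): for
//   double x, v, expr;
//   #pragma omp atomic capture
//   { v = x; x = expr; }
// a non-inlined lowering would call the float8 entry point defined below:
//   v = __kmpc_atomic_float8_swp(&loc, gtid, &x, expr);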
3163 
3164 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                        \
3165   TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
3166                                      TYPE rhs) {                               \
3167     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
3168     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3169 
3170 #define CRITICAL_SWP(LCK_ID)                                                   \
3171   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3172                                                                                \
3173   old_value = (*lhs);                                                          \
3174   (*lhs) = rhs;                                                                \
3175                                                                                \
3176   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3177   return old_value;
3178 
3179 // ------------------------------------------------------------------------
3180 #ifdef KMP_GOMP_COMPAT
3181 #define GOMP_CRITICAL_SWP(FLAG)                                                \
3182   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3183     KMP_CHECK_GTID;                                                            \
3184     CRITICAL_SWP(0);                                                           \
3185   }
3186 #else
3187 #define GOMP_CRITICAL_SWP(FLAG)
3188 #endif /* KMP_GOMP_COMPAT */
3189 
3190 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                        \
3191   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3192   TYPE old_value;                                                              \
3193   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3194   old_value = KMP_XCHG_FIXED##BITS(lhs, rhs);                                  \
3195   return old_value;                                                            \
3196   }
3197 // ------------------------------------------------------------------------
3198 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                  \
3199   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3200   TYPE old_value;                                                              \
3201   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3202   old_value = KMP_XCHG_REAL##BITS(lhs, rhs);                                   \
3203   return old_value;                                                            \
3204   }
3205 
3206 // ------------------------------------------------------------------------
3207 #define CMPXCHG_SWP(TYPE, BITS)                                                \
3208   {                                                                            \
3209     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
3210     TYPE old_value, new_value;                                                 \
3211     temp_val = *lhs;                                                           \
3212     old_value = temp_val;                                                      \
3213     new_value = rhs;                                                           \
3214     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
3215         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
3216         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
3217       KMP_CPU_PAUSE();                                                         \
3218                                                                                \
3219       temp_val = *lhs;                                                         \
3220       old_value = temp_val;                                                    \
3221       new_value = rhs;                                                         \
3222     }                                                                          \
3223     return old_value;                                                          \
3224   }
3225 
3226 // -------------------------------------------------------------------------
3227 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                     \
3228   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3229   TYPE old_value;                                                              \
3230   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3231   CMPXCHG_SWP(TYPE, BITS)                                                      \
3232   }
3233 
3234 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
3235 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
3236 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp
3237 
3238 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
3239                       KMP_ARCH_X86) // __kmpc_atomic_float4_swp
3240 
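// On IA-32 architecture there is no 8-byte atomic exchange instruction, so
// 8-byte swaps fall back to a compare-and-store (CMPXCHG8B-based) loop there;
// other targets can use a plain exchange.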
3241 #if (KMP_ARCH_X86)
3242 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
3243                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3244 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
3245                    KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3246 #else
3247 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
3248 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
3249                       KMP_ARCH_X86) // __kmpc_atomic_float8_swp
3250 #endif
3251 
3252 // ------------------------------------------------------------------------
3253 // Routines for Extended types: long double, _Quad, complex flavours (use
3254 // critical section)
3255 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)                  \
3256   ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
3257   TYPE old_value;                                                              \
3258   GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
3259   CRITICAL_SWP(LCK_ID)                                                         \
3260   }
3261 
3262 // ------------------------------------------------------------------------
3263 // !!! TODO: check if we need to return void for cmplx4 routines
// Workaround for cmplx4. Regular routines with a return value do not work
// on Win_32e, so the captured value is returned through an additional
// parameter instead.
3266 
3267 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                    \
3268   void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
3269                                      TYPE rhs, TYPE *out) {                    \
3270     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
3271     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
3272 
3273 #define CRITICAL_SWP_WRK(LCK_ID)                                               \
3274   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3275                                                                                \
3276   tmp = (*lhs);                                                                \
3277   (*lhs) = (rhs);                                                              \
3278   (*out) = tmp;                                                                \
3279   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
3280   return;
3281 // ------------------------------------------------------------------------
3282 
3283 #ifdef KMP_GOMP_COMPAT
3284 #define GOMP_CRITICAL_SWP_WRK(FLAG)                                            \
3285   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
3286     KMP_CHECK_GTID;                                                            \
3287     CRITICAL_SWP_WRK(0);                                                       \
3288   }
3289 #else
3290 #define GOMP_CRITICAL_SWP_WRK(FLAG)
3291 #endif /* KMP_GOMP_COMPAT */
3292 // ------------------------------------------------------------------------
3293 
3294 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)              \
3295   ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                          \
3296   TYPE tmp;                                                                    \
3297   GOMP_CRITICAL_SWP_WRK(GOMP_FLAG)                                             \
3298   CRITICAL_SWP_WRK(LCK_ID)                                                     \
3299   }
3300 // The end of workaround for cmplx4
3301 
3302 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
3303 #if KMP_HAVE_QUAD
3304 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
3305 #endif
3306 // cmplx4 routine to return void
3307 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp
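
// Illustrative call only (`loc` and `gtid` assumed): the cmplx4 swap above
// returns its captured value through the extra `out` parameter, e.g.
//   kmp_cmplx32 x, v, expr;
//   // { v = x; x = expr; }
//   __kmpc_atomic_cmplx4_swp(&loc, gtid, &x, expr, &v);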
3308 
3309 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32,  8c,   1 )           //
3310 // __kmpc_atomic_cmplx4_swp
3311 
3312 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
3313 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
3314 #if KMP_HAVE_QUAD
3315 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
3316 #if (KMP_ARCH_X86)
3317 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
3318                     1) // __kmpc_atomic_float16_a16_swp
3319 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
3320                     1) // __kmpc_atomic_cmplx16_a16_swp
3321 #endif
3322 #endif
3323 
3324 // End of OpenMP 4.0 Capture
3325 
3326 #endif // OMP_40_ENABLED
3327 
3328 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
3329 
3330 #undef OP_CRITICAL
3331 
3332 /* ------------------------------------------------------------------------ */
3333 /* Generic atomic routines                                                  */
3334 
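// Each generic routine takes a callback that computes the operation:
//   (*f)(dest, old, rhs)   must perform   *dest = *old <op> *rhs
// where all three parameters point to objects of the operand type. For
// lock-free sizes the callback runs on a private copy inside a
// compare-and-store loop; otherwise it runs on *lhs directly under an atomic
// lock.
//
// Illustrative use only (the helper name `add_float4` is hypothetical; `loc`
// and `gtid` are assumed to be available):
//   static void add_float4(void *dest, void *old, void *rhs) {
//     *(kmp_real32 *)dest = *(kmp_real32 *)old + *(kmp_real32 *)rhs;
//   }
//   ...
//   kmp_real32 x = 1.0f, y = 2.0f;
//   __kmpc_atomic_4(&loc, gtid, &x, &y, add_float4); // atomically: x = x + y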
3335 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3336                      void (*f)(void *, void *, void *)) {
3337   KMP_DEBUG_ASSERT(__kmp_init_serial);
3338 
3339   if (
3340 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3341       FALSE /* must use lock */
3342 #else
3343       TRUE
3344 #endif
3345       ) {
3346     kmp_int8 old_value, new_value;
3347 
3348     old_value = *(kmp_int8 *)lhs;
3349     (*f)(&new_value, &old_value, rhs);
3350 
3351     /* TODO: Should this be acquire or release? */
3352     while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
3353                                        *(kmp_int8 *)&new_value)) {
3354       KMP_CPU_PAUSE();
3355 
3356       old_value = *(kmp_int8 *)lhs;
3357       (*f)(&new_value, &old_value, rhs);
3358     }
3359 
3360     return;
3361   } else {
3362 // All 1-byte data is of integer data type.
3363 
3364 #ifdef KMP_GOMP_COMPAT
3365     if (__kmp_atomic_mode == 2) {
3366       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3367     } else
3368 #endif /* KMP_GOMP_COMPAT */
3369       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3370 
3371     (*f)(lhs, lhs, rhs);
3372 
3373 #ifdef KMP_GOMP_COMPAT
3374     if (__kmp_atomic_mode == 2) {
3375       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3376     } else
3377 #endif /* KMP_GOMP_COMPAT */
3378       __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
3379   }
3380 }
3381 
3382 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3383                      void (*f)(void *, void *, void *)) {
3384   if (
3385 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3386       FALSE /* must use lock */
3387 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3388       TRUE /* no alignment problems */
3389 #else
3390       !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
3391 #endif
3392       ) {
3393     kmp_int16 old_value, new_value;
3394 
3395     old_value = *(kmp_int16 *)lhs;
3396     (*f)(&new_value, &old_value, rhs);
3397 
3398     /* TODO: Should this be acquire or release? */
3399     while (!KMP_COMPARE_AND_STORE_ACQ16(
3400         (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
3401       KMP_CPU_PAUSE();
3402 
3403       old_value = *(kmp_int16 *)lhs;
3404       (*f)(&new_value, &old_value, rhs);
3405     }
3406 
3407     return;
3408   } else {
3409 // All 2-byte data is of integer data type.
3410 
3411 #ifdef KMP_GOMP_COMPAT
3412     if (__kmp_atomic_mode == 2) {
3413       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3414     } else
3415 #endif /* KMP_GOMP_COMPAT */
3416       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3417 
3418     (*f)(lhs, lhs, rhs);
3419 
3420 #ifdef KMP_GOMP_COMPAT
3421     if (__kmp_atomic_mode == 2) {
3422       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3423     } else
3424 #endif /* KMP_GOMP_COMPAT */
3425       __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
3426   }
3427 }
3428 
3429 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3430                      void (*f)(void *, void *, void *)) {
3431   KMP_DEBUG_ASSERT(__kmp_init_serial);
3432 
3433   if (
3434 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
3435 // Gomp compatibility is broken if this routine is called for floats.
3436 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
3437       TRUE /* no alignment problems */
3438 #else
3439       !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
3440 #endif
3441       ) {
3442     kmp_int32 old_value, new_value;
3443 
3444     old_value = *(kmp_int32 *)lhs;
3445     (*f)(&new_value, &old_value, rhs);
3446 
3447     /* TODO: Should this be acquire or release? */
3448     while (!KMP_COMPARE_AND_STORE_ACQ32(
3449         (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
3450       KMP_CPU_PAUSE();
3451 
3452       old_value = *(kmp_int32 *)lhs;
3453       (*f)(&new_value, &old_value, rhs);
3454     }
3455 
3456     return;
3457   } else {
3458 // Use __kmp_atomic_lock_4i for all 4-byte data,
3459 // even if it isn't of integer data type.
3460 
3461 #ifdef KMP_GOMP_COMPAT
3462     if (__kmp_atomic_mode == 2) {
3463       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3464     } else
3465 #endif /* KMP_GOMP_COMPAT */
3466       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3467 
3468     (*f)(lhs, lhs, rhs);
3469 
3470 #ifdef KMP_GOMP_COMPAT
3471     if (__kmp_atomic_mode == 2) {
3472       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3473     } else
3474 #endif /* KMP_GOMP_COMPAT */
3475       __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
3476   }
3477 }
3478 
3479 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3480                      void (*f)(void *, void *, void *)) {
3481   KMP_DEBUG_ASSERT(__kmp_init_serial);
3482   if (
3484 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
3485       FALSE /* must use lock */
3486 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
3487       TRUE /* no alignment problems */
3488 #else
3489       !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
3490 #endif
3491       ) {
3492     kmp_int64 old_value, new_value;
3493 
3494     old_value = *(kmp_int64 *)lhs;
3495     (*f)(&new_value, &old_value, rhs);
3496     /* TODO: Should this be acquire or release? */
3497     while (!KMP_COMPARE_AND_STORE_ACQ64(
3498         (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
3499       KMP_CPU_PAUSE();
3500 
3501       old_value = *(kmp_int64 *)lhs;
3502       (*f)(&new_value, &old_value, rhs);
3503     }
3504 
3505     return;
3506   } else {
3507 // Use __kmp_atomic_lock_8i for all 8-byte data,
3508 // even if it isn't of integer data type.
3509 
3510 #ifdef KMP_GOMP_COMPAT
3511     if (__kmp_atomic_mode == 2) {
3512       __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3513     } else
3514 #endif /* KMP_GOMP_COMPAT */
3515       __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3516 
3517     (*f)(lhs, lhs, rhs);
3518 
3519 #ifdef KMP_GOMP_COMPAT
3520     if (__kmp_atomic_mode == 2) {
3521       __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3522     } else
3523 #endif /* KMP_GOMP_COMPAT */
3524       __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
3525   }
3526 }
3527 
3528 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3529                       void (*f)(void *, void *, void *)) {
3530   KMP_DEBUG_ASSERT(__kmp_init_serial);
3531 
3532 #ifdef KMP_GOMP_COMPAT
3533   if (__kmp_atomic_mode == 2) {
3534     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3535   } else
3536 #endif /* KMP_GOMP_COMPAT */
3537     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3538 
3539   (*f)(lhs, lhs, rhs);
3540 
3541 #ifdef KMP_GOMP_COMPAT
3542   if (__kmp_atomic_mode == 2) {
3543     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3544   } else
3545 #endif /* KMP_GOMP_COMPAT */
3546     __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
3547 }
3548 
3549 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3550                       void (*f)(void *, void *, void *)) {
3551   KMP_DEBUG_ASSERT(__kmp_init_serial);
3552 
3553 #ifdef KMP_GOMP_COMPAT
3554   if (__kmp_atomic_mode == 2) {
3555     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3556   } else
3557 #endif /* KMP_GOMP_COMPAT */
3558     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3559 
3560   (*f)(lhs, lhs, rhs);
3561 
3562 #ifdef KMP_GOMP_COMPAT
3563   if (__kmp_atomic_mode == 2) {
3564     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3565   } else
3566 #endif /* KMP_GOMP_COMPAT */
3567     __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
3568 }
3569 
3570 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3571                       void (*f)(void *, void *, void *)) {
3572   KMP_DEBUG_ASSERT(__kmp_init_serial);
3573 
3574 #ifdef KMP_GOMP_COMPAT
3575   if (__kmp_atomic_mode == 2) {
3576     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3577   } else
3578 #endif /* KMP_GOMP_COMPAT */
3579     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3580 
3581   (*f)(lhs, lhs, rhs);
3582 
3583 #ifdef KMP_GOMP_COMPAT
3584   if (__kmp_atomic_mode == 2) {
3585     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3586   } else
3587 #endif /* KMP_GOMP_COMPAT */
3588     __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
3589 }
3590 
3591 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
3592                       void (*f)(void *, void *, void *)) {
3593   KMP_DEBUG_ASSERT(__kmp_init_serial);
3594 
3595 #ifdef KMP_GOMP_COMPAT
3596   if (__kmp_atomic_mode == 2) {
3597     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3598   } else
3599 #endif /* KMP_GOMP_COMPAT */
3600     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3601 
3602   (*f)(lhs, lhs, rhs);
3603 
3604 #ifdef KMP_GOMP_COMPAT
3605   if (__kmp_atomic_mode == 2) {
3606     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3607   } else
3608 #endif /* KMP_GOMP_COMPAT */
3609     __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
3610 }
3611 
// AC: same two routines as GOMP_atomic_start/end, but these will be called by
// our compiler; duplicated so that third-party names are not used in pure
// Intel code
3614 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3615 void __kmpc_atomic_start(void) {
3616   int gtid = __kmp_entry_gtid();
3617   KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3618   __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3619 }
3620 
3621 void __kmpc_atomic_end(void) {
3622   int gtid = __kmp_get_gtid();
3623   KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3624   __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3625 }
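
// Illustrative fallback only: a compiler may bracket an atomic update that has
// no matching typed entry point with these two calls, serializing it on the
// global atomic lock, e.g.
//   __kmpc_atomic_start();
//   x = update(x, y); // arbitrary update; `update` is a hypothetical helper
//   __kmpc_atomic_end();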
3626 
3627 /*!
3628 @}
3629 */
3630 
3631 // end of file
3632