1 /*
2  * kmp_atomic.cpp -- ATOMIC implementation routines
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp_atomic.h"
14 #include "kmp.h" // TRUE, asm routines prototypes
15 
16 typedef unsigned char uchar;
17 typedef unsigned short ushort;
18 
19 /*!
20 @defgroup ATOMIC_OPS Atomic Operations
21 These functions are used for implementing the many different varieties of atomic
22 operations.
23 
24 The compiler is at liberty to inline atomic operations that are naturally
25 supported by the target architecture. For instance on IA-32 architecture an
26 atomic like this can be inlined
27 @code
28 static int s = 0;
29 #pragma omp atomic
30     s++;
31 @endcode
32 using the single instruction: `lock; incl s`
33 
34 However the runtime does provide entrypoints for these operations to support
35 compilers that choose not to inline them. (For instance,
36 `__kmpc_atomic_fixed4_add` could be used to perform the increment above.)
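
For illustration only (a sketch, not the exact code any particular compiler
emits), the increment above could be lowered to a call such as
@code
// `loc` and `gtid` stand for whatever source-location descriptor and global
// thread id the compiler has at hand here (hypothetical names).
__kmpc_atomic_fixed4_add(&loc, gtid, &s, 1);
@endcode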
37 
38 The names of the functions are encoded by using the data type name and the
39 operation name, as in these tables.
40 
41 Data Type  | Data type encoding
42 -----------|---------------
43 int8_t     | `fixed1`
44 uint8_t    | `fixed1u`
45 int16_t    | `fixed2`
46 uint16_t   | `fixed2u`
47 int32_t    | `fixed4`
48 uint32_t   | `fixed4u`
int64_t    | `fixed8`
uint64_t   | `fixed8u`
51 float      | `float4`
52 double     | `float8`
long double (80 bit x87 extended precision float) | `float10`
_Quad (128 bit float) | `float16`
complex<float>   |  `cmplx4`
complex<double>  | `cmplx8`
complex<float10> | `cmplx10`
complex<_Quad>   | `cmplx16`
57 <br>
58 
59 Operation | Operation encoding
60 ----------|-------------------
61 + | add
62 - | sub
63 \* | mul
64 / | div
65 & | andb
66 << | shl
67 \>\> | shr
68 \| | orb
69 ^  | xor
70 && | andl
71 \|\| | orl
72 maximum | max
73 minimum | min
74 .eqv.   | eqv
75 .neqv.  | neqv
76 
77 <br>
78 For non-commutative operations, `_rev` can also be added for the reversed
79 operation. For the functions that capture the result, the suffix `_cpt` is
80 added.
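
For example (illustrative), `__kmpc_atomic_fixed4_sub_rev` performs the
reversed update
@code
*lhs = rhs - *lhs;  // _rev: the operands are swapped
@endcode
while `__kmpc_atomic_fixed4_sub_cpt` performs the ordinary `*lhs = *lhs - rhs`
and also returns the captured value.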
81 
82 Update Functions
83 ================
The general form of an atomic function that just performs an update (without a
`capture`) is
86 @code
87 void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
88 lhs, TYPE rhs );
89 @endcode
@param id_ref  a pointer to the source location
91 @param gtid  the global thread id
92 @param lhs   a pointer to the left operand
93 @param rhs   the right operand
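
As a concrete instance of this form (shown for illustration; `kmp_real64` is
the runtime's 8-byte real type, i.e. `double`), the 8-byte float addition
entrypoint is
@code
void __kmpc_atomic_float8_add( ident_t *id_ref, int gtid, kmp_real64 * lhs,
                               kmp_real64 rhs );
@endcode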
94 
95 `capture` functions
96 ===================
The capture functions perform an atomic update and return a result, which is
either the value of the target before the update or the value after it. They
take an additional argument that determines which of the two is returned.
100 Their general form is therefore
101 @code
102 TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
103 lhs, TYPE rhs, int flag );
104 @endcode
@param id_ref  a pointer to the source location
106 @param gtid  the global thread id
107 @param lhs   a pointer to the left operand
108 @param rhs   the right operand
109 @param flag  one if the result is to be captured *after* the operation, zero if
110 captured *before*.
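
For example (a usage sketch; `loc` and `gtid` are hypothetical placeholders):
@code
kmp_int32 s = 0;
// flag == 1: returns the value *after* the addition (here 5)
kmp_int32 after  = __kmpc_atomic_fixed4_add_cpt( &loc, gtid, &s, 5, 1 );
// flag == 0: returns the value *before* this addition (here 5, since s was 5)
kmp_int32 before = __kmpc_atomic_fixed4_add_cpt( &loc, gtid, &s, 5, 0 );
@endcode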
111 
The one exception to this is the `complex<float>` type, where the value is not
returned; instead, an extra pointer argument is passed through which the
result is written.
114 
115 They look like
116 @code
117 void __kmpc_atomic_cmplx4_<op>_cpt(  ident_t *id_ref, int gtid, kmp_cmplx32 *
118 lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
119 @endcode
120 
121 Read and Write Operations
122 =========================
123 The OpenMP<sup>*</sup> standard now supports atomic operations that simply
124 ensure that the value is read or written atomically, with no modification
125 performed. In many cases on IA-32 architecture these operations can be inlined
126 since the architecture guarantees that no tearing occurs on aligned objects
127 accessed with a single memory operation of up to 64 bits in size.
128 
129 The general form of the read operations is
130 @code
131 TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
132 @endcode
133 
134 For the write operations the form is
135 @code
136 void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs
137 );
138 @endcode
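
For instance, instantiated for 8-byte floats these forms become (shown for
illustration)
@code
kmp_real64 __kmpc_atomic_float8_rd ( ident_t *id_ref, int gtid,
                                     kmp_real64 * loc );
void __kmpc_atomic_float8_wr ( ident_t *id_ref, int gtid, kmp_real64 * lhs,
                               kmp_real64 rhs );
@endcode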
139 
140 Full list of functions
141 ======================
142 This leads to the generation of 376 atomic functions, as follows.
143 
144 Functions for integers
145 ---------------------
There are versions here for integers of size 1, 2, 4 and 8 bytes, both signed
and unsigned (where that matters).
148 @code
149     __kmpc_atomic_fixed1_add
150     __kmpc_atomic_fixed1_add_cpt
151     __kmpc_atomic_fixed1_add_fp
152     __kmpc_atomic_fixed1_andb
153     __kmpc_atomic_fixed1_andb_cpt
154     __kmpc_atomic_fixed1_andl
155     __kmpc_atomic_fixed1_andl_cpt
156     __kmpc_atomic_fixed1_div
157     __kmpc_atomic_fixed1_div_cpt
158     __kmpc_atomic_fixed1_div_cpt_rev
159     __kmpc_atomic_fixed1_div_float8
160     __kmpc_atomic_fixed1_div_fp
161     __kmpc_atomic_fixed1_div_rev
162     __kmpc_atomic_fixed1_eqv
163     __kmpc_atomic_fixed1_eqv_cpt
164     __kmpc_atomic_fixed1_max
165     __kmpc_atomic_fixed1_max_cpt
166     __kmpc_atomic_fixed1_min
167     __kmpc_atomic_fixed1_min_cpt
168     __kmpc_atomic_fixed1_mul
169     __kmpc_atomic_fixed1_mul_cpt
170     __kmpc_atomic_fixed1_mul_float8
171     __kmpc_atomic_fixed1_mul_fp
172     __kmpc_atomic_fixed1_neqv
173     __kmpc_atomic_fixed1_neqv_cpt
174     __kmpc_atomic_fixed1_orb
175     __kmpc_atomic_fixed1_orb_cpt
176     __kmpc_atomic_fixed1_orl
177     __kmpc_atomic_fixed1_orl_cpt
178     __kmpc_atomic_fixed1_rd
179     __kmpc_atomic_fixed1_shl
180     __kmpc_atomic_fixed1_shl_cpt
181     __kmpc_atomic_fixed1_shl_cpt_rev
182     __kmpc_atomic_fixed1_shl_rev
183     __kmpc_atomic_fixed1_shr
184     __kmpc_atomic_fixed1_shr_cpt
185     __kmpc_atomic_fixed1_shr_cpt_rev
186     __kmpc_atomic_fixed1_shr_rev
187     __kmpc_atomic_fixed1_sub
188     __kmpc_atomic_fixed1_sub_cpt
189     __kmpc_atomic_fixed1_sub_cpt_rev
190     __kmpc_atomic_fixed1_sub_fp
191     __kmpc_atomic_fixed1_sub_rev
192     __kmpc_atomic_fixed1_swp
193     __kmpc_atomic_fixed1_wr
194     __kmpc_atomic_fixed1_xor
195     __kmpc_atomic_fixed1_xor_cpt
196     __kmpc_atomic_fixed1u_add_fp
197     __kmpc_atomic_fixed1u_sub_fp
198     __kmpc_atomic_fixed1u_mul_fp
199     __kmpc_atomic_fixed1u_div
200     __kmpc_atomic_fixed1u_div_cpt
201     __kmpc_atomic_fixed1u_div_cpt_rev
202     __kmpc_atomic_fixed1u_div_fp
203     __kmpc_atomic_fixed1u_div_rev
204     __kmpc_atomic_fixed1u_shr
205     __kmpc_atomic_fixed1u_shr_cpt
206     __kmpc_atomic_fixed1u_shr_cpt_rev
207     __kmpc_atomic_fixed1u_shr_rev
208     __kmpc_atomic_fixed2_add
209     __kmpc_atomic_fixed2_add_cpt
210     __kmpc_atomic_fixed2_add_fp
211     __kmpc_atomic_fixed2_andb
212     __kmpc_atomic_fixed2_andb_cpt
213     __kmpc_atomic_fixed2_andl
214     __kmpc_atomic_fixed2_andl_cpt
215     __kmpc_atomic_fixed2_div
216     __kmpc_atomic_fixed2_div_cpt
217     __kmpc_atomic_fixed2_div_cpt_rev
218     __kmpc_atomic_fixed2_div_float8
219     __kmpc_atomic_fixed2_div_fp
220     __kmpc_atomic_fixed2_div_rev
221     __kmpc_atomic_fixed2_eqv
222     __kmpc_atomic_fixed2_eqv_cpt
223     __kmpc_atomic_fixed2_max
224     __kmpc_atomic_fixed2_max_cpt
225     __kmpc_atomic_fixed2_min
226     __kmpc_atomic_fixed2_min_cpt
227     __kmpc_atomic_fixed2_mul
228     __kmpc_atomic_fixed2_mul_cpt
229     __kmpc_atomic_fixed2_mul_float8
230     __kmpc_atomic_fixed2_mul_fp
231     __kmpc_atomic_fixed2_neqv
232     __kmpc_atomic_fixed2_neqv_cpt
233     __kmpc_atomic_fixed2_orb
234     __kmpc_atomic_fixed2_orb_cpt
235     __kmpc_atomic_fixed2_orl
236     __kmpc_atomic_fixed2_orl_cpt
237     __kmpc_atomic_fixed2_rd
238     __kmpc_atomic_fixed2_shl
239     __kmpc_atomic_fixed2_shl_cpt
240     __kmpc_atomic_fixed2_shl_cpt_rev
241     __kmpc_atomic_fixed2_shl_rev
242     __kmpc_atomic_fixed2_shr
243     __kmpc_atomic_fixed2_shr_cpt
244     __kmpc_atomic_fixed2_shr_cpt_rev
245     __kmpc_atomic_fixed2_shr_rev
246     __kmpc_atomic_fixed2_sub
247     __kmpc_atomic_fixed2_sub_cpt
248     __kmpc_atomic_fixed2_sub_cpt_rev
249     __kmpc_atomic_fixed2_sub_fp
250     __kmpc_atomic_fixed2_sub_rev
251     __kmpc_atomic_fixed2_swp
252     __kmpc_atomic_fixed2_wr
253     __kmpc_atomic_fixed2_xor
254     __kmpc_atomic_fixed2_xor_cpt
255     __kmpc_atomic_fixed2u_add_fp
256     __kmpc_atomic_fixed2u_sub_fp
257     __kmpc_atomic_fixed2u_mul_fp
258     __kmpc_atomic_fixed2u_div
259     __kmpc_atomic_fixed2u_div_cpt
260     __kmpc_atomic_fixed2u_div_cpt_rev
261     __kmpc_atomic_fixed2u_div_fp
262     __kmpc_atomic_fixed2u_div_rev
263     __kmpc_atomic_fixed2u_shr
264     __kmpc_atomic_fixed2u_shr_cpt
265     __kmpc_atomic_fixed2u_shr_cpt_rev
266     __kmpc_atomic_fixed2u_shr_rev
267     __kmpc_atomic_fixed4_add
268     __kmpc_atomic_fixed4_add_cpt
269     __kmpc_atomic_fixed4_add_fp
270     __kmpc_atomic_fixed4_andb
271     __kmpc_atomic_fixed4_andb_cpt
272     __kmpc_atomic_fixed4_andl
273     __kmpc_atomic_fixed4_andl_cpt
274     __kmpc_atomic_fixed4_div
275     __kmpc_atomic_fixed4_div_cpt
276     __kmpc_atomic_fixed4_div_cpt_rev
277     __kmpc_atomic_fixed4_div_float8
278     __kmpc_atomic_fixed4_div_fp
279     __kmpc_atomic_fixed4_div_rev
280     __kmpc_atomic_fixed4_eqv
281     __kmpc_atomic_fixed4_eqv_cpt
282     __kmpc_atomic_fixed4_max
283     __kmpc_atomic_fixed4_max_cpt
284     __kmpc_atomic_fixed4_min
285     __kmpc_atomic_fixed4_min_cpt
286     __kmpc_atomic_fixed4_mul
287     __kmpc_atomic_fixed4_mul_cpt
288     __kmpc_atomic_fixed4_mul_float8
289     __kmpc_atomic_fixed4_mul_fp
290     __kmpc_atomic_fixed4_neqv
291     __kmpc_atomic_fixed4_neqv_cpt
292     __kmpc_atomic_fixed4_orb
293     __kmpc_atomic_fixed4_orb_cpt
294     __kmpc_atomic_fixed4_orl
295     __kmpc_atomic_fixed4_orl_cpt
296     __kmpc_atomic_fixed4_rd
297     __kmpc_atomic_fixed4_shl
298     __kmpc_atomic_fixed4_shl_cpt
299     __kmpc_atomic_fixed4_shl_cpt_rev
300     __kmpc_atomic_fixed4_shl_rev
301     __kmpc_atomic_fixed4_shr
302     __kmpc_atomic_fixed4_shr_cpt
303     __kmpc_atomic_fixed4_shr_cpt_rev
304     __kmpc_atomic_fixed4_shr_rev
305     __kmpc_atomic_fixed4_sub
306     __kmpc_atomic_fixed4_sub_cpt
307     __kmpc_atomic_fixed4_sub_cpt_rev
308     __kmpc_atomic_fixed4_sub_fp
309     __kmpc_atomic_fixed4_sub_rev
310     __kmpc_atomic_fixed4_swp
311     __kmpc_atomic_fixed4_wr
312     __kmpc_atomic_fixed4_xor
313     __kmpc_atomic_fixed4_xor_cpt
314     __kmpc_atomic_fixed4u_add_fp
315     __kmpc_atomic_fixed4u_sub_fp
316     __kmpc_atomic_fixed4u_mul_fp
317     __kmpc_atomic_fixed4u_div
318     __kmpc_atomic_fixed4u_div_cpt
319     __kmpc_atomic_fixed4u_div_cpt_rev
320     __kmpc_atomic_fixed4u_div_fp
321     __kmpc_atomic_fixed4u_div_rev
322     __kmpc_atomic_fixed4u_shr
323     __kmpc_atomic_fixed4u_shr_cpt
324     __kmpc_atomic_fixed4u_shr_cpt_rev
325     __kmpc_atomic_fixed4u_shr_rev
326     __kmpc_atomic_fixed8_add
327     __kmpc_atomic_fixed8_add_cpt
328     __kmpc_atomic_fixed8_add_fp
329     __kmpc_atomic_fixed8_andb
330     __kmpc_atomic_fixed8_andb_cpt
331     __kmpc_atomic_fixed8_andl
332     __kmpc_atomic_fixed8_andl_cpt
333     __kmpc_atomic_fixed8_div
334     __kmpc_atomic_fixed8_div_cpt
335     __kmpc_atomic_fixed8_div_cpt_rev
336     __kmpc_atomic_fixed8_div_float8
337     __kmpc_atomic_fixed8_div_fp
338     __kmpc_atomic_fixed8_div_rev
339     __kmpc_atomic_fixed8_eqv
340     __kmpc_atomic_fixed8_eqv_cpt
341     __kmpc_atomic_fixed8_max
342     __kmpc_atomic_fixed8_max_cpt
343     __kmpc_atomic_fixed8_min
344     __kmpc_atomic_fixed8_min_cpt
345     __kmpc_atomic_fixed8_mul
346     __kmpc_atomic_fixed8_mul_cpt
347     __kmpc_atomic_fixed8_mul_float8
348     __kmpc_atomic_fixed8_mul_fp
349     __kmpc_atomic_fixed8_neqv
350     __kmpc_atomic_fixed8_neqv_cpt
351     __kmpc_atomic_fixed8_orb
352     __kmpc_atomic_fixed8_orb_cpt
353     __kmpc_atomic_fixed8_orl
354     __kmpc_atomic_fixed8_orl_cpt
355     __kmpc_atomic_fixed8_rd
356     __kmpc_atomic_fixed8_shl
357     __kmpc_atomic_fixed8_shl_cpt
358     __kmpc_atomic_fixed8_shl_cpt_rev
359     __kmpc_atomic_fixed8_shl_rev
360     __kmpc_atomic_fixed8_shr
361     __kmpc_atomic_fixed8_shr_cpt
362     __kmpc_atomic_fixed8_shr_cpt_rev
363     __kmpc_atomic_fixed8_shr_rev
364     __kmpc_atomic_fixed8_sub
365     __kmpc_atomic_fixed8_sub_cpt
366     __kmpc_atomic_fixed8_sub_cpt_rev
367     __kmpc_atomic_fixed8_sub_fp
368     __kmpc_atomic_fixed8_sub_rev
369     __kmpc_atomic_fixed8_swp
370     __kmpc_atomic_fixed8_wr
371     __kmpc_atomic_fixed8_xor
372     __kmpc_atomic_fixed8_xor_cpt
373     __kmpc_atomic_fixed8u_add_fp
374     __kmpc_atomic_fixed8u_sub_fp
375     __kmpc_atomic_fixed8u_mul_fp
376     __kmpc_atomic_fixed8u_div
377     __kmpc_atomic_fixed8u_div_cpt
378     __kmpc_atomic_fixed8u_div_cpt_rev
379     __kmpc_atomic_fixed8u_div_fp
380     __kmpc_atomic_fixed8u_div_rev
381     __kmpc_atomic_fixed8u_shr
382     __kmpc_atomic_fixed8u_shr_cpt
383     __kmpc_atomic_fixed8u_shr_cpt_rev
384     __kmpc_atomic_fixed8u_shr_rev
385 @endcode
386 
387 Functions for floating point
388 ----------------------------
There are versions here for floating point numbers of size 4, 8, 10 and 16
bytes. (Ten-byte floats are the x87 80-bit extended-precision format and are
now rarely used.)
391 @code
392     __kmpc_atomic_float4_add
393     __kmpc_atomic_float4_add_cpt
394     __kmpc_atomic_float4_add_float8
395     __kmpc_atomic_float4_add_fp
396     __kmpc_atomic_float4_div
397     __kmpc_atomic_float4_div_cpt
398     __kmpc_atomic_float4_div_cpt_rev
399     __kmpc_atomic_float4_div_float8
400     __kmpc_atomic_float4_div_fp
401     __kmpc_atomic_float4_div_rev
402     __kmpc_atomic_float4_max
403     __kmpc_atomic_float4_max_cpt
404     __kmpc_atomic_float4_min
405     __kmpc_atomic_float4_min_cpt
406     __kmpc_atomic_float4_mul
407     __kmpc_atomic_float4_mul_cpt
408     __kmpc_atomic_float4_mul_float8
409     __kmpc_atomic_float4_mul_fp
410     __kmpc_atomic_float4_rd
411     __kmpc_atomic_float4_sub
412     __kmpc_atomic_float4_sub_cpt
413     __kmpc_atomic_float4_sub_cpt_rev
414     __kmpc_atomic_float4_sub_float8
415     __kmpc_atomic_float4_sub_fp
416     __kmpc_atomic_float4_sub_rev
417     __kmpc_atomic_float4_swp
418     __kmpc_atomic_float4_wr
419     __kmpc_atomic_float8_add
420     __kmpc_atomic_float8_add_cpt
421     __kmpc_atomic_float8_add_fp
422     __kmpc_atomic_float8_div
423     __kmpc_atomic_float8_div_cpt
424     __kmpc_atomic_float8_div_cpt_rev
425     __kmpc_atomic_float8_div_fp
426     __kmpc_atomic_float8_div_rev
427     __kmpc_atomic_float8_max
428     __kmpc_atomic_float8_max_cpt
429     __kmpc_atomic_float8_min
430     __kmpc_atomic_float8_min_cpt
431     __kmpc_atomic_float8_mul
432     __kmpc_atomic_float8_mul_cpt
433     __kmpc_atomic_float8_mul_fp
434     __kmpc_atomic_float8_rd
435     __kmpc_atomic_float8_sub
436     __kmpc_atomic_float8_sub_cpt
437     __kmpc_atomic_float8_sub_cpt_rev
438     __kmpc_atomic_float8_sub_fp
439     __kmpc_atomic_float8_sub_rev
440     __kmpc_atomic_float8_swp
441     __kmpc_atomic_float8_wr
442     __kmpc_atomic_float10_add
443     __kmpc_atomic_float10_add_cpt
444     __kmpc_atomic_float10_add_fp
445     __kmpc_atomic_float10_div
446     __kmpc_atomic_float10_div_cpt
447     __kmpc_atomic_float10_div_cpt_rev
448     __kmpc_atomic_float10_div_fp
449     __kmpc_atomic_float10_div_rev
450     __kmpc_atomic_float10_mul
451     __kmpc_atomic_float10_mul_cpt
452     __kmpc_atomic_float10_mul_fp
453     __kmpc_atomic_float10_rd
454     __kmpc_atomic_float10_sub
455     __kmpc_atomic_float10_sub_cpt
456     __kmpc_atomic_float10_sub_cpt_rev
457     __kmpc_atomic_float10_sub_fp
458     __kmpc_atomic_float10_sub_rev
459     __kmpc_atomic_float10_swp
460     __kmpc_atomic_float10_wr
461     __kmpc_atomic_float16_add
462     __kmpc_atomic_float16_add_cpt
463     __kmpc_atomic_float16_div
464     __kmpc_atomic_float16_div_cpt
465     __kmpc_atomic_float16_div_cpt_rev
466     __kmpc_atomic_float16_div_rev
467     __kmpc_atomic_float16_max
468     __kmpc_atomic_float16_max_cpt
469     __kmpc_atomic_float16_min
470     __kmpc_atomic_float16_min_cpt
471     __kmpc_atomic_float16_mul
472     __kmpc_atomic_float16_mul_cpt
473     __kmpc_atomic_float16_rd
474     __kmpc_atomic_float16_sub
475     __kmpc_atomic_float16_sub_cpt
476     __kmpc_atomic_float16_sub_cpt_rev
477     __kmpc_atomic_float16_sub_rev
478     __kmpc_atomic_float16_swp
479     __kmpc_atomic_float16_wr
480 @endcode
481 
482 Functions for Complex types
483 ---------------------------
Functions for complex types whose component floating point variables are of
size 4, 8, 10 or 16 bytes. The names here are based on the size of the
component float,
486 *not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an
487 operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`.
488 
489 @code
490     __kmpc_atomic_cmplx4_add
491     __kmpc_atomic_cmplx4_add_cmplx8
492     __kmpc_atomic_cmplx4_add_cpt
493     __kmpc_atomic_cmplx4_div
494     __kmpc_atomic_cmplx4_div_cmplx8
495     __kmpc_atomic_cmplx4_div_cpt
496     __kmpc_atomic_cmplx4_div_cpt_rev
497     __kmpc_atomic_cmplx4_div_rev
498     __kmpc_atomic_cmplx4_mul
499     __kmpc_atomic_cmplx4_mul_cmplx8
500     __kmpc_atomic_cmplx4_mul_cpt
501     __kmpc_atomic_cmplx4_rd
502     __kmpc_atomic_cmplx4_sub
503     __kmpc_atomic_cmplx4_sub_cmplx8
504     __kmpc_atomic_cmplx4_sub_cpt
505     __kmpc_atomic_cmplx4_sub_cpt_rev
506     __kmpc_atomic_cmplx4_sub_rev
507     __kmpc_atomic_cmplx4_swp
508     __kmpc_atomic_cmplx4_wr
509     __kmpc_atomic_cmplx8_add
510     __kmpc_atomic_cmplx8_add_cpt
511     __kmpc_atomic_cmplx8_div
512     __kmpc_atomic_cmplx8_div_cpt
513     __kmpc_atomic_cmplx8_div_cpt_rev
514     __kmpc_atomic_cmplx8_div_rev
515     __kmpc_atomic_cmplx8_mul
516     __kmpc_atomic_cmplx8_mul_cpt
517     __kmpc_atomic_cmplx8_rd
518     __kmpc_atomic_cmplx8_sub
519     __kmpc_atomic_cmplx8_sub_cpt
520     __kmpc_atomic_cmplx8_sub_cpt_rev
521     __kmpc_atomic_cmplx8_sub_rev
522     __kmpc_atomic_cmplx8_swp
523     __kmpc_atomic_cmplx8_wr
524     __kmpc_atomic_cmplx10_add
525     __kmpc_atomic_cmplx10_add_cpt
526     __kmpc_atomic_cmplx10_div
527     __kmpc_atomic_cmplx10_div_cpt
528     __kmpc_atomic_cmplx10_div_cpt_rev
529     __kmpc_atomic_cmplx10_div_rev
530     __kmpc_atomic_cmplx10_mul
531     __kmpc_atomic_cmplx10_mul_cpt
532     __kmpc_atomic_cmplx10_rd
533     __kmpc_atomic_cmplx10_sub
534     __kmpc_atomic_cmplx10_sub_cpt
535     __kmpc_atomic_cmplx10_sub_cpt_rev
536     __kmpc_atomic_cmplx10_sub_rev
537     __kmpc_atomic_cmplx10_swp
538     __kmpc_atomic_cmplx10_wr
539     __kmpc_atomic_cmplx16_add
540     __kmpc_atomic_cmplx16_add_cpt
541     __kmpc_atomic_cmplx16_div
542     __kmpc_atomic_cmplx16_div_cpt
543     __kmpc_atomic_cmplx16_div_cpt_rev
544     __kmpc_atomic_cmplx16_div_rev
545     __kmpc_atomic_cmplx16_mul
546     __kmpc_atomic_cmplx16_mul_cpt
547     __kmpc_atomic_cmplx16_rd
548     __kmpc_atomic_cmplx16_sub
549     __kmpc_atomic_cmplx16_sub_cpt
550     __kmpc_atomic_cmplx16_sub_cpt_rev
551     __kmpc_atomic_cmplx16_swp
552     __kmpc_atomic_cmplx16_wr
553 @endcode
554 */
555 
556 /*!
557 @ingroup ATOMIC_OPS
558 @{
559 */
560 
561 /*
562  * Global vars
563  */
564 
565 #ifndef KMP_GOMP_COMPAT
566 int __kmp_atomic_mode = 1; // Intel perf
567 #else
568 int __kmp_atomic_mode = 2; // GOMP compatibility
569 #endif /* KMP_GOMP_COMPAT */
570 
571 KMP_ALIGN(128)
572 
573 // Control access to all user coded atomics in Gnu compat mode
574 kmp_atomic_lock_t __kmp_atomic_lock;
575 // Control access to all user coded atomics for 1-byte fixed data types
576 kmp_atomic_lock_t __kmp_atomic_lock_1i;
577 // Control access to all user coded atomics for 2-byte fixed data types
578 kmp_atomic_lock_t __kmp_atomic_lock_2i;
579 // Control access to all user coded atomics for 4-byte fixed data types
580 kmp_atomic_lock_t __kmp_atomic_lock_4i;
581 // Control access to all user coded atomics for kmp_real32 data type
582 kmp_atomic_lock_t __kmp_atomic_lock_4r;
583 // Control access to all user coded atomics for 8-byte fixed data types
584 kmp_atomic_lock_t __kmp_atomic_lock_8i;
585 // Control access to all user coded atomics for kmp_real64 data type
586 kmp_atomic_lock_t __kmp_atomic_lock_8r;
// Control access to all user coded atomics for float complex data type
588 kmp_atomic_lock_t __kmp_atomic_lock_8c;
589 // Control access to all user coded atomics for long double data type
590 kmp_atomic_lock_t __kmp_atomic_lock_10r;
591 // Control access to all user coded atomics for _Quad data type
592 kmp_atomic_lock_t __kmp_atomic_lock_16r;
593 // Control access to all user coded atomics for double complex data type
594 kmp_atomic_lock_t __kmp_atomic_lock_16c;
595 // Control access to all user coded atomics for long double complex type
596 kmp_atomic_lock_t __kmp_atomic_lock_20c;
597 // Control access to all user coded atomics for _Quad complex data type
598 kmp_atomic_lock_t __kmp_atomic_lock_32c;
599 
600 /* 2007-03-02:
601    Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
602    on *_32 and *_32e. This is just a temporary workaround for the problem. It
603    seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
604    in assembler language. */
605 #define KMP_ATOMIC_VOLATILE volatile
606 
607 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD
608 
609 static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
610   return lhs.q + rhs.q;
611 }
612 static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
613   return lhs.q - rhs.q;
614 }
615 static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
616   return lhs.q * rhs.q;
617 }
618 static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
619   return lhs.q / rhs.q;
620 }
621 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
622   return lhs.q < rhs.q;
623 }
624 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
625   return lhs.q > rhs.q;
626 }
627 
628 static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
629   return lhs.q + rhs.q;
630 }
631 static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
632   return lhs.q - rhs.q;
633 }
634 static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
635   return lhs.q * rhs.q;
636 }
637 static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
638   return lhs.q / rhs.q;
639 }
640 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
641   return lhs.q < rhs.q;
642 }
643 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
644   return lhs.q > rhs.q;
645 }
646 
647 static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
648                                           kmp_cmplx128_a4_t &rhs) {
649   return lhs.q + rhs.q;
650 }
651 static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
652                                           kmp_cmplx128_a4_t &rhs) {
653   return lhs.q - rhs.q;
654 }
655 static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
656                                           kmp_cmplx128_a4_t &rhs) {
657   return lhs.q * rhs.q;
658 }
659 static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
660                                           kmp_cmplx128_a4_t &rhs) {
661   return lhs.q / rhs.q;
662 }
663 
664 static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
665                                            kmp_cmplx128_a16_t &rhs) {
666   return lhs.q + rhs.q;
667 }
668 static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
669                                            kmp_cmplx128_a16_t &rhs) {
670   return lhs.q - rhs.q;
671 }
672 static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
673                                            kmp_cmplx128_a16_t &rhs) {
674   return lhs.q * rhs.q;
675 }
676 static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
677                                            kmp_cmplx128_a16_t &rhs) {
678   return lhs.q / rhs.q;
679 }
680 
681 #endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD
682 
683 // ATOMIC implementation routines -----------------------------------------
684 // One routine for each operation and operand type.
// All routine declarations look like
686 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
687 
688 #define KMP_CHECK_GTID                                                         \
689   if (gtid == KMP_GTID_UNKNOWN) {                                              \
690     gtid = __kmp_entry_gtid();                                                 \
691   } // check and get gtid when needed
692 
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned
//     fixed-size integers)
696 //     OP_ID   - operation identifier (add, sub, mul, ...)
697 //     TYPE    - operands' type
698 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE)                           \
699   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
700                                              TYPE *lhs, TYPE rhs) {            \
701     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
702     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
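
// For illustration, ATOMIC_BEGIN(fixed4, add, kmp_int32, void) expands
// (roughly) to:
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
// The body and the closing brace are supplied by the macro that uses
// ATOMIC_BEGIN.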
703 
704 // ------------------------------------------------------------------------
705 // Lock variables used for critical sections for various size operands
706 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
707 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
708 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
709 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
710 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
711 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
712 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
713 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
714 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
715 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
716 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
717 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
718 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex
719 
720 // ------------------------------------------------------------------------
721 // Operation on *lhs, rhs bound by critical section
722 //     OP     - operator (it's supposed to contain an assignment)
723 //     LCK_ID - lock identifier
724 // Note: don't check gtid as it should always be valid
// For 1- and 2-byte operands a valid gtid parameter is expected; for other
// sizes, check it before using this macro
726 #define OP_CRITICAL(OP, LCK_ID)                                                \
727   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
728                                                                                \
729   (*lhs) OP(rhs);                                                              \
730                                                                                \
731   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
732 
733 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID)                                   \
734   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
735   (*lhs) = (TYPE)((*lhs)OP((TYPE)rhs));                                        \
736   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
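
// For example, OP_UPDATE_CRITICAL(kmp_int32, +, 4i) expands (roughly) to:
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);
//   (*lhs) = (kmp_int32)((*lhs) + ((kmp_int32)rhs));
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);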
737 
738 // ------------------------------------------------------------------------
739 // For GNU compatibility, we may need to use a critical section,
740 // even though it is not required by the ISA.
741 //
742 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
743 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
744 // critical section.  On Intel(R) 64, all atomic operations are done with fetch
745 // and add or compare and exchange.  Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
// require a critical section, where we predict that they will be implemented
748 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
749 //
750 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
751 // the FLAG parameter should always be 1.  If we know that we will be using
752 // a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
754 // locks that are specialized based upon the size or type of the data.
755 //
756 // If FLAG is 0, then we are relying on dead code elimination by the build
757 // compiler to get rid of the useless block of code, and save a needless
758 // branch at runtime.
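//
// For example, the fixed4 add/sub entries below are generated with GOMP_FLAG
// == 0 (they are a single locked instruction on both IA-32 and Intel(R) 64),
// while float4 add uses GOMP_FLAG == KMP_ARCH_X86 (it needs the
// critical-section fallback only on IA-32 in GOMP-compat mode).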
759 
760 #ifdef KMP_GOMP_COMPAT
761 #define OP_GOMP_CRITICAL(OP, FLAG)                                             \
762   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
763     KMP_CHECK_GTID;                                                            \
764     OP_CRITICAL(OP, 0);                                                        \
765     return;                                                                    \
766   }
767 
768 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)                                \
769   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
770     KMP_CHECK_GTID;                                                            \
771     OP_UPDATE_CRITICAL(TYPE, OP, 0);                                           \
772     return;                                                                    \
773   }
774 #else
775 #define OP_GOMP_CRITICAL(OP, FLAG)
776 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
777 #endif /* KMP_GOMP_COMPAT */
778 
779 #if KMP_MIC
780 #define KMP_DO_PAUSE _mm_delay_32(1)
781 #else
782 #define KMP_DO_PAUSE
783 #endif /* KMP_MIC */
784 
785 // ------------------------------------------------------------------------
786 // Operation on *lhs, rhs using "compare_and_store" routine
787 //     TYPE    - operands' type
788 //     BITS    - size in bits, used to distinguish low level calls
789 //     OP      - operator
790 #define OP_CMPXCHG(TYPE, BITS, OP)                                             \
791   {                                                                            \
792     TYPE old_value, new_value;                                                 \
793     old_value = *(TYPE volatile *)lhs;                                         \
794     new_value = (TYPE)(old_value OP((TYPE)rhs));                               \
795     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
796         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
797         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
798       KMP_DO_PAUSE;                                                            \
799                                                                                \
800       old_value = *(TYPE volatile *)lhs;                                       \
801       new_value = (TYPE)(old_value OP((TYPE)rhs));                             \
802     }                                                                          \
803   }
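
// The idea behind OP_CMPXCHG, in plain terms (an illustrative sketch, not the
// literal expansion):
//   do {
//     old_value = *lhs;              // snapshot the current value
//     new_value = old_value OP rhs;  // compute the update locally
//   } while the compare-and-store of old_value -> new_value into *lhs fails,
// i.e. retry whenever another thread changed *lhs in the meantime.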
804 
805 #if USE_CMPXCHG_FIX
806 // 2007-06-25:
807 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
808 // and win_32e are affected (I verified the asm). Compiler ignores the volatile
809 // qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
810 // compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
811 // the workaround.
812 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                  \
813   {                                                                            \
814     struct _sss {                                                              \
815       TYPE cmp;                                                                \
816       kmp_int##BITS *vvv;                                                      \
817     };                                                                         \
818     struct _sss old_value, new_value;                                          \
819     old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                           \
820     new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                           \
821     *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                           \
822     new_value.cmp = (TYPE)(old_value.cmp OP rhs);                              \
823     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
824         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,   \
825         *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                      \
826       KMP_DO_PAUSE;                                                            \
827                                                                                \
828       *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                         \
829       new_value.cmp = (TYPE)(old_value.cmp OP rhs);                            \
830     }                                                                          \
831   }
832 // end of the first part of the workaround for C78287
833 #endif // USE_CMPXCHG_FIX
834 
835 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
836 
837 // ------------------------------------------------------------------------
838 // X86 or X86_64: no alignment problems ====================================
839 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
840                          GOMP_FLAG)                                            \
841   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
842   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
843   /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
844   KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                        \
845   }
846 // -------------------------------------------------------------------------
847 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
848                        GOMP_FLAG)                                              \
849   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
850   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
851   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
852   }
853 #if USE_CMPXCHG_FIX
854 // -------------------------------------------------------------------------
855 // workaround for C78287 (complex(kind=4) data type)
856 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
857                                   MASK, GOMP_FLAG)                             \
858   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
859   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
860   OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
861   }
862 // end of the second part of the workaround for C78287
863 #endif // USE_CMPXCHG_FIX
864 
865 #else
866 // -------------------------------------------------------------------------
867 // Code for other architectures that don't handle unaligned accesses.
868 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
869                          GOMP_FLAG)                                            \
870   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
871   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
872   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
873     /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */          \
874     KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                      \
875   } else {                                                                     \
876     KMP_CHECK_GTID;                                                            \
877     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
878                        LCK_ID) /* unaligned address - use critical */          \
879   }                                                                            \
880   }
881 // -------------------------------------------------------------------------
882 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
883                        GOMP_FLAG)                                              \
884   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
885   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
886   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
887     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
888   } else {                                                                     \
889     KMP_CHECK_GTID;                                                            \
890     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
891                        LCK_ID) /* unaligned address - use critical */          \
892   }                                                                            \
893   }
894 #if USE_CMPXCHG_FIX
895 // -------------------------------------------------------------------------
896 // workaround for C78287 (complex(kind=4) data type)
897 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,      \
898                                   MASK, GOMP_FLAG)                             \
899   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
900   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
901   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
902     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
903   } else {                                                                     \
904     KMP_CHECK_GTID;                                                            \
905     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
906                        LCK_ID) /* unaligned address - use critical */          \
907   }                                                                            \
908   }
909 // end of the second part of the workaround for C78287
910 #endif // USE_CMPXCHG_FIX
911 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
912 
913 // Routines for ATOMIC 4-byte operands addition and subtraction
914 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
915                  0) // __kmpc_atomic_fixed4_add
916 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
917                  0) // __kmpc_atomic_fixed4_sub
918 
919 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
920                KMP_ARCH_X86) // __kmpc_atomic_float4_add
921 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
922                KMP_ARCH_X86) // __kmpc_atomic_float4_sub
923 
924 // Routines for ATOMIC 8-byte operands addition and subtraction
925 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
926                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
927 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
928                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub
929 
930 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
931                KMP_ARCH_X86) // __kmpc_atomic_float8_add
932 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
933                KMP_ARCH_X86) // __kmpc_atomic_float8_sub
934 
935 // ------------------------------------------------------------------------
936 // Entries definition for integer operands
//     TYPE_ID - operand type and size (fixed4, float4)
938 //     OP_ID   - operation identifier (add, sub, mul, ...)
939 //     TYPE    - operand type
940 //     BITS    - size in bits, used to distinguish low level calls
941 //     OP      - operator (used in critical section)
942 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
943 //     MASK    - used for alignment check
944 
945 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,MASK,GOMP_FLAG
946 // ------------------------------------------------------------------------
947 // Routines for ATOMIC integer operands, other operators
948 // ------------------------------------------------------------------------
//             TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
950 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
951                KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
952 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
953                0) // __kmpc_atomic_fixed1_andb
954 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
955                KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
956 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
957                KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
958 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
959                KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
960 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
961                0) // __kmpc_atomic_fixed1_orb
962 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
963                KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
964 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
965                KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
966 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
967                KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
968 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
969                KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
970 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
971                0) // __kmpc_atomic_fixed1_xor
972 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
973                KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
974 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
975                0) // __kmpc_atomic_fixed2_andb
976 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
977                KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
978 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
979                KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
980 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
981                KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
982 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
983                0) // __kmpc_atomic_fixed2_orb
984 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
985                KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
986 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
987                KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
988 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
989                KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
990 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
991                KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
992 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
993                0) // __kmpc_atomic_fixed2_xor
994 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
995                0) // __kmpc_atomic_fixed4_andb
996 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
997                KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
998 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
999                KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
1000 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
1001                KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
1002 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
1003                0) // __kmpc_atomic_fixed4_orb
1004 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
1005                KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
1006 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
1007                KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
1008 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
1009                KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
1010 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
1011                0) // __kmpc_atomic_fixed4_xor
1012 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
1013                KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
1014 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
1015                KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
1016 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
1017                KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
1018 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
1019                KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
1020 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
1021                KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
1022 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
1023                KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
1024 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
1025                KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
1026 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
1027                KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
1028 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
1029                KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
1030 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
1031                KMP_ARCH_X86) // __kmpc_atomic_float4_div
1032 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
1033                KMP_ARCH_X86) // __kmpc_atomic_float4_mul
1034 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
1035                KMP_ARCH_X86) // __kmpc_atomic_float8_div
1036 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
1037                KMP_ARCH_X86) // __kmpc_atomic_float8_mul
//             TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG
1039 
1040 /* ------------------------------------------------------------------------ */
1041 /* Routines for C/C++ Reduction operators && and ||                         */
1042 
1043 // ------------------------------------------------------------------------
1044 // Need separate macros for &&, || because there is no combined assignment
1045 //   TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
1046 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)             \
1047   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1048   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1049   OP_CRITICAL(= *lhs OP, LCK_ID)                                               \
1050   }
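
// For example, with OP == &&, the logical-op macros in this section pass
// "= *lhs &&" as the OP argument, so OP_CRITICAL / OP_GOMP_CRITICAL perform,
// under the lock,
//   (*lhs) = *lhs && (rhs);
// (there is no "&&=" compound assignment in C/C++, hence the separate macros).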
1051 
1052 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1053 
1054 // ------------------------------------------------------------------------
1055 // X86 or X86_64: no alignment problems ===================================
1056 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1057   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1058   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1059   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1060   }
1061 
1062 #else
1063 // ------------------------------------------------------------------------
1064 // Code for other architectures that don't handle unaligned accesses.
1065 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
1066   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1067   OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                       \
1068   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1069     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1070   } else {                                                                     \
1071     KMP_CHECK_GTID;                                                            \
1072     OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */              \
1073   }                                                                            \
1074   }
1075 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1076 
1077 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
1078               KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
1079 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
1080               KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
1081 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
1082               KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
1083 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
1084               KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
1085 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
1086               0) // __kmpc_atomic_fixed4_andl
1087 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
1088               0) // __kmpc_atomic_fixed4_orl
1089 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
1090               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
1091 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
1092               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl
1093 
1094 /* ------------------------------------------------------------------------- */
/* Routines for Fortran operators that have no C counterpart:                */
1096 /* MAX, MIN, .EQV., .NEQV.                                                   */
1097 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}           */
1098 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}  */
1099 
1100 // -------------------------------------------------------------------------
1101 // MIN and MAX need separate macros
// OP - comparison operator used to check whether any action is still needed
1103 #define MIN_MAX_CRITSECT(OP, LCK_ID)                                           \
1104   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1105                                                                                \
1106   if (*lhs OP rhs) { /* still need actions? */                                 \
1107     *lhs = rhs;                                                                \
1108   }                                                                            \
1109   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1110 
1111 // -------------------------------------------------------------------------
1112 #ifdef KMP_GOMP_COMPAT
1113 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)                                        \
1114   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1115     KMP_CHECK_GTID;                                                            \
1116     MIN_MAX_CRITSECT(OP, 0);                                                   \
1117     return;                                                                    \
1118   }
1119 #else
1120 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
1121 #endif /* KMP_GOMP_COMPAT */
1122 
1123 // -------------------------------------------------------------------------
1124 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                        \
1125   {                                                                            \
1126     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1127     TYPE old_value;                                                            \
1128     temp_val = *lhs;                                                           \
1129     old_value = temp_val;                                                      \
1130     while (old_value OP rhs && /* still need actions? */                       \
1131            !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
1132                (kmp_int##BITS *)lhs,                                           \
1133                *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
1134                *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
1135       temp_val = *lhs;                                                         \
1136       old_value = temp_val;                                                    \
1137     }                                                                          \
1138   }
1139 
1140 // -------------------------------------------------------------------------
1141 // 1-byte, 2-byte operands - use critical section
1142 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)          \
1143   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1144   if (*lhs OP rhs) { /* need actions? */                                       \
1145     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1146     MIN_MAX_CRITSECT(OP, LCK_ID)                                               \
1147   }                                                                            \
1148   }
1149 
1150 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1151 
1152 // -------------------------------------------------------------------------
1153 // X86 or X86_64: no alignment problems ====================================
1154 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
1155                          GOMP_FLAG)                                            \
1156   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1157   if (*lhs OP rhs) {                                                           \
1158     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1159     MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                            \
1160   }                                                                            \
1161   }
1162 
1163 #else
1164 // -------------------------------------------------------------------------
1165 // Code for other architectures that don't handle unaligned accesses.
1166 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
1167                          GOMP_FLAG)                                            \
1168   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1169   if (*lhs OP rhs) {                                                           \
1170     GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                       \
1171     if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                    \
1172       MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */                    \
1173     } else {                                                                   \
1174       KMP_CHECK_GTID;                                                          \
1175       MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */                     \
1176     }                                                                          \
1177   }                                                                            \
1178   }
1179 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
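
// For example (illustrative), MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <,
// 4i, 3, 0) below generates __kmpc_atomic_fixed4_max: OP is "<", so the update
// is only attempted while "*lhs < rhs" still holds, and the compare-and-store
// then replaces the stored value with rhs.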
1180 
1181 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
1182                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
1183 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
1184                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
1185 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
1186                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
1187 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
1188                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
1189 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
1190                  0) // __kmpc_atomic_fixed4_max
1191 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
1192                  0) // __kmpc_atomic_fixed4_min
1193 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
1194                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
1195 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
1196                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
1197 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
1198                  KMP_ARCH_X86) // __kmpc_atomic_float4_max
1199 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
1200                  KMP_ARCH_X86) // __kmpc_atomic_float4_min
1201 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
1202                  KMP_ARCH_X86) // __kmpc_atomic_float8_max
1203 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
1204                  KMP_ARCH_X86) // __kmpc_atomic_float8_min
1205 #if KMP_HAVE_QUAD
1206 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
1207                  1) // __kmpc_atomic_float16_max
1208 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
1209                  1) // __kmpc_atomic_float16_min
1210 #if (KMP_ARCH_X86)
1211 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
1212                  1) // __kmpc_atomic_float16_max_a16
1213 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
1214                  1) // __kmpc_atomic_float16_min_a16
1215 #endif // (KMP_ARCH_X86)
1216 #endif // KMP_HAVE_QUAD
1217 // ------------------------------------------------------------------------
1218 // Need separate macros for .EQV. because a complement (~) is needed
1219 // OP is ignored for critical sections; ^=~ is used instead
1220 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1221   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1222   OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */               \
1223   OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */        \
1224   }
1225 
1226 // ------------------------------------------------------------------------
1227 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1228 // ------------------------------------------------------------------------
1229 // X86 or X86_64: no alignment problems ===================================
1230 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1231                         GOMP_FLAG)                                             \
1232   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1233   OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */               \
1234   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1235   }
1236 // ------------------------------------------------------------------------
1237 #else
1238 // ------------------------------------------------------------------------
1239 // Code for other architectures that don't handle unaligned accesses.
1240 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
1241                         GOMP_FLAG)                                             \
1242   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1243   OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG)                                     \
1244   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1245     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1246   } else {                                                                     \
1247     KMP_CHECK_GTID;                                                            \
1248     OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */    \
1249   }                                                                            \
1250   }
1251 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
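// Note: bitwise .EQV. (XNOR) relies on the identity a EQV b == ~(a ^ b) ==
// a ^ ~b, which is why the macros above pass "^= (TYPE) ~" (critical path)
// and "^~" (compare-and-swap path) as the operator.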
1252 
1253 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
1254                KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
1255 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
1256                KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
1257 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
1258                KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
1259 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
1260                KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
1261 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
1262                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
1263 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
1264                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
1265 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
1266                 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
1267 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
1268                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
1269 
1270 // ------------------------------------------------------------------------
1271 // Routines for Extended types: long double, _Quad, complex flavours (use
1272 // critical section)
1273 //     TYPE_ID, OP_ID, TYPE - detailed above
1274 //     OP      - operator
1275 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1276 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)           \
1277   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
1278   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */           \
1279   OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */                   \
1280   }
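// For instance, __kmpc_atomic_float10_add generated below roughly does:
//   acquire the "10r" atomic lock;  *lhs = (long double)(*lhs + rhs);
//   release the lock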
1281 
1282 /* ------------------------------------------------------------------------- */
1283 // routines for long double type
1284 ATOMIC_CRITICAL(float10, add, long double, +, 10r,
1285                 1) // __kmpc_atomic_float10_add
1286 ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
1287                 1) // __kmpc_atomic_float10_sub
1288 ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
1289                 1) // __kmpc_atomic_float10_mul
1290 ATOMIC_CRITICAL(float10, div, long double, /, 10r,
1291                 1) // __kmpc_atomic_float10_div
1292 #if KMP_HAVE_QUAD
1293 // routines for _Quad type
1294 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
1295                 1) // __kmpc_atomic_float16_add
1296 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
1297                 1) // __kmpc_atomic_float16_sub
1298 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
1299                 1) // __kmpc_atomic_float16_mul
1300 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
1301                 1) // __kmpc_atomic_float16_div
1302 #if (KMP_ARCH_X86)
1303 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
1304                 1) // __kmpc_atomic_float16_add_a16
1305 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
1306                 1) // __kmpc_atomic_float16_sub_a16
1307 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
1308                 1) // __kmpc_atomic_float16_mul_a16
1309 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
1310                 1) // __kmpc_atomic_float16_div_a16
1311 #endif // (KMP_ARCH_X86)
1312 #endif // KMP_HAVE_QUAD
1313 // routines for complex types
1314 
1315 #if USE_CMPXCHG_FIX
1316 // workaround for C78287 (complex(kind=4) data type)
1317 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
1318                           1) // __kmpc_atomic_cmplx4_add
1319 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
1320                           1) // __kmpc_atomic_cmplx4_sub
1321 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
1322                           1) // __kmpc_atomic_cmplx4_mul
1323 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
1324                           1) // __kmpc_atomic_cmplx4_div
1325 // end of the workaround for C78287
1326 #else
1327 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
1328 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
1329 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
1330 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
1331 #endif // USE_CMPXCHG_FIX
1332 
1333 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
1334 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
1335 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
1336 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
1337 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
1338                 1) // __kmpc_atomic_cmplx10_add
1339 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
1340                 1) // __kmpc_atomic_cmplx10_sub
1341 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
1342                 1) // __kmpc_atomic_cmplx10_mul
1343 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
1344                 1) // __kmpc_atomic_cmplx10_div
1345 #if KMP_HAVE_QUAD
1346 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
1347                 1) // __kmpc_atomic_cmplx16_add
1348 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
1349                 1) // __kmpc_atomic_cmplx16_sub
1350 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
1351                 1) // __kmpc_atomic_cmplx16_mul
1352 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
1353                 1) // __kmpc_atomic_cmplx16_div
1354 #if (KMP_ARCH_X86)
1355 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
1356                 1) // __kmpc_atomic_cmplx16_add_a16
1357 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1358                 1) // __kmpc_atomic_cmplx16_sub_a16
1359 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
1360                 1) // __kmpc_atomic_cmplx16_mul_a16
1361 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1362                 1) // __kmpc_atomic_cmplx16_div_a16
1363 #endif // (KMP_ARCH_X86)
1364 #endif // KMP_HAVE_QUAD
1365 
1366 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1367 // Supported only on IA-32 architecture and Intel(R) 64
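// For example, a reversed update on a double such as
//   #pragma omp atomic
//   x = expr / x;
// may be lowered by the compiler to a call like
//   __kmpc_atomic_float8_div_rev(id_ref, gtid, &x, expr);
// (id_ref and gtid stand in for the usual source-location and thread-id
// arguments).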
1368 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1369 
1370 // ------------------------------------------------------------------------
1371 // Operation on *lhs, rhs bound by critical section
1372 //     OP     - operator (it's supposed to contain an assignment)
1373 //     LCK_ID - lock identifier
1374 // Note: don't check gtid as it should always be valid
1375 // 1- and 2-byte operands expect a valid gtid; other sizes check before this macro
1376 #define OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                      \
1377   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1378                                                                                \
1379   (*lhs) = (TYPE)((rhs)OP(*lhs));                                              \
1380                                                                                \
1381   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1382 
1383 #ifdef KMP_GOMP_COMPAT
1384 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)                                   \
1385   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1386     KMP_CHECK_GTID;                                                            \
1387     OP_CRITICAL_REV(TYPE, OP, 0);                                              \
1388     return;                                                                    \
1389   }
1390 
1391 #else
1392 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
1393 #endif /* KMP_GOMP_COMPAT */
1394 
1395 // Beginning of a definition (provides name, parameters, debug trace)
1396 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1397 //     fixed)
1398 //     OP_ID   - operation identifier (add, sub, mul, ...)
1399 //     TYPE    - operands' type
1400 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
1401   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid,  \
1402                                                    TYPE *lhs, TYPE rhs) {      \
1403     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1404     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));
1405 
1406 // ------------------------------------------------------------------------
1407 // Operation on *lhs, rhs using "compare_and_store" routine
1408 //     TYPE    - operands' type
1409 //     BITS    - size in bits, used to distinguish low level calls
1410 //     OP      - operator
1411 // Note: temp_val introduced in order to force the compiler to read
1412 //       *lhs only once (w/o it the compiler reads *lhs twice)
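// Note: the new value is computed as "rhs OP old_value" (not
//       "old_value OP rhs"), which is what makes this the reversed form.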
1413 #define OP_CMPXCHG_REV(TYPE, BITS, OP)                                         \
1414   {                                                                            \
1415     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1416     TYPE old_value, new_value;                                                 \
1417     temp_val = *lhs;                                                           \
1418     old_value = temp_val;                                                      \
1419     new_value = (TYPE)(rhs OP old_value);                                      \
1420     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
1421         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
1422         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
1423       KMP_DO_PAUSE;                                                            \
1424                                                                                \
1425       temp_val = *lhs;                                                         \
1426       old_value = temp_val;                                                    \
1427       new_value = (TYPE)(rhs OP old_value);                                    \
1428     }                                                                          \
1429   }
1430 
1431 // -------------------------------------------------------------------------
1432 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG)  \
1433   ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
1434   OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
1435   OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
1436   }
1437 
1438 // ------------------------------------------------------------------------
1439 // Entry definitions for integer and real operands
1440 //     TYPE_ID - operands type and size (fixed4, float4)
1441 //     OP_ID   - operation identifier (add, sub, mul, ...)
1442 //     TYPE    - operand type
1443 //     BITS    - size in bits, used to distinguish low level calls
1444 //     OP      - operator (used in critical section)
1445 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1446 
1447 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,GOMP_FLAG
1448 // ------------------------------------------------------------------------
1449 // Routines for ATOMIC integer and real operands, reversed operators
1450 // ------------------------------------------------------------------------
1451 //                  TYPE_ID,OP_ID, TYPE,    BITS, OP, LCK_ID, GOMP_FLAG
1452 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
1453                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
1454 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
1455                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
1456 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
1457                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
1458 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
1459                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
1460 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
1461                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
1462 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
1463                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev
1464 
1465 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
1466                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
1467 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
1468                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
1469 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
1470                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
1471 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
1472                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
1473 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
1474                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
1475 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
1476                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev
1477 
1478 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
1479                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
1480 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
1481                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
1482 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
1483                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
1484 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
1485                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
1486 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
1487                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
1488 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
1489                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev
1490 
1491 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
1492                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
1493 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
1494                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
1495 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
1496                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
1497 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
1498                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
1499 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
1500                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
1501 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
1502                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev
1503 
1504 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
1505                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
1506 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
1507                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
1508 
1509 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
1510                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
1511 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
1512                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
1513 //                  TYPE_ID,OP_ID, TYPE,     BITS,OP,LCK_ID, GOMP_FLAG
1514 
1515 // ------------------------------------------------------------------------
1516 // Routines for Extended types: long double, _Quad, complex flavours (use
1517 // critical section)
1518 //     TYPE_ID, OP_ID, TYPE - detailed above
1519 //     OP      - operator
1520 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1521 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
1522   ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                 \
1523   OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
1524   OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                            \
1525   }
1526 
1527 /* ------------------------------------------------------------------------- */
1528 // routines for long double type
1529 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
1530                     1) // __kmpc_atomic_float10_sub_rev
1531 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
1532                     1) // __kmpc_atomic_float10_div_rev
1533 #if KMP_HAVE_QUAD
1534 // routines for _Quad type
1535 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
1536                     1) // __kmpc_atomic_float16_sub_rev
1537 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
1538                     1) // __kmpc_atomic_float16_div_rev
1539 #if (KMP_ARCH_X86)
1540 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
1541                     1) // __kmpc_atomic_float16_sub_a16_rev
1542 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
1543                     1) // __kmpc_atomic_float16_div_a16_rev
1544 #endif // KMP_ARCH_X86
1545 #endif // KMP_HAVE_QUAD
1546 
1547 // routines for complex types
1548 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
1549                     1) // __kmpc_atomic_cmplx4_sub_rev
1550 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
1551                     1) // __kmpc_atomic_cmplx4_div_rev
1552 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
1553                     1) // __kmpc_atomic_cmplx8_sub_rev
1554 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
1555                     1) // __kmpc_atomic_cmplx8_div_rev
1556 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
1557                     1) // __kmpc_atomic_cmplx10_sub_rev
1558 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
1559                     1) // __kmpc_atomic_cmplx10_div_rev
1560 #if KMP_HAVE_QUAD
1561 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
1562                     1) // __kmpc_atomic_cmplx16_sub_rev
1563 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
1564                     1) // __kmpc_atomic_cmplx16_div_rev
1565 #if (KMP_ARCH_X86)
1566 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
1567                     1) // __kmpc_atomic_cmplx16_sub_a16_rev
1568 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
1569                     1) // __kmpc_atomic_cmplx16_div_a16_rev
1570 #endif // KMP_ARCH_X86
1571 #endif // KMP_HAVE_QUAD
1572 
1573 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
1574 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1575 
1576 /* ------------------------------------------------------------------------ */
1577 /* Routines for mixed types of LHS and RHS, when RHS is "larger"            */
1578 /* Note: in order to reduce the total number of type combinations it is    */
1579 /*       assumed that the compiler converts the RHS to the longest         */
1580 /*       floating type, that is _Quad, before calling any of these routines*/
1581 /* The conversion to _Quad is done by the compiler during the calculation, */
1582 /*    and the conversion back to TYPE happens before the assignment, like: */
1583 /*    *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                 */
1584 /* A performance penalty is expected because of the software emulation.    */
1585 /* ------------------------------------------------------------------------ */
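// For example, with a 4-byte integer LHS and a _Quad RHS the compiler may
// emit (when KMP_HAVE_QUAD is defined)
//   __kmpc_atomic_fixed4_mul_fp(id_ref, gtid, &x, rhs);
// which atomically performs x = (kmp_int32)((_Quad)x * rhs).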
1586 
1587 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                \
1588   void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
1589       ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) {                       \
1590     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1591     KA_TRACE(100,                                                              \
1592              ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
1593               gtid));
1594 
1595 // -------------------------------------------------------------------------
1596 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID,  \
1597                            GOMP_FLAG)                                          \
1598   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1599   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */           \
1600   OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */                   \
1601   }
1602 
1603 // -------------------------------------------------------------------------
1604 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1605 // -------------------------------------------------------------------------
1606 // X86 or X86_64: no alignment problems ====================================
1607 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
1608                            LCK_ID, MASK, GOMP_FLAG)                            \
1609   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1610   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1611   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1612   }
1613 // -------------------------------------------------------------------------
1614 #else
1615 // ------------------------------------------------------------------------
1616 // Code for other architectures that don't handle unaligned accesses.
1617 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,    \
1618                            LCK_ID, MASK, GOMP_FLAG)                            \
1619   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1620   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1621   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1622     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1623   } else {                                                                     \
1624     KMP_CHECK_GTID;                                                            \
1625     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
1626                        LCK_ID) /* unaligned address - use critical */          \
1627   }                                                                            \
1628   }
1629 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1630 
1631 // -------------------------------------------------------------------------
1632 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1633 // -------------------------------------------------------------------------
1634 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
1635                                RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
1636   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1637   OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
1638   OP_CMPXCHG_REV(TYPE, BITS, OP)                                               \
1639   }
1640 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,      \
1641                                LCK_ID, GOMP_FLAG)                              \
1642   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1643   OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG)                                    \
1644   OP_CRITICAL_REV(TYPE, OP, LCK_ID)                                            \
1645   }
1646 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1647 
1648 // RHS=float8
1649 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
1650                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
1651 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
1652                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
1653 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
1654                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
1655 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
1656                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
1657 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
1658                    0) // __kmpc_atomic_fixed4_mul_float8
1659 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
1660                    0) // __kmpc_atomic_fixed4_div_float8
1661 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
1662                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
1663 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
1664                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
1665 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
1666                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
1667 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
1668                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
1669 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
1670                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
1671 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
1672                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
1673 
1674 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not
1675 // use them)
1676 #if KMP_HAVE_QUAD
1677 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
1678                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
1679 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
1680                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
1681 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
1682                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
1683 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
1684                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
1685 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
1686                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
1687 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
1688                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
1689 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
1690                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
1691 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
1692                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp
1693 
1694 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
1695                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
1696 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
1697                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
1698 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
1699                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
1700 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
1701                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
1702 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
1703                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
1704 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
1705                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
1706 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
1707                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
1708 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
1709                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
1710 
1711 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
1712                    0) // __kmpc_atomic_fixed4_add_fp
1713 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
1714                    0) // __kmpc_atomic_fixed4u_add_fp
1715 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
1716                    0) // __kmpc_atomic_fixed4_sub_fp
1717 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
1718                    0) // __kmpc_atomic_fixed4u_sub_fp
1719 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
1720                    0) // __kmpc_atomic_fixed4_mul_fp
1721 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
1722                    0) // __kmpc_atomic_fixed4u_mul_fp
1723 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
1724                    0) // __kmpc_atomic_fixed4_div_fp
1725 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
1726                    0) // __kmpc_atomic_fixed4u_div_fp
1727 
1728 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
1729                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
1730 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
1731                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
1732 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
1733                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
1734 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
1735                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
1736 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
1737                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
1738 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
1739                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
1740 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
1741                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
1742 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
1743                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp
1744 
1745 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
1746                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
1747 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
1748                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
1749 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
1750                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
1751 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
1752                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp
1753 
1754 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
1755                    KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
1756 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
1757                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
1758 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
1759                    KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
1760 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
1761                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp
1762 
1763 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
1764                    1) // __kmpc_atomic_float10_add_fp
1765 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
1766                    1) // __kmpc_atomic_float10_sub_fp
1767 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
1768                    1) // __kmpc_atomic_float10_mul_fp
1769 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
1770                    1) // __kmpc_atomic_float10_div_fp
1771 
1772 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1773 // Reverse operations
1774 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
1775                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
1776 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
1777                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
1778 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
1779                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
1780 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
1781                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp
1782 
1783 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
1784                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
1785 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
1786                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
1787 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
1788                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
1789 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
1790                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp
1791 
1792 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1793                        0) // __kmpc_atomic_fixed4_sub_rev_fp
1794 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
1795                        0) // __kmpc_atomic_fixed4u_sub_rev_fp
1796 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
1797                        0) // __kmpc_atomic_fixed4_div_rev_fp
1798 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
1799                        0) // __kmpc_atomic_fixed4u_div_rev_fp
1800 
1801 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1802                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
1803 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
1804                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
1805 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
1806                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
1807 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
1808                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp
1809 
1810 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
1811                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
1812 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1813                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1814 
1815 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1816                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1817 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1818                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1819 
1820 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1821                        1) // __kmpc_atomic_float10_sub_rev_fp
1822 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1823                        1) // __kmpc_atomic_float10_div_rev_fp
1824 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1825 
1826 #endif // KMP_HAVE_QUAD
1827 
1828 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1829 // ------------------------------------------------------------------------
1830 // X86 or X86_64: no alignment problems ====================================
1831 #if USE_CMPXCHG_FIX
1832 // workaround for C78287 (complex(kind=4) data type)
1833 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1834                              LCK_ID, MASK, GOMP_FLAG)                          \
1835   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1836   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1837   OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                        \
1838   }
1839 // end of the second part of the workaround for C78287
1840 #else
1841 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1842                              LCK_ID, MASK, GOMP_FLAG)                          \
1843   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1844   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1845   OP_CMPXCHG(TYPE, BITS, OP)                                                   \
1846   }
1847 #endif // USE_CMPXCHG_FIX
1848 #else
1849 // ------------------------------------------------------------------------
1850 // Code for other architectures that don't handle unaligned accesses.
1851 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,  \
1852                              LCK_ID, MASK, GOMP_FLAG)                          \
1853   ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                      \
1854   OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG)                                 \
1855   if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                      \
1856     OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                           \
1857   } else {                                                                     \
1858     KMP_CHECK_GTID;                                                            \
1859     OP_UPDATE_CRITICAL(TYPE, OP,                                               \
1860                        LCK_ID) /* unaligned address - use critical */          \
1861   }                                                                            \
1862   }
1863 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1864 
1865 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1866                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1867 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1868                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1869 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1870                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1871 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1872                      7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
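// For example, adding a double-complex value to a float-complex location may
// be lowered to
//   __kmpc_atomic_cmplx4_add_cmplx8(id_ref, gtid, &c4, c8);
// which atomically performs c4 = (kmp_cmplx32)((kmp_cmplx64)c4 + c8).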
1873 
1874 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1875 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1876 
1877 // ------------------------------------------------------------------------
1878 // Atomic READ routines
1879 
1880 // ------------------------------------------------------------------------
1881 // Beginning of a definition (provides name, parameters, debug trace)
1882 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1883 //     fixed)
1884 //     OP_ID   - operation identifier (add, sub, mul, ...)
1885 //     TYPE    - operands' type
1886 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE)                      \
1887   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
1888                                              TYPE *loc) {                      \
1889     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
1890     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1891 
1892 // ------------------------------------------------------------------------
1893 // Operation on *lhs, rhs using "compare_and_store_ret" routine
1894 //     TYPE    - operands' type
1895 //     BITS    - size in bits, used to distinguish low level calls
1896 //     OP      - operator
1897 // Note: temp_val introduced in order to force the compiler to read
1898 //       *lhs only once (w/o it the compiler reads *lhs twice)
1899 // TODO: check if it is still necessary
1900 // Return old value regardless of the result of the "compare & swap" operation
1901 #define OP_CMPXCHG_READ(TYPE, BITS, OP)                                        \
1902   {                                                                            \
1903     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
1904     union f_i_union {                                                          \
1905       TYPE f_val;                                                              \
1906       kmp_int##BITS i_val;                                                     \
1907     };                                                                         \
1908     union f_i_union old_value;                                                 \
1909     temp_val = *loc;                                                           \
1910     old_value.f_val = temp_val;                                                \
1911     old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS(                         \
1912         (kmp_int##BITS *)loc,                                                  \
1913         *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val,                     \
1914         *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val);                    \
1915     new_value = old_value.f_val;                                               \
1916     return new_value;                                                          \
1917   }
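// Note: the compare value and the exchange value are the same, so *loc is
// never modified; the returned value is simply the current contents of *loc,
// i.e. the compare-and-store acts as an atomic load.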
1918 
1919 // -------------------------------------------------------------------------
1920 // Operation on *lhs, rhs bound by critical section
1921 //     OP     - operator (it's supposed to contain an assignment)
1922 //     LCK_ID - lock identifier
1923 // Note: don't check gtid as it should always be valid
1924 // 1- and 2-byte operands expect a valid gtid; other sizes check before this macro
1925 #define OP_CRITICAL_READ(OP, LCK_ID)                                           \
1926   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1927                                                                                \
1928   new_value = (*loc);                                                          \
1929                                                                                \
1930   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1931 
1932 // -------------------------------------------------------------------------
1933 #ifdef KMP_GOMP_COMPAT
1934 #define OP_GOMP_CRITICAL_READ(OP, FLAG)                                        \
1935   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1936     KMP_CHECK_GTID;                                                            \
1937     OP_CRITICAL_READ(OP, 0);                                                   \
1938     return new_value;                                                          \
1939   }
1940 #else
1941 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1942 #endif /* KMP_GOMP_COMPAT */
1943 
1944 // -------------------------------------------------------------------------
1945 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
1946   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1947   TYPE new_value;                                                              \
1948   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
1949   new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0);                              \
1950   return new_value;                                                            \
1951   }
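// Note: atomically adding 0 leaves *loc unchanged and returns its previous
// contents, so the fetch-and-add above acts as an atomic load.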
1952 // -------------------------------------------------------------------------
1953 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
1954   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1955   TYPE new_value;                                                              \
1956   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG)                                     \
1957   OP_CMPXCHG_READ(TYPE, BITS, OP)                                              \
1958   }
1959 // ------------------------------------------------------------------------
1960 // Routines for Extended types: long double, _Quad, complex flavours (use
1961 // critical section)
1962 //     TYPE_ID, OP_ID, TYPE - detailed above
1963 //     OP      - operator
1964 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1965 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
1966   ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE)                                \
1967   TYPE new_value;                                                              \
1968   OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */               \
1969   OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */                           \
1970   return new_value;                                                            \
1971   }
1972 
1973 // ------------------------------------------------------------------------
1974 // Fix for cmplx4 read (CQ220361) on Windows* OS: the regular routine with a
1975 // return value doesn't work there, so the read value is returned through an
1976 // additional output parameter instead.
1977 #if (KMP_OS_WINDOWS)
1978 
1979 #define OP_CRITICAL_READ_WRK(OP, LCK_ID)                                       \
1980   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
1981                                                                                \
1982   (*out) = (*loc);                                                             \
1983                                                                                \
1984   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1985 // ------------------------------------------------------------------------
1986 #ifdef KMP_GOMP_COMPAT
1987 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)                                    \
1988   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
1989     KMP_CHECK_GTID;                                                            \
1990     OP_CRITICAL_READ_WRK(OP, 0);                                               \
1991   }
1992 #else
1993 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
1994 #endif /* KMP_GOMP_COMPAT */
1995 // ------------------------------------------------------------------------
1996 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                            \
1997   void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
1998                                          TYPE *loc) {                          \
1999     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2000     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
2001 
2002 // ------------------------------------------------------------------------
2003 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)  \
2004   ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE)                                  \
2005   OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */           \
2006   OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */                       \
2007   }
2008 
2009 #endif // KMP_OS_WINDOWS
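// For illustration, on Windows* OS the cmplx4 read entry declared below
// roughly expands to
//   void __kmpc_atomic_cmplx4_rd(kmp_cmplx32 *out, ident_t *id_ref, int gtid,
//                                kmp_cmplx32 *loc);
// while on other platforms it keeps the usual value-returning form
//   kmp_cmplx32 __kmpc_atomic_cmplx4_rd(ident_t *id_ref, int gtid,
//                                       kmp_cmplx32 *loc);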
2010 
2011 // ------------------------------------------------------------------------
2012 //                  TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
2013 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
2014 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
2015                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
2016 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
2017                     KMP_ARCH_X86) // __kmpc_atomic_float4_rd
2018 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
2019                     KMP_ARCH_X86) // __kmpc_atomic_float8_rd
2020 
2021 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic
2022 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
2023                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
2024 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
2025                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd
2026 
2027 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
2028                      1) // __kmpc_atomic_float10_rd
2029 #if KMP_HAVE_QUAD
2030 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
2031                      1) // __kmpc_atomic_float16_rd
2032 #endif // KMP_HAVE_QUAD
2033 
2034 // Fix for CQ220361 on Windows* OS
2035 #if (KMP_OS_WINDOWS)
2036 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
2037                          1) // __kmpc_atomic_cmplx4_rd
2038 #else
2039 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
2040                      1) // __kmpc_atomic_cmplx4_rd
2041 #endif // (KMP_OS_WINDOWS)
2042 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
2043                      1) // __kmpc_atomic_cmplx8_rd
2044 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
2045                      1) // __kmpc_atomic_cmplx10_rd
2046 #if KMP_HAVE_QUAD
2047 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
2048                      1) // __kmpc_atomic_cmplx16_rd
2049 #if (KMP_ARCH_X86)
2050 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
2051                      1) // __kmpc_atomic_float16_a16_rd
2052 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
2053                      1) // __kmpc_atomic_cmplx16_a16_rd
2054 #endif // (KMP_ARCH_X86)
2055 #endif // KMP_HAVE_QUAD
2056 
2057 // ------------------------------------------------------------------------
2058 // Atomic WRITE routines
2059 
2060 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)              \
2061   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2062   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2063   KMP_XCHG_FIXED##BITS(lhs, rhs);                                              \
2064   }
2065 // ------------------------------------------------------------------------
2066 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2067   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2068   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2069   KMP_XCHG_REAL##BITS(lhs, rhs);                                               \
2070   }
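// Illustrative use (a hypothetical compiler lowering of
//   #pragma omp atomic write
//   x = new_val;
// for a 32-bit integer; id_ref, gtid and new_val are placeholders):
//   __kmpc_atomic_fixed4_wr(id_ref, gtid, &x, new_val);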
2071 
2072 // ------------------------------------------------------------------------
2073 // Operation on *lhs, rhs using "compare_and_store" routine
2074 //     TYPE    - operands' type
2075 //     BITS    - size in bits, used to distinguish low level calls
2076 //     OP      - operator
2077 // Note: temp_val introduced in order to force the compiler to read
2078 //       *lhs only once (w/o it the compiler reads *lhs twice)
2079 #define OP_CMPXCHG_WR(TYPE, BITS, OP)                                          \
2080   {                                                                            \
2081     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2082     TYPE old_value, new_value;                                                 \
2083     temp_val = *lhs;                                                           \
2084     old_value = temp_val;                                                      \
2085     new_value = rhs;                                                           \
2086     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2087         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2088         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2089       temp_val = *lhs;                                                         \
2090       old_value = temp_val;                                                    \
2091       new_value = rhs;                                                         \
2092     }                                                                          \
2093   }
2094 
2095 // -------------------------------------------------------------------------
2096 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
2097   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2098   OP_GOMP_CRITICAL(OP, GOMP_FLAG)                                              \
2099   OP_CMPXCHG_WR(TYPE, BITS, OP)                                                \
2100   }
2101 
2102 // ------------------------------------------------------------------------
2103 // Routines for Extended types: long double, _Quad, complex flavours (use
2104 // critical section)
2105 //     TYPE_ID, OP_ID, TYPE - detailed above
2106 //     OP      - operator
2107 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
2108 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)        \
2109   ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                     \
2110   OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */                        \
2111   OP_CRITICAL(OP, LCK_ID) /* send assignment */                                \
2112   }
2113 // -------------------------------------------------------------------------
2114 
2115 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2116                KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2117 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2118                KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2119 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2120                KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2121 #if (KMP_ARCH_X86)
2122 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2123                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2124 #else
2125 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2126                KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2127 #endif // (KMP_ARCH_X86)
2128 
2129 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2130                      KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2131 #if (KMP_ARCH_X86)
2132 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2133                   KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2134 #else
2135 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2136                      KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2137 #endif // (KMP_ARCH_X86)
2138 
2139 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2140                    1) // __kmpc_atomic_float10_wr
2141 #if KMP_HAVE_QUAD
2142 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2143                    1) // __kmpc_atomic_float16_wr
2144 #endif // KMP_HAVE_QUAD
2145 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2146 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2147                    1) // __kmpc_atomic_cmplx8_wr
2148 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2149                    1) // __kmpc_atomic_cmplx10_wr
2150 #if KMP_HAVE_QUAD
2151 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2152                    1) // __kmpc_atomic_cmplx16_wr
2153 #if (KMP_ARCH_X86)
2154 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2155                    1) // __kmpc_atomic_float16_a16_wr
2156 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2157                    1) // __kmpc_atomic_cmplx16_a16_wr
2158 #endif // (KMP_ARCH_X86)
2159 #endif // KMP_HAVE_QUAD
2160 
2161 // ------------------------------------------------------------------------
2162 // Atomic CAPTURE routines
2163 
2164 // Beginning of a definition (provides name, parameters, debug trace)
2165 //     TYPE_ID - operands' type and size (fixed* for signed, fixed*u for
2166 //     unsigned fixed-width integers)
2167 //     OP_ID   - operation identifier (add, sub, mul, ...)
2168 //     TYPE    - operands' type
2169 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE)                       \
2170   RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,        \
2171                                              TYPE *lhs, TYPE rhs, int flag) {  \
2172     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2173     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
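// For example, ATOMIC_BEGIN_CPT(fixed4, add_cpt, kmp_int32, kmp_int32) opens a
// definition with this signature (the body is supplied by the macros below):
//
//   kmp_int32 __kmpc_atomic_fixed4_add_cpt(ident_t *id_ref, int gtid,
//                                          kmp_int32 *lhs, kmp_int32 rhs,
//                                          int flag);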
2174 
2175 // -------------------------------------------------------------------------
2176 // Operation on *lhs, rhs bound by critical section
2177 //     OP     - operator (it's supposed to contain an assignment)
2178 //     LCK_ID - lock identifier
2179 // Note: don't check gtid as it should always be valid
2180 // 1, 2-byte - expect valid parameter, other - check before this macro
2181 #define OP_CRITICAL_CPT(OP, LCK_ID)                                            \
2182   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2183                                                                                \
2184   if (flag) {                                                                  \
2185     (*lhs) OP rhs;                                                             \
2186     new_value = (*lhs);                                                        \
2187   } else {                                                                     \
2188     new_value = (*lhs);                                                        \
2189     (*lhs) OP rhs;                                                             \
2190   }                                                                            \
2191                                                                                \
2192   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2193   return new_value;
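// Illustrative sketch: with OP '+=' the capture above reduces to
//
//   if (flag) { (*lhs) += rhs; new_value = (*lhs); }  // capture the new value
//   else      { new_value = (*lhs); (*lhs) += rhs; }  // capture the old value
//   return new_value;
//
// with the whole update guarded by the per-type atomic lock.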
2194 
2195 #define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID)                               \
2196   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2197                                                                                \
2198   if (flag) {                                                                  \
2199     (*lhs) = (TYPE)((*lhs)OP rhs);                                             \
2200     new_value = (*lhs);                                                        \
2201   } else {                                                                     \
2202     new_value = (*lhs);                                                        \
2203     (*lhs) = (TYPE)((*lhs)OP rhs);                                             \
2204   }                                                                            \
2205                                                                                \
2206   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2207   return new_value;
2208 
2209 // ------------------------------------------------------------------------
2210 #ifdef KMP_GOMP_COMPAT
2211 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)                                   \
2212   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2213     KMP_CHECK_GTID;                                                            \
2214     OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0);                                       \
2215   }
2216 #else
2217 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
2218 #endif /* KMP_GOMP_COMPAT */
2219 
2220 // ------------------------------------------------------------------------
2221 // Operation on *lhs, rhs using "compare_and_store" routine
2222 //     TYPE    - operands' type
2223 //     BITS    - size in bits, used to distinguish low level calls
2224 //     OP      - operator
2225 // Note: temp_val introduced in order to force the compiler to read
2226 //       *lhs only once (w/o it the compiler reads *lhs twice)
2227 #define OP_CMPXCHG_CPT(TYPE, BITS, OP)                                         \
2228   {                                                                            \
2229     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2230     TYPE old_value, new_value;                                                 \
2231     temp_val = *lhs;                                                           \
2232     old_value = temp_val;                                                      \
2233     new_value = (TYPE)(old_value OP rhs);                                      \
2234     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2235         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2236         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2237       temp_val = *lhs;                                                         \
2238       old_value = temp_val;                                                    \
2239       new_value = (TYPE)(old_value OP rhs);                                    \
2240     }                                                                          \
2241     if (flag) {                                                                \
2242       return new_value;                                                        \
2243     } else                                                                     \
2244       return old_value;                                                        \
2245   }
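// Illustrative sketch: for a 32-bit '+' the loop above is roughly
//
//   kmp_int32 old_value = *lhs;
//   kmp_int32 new_value = old_value + rhs;
//   while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, old_value,
//                                        new_value)) {
//     old_value = *lhs;               // lost the race: re-read and recompute
//     new_value = old_value + rhs;
//   }
//   return flag ? new_value : old_value;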
2246 
2247 // -------------------------------------------------------------------------
2248 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)          \
2249   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2250   TYPE new_value;                                                              \
2251   (void)new_value;                                                             \
2252   OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
2253   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2254   }
2255 
2256 // -------------------------------------------------------------------------
2257 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2258   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2259   TYPE old_value, new_value;                                                   \
2260   (void)new_value;                                                             \
2261   OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
2262   /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */            \
2263   old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                            \
2264   if (flag) {                                                                  \
2265     return old_value OP rhs;                                                   \
2266   } else                                                                       \
2267     return old_value;                                                          \
2268   }
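// For example, ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -, 0)
// implements the subtraction as a fetch-and-add of the negated operand:
//
//   old_value = KMP_TEST_THEN_ADD32(lhs, -rhs); // atomically *lhs += -rhs
//   return flag ? old_value - rhs : old_value;  // new value vs. old value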
2269 // -------------------------------------------------------------------------
2270 
2271 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
2272                      0) // __kmpc_atomic_fixed4_add_cpt
2273 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
2274                      0) // __kmpc_atomic_fixed4_sub_cpt
2275 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
2276                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
2277 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
2278                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt
2279 
2280 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
2281                    KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
2282 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
2283                    KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
2284 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
2285                    KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
2286 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
2287                    KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
2288 
2289 // ------------------------------------------------------------------------
2290 // Entry definitions for integer operands
2291 //     TYPE_ID - operands type and size (fixed4, float4)
2292 //     OP_ID   - operation identifier (add, sub, mul, ...)
2293 //     TYPE    - operand type
2294 //     BITS    - size in bits, used to distinguish low level calls
2295 //     OP      - operator (used in critical section)
2296 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,GOMP_FLAG
2297 // ------------------------------------------------------------------------
2298 // Routines for ATOMIC integer operands, other operators
2299 // ------------------------------------------------------------------------
2300 //              TYPE_ID,OP_ID, TYPE,   BITS, OP,  GOMP_FLAG
2301 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
2302                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
2303 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
2304                    0) // __kmpc_atomic_fixed1_andb_cpt
2305 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
2306                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
2307 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
2308                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
2309 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
2310                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
2311 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
2312                    0) // __kmpc_atomic_fixed1_orb_cpt
2313 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
2314                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
2315 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
2316                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
2317 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
2318                    KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
2319 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
2320                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
2321 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
2322                    0) // __kmpc_atomic_fixed1_xor_cpt
2323 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
2324                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
2325 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
2326                    0) // __kmpc_atomic_fixed2_andb_cpt
2327 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
2328                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
2329 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
2330                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
2331 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
2332                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2333 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2334                    0) // __kmpc_atomic_fixed2_orb_cpt
2335 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2336                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2337 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2338                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2339 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2340                    KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2341 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2342                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2343 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2344                    0) // __kmpc_atomic_fixed2_xor_cpt
2345 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2346                    0) // __kmpc_atomic_fixed4_andb_cpt
2347 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2348                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2349 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2350                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2351 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2352                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2353 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2354                    0) // __kmpc_atomic_fixed4_orb_cpt
2355 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2356                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2357 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2358                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2359 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2360                    KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2361 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2362                    0) // __kmpc_atomic_fixed4_xor_cpt
2363 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2364                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2365 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2366                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2367 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2368                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2369 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2370                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2371 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2372                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2373 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2374                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2375 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2376                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2377 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2378                    KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2379 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2380                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2381 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2382                    KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2383 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2384                    KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2385 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2386                    KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2387 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2388                    KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2389 //              TYPE_ID,OP_ID, TYPE,   BITS, OP,  GOMP_FLAG
2390 
2391 // CAPTURE routines for mixed types RHS=float16
2392 #if KMP_HAVE_QUAD
2393 
2394 // Beginning of a definition (provides name, parameters, debug trace)
2395 //     TYPE_ID - operands' type and size (fixed* for signed, fixed*u for
2396 //     unsigned fixed-width integers)
2397 //     OP_ID   - operation identifier (add, sub, mul, ...)
2398 //     TYPE    - operands' type
2399 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)            \
2400   TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                         \
2401       ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) {             \
2402     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2403     KA_TRACE(100,                                                              \
2404              ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",   \
2405               gtid));
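// For example, the fixed4/_Quad instantiation below expands to a routine with
// this signature, i.e. the capture keeps the LHS type while the RHS is the
// wider float16 operand:
//
//   kmp_int32 __kmpc_atomic_fixed4_add_cpt_fp(ident_t *id_ref, int gtid,
//                                             kmp_int32 *lhs, _Quad rhs,
//                                             int flag);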
2406 
2407 // -------------------------------------------------------------------------
2408 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,       \
2409                                RTYPE, LCK_ID, MASK, GOMP_FLAG)                 \
2410   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
2411   TYPE new_value;                                                              \
2412   OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG)                                    \
2413   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2414   }
2415 
2416 // -------------------------------------------------------------------------
2417 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE,     \
2418                                 LCK_ID, GOMP_FLAG)                             \
2419   ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
2420   TYPE new_value;                                                              \
2421   OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */              \
2422   OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */               \
2423   }
2424 
2425 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
2426                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
2427 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
2428                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
2429 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2430                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
2431 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
2432                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
2433 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2434                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
2435 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
2436                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
2437 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
2438                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
2439 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
2440                        KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp
2441 
2442 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
2443                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
2444 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
2445                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
2446 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2447                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
2448 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
2449                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
2450 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2451                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
2452 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
2453                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
2454 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
2455                        KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
2456 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
2457                        KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
2458 
2459 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2460                        0) // __kmpc_atomic_fixed4_add_cpt_fp
2461 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
2462                        0) // __kmpc_atomic_fixed4u_add_cpt_fp
2463 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2464                        0) // __kmpc_atomic_fixed4_sub_cpt_fp
2465 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
2466                        0) // __kmpc_atomic_fixed4u_sub_cpt_fp
2467 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2468                        0) // __kmpc_atomic_fixed4_mul_cpt_fp
2469 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
2470                        0) // __kmpc_atomic_fixed4u_mul_cpt_fp
2471 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2472                        0) // __kmpc_atomic_fixed4_div_cpt_fp
2473 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
2474                        0) // __kmpc_atomic_fixed4u_div_cpt_fp
2475 
2476 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2477                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
2478 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
2479                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
2480 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2481                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
2482 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
2483                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
2484 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2485                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
2486 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
2487                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
2488 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2489                        KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
2490 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
2491                        KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp
2492 
2493 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
2494                        KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
2495 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
2496                        KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
2497 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
2498                        KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
2499 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
2500                        KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp
2501 
2502 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
2503                        KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
2504 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
2505                        KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
2506 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
2507                        KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
2508 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
2509                        KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp
2510 
2511 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
2512                         1) // __kmpc_atomic_float10_add_cpt_fp
2513 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
2514                         1) // __kmpc_atomic_float10_sub_cpt_fp
2515 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
2516                         1) // __kmpc_atomic_float10_mul_cpt_fp
2517 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
2518                         1) // __kmpc_atomic_float10_div_cpt_fp
2519 
2520 #endif // KMP_HAVE_QUAD
2521 
2522 // ------------------------------------------------------------------------
2523 // Routines for C/C++ Reduction operators && and ||
2524 
2525 // -------------------------------------------------------------------------
2526 // Operation on *lhs, rhs bound by critical section
2527 //     OP     - operator (it's supposed to contain an assignment)
2528 //     LCK_ID - lock identifier
2529 // Note: don't check gtid as it should always be valid
2530 // 1, 2-byte - expect valid parameter, other - check before this macro
2531 #define OP_CRITICAL_L_CPT(OP, LCK_ID)                                          \
2532   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2533                                                                                \
2534   if (flag) {                                                                  \
2535     new_value OP rhs;                                                          \
2536     (*lhs) = new_value;                                                        \
2537   } else {                                                                     \
2538     new_value = (*lhs);                                                        \
2539     (*lhs) OP rhs;                                                             \
2540   }                                                                            \
2541                                                                                \
2542   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
2543 
2544 // ------------------------------------------------------------------------
2545 #ifdef KMP_GOMP_COMPAT
2546 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)                                       \
2547   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2548     KMP_CHECK_GTID;                                                            \
2549     OP_CRITICAL_L_CPT(OP, 0);                                                  \
2550     return new_value;                                                          \
2551   }
2552 #else
2553 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
2554 #endif /* KMP_GOMP_COMPAT */
2555 
2556 // ------------------------------------------------------------------------
2557 // Need separate macros for &&, || because there is no combined assignment
2558 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)           \
2559   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2560   TYPE new_value;                                                              \
2561   (void)new_value;                                                             \
2562   OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG)                                 \
2563   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2564   }
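// Illustrative note: C has no "&&=" / "||=", so the whole assignment is passed
// as the operator text. With OP '&&' the GOMP fallback above becomes
// OP_CRITICAL_L_CPT(= *lhs &&, 0), whose captured-after branch reads
//
//   new_value = *lhs && rhs;
//   (*lhs) = new_value;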
2565 
2566 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
2567                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
2568 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
2569                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
2570 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
2571                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
2572 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
2573                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
2574 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
2575                   0) // __kmpc_atomic_fixed4_andl_cpt
2576 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
2577                   0) // __kmpc_atomic_fixed4_orl_cpt
2578 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
2579                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
2580 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
2581                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
2582 
2583 // -------------------------------------------------------------------------
2584 // Routines for Fortran operators that have no C counterpart:
2585 // MAX, MIN, .EQV., .NEQV.
2586 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2587 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2588 
2589 // -------------------------------------------------------------------------
2590 // MIN and MAX need separate macros
2591 // OP - operator used to check whether any action is needed
2592 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                       \
2593   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2594                                                                                \
2595   if (*lhs OP rhs) { /* still need actions? */                                 \
2596     old_value = *lhs;                                                          \
2597     *lhs = rhs;                                                                \
2598     if (flag)                                                                  \
2599       new_value = rhs;                                                         \
2600     else                                                                       \
2601       new_value = old_value;                                                   \
2602   } else {                                                                     \
2603     new_value = *lhs;                                                          \
2604   }                                                                            \
2605   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2606   return new_value;
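// Illustrative note: for max_cpt the instantiations pass OP '<', so
// "*lhs OP rhs" reads "*lhs < rhs", i.e. the stored value loses the comparison
// and must be replaced by rhs; min_cpt passes '>' for the symmetric case.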
2607 
2608 // -------------------------------------------------------------------------
2609 #ifdef KMP_GOMP_COMPAT
2610 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)                                    \
2611   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2612     KMP_CHECK_GTID;                                                            \
2613     MIN_MAX_CRITSECT_CPT(OP, 0);                                               \
2614   }
2615 #else
2616 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
2617 #endif /* KMP_GOMP_COMPAT */
2618 
2619 // -------------------------------------------------------------------------
2620 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                    \
2621   {                                                                            \
2622     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2623     /*TYPE old_value; */                                                       \
2624     temp_val = *lhs;                                                           \
2625     old_value = temp_val;                                                      \
2626     while (old_value OP rhs && /* still need actions? */                       \
2627            !KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2628                (kmp_int##BITS *)lhs,                                           \
2629                *VOLATILE_CAST(kmp_int##BITS *) & old_value,                    \
2630                *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                       \
2631       temp_val = *lhs;                                                         \
2632       old_value = temp_val;                                                    \
2633     }                                                                          \
2634     if (flag)                                                                  \
2635       return rhs;                                                              \
2636     else                                                                       \
2637       return old_value;                                                        \
2638   }
2639 
2640 // -------------------------------------------------------------------------
2641 // 1-byte, 2-byte operands - use critical section
2642 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)      \
2643   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2644   TYPE new_value, old_value;                                                   \
2645   if (*lhs OP rhs) { /* need actions? */                                       \
2646     GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
2647     MIN_MAX_CRITSECT_CPT(OP, LCK_ID)                                           \
2648   }                                                                            \
2649   return *lhs;                                                                 \
2650   }
2651 
2652 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)        \
2653   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2654   TYPE new_value, old_value;                                                   \
2655   (void)new_value;                                                             \
2656   if (*lhs OP rhs) {                                                           \
2657     GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG)                                   \
2658     MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP)                                        \
2659   }                                                                            \
2660   return *lhs;                                                                 \
2661   }
2662 
2663 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
2664                      KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
2665 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
2666                      KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
2667 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
2668                      KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
2669 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
2670                      KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
2671 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
2672                      0) // __kmpc_atomic_fixed4_max_cpt
2673 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
2674                      0) // __kmpc_atomic_fixed4_min_cpt
2675 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
2676                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
2677 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
2678                      KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
2679 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
2680                      KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
2681 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
2682                      KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
2683 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
2684                      KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
2685 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
2686                      KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
2687 #if KMP_HAVE_QUAD
2688 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
2689                      1) // __kmpc_atomic_float16_max_cpt
2690 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2691                      1) // __kmpc_atomic_float16_min_cpt
2692 #if (KMP_ARCH_X86)
2693 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2694                      1) // __kmpc_atomic_float16_max_a16_cpt
2695 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2696                      1) // __kmpc_atomic_float16_min_a16_cpt
2697 #endif // (KMP_ARCH_X86)
2698 #endif // KMP_HAVE_QUAD
2699 
2700 // ------------------------------------------------------------------------
2701 #ifdef KMP_GOMP_COMPAT
2702 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)                                     \
2703   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2704     KMP_CHECK_GTID;                                                            \
2705     OP_CRITICAL_CPT(OP, 0);                                                    \
2706   }
2707 #else
2708 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2709 #endif /* KMP_GOMP_COMPAT */
2710 // ------------------------------------------------------------------------
2711 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)         \
2712   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2713   TYPE new_value;                                                              \
2714   (void)new_value;                                                             \
2715   OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */       \
2716   OP_CMPXCHG_CPT(TYPE, BITS, OP)                                               \
2717   }
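// Illustrative note: .EQV. is bitwise XNOR, implemented here as XOR with the
// complemented operand, e.g. for fixed4 the update computed in the CAS loop is
//
//   new_value = old_value ^ ~rhs;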
2718 
2719 // ------------------------------------------------------------------------
2720 
2721 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2722                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2723 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2724                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2725 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2726                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2727 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2728                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2729 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2730                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2731 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2732                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2733 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2734                     KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2735 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2736                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2737 
2738 // ------------------------------------------------------------------------
2739 // Routines for Extended types: long double, _Quad, complex flavours (use
2740 // critical section)
2741 //     TYPE_ID, OP_ID, TYPE - detailed above
2742 //     OP      - operator
2743 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
2744 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)       \
2745   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2746   TYPE new_value;                                                              \
2747   OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */              \
2748   OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */               \
2749   }
2750 
2751 // ------------------------------------------------------------------------
2752 // Workaround for cmplx4. Regular routines with return value don't work
2753 // on Win_32e. Let's return captured values through the additional parameter.
2754 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID)                                        \
2755   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2756                                                                                \
2757   if (flag) {                                                                  \
2758     (*lhs) OP rhs;                                                             \
2759     (*out) = (*lhs);                                                           \
2760   } else {                                                                     \
2761     (*out) = (*lhs);                                                           \
2762     (*lhs) OP rhs;                                                             \
2763   }                                                                            \
2764                                                                                \
2765   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2766   return;
2767 // ------------------------------------------------------------------------
2768 
2769 #ifdef KMP_GOMP_COMPAT
2770 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)                                     \
2771   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2772     KMP_CHECK_GTID;                                                            \
2773     OP_CRITICAL_CPT_WRK(OP## =, 0);                                            \
2774   }
2775 #else
2776 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
2777 #endif /* KMP_GOMP_COMPAT */
2778 // ------------------------------------------------------------------------
2779 
2780 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                 \
2781   void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
2782                                          TYPE rhs, TYPE *out, int flag) {      \
2783     KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
2784     KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
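// For example, the cmplx4 instantiations below produce routines of the form
//
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag);
//
// where the captured value is written through 'out' instead of being returned.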
2785 // ------------------------------------------------------------------------
2786 
2787 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
2788   ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
2789   OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG)                                      \
2790   OP_CRITICAL_CPT_WRK(OP## =, LCK_ID)                                          \
2791   }
2792 // The end of workaround for cmplx4
2793 
2794 /* ------------------------------------------------------------------------- */
2795 // routines for long double type
2796 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
2797                     1) // __kmpc_atomic_float10_add_cpt
2798 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
2799                     1) // __kmpc_atomic_float10_sub_cpt
2800 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
2801                     1) // __kmpc_atomic_float10_mul_cpt
2802 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
2803                     1) // __kmpc_atomic_float10_div_cpt
2804 #if KMP_HAVE_QUAD
2805 // routines for _Quad type
2806 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
2807                     1) // __kmpc_atomic_float16_add_cpt
2808 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
2809                     1) // __kmpc_atomic_float16_sub_cpt
2810 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
2811                     1) // __kmpc_atomic_float16_mul_cpt
2812 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
2813                     1) // __kmpc_atomic_float16_div_cpt
2814 #if (KMP_ARCH_X86)
2815 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
2816                     1) // __kmpc_atomic_float16_add_a16_cpt
2817 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
2818                     1) // __kmpc_atomic_float16_sub_a16_cpt
2819 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
2820                     1) // __kmpc_atomic_float16_mul_a16_cpt
2821 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
2822                     1) // __kmpc_atomic_float16_div_a16_cpt
2823 #endif // (KMP_ARCH_X86)
2824 #endif // KMP_HAVE_QUAD
2825 
2826 // routines for complex types
2827 
2828 // cmplx4 routines to return void
2829 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
2830                         1) // __kmpc_atomic_cmplx4_add_cpt
2831 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
2832                         1) // __kmpc_atomic_cmplx4_sub_cpt
2833 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
2834                         1) // __kmpc_atomic_cmplx4_mul_cpt
2835 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
2836                         1) // __kmpc_atomic_cmplx4_div_cpt
2837 
2838 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
2839                     1) // __kmpc_atomic_cmplx8_add_cpt
2840 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
2841                     1) // __kmpc_atomic_cmplx8_sub_cpt
2842 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
2843                     1) // __kmpc_atomic_cmplx8_mul_cpt
2844 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
2845                     1) // __kmpc_atomic_cmplx8_div_cpt
2846 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
2847                     1) // __kmpc_atomic_cmplx10_add_cpt
2848 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
2849                     1) // __kmpc_atomic_cmplx10_sub_cpt
2850 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
2851                     1) // __kmpc_atomic_cmplx10_mul_cpt
2852 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
2853                     1) // __kmpc_atomic_cmplx10_div_cpt
2854 #if KMP_HAVE_QUAD
2855 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
2856                     1) // __kmpc_atomic_cmplx16_add_cpt
2857 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
2858                     1) // __kmpc_atomic_cmplx16_sub_cpt
2859 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
2860                     1) // __kmpc_atomic_cmplx16_mul_cpt
2861 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
2862                     1) // __kmpc_atomic_cmplx16_div_cpt
2863 #if (KMP_ARCH_X86)
2864 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
2865                     1) // __kmpc_atomic_cmplx16_add_a16_cpt
2866 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
2867                     1) // __kmpc_atomic_cmplx16_sub_a16_cpt
2868 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
2869                     1) // __kmpc_atomic_cmplx16_mul_a16_cpt
2870 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
2871                     1) // __kmpc_atomic_cmplx16_div_a16_cpt
2872 #endif // (KMP_ARCH_X86)
2873 #endif // KMP_HAVE_QUAD
2874 
2875 // OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr
2876 // binop x; v = x; }  for non-commutative operations.
2877 // Supported only on IA-32 architecture and Intel(R) 64
2878 
2879 // -------------------------------------------------------------------------
2880 // Operation on *lhs, rhs bound by critical section
2881 //     OP     - operator (it's supposed to contain an assignment)
2882 //     LCK_ID - lock identifier
2883 // Note: don't check gtid as it should always be valid
2884 // 1, 2-byte - expect valid parameter, other - check before this macro
2885 #define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID)                                  \
2886   __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2887                                                                                \
2888   if (flag) {                                                                  \
2889     /*temp_val = (*lhs);*/                                                     \
2890     (*lhs) = (TYPE)((rhs)OP(*lhs));                                            \
2891     new_value = (*lhs);                                                        \
2892   } else {                                                                     \
2893     new_value = (*lhs);                                                        \
2894     (*lhs) = (TYPE)((rhs)OP(*lhs));                                            \
2895   }                                                                            \
2896   __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
2897   return new_value;
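// Illustrative sketch: with OP '-' the reversed capture above reduces to
//
//   if (flag) { (*lhs) = rhs - (*lhs); new_value = (*lhs); } // capture new
//   else      { new_value = (*lhs); (*lhs) = rhs - (*lhs); } // capture old
//   return new_value;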
2898 
2899 // ------------------------------------------------------------------------
2900 #ifdef KMP_GOMP_COMPAT
2901 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)                               \
2902   if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
2903     KMP_CHECK_GTID;                                                            \
2904     OP_CRITICAL_CPT_REV(TYPE, OP, 0);                                          \
2905   }
2906 #else
2907 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
2908 #endif /* KMP_GOMP_COMPAT */
2909 
2910 // ------------------------------------------------------------------------
2911 // Operation on *lhs, rhs using "compare_and_store" routine
2912 //     TYPE    - operands' type
2913 //     BITS    - size in bits, used to distinguish low level calls
2914 //     OP      - operator
2915 // Note: temp_val introduced in order to force the compiler to read
2916 //       *lhs only once (w/o it the compiler reads *lhs twice)
2917 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                     \
2918   {                                                                            \
2919     TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
2920     TYPE old_value, new_value;                                                 \
2921     temp_val = *lhs;                                                           \
2922     old_value = temp_val;                                                      \
2923     new_value = (TYPE)(rhs OP old_value);                                      \
2924     while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
2925         (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
2926         *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
2927       temp_val = *lhs;                                                         \
2928       old_value = temp_val;                                                    \
2929       new_value = (TYPE)(rhs OP old_value);                                    \
2930     }                                                                          \
2931     if (flag) {                                                                \
2932       return new_value;                                                        \
2933     } else                                                                     \
2934       return old_value;                                                        \
2935   }
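// Illustrative note: this mirrors OP_CMPXCHG_CPT above with the operands
// swapped, e.g. for sub_cpt_rev the value written back is rhs - old_value
// rather than old_value - rhs.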
2936 
2937 // -------------------------------------------------------------------------
2938 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG)      \
2939   ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
2940   TYPE new_value;                                                              \
2941   (void)new_value;                                                             \
2942   OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
2943   OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
2944   }
2945 
2946 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
2947                        KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
2948 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
//              TYPE_ID,OP_ID, TYPE,          OP,  GOMP_FLAG

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP      - operator
//     LCK_ID  - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)   \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE)                                 \
  TYPE new_value;                                                              \
  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/                   \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID)                                        \
  }
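/*
  Illustrative sketch only (assumptions: std::mutex stands in for the
  runtime's per-type atomic lock, and the helper name is hypothetical). The
  reversed capture performed under the critical section amounts to:
@code
#include <mutex>

static std::mutex demo_lock; // stand-in for ATOMIC_LOCK##LCK_ID

// *lhs = rhs OP *lhs (shown with OP = '-'); return old or new value per flag.
static long double critical_sub_cpt_rev_demo(long double *lhs, long double rhs,
                                             int flag) {
  std::lock_guard<std::mutex> guard(demo_lock);
  long double old_value = *lhs;
  *lhs = rhs - *lhs; // reversed operand order
  return flag ? *lhs : old_value;
}
@endcode
*/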

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
                        1) // __kmpc_atomic_float10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
                        1) // __kmpc_atomic_float10_div_cpt_rev
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
                        1) // __kmpc_atomic_float16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
                        1) // __kmpc_atomic_float16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
                        1) // __kmpc_atomic_float16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
                        1) // __kmpc_atomic_float16_div_a16_cpt_rev
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// routines for complex types

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
#define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                    \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  if (flag) {                                                                  \
    (*lhs) = (rhs)OP(*lhs);                                                    \
    (*out) = (*lhs);                                                           \
  } else {                                                                     \
    (*out) = (*lhs);                                                           \
    (*lhs) = (rhs)OP(*lhs);                                                    \
  }                                                                            \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)                                 \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    OP_CRITICAL_CPT_REV_WRK(OP, 0);                                            \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID,          \
                                    GOMP_FLAG)                                 \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE)                                       \
  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG)                                  \
  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID)                                          \
  }
// The end of workaround for cmplx4
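/*
  Illustrative sketch only of what the cmplx4 workaround reduces to: the
  captured complex value is written through an extra `out` pointer instead of
  being returned, and `flag` selects capture-after vs. capture-before. The
  std::complex<float> type and the names below are stand-ins, not the runtime
  types.
@code
#include <complex>
#include <mutex>

static std::mutex cmplx4_demo_lock; // stand-in for the 8c atomic lock

static void cmplx4_sub_cpt_rev_wrk_demo(std::complex<float> *lhs,
                                        std::complex<float> rhs,
                                        std::complex<float> *out, int flag) {
  std::lock_guard<std::mutex> guard(cmplx4_demo_lock);
  if (flag) { // capture *after* the reversed update
    *lhs = rhs - *lhs;
    *out = *lhs;
  } else { // capture *before* the reversed update
    *out = *lhs;
    *lhs = rhs - *lhs;
  }
}
@endcode
*/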

// !!! TODO: check if we need to return void for cmplx4 routines
// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
                            1) // __kmpc_atomic_cmplx4_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
                            1) // __kmpc_atomic_cmplx4_div_cpt_rev

ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
                        1) // __kmpc_atomic_cmplx8_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
                        1) // __kmpc_atomic_cmplx8_div_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
                        1) // __kmpc_atomic_cmplx10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
                        1) // __kmpc_atomic_cmplx10_div_cpt_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// Capture reverse for mixed type: RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operands' type
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,   \
                                   RTYPE, LCK_ID, MASK, GOMP_FLAG)             \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                                    LCK_ID, GOMP_FLAG)                         \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */          \
  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */                  \
  }
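/*
  Illustrative sketch only: the mixed-type entry points keep the
  compare-and-swap at the width of the left-hand side while the arithmetic is
  done in the wider right-hand-side type. Here `long double` stands in for
  _Quad and std::atomic replaces KMP_COMPARE_AND_STORE_ACQ*; the function name
  is hypothetical.
@code
#include <atomic>

// Reversed capture with a wide RHS: *lhs = (int)(rhs - *lhs);
// returns the old or the new value depending on `flag`.
static int sub_cpt_rev_fp_demo(std::atomic<int> *lhs, long double rhs,
                               int flag) {
  int old_value = lhs->load(std::memory_order_relaxed);
  int new_value;
  do {
    new_value = static_cast<int>(rhs - old_value); // compute in the wide type
  } while (!lhs->compare_exchange_weak(old_value, new_value));
  return flag ? new_value : old_value;
}
@endcode
*/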

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp

ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp

#endif // KMP_HAVE_QUAD

//   OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}

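/*
  For reference: the swap entry points below implement the capture-write form
  of the atomic construct. A compiler that chooses not to inline the exchange
  could, for example, lower
@code
int capture_write_demo(int *x, int new_val) {
  int v;
#pragma omp atomic capture
  {
    v = *x;
    *x = new_val;
  }
  return v;
}
@endcode
  for a 32-bit integer into a call such as
  `v = __kmpc_atomic_fixed4_swp(&loc, gtid, x, new_val);` (illustrative only;
  `loc` and `gtid` come from the compiler's calling context).
*/
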
#define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                        \
  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
                                     TYPE rhs) {                               \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP(LCK_ID)                                                   \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  old_value = (*lhs);                                                          \
  (*lhs) = rhs;                                                                \
                                                                               \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return old_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP(FLAG)                                                \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    CRITICAL_SWP(0);                                                           \
  }
#else
#define GOMP_CRITICAL_SWP(FLAG)
#endif /* KMP_GOMP_COMPAT */

#define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                        \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
  TYPE old_value;                                                              \
  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs);                                  \
  return old_value;                                                            \
  }
// ------------------------------------------------------------------------
#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                  \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
  TYPE old_value;                                                              \
  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
  old_value = KMP_XCHG_REAL##BITS(lhs, rhs);                                   \
  return old_value;                                                            \
  }

// ------------------------------------------------------------------------
#define CMPXCHG_SWP(TYPE, BITS)                                                \
  {                                                                            \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                         \
    TYPE old_value, new_value;                                                 \
    temp_val = *lhs;                                                           \
    old_value = temp_val;                                                      \
    new_value = rhs;                                                           \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                   \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,     \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                        \
      temp_val = *lhs;                                                         \
      old_value = temp_val;                                                    \
      new_value = rhs;                                                         \
    }                                                                          \
    return old_value;                                                          \
  }
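/*
  Illustrative sketch only: CMPXCHG_SWP realizes the exchange with a
  compare-and-swap loop, the fallback used when a plain fetch-and-exchange at
  that width is not available (e.g. 8-byte data on IA-32). std::atomic stands
  in for KMP_COMPARE_AND_STORE_ACQ##BITS; the function name is hypothetical.
@code
#include <atomic>
#include <cstdint>

static std::int64_t cmpxchg_swp_demo(std::atomic<std::int64_t> *lhs,
                                     std::int64_t rhs) {
  std::int64_t old_value = lhs->load(std::memory_order_relaxed);
  // Retry until *lhs still holds old_value; then it is replaced by rhs.
  while (!lhs->compare_exchange_weak(old_value, rhs)) {
  }
  return old_value; // the value observed immediately before the swap
}
@endcode
*/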

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG)                     \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
  TYPE old_value;                                                              \
  (void)old_value;                                                             \
  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
  CMPXCHG_SWP(TYPE, BITS)                                                      \
  }

ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp

ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
                      KMP_ARCH_X86) // __kmpc_atomic_float4_swp

#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#else
ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
                      KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#endif // (KMP_ARCH_X86)

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
#define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)                  \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE)                                              \
  TYPE old_value;                                                              \
  GOMP_CRITICAL_SWP(GOMP_FLAG)                                                 \
  CRITICAL_SWP(LCK_ID)                                                         \
  }

// ------------------------------------------------------------------------
// !!! TODO: check if we need to return void for cmplx4 routines
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.

#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                    \
  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs,     \
                                     TYPE rhs, TYPE *out) {                    \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                       \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP_WRK(LCK_ID)                                               \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
                                                                               \
  tmp = (*lhs);                                                                \
  (*lhs) = (rhs);                                                              \
  (*out) = tmp;                                                                \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                       \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP_WRK(FLAG)                                            \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                    \
    KMP_CHECK_GTID;                                                            \
    CRITICAL_SWP_WRK(0);                                                       \
  }
#else
#define GOMP_CRITICAL_SWP_WRK(FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG)              \
  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE)                                          \
  TYPE tmp;                                                                    \
  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG)                                             \
  CRITICAL_SWP_WRK(LCK_ID)                                                     \
  }
// The end of workaround for cmplx4

ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
#endif // KMP_HAVE_QUAD
// cmplx4 routine to return void
ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp

// ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32,  8c,   1 )           //
// __kmpc_atomic_cmplx4_swp

ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
                    1) // __kmpc_atomic_float16_a16_swp
ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
                    1) // __kmpc_atomic_cmplx16_a16_swp
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// End of OpenMP 4.0 Capture

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

#undef OP_CRITICAL

/* ------------------------------------------------------------------------ */
/* Generic atomic routines                                                   */

void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#else
      TRUE
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int8 old_value, new_value;

    old_value = *(kmp_int8 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
                                       *(kmp_int8 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int8 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 1-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
  }
}
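
/*
  Usage sketch only: the generic entry points take the operation as a callback
  f(result, lhs_value, rhs_value). A front end could, for instance, lower an
  atomic update of a char by synthesizing a helper and passing it to
  __kmpc_atomic_1; the helper name below is hypothetical.
@code
// Callback computing *(char *)result = *(char *)lhs + *(char *)rhs.
static void demo_char_add(void *result, void *lhs, void *rhs) {
  *(char *)result = *(char *)lhs + *(char *)rhs;
}

// At the atomic construct the compiler could then emit something like:
//   __kmpc_atomic_1(&loc, gtid, &x, &incr, demo_char_add);
@endcode
*/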

void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int16 old_value, new_value;

    old_value = *(kmp_int16 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ16(
        (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int16 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 2-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
  }
}

void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
// FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
// Gomp compatibility is broken if this routine is called for floats.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
  ) {
    kmp_int32 old_value, new_value;

    old_value = *(kmp_int32 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ32(
        (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int32 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_4i for all 4-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
  }
}

void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (

#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int64 old_value, new_value;

    old_value = *(kmp_int64 *)lhs;
    (*f)(&new_value, &old_value, rhs);
    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ64(
        (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int64 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_8i for all 8-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
  }
}

void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
}

void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
}

void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
}

void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
}

// AC: same two routines as GOMP_atomic_start/end, but will be called by our
// compiler; duplicated in order not to use third-party names in pure Intel code
// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
void __kmpc_atomic_start(void) {
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
}

void __kmpc_atomic_end(void) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
}
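
/*
  Usage sketch only: __kmpc_atomic_start/__kmpc_atomic_end bracket an update
  that cannot be mapped onto any typed entry point, serializing it on the
  global __kmp_atomic_lock. The struct and helper below are hypothetical.
@code
struct pair_t {
  double a, b; // an aggregate with no typed atomic entry point
};

static void atomic_pair_add(pair_t *lhs, pair_t rhs) {
  __kmpc_atomic_start(); // serialize on __kmp_atomic_lock
  lhs->a += rhs.a;
  lhs->b += rhs.b;
  __kmpc_atomic_end();
}
@endcode
*/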

/*!
@}
*/

// end of file
