1 /*
2  * kmp_atomic.c -- ATOMIC implementation routines
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 //                     The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 
16 #include "kmp_atomic.h"
17 #include "kmp.h"                  // TRUE, asm routines prototypes
18 
19 typedef unsigned char uchar;
20 typedef unsigned short ushort;
21 
22 /*!
23 @defgroup ATOMIC_OPS Atomic Operations
24 These functions are used for implementing the many different varieties of atomic operations.
25 
The compiler is at liberty to inline atomic operations that are naturally supported
by the target architecture. For instance, on the IA-32 architecture an atomic operation like this can be inlined
28 @code
29 static int s = 0;
30 #pragma omp atomic
31     s++;
32 @endcode
33 using the single instruction: `lock; incl s`
34 
However, the runtime does provide entrypoints for these operations to support compilers that choose
36 not to inline them. (For instance, `__kmpc_atomic_fixed4_add` could be used to perform the
37 increment above.)
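
For example, a compiler that chooses not to inline the increment above could emit a call
such as this sketch (the source location and global thread id arguments are supplied by
the compiler and runtime):
@code
__kmpc_atomic_fixed4_add( &loc, gtid, &s, 1 );    // atomically: s += 1
@endcode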
38 
39 The names of the functions are encoded by using the data type name and the operation name, as in these tables.
40 
41 Data Type  | Data type encoding
42 -----------|---------------
43 int8_t     | `fixed1`
44 uint8_t    | `fixed1u`
45 int16_t    | `fixed2`
46 uint16_t   | `fixed2u`
47 int32_t    | `fixed4`
48 uint32_t   | `fixed4u`
int64_t    | `fixed8`
uint64_t   | `fixed8u`
51 float      | `float4`
52 double     | `float8`
long double (80 bit x87 float)  | `float10`
54 complex<float>   |  `cmplx4`
55 complex<double>  | `cmplx8`
complex<long double> | `cmplx10`
57 <br>
58 
59 Operation | Operation encoding
60 ----------|-------------------
61 + | add
62 - | sub
63 \* | mul
64 / | div
65 & | andb
66 << | shl
67 \>\> | shr
68 \| | orb
69 ^  | xor
70 && | andl
71 \|\| | orl
72 maximum | max
73 minimum | min
74 .eqv.   | eqv
75 .neqv.  | neqv
76 
77 <br>
78 For non-commutative operations, `_rev` can also be added for the reversed operation.
79 For the functions that capture the result, the suffix `_cpt` is added.
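A few decoded examples:
@code
__kmpc_atomic_fixed4_mul       // int32_t : *lhs *= rhs
__kmpc_atomic_float4_div_rev   // float   : *lhs = rhs / *lhs   (reversed)
__kmpc_atomic_fixed8_add_cpt   // int64_t : *lhs += rhs, with the result captured
@endcode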
80 
81 Update Functions
82 ================
The general form of an atomic function that just performs an update (without a `capture`) is
84 @code
85 void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs );
86 @endcode
@param id_ref  a pointer to source location
88 @param gtid  the global thread id
89 @param lhs   a pointer to the left operand
90 @param rhs   the right operand
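
Semantically, each update function atomically performs `*lhs = *lhs <op> rhs` (the `_rev`
variants compute `*lhs = rhs <op> *lhs` instead). A sketch, ignoring the locking or
compare-and-swap machinery described later in this file:
@code
void __kmpc_atomic_float8_sub( ident_t *id_ref, int gtid, double * lhs, double rhs )
{
    *lhs = *lhs - rhs;       // performed atomically
}
void __kmpc_atomic_float8_sub_rev( ident_t *id_ref, int gtid, double * lhs, double rhs )
{
    *lhs = rhs - *lhs;       // reversed operand order, performed atomically
}
@endcode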
91 
92 `capture` functions
93 ===================
The capture functions perform an atomic update and return a result, which is either the value
before the update or the value after it. They take an additional argument that determines which result is returned.
96 Their general form is therefore
97 @code
98 TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag );
99 @endcode
@param id_ref  a pointer to source location
101 @param gtid  the global thread id
102 @param lhs   a pointer to the left operand
103 @param rhs   the right operand
104 @param flag  one if the result is to be captured *after* the operation, zero if captured *before*.
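
A sketch of the capture semantics for an addition (the real implementations use atomic
instructions or locks, as described later in this file):
@code
kmp_int32 __kmpc_atomic_fixed4_add_cpt( ident_t *id_ref, int gtid,
                                        kmp_int32 * lhs, kmp_int32 rhs, int flag )
{
    kmp_int32 old_value = *lhs;      // value before the update
    *lhs = old_value + rhs;          // the atomic update
    return flag ? *lhs : old_value;  // flag != 0: value after; flag == 0: value before
}
@endcode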
105 
The one exception to this is the `complex<float>` type, where the value is not returned;
instead, an extra pointer argument is passed through which the result is stored.
108 
109 They look like
110 @code
111 void __kmpc_atomic_cmplx4_<op>_cpt(  ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
112 @endcode
113 
114 Read and Write Operations
115 =========================
116 The OpenMP<sup>*</sup> standard now supports atomic operations that simply ensure that the
117 value is read or written atomically, with no modification
performed. In many cases on the IA-32 architecture these operations can be inlined since
119 the architecture guarantees that no tearing occurs on aligned objects
120 accessed with a single memory operation of up to 64 bits in size.
121 
122 The general form of the read operations is
123 @code
124 TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
125 @endcode
126 
127 For the write operations the form is
128 @code
129 void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs );
130 @endcode
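
For example, a compiler might translate (a sketch, with `x` a 64-bit integer)
@code
#pragma omp atomic read
    v = x;
@endcode
into `v = __kmpc_atomic_fixed8_rd( &loc, gtid, &x );` and
@code
#pragma omp atomic write
    x = v;
@endcode
into `__kmpc_atomic_fixed8_wr( &loc, gtid, &x, v );`.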
131 
132 Full list of functions
133 ======================
134 This leads to the generation of 376 atomic functions, as follows.
135 
Functions for integers
137 ---------------------
There are versions here for integers of sizes 1, 2, 4 and 8 bytes, both signed and unsigned (where that matters).
139 @code
140     __kmpc_atomic_fixed1_add
141     __kmpc_atomic_fixed1_add_cpt
142     __kmpc_atomic_fixed1_add_fp
143     __kmpc_atomic_fixed1_andb
144     __kmpc_atomic_fixed1_andb_cpt
145     __kmpc_atomic_fixed1_andl
146     __kmpc_atomic_fixed1_andl_cpt
147     __kmpc_atomic_fixed1_div
148     __kmpc_atomic_fixed1_div_cpt
149     __kmpc_atomic_fixed1_div_cpt_rev
150     __kmpc_atomic_fixed1_div_float8
151     __kmpc_atomic_fixed1_div_fp
152     __kmpc_atomic_fixed1_div_rev
153     __kmpc_atomic_fixed1_eqv
154     __kmpc_atomic_fixed1_eqv_cpt
155     __kmpc_atomic_fixed1_max
156     __kmpc_atomic_fixed1_max_cpt
157     __kmpc_atomic_fixed1_min
158     __kmpc_atomic_fixed1_min_cpt
159     __kmpc_atomic_fixed1_mul
160     __kmpc_atomic_fixed1_mul_cpt
161     __kmpc_atomic_fixed1_mul_float8
162     __kmpc_atomic_fixed1_mul_fp
163     __kmpc_atomic_fixed1_neqv
164     __kmpc_atomic_fixed1_neqv_cpt
165     __kmpc_atomic_fixed1_orb
166     __kmpc_atomic_fixed1_orb_cpt
167     __kmpc_atomic_fixed1_orl
168     __kmpc_atomic_fixed1_orl_cpt
169     __kmpc_atomic_fixed1_rd
170     __kmpc_atomic_fixed1_shl
171     __kmpc_atomic_fixed1_shl_cpt
172     __kmpc_atomic_fixed1_shl_cpt_rev
173     __kmpc_atomic_fixed1_shl_rev
174     __kmpc_atomic_fixed1_shr
175     __kmpc_atomic_fixed1_shr_cpt
176     __kmpc_atomic_fixed1_shr_cpt_rev
177     __kmpc_atomic_fixed1_shr_rev
178     __kmpc_atomic_fixed1_sub
179     __kmpc_atomic_fixed1_sub_cpt
180     __kmpc_atomic_fixed1_sub_cpt_rev
181     __kmpc_atomic_fixed1_sub_fp
182     __kmpc_atomic_fixed1_sub_rev
183     __kmpc_atomic_fixed1_swp
184     __kmpc_atomic_fixed1_wr
185     __kmpc_atomic_fixed1_xor
186     __kmpc_atomic_fixed1_xor_cpt
187     __kmpc_atomic_fixed1u_add_fp
188     __kmpc_atomic_fixed1u_sub_fp
189     __kmpc_atomic_fixed1u_mul_fp
190     __kmpc_atomic_fixed1u_div
191     __kmpc_atomic_fixed1u_div_cpt
192     __kmpc_atomic_fixed1u_div_cpt_rev
193     __kmpc_atomic_fixed1u_div_fp
194     __kmpc_atomic_fixed1u_div_rev
195     __kmpc_atomic_fixed1u_shr
196     __kmpc_atomic_fixed1u_shr_cpt
197     __kmpc_atomic_fixed1u_shr_cpt_rev
198     __kmpc_atomic_fixed1u_shr_rev
199     __kmpc_atomic_fixed2_add
200     __kmpc_atomic_fixed2_add_cpt
201     __kmpc_atomic_fixed2_add_fp
202     __kmpc_atomic_fixed2_andb
203     __kmpc_atomic_fixed2_andb_cpt
204     __kmpc_atomic_fixed2_andl
205     __kmpc_atomic_fixed2_andl_cpt
206     __kmpc_atomic_fixed2_div
207     __kmpc_atomic_fixed2_div_cpt
208     __kmpc_atomic_fixed2_div_cpt_rev
209     __kmpc_atomic_fixed2_div_float8
210     __kmpc_atomic_fixed2_div_fp
211     __kmpc_atomic_fixed2_div_rev
212     __kmpc_atomic_fixed2_eqv
213     __kmpc_atomic_fixed2_eqv_cpt
214     __kmpc_atomic_fixed2_max
215     __kmpc_atomic_fixed2_max_cpt
216     __kmpc_atomic_fixed2_min
217     __kmpc_atomic_fixed2_min_cpt
218     __kmpc_atomic_fixed2_mul
219     __kmpc_atomic_fixed2_mul_cpt
220     __kmpc_atomic_fixed2_mul_float8
221     __kmpc_atomic_fixed2_mul_fp
222     __kmpc_atomic_fixed2_neqv
223     __kmpc_atomic_fixed2_neqv_cpt
224     __kmpc_atomic_fixed2_orb
225     __kmpc_atomic_fixed2_orb_cpt
226     __kmpc_atomic_fixed2_orl
227     __kmpc_atomic_fixed2_orl_cpt
228     __kmpc_atomic_fixed2_rd
229     __kmpc_atomic_fixed2_shl
230     __kmpc_atomic_fixed2_shl_cpt
231     __kmpc_atomic_fixed2_shl_cpt_rev
232     __kmpc_atomic_fixed2_shl_rev
233     __kmpc_atomic_fixed2_shr
234     __kmpc_atomic_fixed2_shr_cpt
235     __kmpc_atomic_fixed2_shr_cpt_rev
236     __kmpc_atomic_fixed2_shr_rev
237     __kmpc_atomic_fixed2_sub
238     __kmpc_atomic_fixed2_sub_cpt
239     __kmpc_atomic_fixed2_sub_cpt_rev
240     __kmpc_atomic_fixed2_sub_fp
241     __kmpc_atomic_fixed2_sub_rev
242     __kmpc_atomic_fixed2_swp
243     __kmpc_atomic_fixed2_wr
244     __kmpc_atomic_fixed2_xor
245     __kmpc_atomic_fixed2_xor_cpt
246     __kmpc_atomic_fixed2u_add_fp
247     __kmpc_atomic_fixed2u_sub_fp
248     __kmpc_atomic_fixed2u_mul_fp
249     __kmpc_atomic_fixed2u_div
250     __kmpc_atomic_fixed2u_div_cpt
251     __kmpc_atomic_fixed2u_div_cpt_rev
252     __kmpc_atomic_fixed2u_div_fp
253     __kmpc_atomic_fixed2u_div_rev
254     __kmpc_atomic_fixed2u_shr
255     __kmpc_atomic_fixed2u_shr_cpt
256     __kmpc_atomic_fixed2u_shr_cpt_rev
257     __kmpc_atomic_fixed2u_shr_rev
258     __kmpc_atomic_fixed4_add
259     __kmpc_atomic_fixed4_add_cpt
260     __kmpc_atomic_fixed4_add_fp
261     __kmpc_atomic_fixed4_andb
262     __kmpc_atomic_fixed4_andb_cpt
263     __kmpc_atomic_fixed4_andl
264     __kmpc_atomic_fixed4_andl_cpt
265     __kmpc_atomic_fixed4_div
266     __kmpc_atomic_fixed4_div_cpt
267     __kmpc_atomic_fixed4_div_cpt_rev
268     __kmpc_atomic_fixed4_div_float8
269     __kmpc_atomic_fixed4_div_fp
270     __kmpc_atomic_fixed4_div_rev
271     __kmpc_atomic_fixed4_eqv
272     __kmpc_atomic_fixed4_eqv_cpt
273     __kmpc_atomic_fixed4_max
274     __kmpc_atomic_fixed4_max_cpt
275     __kmpc_atomic_fixed4_min
276     __kmpc_atomic_fixed4_min_cpt
277     __kmpc_atomic_fixed4_mul
278     __kmpc_atomic_fixed4_mul_cpt
279     __kmpc_atomic_fixed4_mul_float8
280     __kmpc_atomic_fixed4_mul_fp
281     __kmpc_atomic_fixed4_neqv
282     __kmpc_atomic_fixed4_neqv_cpt
283     __kmpc_atomic_fixed4_orb
284     __kmpc_atomic_fixed4_orb_cpt
285     __kmpc_atomic_fixed4_orl
286     __kmpc_atomic_fixed4_orl_cpt
287     __kmpc_atomic_fixed4_rd
288     __kmpc_atomic_fixed4_shl
289     __kmpc_atomic_fixed4_shl_cpt
290     __kmpc_atomic_fixed4_shl_cpt_rev
291     __kmpc_atomic_fixed4_shl_rev
292     __kmpc_atomic_fixed4_shr
293     __kmpc_atomic_fixed4_shr_cpt
294     __kmpc_atomic_fixed4_shr_cpt_rev
295     __kmpc_atomic_fixed4_shr_rev
296     __kmpc_atomic_fixed4_sub
297     __kmpc_atomic_fixed4_sub_cpt
298     __kmpc_atomic_fixed4_sub_cpt_rev
299     __kmpc_atomic_fixed4_sub_fp
300     __kmpc_atomic_fixed4_sub_rev
301     __kmpc_atomic_fixed4_swp
302     __kmpc_atomic_fixed4_wr
303     __kmpc_atomic_fixed4_xor
304     __kmpc_atomic_fixed4_xor_cpt
305     __kmpc_atomic_fixed4u_add_fp
306     __kmpc_atomic_fixed4u_sub_fp
307     __kmpc_atomic_fixed4u_mul_fp
308     __kmpc_atomic_fixed4u_div
309     __kmpc_atomic_fixed4u_div_cpt
310     __kmpc_atomic_fixed4u_div_cpt_rev
311     __kmpc_atomic_fixed4u_div_fp
312     __kmpc_atomic_fixed4u_div_rev
313     __kmpc_atomic_fixed4u_shr
314     __kmpc_atomic_fixed4u_shr_cpt
315     __kmpc_atomic_fixed4u_shr_cpt_rev
316     __kmpc_atomic_fixed4u_shr_rev
317     __kmpc_atomic_fixed8_add
318     __kmpc_atomic_fixed8_add_cpt
319     __kmpc_atomic_fixed8_add_fp
320     __kmpc_atomic_fixed8_andb
321     __kmpc_atomic_fixed8_andb_cpt
322     __kmpc_atomic_fixed8_andl
323     __kmpc_atomic_fixed8_andl_cpt
324     __kmpc_atomic_fixed8_div
325     __kmpc_atomic_fixed8_div_cpt
326     __kmpc_atomic_fixed8_div_cpt_rev
327     __kmpc_atomic_fixed8_div_float8
328     __kmpc_atomic_fixed8_div_fp
329     __kmpc_atomic_fixed8_div_rev
330     __kmpc_atomic_fixed8_eqv
331     __kmpc_atomic_fixed8_eqv_cpt
332     __kmpc_atomic_fixed8_max
333     __kmpc_atomic_fixed8_max_cpt
334     __kmpc_atomic_fixed8_min
335     __kmpc_atomic_fixed8_min_cpt
336     __kmpc_atomic_fixed8_mul
337     __kmpc_atomic_fixed8_mul_cpt
338     __kmpc_atomic_fixed8_mul_float8
339     __kmpc_atomic_fixed8_mul_fp
340     __kmpc_atomic_fixed8_neqv
341     __kmpc_atomic_fixed8_neqv_cpt
342     __kmpc_atomic_fixed8_orb
343     __kmpc_atomic_fixed8_orb_cpt
344     __kmpc_atomic_fixed8_orl
345     __kmpc_atomic_fixed8_orl_cpt
346     __kmpc_atomic_fixed8_rd
347     __kmpc_atomic_fixed8_shl
348     __kmpc_atomic_fixed8_shl_cpt
349     __kmpc_atomic_fixed8_shl_cpt_rev
350     __kmpc_atomic_fixed8_shl_rev
351     __kmpc_atomic_fixed8_shr
352     __kmpc_atomic_fixed8_shr_cpt
353     __kmpc_atomic_fixed8_shr_cpt_rev
354     __kmpc_atomic_fixed8_shr_rev
355     __kmpc_atomic_fixed8_sub
356     __kmpc_atomic_fixed8_sub_cpt
357     __kmpc_atomic_fixed8_sub_cpt_rev
358     __kmpc_atomic_fixed8_sub_fp
359     __kmpc_atomic_fixed8_sub_rev
360     __kmpc_atomic_fixed8_swp
361     __kmpc_atomic_fixed8_wr
362     __kmpc_atomic_fixed8_xor
363     __kmpc_atomic_fixed8_xor_cpt
364     __kmpc_atomic_fixed8u_add_fp
365     __kmpc_atomic_fixed8u_sub_fp
366     __kmpc_atomic_fixed8u_mul_fp
367     __kmpc_atomic_fixed8u_div
368     __kmpc_atomic_fixed8u_div_cpt
369     __kmpc_atomic_fixed8u_div_cpt_rev
370     __kmpc_atomic_fixed8u_div_fp
371     __kmpc_atomic_fixed8u_div_rev
372     __kmpc_atomic_fixed8u_shr
373     __kmpc_atomic_fixed8u_shr_cpt
374     __kmpc_atomic_fixed8u_shr_cpt_rev
375     __kmpc_atomic_fixed8u_shr_rev
376 @endcode
377 
378 Functions for floating point
379 ----------------------------
There are versions here for floating point numbers of sizes 4, 8, 10 and 16 bytes.
(Ten byte floats are the x87 extended precision format, and are now rarely used.)
382 @code
383     __kmpc_atomic_float4_add
384     __kmpc_atomic_float4_add_cpt
385     __kmpc_atomic_float4_add_float8
386     __kmpc_atomic_float4_add_fp
387     __kmpc_atomic_float4_div
388     __kmpc_atomic_float4_div_cpt
389     __kmpc_atomic_float4_div_cpt_rev
390     __kmpc_atomic_float4_div_float8
391     __kmpc_atomic_float4_div_fp
392     __kmpc_atomic_float4_div_rev
393     __kmpc_atomic_float4_max
394     __kmpc_atomic_float4_max_cpt
395     __kmpc_atomic_float4_min
396     __kmpc_atomic_float4_min_cpt
397     __kmpc_atomic_float4_mul
398     __kmpc_atomic_float4_mul_cpt
399     __kmpc_atomic_float4_mul_float8
400     __kmpc_atomic_float4_mul_fp
401     __kmpc_atomic_float4_rd
402     __kmpc_atomic_float4_sub
403     __kmpc_atomic_float4_sub_cpt
404     __kmpc_atomic_float4_sub_cpt_rev
405     __kmpc_atomic_float4_sub_float8
406     __kmpc_atomic_float4_sub_fp
407     __kmpc_atomic_float4_sub_rev
408     __kmpc_atomic_float4_swp
409     __kmpc_atomic_float4_wr
410     __kmpc_atomic_float8_add
411     __kmpc_atomic_float8_add_cpt
412     __kmpc_atomic_float8_add_fp
413     __kmpc_atomic_float8_div
414     __kmpc_atomic_float8_div_cpt
415     __kmpc_atomic_float8_div_cpt_rev
416     __kmpc_atomic_float8_div_fp
417     __kmpc_atomic_float8_div_rev
418     __kmpc_atomic_float8_max
419     __kmpc_atomic_float8_max_cpt
420     __kmpc_atomic_float8_min
421     __kmpc_atomic_float8_min_cpt
422     __kmpc_atomic_float8_mul
423     __kmpc_atomic_float8_mul_cpt
424     __kmpc_atomic_float8_mul_fp
425     __kmpc_atomic_float8_rd
426     __kmpc_atomic_float8_sub
427     __kmpc_atomic_float8_sub_cpt
428     __kmpc_atomic_float8_sub_cpt_rev
429     __kmpc_atomic_float8_sub_fp
430     __kmpc_atomic_float8_sub_rev
431     __kmpc_atomic_float8_swp
432     __kmpc_atomic_float8_wr
433     __kmpc_atomic_float10_add
434     __kmpc_atomic_float10_add_cpt
435     __kmpc_atomic_float10_add_fp
436     __kmpc_atomic_float10_div
437     __kmpc_atomic_float10_div_cpt
438     __kmpc_atomic_float10_div_cpt_rev
439     __kmpc_atomic_float10_div_fp
440     __kmpc_atomic_float10_div_rev
441     __kmpc_atomic_float10_mul
442     __kmpc_atomic_float10_mul_cpt
443     __kmpc_atomic_float10_mul_fp
444     __kmpc_atomic_float10_rd
445     __kmpc_atomic_float10_sub
446     __kmpc_atomic_float10_sub_cpt
447     __kmpc_atomic_float10_sub_cpt_rev
448     __kmpc_atomic_float10_sub_fp
449     __kmpc_atomic_float10_sub_rev
450     __kmpc_atomic_float10_swp
451     __kmpc_atomic_float10_wr
452     __kmpc_atomic_float16_add
453     __kmpc_atomic_float16_add_cpt
454     __kmpc_atomic_float16_div
455     __kmpc_atomic_float16_div_cpt
456     __kmpc_atomic_float16_div_cpt_rev
457     __kmpc_atomic_float16_div_rev
458     __kmpc_atomic_float16_max
459     __kmpc_atomic_float16_max_cpt
460     __kmpc_atomic_float16_min
461     __kmpc_atomic_float16_min_cpt
462     __kmpc_atomic_float16_mul
463     __kmpc_atomic_float16_mul_cpt
464     __kmpc_atomic_float16_rd
465     __kmpc_atomic_float16_sub
466     __kmpc_atomic_float16_sub_cpt
467     __kmpc_atomic_float16_sub_cpt_rev
468     __kmpc_atomic_float16_sub_rev
469     __kmpc_atomic_float16_swp
470     __kmpc_atomic_float16_wr
471 @endcode
472 
473 Functions for Complex types
474 ---------------------------
Functions for complex types whose component floating point variables are of size 4, 8, 10 or 16 bytes.
The names here are based on the size of the component float, *not* the size of the complex type. So
`__kmpc_atomic_cmplx8_add` is an operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`.
478 
479 @code
480     __kmpc_atomic_cmplx4_add
481     __kmpc_atomic_cmplx4_add_cmplx8
482     __kmpc_atomic_cmplx4_add_cpt
483     __kmpc_atomic_cmplx4_div
484     __kmpc_atomic_cmplx4_div_cmplx8
485     __kmpc_atomic_cmplx4_div_cpt
486     __kmpc_atomic_cmplx4_div_cpt_rev
487     __kmpc_atomic_cmplx4_div_rev
488     __kmpc_atomic_cmplx4_mul
489     __kmpc_atomic_cmplx4_mul_cmplx8
490     __kmpc_atomic_cmplx4_mul_cpt
491     __kmpc_atomic_cmplx4_rd
492     __kmpc_atomic_cmplx4_sub
493     __kmpc_atomic_cmplx4_sub_cmplx8
494     __kmpc_atomic_cmplx4_sub_cpt
495     __kmpc_atomic_cmplx4_sub_cpt_rev
496     __kmpc_atomic_cmplx4_sub_rev
497     __kmpc_atomic_cmplx4_swp
498     __kmpc_atomic_cmplx4_wr
499     __kmpc_atomic_cmplx8_add
500     __kmpc_atomic_cmplx8_add_cpt
501     __kmpc_atomic_cmplx8_div
502     __kmpc_atomic_cmplx8_div_cpt
503     __kmpc_atomic_cmplx8_div_cpt_rev
504     __kmpc_atomic_cmplx8_div_rev
505     __kmpc_atomic_cmplx8_mul
506     __kmpc_atomic_cmplx8_mul_cpt
507     __kmpc_atomic_cmplx8_rd
508     __kmpc_atomic_cmplx8_sub
509     __kmpc_atomic_cmplx8_sub_cpt
510     __kmpc_atomic_cmplx8_sub_cpt_rev
511     __kmpc_atomic_cmplx8_sub_rev
512     __kmpc_atomic_cmplx8_swp
513     __kmpc_atomic_cmplx8_wr
514     __kmpc_atomic_cmplx10_add
515     __kmpc_atomic_cmplx10_add_cpt
516     __kmpc_atomic_cmplx10_div
517     __kmpc_atomic_cmplx10_div_cpt
518     __kmpc_atomic_cmplx10_div_cpt_rev
519     __kmpc_atomic_cmplx10_div_rev
520     __kmpc_atomic_cmplx10_mul
521     __kmpc_atomic_cmplx10_mul_cpt
522     __kmpc_atomic_cmplx10_rd
523     __kmpc_atomic_cmplx10_sub
524     __kmpc_atomic_cmplx10_sub_cpt
525     __kmpc_atomic_cmplx10_sub_cpt_rev
526     __kmpc_atomic_cmplx10_sub_rev
527     __kmpc_atomic_cmplx10_swp
528     __kmpc_atomic_cmplx10_wr
529     __kmpc_atomic_cmplx16_add
530     __kmpc_atomic_cmplx16_add_cpt
531     __kmpc_atomic_cmplx16_div
532     __kmpc_atomic_cmplx16_div_cpt
533     __kmpc_atomic_cmplx16_div_cpt_rev
534     __kmpc_atomic_cmplx16_div_rev
535     __kmpc_atomic_cmplx16_mul
536     __kmpc_atomic_cmplx16_mul_cpt
537     __kmpc_atomic_cmplx16_rd
538     __kmpc_atomic_cmplx16_sub
539     __kmpc_atomic_cmplx16_sub_cpt
540     __kmpc_atomic_cmplx16_sub_cpt_rev
541     __kmpc_atomic_cmplx16_swp
542     __kmpc_atomic_cmplx16_wr
543 @endcode
544 */
545 
546 /*!
547 @ingroup ATOMIC_OPS
548 @{
549 */
550 
551 /*
552  * Global vars
553  */
554 
555 #ifndef KMP_GOMP_COMPAT
556 int __kmp_atomic_mode = 1;      // Intel perf
557 #else
558 int __kmp_atomic_mode = 2;      // GOMP compatibility
559 #endif /* KMP_GOMP_COMPAT */
560 
561 KMP_ALIGN(128)
562 
563 kmp_atomic_lock_t __kmp_atomic_lock;     /* Control access to all user coded atomics in Gnu compat mode   */
564 kmp_atomic_lock_t __kmp_atomic_lock_1i;  /* Control access to all user coded atomics for 1-byte fixed data types */
565 kmp_atomic_lock_t __kmp_atomic_lock_2i;  /* Control access to all user coded atomics for 2-byte fixed data types */
566 kmp_atomic_lock_t __kmp_atomic_lock_4i;  /* Control access to all user coded atomics for 4-byte fixed data types */
567 kmp_atomic_lock_t __kmp_atomic_lock_4r;  /* Control access to all user coded atomics for kmp_real32 data type    */
568 kmp_atomic_lock_t __kmp_atomic_lock_8i;  /* Control access to all user coded atomics for 8-byte fixed data types */
569 kmp_atomic_lock_t __kmp_atomic_lock_8r;  /* Control access to all user coded atomics for kmp_real64 data type    */
kmp_atomic_lock_t __kmp_atomic_lock_8c;  /* Control access to all user coded atomics for 8-byte complex data type  */
571 kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type   */
572 kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type         */
573 kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/
574 kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/
575 kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */
576 
577 
578 /*
579   2007-03-02:
580   Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a
581   bug on *_32 and *_32e. This is just a temporary workaround for the problem.
  It seems the right solution is to write the OP_CMPXCHG and MIN_MAX_CMPXCHG
  routines in assembly language.
584 */
585 #define KMP_ATOMIC_VOLATILE volatile
586 
587 #if ( KMP_ARCH_X86 ) && KMP_HAVE_QUAD
588 
    static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; }
    static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; }
    static inline void operator *=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q *= rhs.q; }
    static inline void operator /=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q /= rhs.q; }
    static inline bool operator < ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q < rhs.q; }
    static inline bool operator > ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q > rhs.q; }

    static inline void operator +=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q += rhs.q; }
    static inline void operator -=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q -= rhs.q; }
    static inline void operator *=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q *= rhs.q; }
    static inline void operator /=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q /= rhs.q; }
    static inline bool operator < ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q < rhs.q; }
    static inline bool operator > ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q > rhs.q; }

    static inline void operator +=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q += rhs.q; }
    static inline void operator -=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q -= rhs.q; }
    static inline void operator *=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q *= rhs.q; }
    static inline void operator /=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q /= rhs.q; }

    static inline void operator +=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q += rhs.q; }
    static inline void operator -=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q -= rhs.q; }
    static inline void operator *=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q *= rhs.q; }
    static inline void operator /=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q /= rhs.q; }
612 
613 #endif
614 
615 /* ------------------------------------------------------------------------ */
616 /* ATOMIC implementation routines                                           */
617 /* one routine for each operation and operand type                          */
618 /* ------------------------------------------------------------------------ */
619 
// All routine declarations look like
621 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );
622 // ------------------------------------------------------------------------
623 
624 #define KMP_CHECK_GTID                                                    \
625     if ( gtid == KMP_GTID_UNKNOWN ) {                                     \
626         gtid = __kmp_entry_gtid();                                        \
627     } // check and get gtid when needed
628 
// Beginning of a definition (provides name, parameters, debug trace)
630 //     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed)
631 //     OP_ID   - operation identifier (add, sub, mul, ...)
632 //     TYPE    - operands' type
633 #define ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
634 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
635 {                                                                                         \
636     KMP_DEBUG_ASSERT( __kmp_init_serial );                                                \
637     KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
638 
639 // ------------------------------------------------------------------------
640 // Lock variables used for critical sections for various size operands
641 #define ATOMIC_LOCK0   __kmp_atomic_lock       // all types, for Gnu compat
642 #define ATOMIC_LOCK1i  __kmp_atomic_lock_1i    // char
643 #define ATOMIC_LOCK2i  __kmp_atomic_lock_2i    // short
644 #define ATOMIC_LOCK4i  __kmp_atomic_lock_4i    // long int
645 #define ATOMIC_LOCK4r  __kmp_atomic_lock_4r    // float
646 #define ATOMIC_LOCK8i  __kmp_atomic_lock_8i    // long long int
647 #define ATOMIC_LOCK8r  __kmp_atomic_lock_8r    // double
648 #define ATOMIC_LOCK8c  __kmp_atomic_lock_8c    // float complex
649 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r   // long double
650 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r   // _Quad
651 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c   // double complex
652 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c   // long double complex
653 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c   // _Quad complex
654 
655 // ------------------------------------------------------------------------
656 // Operation on *lhs, rhs bound by critical section
657 //     OP     - operator (it's supposed to contain an assignment)
658 //     LCK_ID - lock identifier
// Note: gtid is not checked here since it should always be valid;
// for 1- and 2-byte operands a valid gtid is expected, other sizes check it before this macro
661 #define OP_CRITICAL(OP,LCK_ID) \
662     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );                    \
663                                                                           \
664     (*lhs) OP (rhs);                                                      \
665                                                                           \
666     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
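
// For example, OP_CRITICAL( +=, 4i ) expands (roughly, via the ATOMIC_LOCK*
// aliases below) to:
//
//     __kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid );
//     (*lhs) += (rhs);
//     __kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid );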
667 
668 // ------------------------------------------------------------------------
669 // For GNU compatibility, we may need to use a critical section,
670 // even though it is not required by the ISA.
671 //
672 // On IA-32 architecture, all atomic operations except for fixed 4 byte add,
673 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
674 // critical section.  On Intel(R) 64, all atomic operations are done with fetch
675 // and add or compare and exchange.  Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
677 // require a critical section, where we predict that they will be implemented
678 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
679 //
680 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
681 // the FLAG parameter should always be 1.  If we know that we will be using
682 // a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
684 // locks that are specialized based upon the size or type of the data.
685 //
686 // If FLAG is 0, then we are relying on dead code elimination by the build
687 // compiler to get rid of the useless block of code, and save a needless
688 // branch at runtime.
689 //
690 
691 #ifdef KMP_GOMP_COMPAT
692 # define OP_GOMP_CRITICAL(OP,FLAG)                                        \
693     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
694         KMP_CHECK_GTID;                                                   \
695         OP_CRITICAL( OP, 0 );                                             \
696         return;                                                           \
697     }
698 # else
699 # define OP_GOMP_CRITICAL(OP,FLAG)
700 #endif /* KMP_GOMP_COMPAT */
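
// For example, with KMP_GOMP_COMPAT defined, OP_GOMP_CRITICAL( +=, 1 ) expands
// (roughly) to:
//
//     if ( (1) && (__kmp_atomic_mode == 2) ) {
//         if ( gtid == KMP_GTID_UNKNOWN ) {
//             gtid = __kmp_entry_gtid();
//         }
//         __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
//         (*lhs) += (rhs);
//         __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
//         return;
//     }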
701 
702 #if KMP_MIC
703 # define KMP_DO_PAUSE _mm_delay_32( 1 )
704 #else
705 # define KMP_DO_PAUSE KMP_CPU_PAUSE()
706 #endif /* KMP_MIC */
707 
708 // ------------------------------------------------------------------------
709 // Operation on *lhs, rhs using "compare_and_store" routine
710 //     TYPE    - operands' type
711 //     BITS    - size in bits, used to distinguish low level calls
712 //     OP      - operator
713 #define OP_CMPXCHG(TYPE,BITS,OP)                                          \
714     {                                                                     \
715         TYPE old_value, new_value;                                        \
716         old_value = *(TYPE volatile *)lhs;                                \
717         new_value = old_value OP rhs;                                     \
718         while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
719                       *VOLATILE_CAST(kmp_int##BITS *) &old_value,         \
720                       *VOLATILE_CAST(kmp_int##BITS *) &new_value ) )      \
721         {                                                                 \
            KMP_DO_PAUSE;                                                 \
723                                                                           \
724             old_value = *(TYPE volatile *)lhs;                            \
725             new_value = old_value OP rhs;                                 \
726         }                                                                 \
727     }
728 
729 #if USE_CMPXCHG_FIX
730 // 2007-06-25:
731 // workaround for C78287 (complex(kind=4) data type)
732 // lin_32, lin_32e, win_32 and win_32e are affected (I verified the asm)
// The compiler ignores the volatile qualifier of temp_val in the OP_CMPXCHG macro.
// This is a compiler bug.
735 // Related tracker is C76005, targeted to 11.0.
736 // I verified the asm of the workaround.
#define OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP)                               \
    {                                                                     \
        struct _sss {                                                     \
            TYPE            cmp;                                          \
            kmp_int##BITS   *vvv;                                         \
        };                                                                \
        struct _sss old_value, new_value;                                 \
        old_value.vvv = ( kmp_int##BITS * )&old_value.cmp;                \
        new_value.vvv = ( kmp_int##BITS * )&new_value.cmp;                \
        *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs;              \
        new_value.cmp = old_value.cmp OP rhs;                             \
        while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
                      *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,      \
                      *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv ) )   \
        {                                                                 \
            KMP_DO_PAUSE;                                                 \
                                                                          \
            *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs;          \
            new_value.cmp = old_value.cmp OP rhs;                         \
        }                                                                 \
    }
758 // end of the first part of the workaround for C78287
759 #endif // USE_CMPXCHG_FIX
760 
761 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
762 
763 // ------------------------------------------------------------------------
764 // X86 or X86_64: no alignment problems ====================================
765 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
766 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                      \
767     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                      \
768     /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */      \
769     KMP_TEST_THEN_ADD##BITS( lhs, OP rhs );                                \
770 }
771 // -------------------------------------------------------------------------
772 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG)   \
773 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                      \
774     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                      \
775     OP_CMPXCHG(TYPE,BITS,OP)                                               \
776 }
777 #if USE_CMPXCHG_FIX
778 // -------------------------------------------------------------------------
779 // workaround for C78287 (complex(kind=4) data type)
780 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG)   \
781 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                                 \
782     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                                 \
783     OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP)                                               \
784 }
785 // end of the second part of the workaround for C78287
786 #endif
787 
788 #else
789 // -------------------------------------------------------------------------
790 // Code for other architectures that don't handle unaligned accesses.
791 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
792 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                      \
793     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                      \
794     if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) {                           \
795         /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */  \
796         KMP_TEST_THEN_ADD##BITS( lhs, OP rhs );                            \
797     } else {                                                               \
798         KMP_CHECK_GTID;                                                    \
799         OP_CRITICAL(OP##=,LCK_ID)  /* unaligned address - use critical */  \
800     }                                                                      \
801 }
802 // -------------------------------------------------------------------------
803 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG)   \
804 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                      \
805     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                      \
806     if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) {                           \
807         OP_CMPXCHG(TYPE,BITS,OP)     /* aligned address */                 \
808     } else {                                                               \
809         KMP_CHECK_GTID;                                                    \
810         OP_CRITICAL(OP##=,LCK_ID)  /* unaligned address - use critical */  \
811     }                                                                      \
812 }
813 #if USE_CMPXCHG_FIX
814 // -------------------------------------------------------------------------
815 // workaround for C78287 (complex(kind=4) data type)
816 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG)   \
817 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                                 \
818     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                                 \
819     if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) {                                      \
820         OP_CMPXCHG(TYPE,BITS,OP)             /* aligned address */                    \
821     } else {                                                                          \
822         KMP_CHECK_GTID;                                                               \
823         OP_CRITICAL(OP##=,LCK_ID)  /* unaligned address - use critical */             \
824     }                                                                                 \
825 }
826 // end of the second part of the workaround for C78287
827 #endif // USE_CMPXCHG_FIX
828 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
829 
830 // Routines for ATOMIC 4-byte operands addition and subtraction
831 ATOMIC_FIXED_ADD( fixed4, add, kmp_int32,  32, +, 4i, 3, 0            )  // __kmpc_atomic_fixed4_add
832 ATOMIC_FIXED_ADD( fixed4, sub, kmp_int32,  32, -, 4i, 3, 0            )  // __kmpc_atomic_fixed4_sub
833 
834 ATOMIC_CMPXCHG( float4,  add, kmp_real32, 32, +,  4r, 3, KMP_ARCH_X86 )  // __kmpc_atomic_float4_add
835 ATOMIC_CMPXCHG( float4,  sub, kmp_real32, 32, -,  4r, 3, KMP_ARCH_X86 )  // __kmpc_atomic_float4_sub
836 
837 // Routines for ATOMIC 8-byte operands addition and subtraction
838 ATOMIC_FIXED_ADD( fixed8, add, kmp_int64,  64, +, 8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_add
839 ATOMIC_FIXED_ADD( fixed8, sub, kmp_int64,  64, -, 8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_sub
840 
841 ATOMIC_CMPXCHG( float8,  add, kmp_real64, 64, +,  8r, 7, KMP_ARCH_X86 )  // __kmpc_atomic_float8_add
842 ATOMIC_CMPXCHG( float8,  sub, kmp_real64, 64, -,  8r, 7, KMP_ARCH_X86 )  // __kmpc_atomic_float8_sub
843 
844 // ------------------------------------------------------------------------
845 // Entries definition for integer operands
846 //     TYPE_ID - operands type and size (fixed4, float4)
847 //     OP_ID   - operation identifier (add, sub, mul, ...)
848 //     TYPE    - operand type
849 //     BITS    - size in bits, used to distinguish low level calls
850 //     OP      - operator (used in critical section)
851 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
852 //     MASK    - used for alignment check
853 
854 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,MASK,GOMP_FLAG
855 // ------------------------------------------------------------------------
856 // Routines for ATOMIC integer operands, other operators
857 // ------------------------------------------------------------------------
858 //              TYPE_ID,OP_ID, TYPE,          OP, LCK_ID, GOMP_FLAG
859 ATOMIC_CMPXCHG( fixed1,  add, kmp_int8,    8, +,  1i, 0, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_add
860 ATOMIC_CMPXCHG( fixed1, andb, kmp_int8,    8, &,  1i, 0, 0            )  // __kmpc_atomic_fixed1_andb
861 ATOMIC_CMPXCHG( fixed1,  div, kmp_int8,    8, /,  1i, 0, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_div
862 ATOMIC_CMPXCHG( fixed1u, div, kmp_uint8,   8, /,  1i, 0, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1u_div
863 ATOMIC_CMPXCHG( fixed1,  mul, kmp_int8,    8, *,  1i, 0, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_mul
864 ATOMIC_CMPXCHG( fixed1,  orb, kmp_int8,    8, |,  1i, 0, 0            )  // __kmpc_atomic_fixed1_orb
865 ATOMIC_CMPXCHG( fixed1,  shl, kmp_int8,    8, <<, 1i, 0, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_shl
866 ATOMIC_CMPXCHG( fixed1,  shr, kmp_int8,    8, >>, 1i, 0, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_shr
867 ATOMIC_CMPXCHG( fixed1u, shr, kmp_uint8,   8, >>, 1i, 0, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1u_shr
868 ATOMIC_CMPXCHG( fixed1,  sub, kmp_int8,    8, -,  1i, 0, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_sub
869 ATOMIC_CMPXCHG( fixed1,  xor, kmp_int8,    8, ^,  1i, 0, 0            )  // __kmpc_atomic_fixed1_xor
870 ATOMIC_CMPXCHG( fixed2,  add, kmp_int16,  16, +,  2i, 1, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_add
871 ATOMIC_CMPXCHG( fixed2, andb, kmp_int16,  16, &,  2i, 1, 0            )  // __kmpc_atomic_fixed2_andb
872 ATOMIC_CMPXCHG( fixed2,  div, kmp_int16,  16, /,  2i, 1, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_div
873 ATOMIC_CMPXCHG( fixed2u, div, kmp_uint16, 16, /,  2i, 1, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2u_div
874 ATOMIC_CMPXCHG( fixed2,  mul, kmp_int16,  16, *,  2i, 1, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_mul
875 ATOMIC_CMPXCHG( fixed2,  orb, kmp_int16,  16, |,  2i, 1, 0            )  // __kmpc_atomic_fixed2_orb
876 ATOMIC_CMPXCHG( fixed2,  shl, kmp_int16,  16, <<, 2i, 1, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_shl
877 ATOMIC_CMPXCHG( fixed2,  shr, kmp_int16,  16, >>, 2i, 1, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_shr
878 ATOMIC_CMPXCHG( fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2u_shr
879 ATOMIC_CMPXCHG( fixed2,  sub, kmp_int16,  16, -,  2i, 1, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_sub
880 ATOMIC_CMPXCHG( fixed2,  xor, kmp_int16,  16, ^,  2i, 1, 0            )  // __kmpc_atomic_fixed2_xor
881 ATOMIC_CMPXCHG( fixed4, andb, kmp_int32,  32, &,  4i, 3, 0            )  // __kmpc_atomic_fixed4_andb
882 ATOMIC_CMPXCHG( fixed4,  div, kmp_int32,  32, /,  4i, 3, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_div
883 ATOMIC_CMPXCHG( fixed4u, div, kmp_uint32, 32, /,  4i, 3, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4u_div
884 ATOMIC_CMPXCHG( fixed4,  mul, kmp_int32,  32, *,  4i, 3, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_mul
885 ATOMIC_CMPXCHG( fixed4,  orb, kmp_int32,  32, |,  4i, 3, 0            )  // __kmpc_atomic_fixed4_orb
886 ATOMIC_CMPXCHG( fixed4,  shl, kmp_int32,  32, <<, 4i, 3, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_shl
887 ATOMIC_CMPXCHG( fixed4,  shr, kmp_int32,  32, >>, 4i, 3, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_shr
888 ATOMIC_CMPXCHG( fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4u_shr
889 ATOMIC_CMPXCHG( fixed4,  xor, kmp_int32,  32, ^,  4i, 3, 0            )  // __kmpc_atomic_fixed4_xor
890 ATOMIC_CMPXCHG( fixed8, andb, kmp_int64,  64, &,  8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_andb
891 ATOMIC_CMPXCHG( fixed8,  div, kmp_int64,  64, /,  8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_div
892 ATOMIC_CMPXCHG( fixed8u, div, kmp_uint64, 64, /,  8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8u_div
893 ATOMIC_CMPXCHG( fixed8,  mul, kmp_int64,  64, *,  8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_mul
894 ATOMIC_CMPXCHG( fixed8,  orb, kmp_int64,  64, |,  8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_orb
895 ATOMIC_CMPXCHG( fixed8,  shl, kmp_int64,  64, <<, 8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_shl
896 ATOMIC_CMPXCHG( fixed8,  shr, kmp_int64,  64, >>, 8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_shr
897 ATOMIC_CMPXCHG( fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8u_shr
898 ATOMIC_CMPXCHG( fixed8,  xor, kmp_int64,  64, ^,  8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_xor
899 ATOMIC_CMPXCHG( float4,  div, kmp_real32, 32, /,  4r, 3, KMP_ARCH_X86 )  // __kmpc_atomic_float4_div
900 ATOMIC_CMPXCHG( float4,  mul, kmp_real32, 32, *,  4r, 3, KMP_ARCH_X86 )  // __kmpc_atomic_float4_mul
901 ATOMIC_CMPXCHG( float8,  div, kmp_real64, 64, /,  8r, 7, KMP_ARCH_X86 )  // __kmpc_atomic_float8_div
902 ATOMIC_CMPXCHG( float8,  mul, kmp_real64, 64, *,  8r, 7, KMP_ARCH_X86 )  // __kmpc_atomic_float8_mul
903 //              TYPE_ID,OP_ID, TYPE,          OP, LCK_ID, GOMP_FLAG
904 
905 
906 /* ------------------------------------------------------------------------ */
907 /* Routines for C/C++ Reduction operators && and ||                         */
908 /* ------------------------------------------------------------------------ */
909 
910 // ------------------------------------------------------------------------
911 // Need separate macros for &&, || because there is no combined assignment
912 //   TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
913 #define ATOMIC_CRIT_L(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)             \
914 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
915     OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG )                              \
916     OP_CRITICAL( = *lhs OP, LCK_ID )                                      \
917 }
918 
919 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
920 
921 // ------------------------------------------------------------------------
922 // X86 or X86_64: no alignment problems ===================================
923 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG)   \
924 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
925     OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG )                              \
926     OP_CMPXCHG(TYPE,BITS,OP)                                              \
927 }
928 
929 #else
930 // ------------------------------------------------------------------------
931 // Code for other architectures that don't handle unaligned accesses.
932 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG)   \
933 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
934     OP_GOMP_CRITICAL(= *lhs OP,GOMP_FLAG)                                 \
935     if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) {                          \
936         OP_CMPXCHG(TYPE,BITS,OP)       /* aligned address */              \
937     } else {                                                              \
938         KMP_CHECK_GTID;                                                   \
939         OP_CRITICAL(= *lhs OP,LCK_ID)  /* unaligned - use critical */     \
940     }                                                                     \
941 }
942 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
943 
944 ATOMIC_CMPX_L( fixed1, andl, char,       8, &&, 1i, 0, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_andl
945 ATOMIC_CMPX_L( fixed1,  orl, char,       8, ||, 1i, 0, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_orl
946 ATOMIC_CMPX_L( fixed2, andl, short,     16, &&, 2i, 1, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_andl
947 ATOMIC_CMPX_L( fixed2,  orl, short,     16, ||, 2i, 1, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_orl
948 ATOMIC_CMPX_L( fixed4, andl, kmp_int32, 32, &&, 4i, 3, 0 )             // __kmpc_atomic_fixed4_andl
949 ATOMIC_CMPX_L( fixed4,  orl, kmp_int32, 32, ||, 4i, 3, 0 )             // __kmpc_atomic_fixed4_orl
950 ATOMIC_CMPX_L( fixed8, andl, kmp_int64, 64, &&, 8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_andl
951 ATOMIC_CMPX_L( fixed8,  orl, kmp_int64, 64, ||, 8i, 7, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_orl
952 
953 
954 /* ------------------------------------------------------------------------- */
955 /* Routines for Fortran operators that matched no one in C:                  */
956 /* MAX, MIN, .EQV., .NEQV.                                                   */
957 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}           */
958 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}  */
959 /* ------------------------------------------------------------------------- */
960 
961 // -------------------------------------------------------------------------
962 // MIN and MAX need separate macros
// OP - comparison operator used to check whether any action is needed
964 #define MIN_MAX_CRITSECT(OP,LCK_ID)                                        \
965     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );                     \
966                                                                            \
967     if ( *lhs OP rhs ) {                 /* still need actions? */         \
968         *lhs = rhs;                                                        \
969     }                                                                      \
970     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
971 
972 // -------------------------------------------------------------------------
973 #ifdef KMP_GOMP_COMPAT
974 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG)                                     \
975     if (( FLAG ) && ( __kmp_atomic_mode == 2 )) {                          \
976         KMP_CHECK_GTID;                                                    \
977         MIN_MAX_CRITSECT( OP, 0 );                                         \
978         return;                                                            \
979     }
980 #else
981 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG)
982 #endif /* KMP_GOMP_COMPAT */
983 
984 // -------------------------------------------------------------------------
985 #define MIN_MAX_CMPXCHG(TYPE,BITS,OP)                                      \
986     {                                                                      \
987         TYPE KMP_ATOMIC_VOLATILE temp_val;                                 \
988         TYPE old_value;                                                    \
989         temp_val = *lhs;                                                   \
990         old_value = temp_val;                                              \
991         while ( old_value OP rhs &&          /* still need actions? */     \
992             ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs,      \
993                       *VOLATILE_CAST(kmp_int##BITS *) &old_value,          \
994                       *VOLATILE_CAST(kmp_int##BITS *) &rhs ) )             \
995         {                                                                  \
996             KMP_CPU_PAUSE();                                               \
997             temp_val = *lhs;                                               \
998             old_value = temp_val;                                          \
999         }                                                                  \
1000     }
1001 
1002 // -------------------------------------------------------------------------
1003 // 1-byte, 2-byte operands - use critical section
1004 #define MIN_MAX_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)           \
1005 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                      \
1006     if ( *lhs OP rhs ) {     /* need actions? */                           \
1007         GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG)                                \
1008         MIN_MAX_CRITSECT(OP,LCK_ID)                                        \
1009     }                                                                      \
1010 }
1011 
1012 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1013 
1014 // -------------------------------------------------------------------------
1015 // X86 or X86_64: no alignment problems ====================================
1016 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1017 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                      \
1018     if ( *lhs OP rhs ) {                                                   \
1019         GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG)                                \
1020         MIN_MAX_CMPXCHG(TYPE,BITS,OP)                                      \
1021     }                                                                      \
1022 }
1023 
1024 #else
1025 // -------------------------------------------------------------------------
1026 // Code for other architectures that don't handle unaligned accesses.
1027 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1028 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                      \
1029     if ( *lhs OP rhs ) {                                                   \
1030         GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG)                                \
1031         if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) {                       \
1032             MIN_MAX_CMPXCHG(TYPE,BITS,OP) /* aligned address */            \
1033         } else {                                                           \
1034             KMP_CHECK_GTID;                                                \
1035             MIN_MAX_CRITSECT(OP,LCK_ID)   /* unaligned address */          \
1036         }                                                                  \
1037     }                                                                      \
1038 }
1039 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1040 
1041 MIN_MAX_COMPXCHG( fixed1,  max, char,        8, <, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max
1042 MIN_MAX_COMPXCHG( fixed1,  min, char,        8, >, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min
1043 MIN_MAX_COMPXCHG( fixed2,  max, short,      16, <, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max
1044 MIN_MAX_COMPXCHG( fixed2,  min, short,      16, >, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min
1045 MIN_MAX_COMPXCHG( fixed4,  max, kmp_int32,  32, <, 4i, 3, 0 )            // __kmpc_atomic_fixed4_max
1046 MIN_MAX_COMPXCHG( fixed4,  min, kmp_int32,  32, >, 4i, 3, 0 )            // __kmpc_atomic_fixed4_min
1047 MIN_MAX_COMPXCHG( fixed8,  max, kmp_int64,  64, <, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max
1048 MIN_MAX_COMPXCHG( fixed8,  min, kmp_int64,  64, >, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min
1049 MIN_MAX_COMPXCHG( float4,  max, kmp_real32, 32, <, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max
1050 MIN_MAX_COMPXCHG( float4,  min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min
1051 MIN_MAX_COMPXCHG( float8,  max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max
1052 MIN_MAX_COMPXCHG( float8,  min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min
1053 #if KMP_HAVE_QUAD
1054 MIN_MAX_CRITICAL( float16, max,     QUAD_LEGACY,      <, 16r,   1 )            // __kmpc_atomic_float16_max
1055 MIN_MAX_CRITICAL( float16, min,     QUAD_LEGACY,      >, 16r,   1 )            // __kmpc_atomic_float16_min
1056 #if ( KMP_ARCH_X86 )
1057     MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t,     <, 16r,   1 )            // __kmpc_atomic_float16_max_a16
1058     MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t,     >, 16r,   1 )            // __kmpc_atomic_float16_min_a16
1059 #endif
1060 #endif
1061 // ------------------------------------------------------------------------
// Need separate macros for .EQV. because of the need for a complement (~)
// OP is ignored for critical sections; ^=~ is used instead
1064 #define ATOMIC_CRIT_EQV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)           \
1065 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
1066     OP_GOMP_CRITICAL(^=~,GOMP_FLAG)  /* send assignment */                \
1067     OP_CRITICAL(^=~,LCK_ID)    /* send assignment and complement */       \
1068 }
1069 
1070 // ------------------------------------------------------------------------
1071 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1072 // ------------------------------------------------------------------------
1073 // X86 or X86_64: no alignment problems ===================================
1074 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1075 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
1076     OP_GOMP_CRITICAL(^=~,GOMP_FLAG)  /* send assignment */                \
1077     OP_CMPXCHG(TYPE,BITS,OP)                                              \
1078 }
1079 // ------------------------------------------------------------------------
1080 #else
1081 // ------------------------------------------------------------------------
1082 // Code for other architectures that don't handle unaligned accesses.
1083 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \
1084 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
1085     OP_GOMP_CRITICAL(^=~,GOMP_FLAG)                                       \
1086     if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) {                          \
1087         OP_CMPXCHG(TYPE,BITS,OP)   /* aligned address */                  \
1088     } else {                                                              \
1089         KMP_CHECK_GTID;                                                   \
1090         OP_CRITICAL(^=~,LCK_ID)    /* unaligned address - use critical */ \
1091     }                                                                     \
1092 }
1093 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1094 
1095 ATOMIC_CMPXCHG(  fixed1, neqv, kmp_int8,   8,   ^, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv
1096 ATOMIC_CMPXCHG(  fixed2, neqv, kmp_int16, 16,   ^, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv
1097 ATOMIC_CMPXCHG(  fixed4, neqv, kmp_int32, 32,   ^, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv
1098 ATOMIC_CMPXCHG(  fixed8, neqv, kmp_int64, 64,   ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv
1099 ATOMIC_CMPX_EQV( fixed1, eqv,  kmp_int8,   8,  ^~, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv
1100 ATOMIC_CMPX_EQV( fixed2, eqv,  kmp_int16, 16,  ^~, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv
1101 ATOMIC_CMPX_EQV( fixed4, eqv,  kmp_int32, 32,  ^~, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv
1102 ATOMIC_CMPX_EQV( fixed8, eqv,  kmp_int64, 64,  ^~, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv
1103 
1104 
1105 // ------------------------------------------------------------------------
1106 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1107 //     TYPE_ID, OP_ID, TYPE - detailed above
1108 //     OP      - operator
1109 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1110 #define ATOMIC_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)           \
1111 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
1112     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)  /* send assignment */              \
1113     OP_CRITICAL(OP##=,LCK_ID)          /* send assignment */              \
1114 }
1115 
1116 /* ------------------------------------------------------------------------- */
1117 // routines for long double type
1118 ATOMIC_CRITICAL( float10, add, long double,     +, 10r,   1 )            // __kmpc_atomic_float10_add
1119 ATOMIC_CRITICAL( float10, sub, long double,     -, 10r,   1 )            // __kmpc_atomic_float10_sub
1120 ATOMIC_CRITICAL( float10, mul, long double,     *, 10r,   1 )            // __kmpc_atomic_float10_mul
1121 ATOMIC_CRITICAL( float10, div, long double,     /, 10r,   1 )            // __kmpc_atomic_float10_div
1122 #if KMP_HAVE_QUAD
1123 // routines for _Quad type
1124 ATOMIC_CRITICAL( float16, add, QUAD_LEGACY,     +, 16r,   1 )            // __kmpc_atomic_float16_add
1125 ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY,     -, 16r,   1 )            // __kmpc_atomic_float16_sub
1126 ATOMIC_CRITICAL( float16, mul, QUAD_LEGACY,     *, 16r,   1 )            // __kmpc_atomic_float16_mul
1127 ATOMIC_CRITICAL( float16, div, QUAD_LEGACY,     /, 16r,   1 )            // __kmpc_atomic_float16_div
1128 #if ( KMP_ARCH_X86 )
1129     ATOMIC_CRITICAL( float16, add_a16, Quad_a16_t, +, 16r, 1 )           // __kmpc_atomic_float16_add_a16
1130     ATOMIC_CRITICAL( float16, sub_a16, Quad_a16_t, -, 16r, 1 )           // __kmpc_atomic_float16_sub_a16
1131     ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 )           // __kmpc_atomic_float16_mul_a16
1132     ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 )           // __kmpc_atomic_float16_div_a16
1133 #endif
1134 #endif
1135 // routines for complex types
1136 
1137 #if USE_CMPXCHG_FIX
1138 // workaround for C78287 (complex(kind=4) data type)
1139 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 )   // __kmpc_atomic_cmplx4_add
1140 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 )   // __kmpc_atomic_cmplx4_sub
1141 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 )   // __kmpc_atomic_cmplx4_mul
1142 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 )   // __kmpc_atomic_cmplx4_div
1143 // end of the workaround for C78287
1144 #else
1145 ATOMIC_CRITICAL( cmplx4,  add, kmp_cmplx32,     +,  8c,   1 )            // __kmpc_atomic_cmplx4_add
1146 ATOMIC_CRITICAL( cmplx4,  sub, kmp_cmplx32,     -,  8c,   1 )            // __kmpc_atomic_cmplx4_sub
1147 ATOMIC_CRITICAL( cmplx4,  mul, kmp_cmplx32,     *,  8c,   1 )            // __kmpc_atomic_cmplx4_mul
1148 ATOMIC_CRITICAL( cmplx4,  div, kmp_cmplx32,     /,  8c,   1 )            // __kmpc_atomic_cmplx4_div
1149 #endif // USE_CMPXCHG_FIX
1150 
1151 ATOMIC_CRITICAL( cmplx8,  add, kmp_cmplx64,     +, 16c,   1 )            // __kmpc_atomic_cmplx8_add
1152 ATOMIC_CRITICAL( cmplx8,  sub, kmp_cmplx64,     -, 16c,   1 )            // __kmpc_atomic_cmplx8_sub
1153 ATOMIC_CRITICAL( cmplx8,  mul, kmp_cmplx64,     *, 16c,   1 )            // __kmpc_atomic_cmplx8_mul
1154 ATOMIC_CRITICAL( cmplx8,  div, kmp_cmplx64,     /, 16c,   1 )            // __kmpc_atomic_cmplx8_div
1155 ATOMIC_CRITICAL( cmplx10, add, kmp_cmplx80,     +, 20c,   1 )            // __kmpc_atomic_cmplx10_add
1156 ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80,     -, 20c,   1 )            // __kmpc_atomic_cmplx10_sub
1157 ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80,     *, 20c,   1 )            // __kmpc_atomic_cmplx10_mul
1158 ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80,     /, 20c,   1 )            // __kmpc_atomic_cmplx10_div
1159 #if KMP_HAVE_QUAD
1160 ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG,     +, 32c,   1 )            // __kmpc_atomic_cmplx16_add
1161 ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG,     -, 32c,   1 )            // __kmpc_atomic_cmplx16_sub
1162 ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG,     *, 32c,   1 )            // __kmpc_atomic_cmplx16_mul
1163 ATOMIC_CRITICAL( cmplx16, div, CPLX128_LEG,     /, 32c,   1 )            // __kmpc_atomic_cmplx16_div
1164 #if ( KMP_ARCH_X86 )
1165     ATOMIC_CRITICAL( cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1 )   // __kmpc_atomic_cmplx16_add_a16
1166     ATOMIC_CRITICAL( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 )   // __kmpc_atomic_cmplx16_sub_a16
1167     ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 )   // __kmpc_atomic_cmplx16_mul_a16
1168     ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 )   // __kmpc_atomic_cmplx16_div_a16
1169 #endif
1170 #endif
1171 
1172 #if OMP_40_ENABLED
1173 
1174 // OpenMP 4.0: x = expr binop x for non-commutative operations.
1175 // Supported only on IA-32 architecture and Intel(R) 64
1176 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1177 
1178 // ------------------------------------------------------------------------
1179 // Operation on *lhs, rhs bound by critical section
1180 //     OP     - operator (it's supposed to contain an assignment)
1181 //     LCK_ID - lock identifier
// Note: gtid is not checked here because it should always be valid;
// 1- and 2-byte routines expect a valid gtid, other sizes check it before this macro
1184 #define OP_CRITICAL_REV(OP,LCK_ID) \
1185     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
1186                                                                           \
1187     (*lhs) = (rhs) OP (*lhs);                                             \
1188                                                                           \
1189     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
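
// Illustrative (not compiled): for a reversed operation the operands swap,
// e.g. OP_CRITICAL_REV(-, 10r) performs, under the lock,
//
//     (*lhs) = rhs - (*lhs);
//
// whereas the forward routine computes the equivalent of (*lhs) -= rhs.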
1190 
1191 #ifdef KMP_GOMP_COMPAT
1192 #define OP_GOMP_CRITICAL_REV(OP,FLAG)                                     \
1193     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
1194         KMP_CHECK_GTID;                                                   \
1195         OP_CRITICAL_REV( OP, 0 );                                         \
1196         return;                                                           \
1197     }
1198 #else
1199 #define OP_GOMP_CRITICAL_REV(OP,FLAG)
1200 #endif /* KMP_GOMP_COMPAT */
1201 
1202 
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands' type and size (fixed* for signed, fixed*u for unsigned fixed)
1205 //     OP_ID   - operation identifier (add, sub, mul, ...)
1206 //     TYPE    - operands' type
1207 #define ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
1208 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \
1209 {                                                                                         \
1210     KMP_DEBUG_ASSERT( __kmp_init_serial );                                                \
1211     KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid ));
1212 
1213 // ------------------------------------------------------------------------
1214 // Operation on *lhs, rhs using "compare_and_store" routine
1215 //     TYPE    - operands' type
1216 //     BITS    - size in bits, used to distinguish low level calls
1217 //     OP      - operator
1218 // Note: temp_val introduced in order to force the compiler to read
1219 //       *lhs only once (w/o it the compiler reads *lhs twice)
1220 #define OP_CMPXCHG_REV(TYPE,BITS,OP)                                      \
1221     {                                                                     \
1222         TYPE KMP_ATOMIC_VOLATILE temp_val;                                \
1223         TYPE old_value, new_value;                                        \
1224         temp_val = *lhs;                                                  \
1225         old_value = temp_val;                                             \
1226         new_value = rhs OP old_value;                                     \
1227         while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1228                       *VOLATILE_CAST(kmp_int##BITS *) &old_value,         \
1229                       *VOLATILE_CAST(kmp_int##BITS *) &new_value ) )      \
1230         {                                                                 \
1231             KMP_DO_PAUSE;                                                 \
1232                                                                           \
1233             temp_val = *lhs;                                              \
1234             old_value = temp_val;                                         \
1235             new_value = rhs OP old_value;                                 \
1236         }                                                                 \
1237     }
1238 
1239 // -------------------------------------------------------------------------
1240 #define ATOMIC_CMPXCHG_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,GOMP_FLAG)   \
1241 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void)                                 \
1242     OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG)                                    \
1243     OP_CMPXCHG_REV(TYPE,BITS,OP)                                          \
1244 }
1245 
1246 // ------------------------------------------------------------------------
1247 // Entries definition for integer operands
1248 //     TYPE_ID - operands type and size (fixed4, float4)
1249 //     OP_ID   - operation identifier (add, sub, mul, ...)
1250 //     TYPE    - operand type
1251 //     BITS    - size in bits, used to distinguish low level calls
1252 //     OP      - operator (used in critical section)
1253 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1254 
1255 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,LCK_ID,GOMP_FLAG
1256 // ------------------------------------------------------------------------
1257 // Routines for ATOMIC integer operands, other operators
1258 // ------------------------------------------------------------------------
1259 //                  TYPE_ID,OP_ID, TYPE,    BITS, OP, LCK_ID, GOMP_FLAG
1260 ATOMIC_CMPXCHG_REV( fixed1,  div, kmp_int8,    8, /,  1i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_div_rev
1261 ATOMIC_CMPXCHG_REV( fixed1u, div, kmp_uint8,   8, /,  1i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1u_div_rev
1262 ATOMIC_CMPXCHG_REV( fixed1,  shl, kmp_int8,    8, <<, 1i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_shl_rev
1263 ATOMIC_CMPXCHG_REV( fixed1,  shr, kmp_int8,    8, >>, 1i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_shr_rev
1264 ATOMIC_CMPXCHG_REV( fixed1u, shr, kmp_uint8,   8, >>, 1i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1u_shr_rev
1265 ATOMIC_CMPXCHG_REV( fixed1,  sub, kmp_int8,    8, -,  1i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_sub_rev
1266 
1267 ATOMIC_CMPXCHG_REV( fixed2,  div, kmp_int16,  16, /,  2i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_div_rev
1268 ATOMIC_CMPXCHG_REV( fixed2u, div, kmp_uint16, 16, /,  2i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2u_div_rev
1269 ATOMIC_CMPXCHG_REV( fixed2,  shl, kmp_int16,  16, <<, 2i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_shl_rev
1270 ATOMIC_CMPXCHG_REV( fixed2,  shr, kmp_int16,  16, >>, 2i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_shr_rev
1271 ATOMIC_CMPXCHG_REV( fixed2u, shr, kmp_uint16, 16, >>, 2i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2u_shr_rev
1272 ATOMIC_CMPXCHG_REV( fixed2,  sub, kmp_int16,  16, -,  2i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_sub_rev
1273 
1274 ATOMIC_CMPXCHG_REV( fixed4,  div, kmp_int32,  32, /,  4i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_div_rev
1275 ATOMIC_CMPXCHG_REV( fixed4u, div, kmp_uint32, 32, /,  4i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4u_div_rev
1276 ATOMIC_CMPXCHG_REV( fixed4,  shl, kmp_int32,  32, <<, 4i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_shl_rev
1277 ATOMIC_CMPXCHG_REV( fixed4,  shr, kmp_int32,  32, >>, 4i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_shr_rev
1278 ATOMIC_CMPXCHG_REV( fixed4u, shr, kmp_uint32, 32, >>, 4i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4u_shr_rev
1279 ATOMIC_CMPXCHG_REV( fixed4,  sub, kmp_int32,  32, -,  4i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_sub_rev
1280 
1281 ATOMIC_CMPXCHG_REV( fixed8,  div, kmp_int64,  64, /,  8i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_div_rev
1282 ATOMIC_CMPXCHG_REV( fixed8u, div, kmp_uint64, 64, /,  8i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8u_div_rev
1283 ATOMIC_CMPXCHG_REV( fixed8,  shl, kmp_int64,  64, <<, 8i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_shl_rev
1284 ATOMIC_CMPXCHG_REV( fixed8,  shr, kmp_int64,  64, >>, 8i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_shr_rev
1285 ATOMIC_CMPXCHG_REV( fixed8u, shr, kmp_uint64, 64, >>, 8i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8u_shr_rev
1286 ATOMIC_CMPXCHG_REV( fixed8,  sub, kmp_int64,  64, -,  8i, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_sub_rev
1287 
1288 ATOMIC_CMPXCHG_REV( float4,  div, kmp_real32, 32, /,  4r, KMP_ARCH_X86 )  // __kmpc_atomic_float4_div_rev
1289 ATOMIC_CMPXCHG_REV( float4,  sub, kmp_real32, 32, -,  4r, KMP_ARCH_X86 )  // __kmpc_atomic_float4_sub_rev
1290 
1291 ATOMIC_CMPXCHG_REV( float8,  div, kmp_real64, 64, /,  8r, KMP_ARCH_X86 )  // __kmpc_atomic_float8_div_rev
1292 ATOMIC_CMPXCHG_REV( float8,  sub, kmp_real64, 64, -,  8r, KMP_ARCH_X86 )  // __kmpc_atomic_float8_sub_rev
1293 //                  TYPE_ID,OP_ID, TYPE,     BITS,OP,LCK_ID, GOMP_FLAG
1294 
1295 // ------------------------------------------------------------------------
1296 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1297 //     TYPE_ID, OP_ID, TYPE - detailed above
1298 //     OP      - operator
1299 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1300 #define ATOMIC_CRITICAL_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)           \
1301 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void)                                     \
1302     OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG)                                        \
1303     OP_CRITICAL_REV(OP,LCK_ID)                                                \
1304 }
1305 
1306 /* ------------------------------------------------------------------------- */
1307 // routines for long double type
1308 ATOMIC_CRITICAL_REV( float10, sub, long double,     -, 10r,   1 )            // __kmpc_atomic_float10_sub_rev
1309 ATOMIC_CRITICAL_REV( float10, div, long double,     /, 10r,   1 )            // __kmpc_atomic_float10_div_rev
1310 #if KMP_HAVE_QUAD
1311 // routines for _Quad type
1312 ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY,     -, 16r,   1 )            // __kmpc_atomic_float16_sub_rev
1313 ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY,     /, 16r,   1 )            // __kmpc_atomic_float16_div_rev
1314 #if ( KMP_ARCH_X86 )
1315     ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 )           // __kmpc_atomic_float16_sub_a16_rev
1316     ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 )           // __kmpc_atomic_float16_div_a16_rev
1317 #endif
1318 #endif
1319 
1320 // routines for complex types
1321 ATOMIC_CRITICAL_REV( cmplx4,  sub, kmp_cmplx32,     -, 8c,    1 )            // __kmpc_atomic_cmplx4_sub_rev
1322 ATOMIC_CRITICAL_REV( cmplx4,  div, kmp_cmplx32,     /, 8c,    1 )            // __kmpc_atomic_cmplx4_div_rev
1323 ATOMIC_CRITICAL_REV( cmplx8,  sub, kmp_cmplx64,     -, 16c,   1 )            // __kmpc_atomic_cmplx8_sub_rev
1324 ATOMIC_CRITICAL_REV( cmplx8,  div, kmp_cmplx64,     /, 16c,   1 )            // __kmpc_atomic_cmplx8_div_rev
1325 ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80,     -, 20c,   1 )            // __kmpc_atomic_cmplx10_sub_rev
1326 ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80,     /, 20c,   1 )            // __kmpc_atomic_cmplx10_div_rev
1327 #if KMP_HAVE_QUAD
1328 ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG,     -, 32c,   1 )            // __kmpc_atomic_cmplx16_sub_rev
1329 ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG,     /, 32c,   1 )            // __kmpc_atomic_cmplx16_div_rev
1330 #if ( KMP_ARCH_X86 )
1331     ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 )   // __kmpc_atomic_cmplx16_sub_a16_rev
1332     ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 )   // __kmpc_atomic_cmplx16_div_a16_rev
1333 #endif
1334 #endif
1335 
1336 
1337 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
1338 // End of OpenMP 4.0: x = expr binop x for non-commutative operations.
1339 
1340 #endif //OMP_40_ENABLED
1341 
1342 
1343 /* ------------------------------------------------------------------------ */
1344 /* Routines for mixed types of LHS and RHS, when RHS is "larger"            */
/* Note: in order to reduce the total number of type combinations          */
/*       it is assumed that the compiler converts the RHS to the longest   */
/*       floating type, that is _Quad, before calling any of these routines*/
/* Conversion to _Quad is done by the compiler during the calculation, and */
/*    conversion back to TYPE happens before the assignment, like:         */
/*    *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                 */
/* A performance penalty is expected because of software _Quad emulation   */
1352 /* ------------------------------------------------------------------------ */
1353 
1354 #define ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE)                                             \
1355 void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs ) \
1356 {                                                                                                       \
1357     KMP_DEBUG_ASSERT( __kmp_init_serial );                                                              \
1358     KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid ));
1359 
1360 // -------------------------------------------------------------------------
1361 #define ATOMIC_CRITICAL_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG)         \
1362 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE)                                       \
1363     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)  /* send assignment */                              \
1364     OP_CRITICAL(OP##=,LCK_ID)  /* send assignment */                                      \
1365 }
1366 
1367 // -------------------------------------------------------------------------
1368 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1369 // -------------------------------------------------------------------------
1370 // X86 or X86_64: no alignment problems ====================================
1371 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1372 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE)                                         \
1373     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                                       \
1374     OP_CMPXCHG(TYPE,BITS,OP)                                                                \
1375 }
1376 // -------------------------------------------------------------------------
1377 #else
1378 // ------------------------------------------------------------------------
1379 // Code for other architectures that don't handle unaligned accesses.
1380 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1381 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE)                                         \
1382     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                                       \
1383     if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) {                                            \
1384         OP_CMPXCHG(TYPE,BITS,OP)     /* aligned address */                                  \
1385     } else {                                                                                \
1386         KMP_CHECK_GTID;                                                                     \
1387         OP_CRITICAL(OP##=,LCK_ID)  /* unaligned address - use critical */                   \
1388     }                                                                                       \
1389 }
1390 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1391 
1392 // -------------------------------------------------------------------------
1393 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1394 // -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE)                                             \
    OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG)                                                          \
    OP_CMPXCHG_REV(TYPE,BITS,OP)                                                                \
}
#define ATOMIC_CRITICAL_REV_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG)           \
ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE)                                             \
    OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG)                                                          \
    OP_CRITICAL_REV(OP,LCK_ID)                                                                  \
}
1405 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1406 
1407 // RHS=float8
1408 ATOMIC_CMPXCHG_MIX( fixed1, char,       mul,  8, *, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_float8
1409 ATOMIC_CMPXCHG_MIX( fixed1, char,       div,  8, /, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_float8
1410 ATOMIC_CMPXCHG_MIX( fixed2, short,      mul, 16, *, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_float8
1411 ATOMIC_CMPXCHG_MIX( fixed2, short,      div, 16, /, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_float8
1412 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32,  mul, 32, *, float8, kmp_real64, 4i, 3, 0 )            // __kmpc_atomic_fixed4_mul_float8
1413 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32,  div, 32, /, float8, kmp_real64, 4i, 3, 0 )            // __kmpc_atomic_fixed4_div_float8
1414 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64,  mul, 64, *, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_float8
1415 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64,  div, 64, /, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_float8
1416 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_float8
1417 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_float8
1418 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_float8
1419 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8
1420 
1421 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them)
1422 #if KMP_HAVE_QUAD
1423 ATOMIC_CMPXCHG_MIX( fixed1,  char,       add,  8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp
1424 ATOMIC_CMPXCHG_MIX( fixed1u, uchar,      add,  8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_add_fp
1425 ATOMIC_CMPXCHG_MIX( fixed1,  char,       sub,  8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp
1426 ATOMIC_CMPXCHG_MIX( fixed1u, uchar,      sub,  8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_sub_fp
1427 ATOMIC_CMPXCHG_MIX( fixed1,  char,       mul,  8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp
1428 ATOMIC_CMPXCHG_MIX( fixed1u, uchar,      mul,  8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_mul_fp
1429 ATOMIC_CMPXCHG_MIX( fixed1,  char,       div,  8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_fp
1430 ATOMIC_CMPXCHG_MIX( fixed1u, uchar,      div,  8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_fp
1431 
1432 ATOMIC_CMPXCHG_MIX( fixed2,  short,      add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_fp
1433 ATOMIC_CMPXCHG_MIX( fixed2u, ushort,     add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_add_fp
1434 ATOMIC_CMPXCHG_MIX( fixed2,  short,      sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_fp
1435 ATOMIC_CMPXCHG_MIX( fixed2u, ushort,     sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_sub_fp
1436 ATOMIC_CMPXCHG_MIX( fixed2,  short,      mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_fp
1437 ATOMIC_CMPXCHG_MIX( fixed2u, ushort,     mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_mul_fp
1438 ATOMIC_CMPXCHG_MIX( fixed2,  short,      div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_fp
1439 ATOMIC_CMPXCHG_MIX( fixed2u, ushort,     div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_fp
1440 
1441 ATOMIC_CMPXCHG_MIX( fixed4,  kmp_int32,  add, 32, +, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4_add_fp
1442 ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4u_add_fp
1443 ATOMIC_CMPXCHG_MIX( fixed4,  kmp_int32,  sub, 32, -, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4_sub_fp
1444 ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4u_sub_fp
1445 ATOMIC_CMPXCHG_MIX( fixed4,  kmp_int32,  mul, 32, *, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4_mul_fp
1446 ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4u_mul_fp
1447 ATOMIC_CMPXCHG_MIX( fixed4,  kmp_int32,  div, 32, /, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4_div_fp
1448 ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4u_div_fp
1449 
1450 ATOMIC_CMPXCHG_MIX( fixed8,  kmp_int64,  add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_fp
1451 ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_add_fp
1452 ATOMIC_CMPXCHG_MIX( fixed8,  kmp_int64,  sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_fp
1453 ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_sub_fp
1454 ATOMIC_CMPXCHG_MIX( fixed8,  kmp_int64,  mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_fp
1455 ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_mul_fp
1456 ATOMIC_CMPXCHG_MIX( fixed8,  kmp_int64,  div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_fp
1457 ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_fp
1458 
1459 ATOMIC_CMPXCHG_MIX( float4,  kmp_real32, add, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_fp
1460 ATOMIC_CMPXCHG_MIX( float4,  kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_fp
1461 ATOMIC_CMPXCHG_MIX( float4,  kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_fp
1462 ATOMIC_CMPXCHG_MIX( float4,  kmp_real32, div, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_fp
1463 
1464 ATOMIC_CMPXCHG_MIX( float8,  kmp_real64, add, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_fp
1465 ATOMIC_CMPXCHG_MIX( float8,  kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_fp
1466 ATOMIC_CMPXCHG_MIX( float8,  kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_fp
1467 ATOMIC_CMPXCHG_MIX( float8,  kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_fp
1468 
1469 ATOMIC_CRITICAL_FP( float10, long double,    add, +, fp, _Quad, 10r,   1 )            // __kmpc_atomic_float10_add_fp
1470 ATOMIC_CRITICAL_FP( float10, long double,    sub, -, fp, _Quad, 10r,   1 )            // __kmpc_atomic_float10_sub_fp
1471 ATOMIC_CRITICAL_FP( float10, long double,    mul, *, fp, _Quad, 10r,   1 )            // __kmpc_atomic_float10_mul_fp
1472 ATOMIC_CRITICAL_FP( float10, long double,    div, /, fp, _Quad, 10r,   1 )            // __kmpc_atomic_float10_div_fp
1473 
1474 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1475 // Reverse operations
1476 ATOMIC_CMPXCHG_REV_MIX( fixed1,  char,       sub_rev,  8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_rev_fp
1477 ATOMIC_CMPXCHG_REV_MIX( fixed1u, uchar,      sub_rev,  8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_sub_rev_fp
1478 ATOMIC_CMPXCHG_REV_MIX( fixed1,  char,       div_rev,  8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_rev_fp
1479 ATOMIC_CMPXCHG_REV_MIX( fixed1u, uchar,      div_rev,  8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_rev_fp
1480 
1481 ATOMIC_CMPXCHG_REV_MIX( fixed2,  short,      sub_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_rev_fp
1482 ATOMIC_CMPXCHG_REV_MIX( fixed2u, ushort,     sub_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_sub_rev_fp
1483 ATOMIC_CMPXCHG_REV_MIX( fixed2,  short,      div_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_rev_fp
1484 ATOMIC_CMPXCHG_REV_MIX( fixed2u, ushort,     div_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_rev_fp
1485 
1486 ATOMIC_CMPXCHG_REV_MIX( fixed4,  kmp_int32,  sub_rev, 32, -, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4_sub_rev_fp
1487 ATOMIC_CMPXCHG_REV_MIX( fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4u_sub_rev_fp
1488 ATOMIC_CMPXCHG_REV_MIX( fixed4,  kmp_int32,  div_rev, 32, /, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4_div_rev_fp
1489 ATOMIC_CMPXCHG_REV_MIX( fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4u_div_rev_fp
1490 
1491 ATOMIC_CMPXCHG_REV_MIX( fixed8,  kmp_int64,  sub_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_rev_fp
1492 ATOMIC_CMPXCHG_REV_MIX( fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_sub_rev_fp
1493 ATOMIC_CMPXCHG_REV_MIX( fixed8,  kmp_int64,  div_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_rev_fp
1494 ATOMIC_CMPXCHG_REV_MIX( fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_rev_fp
1495 
1496 ATOMIC_CMPXCHG_REV_MIX( float4,  kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_rev_fp
1497 ATOMIC_CMPXCHG_REV_MIX( float4,  kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_rev_fp
1498 
1499 ATOMIC_CMPXCHG_REV_MIX( float8,  kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_rev_fp
1500 ATOMIC_CMPXCHG_REV_MIX( float8,  kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_rev_fp
1501 
1502 ATOMIC_CRITICAL_REV_FP( float10, long double,    sub_rev, -, fp, _Quad, 10r,   1 )            // __kmpc_atomic_float10_sub_rev_fp
1503 ATOMIC_CRITICAL_REV_FP( float10, long double,    div_rev, /, fp, _Quad, 10r,   1 )            // __kmpc_atomic_float10_div_rev_fp
1504 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1505 
#endif // KMP_HAVE_QUAD
1507 
1508 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1509 // ------------------------------------------------------------------------
1510 // X86 or X86_64: no alignment problems ====================================
1511 #if USE_CMPXCHG_FIX
1512 // workaround for C78287 (complex(kind=4) data type)
1513 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1514 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE)                                           \
1515     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                                         \
1516     OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP)                                                       \
1517 }
1518 // end of the second part of the workaround for C78287
1519 #else
1520 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1521 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE)                                           \
1522     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                                         \
1523     OP_CMPXCHG(TYPE,BITS,OP)                                                                  \
1524 }
1525 #endif // USE_CMPXCHG_FIX
1526 #else
1527 // ------------------------------------------------------------------------
1528 // Code for other architectures that don't handle unaligned accesses.
1529 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \
1530 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE)                                           \
1531     OP_GOMP_CRITICAL(OP##=,GOMP_FLAG)                                                         \
1532     if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) {                                              \
1533         OP_CMPXCHG(TYPE,BITS,OP)     /* aligned address */                                    \
1534     } else {                                                                                  \
1535         KMP_CHECK_GTID;                                                                       \
1536         OP_CRITICAL(OP##=,LCK_ID)  /* unaligned address - use critical */                     \
1537     }                                                                                         \
1538 }
1539 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1540 
1541 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, add, 64, +, cmplx8,  kmp_cmplx64,  8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_add_cmplx8
1542 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, sub, 64, -, cmplx8,  kmp_cmplx64,  8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_sub_cmplx8
1543 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, mul, 64, *, cmplx8,  kmp_cmplx64,  8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_mul_cmplx8
1544 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, div, 64, /, cmplx8,  kmp_cmplx64,  8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_div_cmplx8
1545 
1546 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1547 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1548 
1549 //////////////////////////////////////////////////////////////////////////////////////////////////////
1550 // ------------------------------------------------------------------------
1551 // Atomic READ routines
1552 // ------------------------------------------------------------------------
1553 
1554 // ------------------------------------------------------------------------
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands' type and size (fixed* for signed, fixed*u for unsigned fixed)
1557 //     OP_ID   - operation identifier (add, sub, mul, ...)
1558 //     TYPE    - operands' type
1559 #define ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE, RET_TYPE) \
1560 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * loc ) \
1561 {                                                                                   \
1562     KMP_DEBUG_ASSERT( __kmp_init_serial );                                          \
1563     KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1564 
1565 // ------------------------------------------------------------------------
// Atomic read of *loc using the "compare_and_store_ret" routine
//     TYPE    - operands' type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator
// Note: temp_val introduced in order to force the compiler to read
//       *loc only once (w/o it the compiler reads *loc twice)
// TODO: check if it is still necessary
// Return the old value regardless of the result of the "compare & swap" operation
1574 
1575 #define OP_CMPXCHG_READ(TYPE,BITS,OP)                                     \
1576     {                                                                     \
1577         TYPE KMP_ATOMIC_VOLATILE temp_val;                                \
1578         union f_i_union {                                                 \
1579             TYPE f_val;                                                   \
1580             kmp_int##BITS i_val;                                          \
1581         };                                                                \
1582         union f_i_union old_value;                                        \
1583         temp_val = *loc;                                                  \
1584         old_value.f_val = temp_val;                                       \
1585         old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( (kmp_int##BITS *) loc, \
1586                       *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val,   \
1587                       *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val ); \
1588         new_value = old_value.f_val;                                      \
1589         return new_value;                                                 \
1590     }
1591 
1592 // -------------------------------------------------------------------------
// Atomic read of *loc bound by critical section
//     OP     - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: gtid is not checked here because it should always be valid;
// 1- and 2-byte routines expect a valid gtid, other sizes check it before this macro
1598 #define OP_CRITICAL_READ(OP,LCK_ID)                                       \
    __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );            \
1600                                                                           \
1601     new_value = (*loc);                                                   \
1602                                                                           \
1603     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1604 
1605 // -------------------------------------------------------------------------
1606 #ifdef KMP_GOMP_COMPAT
1607 #define OP_GOMP_CRITICAL_READ(OP,FLAG)                                    \
1608     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
1609         KMP_CHECK_GTID;                                                   \
1610         OP_CRITICAL_READ( OP, 0 );                                        \
1611         return new_value;                                                 \
1612     }
1613 #else
1614 #define OP_GOMP_CRITICAL_READ(OP,FLAG)
1615 #endif /* KMP_GOMP_COMPAT */
1616 
1617 // -------------------------------------------------------------------------
1618 #define ATOMIC_FIXED_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)           \
1619 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE)                                \
1620     TYPE new_value;                                                       \
1621     OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG)                                \
1622     new_value = KMP_TEST_THEN_ADD##BITS( loc, OP 0 );                     \
1623     return new_value;                                                     \
1624 }
1625 // -------------------------------------------------------------------------
1626 #define ATOMIC_CMPXCHG_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)         \
1627 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE)                                \
1628     TYPE new_value;                                                       \
1629     OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG)                                \
1630     OP_CMPXCHG_READ(TYPE,BITS,OP)                                         \
1631 }
1632 // ------------------------------------------------------------------------
1633 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1634 //     TYPE_ID, OP_ID, TYPE - detailed above
1635 //     OP      - operator
1636 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1637 #define ATOMIC_CRITICAL_READ(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)      \
1638 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE)                                \
1639     TYPE new_value;                                                       \
1640     OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG)  /* send assignment */         \
1641     OP_CRITICAL_READ(OP,LCK_ID)          /* send assignment */            \
1642     return new_value;                                                     \
1643 }
1644 
1645 // ------------------------------------------------------------------------
// Fix for cmplx4 read (CQ220361) on Windows* OS. The regular routine that
// returns the value doesn't work there, so the read value is returned
// through an additional out-parameter instead.
1648 
1649 #if ( KMP_OS_WINDOWS )
1650 
1651 #define OP_CRITICAL_READ_WRK(OP,LCK_ID)                                   \
    __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );            \
1653                                                                           \
1654     (*out) = (*loc);                                                      \
1655                                                                           \
1656     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
1657 // ------------------------------------------------------------------------
1658 #ifdef KMP_GOMP_COMPAT
1659 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG)                                \
1660     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
1661         KMP_CHECK_GTID;                                                   \
1662         OP_CRITICAL_READ_WRK( OP, 0 );                                    \
1663     }
1664 #else
1665 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG)
1666 #endif /* KMP_GOMP_COMPAT */
1667 // ------------------------------------------------------------------------
1668 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \
1669 void __kmpc_atomic_##TYPE_ID##_##OP_ID( TYPE * out, ident_t *id_ref, int gtid, TYPE * loc ) \
1670 {                                                                                   \
1671     KMP_DEBUG_ASSERT( __kmp_init_serial );                                          \
1672     KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1673 
1674 // ------------------------------------------------------------------------
1675 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)      \
1676 ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE)                                     \
1677     OP_GOMP_CRITICAL_READ_WRK(OP##=,GOMP_FLAG)  /* send assignment */         \
1678     OP_CRITICAL_READ_WRK(OP,LCK_ID)          /* send assignment */            \
1679 }
1680 
1681 #endif // KMP_OS_WINDOWS
1682 
1683 // ------------------------------------------------------------------------
1684 //                  TYPE_ID,OP_ID, TYPE,      OP, GOMP_FLAG
1685 ATOMIC_FIXED_READ( fixed4, rd, kmp_int32,  32, +, 0            )      // __kmpc_atomic_fixed4_rd
1686 ATOMIC_FIXED_READ( fixed8, rd, kmp_int64,  64, +, KMP_ARCH_X86 )      // __kmpc_atomic_fixed8_rd
1687 ATOMIC_CMPXCHG_READ( float4, rd, kmp_real32, 32, +, KMP_ARCH_X86 )    // __kmpc_atomic_float4_rd
1688 ATOMIC_CMPXCHG_READ( float8, rd, kmp_real64, 64, +, KMP_ARCH_X86 )    // __kmpc_atomic_float8_rd
1689 
// !!! TODO: Remove lock operations for "char" since a one-byte access cannot be torn (it is inherently atomic)
1691 ATOMIC_CMPXCHG_READ( fixed1,  rd, kmp_int8,    8, +,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_rd
1692 ATOMIC_CMPXCHG_READ( fixed2,  rd, kmp_int16,  16, +,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_rd
1693 
1694 ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r,   1 )         // __kmpc_atomic_float10_rd
1695 #if KMP_HAVE_QUAD
1696 ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r,   1 )         // __kmpc_atomic_float16_rd
1697 #endif // KMP_HAVE_QUAD
1698 
1699 // Fix for CQ220361 on Windows* OS
1700 #if ( KMP_OS_WINDOWS )
1701     ATOMIC_CRITICAL_READ_WRK( cmplx4,  rd, kmp_cmplx32, +,  8c, 1 )   // __kmpc_atomic_cmplx4_rd
1702 #else
1703     ATOMIC_CRITICAL_READ( cmplx4,  rd, kmp_cmplx32, +,  8c, 1 )       // __kmpc_atomic_cmplx4_rd
1704 #endif
1705 ATOMIC_CRITICAL_READ( cmplx8,  rd, kmp_cmplx64, +, 16c, 1 )           // __kmpc_atomic_cmplx8_rd
1706 ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 )           // __kmpc_atomic_cmplx10_rd
1707 #if KMP_HAVE_QUAD
1708 ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 )           // __kmpc_atomic_cmplx16_rd
1709 #if ( KMP_ARCH_X86 )
1710     ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 )         // __kmpc_atomic_float16_a16_rd
1711     ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd
1712 #endif
1713 #endif
1714 
1715 
1716 // ------------------------------------------------------------------------
1717 // Atomic WRITE routines
1718 // ------------------------------------------------------------------------
1719 
1720 #define ATOMIC_XCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)              \
1721 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
1722     OP_GOMP_CRITICAL(OP,GOMP_FLAG)                                        \
1723     KMP_XCHG_FIXED##BITS( lhs, rhs );                                     \
1724 }
1725 // ------------------------------------------------------------------------
1726 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)        \
1727 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
1728     OP_GOMP_CRITICAL(OP,GOMP_FLAG)                                        \
1729     KMP_XCHG_REAL##BITS( lhs, rhs );                                      \
1730 }
1731 
1732 
1733 // ------------------------------------------------------------------------
1734 // Operation on *lhs, rhs using "compare_and_store" routine
1735 //     TYPE    - operands' type
1736 //     BITS    - size in bits, used to distinguish low level calls
1737 //     OP      - operator
1738 // Note: temp_val introduced in order to force the compiler to read
1739 //       *lhs only once (w/o it the compiler reads *lhs twice)
1740 #define OP_CMPXCHG_WR(TYPE,BITS,OP)                                       \
1741     {                                                                     \
1742         TYPE KMP_ATOMIC_VOLATILE temp_val;                                \
1743         TYPE old_value, new_value;                                        \
1744         temp_val = *lhs;                                                  \
1745         old_value = temp_val;                                             \
1746         new_value = rhs;                                                  \
1747         while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1748                       *VOLATILE_CAST(kmp_int##BITS *) &old_value,         \
1749                       *VOLATILE_CAST(kmp_int##BITS *) &new_value ) )      \
1750         {                                                                 \
1751             KMP_CPU_PAUSE();                                              \
1752                                                                           \
1753             temp_val = *lhs;                                              \
1754             old_value = temp_val;                                         \
1755             new_value = rhs;                                              \
1756         }                                                                 \
1757     }
1758 
1759 // -------------------------------------------------------------------------
1760 #define ATOMIC_CMPXCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)           \
1761 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
1762     OP_GOMP_CRITICAL(OP,GOMP_FLAG)                                        \
1763     OP_CMPXCHG_WR(TYPE,BITS,OP)                                           \
1764 }
1765 
1766 // ------------------------------------------------------------------------
1767 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
1768 //     TYPE_ID, OP_ID, TYPE - detailed above
1769 //     OP      - operator
1770 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
1771 #define ATOMIC_CRITICAL_WR(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)        \
1772 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void)                                     \
1773     OP_GOMP_CRITICAL(OP,GOMP_FLAG)       /* send assignment */            \
1774     OP_CRITICAL(OP,LCK_ID)               /* send assignment */            \
1775 }
1776 // -------------------------------------------------------------------------
1777 
1778 ATOMIC_XCHG_WR( fixed1,  wr, kmp_int8,    8, =,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_wr
1779 ATOMIC_XCHG_WR( fixed2,  wr, kmp_int16,  16, =,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_wr
1780 ATOMIC_XCHG_WR( fixed4,  wr, kmp_int32,  32, =,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_wr
1781 #if ( KMP_ARCH_X86 )
1782     ATOMIC_CMPXCHG_WR( fixed8,  wr, kmp_int64,  64, =,  KMP_ARCH_X86 )      // __kmpc_atomic_fixed8_wr
1783 #else
1784     ATOMIC_XCHG_WR( fixed8,  wr, kmp_int64,  64, =,  KMP_ARCH_X86 )         // __kmpc_atomic_fixed8_wr
1785 #endif
1786 
1787 ATOMIC_XCHG_FLOAT_WR( float4, wr, kmp_real32, 32, =, KMP_ARCH_X86 )         // __kmpc_atomic_float4_wr
1788 #if ( KMP_ARCH_X86 )
1789     ATOMIC_CMPXCHG_WR( float8,  wr, kmp_real64,  64, =,  KMP_ARCH_X86 )     // __kmpc_atomic_float8_wr
1790 #else
1791     ATOMIC_XCHG_FLOAT_WR( float8,  wr, kmp_real64,  64, =,  KMP_ARCH_X86 )  // __kmpc_atomic_float8_wr
1792 #endif
1793 
1794 ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r,   1 )         // __kmpc_atomic_float10_wr
1795 #if KMP_HAVE_QUAD
1796 ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r,   1 )         // __kmpc_atomic_float16_wr
1797 #endif
1798 ATOMIC_CRITICAL_WR( cmplx4,  wr, kmp_cmplx32, =,  8c,   1 )         // __kmpc_atomic_cmplx4_wr
1799 ATOMIC_CRITICAL_WR( cmplx8,  wr, kmp_cmplx64, =, 16c,   1 )         // __kmpc_atomic_cmplx8_wr
1800 ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c,   1 )         // __kmpc_atomic_cmplx10_wr
1801 #if KMP_HAVE_QUAD
1802 ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c,   1 )         // __kmpc_atomic_cmplx16_wr
1803 #if ( KMP_ARCH_X86 )
1804     ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t,         =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr
1805     ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr
1806 #endif
1807 #endif
1808 
1809 
1810 // ------------------------------------------------------------------------
1811 // Atomic CAPTURE routines
1812 // ------------------------------------------------------------------------
1813 
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands' type and size (fixed* for signed, fixed*u for unsigned fixed)
1816 //     OP_ID   - operation identifier (add, sub, mul, ...)
1817 //     TYPE    - operands' type
1818 #define ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,RET_TYPE)                                    \
1819 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ) \
1820 {                                                                                         \
1821     KMP_DEBUG_ASSERT( __kmp_init_serial );                                                \
1822     KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
1823 
1824 // -------------------------------------------------------------------------
1825 // Operation on *lhs, rhs bound by critical section
1826 //     OP     - operator (it's supposed to contain an assignment)
1827 //     LCK_ID - lock identifier
// Note: gtid is not checked here because it should always be valid;
// 1- and 2-byte routines expect a valid gtid, other sizes check it before this macro
1830 #define OP_CRITICAL_CPT(OP,LCK_ID)                                        \
1831     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
1832                                                                           \
1833     if( flag ) {                                                          \
1834         (*lhs) OP rhs;                                                    \
1835         new_value = (*lhs);                                               \
1836     } else {                                                              \
1837         new_value = (*lhs);                                               \
1838         (*lhs) OP rhs;                                                    \
1839     }                                                                     \
1840                                                                           \
1841     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
1842     return new_value;
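
// For OP "+=", the flag argument selects which value is returned
// (illustrative only; v stands for the captured local):
//
//     if ( flag ) { *lhs += rhs; v = *lhs; }   // capture *after*:  v = (x += rhs)
//     else        { v = *lhs; *lhs += rhs; }   // capture *before*: v = x; x += rhs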
1843 
1844 // ------------------------------------------------------------------------
1845 #ifdef KMP_GOMP_COMPAT
1846 #define OP_GOMP_CRITICAL_CPT(OP,FLAG)                                     \
1847     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
1848         KMP_CHECK_GTID;                                                   \
1849         OP_CRITICAL_CPT( OP##=, 0 );                                      \
1850     }
1851 #else
1852 #define OP_GOMP_CRITICAL_CPT(OP,FLAG)
1853 #endif /* KMP_GOMP_COMPAT */
1854 
1855 // ------------------------------------------------------------------------
1856 // Operation on *lhs, rhs using "compare_and_store" routine
1857 //     TYPE    - operands' type
1858 //     BITS    - size in bits, used to distinguish low level calls
1859 //     OP      - operator
1860 // Note: temp_val introduced in order to force the compiler to read
1861 //       *lhs only once (w/o it the compiler reads *lhs twice)
1862 #define OP_CMPXCHG_CPT(TYPE,BITS,OP)                                      \
1863     {                                                                     \
1864         TYPE KMP_ATOMIC_VOLATILE temp_val;                                \
1865         TYPE old_value, new_value;                                        \
1866         temp_val = *lhs;                                                  \
1867         old_value = temp_val;                                             \
1868         new_value = old_value OP rhs;                                     \
1869         while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
1870                       *VOLATILE_CAST(kmp_int##BITS *) &old_value,         \
1871                       *VOLATILE_CAST(kmp_int##BITS *) &new_value ) )      \
1872         {                                                                 \
1873             KMP_CPU_PAUSE();                                              \
1874                                                                           \
1875             temp_val = *lhs;                                              \
1876             old_value = temp_val;                                         \
1877             new_value = old_value OP rhs;                                 \
1878         }                                                                 \
1879         if( flag ) {                                                      \
1880             return new_value;                                             \
1881         } else                                                            \
1882             return old_value;                                             \
1883     }
1884 
1885 // -------------------------------------------------------------------------
1886 #define ATOMIC_CMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)           \
1887 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                                  \
1888     TYPE new_value;                                                        \
1889     OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG)                                     \
1890     OP_CMPXCHG_CPT(TYPE,BITS,OP)                                           \
1891 }
1892 
1893 // -------------------------------------------------------------------------
1894 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)         \
1895 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                                  \
1896     TYPE old_value, new_value;                                             \
1897     OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG)                                     \
1898     /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */      \
1899     old_value = KMP_TEST_THEN_ADD##BITS( lhs, OP rhs );                    \
1900     if( flag ) {                                                           \
1901         return old_value OP rhs;                                           \
1902     } else                                                                 \
1903         return old_value;                                                  \
1904 }
1905 // -------------------------------------------------------------------------
1906 
1907 ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32,  32, +, 0            )  // __kmpc_atomic_fixed4_add_cpt
1908 ATOMIC_FIXED_ADD_CPT( fixed4, sub_cpt, kmp_int32,  32, -, 0            )  // __kmpc_atomic_fixed4_sub_cpt
1909 ATOMIC_FIXED_ADD_CPT( fixed8, add_cpt, kmp_int64,  64, +, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_add_cpt
1910 ATOMIC_FIXED_ADD_CPT( fixed8, sub_cpt, kmp_int64,  64, -, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_sub_cpt
1911 
1912 ATOMIC_CMPXCHG_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 )  // __kmpc_atomic_float4_add_cpt
1913 ATOMIC_CMPXCHG_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 )  // __kmpc_atomic_float4_sub_cpt
1914 ATOMIC_CMPXCHG_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 )  // __kmpc_atomic_float8_add_cpt
1915 ATOMIC_CMPXCHG_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 )  // __kmpc_atomic_float8_sub_cpt
1916 
1917 // ------------------------------------------------------------------------
// Entry definitions for integer operands
//     TYPE_ID - operand type and size (fixed4, float4)
1920 //     OP_ID   - operation identifier (add, sub, mul, ...)
1921 //     TYPE    - operand type
1922 //     BITS    - size in bits, used to distinguish low level calls
1923 //     OP      - operator (used in critical section)
1924 //               TYPE_ID,OP_ID,  TYPE,   BITS,OP,GOMP_FLAG
1925 // ------------------------------------------------------------------------
1926 // Routines for ATOMIC integer operands, other operators
1927 // ------------------------------------------------------------------------
//              TYPE_ID,OP_ID, TYPE,   BITS, OP,  GOMP_FLAG
1929 ATOMIC_CMPXCHG_CPT( fixed1,  add_cpt, kmp_int8,    8, +,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_add_cpt
1930 ATOMIC_CMPXCHG_CPT( fixed1, andb_cpt, kmp_int8,    8, &,  0            )  // __kmpc_atomic_fixed1_andb_cpt
1931 ATOMIC_CMPXCHG_CPT( fixed1,  div_cpt, kmp_int8,    8, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_div_cpt
1932 ATOMIC_CMPXCHG_CPT( fixed1u, div_cpt, kmp_uint8,   8, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1u_div_cpt
1933 ATOMIC_CMPXCHG_CPT( fixed1,  mul_cpt, kmp_int8,    8, *,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_mul_cpt
1934 ATOMIC_CMPXCHG_CPT( fixed1,  orb_cpt, kmp_int8,    8, |,  0            )  // __kmpc_atomic_fixed1_orb_cpt
1935 ATOMIC_CMPXCHG_CPT( fixed1,  shl_cpt, kmp_int8,    8, <<, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_shl_cpt
1936 ATOMIC_CMPXCHG_CPT( fixed1,  shr_cpt, kmp_int8,    8, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_shr_cpt
1937 ATOMIC_CMPXCHG_CPT( fixed1u, shr_cpt, kmp_uint8,   8, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1u_shr_cpt
1938 ATOMIC_CMPXCHG_CPT( fixed1,  sub_cpt, kmp_int8,    8, -,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_sub_cpt
1939 ATOMIC_CMPXCHG_CPT( fixed1,  xor_cpt, kmp_int8,    8, ^,  0            )  // __kmpc_atomic_fixed1_xor_cpt
1940 ATOMIC_CMPXCHG_CPT( fixed2,  add_cpt, kmp_int16,  16, +,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_add_cpt
1941 ATOMIC_CMPXCHG_CPT( fixed2, andb_cpt, kmp_int16,  16, &,  0            )  // __kmpc_atomic_fixed2_andb_cpt
1942 ATOMIC_CMPXCHG_CPT( fixed2,  div_cpt, kmp_int16,  16, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_div_cpt
1943 ATOMIC_CMPXCHG_CPT( fixed2u, div_cpt, kmp_uint16, 16, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2u_div_cpt
1944 ATOMIC_CMPXCHG_CPT( fixed2,  mul_cpt, kmp_int16,  16, *,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_mul_cpt
1945 ATOMIC_CMPXCHG_CPT( fixed2,  orb_cpt, kmp_int16,  16, |,  0            )  // __kmpc_atomic_fixed2_orb_cpt
1946 ATOMIC_CMPXCHG_CPT( fixed2,  shl_cpt, kmp_int16,  16, <<, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_shl_cpt
1947 ATOMIC_CMPXCHG_CPT( fixed2,  shr_cpt, kmp_int16,  16, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_shr_cpt
1948 ATOMIC_CMPXCHG_CPT( fixed2u, shr_cpt, kmp_uint16, 16, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2u_shr_cpt
1949 ATOMIC_CMPXCHG_CPT( fixed2,  sub_cpt, kmp_int16,  16, -,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_sub_cpt
1950 ATOMIC_CMPXCHG_CPT( fixed2,  xor_cpt, kmp_int16,  16, ^,  0            )  // __kmpc_atomic_fixed2_xor_cpt
1951 ATOMIC_CMPXCHG_CPT( fixed4, andb_cpt, kmp_int32,  32, &,  0            )  // __kmpc_atomic_fixed4_andb_cpt
1952 ATOMIC_CMPXCHG_CPT( fixed4,  div_cpt, kmp_int32,  32, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_div_cpt
1953 ATOMIC_CMPXCHG_CPT( fixed4u, div_cpt, kmp_uint32, 32, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed4u_div_cpt
1954 ATOMIC_CMPXCHG_CPT( fixed4,  mul_cpt, kmp_int32,  32, *,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_mul_cpt
1955 ATOMIC_CMPXCHG_CPT( fixed4,  orb_cpt, kmp_int32,  32, |,  0            )  // __kmpc_atomic_fixed4_orb_cpt
1956 ATOMIC_CMPXCHG_CPT( fixed4,  shl_cpt, kmp_int32,  32, <<, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_shl_cpt
1957 ATOMIC_CMPXCHG_CPT( fixed4,  shr_cpt, kmp_int32,  32, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_shr_cpt
1958 ATOMIC_CMPXCHG_CPT( fixed4u, shr_cpt, kmp_uint32, 32, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4u_shr_cpt
1959 ATOMIC_CMPXCHG_CPT( fixed4,  xor_cpt, kmp_int32,  32, ^,  0            )  // __kmpc_atomic_fixed4_xor_cpt
1960 ATOMIC_CMPXCHG_CPT( fixed8, andb_cpt, kmp_int64,  64, &,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_andb_cpt
1961 ATOMIC_CMPXCHG_CPT( fixed8,  div_cpt, kmp_int64,  64, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_div_cpt
1962 ATOMIC_CMPXCHG_CPT( fixed8u, div_cpt, kmp_uint64, 64, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8u_div_cpt
1963 ATOMIC_CMPXCHG_CPT( fixed8,  mul_cpt, kmp_int64,  64, *,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_mul_cpt
1964 ATOMIC_CMPXCHG_CPT( fixed8,  orb_cpt, kmp_int64,  64, |,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_orb_cpt
1965 ATOMIC_CMPXCHG_CPT( fixed8,  shl_cpt, kmp_int64,  64, <<, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_shl_cpt
1966 ATOMIC_CMPXCHG_CPT( fixed8,  shr_cpt, kmp_int64,  64, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_shr_cpt
1967 ATOMIC_CMPXCHG_CPT( fixed8u, shr_cpt, kmp_uint64, 64, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8u_shr_cpt
1968 ATOMIC_CMPXCHG_CPT( fixed8,  xor_cpt, kmp_int64,  64, ^,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_xor_cpt
1969 ATOMIC_CMPXCHG_CPT( float4,  div_cpt, kmp_real32, 32, /,  KMP_ARCH_X86 )  // __kmpc_atomic_float4_div_cpt
1970 ATOMIC_CMPXCHG_CPT( float4,  mul_cpt, kmp_real32, 32, *,  KMP_ARCH_X86 )  // __kmpc_atomic_float4_mul_cpt
1971 ATOMIC_CMPXCHG_CPT( float8,  div_cpt, kmp_real64, 64, /,  KMP_ARCH_X86 )  // __kmpc_atomic_float8_div_cpt
1972 ATOMIC_CMPXCHG_CPT( float8,  mul_cpt, kmp_real64, 64, *,  KMP_ARCH_X86 )  // __kmpc_atomic_float8_mul_cpt
//              TYPE_ID,OP_ID, TYPE,   BITS, OP,  GOMP_FLAG
1974 
1975 //////////////////////////////////
1976 
1977 // CAPTURE routines for mixed types RHS=float16
1978 #if KMP_HAVE_QUAD
1979 
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned fixed types)
1982 //     OP_ID   - operation identifier (add, sub, mul, ...)
1983 //     TYPE    - operands' type
1984 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID,OP_ID,TYPE,RTYPE_ID,RTYPE)       \
1985 TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs, int flag ) \
1986 {                                                                                         \
1987     KMP_DEBUG_ASSERT( __kmp_init_serial );                                                \
1988     KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid ));
1989 
1990 // -------------------------------------------------------------------------
1991 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG)       \
1992 ATOMIC_BEGIN_CPT_MIX(TYPE_ID,OP_ID,TYPE,RTYPE_ID,RTYPE)                    \
1993     TYPE new_value;                                                        \
1994     OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG)                                     \
1995     OP_CMPXCHG_CPT(TYPE,BITS,OP)                                           \
1996 }
1997 
1998 // -------------------------------------------------------------------------
1999 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG)         \
2000 ATOMIC_BEGIN_CPT_MIX(TYPE_ID,OP_ID,TYPE,RTYPE_ID,RTYPE)                    \
2001     TYPE new_value;                                                        \
    OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG)  /* send assignment */             \
    OP_CRITICAL_CPT(OP##=,LCK_ID)       /* send assignment */             \
2004 }
2005 
2006 ATOMIC_CMPXCHG_CPT_MIX( fixed1,  char,       add_cpt,  8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_cpt_fp
2007 ATOMIC_CMPXCHG_CPT_MIX( fixed1u, uchar,      add_cpt,  8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_add_cpt_fp
2008 ATOMIC_CMPXCHG_CPT_MIX( fixed1,  char,       sub_cpt,  8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_fp
2009 ATOMIC_CMPXCHG_CPT_MIX( fixed1u, uchar,      sub_cpt,  8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_sub_cpt_fp
2010 ATOMIC_CMPXCHG_CPT_MIX( fixed1,  char,       mul_cpt,  8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_cpt_fp
2011 ATOMIC_CMPXCHG_CPT_MIX( fixed1u, uchar,      mul_cpt,  8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_mul_cpt_fp
2012 ATOMIC_CMPXCHG_CPT_MIX( fixed1,  char,       div_cpt,  8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_fp
2013 ATOMIC_CMPXCHG_CPT_MIX( fixed1u, uchar,      div_cpt,  8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_fp
2014 
2015 ATOMIC_CMPXCHG_CPT_MIX( fixed2,  short,      add_cpt, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_cpt_fp
2016 ATOMIC_CMPXCHG_CPT_MIX( fixed2u, ushort,     add_cpt, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_add_cpt_fp
2017 ATOMIC_CMPXCHG_CPT_MIX( fixed2,  short,      sub_cpt, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_fp
2018 ATOMIC_CMPXCHG_CPT_MIX( fixed2u, ushort,     sub_cpt, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_sub_cpt_fp
2019 ATOMIC_CMPXCHG_CPT_MIX( fixed2,  short,      mul_cpt, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_cpt_fp
2020 ATOMIC_CMPXCHG_CPT_MIX( fixed2u, ushort,     mul_cpt, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_mul_cpt_fp
2021 ATOMIC_CMPXCHG_CPT_MIX( fixed2,  short,      div_cpt, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_fp
2022 ATOMIC_CMPXCHG_CPT_MIX( fixed2u, ushort,     div_cpt, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_fp
2023 
2024 ATOMIC_CMPXCHG_CPT_MIX( fixed4,  kmp_int32,  add_cpt, 32, +, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4_add_cpt_fp
2025 ATOMIC_CMPXCHG_CPT_MIX( fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4u_add_cpt_fp
2026 ATOMIC_CMPXCHG_CPT_MIX( fixed4,  kmp_int32,  sub_cpt, 32, -, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4_sub_cpt_fp
2027 ATOMIC_CMPXCHG_CPT_MIX( fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4u_sub_cpt_fp
2028 ATOMIC_CMPXCHG_CPT_MIX( fixed4,  kmp_int32,  mul_cpt, 32, *, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4_mul_cpt_fp
2029 ATOMIC_CMPXCHG_CPT_MIX( fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4u_mul_cpt_fp
2030 ATOMIC_CMPXCHG_CPT_MIX( fixed4,  kmp_int32,  div_cpt, 32, /, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4_div_cpt_fp
2031 ATOMIC_CMPXCHG_CPT_MIX( fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4u_div_cpt_fp
2032 
2033 ATOMIC_CMPXCHG_CPT_MIX( fixed8,  kmp_int64,  add_cpt, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_cpt_fp
2034 ATOMIC_CMPXCHG_CPT_MIX( fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_add_cpt_fp
2035 ATOMIC_CMPXCHG_CPT_MIX( fixed8,  kmp_int64,  sub_cpt, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_fp
2036 ATOMIC_CMPXCHG_CPT_MIX( fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_sub_cpt_fp
2037 ATOMIC_CMPXCHG_CPT_MIX( fixed8,  kmp_int64,  mul_cpt, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_cpt_fp
2038 ATOMIC_CMPXCHG_CPT_MIX( fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_mul_cpt_fp
2039 ATOMIC_CMPXCHG_CPT_MIX( fixed8,  kmp_int64,  div_cpt, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_fp
2040 ATOMIC_CMPXCHG_CPT_MIX( fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_fp
2041 
2042 ATOMIC_CMPXCHG_CPT_MIX( float4,  kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt_fp
2043 ATOMIC_CMPXCHG_CPT_MIX( float4,  kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_fp
2044 ATOMIC_CMPXCHG_CPT_MIX( float4,  kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_cpt_fp
2045 ATOMIC_CMPXCHG_CPT_MIX( float4,  kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_fp
2046 
2047 ATOMIC_CMPXCHG_CPT_MIX( float8,  kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt_fp
2048 ATOMIC_CMPXCHG_CPT_MIX( float8,  kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_fp
2049 ATOMIC_CMPXCHG_CPT_MIX( float8,  kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_cpt_fp
2050 ATOMIC_CMPXCHG_CPT_MIX( float8,  kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_fp
2051 
2052 ATOMIC_CRITICAL_CPT_MIX( float10, long double, add_cpt, +, fp, _Quad, 10r,   1 )            // __kmpc_atomic_float10_add_cpt_fp
2053 ATOMIC_CRITICAL_CPT_MIX( float10, long double, sub_cpt, -, fp, _Quad, 10r,   1 )            // __kmpc_atomic_float10_sub_cpt_fp
2054 ATOMIC_CRITICAL_CPT_MIX( float10, long double, mul_cpt, *, fp, _Quad, 10r,   1 )            // __kmpc_atomic_float10_mul_cpt_fp
2055 ATOMIC_CRITICAL_CPT_MIX( float10, long double, div_cpt, /, fp, _Quad, 10r,   1 )            // __kmpc_atomic_float10_div_cpt_fp
2056 
2057 #endif //KMP_HAVE_QUAD
2058 
2059 ///////////////////////////////////
2060 
2061 // ------------------------------------------------------------------------
2062 // Routines for C/C++ Reduction operators && and ||
2063 // ------------------------------------------------------------------------
2064 
2065 // -------------------------------------------------------------------------
2066 // Operation on *lhs, rhs bound by critical section
2067 //     OP     - operator (it's supposed to contain an assignment)
2068 //     LCK_ID - lock identifier
2069 // Note: don't check gtid as it should always be valid
// 1-, 2-byte operands: expect a valid parameter; other sizes: check before this macro
#define OP_CRITICAL_L_CPT(OP,LCK_ID)                                      \
    __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
                                                                          \
    if( flag ) {                                                          \
        new_value OP rhs;                                                 \
        (*lhs) = new_value;                                               \
    } else {                                                              \
        new_value = (*lhs);                                               \
        (*lhs) OP rhs;                                                    \
    }                                                                     \
                                                                          \
    __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );
2080 
2081 // ------------------------------------------------------------------------
2082 #ifdef KMP_GOMP_COMPAT
2083 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG)                                   \
2084     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
2085         KMP_CHECK_GTID;                                                   \
2086         OP_CRITICAL_L_CPT( OP, 0 );                                       \
2087         return new_value;                                                 \
2088     }
2089 #else
2090 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG)
2091 #endif /* KMP_GOMP_COMPAT */
2092 
2093 // ------------------------------------------------------------------------
// Need separate macros for && and || because C has no compound-assignment form of them
2095 #define ATOMIC_CMPX_L_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)           \
2096 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                                 \
2097     TYPE new_value;                                                       \
2098     OP_GOMP_CRITICAL_L_CPT( = *lhs OP, GOMP_FLAG )                        \
2099     OP_CMPXCHG_CPT(TYPE,BITS,OP)                                          \
2100 }
2101 
2102 ATOMIC_CMPX_L_CPT( fixed1, andl_cpt, char,       8, &&, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_andl_cpt
2103 ATOMIC_CMPX_L_CPT( fixed1,  orl_cpt, char,       8, ||, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_orl_cpt
2104 ATOMIC_CMPX_L_CPT( fixed2, andl_cpt, short,     16, &&, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_andl_cpt
2105 ATOMIC_CMPX_L_CPT( fixed2,  orl_cpt, short,     16, ||, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_orl_cpt
2106 ATOMIC_CMPX_L_CPT( fixed4, andl_cpt, kmp_int32, 32, &&, 0 )             // __kmpc_atomic_fixed4_andl_cpt
2107 ATOMIC_CMPX_L_CPT( fixed4,  orl_cpt, kmp_int32, 32, ||, 0 )             // __kmpc_atomic_fixed4_orl_cpt
2108 ATOMIC_CMPX_L_CPT( fixed8, andl_cpt, kmp_int64, 64, &&, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_andl_cpt
2109 ATOMIC_CMPX_L_CPT( fixed8,  orl_cpt, kmp_int64, 64, ||, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_orl_cpt
2110 
2111 
2112 // -------------------------------------------------------------------------
// Routines for Fortran operators that have no C equivalents:
2114 // MAX, MIN, .EQV., .NEQV.
2115 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
2116 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt
2117 // -------------------------------------------------------------------------
2118 
2119 // -------------------------------------------------------------------------
2120 // MIN and MAX need separate macros
// OP - comparison operator used to check whether any action is still needed
#define MIN_MAX_CRITSECT_CPT(OP,LCK_ID)                                    \
    __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );              \
                                                                           \
    if ( *lhs OP rhs ) {                 /* still need actions? */         \
        old_value = *lhs;                                                  \
        *lhs = rhs;                                                        \
        if ( flag )                                                        \
            new_value = rhs;                                               \
        else                                                               \
            new_value = old_value;                                         \
    } else {                             /* lost the race, just capture */ \
        new_value = *lhs;                                                  \
    }                                                                      \
    __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );              \
    return new_value;

2136 // -------------------------------------------------------------------------
2137 #ifdef KMP_GOMP_COMPAT
2138 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG)                                 \
2139     if (( FLAG ) && ( __kmp_atomic_mode == 2 )) {                          \
2140         KMP_CHECK_GTID;                                                    \
2141         MIN_MAX_CRITSECT_CPT( OP, 0 );                                     \
2142     }
2143 #else
2144 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG)
2145 #endif /* KMP_GOMP_COMPAT */
2146 
2147 // -------------------------------------------------------------------------
2148 #define MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP)                                  \
2149     {                                                                      \
2150         TYPE KMP_ATOMIC_VOLATILE temp_val;                                 \
        /* old_value is declared in the invoking routine */                \
2152         temp_val = *lhs;                                                   \
2153         old_value = temp_val;                                              \
2154         while ( old_value OP rhs &&          /* still need actions? */     \
2155             ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs,      \
2156                       *VOLATILE_CAST(kmp_int##BITS *) &old_value,          \
2157                       *VOLATILE_CAST(kmp_int##BITS *) &rhs ) )             \
2158         {                                                                  \
2159             KMP_CPU_PAUSE();                                               \
2160             temp_val = *lhs;                                               \
2161             old_value = temp_val;                                          \
2162         }                                                                  \
2163         if( flag )                                                         \
2164             return rhs;                                                    \
2165         else                                                               \
2166             return old_value;                                              \
2167     }
2168 
2169 // -------------------------------------------------------------------------
2170 // 1-byte, 2-byte operands - use critical section
2171 #define MIN_MAX_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)       \
2172 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                                  \
2173     TYPE new_value, old_value;                                             \
2174     if ( *lhs OP rhs ) {     /* need actions? */                           \
2175         GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG)                            \
2176         MIN_MAX_CRITSECT_CPT(OP,LCK_ID)                                    \
2177     }                                                                      \
2178     return *lhs;                                                           \
2179 }
2180 
2181 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)         \
2182 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                                  \
2183     TYPE new_value, old_value;                                             \
2184     if ( *lhs OP rhs ) {                                                   \
2185         GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG)                            \
2186         MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP)                                  \
2187     }                                                                      \
2188     return *lhs;                                                           \
2189 }
2190 
2191 
2192 MIN_MAX_COMPXCHG_CPT( fixed1,  max_cpt, char,        8, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max_cpt
2193 MIN_MAX_COMPXCHG_CPT( fixed1,  min_cpt, char,        8, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min_cpt
2194 MIN_MAX_COMPXCHG_CPT( fixed2,  max_cpt, short,      16, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max_cpt
2195 MIN_MAX_COMPXCHG_CPT( fixed2,  min_cpt, short,      16, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min_cpt
2196 MIN_MAX_COMPXCHG_CPT( fixed4,  max_cpt, kmp_int32,  32, <, 0 )            // __kmpc_atomic_fixed4_max_cpt
2197 MIN_MAX_COMPXCHG_CPT( fixed4,  min_cpt, kmp_int32,  32, >, 0 )            // __kmpc_atomic_fixed4_min_cpt
2198 MIN_MAX_COMPXCHG_CPT( fixed8,  max_cpt, kmp_int64,  64, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max_cpt
2199 MIN_MAX_COMPXCHG_CPT( fixed8,  min_cpt, kmp_int64,  64, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min_cpt
2200 MIN_MAX_COMPXCHG_CPT( float4,  max_cpt, kmp_real32, 32, <, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max_cpt
2201 MIN_MAX_COMPXCHG_CPT( float4,  min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt
2202 MIN_MAX_COMPXCHG_CPT( float8,  max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt
2203 MIN_MAX_COMPXCHG_CPT( float8,  min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt
2204 #if KMP_HAVE_QUAD
2205 MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY,    <, 16r,   1 )     // __kmpc_atomic_float16_max_cpt
2206 MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY,    >, 16r,   1 )     // __kmpc_atomic_float16_min_cpt
2207 #if ( KMP_ARCH_X86 )
2208     MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r,  1 )  // __kmpc_atomic_float16_max_a16_cpt
    MIN_MAX_CRITICAL_CPT( float16, min_a16_cpt, Quad_a16_t, >, 16r,  1 )  // __kmpc_atomic_float16_min_a16_cpt
2210 #endif
2211 #endif
2212 
2213 // ------------------------------------------------------------------------
2214 #ifdef KMP_GOMP_COMPAT
2215 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG)                                 \
2216     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
2217         KMP_CHECK_GTID;                                                   \
2218         OP_CRITICAL_CPT( OP, 0 );                                         \
2219     }
2220 #else
2221 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG)
2222 #endif /* KMP_GOMP_COMPAT */
2223 // ------------------------------------------------------------------------
2224 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)         \
2225 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                                 \
2226     TYPE new_value;                                                       \
2227     OP_GOMP_CRITICAL_EQV_CPT(^=~,GOMP_FLAG)  /* send assignment */        \
2228     OP_CMPXCHG_CPT(TYPE,BITS,OP)                                          \
2229 }
2230 
2231 // ------------------------------------------------------------------------
2232 
2233 ATOMIC_CMPXCHG_CPT(  fixed1, neqv_cpt, kmp_int8,   8,   ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv_cpt
2234 ATOMIC_CMPXCHG_CPT(  fixed2, neqv_cpt, kmp_int16, 16,   ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv_cpt
2235 ATOMIC_CMPXCHG_CPT(  fixed4, neqv_cpt, kmp_int32, 32,   ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv_cpt
2236 ATOMIC_CMPXCHG_CPT(  fixed8, neqv_cpt, kmp_int64, 64,   ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv_cpt
2237 ATOMIC_CMPX_EQV_CPT( fixed1, eqv_cpt,  kmp_int8,   8,  ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv_cpt
2238 ATOMIC_CMPX_EQV_CPT( fixed2, eqv_cpt,  kmp_int16, 16,  ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv_cpt
2239 ATOMIC_CMPX_EQV_CPT( fixed4, eqv_cpt,  kmp_int32, 32,  ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv_cpt
2240 ATOMIC_CMPX_EQV_CPT( fixed8, eqv_cpt,  kmp_int64, 64,  ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv_cpt
2241 
2242 // ------------------------------------------------------------------------
2243 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2244 //     TYPE_ID, OP_ID, TYPE - detailed above
2245 //     OP      - operator
2246 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
2247 #define ATOMIC_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2248 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                           \
2249     TYPE new_value;                                                 \
2250     OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG)  /* send assignment */       \
2251     OP_CRITICAL_CPT(OP##=,LCK_ID)          /* send assignment */    \
2252 }
2253 
2254 // ------------------------------------------------------------------------
2255 
// Workaround for cmplx4. Regular routines with a return value do not work
// on Win_32e, so the captured value is returned through an additional parameter.
2258 #define OP_CRITICAL_CPT_WRK(OP,LCK_ID)                                    \
2259     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
2260                                                                           \
2261     if( flag ) {                                                          \
2262         (*lhs) OP rhs;                                                    \
2263         (*out) = (*lhs);                                                  \
2264     } else {                                                              \
2265         (*out) = (*lhs);                                                  \
2266         (*lhs) OP rhs;                                                    \
2267     }                                                                     \
2268                                                                           \
2269     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
2270     return;
2271 // ------------------------------------------------------------------------
2272 
2273 #ifdef KMP_GOMP_COMPAT
2274 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG)                                 \
2275     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
2276         KMP_CHECK_GTID;                                                   \
2277         OP_CRITICAL_CPT_WRK( OP##=, 0 );                                  \
2278     }
2279 #else
2280 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG)
2281 #endif /* KMP_GOMP_COMPAT */
2282 // ------------------------------------------------------------------------
2283 
2284 #define ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE)                              \
2285 void __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out, int flag ) \
2286 {                                                                         \
2287     KMP_DEBUG_ASSERT( __kmp_init_serial );                                \
2288     KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid ));
2289 // ------------------------------------------------------------------------
2290 
2291 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)   \
2292 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE)                                      \
2293     OP_GOMP_CRITICAL_CPT_WRK(OP,GOMP_FLAG)                                \
2294     OP_CRITICAL_CPT_WRK(OP##=,LCK_ID)                                     \
2295 }
2296 // The end of workaround for cmplx4
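//
// The resulting cmplx4 capture entry points therefore have this shape (from
// ATOMIC_BEGIN_WRK), returning the captured value through *out instead of by
// value:
//
//     void __kmpc_atomic_cmplx4_add_cpt( ident_t *id_ref, int gtid,
//                                        kmp_cmplx32 * lhs, kmp_cmplx32 rhs,
//                                        kmp_cmplx32 * out, int flag );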
2297 
2298 /* ------------------------------------------------------------------------- */
2299 // routines for long double type
2300 ATOMIC_CRITICAL_CPT( float10, add_cpt, long double,     +, 10r,   1 )            // __kmpc_atomic_float10_add_cpt
2301 ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double,     -, 10r,   1 )            // __kmpc_atomic_float10_sub_cpt
2302 ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double,     *, 10r,   1 )            // __kmpc_atomic_float10_mul_cpt
2303 ATOMIC_CRITICAL_CPT( float10, div_cpt, long double,     /, 10r,   1 )            // __kmpc_atomic_float10_div_cpt
2304 #if KMP_HAVE_QUAD
2305 // routines for _Quad type
2306 ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY,     +, 16r,   1 )            // __kmpc_atomic_float16_add_cpt
2307 ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY,     -, 16r,   1 )            // __kmpc_atomic_float16_sub_cpt
2308 ATOMIC_CRITICAL_CPT( float16, mul_cpt, QUAD_LEGACY,     *, 16r,   1 )            // __kmpc_atomic_float16_mul_cpt
2309 ATOMIC_CRITICAL_CPT( float16, div_cpt, QUAD_LEGACY,     /, 16r,   1 )            // __kmpc_atomic_float16_div_cpt
2310 #if ( KMP_ARCH_X86 )
2311     ATOMIC_CRITICAL_CPT( float16, add_a16_cpt, Quad_a16_t, +, 16r,  1 )          // __kmpc_atomic_float16_add_a16_cpt
2312     ATOMIC_CRITICAL_CPT( float16, sub_a16_cpt, Quad_a16_t, -, 16r,  1 )          // __kmpc_atomic_float16_sub_a16_cpt
2313     ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r,  1 )          // __kmpc_atomic_float16_mul_a16_cpt
2314     ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r,  1 )          // __kmpc_atomic_float16_div_a16_cpt
2315 #endif
2316 #endif
2317 
2318 // routines for complex types
2319 
// cmplx4 routines return void; the captured value comes back through the extra parameter
2321 ATOMIC_CRITICAL_CPT_WRK( cmplx4,  add_cpt, kmp_cmplx32, +, 8c,    1 )            // __kmpc_atomic_cmplx4_add_cpt
2322 ATOMIC_CRITICAL_CPT_WRK( cmplx4,  sub_cpt, kmp_cmplx32, -, 8c,    1 )            // __kmpc_atomic_cmplx4_sub_cpt
2323 ATOMIC_CRITICAL_CPT_WRK( cmplx4,  mul_cpt, kmp_cmplx32, *, 8c,    1 )            // __kmpc_atomic_cmplx4_mul_cpt
2324 ATOMIC_CRITICAL_CPT_WRK( cmplx4,  div_cpt, kmp_cmplx32, /, 8c,    1 )            // __kmpc_atomic_cmplx4_div_cpt
2325 
2326 ATOMIC_CRITICAL_CPT( cmplx8,  add_cpt, kmp_cmplx64, +, 16c,   1 )            // __kmpc_atomic_cmplx8_add_cpt
2327 ATOMIC_CRITICAL_CPT( cmplx8,  sub_cpt, kmp_cmplx64, -, 16c,   1 )            // __kmpc_atomic_cmplx8_sub_cpt
2328 ATOMIC_CRITICAL_CPT( cmplx8,  mul_cpt, kmp_cmplx64, *, 16c,   1 )            // __kmpc_atomic_cmplx8_mul_cpt
2329 ATOMIC_CRITICAL_CPT( cmplx8,  div_cpt, kmp_cmplx64, /, 16c,   1 )            // __kmpc_atomic_cmplx8_div_cpt
2330 ATOMIC_CRITICAL_CPT( cmplx10, add_cpt, kmp_cmplx80, +, 20c,   1 )            // __kmpc_atomic_cmplx10_add_cpt
2331 ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c,   1 )            // __kmpc_atomic_cmplx10_sub_cpt
2332 ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c,   1 )            // __kmpc_atomic_cmplx10_mul_cpt
2333 ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c,   1 )            // __kmpc_atomic_cmplx10_div_cpt
2334 #if KMP_HAVE_QUAD
2335 ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c,   1 )            // __kmpc_atomic_cmplx16_add_cpt
2336 ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c,   1 )            // __kmpc_atomic_cmplx16_sub_cpt
2337 ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c,   1 )            // __kmpc_atomic_cmplx16_mul_cpt
2338 ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c,   1 )            // __kmpc_atomic_cmplx16_div_cpt
2339 #if ( KMP_ARCH_X86 )
2340     ATOMIC_CRITICAL_CPT( cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,   1 )   // __kmpc_atomic_cmplx16_add_a16_cpt
2341     ATOMIC_CRITICAL_CPT( cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,   1 )   // __kmpc_atomic_cmplx16_sub_a16_cpt
2342     ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,   1 )   // __kmpc_atomic_cmplx16_mul_a16_cpt
2343     ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,   1 )   // __kmpc_atomic_cmplx16_div_a16_cpt
2344 #endif
2345 #endif
2346 
2347 #if OMP_40_ENABLED
2348 
// OpenMP 4.0 capture-reverse forms: v = x = expr binop x;  { v = x; x = expr binop x; }  { x = expr binop x; v = x; }  -- for non-commutative operations.
2350 // Supported only on IA-32 architecture and Intel(R) 64
2351 
2352 // -------------------------------------------------------------------------
2353 // Operation on *lhs, rhs bound by critical section
2354 //     OP     - operator (it's supposed to contain an assignment)
2355 //     LCK_ID - lock identifier
2356 // Note: don't check gtid as it should always be valid
// 1-, 2-byte operands: expect a valid parameter; other sizes: check before this macro
2358 #define OP_CRITICAL_CPT_REV(OP,LCK_ID)                                    \
2359     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
2360                                                                           \
    if( flag ) {                                                          \
        (*lhs) = (rhs) OP (*lhs);                                         \
        new_value = (*lhs);                                               \
    } else {                                                              \
        new_value = (*lhs);                                               \
        (*lhs) = (rhs) OP (*lhs);                                         \
    }                                                                     \
2369     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
2370     return new_value;
2371 
2372 // ------------------------------------------------------------------------
2373 #ifdef KMP_GOMP_COMPAT
2374 #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG)                                 \
2375     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
2376         KMP_CHECK_GTID;                                                   \
2377         OP_CRITICAL_CPT_REV( OP, 0 );                                     \
2378     }
2379 #else
2380 #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG)
2381 #endif /* KMP_GOMP_COMPAT */
2382 
2383 // ------------------------------------------------------------------------
2384 // Operation on *lhs, rhs using "compare_and_store" routine
2385 //     TYPE    - operands' type
2386 //     BITS    - size in bits, used to distinguish low level calls
2387 //     OP      - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (without it the compiler reads *lhs twice)
2390 #define OP_CMPXCHG_CPT_REV(TYPE,BITS,OP)                                  \
2391     {                                                                     \
2392         TYPE KMP_ATOMIC_VOLATILE temp_val;                                \
2393         TYPE old_value, new_value;                                        \
2394         temp_val = *lhs;                                                  \
2395         old_value = temp_val;                                             \
2396         new_value = rhs OP old_value;                                     \
2397         while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2398                       *VOLATILE_CAST(kmp_int##BITS *) &old_value,         \
2399                       *VOLATILE_CAST(kmp_int##BITS *) &new_value ) )      \
2400         {                                                                 \
2401             KMP_CPU_PAUSE();                                              \
2402                                                                           \
2403             temp_val = *lhs;                                              \
2404             old_value = temp_val;                                         \
2405             new_value = rhs OP old_value;                                 \
2406         }                                                                 \
2407         if( flag ) {                                                      \
2408             return new_value;                                             \
2409         } else                                                            \
2410             return old_value;                                             \
2411     }
2412 
2413 // -------------------------------------------------------------------------
2414 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG)       \
2415 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                                  \
    TYPE new_value;                                                        \
2418     OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG)                                 \
2419     OP_CMPXCHG_CPT_REV(TYPE,BITS,OP)                                       \
2420 }
2421 
2422 
2423 ATOMIC_CMPXCHG_CPT_REV( fixed1,  div_cpt_rev, kmp_int8,    8, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_div_cpt_rev
2424 ATOMIC_CMPXCHG_CPT_REV( fixed1u, div_cpt_rev, kmp_uint8,   8, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1u_div_cpt_rev
2425 ATOMIC_CMPXCHG_CPT_REV( fixed1,  shl_cpt_rev, kmp_int8,    8, <<, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_shl_cpt_rev
2426 ATOMIC_CMPXCHG_CPT_REV( fixed1,  shr_cpt_rev, kmp_int8,    8, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_shr_cpt_rev
2427 ATOMIC_CMPXCHG_CPT_REV( fixed1u, shr_cpt_rev, kmp_uint8,   8, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1u_shr_cpt_rev
2428 ATOMIC_CMPXCHG_CPT_REV( fixed1,  sub_cpt_rev, kmp_int8,    8, -,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_sub_cpt_rev
2429 ATOMIC_CMPXCHG_CPT_REV( fixed2,  div_cpt_rev, kmp_int16,  16, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_div_cpt_rev
2430 ATOMIC_CMPXCHG_CPT_REV( fixed2u, div_cpt_rev, kmp_uint16, 16, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2u_div_cpt_rev
2431 ATOMIC_CMPXCHG_CPT_REV( fixed2,  shl_cpt_rev, kmp_int16,  16, <<, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_shl_cpt_rev
2432 ATOMIC_CMPXCHG_CPT_REV( fixed2,  shr_cpt_rev, kmp_int16,  16, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_shr_cpt_rev
2433 ATOMIC_CMPXCHG_CPT_REV( fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2u_shr_cpt_rev
2434 ATOMIC_CMPXCHG_CPT_REV( fixed2,  sub_cpt_rev, kmp_int16,  16, -,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_sub_cpt_rev
2435 ATOMIC_CMPXCHG_CPT_REV( fixed4,  div_cpt_rev, kmp_int32,  32, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_div_cpt_rev
2436 ATOMIC_CMPXCHG_CPT_REV( fixed4u, div_cpt_rev, kmp_uint32, 32, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed4u_div_cpt_rev
2437 ATOMIC_CMPXCHG_CPT_REV( fixed4,  shl_cpt_rev, kmp_int32,  32, <<, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_shl_cpt_rev
2438 ATOMIC_CMPXCHG_CPT_REV( fixed4,  shr_cpt_rev, kmp_int32,  32, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_shr_cpt_rev
2439 ATOMIC_CMPXCHG_CPT_REV( fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4u_shr_cpt_rev
2440 ATOMIC_CMPXCHG_CPT_REV( fixed4,  sub_cpt_rev, kmp_int32,  32, -,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_sub_cpt_rev
2441 ATOMIC_CMPXCHG_CPT_REV( fixed8,  div_cpt_rev, kmp_int64,  64, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_div_cpt_rev
2442 ATOMIC_CMPXCHG_CPT_REV( fixed8u, div_cpt_rev, kmp_uint64, 64, /,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8u_div_cpt_rev
2443 ATOMIC_CMPXCHG_CPT_REV( fixed8,  shl_cpt_rev, kmp_int64,  64, <<, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_shl_cpt_rev
2444 ATOMIC_CMPXCHG_CPT_REV( fixed8,  shr_cpt_rev, kmp_int64,  64, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_shr_cpt_rev
2445 ATOMIC_CMPXCHG_CPT_REV( fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, KMP_ARCH_X86 )  // __kmpc_atomic_fixed8u_shr_cpt_rev
2446 ATOMIC_CMPXCHG_CPT_REV( fixed8,  sub_cpt_rev, kmp_int64,  64, -,  KMP_ARCH_X86 )  // __kmpc_atomic_fixed8_sub_cpt_rev
2447 ATOMIC_CMPXCHG_CPT_REV( float4,  div_cpt_rev, kmp_real32, 32, /,  KMP_ARCH_X86 )  // __kmpc_atomic_float4_div_cpt_rev
2448 ATOMIC_CMPXCHG_CPT_REV( float4,  sub_cpt_rev, kmp_real32, 32, -,  KMP_ARCH_X86 )  // __kmpc_atomic_float4_sub_cpt_rev
2449 ATOMIC_CMPXCHG_CPT_REV( float8,  div_cpt_rev, kmp_real64, 64, /,  KMP_ARCH_X86 )  // __kmpc_atomic_float8_div_cpt_rev
2450 ATOMIC_CMPXCHG_CPT_REV( float8,  sub_cpt_rev, kmp_real64, 64, -,  KMP_ARCH_X86 )  // __kmpc_atomic_float8_sub_cpt_rev
//              TYPE_ID,OP_ID, TYPE,   BITS, OP,  GOMP_FLAG
2452 
2453 
2454 // ------------------------------------------------------------------------
2455 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2456 //     TYPE_ID, OP_ID, TYPE - detailed above
2457 //     OP      - operator
2458 //     LCK_ID  - lock identifier, used to possibly distinguish lock variable
2459 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \
2460 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE)                               \
2461     TYPE new_value;                                                     \
2464     OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG)                              \
2465     OP_CRITICAL_CPT_REV(OP,LCK_ID)                                      \
2466 }
2467 
2468 
2469 /* ------------------------------------------------------------------------- */
2470 // routines for long double type
2471 ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double,     -, 10r,   1 )            // __kmpc_atomic_float10_sub_cpt_rev
2472 ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double,     /, 10r,   1 )            // __kmpc_atomic_float10_div_cpt_rev
2473 #if KMP_HAVE_QUAD
2474 // routines for _Quad type
2475 ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY,     -, 16r,   1 )            // __kmpc_atomic_float16_sub_cpt_rev
2476 ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY,     /, 16r,   1 )            // __kmpc_atomic_float16_div_cpt_rev
2477 #if ( KMP_ARCH_X86 )
2478     ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,  1 )          // __kmpc_atomic_float16_sub_a16_cpt_rev
2479     ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,  1 )          // __kmpc_atomic_float16_div_a16_cpt_rev
2480 #endif
2481 #endif
2482 
2483 // routines for complex types
2484 
2485 // ------------------------------------------------------------------------
2486 
// Workaround for cmplx4. Regular routines with a return value do not work
// on Win_32e, so the captured value is returned through an additional parameter.
2489 #define OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID)                                \
2490     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
2491                                                                           \
2492     if( flag ) {                                                          \
2493         (*lhs) = (rhs) OP (*lhs);                                         \
2494         (*out) = (*lhs);                                                  \
2495     } else {                                                              \
2496         (*out) = (*lhs);                                                  \
2497         (*lhs) = (rhs) OP (*lhs);                                         \
2498     }                                                                     \
2499                                                                           \
2500     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
2501     return;
2502 // ------------------------------------------------------------------------
2503 
2504 #ifdef KMP_GOMP_COMPAT
2505 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG)                             \
2506     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
2507         KMP_CHECK_GTID;                                                   \
2508         OP_CRITICAL_CPT_REV_WRK( OP, 0 );                                 \
2509     }
2510 #else
2511 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG)
2512 #endif /* KMP_GOMP_COMPAT */
2513 // ------------------------------------------------------------------------
2514 
2515 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG)   \
2516 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE)                                          \
2517     OP_GOMP_CRITICAL_CPT_REV_WRK(OP,GOMP_FLAG)                                \
2518     OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID)                                        \
2519 }
2520 // The end of workaround for cmplx4
2521 
2522 
2523 // !!! TODO: check if we need to return void for cmplx4 routines
// cmplx4 routines return void; the captured value comes back through the extra parameter
2525 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4,  sub_cpt_rev, kmp_cmplx32, -, 8c,    1 )            // __kmpc_atomic_cmplx4_sub_cpt_rev
2526 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4,  div_cpt_rev, kmp_cmplx32, /, 8c,    1 )            // __kmpc_atomic_cmplx4_div_cpt_rev
2527 
2528 ATOMIC_CRITICAL_CPT_REV( cmplx8,  sub_cpt_rev, kmp_cmplx64, -, 16c,   1 )            // __kmpc_atomic_cmplx8_sub_cpt_rev
2529 ATOMIC_CRITICAL_CPT_REV( cmplx8,  div_cpt_rev, kmp_cmplx64, /, 16c,   1 )            // __kmpc_atomic_cmplx8_div_cpt_rev
2530 ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,   1 )            // __kmpc_atomic_cmplx10_sub_cpt_rev
2531 ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,   1 )            // __kmpc_atomic_cmplx10_div_cpt_rev
2532 #if KMP_HAVE_QUAD
2533 ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,   1 )            // __kmpc_atomic_cmplx16_sub_cpt_rev
2534 ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,   1 )            // __kmpc_atomic_cmplx16_div_cpt_rev
2535 #if ( KMP_ARCH_X86 )
2536     ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,   1 )   // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
2537     ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,   1 )   // __kmpc_atomic_cmplx16_div_a16_cpt_rev
2538 #endif
2539 #endif
2540 
2541 // Capture reverse for mixed type: RHS=float16
2542 #if KMP_HAVE_QUAD
2543 
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operand type and size (fixed* for signed, fixed*u for unsigned fixed types)
2546 //     OP_ID   - operation identifier (add, sub, mul, ...)
2547 //     TYPE    - operands' type
2548 // -------------------------------------------------------------------------
2549 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG)       \
2550 ATOMIC_BEGIN_CPT_MIX(TYPE_ID,OP_ID,TYPE,RTYPE_ID,RTYPE)                    \
2551     TYPE new_value;                                                        \
2552     OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG)                                 \
2553     OP_CMPXCHG_CPT_REV(TYPE,BITS,OP)                                       \
2554 }
2555 
2556 // -------------------------------------------------------------------------
2557 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG)         \
2558 ATOMIC_BEGIN_CPT_MIX(TYPE_ID,OP_ID,TYPE,RTYPE_ID,RTYPE)                    \
2559     TYPE new_value;                                                        \
    OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG)  /* send assignment */         \
    OP_CRITICAL_CPT_REV(OP,LCK_ID)          /* send assignment */         \
2562 }
2563 
2564 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed1,  char,       sub_cpt_rev,  8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
2565 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed1u, uchar,      sub_cpt_rev,  8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
2566 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed1,  char,       div_cpt_rev,  8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_rev_fp
2567 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed1u, uchar,      div_cpt_rev,  8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_rev_fp
2568 
2569 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed2,  short,      sub_cpt_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
2570 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed2u, ushort,     sub_cpt_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
2571 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed2,  short,      div_cpt_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_rev_fp
2572 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed2u, ushort,     div_cpt_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_rev_fp
2573 
2574 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed4,  kmp_int32,  sub_cpt_rev, 32, -, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4_sub_cpt_rev_fp
2575 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
2576 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed4,  kmp_int32,  div_cpt_rev, 32, /, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4_div_cpt_rev_fp
2577 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad, 4i, 3, 0 )            // __kmpc_atomic_fixed4u_div_cpt_rev_fp
2578 
2579 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed8,  kmp_int64,  sub_cpt_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
2580 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
2581 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed8,  kmp_int64,  div_cpt_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_rev_fp
2582 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_rev_fp
2583 
2584 ATOMIC_CMPXCHG_CPT_REV_MIX( float4,  kmp_real32, sub_cpt_rev, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_rev_fp
2585 ATOMIC_CMPXCHG_CPT_REV_MIX( float4,  kmp_real32, div_cpt_rev, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_rev_fp
2586 
2587 ATOMIC_CMPXCHG_CPT_REV_MIX( float8,  kmp_real64, sub_cpt_rev, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_rev_fp
2588 ATOMIC_CMPXCHG_CPT_REV_MIX( float8,  kmp_real64, div_cpt_rev, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_rev_fp
2589 
2590 ATOMIC_CRITICAL_CPT_REV_MIX( float10, long double, sub_cpt_rev, -, fp, _Quad, 10r,   1 )            // __kmpc_atomic_float10_sub_cpt_rev_fp
2591 ATOMIC_CRITICAL_CPT_REV_MIX( float10, long double, div_cpt_rev, /, fp, _Quad, 10r,   1 )            // __kmpc_atomic_float10_div_cpt_rev_fp
2592 
2593 #endif //KMP_HAVE_QUAD
2594 
2595 
2596 //   OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}
2597 
2598 #define ATOMIC_BEGIN_SWP(TYPE_ID,TYPE)                                                    \
2599 TYPE __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs )     \
2600 {                                                                                         \
2601     KMP_DEBUG_ASSERT( __kmp_init_serial );                                                \
2602     KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
2603 
2604 #define CRITICAL_SWP(LCK_ID)                                              \
2605     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
2606                                                                           \
2607     old_value = (*lhs);                                                   \
2608     (*lhs) = rhs;                                                         \
2609                                                                           \
2610     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
2611     return old_value;
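
//
// The swap contract, sketched at the call site (loc, gtid, x, expr, v are
// hypothetical caller names): for
//
//     // #pragma omp atomic capture
//     // { v = x; x = expr; }
//
// the compiler can emit
//
//     v = __kmpc_atomic_fixed4_swp( &loc, gtid, &x, expr );
//
// i.e. the routine stores rhs into *lhs and returns the previous value.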
2612 
2613 // ------------------------------------------------------------------------
2614 #ifdef KMP_GOMP_COMPAT
2615 #define GOMP_CRITICAL_SWP(FLAG)                                           \
2616     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
2617         KMP_CHECK_GTID;                                                   \
2618         CRITICAL_SWP( 0 );                                                \
2619     }
2620 #else
2621 #define GOMP_CRITICAL_SWP(FLAG)
2622 #endif /* KMP_GOMP_COMPAT */
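// When GOMP compatibility is active (__kmp_atomic_mode == 2) and the FLAG for
// the architecture is set, the swap is performed under the single global
// atomic lock (lock id 0) so that it serializes correctly against code using
// libgomp's GOMP_atomic_start/end protocol; in that case the function returns
// from inside CRITICAL_SWP.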
2623 
2624 
2625 #define ATOMIC_XCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG)                      \
2626 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE)                                            \
2627     TYPE old_value;                                                       \
2628     GOMP_CRITICAL_SWP(GOMP_FLAG)                                          \
2629     old_value = KMP_XCHG_FIXED##BITS( lhs, rhs );                         \
2630     return old_value;                                                     \
2631 }
2632 // ------------------------------------------------------------------------
2633 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG)                \
2634 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE)                                            \
2635     TYPE old_value;                                                       \
2636     GOMP_CRITICAL_SWP(GOMP_FLAG)                                          \
2637     old_value = KMP_XCHG_REAL##BITS( lhs, rhs );                          \
2638     return old_value;                                                     \
2639 }
2640 
2641 // ------------------------------------------------------------------------
2642 #define CMPXCHG_SWP(TYPE,BITS)                                            \
2643     {                                                                     \
2644         TYPE KMP_ATOMIC_VOLATILE temp_val;                                \
2645         TYPE old_value, new_value;                                        \
2646         temp_val = *lhs;                                                  \
2647         old_value = temp_val;                                             \
2648         new_value = rhs;                                                  \
2649         while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \
2650                       *VOLATILE_CAST(kmp_int##BITS *) &old_value,         \
2651                       *VOLATILE_CAST(kmp_int##BITS *) &new_value ) )      \
2652         {                                                                 \
2653             KMP_CPU_PAUSE();                                              \
2654                                                                           \
2655             temp_val = *lhs;                                              \
2656             old_value = temp_val;                                         \
2657             new_value = rhs;                                              \
2658         }                                                                 \
2659         return old_value;                                                 \
2660     }
2661 
2662 // -------------------------------------------------------------------------
2663 #define ATOMIC_CMPXCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG)                   \
2664 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE)                                            \
2665     TYPE old_value;                                                       \
2666     GOMP_CRITICAL_SWP(GOMP_FLAG)                                          \
2667     CMPXCHG_SWP(TYPE,BITS)                                                \
2668 }
2669 
2670 ATOMIC_XCHG_SWP( fixed1, kmp_int8,    8, KMP_ARCH_X86 )  // __kmpc_atomic_fixed1_swp
2671 ATOMIC_XCHG_SWP( fixed2, kmp_int16,  16, KMP_ARCH_X86 )  // __kmpc_atomic_fixed2_swp
2672 ATOMIC_XCHG_SWP( fixed4, kmp_int32,  32, KMP_ARCH_X86 )  // __kmpc_atomic_fixed4_swp
2673 
2674 ATOMIC_XCHG_FLOAT_SWP( float4, kmp_real32, 32, KMP_ARCH_X86 )      // __kmpc_atomic_float4_swp
2675 
2676 #if ( KMP_ARCH_X86 )
2677     ATOMIC_CMPXCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 )      // __kmpc_atomic_fixed8_swp
2678     ATOMIC_CMPXCHG_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 )     // __kmpc_atomic_float8_swp
2679 #else
2680     ATOMIC_XCHG_SWP(       fixed8, kmp_int64, 64, KMP_ARCH_X86 )   // __kmpc_atomic_fixed8_swp
2681     ATOMIC_XCHG_FLOAT_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 )  // __kmpc_atomic_float8_swp
2682 #endif
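// Note: 32-bit IA-32 has no 8-byte atomic exchange instruction (only
// cmpxchg8b), so the 8-byte flavours above fall back to the compare-and-store
// loop on that architecture; elsewhere a native 64-bit exchange is used.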
2683 
2684 // ------------------------------------------------------------------------
2685 // Routines for Extended types: long double, _Quad, complex flavours (use critical section)
2686 #define ATOMIC_CRITICAL_SWP(TYPE_ID,TYPE,LCK_ID,GOMP_FLAG)              \
2687 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE)                                          \
2688     TYPE old_value;                                                     \
2689     GOMP_CRITICAL_SWP(GOMP_FLAG)                                        \
2690     CRITICAL_SWP(LCK_ID)                                                \
2691 }
2692 
2693 // ------------------------------------------------------------------------
2694 
// !!! TODO: check if we need to return void for cmplx4 routines
// Workaround for cmplx4: routines that return the value by value do not work
// on Win_32e, so the captured value is returned through an additional output
// parameter instead.
2698 
2699 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE)                                                \
2700 void __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out )     \
2701 {                                                                                         \
2702     KMP_DEBUG_ASSERT( __kmp_init_serial );                                                \
2703     KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid ));
2704 
2705 
2706 #define CRITICAL_SWP_WRK(LCK_ID)                                          \
2707     __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
2708                                                                           \
2709     tmp = (*lhs);                                                         \
2710     (*lhs) = (rhs);                                                       \
2711     (*out) = tmp;                                                         \
2712     __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid );             \
2713     return;
2714 
2715 // ------------------------------------------------------------------------
2716 
2717 #ifdef KMP_GOMP_COMPAT
2718 #define GOMP_CRITICAL_SWP_WRK(FLAG)                                       \
2719     if ( (FLAG) && (__kmp_atomic_mode == 2) ) {                           \
2720         KMP_CHECK_GTID;                                                   \
2721         CRITICAL_SWP_WRK( 0 );                                            \
2722     }
2723 #else
2724 #define GOMP_CRITICAL_SWP_WRK(FLAG)
2725 #endif /* KMP_GOMP_COMPAT */
2726 // ------------------------------------------------------------------------
2727 
2728 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE,LCK_ID,GOMP_FLAG)           \
2729 ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE)                                        \
2730     TYPE tmp;                                                             \
2731     GOMP_CRITICAL_SWP_WRK(GOMP_FLAG)                                      \
2732     CRITICAL_SWP_WRK(LCK_ID)                                              \
2733 }
2734 // The end of workaround for cmplx4
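// As a result, the cmplx4 swap instantiated below has the signature
// @code
// void __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid,
//                                kmp_cmplx32 * lhs, kmp_cmplx32 rhs,
//                                kmp_cmplx32 * out );
// @endcode
// whereas every other _swp entry point returns the captured value directly.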
2735 
2736 
2737 ATOMIC_CRITICAL_SWP( float10, long double, 10r,   1 )              // __kmpc_atomic_float10_swp
2738 #if KMP_HAVE_QUAD
2739 ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r,   1 )              // __kmpc_atomic_float16_swp
2740 #endif
2741 // cmplx4 routine to return void
2742 ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32,  8c,   1 )           // __kmpc_atomic_cmplx4_swp
2743 
2747 ATOMIC_CRITICAL_SWP( cmplx8,  kmp_cmplx64, 16c,   1 )              // __kmpc_atomic_cmplx8_swp
2748 ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c,   1 )              // __kmpc_atomic_cmplx10_swp
2749 #if KMP_HAVE_QUAD
2750 ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c,   1 )              // __kmpc_atomic_cmplx16_swp
2751 #if ( KMP_ARCH_X86 )
2752     ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t,         16r, 1 )  // __kmpc_atomic_float16_a16_swp
2753     ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 )  // __kmpc_atomic_cmplx16_a16_swp
2754 #endif
2755 #endif
2756 
2757 
2758 // End of OpenMP 4.0 Capture
2759 
2760 #endif //OMP_40_ENABLED
2761 
2762 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64
2763 
2764 
2765 #undef OP_CRITICAL
2766 
2767 /* ------------------------------------------------------------------------ */
2768 /* Generic atomic routines                                                  */
2769 /* ------------------------------------------------------------------------ */
2770 
2771 void
2772 __kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2773 {
2774     KMP_DEBUG_ASSERT( __kmp_init_serial );
2775 
2776     if (
2777 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2778         FALSE                                   /* must use lock */
2779 #else
2780         TRUE
2781 #endif
2782 	)
2783     {
2784 	kmp_int8 old_value, new_value;
2785 
2786 	old_value = *(kmp_int8 *) lhs;
2787 	(*f)( &new_value, &old_value, rhs );
2788 
2789 	/* TODO: Should this be acquire or release? */
2790 	while ( !  KMP_COMPARE_AND_STORE_ACQ8 ( (kmp_int8 *) lhs,
2791 		    		*(kmp_int8 *) &old_value, *(kmp_int8 *) &new_value ) )
2792 	{
2793 	    KMP_CPU_PAUSE();
2794 
2795 	    old_value = *(kmp_int8 *) lhs;
2796 	    (*f)( &new_value, &old_value, rhs );
2797 	}
2798 
2799 	return;
2800     }
2801     else {
2802         //
2803         // All 1-byte data is of integer data type.
2804         //
2805 
2806 #ifdef KMP_GOMP_COMPAT
2807         if ( __kmp_atomic_mode == 2 ) {
2808             __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2809         }
2810         else
2811 #endif /* KMP_GOMP_COMPAT */
2812 	__kmp_acquire_atomic_lock( & __kmp_atomic_lock_1i, gtid );
2813 
2814 	(*f)( lhs, lhs, rhs );
2815 
2816 #ifdef KMP_GOMP_COMPAT
2817         if ( __kmp_atomic_mode == 2 ) {
2818             __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2819         }
2820         else
2821 #endif /* KMP_GOMP_COMPAT */
2822 	__kmp_release_atomic_lock( & __kmp_atomic_lock_1i, gtid );
2823     }
2824 }
2825 
2826 void
2827 __kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2828 {
2829     if (
2830 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2831         FALSE                                   /* must use lock */
2832 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
2833 	TRUE					/* no alignment problems */
2834 #else
2835 	! ( (kmp_uintptr_t) lhs & 0x1)		/* make sure address is 2-byte aligned */
2836 #endif
2837 	)
2838     {
2839 	kmp_int16 old_value, new_value;
2840 
2841 	old_value = *(kmp_int16 *) lhs;
2842 	(*f)( &new_value, &old_value, rhs );
2843 
2844 	/* TODO: Should this be acquire or release? */
2845 	while ( !  KMP_COMPARE_AND_STORE_ACQ16 ( (kmp_int16 *) lhs,
2846 		    		*(kmp_int16 *) &old_value, *(kmp_int16 *) &new_value ) )
2847 	{
2848 	    KMP_CPU_PAUSE();
2849 
2850 	    old_value = *(kmp_int16 *) lhs;
2851 	    (*f)( &new_value, &old_value, rhs );
2852 	}
2853 
2854 	return;
2855     }
2856     else {
2857         //
2858         // All 2-byte data is of integer data type.
2859         //
2860 
2861 #ifdef KMP_GOMP_COMPAT
2862         if ( __kmp_atomic_mode == 2 ) {
2863             __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2864         }
2865         else
2866 #endif /* KMP_GOMP_COMPAT */
2867 	__kmp_acquire_atomic_lock( & __kmp_atomic_lock_2i, gtid );
2868 
2869 	(*f)( lhs, lhs, rhs );
2870 
2871 #ifdef KMP_GOMP_COMPAT
2872         if ( __kmp_atomic_mode == 2 ) {
2873             __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2874         }
2875         else
2876 #endif /* KMP_GOMP_COMPAT */
2877 	__kmp_release_atomic_lock( & __kmp_atomic_lock_2i, gtid );
2878     }
2879 }
2880 
2881 void
2882 __kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2883 {
2884     KMP_DEBUG_ASSERT( __kmp_init_serial );
2885 
2886     if (
2887         //
2888         // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
2889         // Gomp compatibility is broken if this routine is called for floats.
2890         //
2891 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
2892 	TRUE					/* no alignment problems */
2893 #else
2894 	! ( (kmp_uintptr_t) lhs & 0x3)		/* make sure address is 4-byte aligned */
2895 #endif
2896 	)
2897     {
2898 	kmp_int32 old_value, new_value;
2899 
2900 	old_value = *(kmp_int32 *) lhs;
2901 	(*f)( &new_value, &old_value, rhs );
2902 
2903 	/* TODO: Should this be acquire or release? */
2904 	while ( !  KMP_COMPARE_AND_STORE_ACQ32 ( (kmp_int32 *) lhs,
2905 		    		*(kmp_int32 *) &old_value, *(kmp_int32 *) &new_value ) )
2906 	{
2907 	    KMP_CPU_PAUSE();
2908 
2909 	    old_value = *(kmp_int32 *) lhs;
2910 	    (*f)( &new_value, &old_value, rhs );
2911 	}
2912 
2913 	return;
2914     }
2915     else {
2916         //
2917         // Use __kmp_atomic_lock_4i for all 4-byte data,
2918         // even if it isn't of integer data type.
2919         //
2920 
2921 #ifdef KMP_GOMP_COMPAT
2922         if ( __kmp_atomic_mode == 2 ) {
2923             __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2924         }
2925         else
2926 #endif /* KMP_GOMP_COMPAT */
2927 	__kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid );
2928 
2929 	(*f)( lhs, lhs, rhs );
2930 
2931 #ifdef KMP_GOMP_COMPAT
2932         if ( __kmp_atomic_mode == 2 ) {
2933             __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2934         }
2935         else
2936 #endif /* KMP_GOMP_COMPAT */
2937 	__kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid );
2938     }
2939 }
2940 
2941 void
2942 __kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
2943 {
2944     KMP_DEBUG_ASSERT( __kmp_init_serial );
2945     if (
2946 
2947 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
2948         FALSE                                   /* must use lock */
2949 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64
2950 	TRUE					/* no alignment problems */
2951 #else
2952 	! ( (kmp_uintptr_t) lhs & 0x7)		/* make sure address is 8-byte aligned */
2953 #endif
2954 	)
2955     {
2956 	kmp_int64 old_value, new_value;
2957 
2958 	old_value = *(kmp_int64 *) lhs;
2959 	(*f)( &new_value, &old_value, rhs );
2960 	/* TODO: Should this be acquire or release? */
2961 	while ( !  KMP_COMPARE_AND_STORE_ACQ64 ( (kmp_int64 *) lhs,
2962 					       *(kmp_int64 *) &old_value,
2963 					       *(kmp_int64 *) &new_value ) )
2964 	{
2965 	    KMP_CPU_PAUSE();
2966 
2967 	    old_value = *(kmp_int64 *) lhs;
2968 	    (*f)( &new_value, &old_value, rhs );
2969 	}
2970 
2971 	return;
2972     } else {
2973         //
2974         // Use __kmp_atomic_lock_8i for all 8-byte data,
2975         // even if it isn't of integer data type.
2976         //
2977 
2978 #ifdef KMP_GOMP_COMPAT
2979         if ( __kmp_atomic_mode == 2 ) {
2980             __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
2981         }
2982         else
2983 #endif /* KMP_GOMP_COMPAT */
2984 	__kmp_acquire_atomic_lock( & __kmp_atomic_lock_8i, gtid );
2985 
2986 	(*f)( lhs, lhs, rhs );
2987 
2988 #ifdef KMP_GOMP_COMPAT
2989         if ( __kmp_atomic_mode == 2 ) {
2990             __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
2991         }
2992         else
2993 #endif /* KMP_GOMP_COMPAT */
2994 	__kmp_release_atomic_lock( & __kmp_atomic_lock_8i, gtid );
2995     }
2996 }
2997 
2998 void
2999 __kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
3000 {
3001     KMP_DEBUG_ASSERT( __kmp_init_serial );
3002 
3003 #ifdef KMP_GOMP_COMPAT
3004     if ( __kmp_atomic_mode == 2 ) {
3005         __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
3006     }
3007     else
3008 #endif /* KMP_GOMP_COMPAT */
3009     __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid );
3010 
3011     (*f)( lhs, lhs, rhs );
3012 
3013 #ifdef KMP_GOMP_COMPAT
3014     if ( __kmp_atomic_mode == 2 ) {
3015         __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
3016     }
3017     else
3018 #endif /* KMP_GOMP_COMPAT */
3019     __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid );
3020 }
3021 
3022 void
3023 __kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
3024 {
3025     KMP_DEBUG_ASSERT( __kmp_init_serial );
3026 
3027 #ifdef KMP_GOMP_COMPAT
3028     if ( __kmp_atomic_mode == 2 ) {
3029         __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
3030     }
3031     else
3032 #endif /* KMP_GOMP_COMPAT */
3033     __kmp_acquire_atomic_lock( & __kmp_atomic_lock_16c, gtid );
3034 
3035     (*f)( lhs, lhs, rhs );
3036 
3037 #ifdef KMP_GOMP_COMPAT
3038     if ( __kmp_atomic_mode == 2 ) {
3039         __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
3040     }
3041     else
3042 #endif /* KMP_GOMP_COMPAT */
3043     __kmp_release_atomic_lock( & __kmp_atomic_lock_16c, gtid );
3044 }
3045 
3046 void
3047 __kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
3048 {
3049     KMP_DEBUG_ASSERT( __kmp_init_serial );
3050 
3051 #ifdef KMP_GOMP_COMPAT
3052     if ( __kmp_atomic_mode == 2 ) {
3053         __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
3054     }
3055     else
3056 #endif /* KMP_GOMP_COMPAT */
3057     __kmp_acquire_atomic_lock( & __kmp_atomic_lock_20c, gtid );
3058 
3059     (*f)( lhs, lhs, rhs );
3060 
3061 #ifdef KMP_GOMP_COMPAT
3062     if ( __kmp_atomic_mode == 2 ) {
3063         __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
3064     }
3065     else
3066 #endif /* KMP_GOMP_COMPAT */
3067     __kmp_release_atomic_lock( & __kmp_atomic_lock_20c, gtid );
3068 }
3069 
3070 void
3071 __kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) )
3072 {
3073     KMP_DEBUG_ASSERT( __kmp_init_serial );
3074 
3075 #ifdef KMP_GOMP_COMPAT
3076     if ( __kmp_atomic_mode == 2 ) {
3077         __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid );
3078     }
3079     else
3080 #endif /* KMP_GOMP_COMPAT */
3081     __kmp_acquire_atomic_lock( & __kmp_atomic_lock_32c, gtid );
3082 
3083     (*f)( lhs, lhs, rhs );
3084 
3085 #ifdef KMP_GOMP_COMPAT
3086     if ( __kmp_atomic_mode == 2 ) {
3087         __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid );
3088     }
3089     else
3090 #endif /* KMP_GOMP_COMPAT */
3091     __kmp_release_atomic_lock( & __kmp_atomic_lock_32c, gtid );
3092 }
3093 
// AC: same two routines as GOMP_atomic_start/end, but called by our compiler;
//     duplicated so that third-party names are not used in pure Intel code
3096 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
3097 void
3098 __kmpc_atomic_start(void)
3099 {
3100     int gtid = __kmp_entry_gtid();
3101     KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
3102     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
3103 }
3104 
3105 
3106 void
3107 __kmpc_atomic_end(void)
3108 {
3109     int gtid = __kmp_get_gtid();
3110     KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
3111     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
3112 }
3113 
3114 /* ------------------------------------------------------------------------ */
3115 /* ------------------------------------------------------------------------ */
3116 /*!
3117 @}
3118 */
3119 
3120 // end of file
3121