/*
 * kmp_atomic.cpp -- ATOMIC implementation routines
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp_atomic.h"
#include "kmp.h" // TRUE, asm routines prototypes

typedef unsigned char uchar;
typedef unsigned short ushort;

/*!
@defgroup ATOMIC_OPS Atomic Operations
These functions are used for implementing the many different varieties of
atomic operations.

The compiler is at liberty to inline atomic operations that are naturally
supported by the target architecture. For instance on IA-32 architecture an
atomic like this can be inlined
@code
static int s = 0;
#pragma omp atomic
s++;
@endcode
using the single instruction: `lock; incl s`

However the runtime does provide entrypoints for these operations to support
compilers that choose not to inline them. (For instance,
`__kmpc_atomic_fixed4_add` could be used to perform the increment above.)

The names of the functions are encoded by using the data type name and the
operation name, as in these tables.

Data Type  | Data type encoding
-----------|-------------------
int8_t     | `fixed1`
uint8_t    | `fixed1u`
int16_t    | `fixed2`
uint16_t   | `fixed2u`
int32_t    | `fixed4`
uint32_t   | `fixed4u`
int64_t    | `fixed8`
uint64_t   | `fixed8u`
float      | `float4`
double     | `float8`
float 10 (8087 eighty bit float) | `float10`
complex<float>   | `cmplx4`
complex<double>  | `cmplx8`
complex<float10> | `cmplx10`
<br>

Operation | Operation encoding
----------|-------------------
+ | add
- | sub
\* | mul
/ | div
& | andb
<< | shl
\>\> | shr
\| | orb
^ | xor
&& | andl
\|\| | orl
maximum | max
minimum | min
.eqv. | eqv
.neqv. | neqv

<br>
For non-commutative operations, `_rev` can also be added for the reversed
operation. For the functions that capture the result, the suffix `_cpt` is
added.

Update Functions
================
The general form of an atomic function that just performs an update (without a
`capture`) is
@code
void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs );
@endcode
@param ident_t a pointer to source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand

`capture` functions
===================
The capture functions perform an atomic update and return a result, which is
either the value before the operation or the value after it. They take an
additional argument to determine which result is returned.
Their general form is therefore
@code
TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs, int flag );
@endcode
@param ident_t a pointer to source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand
@param flag one if the result is to be captured *after* the operation, zero if
captured *before*.
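
As an illustrative sketch only (not the output of any particular compiler; the
names `loc`, `gtid`, `s`, `x` and `v` are hypothetical), a capture such as
@code
int v;
#pragma omp atomic capture
{ v = s; s += x; }
@endcode
might be lowered to a call like
@code
v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &s, x, 0); // flag==0: old value
@endcode
where `flag` is zero because `v` takes the value of `s` *before* the update.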

The one set of exceptions to this is the `complex<float>` type where the value
is not returned; rather, an extra output-argument pointer is passed.

They look like
@code
void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 *
lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
@endcode

Read and Write Operations
=========================
The OpenMP<sup>*</sup> standard now supports atomic operations that simply
ensure that the value is read or written atomically, with no modification
performed. In many cases on IA-32 architecture these operations can be inlined
since the architecture guarantees that no tearing occurs on aligned objects
accessed with a single memory operation of up to 64 bits in size.

The general form of the read operations is
@code
TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
@endcode

For the write operations the form is
@code
void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs
);
@endcode

Full list of functions
======================
This leads to the generation of 376 atomic functions, as follows.

Functions for integers
----------------------
There are versions here for integers of size 1, 2, 4 and 8 bytes, both signed
and unsigned (where that matters).
@code
__kmpc_atomic_fixed1_add
__kmpc_atomic_fixed1_add_cpt
__kmpc_atomic_fixed1_add_fp
__kmpc_atomic_fixed1_andb
__kmpc_atomic_fixed1_andb_cpt
__kmpc_atomic_fixed1_andl
__kmpc_atomic_fixed1_andl_cpt
__kmpc_atomic_fixed1_div
__kmpc_atomic_fixed1_div_cpt
__kmpc_atomic_fixed1_div_cpt_rev
__kmpc_atomic_fixed1_div_float8
__kmpc_atomic_fixed1_div_fp
__kmpc_atomic_fixed1_div_rev
__kmpc_atomic_fixed1_eqv
__kmpc_atomic_fixed1_eqv_cpt
__kmpc_atomic_fixed1_max
__kmpc_atomic_fixed1_max_cpt
__kmpc_atomic_fixed1_min
__kmpc_atomic_fixed1_min_cpt
__kmpc_atomic_fixed1_mul
__kmpc_atomic_fixed1_mul_cpt
__kmpc_atomic_fixed1_mul_float8
__kmpc_atomic_fixed1_mul_fp
__kmpc_atomic_fixed1_neqv
__kmpc_atomic_fixed1_neqv_cpt
__kmpc_atomic_fixed1_orb
__kmpc_atomic_fixed1_orb_cpt
__kmpc_atomic_fixed1_orl
__kmpc_atomic_fixed1_orl_cpt
__kmpc_atomic_fixed1_rd
__kmpc_atomic_fixed1_shl
__kmpc_atomic_fixed1_shl_cpt
__kmpc_atomic_fixed1_shl_cpt_rev
__kmpc_atomic_fixed1_shl_rev
__kmpc_atomic_fixed1_shr
__kmpc_atomic_fixed1_shr_cpt
__kmpc_atomic_fixed1_shr_cpt_rev
__kmpc_atomic_fixed1_shr_rev
__kmpc_atomic_fixed1_sub
__kmpc_atomic_fixed1_sub_cpt
__kmpc_atomic_fixed1_sub_cpt_rev
__kmpc_atomic_fixed1_sub_fp
__kmpc_atomic_fixed1_sub_rev
__kmpc_atomic_fixed1_swp
__kmpc_atomic_fixed1_wr
__kmpc_atomic_fixed1_xor
__kmpc_atomic_fixed1_xor_cpt
__kmpc_atomic_fixed1u_add_fp
__kmpc_atomic_fixed1u_sub_fp
__kmpc_atomic_fixed1u_mul_fp
__kmpc_atomic_fixed1u_div
__kmpc_atomic_fixed1u_div_cpt
__kmpc_atomic_fixed1u_div_cpt_rev
__kmpc_atomic_fixed1u_div_fp
__kmpc_atomic_fixed1u_div_rev
__kmpc_atomic_fixed1u_shr
__kmpc_atomic_fixed1u_shr_cpt
__kmpc_atomic_fixed1u_shr_cpt_rev
__kmpc_atomic_fixed1u_shr_rev
__kmpc_atomic_fixed2_add
__kmpc_atomic_fixed2_add_cpt
__kmpc_atomic_fixed2_add_fp
__kmpc_atomic_fixed2_andb
__kmpc_atomic_fixed2_andb_cpt
__kmpc_atomic_fixed2_andl
__kmpc_atomic_fixed2_andl_cpt
__kmpc_atomic_fixed2_div
__kmpc_atomic_fixed2_div_cpt
__kmpc_atomic_fixed2_div_cpt_rev
__kmpc_atomic_fixed2_div_float8
__kmpc_atomic_fixed2_div_fp
__kmpc_atomic_fixed2_div_rev
__kmpc_atomic_fixed2_eqv
__kmpc_atomic_fixed2_eqv_cpt
__kmpc_atomic_fixed2_max
__kmpc_atomic_fixed2_max_cpt
__kmpc_atomic_fixed2_min
__kmpc_atomic_fixed2_min_cpt
__kmpc_atomic_fixed2_mul
__kmpc_atomic_fixed2_mul_cpt
__kmpc_atomic_fixed2_mul_float8
__kmpc_atomic_fixed2_mul_fp
__kmpc_atomic_fixed2_neqv
__kmpc_atomic_fixed2_neqv_cpt
__kmpc_atomic_fixed2_orb
__kmpc_atomic_fixed2_orb_cpt
__kmpc_atomic_fixed2_orl
__kmpc_atomic_fixed2_orl_cpt
__kmpc_atomic_fixed2_rd
__kmpc_atomic_fixed2_shl
__kmpc_atomic_fixed2_shl_cpt
__kmpc_atomic_fixed2_shl_cpt_rev
__kmpc_atomic_fixed2_shl_rev
__kmpc_atomic_fixed2_shr
__kmpc_atomic_fixed2_shr_cpt
__kmpc_atomic_fixed2_shr_cpt_rev
__kmpc_atomic_fixed2_shr_rev
__kmpc_atomic_fixed2_sub
__kmpc_atomic_fixed2_sub_cpt
__kmpc_atomic_fixed2_sub_cpt_rev
__kmpc_atomic_fixed2_sub_fp
__kmpc_atomic_fixed2_sub_rev
__kmpc_atomic_fixed2_swp
__kmpc_atomic_fixed2_wr
__kmpc_atomic_fixed2_xor
__kmpc_atomic_fixed2_xor_cpt
__kmpc_atomic_fixed2u_add_fp
__kmpc_atomic_fixed2u_sub_fp
__kmpc_atomic_fixed2u_mul_fp
__kmpc_atomic_fixed2u_div
__kmpc_atomic_fixed2u_div_cpt
__kmpc_atomic_fixed2u_div_cpt_rev
__kmpc_atomic_fixed2u_div_fp
__kmpc_atomic_fixed2u_div_rev
__kmpc_atomic_fixed2u_shr
__kmpc_atomic_fixed2u_shr_cpt
__kmpc_atomic_fixed2u_shr_cpt_rev
__kmpc_atomic_fixed2u_shr_rev
__kmpc_atomic_fixed4_add
__kmpc_atomic_fixed4_add_cpt
__kmpc_atomic_fixed4_add_fp
__kmpc_atomic_fixed4_andb
__kmpc_atomic_fixed4_andb_cpt
__kmpc_atomic_fixed4_andl
__kmpc_atomic_fixed4_andl_cpt
__kmpc_atomic_fixed4_div
__kmpc_atomic_fixed4_div_cpt
__kmpc_atomic_fixed4_div_cpt_rev
__kmpc_atomic_fixed4_div_float8
__kmpc_atomic_fixed4_div_fp
__kmpc_atomic_fixed4_div_rev
__kmpc_atomic_fixed4_eqv
__kmpc_atomic_fixed4_eqv_cpt
__kmpc_atomic_fixed4_max
__kmpc_atomic_fixed4_max_cpt
__kmpc_atomic_fixed4_min
__kmpc_atomic_fixed4_min_cpt
__kmpc_atomic_fixed4_mul
__kmpc_atomic_fixed4_mul_cpt
__kmpc_atomic_fixed4_mul_float8
__kmpc_atomic_fixed4_mul_fp
__kmpc_atomic_fixed4_neqv
__kmpc_atomic_fixed4_neqv_cpt
__kmpc_atomic_fixed4_orb
__kmpc_atomic_fixed4_orb_cpt
__kmpc_atomic_fixed4_orl
__kmpc_atomic_fixed4_orl_cpt
__kmpc_atomic_fixed4_rd
__kmpc_atomic_fixed4_shl
__kmpc_atomic_fixed4_shl_cpt
__kmpc_atomic_fixed4_shl_cpt_rev
__kmpc_atomic_fixed4_shl_rev
__kmpc_atomic_fixed4_shr
__kmpc_atomic_fixed4_shr_cpt
__kmpc_atomic_fixed4_shr_cpt_rev
__kmpc_atomic_fixed4_shr_rev
__kmpc_atomic_fixed4_sub
__kmpc_atomic_fixed4_sub_cpt
__kmpc_atomic_fixed4_sub_cpt_rev
__kmpc_atomic_fixed4_sub_fp
__kmpc_atomic_fixed4_sub_rev
__kmpc_atomic_fixed4_swp
__kmpc_atomic_fixed4_wr
__kmpc_atomic_fixed4_xor
__kmpc_atomic_fixed4_xor_cpt
__kmpc_atomic_fixed4u_add_fp
__kmpc_atomic_fixed4u_sub_fp
__kmpc_atomic_fixed4u_mul_fp
__kmpc_atomic_fixed4u_div
__kmpc_atomic_fixed4u_div_cpt
__kmpc_atomic_fixed4u_div_cpt_rev
__kmpc_atomic_fixed4u_div_fp
__kmpc_atomic_fixed4u_div_rev
__kmpc_atomic_fixed4u_shr
__kmpc_atomic_fixed4u_shr_cpt
__kmpc_atomic_fixed4u_shr_cpt_rev
__kmpc_atomic_fixed4u_shr_rev
__kmpc_atomic_fixed8_add
__kmpc_atomic_fixed8_add_cpt
__kmpc_atomic_fixed8_add_fp
__kmpc_atomic_fixed8_andb
__kmpc_atomic_fixed8_andb_cpt
__kmpc_atomic_fixed8_andl
__kmpc_atomic_fixed8_andl_cpt
__kmpc_atomic_fixed8_div
__kmpc_atomic_fixed8_div_cpt
__kmpc_atomic_fixed8_div_cpt_rev
__kmpc_atomic_fixed8_div_float8
__kmpc_atomic_fixed8_div_fp
__kmpc_atomic_fixed8_div_rev
__kmpc_atomic_fixed8_eqv
__kmpc_atomic_fixed8_eqv_cpt
__kmpc_atomic_fixed8_max
__kmpc_atomic_fixed8_max_cpt
__kmpc_atomic_fixed8_min
__kmpc_atomic_fixed8_min_cpt
__kmpc_atomic_fixed8_mul
__kmpc_atomic_fixed8_mul_cpt
__kmpc_atomic_fixed8_mul_float8
__kmpc_atomic_fixed8_mul_fp
__kmpc_atomic_fixed8_neqv
__kmpc_atomic_fixed8_neqv_cpt
__kmpc_atomic_fixed8_orb
__kmpc_atomic_fixed8_orb_cpt
__kmpc_atomic_fixed8_orl
__kmpc_atomic_fixed8_orl_cpt
__kmpc_atomic_fixed8_rd
__kmpc_atomic_fixed8_shl
__kmpc_atomic_fixed8_shl_cpt
__kmpc_atomic_fixed8_shl_cpt_rev
__kmpc_atomic_fixed8_shl_rev
__kmpc_atomic_fixed8_shr
__kmpc_atomic_fixed8_shr_cpt
__kmpc_atomic_fixed8_shr_cpt_rev
__kmpc_atomic_fixed8_shr_rev
__kmpc_atomic_fixed8_sub
__kmpc_atomic_fixed8_sub_cpt
__kmpc_atomic_fixed8_sub_cpt_rev
__kmpc_atomic_fixed8_sub_fp
__kmpc_atomic_fixed8_sub_rev
__kmpc_atomic_fixed8_swp
__kmpc_atomic_fixed8_wr
__kmpc_atomic_fixed8_xor
__kmpc_atomic_fixed8_xor_cpt
__kmpc_atomic_fixed8u_add_fp
__kmpc_atomic_fixed8u_sub_fp
__kmpc_atomic_fixed8u_mul_fp
__kmpc_atomic_fixed8u_div
__kmpc_atomic_fixed8u_div_cpt
__kmpc_atomic_fixed8u_div_cpt_rev
__kmpc_atomic_fixed8u_div_fp
__kmpc_atomic_fixed8u_div_rev
__kmpc_atomic_fixed8u_shr
__kmpc_atomic_fixed8u_shr_cpt
__kmpc_atomic_fixed8u_shr_cpt_rev
__kmpc_atomic_fixed8u_shr_rev
@endcode

Functions for floating point
----------------------------
There are versions here for floating point numbers of size 4, 8, 10 and 16
bytes. (Ten byte floats are used by X87, but are now rare.)
@code
__kmpc_atomic_float4_add
__kmpc_atomic_float4_add_cpt
__kmpc_atomic_float4_add_float8
__kmpc_atomic_float4_add_fp
__kmpc_atomic_float4_div
__kmpc_atomic_float4_div_cpt
__kmpc_atomic_float4_div_cpt_rev
__kmpc_atomic_float4_div_float8
__kmpc_atomic_float4_div_fp
__kmpc_atomic_float4_div_rev
__kmpc_atomic_float4_max
__kmpc_atomic_float4_max_cpt
__kmpc_atomic_float4_min
__kmpc_atomic_float4_min_cpt
__kmpc_atomic_float4_mul
__kmpc_atomic_float4_mul_cpt
__kmpc_atomic_float4_mul_float8
__kmpc_atomic_float4_mul_fp
__kmpc_atomic_float4_rd
__kmpc_atomic_float4_sub
__kmpc_atomic_float4_sub_cpt
__kmpc_atomic_float4_sub_cpt_rev
__kmpc_atomic_float4_sub_float8
__kmpc_atomic_float4_sub_fp
__kmpc_atomic_float4_sub_rev
__kmpc_atomic_float4_swp
__kmpc_atomic_float4_wr
__kmpc_atomic_float8_add
__kmpc_atomic_float8_add_cpt
__kmpc_atomic_float8_add_fp
__kmpc_atomic_float8_div
__kmpc_atomic_float8_div_cpt
__kmpc_atomic_float8_div_cpt_rev
__kmpc_atomic_float8_div_fp
__kmpc_atomic_float8_div_rev
__kmpc_atomic_float8_max
__kmpc_atomic_float8_max_cpt
__kmpc_atomic_float8_min
__kmpc_atomic_float8_min_cpt
__kmpc_atomic_float8_mul
__kmpc_atomic_float8_mul_cpt
__kmpc_atomic_float8_mul_fp
__kmpc_atomic_float8_rd
__kmpc_atomic_float8_sub
__kmpc_atomic_float8_sub_cpt
__kmpc_atomic_float8_sub_cpt_rev
__kmpc_atomic_float8_sub_fp
__kmpc_atomic_float8_sub_rev
__kmpc_atomic_float8_swp
__kmpc_atomic_float8_wr
__kmpc_atomic_float10_add
__kmpc_atomic_float10_add_cpt
__kmpc_atomic_float10_add_fp
__kmpc_atomic_float10_div
__kmpc_atomic_float10_div_cpt
__kmpc_atomic_float10_div_cpt_rev
__kmpc_atomic_float10_div_fp
__kmpc_atomic_float10_div_rev
__kmpc_atomic_float10_mul
__kmpc_atomic_float10_mul_cpt
__kmpc_atomic_float10_mul_fp
__kmpc_atomic_float10_rd
__kmpc_atomic_float10_sub
__kmpc_atomic_float10_sub_cpt
__kmpc_atomic_float10_sub_cpt_rev
__kmpc_atomic_float10_sub_fp
__kmpc_atomic_float10_sub_rev
__kmpc_atomic_float10_swp
__kmpc_atomic_float10_wr
__kmpc_atomic_float16_add
__kmpc_atomic_float16_add_cpt
__kmpc_atomic_float16_div
__kmpc_atomic_float16_div_cpt
__kmpc_atomic_float16_div_cpt_rev
__kmpc_atomic_float16_div_rev
__kmpc_atomic_float16_max
__kmpc_atomic_float16_max_cpt
__kmpc_atomic_float16_min
__kmpc_atomic_float16_min_cpt
__kmpc_atomic_float16_mul
__kmpc_atomic_float16_mul_cpt
__kmpc_atomic_float16_rd
__kmpc_atomic_float16_sub
__kmpc_atomic_float16_sub_cpt
__kmpc_atomic_float16_sub_cpt_rev
__kmpc_atomic_float16_sub_rev
__kmpc_atomic_float16_swp
__kmpc_atomic_float16_wr
@endcode

Functions for Complex types
---------------------------
Functions for complex types whose component floating point variables are of
size 4, 8, 10 or 16 bytes. The names here are based on the size of the
component float, *not* the size of the complex type. So
`__kmpc_atomic_cmplx8_add` is an operation on a `complex<double>` or
`complex(kind=8)`, *not* `complex<float>`.

@code
__kmpc_atomic_cmplx4_add
__kmpc_atomic_cmplx4_add_cmplx8
__kmpc_atomic_cmplx4_add_cpt
__kmpc_atomic_cmplx4_div
__kmpc_atomic_cmplx4_div_cmplx8
__kmpc_atomic_cmplx4_div_cpt
__kmpc_atomic_cmplx4_div_cpt_rev
__kmpc_atomic_cmplx4_div_rev
__kmpc_atomic_cmplx4_mul
__kmpc_atomic_cmplx4_mul_cmplx8
__kmpc_atomic_cmplx4_mul_cpt
__kmpc_atomic_cmplx4_rd
__kmpc_atomic_cmplx4_sub
__kmpc_atomic_cmplx4_sub_cmplx8
__kmpc_atomic_cmplx4_sub_cpt
__kmpc_atomic_cmplx4_sub_cpt_rev
__kmpc_atomic_cmplx4_sub_rev
__kmpc_atomic_cmplx4_swp
__kmpc_atomic_cmplx4_wr
__kmpc_atomic_cmplx8_add
__kmpc_atomic_cmplx8_add_cpt
__kmpc_atomic_cmplx8_div
__kmpc_atomic_cmplx8_div_cpt
__kmpc_atomic_cmplx8_div_cpt_rev
__kmpc_atomic_cmplx8_div_rev
__kmpc_atomic_cmplx8_mul
__kmpc_atomic_cmplx8_mul_cpt
__kmpc_atomic_cmplx8_rd
__kmpc_atomic_cmplx8_sub
__kmpc_atomic_cmplx8_sub_cpt
__kmpc_atomic_cmplx8_sub_cpt_rev
__kmpc_atomic_cmplx8_sub_rev
__kmpc_atomic_cmplx8_swp
__kmpc_atomic_cmplx8_wr
__kmpc_atomic_cmplx10_add
__kmpc_atomic_cmplx10_add_cpt
__kmpc_atomic_cmplx10_div
__kmpc_atomic_cmplx10_div_cpt
__kmpc_atomic_cmplx10_div_cpt_rev
__kmpc_atomic_cmplx10_div_rev
__kmpc_atomic_cmplx10_mul
__kmpc_atomic_cmplx10_mul_cpt
__kmpc_atomic_cmplx10_rd
__kmpc_atomic_cmplx10_sub
__kmpc_atomic_cmplx10_sub_cpt
__kmpc_atomic_cmplx10_sub_cpt_rev
__kmpc_atomic_cmplx10_sub_rev
__kmpc_atomic_cmplx10_swp
__kmpc_atomic_cmplx10_wr
__kmpc_atomic_cmplx16_add
__kmpc_atomic_cmplx16_add_cpt
__kmpc_atomic_cmplx16_div
__kmpc_atomic_cmplx16_div_cpt
__kmpc_atomic_cmplx16_div_cpt_rev
__kmpc_atomic_cmplx16_div_rev
__kmpc_atomic_cmplx16_mul
__kmpc_atomic_cmplx16_mul_cpt
__kmpc_atomic_cmplx16_rd
__kmpc_atomic_cmplx16_sub
__kmpc_atomic_cmplx16_sub_cpt
__kmpc_atomic_cmplx16_sub_cpt_rev
__kmpc_atomic_cmplx16_swp
__kmpc_atomic_cmplx16_wr
@endcode
*/

/*!
@ingroup ATOMIC_OPS
@{
*/

/*
 * Global vars
 */

#ifndef KMP_GOMP_COMPAT
int __kmp_atomic_mode = 1; // Intel perf
#else
int __kmp_atomic_mode = 2; // GOMP compatibility
#endif /* KMP_GOMP_COMPAT */

KMP_ALIGN(128)

// Control access to all user coded atomics in Gnu compat mode
kmp_atomic_lock_t __kmp_atomic_lock;
// Control access to all user coded atomics for 1-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_1i;
// Control access to all user coded atomics for 2-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_2i;
// Control access to all user coded atomics for 4-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_4i;
// Control access to all user coded atomics for kmp_real32 data type
kmp_atomic_lock_t __kmp_atomic_lock_4r;
// Control access to all user coded atomics for 8-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_8i;
// Control access to all user coded atomics for kmp_real64 data type
kmp_atomic_lock_t __kmp_atomic_lock_8r;
// Control access to all user coded atomics for float complex data type
kmp_atomic_lock_t __kmp_atomic_lock_8c;
// Control access to all user coded atomics for long double data type
kmp_atomic_lock_t __kmp_atomic_lock_10r;
// Control access to all user coded atomics for _Quad data type
kmp_atomic_lock_t __kmp_atomic_lock_16r;
// Control access to all user coded atomics for double complex data type
kmp_atomic_lock_t __kmp_atomic_lock_16c;
// Control access to all user coded atomics for long double complex type
kmp_atomic_lock_t __kmp_atomic_lock_20c;
// Control access to all user coded atomics for _Quad complex data type
kmp_atomic_lock_t __kmp_atomic_lock_32c;

/* 2007-03-02:
   Without the "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a
   bug on *_32 and *_32e. This is just a temporary workaround for the problem.
   It seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG
   routines in assembler language.
*/
#define KMP_ATOMIC_VOLATILE volatile

#if (KMP_ARCH_X86) && KMP_HAVE_QUAD

static inline void operator+=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q /= rhs.q;
}
static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q > rhs.q;
}

static inline void operator+=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q /= rhs.q;
}
static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q > rhs.q;
}

static inline void operator+=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q /= rhs.q;
}

static inline void operator+=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q /= rhs.q;
}

#endif

// ATOMIC implementation routines -----------------------------------------
// One routine for each operation and operand type.
// All routine declarations look like
// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );

#define KMP_CHECK_GTID                                                         \
  if (gtid == KMP_GTID_UNKNOWN) {                                             \
    gtid = __kmp_entry_gtid();                                                 \
  } // check and get gtid when needed

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//           fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE)                          \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,       \
                                             TYPE *lhs, TYPE rhs) {           \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                      \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
// Lock variables used for critical sections for various size operands
#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL(OP, LCK_ID)                                               \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
                                                                              \
  (*lhs) OP(rhs);                                                             \
                                                                              \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// ------------------------------------------------------------------------
// For GNU compatibility, we may need to use a critical section,
// even though it is not required by the ISA.
//
// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
// critical section. On Intel(R) 64, all atomic operations are done with fetch
// and add or compare and exchange. Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions that
// require a critical section, where we predict that they will be implemented
// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
//
// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
// the FLAG parameter should always be 1. If we know that we will be using
// a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
// locks that are specialized based upon the size or type of the data.
//
// If FLAG is 0, then we are relying on dead code elimination by the build
// compiler to get rid of the useless block of code, and save a needless
// branch at runtime.
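//
// As an illustrative sketch (an approximate hand-expansion, not emitted
// code), OP_GOMP_CRITICAL(+=, 1) expands to roughly
//   if ((1) && (__kmp_atomic_mode == 2)) {
//     if (gtid == KMP_GTID_UNKNOWN) { gtid = __kmp_entry_gtid(); }
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); // generic lock 0
//     (*lhs) += (rhs);
//     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
//     return;
//   }
// so a GOMP-compat run serializes the update through the generic lock, while
// OP_GOMP_CRITICAL(+=, 0) leaves only a constant-false test for the build
// compiler to eliminate.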

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL(OP, FLAG)                                            \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL(OP, 0);                                                       \
    return;                                                                   \
  }
#else
#define OP_GOMP_CRITICAL(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

#if KMP_MIC
#define KMP_DO_PAUSE _mm_delay_32(1)
#else
#define KMP_DO_PAUSE KMP_CPU_PAUSE()
#endif /* KMP_MIC */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
#define OP_CMPXCHG(TYPE, BITS, OP)                                            \
  {                                                                           \
    TYPE old_value, new_value;                                                \
    old_value = *(TYPE volatile *)lhs;                                        \
    new_value = old_value OP rhs;                                             \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                  \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,    \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                       \
      KMP_DO_PAUSE;                                                           \
                                                                              \
      old_value = *(TYPE volatile *)lhs;                                      \
      new_value = old_value OP rhs;                                           \
    }                                                                         \
  }

#if USE_CMPXCHG_FIX
// 2007-06-25:
// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
// and win_32e are affected (I verified the asm). Compiler ignores the volatile
// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
// compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
// the workaround.
#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                 \
  {                                                                           \
    struct _sss {                                                             \
      TYPE cmp;                                                               \
      kmp_int##BITS *vvv;                                                     \
    };                                                                        \
    struct _sss old_value, new_value;                                         \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                          \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                          \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                          \
    new_value.cmp = old_value.cmp OP rhs;                                     \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                  \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv,  \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                     \
      KMP_DO_PAUSE;                                                           \
                                                                              \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                        \
      new_value.cmp = old_value.cmp OP rhs;                                   \
    }                                                                         \
  }
// end of the first part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,        \
                         GOMP_FLAG)                                           \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                         \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */           \
  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                       \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
                       GOMP_FLAG)                                             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                         \
  OP_CMPXCHG(TYPE, BITS, OP)                                                  \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,     \
                                  MASK, GOMP_FLAG)                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                         \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                       \
  }
// end of the second part of the workaround for C78287
#endif

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,        \
                         GOMP_FLAG)                                           \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                         \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                     \
    /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */         \
    KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                     \
  } else {                                                                    \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */        \
  }                                                                           \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
                       GOMP_FLAG)                                             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                         \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                     \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                          \
  } else {                                                                    \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */        \
  }                                                                           \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,     \
                                  MASK, GOMP_FLAG)                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                         \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                     \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                          \
  } else {                                                                    \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */        \
  }                                                                           \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// Routines for ATOMIC 4-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
                 0) // __kmpc_atomic_fixed4_add
ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
                 0) // __kmpc_atomic_fixed4_sub

ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_add
ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_sub

// Routines for ATOMIC 8-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub

ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_add
ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_sub

// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable
// MASK - used for alignment check

// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
               0) // __kmpc_atomic_fixed1_andb
ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
               0) // __kmpc_atomic_fixed1_orb
ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
               0) // __kmpc_atomic_fixed1_xor
ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
               0) // __kmpc_atomic_fixed2_andb
ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
               0) // __kmpc_atomic_fixed2_orb
ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
               0) // __kmpc_atomic_fixed2_xor
ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
               0) // __kmpc_atomic_fixed4_andb
ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
               0) // __kmpc_atomic_fixed4_orb
ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
               0) // __kmpc_atomic_fixed4_xor
ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_div
ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_mul
ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_div
ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_mul
// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG

/* ------------------------------------------------------------------------ */
/* Routines for C/C++ Reduction operators && and ||                          */

// ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no combined assignment
// TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
#define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                      \
  OP_CRITICAL(= *lhs OP, LCK_ID)                                              \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                      \
  OP_CMPXCHG(TYPE, BITS, OP)                                                  \
  }

#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                      \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                     \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                          \
  } else {                                                                    \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */             \
  }                                                                           \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
              0) // __kmpc_atomic_fixed4_andl
ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
              0) // __kmpc_atomic_fixed4_orl
ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl

/* ------------------------------------------------------------------------- */
/* Routines for Fortran operators that have no exact match in C:              */
/* MAX, MIN, .EQV., .NEQV.                                                     */
/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}             */
/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}    */

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - operator used to check whether any action is needed at all
#define MIN_MAX_CRITSECT(OP, LCK_ID)                                          \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
                                                                              \
  if (*lhs OP rhs) { /* still need actions? */                                \
    *lhs = rhs;                                                               \
  }                                                                           \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)                                       \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    MIN_MAX_CRITSECT(OP, 0);                                                  \
    return;                                                                   \
  }
#else
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                       \
  {                                                                           \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                        \
    TYPE old_value;                                                           \
    temp_val = *lhs;                                                          \
    old_value = temp_val;                                                     \
    while (old_value OP rhs && /* still need actions? */                      \
           !KMP_COMPARE_AND_STORE_ACQ##BITS(                                  \
               (kmp_int##BITS *)lhs,                                          \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value,                   \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                      \
      KMP_CPU_PAUSE();                                                        \
      temp_val = *lhs;                                                        \
      old_value = temp_val;                                                   \
    }                                                                         \
  }

// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)         \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  if (*lhs OP rhs) { /* need actions? */                                      \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                      \
    MIN_MAX_CRITSECT(OP, LCK_ID)                                              \
  }                                                                           \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,        \
                         GOMP_FLAG)                                           \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  if (*lhs OP rhs) {                                                          \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                      \
    MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                           \
  }                                                                           \
  }

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,        \
                         GOMP_FLAG)                                           \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  if (*lhs OP rhs) {                                                          \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                      \
    if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                   \
      MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */                   \
    } else {                                                                  \
      KMP_CHECK_GTID;                                                         \
      MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */                    \
    }                                                                         \
  }                                                                           \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
                 0) // __kmpc_atomic_fixed4_max
MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
                 0) // __kmpc_atomic_fixed4_min
MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_max
MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_min
MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_max
MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_min
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
                 1) // __kmpc_atomic_float16_max
MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
                 1) // __kmpc_atomic_float16_min
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
                 1) // __kmpc_atomic_float16_max_a16
MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
                 1) // __kmpc_atomic_float16_min_a16
#endif
#endif
// ------------------------------------------------------------------------
// Need separate macros for .EQV. because of the need of complement (~)
// OP ignored for critical sections, ^=~ used instead
#define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)          \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */                     \
  OP_CRITICAL(^= ~, LCK_ID) /* send assignment and complement */              \
  }

// ------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                        GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */                     \
  OP_CMPXCHG(TYPE, BITS, OP)                                                  \
  }
// ------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                        GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG)                                           \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                     \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                          \
  } else {                                                                    \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */          \
  }                                                                           \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
                KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
                KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
                KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
                KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)          \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */                   \
  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */                           \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL(float10, add, long double, +, 10r,
                1) // __kmpc_atomic_float10_add
ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
                1) // __kmpc_atomic_float10_sub
ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
                1) // __kmpc_atomic_float10_mul
ATOMIC_CRITICAL(float10, div, long double, /, 10r,
                1) // __kmpc_atomic_float10_div
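
// As an illustrative sketch (an approximate hand-expansion of the macros
// above, not a verbatim copy of generated code), the float10 add entry is
// roughly
//   void __kmpc_atomic_float10_add(ident_t *id_ref, int gtid,
//                                  long double *lhs, long double rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     // GOMP-compat mode (GOMP_FLAG==1) would take the generic
//     // __kmp_atomic_lock and return here instead.
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//     (*lhs) += (rhs);
//     __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//   }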
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
                1) // __kmpc_atomic_float16_add
ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
                1) // __kmpc_atomic_float16_sub
ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
                1) // __kmpc_atomic_float16_mul
ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
                1) // __kmpc_atomic_float16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
                1) // __kmpc_atomic_float16_add_a16
ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
                1) // __kmpc_atomic_float16_sub_a16
ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
                1) // __kmpc_atomic_float16_mul_a16
ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
                1) // __kmpc_atomic_float16_div_a16
#endif
#endif
// routines for complex types

#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_add
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_div
// end of the workaround for C78287
#else
ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
#endif // USE_CMPXCHG_FIX

ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
                1) // __kmpc_atomic_cmplx10_add
ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
                1) // __kmpc_atomic_cmplx10_sub
ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
                1) // __kmpc_atomic_cmplx10_mul
ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
                1) // __kmpc_atomic_cmplx10_div
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
                1) // __kmpc_atomic_cmplx16_add
ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub
ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul
ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
                1) // __kmpc_atomic_cmplx16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
                1) // __kmpc_atomic_cmplx16_add_a16
ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub_a16
ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul_a16
ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                1) // __kmpc_atomic_cmplx16_div_a16
#endif
#endif

#if OMP_40_ENABLED

// OpenMP 4.0: x = expr binop x for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_REV(OP, LCK_ID)                                           \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
                                                                              \
  (*lhs) = (rhs)OP(*lhs);                                                     \
                                                                              \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_REV(OP, FLAG)                                        \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL_REV(OP, 0);                                                   \
    return;                                                                   \
  }
#else
#define OP_GOMP_CRITICAL_REV(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//           fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE)                      \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
                                                   TYPE *lhs, TYPE rhs) {     \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                      \
    KA_TRACE(100,                                                             \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_REV(TYPE, BITS, OP)                                        \
  {                                                                           \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                        \
    TYPE old_value, new_value;                                                \
    temp_val = *lhs;                                                          \
    old_value = temp_val;                                                     \
    new_value = rhs OP old_value;                                             \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                  \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,    \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                       \
      KMP_DO_PAUSE;                                                           \
                                                                              \
      temp_val = *lhs;                                                        \
      old_value = temp_val;                                                   \
      new_value = rhs OP old_value;                                           \
    }                                                                         \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                         \
  OP_CMPXCHG_REV(TYPE, BITS, OP)                                              \
  }

// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable

// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev

ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev

ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev

ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev

ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev

ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
__kmpc_atomic_float8_div_rev 1493 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r, 1494 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev 1495 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG 1496 1497 // ------------------------------------------------------------------------ 1498 // Routines for Extended types: long double, _Quad, complex flavours (use 1499 // critical section) 1500 // TYPE_ID, OP_ID, TYPE - detailed above 1501 // OP - operator 1502 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1503 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1504 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \ 1505 OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ 1506 OP_CRITICAL_REV(OP, LCK_ID) \ 1507 } 1508 1509 /* ------------------------------------------------------------------------- */ 1510 // routines for long double type 1511 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r, 1512 1) // __kmpc_atomic_float10_sub_rev 1513 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r, 1514 1) // __kmpc_atomic_float10_div_rev 1515 #if KMP_HAVE_QUAD 1516 // routines for _Quad type 1517 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r, 1518 1) // __kmpc_atomic_float16_sub_rev 1519 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r, 1520 1) // __kmpc_atomic_float16_div_rev 1521 #if (KMP_ARCH_X86) 1522 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r, 1523 1) // __kmpc_atomic_float16_sub_a16_rev 1524 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r, 1525 1) // __kmpc_atomic_float16_div_a16_rev 1526 #endif 1527 #endif 1528 1529 // routines for complex types 1530 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c, 1531 1) // __kmpc_atomic_cmplx4_sub_rev 1532 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c, 1533 1) // __kmpc_atomic_cmplx4_div_rev 1534 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c, 1535 1) // __kmpc_atomic_cmplx8_sub_rev 1536 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c, 1537 1) // __kmpc_atomic_cmplx8_div_rev 1538 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c, 1539 1) // __kmpc_atomic_cmplx10_sub_rev 1540 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c, 1541 1) // __kmpc_atomic_cmplx10_div_rev 1542 #if KMP_HAVE_QUAD 1543 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c, 1544 1) // __kmpc_atomic_cmplx16_sub_rev 1545 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c, 1546 1) // __kmpc_atomic_cmplx16_div_rev 1547 #if (KMP_ARCH_X86) 1548 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1549 1) // __kmpc_atomic_cmplx16_sub_a16_rev 1550 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1551 1) // __kmpc_atomic_cmplx16_div_a16_rev 1552 #endif 1553 #endif 1554 1555 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 1556 // End of OpenMP 4.0: x = expr binop x for non-commutative operations. 
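// For illustration, a hand-simplified sketch (not the literal preprocessor
// output) of what one of the reverse entries above, e.g.
// ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r, KMP_ARCH_X86),
// generates; the GOMP compatibility path and the volatile casts are elided:
//
//   void __kmpc_atomic_float8_sub_rev(ident_t *id_ref, int gtid,
//                                     kmp_real64 *lhs, kmp_real64 rhs) {
//     kmp_real64 old_value, new_value;
//     do {
//       old_value = *lhs;            // re-read the target on every retry
//       new_value = rhs - old_value; // reversed: rhs OP *lhs, not *lhs OP rhs
//     } while (!KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)lhs,
//                                           *(kmp_int64 *)&old_value,
//                                           *(kmp_int64 *)&new_value));
//   }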
#endif // OMP_40_ENABLED

/* ------------------------------------------------------------------------ */
/* Routines for mixed types of LHS and RHS, when RHS is "larger".            */
/* Note: to reduce the total number of type combinations, the compiler is    */
/* expected to convert the RHS to the longest floating-point type, that is   */
/* _Quad, before calling any of these routines.                              */
/* The conversion to _Quad is done by the compiler during the calculation    */
/* and the conversion back to TYPE before the assignment, like:              */
/*   *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                   */
/* A performance penalty is expected because of software emulation.          */
/* ------------------------------------------------------------------------ */

#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
              gtid));

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
                           GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
  }

// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                           LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
// -------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
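// The MASK argument holds the low address bits that must be clear for a
// naturally aligned operand: 0 for 1-byte, 1 for 2-byte, 3 for 4-byte and 7
// for 8-byte operands (see the instantiations below). As a rough sketch of
// the dispatch the macro generates:
//
//   if (((kmp_uintptr_t)lhs & 0x##MASK) == 0) {
//     // aligned address: lock-free compare-and-swap update
//   } else {
//     // unaligned address: fall back to the critical section
//   }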
1601 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1602 LCK_ID, MASK, GOMP_FLAG) \ 1603 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1604 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ 1605 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1606 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1607 } else { \ 1608 KMP_CHECK_GTID; \ 1609 OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ 1610 } \ 1611 } 1612 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1613 1614 // ------------------------------------------------------------------------- 1615 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1616 // ------------------------------------------------------------------------- 1617 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ 1618 RTYPE, LCK_ID, MASK, GOMP_FLAG) \ 1619 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1620 OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ 1621 OP_CMPXCHG_REV(TYPE, BITS, OP) \ 1622 } 1623 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ 1624 LCK_ID, GOMP_FLAG) \ 1625 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1626 OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ 1627 OP_CRITICAL_REV(OP, LCK_ID) \ 1628 } 1629 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1630 1631 // RHS=float8 1632 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, 1633 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8 1634 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, 1635 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8 1636 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, 1637 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8 1638 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, 1639 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8 1640 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 1641 0) // __kmpc_atomic_fixed4_mul_float8 1642 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 1643 0) // __kmpc_atomic_fixed4_div_float8 1644 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, 1645 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8 1646 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, 1647 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8 1648 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, 1649 KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8 1650 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, 1651 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8 1652 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, 1653 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8 1654 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, 1655 KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8 1656 1657 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not 1658 // use them) 1659 #if KMP_HAVE_QUAD 1660 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0, 1661 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp 1662 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0, 1663 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp 1664 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, 1665 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp 1666 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0, 1667 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp 1668 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 
8, *, fp, _Quad, 1i, 0, 1669 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp 1670 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0, 1671 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp 1672 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0, 1673 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp 1674 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, 1675 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp 1676 1677 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1, 1678 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp 1679 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1, 1680 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp 1681 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, 1682 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp 1683 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1, 1684 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp 1685 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, 1686 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp 1687 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1, 1688 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp 1689 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1, 1690 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp 1691 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, 1692 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp 1693 1694 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 1695 0) // __kmpc_atomic_fixed4_add_fp 1696 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3, 1697 0) // __kmpc_atomic_fixed4u_add_fp 1698 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 1699 0) // __kmpc_atomic_fixed4_sub_fp 1700 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3, 1701 0) // __kmpc_atomic_fixed4u_sub_fp 1702 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 1703 0) // __kmpc_atomic_fixed4_mul_fp 1704 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3, 1705 0) // __kmpc_atomic_fixed4u_mul_fp 1706 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 1707 0) // __kmpc_atomic_fixed4_div_fp 1708 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 1709 0) // __kmpc_atomic_fixed4u_div_fp 1710 1711 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, 1712 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp 1713 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7, 1714 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp 1715 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, 1716 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp 1717 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7, 1718 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp 1719 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, 1720 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp 1721 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7, 1722 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp 1723 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, 1724 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp 1725 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, 1726 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp 1727 1728 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, 1729 KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp 1730 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, 1731 KMP_ARCH_X86) // 
__kmpc_atomic_float4_sub_fp 1732 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, 1733 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp 1734 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, 1735 KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp 1736 1737 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, 1738 KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp 1739 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, 1740 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp 1741 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, 1742 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp 1743 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, 1744 KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp 1745 1746 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r, 1747 1) // __kmpc_atomic_float10_add_fp 1748 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r, 1749 1) // __kmpc_atomic_float10_sub_fp 1750 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r, 1751 1) // __kmpc_atomic_float10_mul_fp 1752 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r, 1753 1) // __kmpc_atomic_float10_div_fp 1754 1755 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1756 // Reverse operations 1757 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0, 1758 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp 1759 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0, 1760 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp 1761 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0, 1762 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp 1763 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0, 1764 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp 1765 1766 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1, 1767 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp 1768 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1, 1769 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp 1770 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1, 1771 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp 1772 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1, 1773 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp 1774 1775 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3, 1776 0) // __kmpc_atomic_fixed4_sub_rev_fp 1777 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3, 1778 0) // __kmpc_atomic_fixed4u_sub_rev_fp 1779 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3, 1780 0) // __kmpc_atomic_fixed4_div_rev_fp 1781 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3, 1782 0) // __kmpc_atomic_fixed4u_div_rev_fp 1783 1784 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7, 1785 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp 1786 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7, 1787 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp 1788 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7, 1789 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp 1790 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7, 1791 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp 1792 1793 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3, 1794 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp 
ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp

ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
                       1) // __kmpc_atomic_float10_sub_rev_fp
ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
                       1) // __kmpc_atomic_float10_div_rev_fp
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#endif

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  }
// end of the second part of the workaround for C78287
#else
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
#endif // USE_CMPXCHG_FIX
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8

// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Atomic READ routines

// ------------------------------------------------------------------------
// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *loc) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *loc using "compare_and_store_ret" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *loc only once (w/o it the compiler reads *loc twice)
// TODO: check if it is still necessary
// Return the old value regardless of the result of the "compare & swap"
// operation
#define OP_CMPXCHG_READ(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    union f_i_union { \
      TYPE f_val; \
      kmp_int##BITS i_val; \
    }; \
    union f_i_union old_value; \
    temp_val = *loc; \
    old_value.f_val = temp_val; \
    old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
        (kmp_int##BITS *)loc, \
        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
    new_value = old_value.f_val; \
    return new_value; \
  }

// -------------------------------------------------------------------------
// Read of *loc bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_READ(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  new_value = (*loc); \
\
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_READ(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_READ(OP, 0); \
    return new_value; \
  }
#else
#define OP_GOMP_CRITICAL_READ(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
  return new_value; \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
  OP_CMPXCHG_READ(TYPE, BITS, OP) \
  }
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
1950 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \ 1951 OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \ 1952 return new_value; \ 1953 } 1954 1955 // ------------------------------------------------------------------------ 1956 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return 1957 // value doesn't work. 1958 // Let's return the read value through the additional parameter. 1959 #if (KMP_OS_WINDOWS) 1960 1961 #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \ 1962 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 1963 \ 1964 (*out) = (*loc); \ 1965 \ 1966 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 1967 // ------------------------------------------------------------------------ 1968 #ifdef KMP_GOMP_COMPAT 1969 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \ 1970 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 1971 KMP_CHECK_GTID; \ 1972 OP_CRITICAL_READ_WRK(OP, 0); \ 1973 } 1974 #else 1975 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) 1976 #endif /* KMP_GOMP_COMPAT */ 1977 // ------------------------------------------------------------------------ 1978 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \ 1979 void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \ 1980 TYPE *loc) { \ 1981 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 1982 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 1983 1984 // ------------------------------------------------------------------------ 1985 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1986 ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \ 1987 OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \ 1988 OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \ 1989 } 1990 1991 #endif // KMP_OS_WINDOWS 1992 1993 // ------------------------------------------------------------------------ 1994 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 1995 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd 1996 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +, 1997 KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd 1998 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +, 1999 KMP_ARCH_X86) // __kmpc_atomic_float4_rd 2000 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +, 2001 KMP_ARCH_X86) // __kmpc_atomic_float8_rd 2002 2003 // !!! 
TODO: Remove lock operations for "char" since it can't be non-atomic 2004 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +, 2005 KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd 2006 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +, 2007 KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd 2008 2009 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r, 2010 1) // __kmpc_atomic_float10_rd 2011 #if KMP_HAVE_QUAD 2012 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r, 2013 1) // __kmpc_atomic_float16_rd 2014 #endif // KMP_HAVE_QUAD 2015 2016 // Fix for CQ220361 on Windows* OS 2017 #if (KMP_OS_WINDOWS) 2018 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c, 2019 1) // __kmpc_atomic_cmplx4_rd 2020 #else 2021 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c, 2022 1) // __kmpc_atomic_cmplx4_rd 2023 #endif 2024 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c, 2025 1) // __kmpc_atomic_cmplx8_rd 2026 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c, 2027 1) // __kmpc_atomic_cmplx10_rd 2028 #if KMP_HAVE_QUAD 2029 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c, 2030 1) // __kmpc_atomic_cmplx16_rd 2031 #if (KMP_ARCH_X86) 2032 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r, 2033 1) // __kmpc_atomic_float16_a16_rd 2034 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 2035 1) // __kmpc_atomic_cmplx16_a16_rd 2036 #endif 2037 #endif 2038 2039 // ------------------------------------------------------------------------ 2040 // Atomic WRITE routines 2041 2042 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2043 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2044 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2045 KMP_XCHG_FIXED##BITS(lhs, rhs); \ 2046 } 2047 // ------------------------------------------------------------------------ 2048 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2049 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2050 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2051 KMP_XCHG_REAL##BITS(lhs, rhs); \ 2052 } 2053 2054 // ------------------------------------------------------------------------ 2055 // Operation on *lhs, rhs using "compare_and_store" routine 2056 // TYPE - operands' type 2057 // BITS - size in bits, used to distinguish low level calls 2058 // OP - operator 2059 // Note: temp_val introduced in order to force the compiler to read 2060 // *lhs only once (w/o it the compiler reads *lhs twice) 2061 #define OP_CMPXCHG_WR(TYPE, BITS, OP) \ 2062 { \ 2063 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2064 TYPE old_value, new_value; \ 2065 temp_val = *lhs; \ 2066 old_value = temp_val; \ 2067 new_value = rhs; \ 2068 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2069 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2070 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 2071 KMP_CPU_PAUSE(); \ 2072 \ 2073 temp_val = *lhs; \ 2074 old_value = temp_val; \ 2075 new_value = rhs; \ 2076 } \ 2077 } 2078 2079 // ------------------------------------------------------------------------- 2080 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2081 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2082 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2083 OP_CMPXCHG_WR(TYPE, BITS, OP) \ 2084 } 2085 2086 // ------------------------------------------------------------------------ 2087 // Routines for Extended types: long double, _Quad, complex flavours (use 2088 // critical section) 2089 // TYPE_ID, OP_ID, TYPE - detailed above 2090 // OP - operator 2091 // LCK_ID - lock identifier, used to possibly distinguish lock variable 2092 #define 
ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
  }
// -------------------------------------------------------------------------

ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#else
ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#endif

ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_wr
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
                  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#else
ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#endif

ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
                   1) // __kmpc_atomic_float10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
                   1) // __kmpc_atomic_float16_wr
#endif
ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
                   1) // __kmpc_atomic_cmplx8_wr
ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
                   1) // __kmpc_atomic_cmplx10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
                   1) // __kmpc_atomic_cmplx16_wr
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
                   1) // __kmpc_atomic_float16_a16_wr
ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
                   1) // __kmpc_atomic_cmplx16_a16_wr
#endif
#endif

// ------------------------------------------------------------------------
// Atomic CAPTURE routines

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
2152 // TYPE - operands' type 2153 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ 2154 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \ 2155 TYPE *lhs, TYPE rhs, int flag) { \ 2156 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2157 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 2158 2159 // ------------------------------------------------------------------------- 2160 // Operation on *lhs, rhs bound by critical section 2161 // OP - operator (it's supposed to contain an assignment) 2162 // LCK_ID - lock identifier 2163 // Note: don't check gtid as it should always be valid 2164 // 1, 2-byte - expect valid parameter, other - check before this macro 2165 #define OP_CRITICAL_CPT(OP, LCK_ID) \ 2166 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2167 \ 2168 if (flag) { \ 2169 (*lhs) OP rhs; \ 2170 new_value = (*lhs); \ 2171 } else { \ 2172 new_value = (*lhs); \ 2173 (*lhs) OP rhs; \ 2174 } \ 2175 \ 2176 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2177 return new_value; 2178 2179 // ------------------------------------------------------------------------ 2180 #ifdef KMP_GOMP_COMPAT 2181 #define OP_GOMP_CRITICAL_CPT(OP, FLAG) \ 2182 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2183 KMP_CHECK_GTID; \ 2184 OP_CRITICAL_CPT(OP## =, 0); \ 2185 } 2186 #else 2187 #define OP_GOMP_CRITICAL_CPT(OP, FLAG) 2188 #endif /* KMP_GOMP_COMPAT */ 2189 2190 // ------------------------------------------------------------------------ 2191 // Operation on *lhs, rhs using "compare_and_store" routine 2192 // TYPE - operands' type 2193 // BITS - size in bits, used to distinguish low level calls 2194 // OP - operator 2195 // Note: temp_val introduced in order to force the compiler to read 2196 // *lhs only once (w/o it the compiler reads *lhs twice) 2197 #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2198 { \ 2199 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2200 TYPE old_value, new_value; \ 2201 temp_val = *lhs; \ 2202 old_value = temp_val; \ 2203 new_value = old_value OP rhs; \ 2204 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2205 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2206 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 2207 KMP_CPU_PAUSE(); \ 2208 \ 2209 temp_val = *lhs; \ 2210 old_value = temp_val; \ 2211 new_value = old_value OP rhs; \ 2212 } \ 2213 if (flag) { \ 2214 return new_value; \ 2215 } else \ 2216 return old_value; \ 2217 } 2218 2219 // ------------------------------------------------------------------------- 2220 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2221 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2222 TYPE new_value; \ 2223 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \ 2224 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2225 } 2226 2227 // ------------------------------------------------------------------------- 2228 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2229 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2230 TYPE old_value, new_value; \ 2231 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \ 2232 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ 2233 old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ 2234 if (flag) { \ 2235 return old_value OP rhs; \ 2236 } else \ 2237 return old_value; \ 2238 } 2239 // ------------------------------------------------------------------------- 2240 2241 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +, 2242 0) // __kmpc_atomic_fixed4_add_cpt 2243 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -, 2244 0) // 
__kmpc_atomic_fixed4_sub_cpt 2245 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +, 2246 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt 2247 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -, 2248 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt 2249 2250 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +, 2251 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt 2252 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -, 2253 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt 2254 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +, 2255 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt 2256 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -, 2257 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt 2258 2259 // ------------------------------------------------------------------------ 2260 // Entries definition for integer operands 2261 // TYPE_ID - operands type and size (fixed4, float4) 2262 // OP_ID - operation identifier (add, sub, mul, ...) 2263 // TYPE - operand type 2264 // BITS - size in bits, used to distinguish low level calls 2265 // OP - operator (used in critical section) 2266 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG 2267 // ------------------------------------------------------------------------ 2268 // Routines for ATOMIC integer operands, other operators 2269 // ------------------------------------------------------------------------ 2270 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 2271 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +, 2272 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt 2273 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &, 2274 0) // __kmpc_atomic_fixed1_andb_cpt 2275 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /, 2276 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt 2277 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /, 2278 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt 2279 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *, 2280 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt 2281 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |, 2282 0) // __kmpc_atomic_fixed1_orb_cpt 2283 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<, 2284 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt 2285 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>, 2286 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt 2287 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>, 2288 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt 2289 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -, 2290 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt 2291 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^, 2292 0) // __kmpc_atomic_fixed1_xor_cpt 2293 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +, 2294 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt 2295 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &, 2296 0) // __kmpc_atomic_fixed2_andb_cpt 2297 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /, 2298 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt 2299 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /, 2300 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt 2301 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *, 2302 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt 2303 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |, 2304 0) // __kmpc_atomic_fixed2_orb_cpt 2305 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<, 2306 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt 2307 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>, 2308 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt 2309 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>, 2310 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt 
ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
                   0) // __kmpc_atomic_fixed2_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
                   0) // __kmpc_atomic_fixed4_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
                   0) // __kmpc_atomic_fixed4_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
                   0) // __kmpc_atomic_fixed4_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG

// CAPTURE routines for mixed types RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
2368 // TYPE - operands' type 2369 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2370 TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \ 2371 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \ 2372 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2373 KA_TRACE(100, \ 2374 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \ 2375 gtid)); 2376 2377 // ------------------------------------------------------------------------- 2378 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ 2379 RTYPE, LCK_ID, MASK, GOMP_FLAG) \ 2380 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2381 TYPE new_value; \ 2382 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \ 2383 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2384 } 2385 2386 // ------------------------------------------------------------------------- 2387 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ 2388 LCK_ID, GOMP_FLAG) \ 2389 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2390 TYPE new_value; \ 2391 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \ 2392 OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \ 2393 } 2394 2395 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0, 2396 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp 2397 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0, 2398 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp 2399 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0, 2400 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp 2401 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0, 2402 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp 2403 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0, 2404 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp 2405 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0, 2406 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp 2407 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0, 2408 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp 2409 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0, 2410 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp 2411 2412 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1, 2413 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp 2414 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1, 2415 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp 2416 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1, 2417 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp 2418 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1, 2419 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp 2420 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1, 2421 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp 2422 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1, 2423 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp 2424 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1, 2425 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp 2426 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1, 2427 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp 2428 2429 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3, 2430 0) // __kmpc_atomic_fixed4_add_cpt_fp 2431 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3, 2432 0) // __kmpc_atomic_fixed4u_add_cpt_fp 
2433 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 2434 0) // __kmpc_atomic_fixed4_sub_cpt_fp 2435 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 2436 0) // __kmpc_atomic_fixed4u_sub_cpt_fp 2437 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 2438 0) // __kmpc_atomic_fixed4_mul_cpt_fp 2439 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 2440 0) // __kmpc_atomic_fixed4u_mul_cpt_fp 2441 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3, 2442 0) // __kmpc_atomic_fixed4_div_cpt_fp 2443 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3, 2444 0) // __kmpc_atomic_fixed4u_div_cpt_fp 2445 2446 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7, 2447 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp 2448 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7, 2449 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp 2450 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7, 2451 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp 2452 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7, 2453 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp 2454 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7, 2455 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp 2456 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7, 2457 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp 2458 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7, 2459 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp 2460 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7, 2461 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp 2462 2463 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3, 2464 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp 2465 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3, 2466 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp 2467 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3, 2468 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp 2469 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3, 2470 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp 2471 2472 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7, 2473 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp 2474 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7, 2475 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp 2476 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7, 2477 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp 2478 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7, 2479 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp 2480 2481 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r, 2482 1) // __kmpc_atomic_float10_add_cpt_fp 2483 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r, 2484 1) // __kmpc_atomic_float10_sub_cpt_fp 2485 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r, 2486 1) // __kmpc_atomic_float10_mul_cpt_fp 2487 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r, 2488 1) // __kmpc_atomic_float10_div_cpt_fp 2489 2490 #endif // KMP_HAVE_QUAD 2491 2492 // 
------------------------------------------------------------------------
// Routines for C/C++ Reduction operators && and ||

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_L_CPT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  if (flag) { \
    new_value OP rhs; \
  } else \
    new_value = (*lhs); \
\
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_L_CPT(OP, 0); \
    return new_value; \
  }
#else
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Need separate macros for && and || because there is no combined
// assignment operator for them
#define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }

ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
                  0) // __kmpc_atomic_fixed4_andl_cpt
ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
                  0) // __kmpc_atomic_fixed4_orl_cpt
ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt

// -------------------------------------------------------------------------
// Routines for Fortran operators that have no C equivalents:
// MAX, MIN, .EQV., .NEQV.
// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - comparison operator used to check whether any action is still needed
#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  if (*lhs OP rhs) { /* still need actions? */ \
    old_value = *lhs; \
    *lhs = rhs; \
    if (flag) \
      new_value = rhs; \
    else \
      new_value = old_value; \
  } else { \
    /* another thread already updated *lhs; capture its current value */ \
    new_value = *lhs; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    MIN_MAX_CRITSECT_CPT(OP, 0); \
  }
#else
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    /*TYPE old_value; */ \
    temp_val = *lhs; \
    old_value = temp_val; \
    while (old_value OP rhs && /* still need actions? */ \
           !KMP_COMPARE_AND_STORE_ACQ##BITS( \
               (kmp_int##BITS *)lhs, \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
      KMP_CPU_PAUSE(); \
      temp_val = *lhs; \
      old_value = temp_val; \
    } \
    if (flag) \
      return rhs; \
    else \
      return old_value; \
  }

// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value, old_value; \
  if (*lhs OP rhs) { /* need actions? */ \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
    MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
  } \
  return *lhs; \
  }

#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value, old_value; \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
    MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
  } \
  return *lhs; \
  }

MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
                     0) // __kmpc_atomic_fixed4_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
                     0) // __kmpc_atomic_fixed4_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
                     1) // __kmpc_atomic_float16_max_cpt
MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
                     1) // __kmpc_atomic_float16_min_cpt
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
                     1) // __kmpc_atomic_float16_max_a16_cpt
MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
                     1) // __kmpc_atomic_float16_min_a16_cpt
#endif
#endif

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_EQV_CPT(^= ~, GOMP_FLAG) /* send assignment */ \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }

// ------------------------------------------------------------------------

ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \
  }

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
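// For example, the cmplx4 capture entries generated below take the following
// shape (a sketch of the ATOMIC_CRITICAL_CPT_WRK expansion, not its literal
// output):
//
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag) {
//     // body runs under the 8c atomic lock:
//     if (flag) {
//       (*lhs) += rhs;   // update first,
//       (*out) = (*lhs); // then capture the new value
//     } else {
//       (*out) = (*lhs); // capture the old value,
//       (*lhs) += rhs;   // then update
//     }
//   }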
// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
#define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (flag) { \
    (*lhs) OP rhs; \
    (*out) = (*lhs); \
  } else { \
    (*out) = (*lhs); \
    (*lhs) OP rhs; \
  } \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_WRK(OP## =, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
                                         TYPE rhs, TYPE *out, int flag) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
  }
// The end of workaround for cmplx4

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
                    1) // __kmpc_atomic_float10_add_cpt
ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
                    1) // __kmpc_atomic_float10_sub_cpt
ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
                    1) // __kmpc_atomic_float10_mul_cpt
ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
                    1) // __kmpc_atomic_float10_div_cpt
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
                    1) // __kmpc_atomic_float16_add_cpt
ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
                    1) // __kmpc_atomic_float16_sub_cpt
ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
                    1) // __kmpc_atomic_float16_mul_cpt
ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
                    1) // __kmpc_atomic_float16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
                    1) // __kmpc_atomic_float16_add_a16_cpt
ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
                    1) // __kmpc_atomic_float16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
                    1) // __kmpc_atomic_float16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
                    1) // __kmpc_atomic_float16_div_a16_cpt
#endif
#endif

// routines for complex types

// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
                        1) // __kmpc_atomic_cmplx4_add_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
                        1) // __kmpc_atomic_cmplx4_sub_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
                        1) // __kmpc_atomic_cmplx4_mul_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
                        1) // __kmpc_atomic_cmplx4_div_cpt
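
// For reference, a sketch (not compiled) of how a compiler would call the
// void-return cmplx4 capture routines above; loc and gtid stand in for the
// usual source-location and thread-id arguments:
// @code
// kmp_cmplx32 x, v;        // x is updated atomically, v receives the capture
// kmp_cmplx32 expr = ...;  // right-hand side
// // v = (x += expr); captured *after* the update, so flag == 1:
// __kmpc_atomic_cmplx4_add_cpt(&loc, gtid, &x, expr, &v, 1);
// @endcode
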
ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
                    1) // __kmpc_atomic_cmplx8_add_cpt
ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
                    1) // __kmpc_atomic_cmplx8_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
                    1) // __kmpc_atomic_cmplx8_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
                    1) // __kmpc_atomic_cmplx8_div_cpt
ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
                    1) // __kmpc_atomic_cmplx10_add_cpt
ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
                    1) // __kmpc_atomic_cmplx10_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
                    1) // __kmpc_atomic_cmplx10_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
                    1) // __kmpc_atomic_cmplx10_div_cpt
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_a16_cpt
#endif
#endif
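
// For reference, a rough sketch (not compiled) of the critical-section
// capture shape generated above, e.g. for __kmpc_atomic_float10_add_cpt;
// the lock name follows the size-specific locks used by the generic
// routines at the end of this file:
// @code
// long double __kmpc_atomic_float10_add_cpt(ident_t *id_ref, int gtid,
//                                           long double *lhs, long double rhs,
//                                           int flag) {
//   long double new_value;
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//   if (flag)
//     new_value = (*lhs += rhs); // capture after the update
//   else {
//     new_value = *lhs;          // capture before the update
//     *lhs += rhs;
//   }
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//   return new_value;
// }
// @endcode
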
#if OMP_40_ENABLED

// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr
// binop x; v = x; } for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_CPT_REV(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (flag) { \
    /*temp_val = (*lhs);*/ \
    (*lhs) = (rhs)OP(*lhs); \
    new_value = (*lhs); \
  } else { \
    new_value = (*lhs); \
    (*lhs) = (rhs)OP(*lhs); \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_REV(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs OP old_value; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_CPU_PAUSE(); \
      \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs OP old_value; \
    } \
    if (flag) { \
      return new_value; \
    } else \
      return old_value; \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  }
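
// For reference (not compiled): a reverse capture such as
// __kmpc_atomic_fixed4_sub_cpt_rev, instantiated below, implements
// { v = x; x = rhs - x; } or { x = rhs - x; v = x; } depending on flag,
// i.e. the operands are swapped relative to the plain _cpt form.
// A sketch of the user-level effect; loc and gtid are placeholders:
// @code
// kmp_int32 x = 10, v;
// // #pragma omp atomic capture: { x = 3 - x; v = x; }
// v = __kmpc_atomic_fixed4_sub_cpt_rev(&loc, gtid, &x, 3, 1);
// // now x == -7 and v == -7
// @endcode
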
ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
// TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_REV(OP, LCK_ID) \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
                        1) // __kmpc_atomic_float10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
                        1) // __kmpc_atomic_float10_div_cpt_rev
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
                        1) // __kmpc_atomic_float16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
                        1) // __kmpc_atomic_float16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
                        1) // __kmpc_atomic_float16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
                        1) // __kmpc_atomic_float16_div_a16_cpt_rev
#endif
#endif

// routines for complex types

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
#define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (flag) { \
    (*lhs) = (rhs)OP(*lhs); \
    (*out) = (*lhs); \
  } else { \
    (*out) = (*lhs); \
    (*lhs) = (rhs)OP(*lhs); \
  } \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_REV_WRK(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
                                    GOMP_FLAG) \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
  }
// The end of workaround for cmplx4
// !!! TODO: check if we need to return void for cmplx4 routines
// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
                            1) // __kmpc_atomic_cmplx4_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
                            1) // __kmpc_atomic_cmplx4_div_cpt_rev

ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
                        1) // __kmpc_atomic_cmplx8_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
                        1) // __kmpc_atomic_cmplx8_div_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
                        1) // __kmpc_atomic_cmplx10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
                        1) // __kmpc_atomic_cmplx10_div_cpt_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
#endif
#endif

// Capture reverse for mixed type: RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                                   RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                                    LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_CPT_REV(OP, LCK_ID) /* send assignment */ \
  }
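
// For reference (not compiled): the _fp suffix on the routines instantiated
// below marks mixed-type variants whose right-hand side is _Quad while the
// target keeps its own type; e.g. the first instantiation yields, in sketch:
// @code
// char __kmpc_atomic_fixed1_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
//                                          char *lhs, _Quad rhs, int flag);
// // effect: { v = x; x = (char)(rhs - x); } or the flag == 1 variant
// @endcode
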
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp

ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp

#endif // KMP_HAVE_QUAD

// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}

#define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
                                     TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP(LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  old_value = (*lhs); \
  (*lhs) = rhs; \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return old_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP(FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    CRITICAL_SWP(0); \
  }
#else
#define GOMP_CRITICAL_SWP(FLAG)
#endif /* KMP_GOMP_COMPAT */

#define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
  return old_value; \
  }
// ------------------------------------------------------------------------
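
// For reference, a sketch (not compiled) of the capture-write form served by
// the _swp entry points generated around here; loc and gtid are placeholders:
// @code
// kmp_int32 x = 1, v;
// // #pragma omp atomic capture: { v = x; x = 42; }
// v = __kmpc_atomic_fixed4_swp(&loc, gtid, &x, 42); // v == 1, x == 42
// @endcode
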
#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
  return old_value; \
  }

// ------------------------------------------------------------------------
#define CMPXCHG_SWP(TYPE, BITS) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_CPU_PAUSE(); \
      \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs; \
    } \
    return old_value; \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  CMPXCHG_SWP(TYPE, BITS) \
  }

ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp

ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
                      KMP_ARCH_X86) // __kmpc_atomic_float4_swp

#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#else
ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
                      KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#endif

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
#define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  CRITICAL_SWP(LCK_ID) \
  }
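
// Note on the 8-byte dispatch above: 32-bit IA-32 has no 8-byte atomic
// exchange instruction, so the fixed8/float8 swaps fall back to the
// CMPXCHG_SWP compare-and-store loop there; 64-bit targets can use the
// plain KMP_XCHG_* exchange instead.
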
// ------------------------------------------------------------------------
// !!! TODO: check if we need to return void for cmplx4 routines
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.

#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
                                     TYPE rhs, TYPE *out) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP_WRK(LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  tmp = (*lhs); \
  (*lhs) = (rhs); \
  (*out) = tmp; \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP_WRK(FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    CRITICAL_SWP_WRK(0); \
  }
#else
#define GOMP_CRITICAL_SWP_WRK(FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
  TYPE tmp; \
  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
  CRITICAL_SWP_WRK(LCK_ID) \
  }
// The end of workaround for cmplx4

ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
#endif
// cmplx4 routine to return void
ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp

// ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
// __kmpc_atomic_cmplx4_swp

ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
                    1) // __kmpc_atomic_float16_a16_swp
ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
                    1) // __kmpc_atomic_cmplx16_a16_swp
#endif
#endif

// End of OpenMP 4.0 Capture

#endif // OMP_40_ENABLED

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

#undef OP_CRITICAL

/* ------------------------------------------------------------------------ */
/* Generic atomic routines */
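
// The generic routines below take the operation as a callback f(result, a, b)
// and either run a compare-and-store loop (when the operand size and
// alignment allow it) or serialize through a size-specific lock. A sketch
// (not compiled) of how a compiler might lower an unrecognized 4-byte atomic
// through this interface; add_fn, loc and gtid are hypothetical:
// @code
// static void add_fn(void *out, void *a, void *b) {
//   *(kmp_int32 *)out = *(kmp_int32 *)a + *(kmp_int32 *)b;
// }
// // #pragma omp atomic: x += y;
// __kmpc_atomic_4(&loc, gtid, &x, &y, add_fn);
// @endcode
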
void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#else
      TRUE
#endif
      ) {
    kmp_int8 old_value, new_value;

    old_value = *(kmp_int8 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
                                       *(kmp_int8 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int8 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 1-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
  }
}

void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
#endif
      ) {
    kmp_int16 old_value, new_value;

    old_value = *(kmp_int16 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ16(
        (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int16 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 2-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
  }
}

void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
      // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
      // Gomp compatibility is broken if this routine is called for floats.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
#endif
      ) {
    kmp_int32 old_value, new_value;

    old_value = *(kmp_int32 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ32(
        (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int32 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_4i for all 4-byte data,
    // even if it isn't of integer data type.
#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
  }
}

void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (

#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
#endif
      ) {
    kmp_int64 old_value, new_value;

    old_value = *(kmp_int64 *)lhs;
    (*f)(&new_value, &old_value, rhs);
    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ64(
        (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int64 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_8i for all 8-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
  }
}

void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
}

void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
}

void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
}

void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
}

// AC: same two routines as GOMP_atomic_start/end, but will be called by our
// compiler; duplicated in order to not use third-party names in pure Intel
// code
// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
void __kmpc_atomic_start(void) {
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
}

void __kmpc_atomic_end(void) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
}

/*!
@}
*/

// end of file