/*
 * kmp_atomic.cpp -- ATOMIC implementation routines
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp_atomic.h"
#include "kmp.h" // TRUE, asm routines prototypes

typedef unsigned char uchar;
typedef unsigned short ushort;

/*!
@defgroup ATOMIC_OPS Atomic Operations
These functions are used for implementing the many different varieties of
atomic operations.

The compiler is at liberty to inline atomic operations that are naturally
supported by the target architecture. For instance on IA-32 architecture an
atomic like this can be inlined
@code
static int s = 0;
#pragma omp atomic
s++;
@endcode
using the single instruction: `lock; incl s`

However the runtime does provide entrypoints for these operations to support
compilers that choose not to inline them. (For instance,
`__kmpc_atomic_fixed4_add` could be used to perform the increment above.)

The names of the functions are encoded by using the data type name and the
operation name, as in these tables.

Data Type  | Data type encoding
-----------|-------------------
int8_t     | `fixed1`
uint8_t    | `fixed1u`
int16_t    | `fixed2`
uint16_t   | `fixed2u`
int32_t    | `fixed4`
uint32_t   | `fixed4u`
int64_t    | `fixed8`
uint64_t   | `fixed8u`
float      | `float4`
double     | `float8`
float 10 (8087 eighty bit float) | `float10`
complex<float>   | `cmplx4`
complex<double>  | `cmplx8`
complex<float10> | `cmplx10`
<br>

Operation | Operation encoding
----------|-------------------
+ | add
- | sub
\* | mul
/ | div
& | andb
<< | shl
\>\> | shr
\| | orb
^ | xor
&& | andl
\|\| | orl
maximum | max
minimum | min
.eqv. | eqv
.neqv. | neqv

<br>
For non-commutative operations, `_rev` can also be added for the reversed
operation. For the functions that capture the result, the suffix `_cpt` is
added.

Update Functions
================
The general form of an atomic function that just performs an update (without a
`capture`) is
@code
void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs );
@endcode
@param ident_t a pointer to source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand

`capture` functions
===================
The capture functions perform an atomic update and return a result, which is
either the value before the capture, or that after. They take an additional
argument to determine which result is returned.
Their general form is therefore
@code
TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs, int flag );
@endcode
@param ident_t a pointer to source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand
@param flag one if the result is to be captured *after* the operation, zero if
captured *before*.

The one set of exceptions to this is the `complex<float>` type where the value
is not returned, rather an extra argument pointer is passed.

They look like
@code
void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 *
lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
@endcode

Read and Write Operations
=========================
The OpenMP<sup>*</sup> standard now supports atomic operations that simply
ensure that the value is read or written atomically, with no modification
performed. In many cases on IA-32 architecture these operations can be inlined
since the architecture guarantees that no tearing occurs on aligned objects
accessed with a single memory operation of up to 64 bits in size.

The general form of the read operations is
@code
TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
@endcode

For the write operations the form is
@code
void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs
);
@endcode

Full list of functions
======================
This leads to the generation of 376 atomic functions, as follows.

Functions for integers
----------------------
There are versions here for integers of size 1, 2, 4 and 8 bytes, both signed
and unsigned (where that matters).
@code
__kmpc_atomic_fixed1_add
__kmpc_atomic_fixed1_add_cpt
__kmpc_atomic_fixed1_add_fp
__kmpc_atomic_fixed1_andb
__kmpc_atomic_fixed1_andb_cpt
__kmpc_atomic_fixed1_andl
__kmpc_atomic_fixed1_andl_cpt
__kmpc_atomic_fixed1_div
__kmpc_atomic_fixed1_div_cpt
__kmpc_atomic_fixed1_div_cpt_rev
__kmpc_atomic_fixed1_div_float8
__kmpc_atomic_fixed1_div_fp
__kmpc_atomic_fixed1_div_rev
__kmpc_atomic_fixed1_eqv
__kmpc_atomic_fixed1_eqv_cpt
__kmpc_atomic_fixed1_max
__kmpc_atomic_fixed1_max_cpt
__kmpc_atomic_fixed1_min
__kmpc_atomic_fixed1_min_cpt
__kmpc_atomic_fixed1_mul
__kmpc_atomic_fixed1_mul_cpt
__kmpc_atomic_fixed1_mul_float8
__kmpc_atomic_fixed1_mul_fp
__kmpc_atomic_fixed1_neqv
__kmpc_atomic_fixed1_neqv_cpt
__kmpc_atomic_fixed1_orb
__kmpc_atomic_fixed1_orb_cpt
__kmpc_atomic_fixed1_orl
__kmpc_atomic_fixed1_orl_cpt
__kmpc_atomic_fixed1_rd
__kmpc_atomic_fixed1_shl
__kmpc_atomic_fixed1_shl_cpt
__kmpc_atomic_fixed1_shl_cpt_rev
__kmpc_atomic_fixed1_shl_rev
__kmpc_atomic_fixed1_shr
__kmpc_atomic_fixed1_shr_cpt
__kmpc_atomic_fixed1_shr_cpt_rev
__kmpc_atomic_fixed1_shr_rev
__kmpc_atomic_fixed1_sub
__kmpc_atomic_fixed1_sub_cpt
__kmpc_atomic_fixed1_sub_cpt_rev
__kmpc_atomic_fixed1_sub_fp
__kmpc_atomic_fixed1_sub_rev
__kmpc_atomic_fixed1_swp
__kmpc_atomic_fixed1_wr
__kmpc_atomic_fixed1_xor
__kmpc_atomic_fixed1_xor_cpt
__kmpc_atomic_fixed1u_add_fp
__kmpc_atomic_fixed1u_sub_fp
__kmpc_atomic_fixed1u_mul_fp
__kmpc_atomic_fixed1u_div
__kmpc_atomic_fixed1u_div_cpt
__kmpc_atomic_fixed1u_div_cpt_rev
__kmpc_atomic_fixed1u_div_fp
__kmpc_atomic_fixed1u_div_rev
__kmpc_atomic_fixed1u_shr
__kmpc_atomic_fixed1u_shr_cpt
__kmpc_atomic_fixed1u_shr_cpt_rev
__kmpc_atomic_fixed1u_shr_rev
__kmpc_atomic_fixed2_add
__kmpc_atomic_fixed2_add_cpt
__kmpc_atomic_fixed2_add_fp
__kmpc_atomic_fixed2_andb
__kmpc_atomic_fixed2_andb_cpt
__kmpc_atomic_fixed2_andl
__kmpc_atomic_fixed2_andl_cpt
__kmpc_atomic_fixed2_div
__kmpc_atomic_fixed2_div_cpt
__kmpc_atomic_fixed2_div_cpt_rev
__kmpc_atomic_fixed2_div_float8
__kmpc_atomic_fixed2_div_fp
__kmpc_atomic_fixed2_div_rev
__kmpc_atomic_fixed2_eqv
__kmpc_atomic_fixed2_eqv_cpt
__kmpc_atomic_fixed2_max
__kmpc_atomic_fixed2_max_cpt
__kmpc_atomic_fixed2_min
__kmpc_atomic_fixed2_min_cpt
__kmpc_atomic_fixed2_mul
__kmpc_atomic_fixed2_mul_cpt
__kmpc_atomic_fixed2_mul_float8
__kmpc_atomic_fixed2_mul_fp
__kmpc_atomic_fixed2_neqv
__kmpc_atomic_fixed2_neqv_cpt
__kmpc_atomic_fixed2_orb
__kmpc_atomic_fixed2_orb_cpt
__kmpc_atomic_fixed2_orl
__kmpc_atomic_fixed2_orl_cpt
__kmpc_atomic_fixed2_rd
__kmpc_atomic_fixed2_shl
__kmpc_atomic_fixed2_shl_cpt
__kmpc_atomic_fixed2_shl_cpt_rev
__kmpc_atomic_fixed2_shl_rev
__kmpc_atomic_fixed2_shr
__kmpc_atomic_fixed2_shr_cpt
__kmpc_atomic_fixed2_shr_cpt_rev
__kmpc_atomic_fixed2_shr_rev
__kmpc_atomic_fixed2_sub
__kmpc_atomic_fixed2_sub_cpt
__kmpc_atomic_fixed2_sub_cpt_rev
__kmpc_atomic_fixed2_sub_fp
__kmpc_atomic_fixed2_sub_rev
__kmpc_atomic_fixed2_swp
__kmpc_atomic_fixed2_wr
__kmpc_atomic_fixed2_xor
__kmpc_atomic_fixed2_xor_cpt
__kmpc_atomic_fixed2u_add_fp
__kmpc_atomic_fixed2u_sub_fp
__kmpc_atomic_fixed2u_mul_fp
__kmpc_atomic_fixed2u_div
__kmpc_atomic_fixed2u_div_cpt
__kmpc_atomic_fixed2u_div_cpt_rev
__kmpc_atomic_fixed2u_div_fp
__kmpc_atomic_fixed2u_div_rev
__kmpc_atomic_fixed2u_shr
__kmpc_atomic_fixed2u_shr_cpt
__kmpc_atomic_fixed2u_shr_cpt_rev
__kmpc_atomic_fixed2u_shr_rev
__kmpc_atomic_fixed4_add
__kmpc_atomic_fixed4_add_cpt
__kmpc_atomic_fixed4_add_fp
__kmpc_atomic_fixed4_andb
__kmpc_atomic_fixed4_andb_cpt
__kmpc_atomic_fixed4_andl
__kmpc_atomic_fixed4_andl_cpt
__kmpc_atomic_fixed4_div
__kmpc_atomic_fixed4_div_cpt
__kmpc_atomic_fixed4_div_cpt_rev
__kmpc_atomic_fixed4_div_float8
__kmpc_atomic_fixed4_div_fp
__kmpc_atomic_fixed4_div_rev
__kmpc_atomic_fixed4_eqv
__kmpc_atomic_fixed4_eqv_cpt
__kmpc_atomic_fixed4_max
__kmpc_atomic_fixed4_max_cpt
__kmpc_atomic_fixed4_min
__kmpc_atomic_fixed4_min_cpt
__kmpc_atomic_fixed4_mul
__kmpc_atomic_fixed4_mul_cpt
__kmpc_atomic_fixed4_mul_float8
__kmpc_atomic_fixed4_mul_fp
__kmpc_atomic_fixed4_neqv
__kmpc_atomic_fixed4_neqv_cpt
__kmpc_atomic_fixed4_orb
__kmpc_atomic_fixed4_orb_cpt
__kmpc_atomic_fixed4_orl
__kmpc_atomic_fixed4_orl_cpt
__kmpc_atomic_fixed4_rd
__kmpc_atomic_fixed4_shl
__kmpc_atomic_fixed4_shl_cpt
__kmpc_atomic_fixed4_shl_cpt_rev
__kmpc_atomic_fixed4_shl_rev
__kmpc_atomic_fixed4_shr
__kmpc_atomic_fixed4_shr_cpt
__kmpc_atomic_fixed4_shr_cpt_rev
__kmpc_atomic_fixed4_shr_rev
__kmpc_atomic_fixed4_sub
__kmpc_atomic_fixed4_sub_cpt
__kmpc_atomic_fixed4_sub_cpt_rev
__kmpc_atomic_fixed4_sub_fp
__kmpc_atomic_fixed4_sub_rev
__kmpc_atomic_fixed4_swp
__kmpc_atomic_fixed4_wr
__kmpc_atomic_fixed4_xor
__kmpc_atomic_fixed4_xor_cpt
__kmpc_atomic_fixed4u_add_fp
__kmpc_atomic_fixed4u_sub_fp
__kmpc_atomic_fixed4u_mul_fp
__kmpc_atomic_fixed4u_div
__kmpc_atomic_fixed4u_div_cpt
__kmpc_atomic_fixed4u_div_cpt_rev
__kmpc_atomic_fixed4u_div_fp
__kmpc_atomic_fixed4u_div_rev
__kmpc_atomic_fixed4u_shr
__kmpc_atomic_fixed4u_shr_cpt
__kmpc_atomic_fixed4u_shr_cpt_rev
__kmpc_atomic_fixed4u_shr_rev
__kmpc_atomic_fixed8_add
__kmpc_atomic_fixed8_add_cpt
__kmpc_atomic_fixed8_add_fp
__kmpc_atomic_fixed8_andb
__kmpc_atomic_fixed8_andb_cpt
__kmpc_atomic_fixed8_andl
__kmpc_atomic_fixed8_andl_cpt
__kmpc_atomic_fixed8_div
__kmpc_atomic_fixed8_div_cpt
__kmpc_atomic_fixed8_div_cpt_rev
__kmpc_atomic_fixed8_div_float8
__kmpc_atomic_fixed8_div_fp
__kmpc_atomic_fixed8_div_rev
__kmpc_atomic_fixed8_eqv
__kmpc_atomic_fixed8_eqv_cpt
__kmpc_atomic_fixed8_max
__kmpc_atomic_fixed8_max_cpt
__kmpc_atomic_fixed8_min
__kmpc_atomic_fixed8_min_cpt
__kmpc_atomic_fixed8_mul
__kmpc_atomic_fixed8_mul_cpt
__kmpc_atomic_fixed8_mul_float8
__kmpc_atomic_fixed8_mul_fp
__kmpc_atomic_fixed8_neqv
__kmpc_atomic_fixed8_neqv_cpt
__kmpc_atomic_fixed8_orb
__kmpc_atomic_fixed8_orb_cpt
__kmpc_atomic_fixed8_orl
__kmpc_atomic_fixed8_orl_cpt
__kmpc_atomic_fixed8_rd
__kmpc_atomic_fixed8_shl
__kmpc_atomic_fixed8_shl_cpt
__kmpc_atomic_fixed8_shl_cpt_rev
__kmpc_atomic_fixed8_shl_rev
__kmpc_atomic_fixed8_shr
__kmpc_atomic_fixed8_shr_cpt
__kmpc_atomic_fixed8_shr_cpt_rev
__kmpc_atomic_fixed8_shr_rev
__kmpc_atomic_fixed8_sub
__kmpc_atomic_fixed8_sub_cpt
__kmpc_atomic_fixed8_sub_cpt_rev
__kmpc_atomic_fixed8_sub_fp
__kmpc_atomic_fixed8_sub_rev
__kmpc_atomic_fixed8_swp
__kmpc_atomic_fixed8_wr
__kmpc_atomic_fixed8_xor
__kmpc_atomic_fixed8_xor_cpt
__kmpc_atomic_fixed8u_add_fp
__kmpc_atomic_fixed8u_sub_fp
__kmpc_atomic_fixed8u_mul_fp
__kmpc_atomic_fixed8u_div
__kmpc_atomic_fixed8u_div_cpt
__kmpc_atomic_fixed8u_div_cpt_rev
__kmpc_atomic_fixed8u_div_fp
__kmpc_atomic_fixed8u_div_rev
__kmpc_atomic_fixed8u_shr
__kmpc_atomic_fixed8u_shr_cpt
__kmpc_atomic_fixed8u_shr_cpt_rev
__kmpc_atomic_fixed8u_shr_rev
@endcode

Functions for floating point
----------------------------
There are versions here for floating point numbers of size 4, 8, 10 and 16
bytes. (Ten byte floats are used by X87, but are now rare.)
@code
__kmpc_atomic_float4_add
__kmpc_atomic_float4_add_cpt
__kmpc_atomic_float4_add_float8
__kmpc_atomic_float4_add_fp
__kmpc_atomic_float4_div
__kmpc_atomic_float4_div_cpt
__kmpc_atomic_float4_div_cpt_rev
__kmpc_atomic_float4_div_float8
__kmpc_atomic_float4_div_fp
__kmpc_atomic_float4_div_rev
__kmpc_atomic_float4_max
__kmpc_atomic_float4_max_cpt
__kmpc_atomic_float4_min
__kmpc_atomic_float4_min_cpt
__kmpc_atomic_float4_mul
__kmpc_atomic_float4_mul_cpt
__kmpc_atomic_float4_mul_float8
__kmpc_atomic_float4_mul_fp
__kmpc_atomic_float4_rd
__kmpc_atomic_float4_sub
__kmpc_atomic_float4_sub_cpt
__kmpc_atomic_float4_sub_cpt_rev
__kmpc_atomic_float4_sub_float8
__kmpc_atomic_float4_sub_fp
__kmpc_atomic_float4_sub_rev
__kmpc_atomic_float4_swp
__kmpc_atomic_float4_wr
__kmpc_atomic_float8_add
__kmpc_atomic_float8_add_cpt
__kmpc_atomic_float8_add_fp
__kmpc_atomic_float8_div
__kmpc_atomic_float8_div_cpt
__kmpc_atomic_float8_div_cpt_rev
__kmpc_atomic_float8_div_fp
__kmpc_atomic_float8_div_rev
__kmpc_atomic_float8_max
__kmpc_atomic_float8_max_cpt
__kmpc_atomic_float8_min
__kmpc_atomic_float8_min_cpt
__kmpc_atomic_float8_mul
__kmpc_atomic_float8_mul_cpt
__kmpc_atomic_float8_mul_fp
__kmpc_atomic_float8_rd
__kmpc_atomic_float8_sub
__kmpc_atomic_float8_sub_cpt
__kmpc_atomic_float8_sub_cpt_rev
__kmpc_atomic_float8_sub_fp
__kmpc_atomic_float8_sub_rev
__kmpc_atomic_float8_swp
__kmpc_atomic_float8_wr
__kmpc_atomic_float10_add
__kmpc_atomic_float10_add_cpt
__kmpc_atomic_float10_add_fp
__kmpc_atomic_float10_div
__kmpc_atomic_float10_div_cpt
__kmpc_atomic_float10_div_cpt_rev
__kmpc_atomic_float10_div_fp
__kmpc_atomic_float10_div_rev
__kmpc_atomic_float10_mul
__kmpc_atomic_float10_mul_cpt
__kmpc_atomic_float10_mul_fp
__kmpc_atomic_float10_rd
__kmpc_atomic_float10_sub
__kmpc_atomic_float10_sub_cpt
__kmpc_atomic_float10_sub_cpt_rev
__kmpc_atomic_float10_sub_fp
__kmpc_atomic_float10_sub_rev
__kmpc_atomic_float10_swp
__kmpc_atomic_float10_wr
__kmpc_atomic_float16_add
__kmpc_atomic_float16_add_cpt
__kmpc_atomic_float16_div
__kmpc_atomic_float16_div_cpt
__kmpc_atomic_float16_div_cpt_rev
__kmpc_atomic_float16_div_rev
__kmpc_atomic_float16_max
__kmpc_atomic_float16_max_cpt
__kmpc_atomic_float16_min
__kmpc_atomic_float16_min_cpt
__kmpc_atomic_float16_mul
__kmpc_atomic_float16_mul_cpt
__kmpc_atomic_float16_rd
__kmpc_atomic_float16_sub
__kmpc_atomic_float16_sub_cpt
__kmpc_atomic_float16_sub_cpt_rev
__kmpc_atomic_float16_sub_rev
__kmpc_atomic_float16_swp
__kmpc_atomic_float16_wr
@endcode

Functions for Complex types
---------------------------
Functions for complex types whose component floating point variables are of
size 4, 8, 10 or 16 bytes. The names here are based on the size of the
component float, *not* the size of the complex type. So
`__kmpc_atomic_cmplx8_add` is an operation on a `complex<double>` or
`complex(kind=8)`, *not* `complex<float>`.

@code
__kmpc_atomic_cmplx4_add
__kmpc_atomic_cmplx4_add_cmplx8
__kmpc_atomic_cmplx4_add_cpt
__kmpc_atomic_cmplx4_div
__kmpc_atomic_cmplx4_div_cmplx8
__kmpc_atomic_cmplx4_div_cpt
__kmpc_atomic_cmplx4_div_cpt_rev
__kmpc_atomic_cmplx4_div_rev
__kmpc_atomic_cmplx4_mul
__kmpc_atomic_cmplx4_mul_cmplx8
__kmpc_atomic_cmplx4_mul_cpt
__kmpc_atomic_cmplx4_rd
__kmpc_atomic_cmplx4_sub
__kmpc_atomic_cmplx4_sub_cmplx8
__kmpc_atomic_cmplx4_sub_cpt
__kmpc_atomic_cmplx4_sub_cpt_rev
__kmpc_atomic_cmplx4_sub_rev
__kmpc_atomic_cmplx4_swp
__kmpc_atomic_cmplx4_wr
__kmpc_atomic_cmplx8_add
__kmpc_atomic_cmplx8_add_cpt
__kmpc_atomic_cmplx8_div
__kmpc_atomic_cmplx8_div_cpt
__kmpc_atomic_cmplx8_div_cpt_rev
__kmpc_atomic_cmplx8_div_rev
__kmpc_atomic_cmplx8_mul
__kmpc_atomic_cmplx8_mul_cpt
__kmpc_atomic_cmplx8_rd
__kmpc_atomic_cmplx8_sub
__kmpc_atomic_cmplx8_sub_cpt
__kmpc_atomic_cmplx8_sub_cpt_rev
__kmpc_atomic_cmplx8_sub_rev
__kmpc_atomic_cmplx8_swp
__kmpc_atomic_cmplx8_wr
__kmpc_atomic_cmplx10_add
__kmpc_atomic_cmplx10_add_cpt
__kmpc_atomic_cmplx10_div
__kmpc_atomic_cmplx10_div_cpt
__kmpc_atomic_cmplx10_div_cpt_rev
__kmpc_atomic_cmplx10_div_rev
__kmpc_atomic_cmplx10_mul
__kmpc_atomic_cmplx10_mul_cpt
__kmpc_atomic_cmplx10_rd
__kmpc_atomic_cmplx10_sub
__kmpc_atomic_cmplx10_sub_cpt
__kmpc_atomic_cmplx10_sub_cpt_rev
__kmpc_atomic_cmplx10_sub_rev
__kmpc_atomic_cmplx10_swp
__kmpc_atomic_cmplx10_wr
__kmpc_atomic_cmplx16_add
__kmpc_atomic_cmplx16_add_cpt
__kmpc_atomic_cmplx16_div
__kmpc_atomic_cmplx16_div_cpt
__kmpc_atomic_cmplx16_div_cpt_rev
__kmpc_atomic_cmplx16_div_rev
__kmpc_atomic_cmplx16_mul
__kmpc_atomic_cmplx16_mul_cpt
__kmpc_atomic_cmplx16_rd
__kmpc_atomic_cmplx16_sub
__kmpc_atomic_cmplx16_sub_cpt
__kmpc_atomic_cmplx16_sub_cpt_rev
__kmpc_atomic_cmplx16_swp
__kmpc_atomic_cmplx16_wr
@endcode
*/
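
// Usage sketch (illustration only; the actual calls are generated by the
// compiler and are implementation-dependent): for
//   int x, v, y;
//   #pragma omp atomic capture
//   { v = x; x += y; }
// a compiler that does not inline the construct may emit a call such as
//   v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &x, y, 0);
// where &loc and gtid stand for the ident_t source location and the global
// thread id, and the final flag 0 requests the value *before* the update
// (1 would return the value after it), as described above.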

/*!
@ingroup ATOMIC_OPS
@{
*/

/*
 * Global vars
 */

#ifndef KMP_GOMP_COMPAT
int __kmp_atomic_mode = 1; // Intel perf
#else
int __kmp_atomic_mode = 2; // GOMP compatibility
#endif /* KMP_GOMP_COMPAT */

KMP_ALIGN(128)

// Control access to all user coded atomics in Gnu compat mode
kmp_atomic_lock_t __kmp_atomic_lock;
// Control access to all user coded atomics for 1-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_1i;
// Control access to all user coded atomics for 2-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_2i;
// Control access to all user coded atomics for 4-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_4i;
// Control access to all user coded atomics for kmp_real32 data type
kmp_atomic_lock_t __kmp_atomic_lock_4r;
// Control access to all user coded atomics for 8-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_8i;
// Control access to all user coded atomics for kmp_real64 data type
kmp_atomic_lock_t __kmp_atomic_lock_8r;
// Control access to all user coded atomics for complex byte data type
kmp_atomic_lock_t __kmp_atomic_lock_8c;
// Control access to all user coded atomics for long double data type
kmp_atomic_lock_t __kmp_atomic_lock_10r;
// Control access to all user coded atomics for _Quad data type
kmp_atomic_lock_t __kmp_atomic_lock_16r;
// Control access to all user coded atomics for double complex data type
kmp_atomic_lock_t __kmp_atomic_lock_16c;
// Control access to all user coded atomics for long double complex type
kmp_atomic_lock_t __kmp_atomic_lock_20c;
// Control access to all user coded atomics for _Quad complex data type
kmp_atomic_lock_t __kmp_atomic_lock_32c;

/* 2007-03-02:
   Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
   on *_32 and *_32e. This is just a temporary workaround for the problem. It
   seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
   in assembler language. */
#define KMP_ATOMIC_VOLATILE volatile

#if (KMP_ARCH_X86) && KMP_HAVE_QUAD

static inline void operator+=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q += rhs.q;
};
static inline void operator-=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q -= rhs.q;
};
static inline void operator*=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q *= rhs.q;
};
static inline void operator/=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q /= rhs.q;
};
static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q > rhs.q;
}

static inline void operator+=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q += rhs.q;
};
static inline void operator-=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q -= rhs.q;
};
static inline void operator*=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q *= rhs.q;
};
static inline void operator/=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q /= rhs.q;
};
static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q > rhs.q;
}

static inline void operator+=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q += rhs.q;
};
static inline void operator-=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q -= rhs.q;
};
static inline void operator*=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q *= rhs.q;
};
static inline void operator/=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q /= rhs.q;
};

static inline void operator+=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q += rhs.q;
};
static inline void operator-=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q -= rhs.q;
};
static inline void operator*=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q *= rhs.q;
};
static inline void operator/=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q /= rhs.q;
};

#endif

// ATOMIC implementation routines -----------------------------------------
// One routine for each operation and operand type.
// All routine declarations look like
// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );

#define KMP_CHECK_GTID \
  if (gtid == KMP_GTID_UNKNOWN) { \
    gtid = __kmp_entry_gtid(); \
  } // check and get gtid when needed

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *lhs, TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
// Lock variables used for critical sections for various size operands
#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  (*lhs) OP(rhs); \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// ------------------------------------------------------------------------
// For GNU compatibility, we may need to use a critical section,
// even though it is not required by the ISA.
//
// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
// critical section. On Intel(R) 64, all atomic operations are done with fetch
// and add or compare and exchange. Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
// require a critical section, where we predict that they will be implemented
// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
//
// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
// the FLAG parameter should always be 1. If we know that we will be using
// a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
// locks that are specialized based upon the size or type of the data.
//
// If FLAG is 0, then we are relying on dead code elimination by the build
// compiler to get rid of the useless block of code, and save a needless
// branch at runtime.
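//
// For example, __kmpc_atomic_fixed1_add below is generated via ATOMIC_CMPXCHG
// with GOMP_FLAG == KMP_ARCH_X86. On IA-32 in GOMP compatibility mode
// (__kmp_atomic_mode == 2), the OP_GOMP_CRITICAL block therefore takes the
// generic __kmp_atomic_lock, performs (*lhs) += rhs, and returns; in every
// other configuration the block is dead code and the lock-free
// compare-and-store path is used instead.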

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP, 0); \
    return; \
  }
#else
#define OP_GOMP_CRITICAL(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

#if KMP_MIC
#define KMP_DO_PAUSE _mm_delay_32(1)
#else
#define KMP_DO_PAUSE KMP_CPU_PAUSE()
#endif /* KMP_MIC */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
#define OP_CMPXCHG(TYPE, BITS, OP) \
  { \
    TYPE old_value, new_value; \
    old_value = *(TYPE volatile *)lhs; \
    new_value = old_value OP rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_DO_PAUSE; \
      \
      old_value = *(TYPE volatile *)lhs; \
      new_value = old_value OP rhs; \
    } \
  }

#if USE_CMPXCHG_FIX
// 2007-06-25:
// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
// and win_32e are affected (I verified the asm). Compiler ignores the volatile
// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
// compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
// the workaround.
#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  { \
    struct _sss { \
      TYPE cmp; \
      kmp_int##BITS *vvv; \
    }; \
    struct _sss old_value, new_value; \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
    new_value.cmp = old_value.cmp OP rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
      KMP_DO_PAUSE; \
      \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
      new_value.cmp = old_value.cmp OP rhs; \
    } \
  }
// end of the first part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                       GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
                                  MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  }
// end of the second part of the workaround for C78287
#endif

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
    KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                       GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
                                  MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// Routines for ATOMIC 4-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
                 0) // __kmpc_atomic_fixed4_add
ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
                 0) // __kmpc_atomic_fixed4_sub

ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_add
ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_sub

// Routines for ATOMIC 8-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub

ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_add
ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_sub

// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable
// MASK - used for alignment check

// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
               0) // __kmpc_atomic_fixed1_andb
ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
               0) // __kmpc_atomic_fixed1_orb
ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
               0) // __kmpc_atomic_fixed1_xor
ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
               0) // __kmpc_atomic_fixed2_andb
ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
               0) // __kmpc_atomic_fixed2_orb
ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
               0) // __kmpc_atomic_fixed2_xor
ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
               0) // __kmpc_atomic_fixed4_andb
ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
               0) // __kmpc_atomic_fixed4_orb
ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
               0) // __kmpc_atomic_fixed4_xor
ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_div
ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_mul
ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_div
ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_mul
// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG

/* ------------------------------------------------------------------------ */
/* Routines for C/C++ Reduction operators && and || */

// ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no combined assignment
// TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
#define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  OP_CRITICAL(= *lhs OP, LCK_ID) \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }

#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
              0) // __kmpc_atomic_fixed4_andl
ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
              0) // __kmpc_atomic_fixed4_orl
ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl

/* ------------------------------------------------------------------------- */
/* Routines for Fortran operators that have no direct C counterpart:          */
/* MAX, MIN, .EQV., .NEQV.                                                     */
/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}             */
/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}    */

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - operator used to check whether any action is needed
#define MIN_MAX_CRITSECT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (*lhs OP rhs) { /* still need actions? */ \
    *lhs = rhs; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    MIN_MAX_CRITSECT(OP, 0); \
    return; \
  }
#else
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    while (old_value OP rhs && /* still need actions? */ \
           !KMP_COMPARE_AND_STORE_ACQ##BITS( \
               (kmp_int##BITS *)lhs, \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
      KMP_CPU_PAUSE(); \
      temp_val = *lhs; \
      old_value = temp_val; \
    } \
  }

// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { /* need actions? */ \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    MIN_MAX_CRITSECT(OP, LCK_ID) \
  } \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
  } \
  }

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
      MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
    } else { \
      KMP_CHECK_GTID; \
      MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
    } \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
                 0) // __kmpc_atomic_fixed4_max
MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
                 0) // __kmpc_atomic_fixed4_min
MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_max
MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_min
MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_max
MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_min
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
                 1) // __kmpc_atomic_float16_max
MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
                 1) // __kmpc_atomic_float16_min
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
                 1) // __kmpc_atomic_float16_max_a16
MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
                 1) // __kmpc_atomic_float16_min_a16
#endif
#endif
// ------------------------------------------------------------------------
// Need separate macros for .EQV. because of the need of complement (~)
// OP ignored for critical sections, ^=~ used instead
#define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(^= ~, LCK_ID) /* send assignment and complement */ \
  }

// ------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                        GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
// ------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                        GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
                KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
                KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
                KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
                KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL(float10, add, long double, +, 10r,
                1) // __kmpc_atomic_float10_add
ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
                1) // __kmpc_atomic_float10_sub
ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
                1) // __kmpc_atomic_float10_mul
ATOMIC_CRITICAL(float10, div, long double, /, 10r,
                1) // __kmpc_atomic_float10_div
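
// For illustration, ATOMIC_CRITICAL(float10, add, long double, +, 10r, 1)
// above expands approximately (debug trace and GOMP-compatibility branch
// omitted) to:
//
//   void __kmpc_atomic_float10_add(ident_t *id_ref, int gtid,
//                                  long double *lhs, long double rhs) {
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//     (*lhs) += (rhs);
//     __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//   }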
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
                1) // __kmpc_atomic_float16_add
ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
                1) // __kmpc_atomic_float16_sub
ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
                1) // __kmpc_atomic_float16_mul
ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
                1) // __kmpc_atomic_float16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
                1) // __kmpc_atomic_float16_add_a16
ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
                1) // __kmpc_atomic_float16_sub_a16
ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
                1) // __kmpc_atomic_float16_mul_a16
ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
                1) // __kmpc_atomic_float16_div_a16
#endif
#endif
// routines for complex types

#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_add
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_div
// end of the workaround for C78287
#else
ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
#endif // USE_CMPXCHG_FIX

ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
                1) // __kmpc_atomic_cmplx10_add
ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
                1) // __kmpc_atomic_cmplx10_sub
ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
                1) // __kmpc_atomic_cmplx10_mul
ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
                1) // __kmpc_atomic_cmplx10_div
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
                1) // __kmpc_atomic_cmplx16_add
ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub
ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul
ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
                1) // __kmpc_atomic_cmplx16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
                1) // __kmpc_atomic_cmplx16_add_a16
ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub_a16
ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul_a16
ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                1) // __kmpc_atomic_cmplx16_div_a16
#endif
#endif

#if OMP_40_ENABLED

// OpenMP 4.0: x = expr binop x for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_REV(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  (*lhs) = (rhs)OP(*lhs); \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_REV(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_REV(OP, 0); \
    return; \
  }
#else
#define OP_GOMP_CRITICAL_REV(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
                                                   TYPE *lhs, TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_REV(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs OP old_value; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_DO_PAUSE; \
      \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs OP old_value; \
    } \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
  OP_CMPXCHG_REV(TYPE, BITS, OP) \
  }

// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable

// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev

ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev

ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev

ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev

ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev

ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
__kmpc_atomic_float8_div_rev 1496 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r, 1497 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev 1498 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG 1499 1500 // ------------------------------------------------------------------------ 1501 // Routines for Extended types: long double, _Quad, complex flavours (use 1502 // critical section) 1503 // TYPE_ID, OP_ID, TYPE - detailed above 1504 // OP - operator 1505 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1506 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1507 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \ 1508 OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ 1509 OP_CRITICAL_REV(OP, LCK_ID) \ 1510 } 1511 1512 /* ------------------------------------------------------------------------- */ 1513 // routines for long double type 1514 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r, 1515 1) // __kmpc_atomic_float10_sub_rev 1516 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r, 1517 1) // __kmpc_atomic_float10_div_rev 1518 #if KMP_HAVE_QUAD 1519 // routines for _Quad type 1520 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r, 1521 1) // __kmpc_atomic_float16_sub_rev 1522 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r, 1523 1) // __kmpc_atomic_float16_div_rev 1524 #if (KMP_ARCH_X86) 1525 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r, 1526 1) // __kmpc_atomic_float16_sub_a16_rev 1527 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r, 1528 1) // __kmpc_atomic_float16_div_a16_rev 1529 #endif 1530 #endif 1531 1532 // routines for complex types 1533 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c, 1534 1) // __kmpc_atomic_cmplx4_sub_rev 1535 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c, 1536 1) // __kmpc_atomic_cmplx4_div_rev 1537 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c, 1538 1) // __kmpc_atomic_cmplx8_sub_rev 1539 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c, 1540 1) // __kmpc_atomic_cmplx8_div_rev 1541 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c, 1542 1) // __kmpc_atomic_cmplx10_sub_rev 1543 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c, 1544 1) // __kmpc_atomic_cmplx10_div_rev 1545 #if KMP_HAVE_QUAD 1546 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c, 1547 1) // __kmpc_atomic_cmplx16_sub_rev 1548 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c, 1549 1) // __kmpc_atomic_cmplx16_div_rev 1550 #if (KMP_ARCH_X86) 1551 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1552 1) // __kmpc_atomic_cmplx16_sub_a16_rev 1553 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1554 1) // __kmpc_atomic_cmplx16_div_a16_rev 1555 #endif 1556 #endif 1557 1558 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 1559 // End of OpenMP 4.0: x = expr binop x for non-commutative operations. 
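/*
   Illustration (not part of the runtime; a simplified sketch under the
   definitions above): roughly what ATOMIC_CMPXCHG_REV(fixed4, sub, ...)
   expands to, with the debug trace, the GOMP compatibility path and the
   KMP_ATOMIC_VOLATILE temp_val read omitted for brevity:
   @code
   void __kmpc_atomic_fixed4_sub_rev(ident_t *id_ref, int gtid,
                                     kmp_int32 *lhs, kmp_int32 rhs) {
     kmp_int32 old_value, new_value;
     old_value = *lhs;
     new_value = rhs - old_value; // reversed operand order: x = rhs - x
     while (!KMP_COMPARE_AND_STORE_ACQ32(
         (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
         *VOLATILE_CAST(kmp_int32 *) & new_value)) {
       KMP_DO_PAUSE; // another thread changed *lhs; re-read and retry
       old_value = *lhs;
       new_value = rhs - old_value;
     }
   }
   @endcode
   A compiler that chooses not to inline the construct could, for example,
   lower
   @code
   int x;
   #pragma omp atomic
   x = 10 - x;
   @endcode
   into a call such as __kmpc_atomic_fixed4_sub_rev(&loc, gtid, &x, 10),
   where loc and gtid stand for the usual source-location and global
   thread-id arguments (placeholder names, not defined here).
*/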

#endif // OMP_40_ENABLED

/* ------------------------------------------------------------------------ */
/* Routines for mixed types of LHS and RHS, when RHS is "larger".            */
/* Note: to reduce the total number of type combinations, the compiler is    */
/*       expected to convert the RHS to the longest floating-point type,     */
/*       i.e. _Quad, before calling any of these routines.                   */
/* The conversion to _Quad is done by the compiler during the calculation,   */
/* and the conversion back to TYPE happens before the assignment, i.e.:      */
/*    *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                   */
/* A performance penalty is expected because _Quad uses software emulation.  */
/* ------------------------------------------------------------------------ */

#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)               \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID(                        \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) {                      \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                      \
    KA_TRACE(100,                                                             \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n",  \
              gtid));

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
                           GOMP_FLAG)                                         \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                     \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */                   \
  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */                           \
  }

// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE,   \
                           LCK_ID, MASK, GOMP_FLAG)                           \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE)                     \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                         \
  OP_CMPXCHG(TYPE, BITS, OP)                                                  \
  }
// -------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
1604 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1605 LCK_ID, MASK, GOMP_FLAG) \ 1606 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1607 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ 1608 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1609 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1610 } else { \ 1611 KMP_CHECK_GTID; \ 1612 OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ 1613 } \ 1614 } 1615 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1616 1617 // ------------------------------------------------------------------------- 1618 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1619 // ------------------------------------------------------------------------- 1620 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ 1621 RTYPE, LCK_ID, MASK, GOMP_FLAG) \ 1622 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1623 OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ 1624 OP_CMPXCHG_REV(TYPE, BITS, OP) \ 1625 } 1626 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ 1627 LCK_ID, GOMP_FLAG) \ 1628 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1629 OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ 1630 OP_CRITICAL_REV(OP, LCK_ID) \ 1631 } 1632 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1633 1634 // RHS=float8 1635 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, 1636 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8 1637 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, 1638 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8 1639 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, 1640 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8 1641 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, 1642 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8 1643 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 1644 0) // __kmpc_atomic_fixed4_mul_float8 1645 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 1646 0) // __kmpc_atomic_fixed4_div_float8 1647 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, 1648 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8 1649 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, 1650 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8 1651 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, 1652 KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8 1653 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, 1654 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8 1655 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, 1656 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8 1657 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, 1658 KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8 1659 1660 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not 1661 // use them) 1662 #if KMP_HAVE_QUAD 1663 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0, 1664 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp 1665 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0, 1666 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp 1667 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, 1668 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp 1669 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0, 1670 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp 1671 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 
8, *, fp, _Quad, 1i, 0, 1672 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp 1673 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0, 1674 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp 1675 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0, 1676 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp 1677 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, 1678 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp 1679 1680 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1, 1681 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp 1682 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1, 1683 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp 1684 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, 1685 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp 1686 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1, 1687 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp 1688 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, 1689 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp 1690 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1, 1691 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp 1692 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1, 1693 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp 1694 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, 1695 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp 1696 1697 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 1698 0) // __kmpc_atomic_fixed4_add_fp 1699 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3, 1700 0) // __kmpc_atomic_fixed4u_add_fp 1701 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 1702 0) // __kmpc_atomic_fixed4_sub_fp 1703 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3, 1704 0) // __kmpc_atomic_fixed4u_sub_fp 1705 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 1706 0) // __kmpc_atomic_fixed4_mul_fp 1707 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3, 1708 0) // __kmpc_atomic_fixed4u_mul_fp 1709 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 1710 0) // __kmpc_atomic_fixed4_div_fp 1711 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 1712 0) // __kmpc_atomic_fixed4u_div_fp 1713 1714 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, 1715 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp 1716 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7, 1717 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp 1718 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, 1719 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp 1720 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7, 1721 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp 1722 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, 1723 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp 1724 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7, 1725 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp 1726 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, 1727 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp 1728 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, 1729 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp 1730 1731 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, 1732 KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp 1733 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, 1734 KMP_ARCH_X86) // 
__kmpc_atomic_float4_sub_fp 1735 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, 1736 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp 1737 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, 1738 KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp 1739 1740 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, 1741 KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp 1742 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, 1743 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp 1744 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, 1745 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp 1746 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, 1747 KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp 1748 1749 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r, 1750 1) // __kmpc_atomic_float10_add_fp 1751 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r, 1752 1) // __kmpc_atomic_float10_sub_fp 1753 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r, 1754 1) // __kmpc_atomic_float10_mul_fp 1755 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r, 1756 1) // __kmpc_atomic_float10_div_fp 1757 1758 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1759 // Reverse operations 1760 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0, 1761 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp 1762 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0, 1763 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp 1764 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0, 1765 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp 1766 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0, 1767 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp 1768 1769 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1, 1770 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp 1771 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1, 1772 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp 1773 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1, 1774 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp 1775 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1, 1776 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp 1777 1778 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3, 1779 0) // __kmpc_atomic_fixed4_sub_rev_fp 1780 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3, 1781 0) // __kmpc_atomic_fixed4u_sub_rev_fp 1782 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3, 1783 0) // __kmpc_atomic_fixed4_div_rev_fp 1784 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3, 1785 0) // __kmpc_atomic_fixed4u_div_rev_fp 1786 1787 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7, 1788 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp 1789 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7, 1790 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp 1791 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7, 1792 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp 1793 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7, 1794 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp 1795 1796 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3, 1797 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp 
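/*
   Illustration (not part of the runtime): the *_rev_fp entries in this block
   combine the reversed-operand form with the _Quad right-hand side described
   earlier, so the update performed atomically is
       *lhs = (TYPE)( rhs OP (_Quad)(*lhs) ).
   For example, __kmpc_atomic_fixed4_div_rev_fp (defined above) implements
   x = (kmp_int32)( q / (_Quad)x ). A hypothetical lowering:
   @code
   // q already converted by the compiler to _Quad, per the note above
   int x = 100;
   #pragma omp atomic
   x = q / x; // non-commutative, reversed operands, mixed types
   // may become:
   //   __kmpc_atomic_fixed4_div_rev_fp(&loc, gtid, &x, q);
   // (loc and gtid are placeholder names for the usual arguments)
   @endcode
*/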
1798 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3, 1799 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp 1800 1801 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7, 1802 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp 1803 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7, 1804 KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp 1805 1806 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r, 1807 1) // __kmpc_atomic_float10_sub_rev_fp 1808 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r, 1809 1) // __kmpc_atomic_float10_div_rev_fp 1810 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1811 1812 #endif 1813 1814 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1815 // ------------------------------------------------------------------------ 1816 // X86 or X86_64: no alignment problems ==================================== 1817 #if USE_CMPXCHG_FIX 1818 // workaround for C78287 (complex(kind=4) data type) 1819 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1820 LCK_ID, MASK, GOMP_FLAG) \ 1821 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1822 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ 1823 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ 1824 } 1825 // end of the second part of the workaround for C78287 1826 #else 1827 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1828 LCK_ID, MASK, GOMP_FLAG) \ 1829 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1830 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ 1831 OP_CMPXCHG(TYPE, BITS, OP) \ 1832 } 1833 #endif // USE_CMPXCHG_FIX 1834 #else 1835 // ------------------------------------------------------------------------ 1836 // Code for other architectures that don't handle unaligned accesses. 1837 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1838 LCK_ID, MASK, GOMP_FLAG) \ 1839 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1840 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ 1841 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1842 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1843 } else { \ 1844 KMP_CHECK_GTID; \ 1845 OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ 1846 } \ 1847 } 1848 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1849 1850 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 1851 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8 1852 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 1853 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8 1854 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 1855 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8 1856 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 1857 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8 1858 1859 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64 1860 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1861 1862 // ------------------------------------------------------------------------ 1863 // Atomic READ routines 1864 1865 // ------------------------------------------------------------------------ 1866 // Beginning of a definition (provides name, parameters, gebug trace) 1867 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 1868 // fixed) 1869 // OP_ID - operation identifier (add, sub, mul, ...) 
1870 // TYPE - operands' type 1871 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ 1872 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \ 1873 TYPE *loc) { \ 1874 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 1875 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 1876 1877 // ------------------------------------------------------------------------ 1878 // Operation on *lhs, rhs using "compare_and_store_ret" routine 1879 // TYPE - operands' type 1880 // BITS - size in bits, used to distinguish low level calls 1881 // OP - operator 1882 // Note: temp_val introduced in order to force the compiler to read 1883 // *lhs only once (w/o it the compiler reads *lhs twice) 1884 // TODO: check if it is still necessary 1885 // Return old value regardless of the result of "compare & swap# operation 1886 #define OP_CMPXCHG_READ(TYPE, BITS, OP) \ 1887 { \ 1888 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 1889 union f_i_union { \ 1890 TYPE f_val; \ 1891 kmp_int##BITS i_val; \ 1892 }; \ 1893 union f_i_union old_value; \ 1894 temp_val = *loc; \ 1895 old_value.f_val = temp_val; \ 1896 old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \ 1897 (kmp_int##BITS *)loc, \ 1898 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \ 1899 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \ 1900 new_value = old_value.f_val; \ 1901 return new_value; \ 1902 } 1903 1904 // ------------------------------------------------------------------------- 1905 // Operation on *lhs, rhs bound by critical section 1906 // OP - operator (it's supposed to contain an assignment) 1907 // LCK_ID - lock identifier 1908 // Note: don't check gtid as it should always be valid 1909 // 1, 2-byte - expect valid parameter, other - check before this macro 1910 #define OP_CRITICAL_READ(OP, LCK_ID) \ 1911 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 1912 \ 1913 new_value = (*loc); \ 1914 \ 1915 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 1916 1917 // ------------------------------------------------------------------------- 1918 #ifdef KMP_GOMP_COMPAT 1919 #define OP_GOMP_CRITICAL_READ(OP, FLAG) \ 1920 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 1921 KMP_CHECK_GTID; \ 1922 OP_CRITICAL_READ(OP, 0); \ 1923 return new_value; \ 1924 } 1925 #else 1926 #define OP_GOMP_CRITICAL_READ(OP, FLAG) 1927 #endif /* KMP_GOMP_COMPAT */ 1928 1929 // ------------------------------------------------------------------------- 1930 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 1931 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \ 1932 TYPE new_value; \ 1933 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \ 1934 new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \ 1935 return new_value; \ 1936 } 1937 // ------------------------------------------------------------------------- 1938 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 1939 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \ 1940 TYPE new_value; \ 1941 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \ 1942 OP_CMPXCHG_READ(TYPE, BITS, OP) \ 1943 } 1944 // ------------------------------------------------------------------------ 1945 // Routines for Extended types: long double, _Quad, complex flavours (use 1946 // critical section) 1947 // TYPE_ID, OP_ID, TYPE - detailed above 1948 // OP - operator 1949 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1950 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1951 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \ 1952 TYPE new_value; \ 
1953 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \ 1954 OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \ 1955 return new_value; \ 1956 } 1957 1958 // ------------------------------------------------------------------------ 1959 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return 1960 // value doesn't work. 1961 // Let's return the read value through the additional parameter. 1962 #if (KMP_OS_WINDOWS) 1963 1964 #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \ 1965 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 1966 \ 1967 (*out) = (*loc); \ 1968 \ 1969 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 1970 // ------------------------------------------------------------------------ 1971 #ifdef KMP_GOMP_COMPAT 1972 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \ 1973 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 1974 KMP_CHECK_GTID; \ 1975 OP_CRITICAL_READ_WRK(OP, 0); \ 1976 } 1977 #else 1978 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) 1979 #endif /* KMP_GOMP_COMPAT */ 1980 // ------------------------------------------------------------------------ 1981 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \ 1982 void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \ 1983 TYPE *loc) { \ 1984 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 1985 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 1986 1987 // ------------------------------------------------------------------------ 1988 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1989 ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \ 1990 OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \ 1991 OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \ 1992 } 1993 1994 #endif // KMP_OS_WINDOWS 1995 1996 // ------------------------------------------------------------------------ 1997 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 1998 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd 1999 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +, 2000 KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd 2001 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +, 2002 KMP_ARCH_X86) // __kmpc_atomic_float4_rd 2003 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +, 2004 KMP_ARCH_X86) // __kmpc_atomic_float8_rd 2005 2006 // !!! 
TODO: Remove lock operations for "char" since it can't be non-atomic 2007 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +, 2008 KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd 2009 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +, 2010 KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd 2011 2012 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r, 2013 1) // __kmpc_atomic_float10_rd 2014 #if KMP_HAVE_QUAD 2015 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r, 2016 1) // __kmpc_atomic_float16_rd 2017 #endif // KMP_HAVE_QUAD 2018 2019 // Fix for CQ220361 on Windows* OS 2020 #if (KMP_OS_WINDOWS) 2021 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c, 2022 1) // __kmpc_atomic_cmplx4_rd 2023 #else 2024 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c, 2025 1) // __kmpc_atomic_cmplx4_rd 2026 #endif 2027 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c, 2028 1) // __kmpc_atomic_cmplx8_rd 2029 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c, 2030 1) // __kmpc_atomic_cmplx10_rd 2031 #if KMP_HAVE_QUAD 2032 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c, 2033 1) // __kmpc_atomic_cmplx16_rd 2034 #if (KMP_ARCH_X86) 2035 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r, 2036 1) // __kmpc_atomic_float16_a16_rd 2037 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 2038 1) // __kmpc_atomic_cmplx16_a16_rd 2039 #endif 2040 #endif 2041 2042 // ------------------------------------------------------------------------ 2043 // Atomic WRITE routines 2044 2045 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2046 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2047 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2048 KMP_XCHG_FIXED##BITS(lhs, rhs); \ 2049 } 2050 // ------------------------------------------------------------------------ 2051 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2052 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2053 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2054 KMP_XCHG_REAL##BITS(lhs, rhs); \ 2055 } 2056 2057 // ------------------------------------------------------------------------ 2058 // Operation on *lhs, rhs using "compare_and_store" routine 2059 // TYPE - operands' type 2060 // BITS - size in bits, used to distinguish low level calls 2061 // OP - operator 2062 // Note: temp_val introduced in order to force the compiler to read 2063 // *lhs only once (w/o it the compiler reads *lhs twice) 2064 #define OP_CMPXCHG_WR(TYPE, BITS, OP) \ 2065 { \ 2066 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2067 TYPE old_value, new_value; \ 2068 temp_val = *lhs; \ 2069 old_value = temp_val; \ 2070 new_value = rhs; \ 2071 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2072 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2073 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 2074 KMP_CPU_PAUSE(); \ 2075 \ 2076 temp_val = *lhs; \ 2077 old_value = temp_val; \ 2078 new_value = rhs; \ 2079 } \ 2080 } 2081 2082 // ------------------------------------------------------------------------- 2083 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2084 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2085 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2086 OP_CMPXCHG_WR(TYPE, BITS, OP) \ 2087 } 2088 2089 // ------------------------------------------------------------------------ 2090 // Routines for Extended types: long double, _Quad, complex flavours (use 2091 // critical section) 2092 // TYPE_ID, OP_ID, TYPE - detailed above 2093 // OP - operator 2094 // LCK_ID - lock identifier, used to possibly distinguish lock variable 2095 #define 
ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2096 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2097 OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \ 2098 OP_CRITICAL(OP, LCK_ID) /* send assignment */ \ 2099 } 2100 // ------------------------------------------------------------------------- 2101 2102 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =, 2103 KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr 2104 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =, 2105 KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr 2106 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =, 2107 KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr 2108 #if (KMP_ARCH_X86) 2109 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =, 2110 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr 2111 #else 2112 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =, 2113 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr 2114 #endif 2115 2116 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =, 2117 KMP_ARCH_X86) // __kmpc_atomic_float4_wr 2118 #if (KMP_ARCH_X86) 2119 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =, 2120 KMP_ARCH_X86) // __kmpc_atomic_float8_wr 2121 #else 2122 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =, 2123 KMP_ARCH_X86) // __kmpc_atomic_float8_wr 2124 #endif 2125 2126 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r, 2127 1) // __kmpc_atomic_float10_wr 2128 #if KMP_HAVE_QUAD 2129 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r, 2130 1) // __kmpc_atomic_float16_wr 2131 #endif 2132 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr 2133 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c, 2134 1) // __kmpc_atomic_cmplx8_wr 2135 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c, 2136 1) // __kmpc_atomic_cmplx10_wr 2137 #if KMP_HAVE_QUAD 2138 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c, 2139 1) // __kmpc_atomic_cmplx16_wr 2140 #if (KMP_ARCH_X86) 2141 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r, 2142 1) // __kmpc_atomic_float16_a16_wr 2143 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 2144 1) // __kmpc_atomic_cmplx16_a16_wr 2145 #endif 2146 #endif 2147 2148 // ------------------------------------------------------------------------ 2149 // Atomic CAPTURE routines 2150 2151 // Beginning of a definition (provides name, parameters, gebug trace) 2152 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 2153 // fixed) 2154 // OP_ID - operation identifier (add, sub, mul, ...) 
2155 // TYPE - operands' type 2156 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ 2157 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \ 2158 TYPE *lhs, TYPE rhs, int flag) { \ 2159 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2160 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 2161 2162 // ------------------------------------------------------------------------- 2163 // Operation on *lhs, rhs bound by critical section 2164 // OP - operator (it's supposed to contain an assignment) 2165 // LCK_ID - lock identifier 2166 // Note: don't check gtid as it should always be valid 2167 // 1, 2-byte - expect valid parameter, other - check before this macro 2168 #define OP_CRITICAL_CPT(OP, LCK_ID) \ 2169 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2170 \ 2171 if (flag) { \ 2172 (*lhs) OP rhs; \ 2173 new_value = (*lhs); \ 2174 } else { \ 2175 new_value = (*lhs); \ 2176 (*lhs) OP rhs; \ 2177 } \ 2178 \ 2179 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2180 return new_value; 2181 2182 // ------------------------------------------------------------------------ 2183 #ifdef KMP_GOMP_COMPAT 2184 #define OP_GOMP_CRITICAL_CPT(OP, FLAG) \ 2185 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2186 KMP_CHECK_GTID; \ 2187 OP_CRITICAL_CPT(OP## =, 0); \ 2188 } 2189 #else 2190 #define OP_GOMP_CRITICAL_CPT(OP, FLAG) 2191 #endif /* KMP_GOMP_COMPAT */ 2192 2193 // ------------------------------------------------------------------------ 2194 // Operation on *lhs, rhs using "compare_and_store" routine 2195 // TYPE - operands' type 2196 // BITS - size in bits, used to distinguish low level calls 2197 // OP - operator 2198 // Note: temp_val introduced in order to force the compiler to read 2199 // *lhs only once (w/o it the compiler reads *lhs twice) 2200 #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2201 { \ 2202 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2203 TYPE old_value, new_value; \ 2204 temp_val = *lhs; \ 2205 old_value = temp_val; \ 2206 new_value = old_value OP rhs; \ 2207 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2208 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2209 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 2210 KMP_CPU_PAUSE(); \ 2211 \ 2212 temp_val = *lhs; \ 2213 old_value = temp_val; \ 2214 new_value = old_value OP rhs; \ 2215 } \ 2216 if (flag) { \ 2217 return new_value; \ 2218 } else \ 2219 return old_value; \ 2220 } 2221 2222 // ------------------------------------------------------------------------- 2223 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2224 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2225 TYPE new_value; \ 2226 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \ 2227 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2228 } 2229 2230 // ------------------------------------------------------------------------- 2231 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2232 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2233 TYPE old_value, new_value; \ 2234 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \ 2235 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ 2236 old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ 2237 if (flag) { \ 2238 return old_value OP rhs; \ 2239 } else \ 2240 return old_value; \ 2241 } 2242 // ------------------------------------------------------------------------- 2243 2244 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +, 2245 0) // __kmpc_atomic_fixed4_add_cpt 2246 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -, 2247 0) // 
__kmpc_atomic_fixed4_sub_cpt 2248 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +, 2249 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt 2250 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -, 2251 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt 2252 2253 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +, 2254 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt 2255 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -, 2256 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt 2257 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +, 2258 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt 2259 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -, 2260 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt 2261 2262 // ------------------------------------------------------------------------ 2263 // Entries definition for integer operands 2264 // TYPE_ID - operands type and size (fixed4, float4) 2265 // OP_ID - operation identifier (add, sub, mul, ...) 2266 // TYPE - operand type 2267 // BITS - size in bits, used to distinguish low level calls 2268 // OP - operator (used in critical section) 2269 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG 2270 // ------------------------------------------------------------------------ 2271 // Routines for ATOMIC integer operands, other operators 2272 // ------------------------------------------------------------------------ 2273 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 2274 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +, 2275 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt 2276 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &, 2277 0) // __kmpc_atomic_fixed1_andb_cpt 2278 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /, 2279 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt 2280 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /, 2281 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt 2282 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *, 2283 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt 2284 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |, 2285 0) // __kmpc_atomic_fixed1_orb_cpt 2286 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<, 2287 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt 2288 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>, 2289 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt 2290 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>, 2291 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt 2292 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -, 2293 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt 2294 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^, 2295 0) // __kmpc_atomic_fixed1_xor_cpt 2296 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +, 2297 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt 2298 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &, 2299 0) // __kmpc_atomic_fixed2_andb_cpt 2300 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /, 2301 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt 2302 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /, 2303 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt 2304 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *, 2305 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt 2306 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |, 2307 0) // __kmpc_atomic_fixed2_orb_cpt 2308 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<, 2309 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt 2310 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>, 2311 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt 2312 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>, 2313 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt 
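/*
   Illustration (not part of the runtime): for every *_cpt entry the trailing
   int argument selects which value is returned -- the value after the update
   (flag != 0) or the value before it (flag == 0), exactly as coded in
   OP_CRITICAL_CPT / OP_CMPXCHG_CPT above. A hypothetical lowering of the two
   OpenMP capture forms using __kmpc_atomic_fixed4_add_cpt (defined above):
   @code
   kmp_int32 x = 5, v;
   #pragma omp atomic capture
   { v = x; x += 2; } // capture the old value
   // may become:  v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &x, 2, 0);

   #pragma omp atomic capture
   { x += 2; v = x; } // capture the new value
   // may become:  v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &x, 2, 1);
   @endcode
   (loc and gtid are placeholder names for the usual source-location and
   global thread-id arguments.)
*/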
2314 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -, 2315 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt 2316 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^, 2317 0) // __kmpc_atomic_fixed2_xor_cpt 2318 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &, 2319 0) // __kmpc_atomic_fixed4_andb_cpt 2320 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /, 2321 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt 2322 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /, 2323 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt 2324 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *, 2325 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt 2326 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |, 2327 0) // __kmpc_atomic_fixed4_orb_cpt 2328 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<, 2329 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt 2330 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>, 2331 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt 2332 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>, 2333 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt 2334 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^, 2335 0) // __kmpc_atomic_fixed4_xor_cpt 2336 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &, 2337 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt 2338 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /, 2339 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt 2340 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /, 2341 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt 2342 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *, 2343 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt 2344 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |, 2345 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt 2346 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<, 2347 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt 2348 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>, 2349 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt 2350 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>, 2351 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt 2352 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^, 2353 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt 2354 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /, 2355 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt 2356 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *, 2357 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt 2358 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /, 2359 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt 2360 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *, 2361 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt 2362 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 2363 2364 // CAPTURE routines for mixed types RHS=float16 2365 #if KMP_HAVE_QUAD 2366 2367 // Beginning of a definition (provides name, parameters, gebug trace) 2368 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 2369 // fixed) 2370 // OP_ID - operation identifier (add, sub, mul, ...) 
2371 // TYPE - operands' type 2372 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2373 TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \ 2374 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \ 2375 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2376 KA_TRACE(100, \ 2377 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \ 2378 gtid)); 2379 2380 // ------------------------------------------------------------------------- 2381 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ 2382 RTYPE, LCK_ID, MASK, GOMP_FLAG) \ 2383 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2384 TYPE new_value; \ 2385 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \ 2386 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2387 } 2388 2389 // ------------------------------------------------------------------------- 2390 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ 2391 LCK_ID, GOMP_FLAG) \ 2392 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2393 TYPE new_value; \ 2394 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \ 2395 OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \ 2396 } 2397 2398 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0, 2399 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp 2400 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0, 2401 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp 2402 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0, 2403 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp 2404 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0, 2405 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp 2406 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0, 2407 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp 2408 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0, 2409 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp 2410 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0, 2411 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp 2412 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0, 2413 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp 2414 2415 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1, 2416 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp 2417 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1, 2418 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp 2419 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1, 2420 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp 2421 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1, 2422 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp 2423 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1, 2424 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp 2425 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1, 2426 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp 2427 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1, 2428 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp 2429 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1, 2430 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp 2431 2432 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3, 2433 0) // __kmpc_atomic_fixed4_add_cpt_fp 2434 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3, 2435 0) // __kmpc_atomic_fixed4u_add_cpt_fp 
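/*
   Illustration (not part of the runtime; a sketch under the definitions
   above): the *_cpt_fp entries combine capture with the _Quad right-hand
   side, so the atomic update is *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) and
   the returned value (old or new, depending on flag) already has type TYPE.
   Roughly what ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, ...) just
   above expands to, with the debug trace and GOMP path omitted:
   @code
   kmp_int32 __kmpc_atomic_fixed4_add_cpt_fp(ident_t *id_ref, int gtid,
                                             kmp_int32 *lhs, _Quad rhs,
                                             int flag) {
     kmp_int32 old_value, new_value;
     old_value = *lhs;
     new_value = (kmp_int32)((_Quad)old_value + rhs); // compute in _Quad
     while (!KMP_COMPARE_AND_STORE_ACQ32(
         (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
         *VOLATILE_CAST(kmp_int32 *) & new_value)) {
       KMP_CPU_PAUSE();
       old_value = *lhs; // *lhs changed underneath us; re-read and retry
       new_value = (kmp_int32)((_Quad)old_value + rhs);
     }
     return flag ? new_value : old_value; // capture after or before the update
   }
   @endcode
*/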
2436 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 2437 0) // __kmpc_atomic_fixed4_sub_cpt_fp 2438 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 2439 0) // __kmpc_atomic_fixed4u_sub_cpt_fp 2440 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 2441 0) // __kmpc_atomic_fixed4_mul_cpt_fp 2442 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 2443 0) // __kmpc_atomic_fixed4u_mul_cpt_fp 2444 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3, 2445 0) // __kmpc_atomic_fixed4_div_cpt_fp 2446 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3, 2447 0) // __kmpc_atomic_fixed4u_div_cpt_fp 2448 2449 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7, 2450 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp 2451 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7, 2452 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp 2453 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7, 2454 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp 2455 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7, 2456 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp 2457 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7, 2458 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp 2459 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7, 2460 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp 2461 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7, 2462 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp 2463 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7, 2464 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp 2465 2466 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3, 2467 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp 2468 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3, 2469 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp 2470 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3, 2471 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp 2472 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3, 2473 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp 2474 2475 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7, 2476 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp 2477 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7, 2478 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp 2479 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7, 2480 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp 2481 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7, 2482 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp 2483 2484 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r, 2485 1) // __kmpc_atomic_float10_add_cpt_fp 2486 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r, 2487 1) // __kmpc_atomic_float10_sub_cpt_fp 2488 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r, 2489 1) // __kmpc_atomic_float10_mul_cpt_fp 2490 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r, 2491 1) // __kmpc_atomic_float10_div_cpt_fp 2492 2493 #endif // KMP_HAVE_QUAD 2494 2495 // 
------------------------------------------------------------------------ 2496 // Routines for C/C++ Reduction operators && and || 2497 2498 // ------------------------------------------------------------------------- 2499 // Operation on *lhs, rhs bound by critical section 2500 // OP - operator (it's supposed to contain an assignment) 2501 // LCK_ID - lock identifier 2502 // Note: don't check gtid as it should always be valid 2503 // 1, 2-byte - expect valid parameter, other - check before this macro 2504 #define OP_CRITICAL_L_CPT(OP, LCK_ID) \ 2505 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2506 \ 2507 if (flag) { \ 2508 new_value OP rhs; \ 2509 } else \ 2510 new_value = (*lhs); \ 2511 \ 2512 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 2513 2514 // ------------------------------------------------------------------------ 2515 #ifdef KMP_GOMP_COMPAT 2516 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \ 2517 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2518 KMP_CHECK_GTID; \ 2519 OP_CRITICAL_L_CPT(OP, 0); \ 2520 return new_value; \ 2521 } 2522 #else 2523 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) 2524 #endif /* KMP_GOMP_COMPAT */ 2525 2526 // ------------------------------------------------------------------------ 2527 // Need separate macros for &&, || because there is no combined assignment 2528 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2529 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2530 TYPE new_value; \ 2531 OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \ 2532 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2533 } 2534 2535 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&, 2536 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt 2537 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||, 2538 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt 2539 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&, 2540 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt 2541 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||, 2542 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt 2543 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&, 2544 0) // __kmpc_atomic_fixed4_andl_cpt 2545 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||, 2546 0) // __kmpc_atomic_fixed4_orl_cpt 2547 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&, 2548 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt 2549 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||, 2550 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt 2551 2552 // ------------------------------------------------------------------------- 2553 // Routines for Fortran operators that matched no one in C: 2554 // MAX, MIN, .EQV., .NEQV. 2555 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt 2556 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt 2557 2558 // ------------------------------------------------------------------------- 2559 // MIN and MAX need separate macros 2560 // OP - operator to check if we need any actions? 2561 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \ 2562 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2563 \ 2564 if (*lhs OP rhs) { /* still need actions? 
*/ \ 2565 old_value = *lhs; \ 2566 *lhs = rhs; \ 2567 if (flag) \ 2568 new_value = rhs; \ 2569 else \ 2570 new_value = old_value; \ 2571 } \ 2572 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2573 return new_value; 2574 2575 // ------------------------------------------------------------------------- 2576 #ifdef KMP_GOMP_COMPAT 2577 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \ 2578 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2579 KMP_CHECK_GTID; \ 2580 MIN_MAX_CRITSECT_CPT(OP, 0); \ 2581 } 2582 #else 2583 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) 2584 #endif /* KMP_GOMP_COMPAT */ 2585 2586 // ------------------------------------------------------------------------- 2587 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \ 2588 { \ 2589 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2590 /*TYPE old_value; */ \ 2591 temp_val = *lhs; \ 2592 old_value = temp_val; \ 2593 while (old_value OP rhs && /* still need actions? */ \ 2594 !KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2595 (kmp_int##BITS *)lhs, \ 2596 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2597 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \ 2598 KMP_CPU_PAUSE(); \ 2599 temp_val = *lhs; \ 2600 old_value = temp_val; \ 2601 } \ 2602 if (flag) \ 2603 return rhs; \ 2604 else \ 2605 return old_value; \ 2606 } 2607 2608 // ------------------------------------------------------------------------- 2609 // 1-byte, 2-byte operands - use critical section 2610 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2611 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2612 TYPE new_value, old_value; \ 2613 if (*lhs OP rhs) { /* need actions? */ \ 2614 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \ 2615 MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \ 2616 } \ 2617 return *lhs; \ 2618 } 2619 2620 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2621 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2622 TYPE new_value, old_value; \ 2623 if (*lhs OP rhs) { \ 2624 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \ 2625 MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \ 2626 } \ 2627 return *lhs; \ 2628 } 2629 2630 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <, 2631 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt 2632 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >, 2633 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt 2634 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <, 2635 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt 2636 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >, 2637 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt 2638 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <, 2639 0) // __kmpc_atomic_fixed4_max_cpt 2640 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >, 2641 0) // __kmpc_atomic_fixed4_min_cpt 2642 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <, 2643 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt 2644 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >, 2645 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt 2646 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <, 2647 KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt 2648 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >, 2649 KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt 2650 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <, 2651 KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt 2652 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >, 2653 KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt 2654 #if KMP_HAVE_QUAD 2655 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r, 2656 1) // __kmpc_atomic_float16_max_cpt 2657 MIN_MAX_CRITICAL_CPT(float16, min_cpt, 
QUAD_LEGACY, >, 16r, 2658 1) // __kmpc_atomic_float16_min_cpt 2659 #if (KMP_ARCH_X86) 2660 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r, 2661 1) // __kmpc_atomic_float16_max_a16_cpt 2662 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r, 2663 1) // __kmpc_atomic_float16_mix_a16_cpt 2664 #endif 2665 #endif 2666 2667 // ------------------------------------------------------------------------ 2668 #ifdef KMP_GOMP_COMPAT 2669 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \ 2670 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2671 KMP_CHECK_GTID; \ 2672 OP_CRITICAL_CPT(OP, 0); \ 2673 } 2674 #else 2675 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) 2676 #endif /* KMP_GOMP_COMPAT */ 2677 // ------------------------------------------------------------------------ 2678 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2679 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2680 TYPE new_value; \ 2681 OP_GOMP_CRITICAL_EQV_CPT(^= ~, GOMP_FLAG) /* send assignment */ \ 2682 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2683 } 2684 2685 // ------------------------------------------------------------------------ 2686 2687 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^, 2688 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt 2689 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^, 2690 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt 2691 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^, 2692 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt 2693 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^, 2694 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt 2695 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~, 2696 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt 2697 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~, 2698 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt 2699 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~, 2700 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt 2701 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~, 2702 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt 2703 2704 // ------------------------------------------------------------------------ 2705 // Routines for Extended types: long double, _Quad, complex flavours (use 2706 // critical section) 2707 // TYPE_ID, OP_ID, TYPE - detailed above 2708 // OP - operator 2709 // LCK_ID - lock identifier, used to possibly distinguish lock variable 2710 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2711 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2712 TYPE new_value; \ 2713 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \ 2714 OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \ 2715 } 2716 2717 // ------------------------------------------------------------------------ 2718 // Workaround for cmplx4. Regular routines with return value don't work 2719 // on Win_32e. Let's return captured values through the additional parameter. 
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
#define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  if (flag) { \
    (*lhs) OP rhs; \
    (*out) = (*lhs); \
  } else { \
    (*out) = (*lhs); \
    (*lhs) OP rhs; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_WRK(OP## =, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
                                         TYPE rhs, TYPE *out, int flag) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
  }
// The end of workaround for cmplx4

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r, 1) // __kmpc_atomic_float10_add_cpt
ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r, 1) // __kmpc_atomic_float10_sub_cpt
ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r, 1) // __kmpc_atomic_float10_mul_cpt
ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r, 1) // __kmpc_atomic_float10_div_cpt
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r, 1) // __kmpc_atomic_float16_add_cpt
ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r, 1) // __kmpc_atomic_float16_sub_cpt
ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r, 1) // __kmpc_atomic_float16_mul_cpt
ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r, 1) // __kmpc_atomic_float16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r, 1) // __kmpc_atomic_float16_add_a16_cpt
ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r, 1) // __kmpc_atomic_float16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r, 1) // __kmpc_atomic_float16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r, 1) // __kmpc_atomic_float16_div_a16_cpt
#endif
#endif

// routines for complex types

// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div_cpt
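// A caller of the cmplx4 capture entry points above receives the captured
// value through the extra `out` parameter instead of a return value.
// Illustrative sketch only; the `loc` and `gtid` plumbing is simplified:
//
//   kmp_cmplx32 x;    // the atomically updated location
//   kmp_cmplx32 expr; // the right-hand side
//   kmp_cmplx32 v;    // receives the captured value
//   // v = x = x + expr;  (flag == 1: capture the value after the update)
//   __kmpc_atomic_cmplx4_add_cpt(&loc, gtid, &x, expr, &v, 1);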
ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add_cpt
ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div_cpt
ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c, 1) // __kmpc_atomic_cmplx10_add_cpt
ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 1) // __kmpc_atomic_cmplx10_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 1) // __kmpc_atomic_cmplx10_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c, 1) // __kmpc_atomic_cmplx10_div_cpt
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c, 1) // __kmpc_atomic_cmplx16_add_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 1) // __kmpc_atomic_cmplx16_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 1) // __kmpc_atomic_cmplx16_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c, 1) // __kmpc_atomic_cmplx16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 1) // __kmpc_atomic_cmplx16_add_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 1) // __kmpc_atomic_cmplx16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 1) // __kmpc_atomic_cmplx16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 1) // __kmpc_atomic_cmplx16_div_a16_cpt
#endif
#endif

#if OMP_40_ENABLED

// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; }
// { x = expr binop x; v = x; } for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_CPT_REV(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  if (flag) { \
    /*temp_val = (*lhs);*/ \
    (*lhs) = (rhs)OP(*lhs); \
    new_value = (*lhs); \
  } else { \
    new_value = (*lhs); \
    (*lhs) = (rhs)OP(*lhs); \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_REV(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs OP old_value; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_CPU_PAUSE(); \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs OP old_value; \
    } \
    if (flag) { \
      return new_value; \
    } else \
      return old_value; \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  TYPE KMP_ATOMIC_VOLATILE temp_val; \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  }

ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /, KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<, KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -, KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /, KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<, KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -, KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /, KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /, KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<, KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -, KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /, KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<, KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -, KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /, KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -, KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /, KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -, KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  TYPE KMP_ATOMIC_VOLATILE temp_val; \
  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_REV(OP, LCK_ID) \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r, 1) // __kmpc_atomic_float10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r, 1) // __kmpc_atomic_float10_div_cpt_rev
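// For the reversed operations the operands swap sides: x = expr OP x rather
// than x = x OP expr. Roughly, for the long double variant above
// (behavioural sketch only, not the literal expansion):
//
//   long double __kmpc_atomic_float10_sub_cpt_rev(ident_t *id_ref, int gtid,
//                                                 long double *lhs,
//                                                 long double rhs, int flag) {
//     long double new_value;
//     __kmp_acquire_atomic_lock(&ATOMIC_LOCK10r, gtid);
//     if (flag) {
//       (*lhs) = rhs - (*lhs); // x = expr - x
//       new_value = (*lhs);    // capture after the update
//     } else {
//       new_value = (*lhs);    // capture before the update
//       (*lhs) = rhs - (*lhs);
//     }
//     __kmp_release_atomic_lock(&ATOMIC_LOCK10r, gtid);
//     return new_value;
//   }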
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 1) // __kmpc_atomic_float16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 1) // __kmpc_atomic_float16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 1) // __kmpc_atomic_float16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 1) // __kmpc_atomic_float16_div_a16_cpt_rev
#endif
#endif

// routines for complex types

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
#define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  if (flag) { \
    (*lhs) = (rhs)OP(*lhs); \
    (*out) = (*lhs); \
  } else { \
    (*out) = (*lhs); \
    (*lhs) = (rhs)OP(*lhs); \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_REV_WRK(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
                                    GOMP_FLAG) \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
  }
// The end of workaround for cmplx4

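// The workaround entry points generated by ATOMIC_CRITICAL_CPT_REV_WRK
// (such as the cmplx4 variants instantiated just below) return void and
// write the captured value through `out`. Behavioural sketch only:
//
//   void __kmpc_atomic_cmplx4_sub_cpt_rev(ident_t *id_ref, int gtid,
//                                         kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                         kmp_cmplx32 *out, int flag) {
//     __kmp_acquire_atomic_lock(&ATOMIC_LOCK8c, gtid);
//     if (flag) {
//       (*lhs) = rhs - (*lhs); // x = expr - x
//       (*out) = (*lhs);       // capture after the update
//     } else {
//       (*out) = (*lhs);       // capture before the update
//       (*lhs) = rhs - (*lhs);
//     }
//     __kmp_release_atomic_lock(&ATOMIC_LOCK8c, gtid);
//   }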
// !!! TODO: check if we need to return void for cmplx4 routines
// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div_cpt_rev

ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 1) // __kmpc_atomic_cmplx10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 1) // __kmpc_atomic_cmplx10_div_cpt_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 1) // __kmpc_atomic_cmplx16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 1) // __kmpc_atomic_cmplx16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
#endif
#endif

// Capture reverse for mixed type: RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                                   RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                                    LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_CPT_REV(OP, LCK_ID) /* send assignment */ \
  }

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp

ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp

#endif // KMP_HAVE_QUAD

// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}

#define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
                                     TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP(LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  old_value = (*lhs); \
  (*lhs) = rhs; \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return old_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP(FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    CRITICAL_SWP(0); \
  }
#else
#define GOMP_CRITICAL_SWP(FLAG)
#endif /* KMP_GOMP_COMPAT */

#define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
  return old_value; \
  }
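// Capture-write boils down to an unconditional exchange; the ATOMIC_XCHG_SWP
// flavour above maps directly onto the fixed-size exchange primitives.
// Roughly, for the 4-byte case (behavioural sketch only, GOMP compatibility
// path omitted):
//
//   kmp_int32 __kmpc_atomic_fixed4_swp(ident_t *id_ref, int gtid,
//                                      kmp_int32 *lhs, kmp_int32 rhs) {
//     // v = x; x = expr;  -- the old value comes back to the caller as v
//     return KMP_XCHG_FIXED32(lhs, rhs);
//   }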
// ------------------------------------------------------------------------
#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
  return old_value; \
  }

// ------------------------------------------------------------------------
#define CMPXCHG_SWP(TYPE, BITS) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_CPU_PAUSE(); \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs; \
    } \
    return old_value; \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  CMPXCHG_SWP(TYPE, BITS) \
  }

ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp

ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32, KMP_ARCH_X86) // __kmpc_atomic_float4_swp

#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64, KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#else
ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64, KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#endif

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
#define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  CRITICAL_SWP(LCK_ID) \
  }

// ------------------------------------------------------------------------
// !!! TODO: check if we need to return void for cmplx4 routines
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.

#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
                                     TYPE rhs, TYPE *out) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP_WRK(LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  tmp = (*lhs); \
  (*lhs) = (rhs); \
  (*out) = tmp; \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP_WRK(FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    CRITICAL_SWP_WRK(0); \
  }
#else
#define GOMP_CRITICAL_SWP_WRK(FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
  TYPE tmp; \
  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
  CRITICAL_SWP_WRK(LCK_ID) \
  }
// The end of workaround for cmplx4

ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
#endif
// cmplx4 routine to return void
ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp

// ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp

ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r, 1) // __kmpc_atomic_float16_a16_swp
ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1) // __kmpc_atomic_cmplx16_a16_swp
#endif
#endif

// End of OpenMP 4.0 Capture

#endif // OMP_40_ENABLED

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

#undef OP_CRITICAL

/* ------------------------------------------------------------------------ */
/* Generic atomic routines */

void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#else
      TRUE
#endif
      ) {
    kmp_int8 old_value, new_value;

    old_value = *(kmp_int8 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
                                       *(kmp_int8 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int8 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 1-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
  }
}
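// The generic routines take the operation itself as a callback: `f(&out,
// &in, rhs)` computes the combined value, and the runtime decides whether to
// apply it via compare-and-store or under a lock. A hypothetical caller
// implementing an atomic xor on a byte might look like this (illustrative
// only; `byte_xor`, `loc`, and `gtid` are not part of the runtime):
//
//   static void byte_xor(void *result, void *a, void *b) {
//     *(kmp_int8 *)result = *(kmp_int8 *)a ^ *(kmp_int8 *)b;
//   }
//   ...
//   kmp_int8 x, mask;
//   __kmpc_atomic_1(&loc, gtid, &x, &mask, byte_xor); // x ^= mask, atomically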
void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
#endif
      ) {
    kmp_int16 old_value, new_value;

    old_value = *(kmp_int16 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ16(
        (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int16 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 2-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
  }
}

void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
      // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
      // Gomp compatibility is broken if this routine is called for floats.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
#endif
      ) {
    kmp_int32 old_value, new_value;

    old_value = *(kmp_int32 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ32(
        (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int32 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_4i for all 4-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
  }
}

void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
#endif
      ) {
    kmp_int64 old_value, new_value;

    old_value = *(kmp_int64 *)lhs;
    (*f)(&new_value, &old_value, rhs);
    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ64(
        (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int64 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_8i for all 8-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
  }
}

void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
}

void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
}

void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
}

void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
}

// AC: same two routines as GOMP_atomic_start/end, but will be called by our
// compiler; duplicated so that third-party names are not used in pure Intel
// code.
// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
void __kmpc_atomic_start(void) {
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
}

void __kmpc_atomic_end(void) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
}

/*!
@}
*/

// end of file