/*
 * kmp_atomic.cpp -- ATOMIC implementation routines
 */

//===----------------------------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#include "kmp_atomic.h"
#include "kmp.h" // TRUE, asm routines prototypes

typedef unsigned char uchar;
typedef unsigned short ushort;

/*!
@defgroup ATOMIC_OPS Atomic Operations
These functions are used for implementing the many different varieties of
atomic operations.

The compiler is at liberty to inline atomic operations that are naturally
supported by the target architecture. For instance on IA-32 architecture an
atomic like this can be inlined
@code
static int s = 0;
#pragma omp atomic
s++;
@endcode
using the single instruction: `lock; incl s`

However the runtime does provide entrypoints for these operations to support
compilers that choose not to inline them. (For instance,
`__kmpc_atomic_fixed4_add` could be used to perform the increment above.)

The names of the functions are encoded by using the data type name and the
operation name, as in these tables.

Data Type  | Data type encoding
-----------|-------------------
int8_t     | `fixed1`
uint8_t    | `fixed1u`
int16_t    | `fixed2`
uint16_t   | `fixed2u`
int32_t    | `fixed4`
uint32_t   | `fixed4u`
int64_t    | `fixed8`
uint64_t   | `fixed8u`
float      | `float4`
double     | `float8`
float 10 (8087 eighty bit float) | `float10`
complex<float>   | `cmplx4`
complex<double>  | `cmplx8`
complex<float10> | `cmplx10`
<br>

Operation | Operation encoding
----------|-------------------
+ | add
- | sub
\* | mul
/ | div
& | andb
<< | shl
\>\> | shr
\| | orb
^ | xor
&& | andl
\|\| | orl
maximum | max
minimum | min
.eqv. | eqv
.neqv. | neqv

<br>
For non-commutative operations, `_rev` can also be added for the reversed
operation. For the functions that capture the result, the suffix `_cpt` is
added.

Update Functions
================
The general form of an atomic function that just performs an update (without a
`capture`)
@code
void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs );
@endcode
@param ident_t a pointer to source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand

`capture` functions
===================
The capture functions perform an atomic update and return a result, which is
either the value before the capture, or that after. They take an additional
argument to determine which result is returned.
Their general form is therefore
@code
TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs, int flag );
@endcode
@param ident_t a pointer to source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand
@param flag one if the result is to be captured *after* the operation, zero if
captured *before*.
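As an illustrative sketch of the semantics only (not of the actual lock-free
implementation), a 4-byte integer add capture behaves as if
@code
kmp_int32 __kmpc_atomic_fixed4_add_cpt(ident_t *id_ref, int gtid,
                                       kmp_int32 *lhs, kmp_int32 rhs,
                                       int flag) {
  kmp_int32 old = *lhs; // this statement and the next execute atomically
  *lhs = old + rhs;
  return flag ? old + rhs : old; // flag==1: value after; flag==0: before
}
@endcode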
The one exception to this is the `complex<float>` type where the value is not
returned, rather an extra argument pointer is passed.

They look like
@code
void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 *
lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
@endcode

Read and Write Operations
=========================
The OpenMP<sup>*</sup> standard now supports atomic operations that simply
ensure that the value is read or written atomically, with no modification
performed. In many cases on IA-32 architecture these operations can be inlined
since the architecture guarantees that no tearing occurs on aligned objects
accessed with a single memory operation of up to 64 bits in size.

The general form of the read operations is
@code
TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
@endcode

For the write operations the form is
@code
void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs
);
@endcode
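For example, reading an x87 80-bit `long double` cannot be done with a single
memory operation, so (as an illustrative sketch, where `loc` stands in for a
pointer to the compiler-generated source location)
@code
long double v;
#pragma omp atomic read
v = x;
@endcode
may be lowered to `v = __kmpc_atomic_float10_rd(loc, gtid, &x);`.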
Full list of functions
======================
This leads to the generation of 376 atomic functions, as follows.

Functions for integers
----------------------
There are versions here for integers of size 1, 2, 4 and 8 bytes both signed
and unsigned (where that matters).
@code
__kmpc_atomic_fixed1_add
__kmpc_atomic_fixed1_add_cpt
__kmpc_atomic_fixed1_add_fp
__kmpc_atomic_fixed1_andb
__kmpc_atomic_fixed1_andb_cpt
__kmpc_atomic_fixed1_andl
__kmpc_atomic_fixed1_andl_cpt
__kmpc_atomic_fixed1_div
__kmpc_atomic_fixed1_div_cpt
__kmpc_atomic_fixed1_div_cpt_rev
__kmpc_atomic_fixed1_div_float8
__kmpc_atomic_fixed1_div_fp
__kmpc_atomic_fixed1_div_rev
__kmpc_atomic_fixed1_eqv
__kmpc_atomic_fixed1_eqv_cpt
__kmpc_atomic_fixed1_max
__kmpc_atomic_fixed1_max_cpt
__kmpc_atomic_fixed1_min
__kmpc_atomic_fixed1_min_cpt
__kmpc_atomic_fixed1_mul
__kmpc_atomic_fixed1_mul_cpt
__kmpc_atomic_fixed1_mul_float8
__kmpc_atomic_fixed1_mul_fp
__kmpc_atomic_fixed1_neqv
__kmpc_atomic_fixed1_neqv_cpt
__kmpc_atomic_fixed1_orb
__kmpc_atomic_fixed1_orb_cpt
__kmpc_atomic_fixed1_orl
__kmpc_atomic_fixed1_orl_cpt
__kmpc_atomic_fixed1_rd
__kmpc_atomic_fixed1_shl
__kmpc_atomic_fixed1_shl_cpt
__kmpc_atomic_fixed1_shl_cpt_rev
__kmpc_atomic_fixed1_shl_rev
__kmpc_atomic_fixed1_shr
__kmpc_atomic_fixed1_shr_cpt
__kmpc_atomic_fixed1_shr_cpt_rev
__kmpc_atomic_fixed1_shr_rev
__kmpc_atomic_fixed1_sub
__kmpc_atomic_fixed1_sub_cpt
__kmpc_atomic_fixed1_sub_cpt_rev
__kmpc_atomic_fixed1_sub_fp
__kmpc_atomic_fixed1_sub_rev
__kmpc_atomic_fixed1_swp
__kmpc_atomic_fixed1_wr
__kmpc_atomic_fixed1_xor
__kmpc_atomic_fixed1_xor_cpt
__kmpc_atomic_fixed1u_add_fp
__kmpc_atomic_fixed1u_sub_fp
__kmpc_atomic_fixed1u_mul_fp
__kmpc_atomic_fixed1u_div
__kmpc_atomic_fixed1u_div_cpt
__kmpc_atomic_fixed1u_div_cpt_rev
__kmpc_atomic_fixed1u_div_fp
__kmpc_atomic_fixed1u_div_rev
__kmpc_atomic_fixed1u_shr
__kmpc_atomic_fixed1u_shr_cpt
__kmpc_atomic_fixed1u_shr_cpt_rev
__kmpc_atomic_fixed1u_shr_rev
__kmpc_atomic_fixed2_add
__kmpc_atomic_fixed2_add_cpt
__kmpc_atomic_fixed2_add_fp
__kmpc_atomic_fixed2_andb
__kmpc_atomic_fixed2_andb_cpt
__kmpc_atomic_fixed2_andl
__kmpc_atomic_fixed2_andl_cpt
__kmpc_atomic_fixed2_div
__kmpc_atomic_fixed2_div_cpt
__kmpc_atomic_fixed2_div_cpt_rev
__kmpc_atomic_fixed2_div_float8
__kmpc_atomic_fixed2_div_fp
__kmpc_atomic_fixed2_div_rev
__kmpc_atomic_fixed2_eqv
__kmpc_atomic_fixed2_eqv_cpt
__kmpc_atomic_fixed2_max
__kmpc_atomic_fixed2_max_cpt
__kmpc_atomic_fixed2_min
__kmpc_atomic_fixed2_min_cpt
__kmpc_atomic_fixed2_mul
__kmpc_atomic_fixed2_mul_cpt
__kmpc_atomic_fixed2_mul_float8
__kmpc_atomic_fixed2_mul_fp
__kmpc_atomic_fixed2_neqv
__kmpc_atomic_fixed2_neqv_cpt
__kmpc_atomic_fixed2_orb
__kmpc_atomic_fixed2_orb_cpt
__kmpc_atomic_fixed2_orl
__kmpc_atomic_fixed2_orl_cpt
__kmpc_atomic_fixed2_rd
__kmpc_atomic_fixed2_shl
__kmpc_atomic_fixed2_shl_cpt
__kmpc_atomic_fixed2_shl_cpt_rev
__kmpc_atomic_fixed2_shl_rev
__kmpc_atomic_fixed2_shr
__kmpc_atomic_fixed2_shr_cpt
__kmpc_atomic_fixed2_shr_cpt_rev
__kmpc_atomic_fixed2_shr_rev
__kmpc_atomic_fixed2_sub
__kmpc_atomic_fixed2_sub_cpt
__kmpc_atomic_fixed2_sub_cpt_rev
__kmpc_atomic_fixed2_sub_fp
__kmpc_atomic_fixed2_sub_rev
__kmpc_atomic_fixed2_swp
__kmpc_atomic_fixed2_wr
__kmpc_atomic_fixed2_xor
__kmpc_atomic_fixed2_xor_cpt
__kmpc_atomic_fixed2u_add_fp
__kmpc_atomic_fixed2u_sub_fp
__kmpc_atomic_fixed2u_mul_fp
__kmpc_atomic_fixed2u_div
__kmpc_atomic_fixed2u_div_cpt
__kmpc_atomic_fixed2u_div_cpt_rev
__kmpc_atomic_fixed2u_div_fp
__kmpc_atomic_fixed2u_div_rev
__kmpc_atomic_fixed2u_shr
__kmpc_atomic_fixed2u_shr_cpt
__kmpc_atomic_fixed2u_shr_cpt_rev
__kmpc_atomic_fixed2u_shr_rev
__kmpc_atomic_fixed4_add
__kmpc_atomic_fixed4_add_cpt
__kmpc_atomic_fixed4_add_fp
__kmpc_atomic_fixed4_andb
__kmpc_atomic_fixed4_andb_cpt
__kmpc_atomic_fixed4_andl
__kmpc_atomic_fixed4_andl_cpt
__kmpc_atomic_fixed4_div
__kmpc_atomic_fixed4_div_cpt
__kmpc_atomic_fixed4_div_cpt_rev
__kmpc_atomic_fixed4_div_float8
__kmpc_atomic_fixed4_div_fp
__kmpc_atomic_fixed4_div_rev
__kmpc_atomic_fixed4_eqv
__kmpc_atomic_fixed4_eqv_cpt
__kmpc_atomic_fixed4_max
__kmpc_atomic_fixed4_max_cpt
__kmpc_atomic_fixed4_min
__kmpc_atomic_fixed4_min_cpt
__kmpc_atomic_fixed4_mul
__kmpc_atomic_fixed4_mul_cpt
__kmpc_atomic_fixed4_mul_float8
__kmpc_atomic_fixed4_mul_fp
__kmpc_atomic_fixed4_neqv
__kmpc_atomic_fixed4_neqv_cpt
__kmpc_atomic_fixed4_orb
__kmpc_atomic_fixed4_orb_cpt
__kmpc_atomic_fixed4_orl
__kmpc_atomic_fixed4_orl_cpt
__kmpc_atomic_fixed4_rd
__kmpc_atomic_fixed4_shl
__kmpc_atomic_fixed4_shl_cpt
__kmpc_atomic_fixed4_shl_cpt_rev
__kmpc_atomic_fixed4_shl_rev
__kmpc_atomic_fixed4_shr
__kmpc_atomic_fixed4_shr_cpt
__kmpc_atomic_fixed4_shr_cpt_rev
__kmpc_atomic_fixed4_shr_rev
__kmpc_atomic_fixed4_sub
__kmpc_atomic_fixed4_sub_cpt
__kmpc_atomic_fixed4_sub_cpt_rev
__kmpc_atomic_fixed4_sub_fp
__kmpc_atomic_fixed4_sub_rev
__kmpc_atomic_fixed4_swp
__kmpc_atomic_fixed4_wr
__kmpc_atomic_fixed4_xor
__kmpc_atomic_fixed4_xor_cpt
__kmpc_atomic_fixed4u_add_fp
__kmpc_atomic_fixed4u_sub_fp
__kmpc_atomic_fixed4u_mul_fp
__kmpc_atomic_fixed4u_div
__kmpc_atomic_fixed4u_div_cpt
__kmpc_atomic_fixed4u_div_cpt_rev
__kmpc_atomic_fixed4u_div_fp
__kmpc_atomic_fixed4u_div_rev
__kmpc_atomic_fixed4u_shr
__kmpc_atomic_fixed4u_shr_cpt
__kmpc_atomic_fixed4u_shr_cpt_rev
__kmpc_atomic_fixed4u_shr_rev
__kmpc_atomic_fixed8_add
__kmpc_atomic_fixed8_add_cpt
__kmpc_atomic_fixed8_add_fp
__kmpc_atomic_fixed8_andb
__kmpc_atomic_fixed8_andb_cpt
__kmpc_atomic_fixed8_andl
__kmpc_atomic_fixed8_andl_cpt
__kmpc_atomic_fixed8_div
__kmpc_atomic_fixed8_div_cpt
__kmpc_atomic_fixed8_div_cpt_rev
__kmpc_atomic_fixed8_div_float8
__kmpc_atomic_fixed8_div_fp
__kmpc_atomic_fixed8_div_rev
__kmpc_atomic_fixed8_eqv
__kmpc_atomic_fixed8_eqv_cpt
__kmpc_atomic_fixed8_max
__kmpc_atomic_fixed8_max_cpt
__kmpc_atomic_fixed8_min
__kmpc_atomic_fixed8_min_cpt
__kmpc_atomic_fixed8_mul
__kmpc_atomic_fixed8_mul_cpt
__kmpc_atomic_fixed8_mul_float8
__kmpc_atomic_fixed8_mul_fp
__kmpc_atomic_fixed8_neqv
__kmpc_atomic_fixed8_neqv_cpt
__kmpc_atomic_fixed8_orb
__kmpc_atomic_fixed8_orb_cpt
__kmpc_atomic_fixed8_orl
__kmpc_atomic_fixed8_orl_cpt
__kmpc_atomic_fixed8_rd
__kmpc_atomic_fixed8_shl
__kmpc_atomic_fixed8_shl_cpt
__kmpc_atomic_fixed8_shl_cpt_rev
__kmpc_atomic_fixed8_shl_rev
__kmpc_atomic_fixed8_shr
__kmpc_atomic_fixed8_shr_cpt
__kmpc_atomic_fixed8_shr_cpt_rev
__kmpc_atomic_fixed8_shr_rev
__kmpc_atomic_fixed8_sub
__kmpc_atomic_fixed8_sub_cpt
__kmpc_atomic_fixed8_sub_cpt_rev
__kmpc_atomic_fixed8_sub_fp
__kmpc_atomic_fixed8_sub_rev
__kmpc_atomic_fixed8_swp
__kmpc_atomic_fixed8_wr
__kmpc_atomic_fixed8_xor
__kmpc_atomic_fixed8_xor_cpt
__kmpc_atomic_fixed8u_add_fp
__kmpc_atomic_fixed8u_sub_fp
__kmpc_atomic_fixed8u_mul_fp
__kmpc_atomic_fixed8u_div
__kmpc_atomic_fixed8u_div_cpt
__kmpc_atomic_fixed8u_div_cpt_rev
__kmpc_atomic_fixed8u_div_fp
__kmpc_atomic_fixed8u_div_rev
__kmpc_atomic_fixed8u_shr
__kmpc_atomic_fixed8u_shr_cpt
__kmpc_atomic_fixed8u_shr_cpt_rev
__kmpc_atomic_fixed8u_shr_rev
@endcode

Functions for floating point
----------------------------
There are versions here for floating point numbers of size 4, 8, 10 and 16
bytes. (Ten byte floats are used by X87, but are now rare).
@code
__kmpc_atomic_float4_add
__kmpc_atomic_float4_add_cpt
__kmpc_atomic_float4_add_float8
__kmpc_atomic_float4_add_fp
__kmpc_atomic_float4_div
__kmpc_atomic_float4_div_cpt
__kmpc_atomic_float4_div_cpt_rev
__kmpc_atomic_float4_div_float8
__kmpc_atomic_float4_div_fp
__kmpc_atomic_float4_div_rev
__kmpc_atomic_float4_max
__kmpc_atomic_float4_max_cpt
__kmpc_atomic_float4_min
__kmpc_atomic_float4_min_cpt
__kmpc_atomic_float4_mul
__kmpc_atomic_float4_mul_cpt
__kmpc_atomic_float4_mul_float8
__kmpc_atomic_float4_mul_fp
__kmpc_atomic_float4_rd
__kmpc_atomic_float4_sub
__kmpc_atomic_float4_sub_cpt
__kmpc_atomic_float4_sub_cpt_rev
__kmpc_atomic_float4_sub_float8
__kmpc_atomic_float4_sub_fp
__kmpc_atomic_float4_sub_rev
__kmpc_atomic_float4_swp
__kmpc_atomic_float4_wr
__kmpc_atomic_float8_add
__kmpc_atomic_float8_add_cpt
__kmpc_atomic_float8_add_fp
__kmpc_atomic_float8_div
__kmpc_atomic_float8_div_cpt
__kmpc_atomic_float8_div_cpt_rev
__kmpc_atomic_float8_div_fp
__kmpc_atomic_float8_div_rev
__kmpc_atomic_float8_max
__kmpc_atomic_float8_max_cpt
__kmpc_atomic_float8_min
__kmpc_atomic_float8_min_cpt
__kmpc_atomic_float8_mul
__kmpc_atomic_float8_mul_cpt
__kmpc_atomic_float8_mul_fp
__kmpc_atomic_float8_rd
__kmpc_atomic_float8_sub
__kmpc_atomic_float8_sub_cpt
__kmpc_atomic_float8_sub_cpt_rev
__kmpc_atomic_float8_sub_fp
__kmpc_atomic_float8_sub_rev
__kmpc_atomic_float8_swp
__kmpc_atomic_float8_wr
__kmpc_atomic_float10_add
__kmpc_atomic_float10_add_cpt
__kmpc_atomic_float10_add_fp
__kmpc_atomic_float10_div
__kmpc_atomic_float10_div_cpt
__kmpc_atomic_float10_div_cpt_rev
__kmpc_atomic_float10_div_fp
__kmpc_atomic_float10_div_rev
__kmpc_atomic_float10_mul
__kmpc_atomic_float10_mul_cpt
__kmpc_atomic_float10_mul_fp
__kmpc_atomic_float10_rd
__kmpc_atomic_float10_sub
__kmpc_atomic_float10_sub_cpt
__kmpc_atomic_float10_sub_cpt_rev
__kmpc_atomic_float10_sub_fp
__kmpc_atomic_float10_sub_rev
__kmpc_atomic_float10_swp
__kmpc_atomic_float10_wr
__kmpc_atomic_float16_add
__kmpc_atomic_float16_add_cpt
__kmpc_atomic_float16_div
__kmpc_atomic_float16_div_cpt
__kmpc_atomic_float16_div_cpt_rev
__kmpc_atomic_float16_div_rev
__kmpc_atomic_float16_max
__kmpc_atomic_float16_max_cpt
__kmpc_atomic_float16_min
__kmpc_atomic_float16_min_cpt
__kmpc_atomic_float16_mul
__kmpc_atomic_float16_mul_cpt
__kmpc_atomic_float16_rd
__kmpc_atomic_float16_sub
__kmpc_atomic_float16_sub_cpt
__kmpc_atomic_float16_sub_cpt_rev
__kmpc_atomic_float16_sub_rev
__kmpc_atomic_float16_swp
__kmpc_atomic_float16_wr
@endcode

Functions for Complex types
---------------------------
Functions for complex types whose component floating point variables are of
size 4, 8, 10 or 16 bytes. The names here are based on the size of the
component float, *not* the size of the complex type. So
`__kmpc_atomic_cmplx8_add` is an operation on a `complex<double>` or
`complex(kind=8)`, *not* `complex<float>`.
@code
__kmpc_atomic_cmplx4_add
__kmpc_atomic_cmplx4_add_cmplx8
__kmpc_atomic_cmplx4_add_cpt
__kmpc_atomic_cmplx4_div
__kmpc_atomic_cmplx4_div_cmplx8
__kmpc_atomic_cmplx4_div_cpt
__kmpc_atomic_cmplx4_div_cpt_rev
__kmpc_atomic_cmplx4_div_rev
__kmpc_atomic_cmplx4_mul
__kmpc_atomic_cmplx4_mul_cmplx8
__kmpc_atomic_cmplx4_mul_cpt
__kmpc_atomic_cmplx4_rd
__kmpc_atomic_cmplx4_sub
__kmpc_atomic_cmplx4_sub_cmplx8
__kmpc_atomic_cmplx4_sub_cpt
__kmpc_atomic_cmplx4_sub_cpt_rev
__kmpc_atomic_cmplx4_sub_rev
__kmpc_atomic_cmplx4_swp
__kmpc_atomic_cmplx4_wr
__kmpc_atomic_cmplx8_add
__kmpc_atomic_cmplx8_add_cpt
__kmpc_atomic_cmplx8_div
__kmpc_atomic_cmplx8_div_cpt
__kmpc_atomic_cmplx8_div_cpt_rev
__kmpc_atomic_cmplx8_div_rev
__kmpc_atomic_cmplx8_mul
__kmpc_atomic_cmplx8_mul_cpt
__kmpc_atomic_cmplx8_rd
__kmpc_atomic_cmplx8_sub
__kmpc_atomic_cmplx8_sub_cpt
__kmpc_atomic_cmplx8_sub_cpt_rev
__kmpc_atomic_cmplx8_sub_rev
__kmpc_atomic_cmplx8_swp
__kmpc_atomic_cmplx8_wr
__kmpc_atomic_cmplx10_add
__kmpc_atomic_cmplx10_add_cpt
__kmpc_atomic_cmplx10_div
__kmpc_atomic_cmplx10_div_cpt
__kmpc_atomic_cmplx10_div_cpt_rev
__kmpc_atomic_cmplx10_div_rev
__kmpc_atomic_cmplx10_mul
__kmpc_atomic_cmplx10_mul_cpt
__kmpc_atomic_cmplx10_rd
__kmpc_atomic_cmplx10_sub
__kmpc_atomic_cmplx10_sub_cpt
__kmpc_atomic_cmplx10_sub_cpt_rev
__kmpc_atomic_cmplx10_sub_rev
__kmpc_atomic_cmplx10_swp
__kmpc_atomic_cmplx10_wr
__kmpc_atomic_cmplx16_add
__kmpc_atomic_cmplx16_add_cpt
__kmpc_atomic_cmplx16_div
__kmpc_atomic_cmplx16_div_cpt
__kmpc_atomic_cmplx16_div_cpt_rev
__kmpc_atomic_cmplx16_div_rev
__kmpc_atomic_cmplx16_mul
__kmpc_atomic_cmplx16_mul_cpt
__kmpc_atomic_cmplx16_rd
__kmpc_atomic_cmplx16_sub
__kmpc_atomic_cmplx16_sub_cpt
__kmpc_atomic_cmplx16_sub_cpt_rev
__kmpc_atomic_cmplx16_swp
__kmpc_atomic_cmplx16_wr
@endcode
*/
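// As an example of how these entry points are used: a compiler that chooses
// not to inline an atomic update can lower it to a runtime call (a sketch;
// `loc` stands in for a pointer to the compiler-generated ident_t source
// location, and the gtid argument is whatever the compiler has at hand,
// here obtained via __kmp_entry_gtid()):
//
//   double x;
//   #pragma omp atomic
//   x += 2.5;
//
// becomes, roughly:
//
//   __kmpc_atomic_float8_add(loc, __kmp_entry_gtid(), &x, 2.5);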
/*!
@ingroup ATOMIC_OPS
@{
*/

/*
 * Global vars
 */

#ifndef KMP_GOMP_COMPAT
int __kmp_atomic_mode = 1; // Intel perf
#else
int __kmp_atomic_mode = 2; // GOMP compatibility
#endif /* KMP_GOMP_COMPAT */

KMP_ALIGN(128)

// Control access to all user coded atomics in Gnu compat mode
kmp_atomic_lock_t __kmp_atomic_lock;
// Control access to all user coded atomics for 1-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_1i;
// Control access to all user coded atomics for 2-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_2i;
// Control access to all user coded atomics for 4-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_4i;
// Control access to all user coded atomics for kmp_real32 data type
kmp_atomic_lock_t __kmp_atomic_lock_4r;
// Control access to all user coded atomics for 8-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_8i;
// Control access to all user coded atomics for kmp_real64 data type
kmp_atomic_lock_t __kmp_atomic_lock_8r;
// Control access to all user coded atomics for float complex data type
kmp_atomic_lock_t __kmp_atomic_lock_8c;
// Control access to all user coded atomics for long double data type
kmp_atomic_lock_t __kmp_atomic_lock_10r;
// Control access to all user coded atomics for _Quad data type
kmp_atomic_lock_t __kmp_atomic_lock_16r;
// Control access to all user coded atomics for double complex data type
kmp_atomic_lock_t __kmp_atomic_lock_16c;
// Control access to all user coded atomics for long double complex type
kmp_atomic_lock_t __kmp_atomic_lock_20c;
// Control access to all user coded atomics for _Quad complex data type
kmp_atomic_lock_t __kmp_atomic_lock_32c;

/* 2007-03-02:
   Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a
   bug on *_32 and *_32e. This is just a temporary workaround for the problem.
   It seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG
   routines in assembler language. */
#define KMP_ATOMIC_VOLATILE volatile
#if (KMP_ARCH_X86) && KMP_HAVE_QUAD

static inline void operator+=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q /= rhs.q;
}
static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q > rhs.q;
}

static inline void operator+=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q /= rhs.q;
}
static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q > rhs.q;
}

static inline void operator+=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q /= rhs.q;
}

static inline void operator+=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q /= rhs.q;
}

#endif
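// These overloads exist because the *_a4/*_a16 alignment wrapper types are
// used as TYPE in the critical-section macros below, whose bodies expand to
// "(*lhs) OP= (rhs)". For example (a sketch of the overload resolution), in
// __kmpc_atomic_float16_add_a16 the statement
//   (*lhs) += (rhs);
// resolves to operator+=(Quad_a16_t &, Quad_a16_t &) defined above, which
// forwards the operation to the underlying _Quad member q.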
// ATOMIC implementation routines -----------------------------------------
// One routine for each operation and operand type.
// All routine declarations look like
// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );

#define KMP_CHECK_GTID                                                        \
  if (gtid == KMP_GTID_UNKNOWN) {                                             \
    gtid = __kmp_entry_gtid();                                                \
  } // check and get gtid when needed

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//           fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE)                          \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid,       \
                                             TYPE *lhs, TYPE rhs) {           \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                      \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
// Lock variables used for critical sections for various size operands
#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL(OP, LCK_ID)                                               \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
                                                                              \
  (*lhs) OP(rhs);                                                             \
                                                                              \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// ------------------------------------------------------------------------
// For GNU compatibility, we may need to use a critical section, even though
// it is not required by the ISA.
//
// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
// critical section. On Intel(R) 64, all atomic operations are done with fetch
// and add or compare and exchange. Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions
// which require a critical section, where we predict that they will be
// implemented in the Gnu codegen by calling GOMP_atomic_start() /
// GOMP_atomic_end()).
//
// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
// the FLAG parameter should always be 1. If we know that we will be using
// a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
// locks that are specialized based upon the size or type of the data.
//
// If FLAG is 0, then we are relying on dead code elimination by the build
// compiler to get rid of the useless block of code, and save a needless
// branch at runtime.
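// To make the macro machinery concrete: OP_CRITICAL(+=, 8c), after the
// ATOMIC_LOCK##LCK_ID token pasting, expands to roughly the following
// (a sketch for illustration):
//
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8c, gtid);
//   (*lhs) += (rhs);
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_8c, gtid);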
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL(OP, FLAG)                                            \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL(OP, 0);                                                       \
    return;                                                                   \
  }
#else
#define OP_GOMP_CRITICAL(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

#if KMP_MIC
#define KMP_DO_PAUSE _mm_delay_32(1)
#else
#define KMP_DO_PAUSE KMP_CPU_PAUSE()
#endif /* KMP_MIC */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
#define OP_CMPXCHG(TYPE, BITS, OP)                                            \
  {                                                                           \
    TYPE old_value, new_value;                                                \
    old_value = *(TYPE volatile *)lhs;                                        \
    new_value = old_value OP rhs;                                             \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                  \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,    \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                       \
      KMP_DO_PAUSE;                                                           \
                                                                              \
      old_value = *(TYPE volatile *)lhs;                                      \
      new_value = old_value OP rhs;                                           \
    }                                                                         \
  }

#if USE_CMPXCHG_FIX
// 2007-06-25:
// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
// and win_32e are affected (I verified the asm). Compiler ignores the
// volatile qualifier of the temp_val in the OP_CMPXCHG macro. This is a
// problem of the compiler. Related tracker is C76005, targeted to 11.0.
// I verified the asm of the workaround.
#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                 \
  {                                                                           \
    struct _sss {                                                             \
      TYPE cmp;                                                               \
      kmp_int##BITS *vvv;                                                     \
    };                                                                        \
    struct _sss old_value, new_value;                                         \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp;                          \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp;                          \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                          \
    new_value.cmp = old_value.cmp OP rhs;                                     \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                  \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) {                     \
      KMP_DO_PAUSE;                                                           \
                                                                              \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs;                        \
      new_value.cmp = old_value.cmp OP rhs;                                   \
    }                                                                         \
  }
// end of the first part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,        \
                         GOMP_FLAG)                                           \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                         \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */           \
  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                       \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
                       GOMP_FLAG)                                             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                         \
  OP_CMPXCHG(TYPE, BITS, OP)                                                  \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,     \
                                  MASK, GOMP_FLAG)                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                         \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP)                                       \
  }
// end of the second part of the workaround for C78287
#endif
#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,        \
                         GOMP_FLAG)                                           \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                         \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                     \
    /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */         \
    KMP_TEST_THEN_ADD##BITS(lhs, OP rhs);                                     \
  } else {                                                                    \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */       \
  }                                                                           \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,          \
                       GOMP_FLAG)                                             \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                         \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                     \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                          \
  } else {                                                                    \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */       \
  }                                                                           \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID,     \
                                  MASK, GOMP_FLAG)                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG)                                         \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                     \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                          \
  } else {                                                                    \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */       \
  }                                                                           \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// Routines for ATOMIC 4-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
                 0) // __kmpc_atomic_fixed4_add
ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
                 0) // __kmpc_atomic_fixed4_sub

ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_add
ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_sub

// Routines for ATOMIC 8-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub

ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_add
ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_sub
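// To illustrate what one of these instantiations generates: on IA-32 /
// Intel(R) 64, ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
// KMP_ARCH_X86) expands to roughly the following (a sketch; the GOMP-compat
// branch from OP_GOMP_CRITICAL, the debug assert and the trace from
// ATOMIC_BEGIN are elided):
//
//   void __kmpc_atomic_float4_add(ident_t *id_ref, int gtid,
//                                 kmp_real32 *lhs, kmp_real32 rhs) {
//     kmp_real32 old_value, new_value;
//     old_value = *(kmp_real32 volatile *)lhs;
//     new_value = old_value + rhs;
//     while (!KMP_COMPARE_AND_STORE_ACQ32(
//         (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
//         *VOLATILE_CAST(kmp_int32 *) & new_value)) {
//       KMP_DO_PAUSE; // another thread raced us; reread and retry
//       old_value = *(kmp_real32 volatile *)lhs;
//       new_value = old_value + rhs;
//     }
//   }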
// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable
// MASK - used for alignment check

//             TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
//             TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
               0) // __kmpc_atomic_fixed1_andb
ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
               0) // __kmpc_atomic_fixed1_orb
ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
               0) // __kmpc_atomic_fixed1_xor
ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
               0) // __kmpc_atomic_fixed2_andb
ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
               0) // __kmpc_atomic_fixed2_orb
ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
               0) // __kmpc_atomic_fixed2_xor
ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
               0) // __kmpc_atomic_fixed4_andb
ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
               0) // __kmpc_atomic_fixed4_orb
ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
               0) // __kmpc_atomic_fixed4_xor
ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_div
ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_mul
ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_div
ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_mul
//             TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG

/* ------------------------------------------------------------------------ */
/* Routines for C/C++ Reduction operators && and ||                          */

// ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no combined assignment
//   TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
#define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                      \
  OP_CRITICAL(= *lhs OP, LCK_ID)                                              \
  }
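// A note on the unusual OP argument: C has no compound assignment for && and
// ||, so OP is passed as "= *lhs &&" (or "= *lhs ||"). As a sketch of the
// expansion, OP_CRITICAL(= *lhs &&, 1i) pastes OP between (*lhs) and (rhs),
// producing
//   (*lhs) = *lhs && (rhs);
// inside the critical section.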
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
                      GOMP_FLAG)                                              \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                      \
  OP_CMPXCHG(TYPE, BITS, OP)                                                  \
  }

#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,           \
                      GOMP_FLAG)                                              \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG)                                      \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                     \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                          \
  } else {                                                                    \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */             \
  }                                                                           \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
              0) // __kmpc_atomic_fixed4_andl
ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
              0) // __kmpc_atomic_fixed4_orl
ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl

/* ------------------------------------------------------------------------- */
/* Routines for Fortran operators that have no C equivalent:                  */
/*     MAX, MIN, .EQV., .NEQV.                                                 */
/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}            */
/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}   */

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - operator to check if we need any actions?
#define MIN_MAX_CRITSECT(OP, LCK_ID)                                          \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
                                                                              \
  if (*lhs OP rhs) { /* still need actions? */                                \
    *lhs = rhs;                                                               \
  }                                                                           \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)                                       \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    MIN_MAX_CRITSECT(OP, 0);                                                  \
    return;                                                                   \
  }
#else
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                       \
  {                                                                           \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                        \
    TYPE old_value;                                                           \
    temp_val = *lhs;                                                          \
    old_value = temp_val;                                                     \
    while (old_value OP rhs && /* still need actions? */                      \
           !KMP_COMPARE_AND_STORE_ACQ##BITS(                                  \
               (kmp_int##BITS *)lhs,                                          \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value,                   \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) {                      \
      KMP_CPU_PAUSE();                                                        \
      temp_val = *lhs;                                                        \
      old_value = temp_val;                                                   \
    }                                                                         \
  }

// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)         \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  if (*lhs OP rhs) { /* need actions? */                                      \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                      \
    MIN_MAX_CRITSECT(OP, LCK_ID)                                              \
  }                                                                           \
  }
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,        \
                         GOMP_FLAG)                                           \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  if (*lhs OP rhs) {                                                          \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                      \
    MIN_MAX_CMPXCHG(TYPE, BITS, OP)                                           \
  }                                                                           \
  }

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,        \
                         GOMP_FLAG)                                           \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  if (*lhs OP rhs) {                                                          \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG)                                      \
    if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                   \
      MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */                   \
    } else {                                                                  \
      KMP_CHECK_GTID;                                                         \
      MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */                    \
    }                                                                         \
  }                                                                           \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
                 0) // __kmpc_atomic_fixed4_max
MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
                 0) // __kmpc_atomic_fixed4_min
MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_max
MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_min
MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_max
MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_min
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
                 1) // __kmpc_atomic_float16_max
MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
                 1) // __kmpc_atomic_float16_min
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
                 1) // __kmpc_atomic_float16_max_a16
MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
                 1) // __kmpc_atomic_float16_min_a16
#endif
#endif
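// A note on the OP parameter above: for max the operator passed is '<', so
// "*lhs OP rhs" reads "is the current value still smaller than rhs?", i.e.
// "do we still need to act?". As a sketch of intended use, the Fortran
// statement
//
//   !$omp atomic
//   x = max(x, y)
//
// with default (4-byte) integer x can be lowered to
//
//   __kmpc_atomic_fixed4_max(loc, gtid, &x, y);
//
// which only stores y when it is greater than the current *lhs; the CAS loop
// rechecks the comparison because another thread may have installed an even
// larger value in the meantime.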
// ------------------------------------------------------------------------
// Need separate macros for .EQV. because of the need of complement (~)
// OP ignored for critical sections, ^=~ used instead
#define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)          \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */                     \
  OP_CRITICAL(^= ~, LCK_ID) /* send assignment and complement */              \
  }

// ------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                        GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */                     \
  OP_CMPXCHG(TYPE, BITS, OP)                                                  \
  }
// ------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK,         \
                        GOMP_FLAG)                                            \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG)                                           \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) {                                     \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */                          \
  } else {                                                                    \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */          \
  }                                                                           \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
                KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
                KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
                KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
                KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG)          \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void)                                    \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */                   \
  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */                           \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL(float10, add, long double, +, 10r,
                1) // __kmpc_atomic_float10_add
ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
                1) // __kmpc_atomic_float10_sub
ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
                1) // __kmpc_atomic_float10_mul
ATOMIC_CRITICAL(float10, div, long double, /, 10r,
                1) // __kmpc_atomic_float10_div
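// For these extended types the runtime serializes on a lock rather than
// using a compare-and-store loop, since the full operand does not fit a
// single compare-exchange. As a sketch of intended use,
//
//   long double x;
//   #pragma omp atomic
//   x *= y;
//
// can be lowered to __kmpc_atomic_float10_mul(loc, gtid, &x, y), which
// updates x under __kmp_atomic_lock_10r (loc stands in for a pointer to
// the compiler-generated source location).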
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
                1) // __kmpc_atomic_float16_add
ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
                1) // __kmpc_atomic_float16_sub
ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
                1) // __kmpc_atomic_float16_mul
ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
                1) // __kmpc_atomic_float16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
                1) // __kmpc_atomic_float16_add_a16
ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
                1) // __kmpc_atomic_float16_sub_a16
ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
                1) // __kmpc_atomic_float16_mul_a16
ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
                1) // __kmpc_atomic_float16_div_a16
#endif
#endif
// routines for complex types

#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_add
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_div
// end of the workaround for C78287
#else
ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
#endif // USE_CMPXCHG_FIX

ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
                1) // __kmpc_atomic_cmplx10_add
ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
                1) // __kmpc_atomic_cmplx10_sub
ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
                1) // __kmpc_atomic_cmplx10_mul
ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
                1) // __kmpc_atomic_cmplx10_div
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
                1) // __kmpc_atomic_cmplx16_add
ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub
ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul
ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
                1) // __kmpc_atomic_cmplx16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
                1) // __kmpc_atomic_cmplx16_add_a16
ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub_a16
ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul_a16
ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                1) // __kmpc_atomic_cmplx16_div_a16
#endif
#endif

#if OMP_40_ENABLED

// OpenMP 4.0: x = expr binop x for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64
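// For example, with a non-commutative operator the reversed form
//
//   #pragma omp atomic
//   x = expr - x;
//
// cannot be expressed as "x binop= expr". For a double x it can be lowered
// to the _rev entry point (a sketch; loc stands in for a pointer to the
// compiler-generated source location):
//
//   __kmpc_atomic_float8_sub_rev(loc, gtid, &x, expr);
//
// which atomically performs *lhs = rhs - *lhs.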
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_REV(OP, LCK_ID)                                           \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);                      \
                                                                              \
  (*lhs) = (rhs)OP(*lhs);                                                     \
                                                                              \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_REV(OP, FLAG)                                        \
  if ((FLAG) && (__kmp_atomic_mode == 2)) {                                   \
    KMP_CHECK_GTID;                                                           \
    OP_CRITICAL_REV(OP, 0);                                                   \
    return;                                                                   \
  }
#else
#define OP_GOMP_CRITICAL_REV(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//           fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE)                      \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
                                                   TYPE *lhs, TYPE rhs) {     \
    KMP_DEBUG_ASSERT(__kmp_init_serial);                                      \
    KA_TRACE(100,                                                             \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_REV(TYPE, BITS, OP)                                        \
  {                                                                           \
    TYPE KMP_ATOMIC_VOLATILE temp_val;                                        \
    TYPE old_value, new_value;                                                \
    temp_val = *lhs;                                                          \
    old_value = temp_val;                                                     \
    new_value = rhs OP old_value;                                             \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS(                                  \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value,    \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) {                       \
      KMP_DO_PAUSE;                                                           \
                                                                              \
      temp_val = *lhs;                                                        \
      old_value = temp_val;                                                   \
      new_value = rhs OP old_value;                                           \
    }                                                                         \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void)                                \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG)                                         \
  OP_CMPXCHG_REV(TYPE, BITS, OP)                                              \
  }

// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable

//                 TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
//                 TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev

ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev

ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev

ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev

ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev

ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
__kmpc_atomic_float8_div_rev 1494 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r, 1495 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev 1496 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG 1497 1498 // ------------------------------------------------------------------------ 1499 // Routines for Extended types: long double, _Quad, complex flavours (use 1500 // critical section) 1501 // TYPE_ID, OP_ID, TYPE - detailed above 1502 // OP - operator 1503 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1504 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1505 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \ 1506 OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ 1507 OP_CRITICAL_REV(OP, LCK_ID) \ 1508 } 1509 1510 /* ------------------------------------------------------------------------- */ 1511 // routines for long double type 1512 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r, 1513 1) // __kmpc_atomic_float10_sub_rev 1514 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r, 1515 1) // __kmpc_atomic_float10_div_rev 1516 #if KMP_HAVE_QUAD 1517 // routines for _Quad type 1518 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r, 1519 1) // __kmpc_atomic_float16_sub_rev 1520 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r, 1521 1) // __kmpc_atomic_float16_div_rev 1522 #if (KMP_ARCH_X86) 1523 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r, 1524 1) // __kmpc_atomic_float16_sub_a16_rev 1525 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r, 1526 1) // __kmpc_atomic_float16_div_a16_rev 1527 #endif 1528 #endif 1529 1530 // routines for complex types 1531 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c, 1532 1) // __kmpc_atomic_cmplx4_sub_rev 1533 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c, 1534 1) // __kmpc_atomic_cmplx4_div_rev 1535 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c, 1536 1) // __kmpc_atomic_cmplx8_sub_rev 1537 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c, 1538 1) // __kmpc_atomic_cmplx8_div_rev 1539 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c, 1540 1) // __kmpc_atomic_cmplx10_sub_rev 1541 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c, 1542 1) // __kmpc_atomic_cmplx10_div_rev 1543 #if KMP_HAVE_QUAD 1544 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c, 1545 1) // __kmpc_atomic_cmplx16_sub_rev 1546 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c, 1547 1) // __kmpc_atomic_cmplx16_div_rev 1548 #if (KMP_ARCH_X86) 1549 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1550 1) // __kmpc_atomic_cmplx16_sub_a16_rev 1551 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1552 1) // __kmpc_atomic_cmplx16_div_a16_rev 1553 #endif 1554 #endif 1555 1556 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 1557 // End of OpenMP 4.0: x = expr binop x for non-commutative operations. 
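// For illustration only (a sketch, not part of the build): for fixed4/sub,
// the ATOMIC_CMPXCHG_REV machinery above expands to roughly the following,
// with the assert/trace boilerplate omitted:
//
//   void __kmpc_atomic_fixed4_sub_rev(ident_t *id_ref, int gtid,
//                                     kmp_int32 *lhs, kmp_int32 rhs) {
//     kmp_int32 old_value = *lhs;
//     kmp_int32 new_value = rhs - old_value; // reversed: rhs OP *lhs
//     // Retry until no other thread updates *lhs between our read and the
//     // compare-and-store.
//     while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, old_value,
//                                         new_value)) {
//       KMP_DO_PAUSE;
//       old_value = *lhs;
//       new_value = rhs - old_value;
//     }
//   }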

#endif // OMP_40_ENABLED

/* ------------------------------------------------------------------------ */
/* Routines for mixed types of LHS and RHS, when RHS is "larger".            */
/* Note: in order to reduce the total number of type combinations it is      */
/*       assumed that the compiler converts RHS to the longest floating      */
/*       type, that is _Quad, before the call to any of these routines.      */
/*       Conversion to _Quad is done by the compiler during calculation,     */
/*       and conversion back to TYPE - before the assignment, like:          */
/*          *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                            */
/*       A performance penalty is expected because of SW emulation use.      */
/* ------------------------------------------------------------------------ */

#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
              gtid));

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
                           GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
  }

// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                           LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
// -------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
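// Note (illustrative): MASK is the low-address mask matching the operand
// size, e.g. MASK == 3 for a 4-byte operand, so the alignment test in the
// macro below amounts to
//
//   if (((kmp_uintptr_t)lhs & 0x3) == 0) { /* lock-free cmpxchg path */ }
//   else                                  { /* critical-section path  */ }
//
// i.e. an aligned address takes the compare-and-store loop, while a
// misaligned one falls back to the lock.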
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                           LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
  OP_CMPXCHG_REV(TYPE, BITS, OP) \
  }
#define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                               LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
  OP_CRITICAL_REV(OP, LCK_ID) \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// RHS=float8
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
                   0) // __kmpc_atomic_fixed4_mul_float8
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
                   0) // __kmpc_atomic_fixed4_div_float8
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8

// RHS=float16 (deprecated, to be removed when we are sure the compiler does
// not use them)
#if KMP_HAVE_QUAD
ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp

ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp

ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_add_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_add_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_div_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_div_fp

ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp

ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp

ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp

ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_add_fp
ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_sub_fp
ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_mul_fp
ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_div_fp

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Reverse operations
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp

ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
                       1) // __kmpc_atomic_float10_sub_rev_fp
ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
                       1) // __kmpc_atomic_float10_div_rev_fp
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#endif // KMP_HAVE_QUAD

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  }
// end of the second part of the workaround for C78287
#else
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
#endif // USE_CMPXCHG_FIX
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8

// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Atomic READ routines

// ------------------------------------------------------------------------
// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *loc) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store_ret" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
// TODO: check if it is still necessary
// Return old value regardless of the result of "compare & swap" operation
#define OP_CMPXCHG_READ(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    union f_i_union { \
      TYPE f_val; \
      kmp_int##BITS i_val; \
    }; \
    union f_i_union old_value; \
    temp_val = *loc; \
    old_value.f_val = temp_val; \
    old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
        (kmp_int##BITS *)loc, \
        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
    new_value = old_value.f_val; \
    return new_value; \
  }

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_READ(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  new_value = (*loc); \
\
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_READ(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_READ(OP, 0); \
    return new_value; \
  }
#else
#define OP_GOMP_CRITICAL_READ(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
  return new_value; \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
  OP_CMPXCHG_READ(TYPE, BITS, OP) \
  }
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
  return new_value; \
  }

// ------------------------------------------------------------------------
// Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return
// value doesn't work.
// Let's return the read value through the additional parameter.
#if (KMP_OS_WINDOWS)

#define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  (*out) = (*loc); \
\
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_READ_WRK(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
#define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
                                         TYPE *loc) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
  OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
  }

#endif // KMP_OS_WINDOWS

// ------------------------------------------------------------------------
// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
                    KMP_ARCH_X86) // __kmpc_atomic_float4_rd
ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
                    KMP_ARCH_X86) // __kmpc_atomic_float8_rd

// !!! TODO: Remove lock operations for "char" since it can't be non-atomic
ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd

ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
                     1) // __kmpc_atomic_float10_rd
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
                     1) // __kmpc_atomic_float16_rd
#endif // KMP_HAVE_QUAD

// Fix for CQ220361 on Windows* OS
#if (KMP_OS_WINDOWS)
ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
                         1) // __kmpc_atomic_cmplx4_rd
#else
ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
                     1) // __kmpc_atomic_cmplx4_rd
#endif
ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
                     1) // __kmpc_atomic_cmplx8_rd
ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
                     1) // __kmpc_atomic_cmplx10_rd
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
                     1) // __kmpc_atomic_cmplx16_rd
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
                     1) // __kmpc_atomic_float16_a16_rd
ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
                     1) // __kmpc_atomic_cmplx16_a16_rd
#endif
#endif

// ------------------------------------------------------------------------
// Atomic WRITE routines

#define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
  KMP_XCHG_FIXED##BITS(lhs, rhs); \
  }
// ------------------------------------------------------------------------
#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
  KMP_XCHG_REAL##BITS(lhs, rhs); \
  }

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_WR(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_CPU_PAUSE(); \
\
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs; \
    } \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
  OP_CMPXCHG_WR(TYPE, BITS, OP) \
  }

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
  }
// -------------------------------------------------------------------------

ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#else
ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#endif

ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_wr
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
                  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#else
ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#endif

ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
                   1) // __kmpc_atomic_float10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
                   1) // __kmpc_atomic_float16_wr
#endif
ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
                   1) // __kmpc_atomic_cmplx8_wr
ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
                   1) // __kmpc_atomic_cmplx10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
                   1) // __kmpc_atomic_cmplx16_wr
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
                   1) // __kmpc_atomic_float16_a16_wr
ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
                   1) // __kmpc_atomic_cmplx16_a16_wr
#endif
#endif

// ------------------------------------------------------------------------
// Atomic CAPTURE routines

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *lhs, TYPE rhs, int flag) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_CPT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  if (flag) { \
    (*lhs) OP rhs; \
    new_value = (*lhs); \
  } else { \
    new_value = (*lhs); \
    (*lhs) OP rhs; \
  } \
\
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT(OP## =, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = old_value OP rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_CPU_PAUSE(); \
\
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = old_value OP rhs; \
    } \
    if (flag) { \
      return new_value; \
    } else \
      return old_value; \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }

// -------------------------------------------------------------------------
#define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE old_value, new_value; \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  if (flag) { \
    return old_value OP rhs; \
  } else \
    return old_value; \
  }
// -------------------------------------------------------------------------

ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
                     0) // __kmpc_atomic_fixed4_add_cpt
ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
                     0) // __kmpc_atomic_fixed4_sub_cpt
ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt

ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt

// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
                   0) // __kmpc_atomic_fixed1_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
                   0) // __kmpc_atomic_fixed1_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
                   0) // __kmpc_atomic_fixed1_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
                   0) // __kmpc_atomic_fixed2_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
                   0) // __kmpc_atomic_fixed2_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
                   0) // __kmpc_atomic_fixed2_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
                   0) // __kmpc_atomic_fixed4_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
                   0) // __kmpc_atomic_fixed4_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
                   0) // __kmpc_atomic_fixed4_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
// TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG

// CAPTURE routines for mixed types RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
              gtid));

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                                LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \
  }

ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp

ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_add_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_sub_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_mul_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_div_cpt_fp

#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
// Routines for C/C++ Reduction operators && and ||

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_L_CPT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  if (flag) { \
    new_value OP rhs; \
  } else \
    new_value = (*lhs); \
\
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_L_CPT(OP, 0); \
    return new_value; \
  }
#else
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no combined assignment
#define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }

ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
                  0) // __kmpc_atomic_fixed4_andl_cpt
ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
                  0) // __kmpc_atomic_fixed4_orl_cpt
ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt

// -------------------------------------------------------------------------
// Routines for Fortran operators that have no match in C:
// MAX, MIN, .EQV., .NEQV.
// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - operator to check if we need any actions?
#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  if (*lhs OP rhs) { /* still need actions? */ \
    old_value = *lhs; \
    *lhs = rhs; \
    if (flag) \
      new_value = rhs; \
    else \
      new_value = old_value; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    MIN_MAX_CRITSECT_CPT(OP, 0); \
  }
#else
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    /*TYPE old_value; */ \
    temp_val = *lhs; \
    old_value = temp_val; \
    while (old_value OP rhs && /* still need actions? */ \
           !KMP_COMPARE_AND_STORE_ACQ##BITS( \
               (kmp_int##BITS *)lhs, \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
      KMP_CPU_PAUSE(); \
      temp_val = *lhs; \
      old_value = temp_val; \
    } \
    if (flag) \
      return rhs; \
    else \
      return old_value; \
  }

// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value, old_value; \
  if (*lhs OP rhs) { /* need actions? */ \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
    MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
  } \
  return *lhs; \
  }

#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value, old_value; \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
    MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
  } \
  return *lhs; \
  }

MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
                     0) // __kmpc_atomic_fixed4_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
                     0) // __kmpc_atomic_fixed4_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
                     1) // __kmpc_atomic_float16_max_cpt
MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
                     1) // __kmpc_atomic_float16_min_cpt
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
                     1) // __kmpc_atomic_float16_max_a16_cpt
MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
                     1) // __kmpc_atomic_float16_min_a16_cpt
#endif
#endif

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_EQV_CPT(^= ~, GOMP_FLAG) /* send assignment */ \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }

// ------------------------------------------------------------------------

ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \
  }

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
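// For illustration only (a sketch, not part of the build): for cmplx4/add the
// workaround machinery below generates roughly the following routine, which
// returns the captured value through *out instead of a return value:
//
//   void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid,
//                                     kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                     kmp_cmplx32 *out, int flag) {
//     __kmp_acquire_atomic_lock(&ATOMIC_LOCK8c, gtid);
//     if (flag) { // capture the value *after* the update
//       (*lhs) += rhs;
//       (*out) = (*lhs);
//     } else { // capture the value *before* the update
//       (*out) = (*lhs);
//       (*lhs) += rhs;
//     }
//     __kmp_release_atomic_lock(&ATOMIC_LOCK8c, gtid);
//   }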
2718 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \ 2719 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2720 \ 2721 if (flag) { \ 2722 (*lhs) OP rhs; \ 2723 (*out) = (*lhs); \ 2724 } else { \ 2725 (*out) = (*lhs); \ 2726 (*lhs) OP rhs; \ 2727 } \ 2728 \ 2729 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2730 return; 2731 // ------------------------------------------------------------------------ 2732 2733 #ifdef KMP_GOMP_COMPAT 2734 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \ 2735 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2736 KMP_CHECK_GTID; \ 2737 OP_CRITICAL_CPT_WRK(OP## =, 0); \ 2738 } 2739 #else 2740 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) 2741 #endif /* KMP_GOMP_COMPAT */ 2742 // ------------------------------------------------------------------------ 2743 2744 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 2745 void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \ 2746 TYPE rhs, TYPE *out, int flag) { \ 2747 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2748 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 2749 // ------------------------------------------------------------------------ 2750 2751 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2752 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 2753 OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \ 2754 OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \ 2755 } 2756 // The end of workaround for cmplx4 2757 2758 /* ------------------------------------------------------------------------- */ 2759 // routines for long double type 2760 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r, 2761 1) // __kmpc_atomic_float10_add_cpt 2762 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r, 2763 1) // __kmpc_atomic_float10_sub_cpt 2764 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r, 2765 1) // __kmpc_atomic_float10_mul_cpt 2766 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r, 2767 1) // __kmpc_atomic_float10_div_cpt 2768 #if KMP_HAVE_QUAD 2769 // routines for _Quad type 2770 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r, 2771 1) // __kmpc_atomic_float16_add_cpt 2772 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r, 2773 1) // __kmpc_atomic_float16_sub_cpt 2774 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r, 2775 1) // __kmpc_atomic_float16_mul_cpt 2776 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r, 2777 1) // __kmpc_atomic_float16_div_cpt 2778 #if (KMP_ARCH_X86) 2779 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r, 2780 1) // __kmpc_atomic_float16_add_a16_cpt 2781 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r, 2782 1) // __kmpc_atomic_float16_sub_a16_cpt 2783 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r, 2784 1) // __kmpc_atomic_float16_mul_a16_cpt 2785 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r, 2786 1) // __kmpc_atomic_float16_div_a16_cpt 2787 #endif 2788 #endif 2789 2790 // routines for complex types 2791 2792 // cmplx4 routines to return void 2793 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c, 2794 1) // __kmpc_atomic_cmplx4_add_cpt 2795 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 2796 1) // __kmpc_atomic_cmplx4_sub_cpt 2797 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 2798 1) // __kmpc_atomic_cmplx4_mul_cpt 2799 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c, 2800 1) // __kmpc_atomic_cmplx4_div_cpt 2801 2802 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c, 2803 1) // 
__kmpc_atomic_cmplx8_add_cpt 2804 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 2805 1) // __kmpc_atomic_cmplx8_sub_cpt 2806 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 2807 1) // __kmpc_atomic_cmplx8_mul_cpt 2808 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c, 2809 1) // __kmpc_atomic_cmplx8_div_cpt 2810 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c, 2811 1) // __kmpc_atomic_cmplx10_add_cpt 2812 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 2813 1) // __kmpc_atomic_cmplx10_sub_cpt 2814 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 2815 1) // __kmpc_atomic_cmplx10_mul_cpt 2816 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c, 2817 1) // __kmpc_atomic_cmplx10_div_cpt 2818 #if KMP_HAVE_QUAD 2819 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c, 2820 1) // __kmpc_atomic_cmplx16_add_cpt 2821 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 2822 1) // __kmpc_atomic_cmplx16_sub_cpt 2823 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 2824 1) // __kmpc_atomic_cmplx16_mul_cpt 2825 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c, 2826 1) // __kmpc_atomic_cmplx16_div_cpt 2827 #if (KMP_ARCH_X86) 2828 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 2829 1) // __kmpc_atomic_cmplx16_add_a16_cpt 2830 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 2831 1) // __kmpc_atomic_cmplx16_sub_a16_cpt 2832 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 2833 1) // __kmpc_atomic_cmplx16_mul_a16_cpt 2834 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 2835 1) // __kmpc_atomic_cmplx16_div_a16_cpt 2836 #endif 2837 #endif 2838 2839 #if OMP_40_ENABLED 2840 2841 // OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr 2842 // binop x; v = x; } for non-commutative operations. 
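// (The reversed forms are needed because these operators are non-commutative:
// x = expr - x is not the same as x = x - expr. The *_cpt_rev entry points
// below implement the former, capturing x either before or after the update.)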
2843 // Supported only on IA-32 architecture and Intel(R) 64 2844 2845 // ------------------------------------------------------------------------- 2846 // Operation on *lhs, rhs bound by critical section 2847 // OP - operator (it's supposed to contain an assignment) 2848 // LCK_ID - lock identifier 2849 // Note: don't check gtid as it should always be valid 2850 // 1, 2-byte - expect valid parameter, other - check before this macro 2851 #define OP_CRITICAL_CPT_REV(OP, LCK_ID) \ 2852 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2853 \ 2854 if (flag) { \ 2855 /*temp_val = (*lhs);*/ \ 2856 (*lhs) = (rhs)OP(*lhs); \ 2857 new_value = (*lhs); \ 2858 } else { \ 2859 new_value = (*lhs); \ 2860 (*lhs) = (rhs)OP(*lhs); \ 2861 } \ 2862 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2863 return new_value; 2864 2865 // ------------------------------------------------------------------------ 2866 #ifdef KMP_GOMP_COMPAT 2867 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG) \ 2868 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2869 KMP_CHECK_GTID; \ 2870 OP_CRITICAL_CPT_REV(OP, 0); \ 2871 } 2872 #else 2873 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG) 2874 #endif /* KMP_GOMP_COMPAT */ 2875 2876 // ------------------------------------------------------------------------ 2877 // Operation on *lhs, rhs using "compare_and_store" routine 2878 // TYPE - operands' type 2879 // BITS - size in bits, used to distinguish low level calls 2880 // OP - operator 2881 // Note: temp_val introduced in order to force the compiler to read 2882 // *lhs only once (w/o it the compiler reads *lhs twice) 2883 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ 2884 { \ 2885 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2886 TYPE old_value, new_value; \ 2887 temp_val = *lhs; \ 2888 old_value = temp_val; \ 2889 new_value = rhs OP old_value; \ 2890 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2891 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2892 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 2893 KMP_CPU_PAUSE(); \ 2894 \ 2895 temp_val = *lhs; \ 2896 old_value = temp_val; \ 2897 new_value = rhs OP old_value; \ 2898 } \ 2899 if (flag) { \ 2900 return new_value; \ 2901 } else \ 2902 return old_value; \ 2903 } 2904 2905 // ------------------------------------------------------------------------- 2906 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2907 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2908 TYPE new_value; \ 2909 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2910 OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \ 2911 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ 2912 } 2913 2914 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /, 2915 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev 2916 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /, 2917 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev 2918 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<, 2919 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev 2920 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>, 2921 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev 2922 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, 2923 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev 2924 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -, 2925 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev 2926 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /, 2927 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev 2928 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /, 2929 KMP_ARCH_X86) // 
__kmpc_atomic_fixed2u_div_cpt_rev 2930 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<, 2931 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev 2932 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>, 2933 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev 2934 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, 2935 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev 2936 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -, 2937 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev 2938 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /, 2939 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev 2940 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /, 2941 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev 2942 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<, 2943 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev 2944 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>, 2945 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev 2946 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, 2947 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev 2948 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -, 2949 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev 2950 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /, 2951 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev 2952 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /, 2953 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev 2954 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<, 2955 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev 2956 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>, 2957 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev 2958 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, 2959 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev 2960 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -, 2961 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev 2962 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /, 2963 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev 2964 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -, 2965 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev 2966 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /, 2967 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev 2968 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -, 2969 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev 2970 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 2971 2972 // ------------------------------------------------------------------------ 2973 // Routines for Extended types: long double, _Quad, complex flavours (use 2974 // critical section) 2975 // TYPE_ID, OP_ID, TYPE - detailed above 2976 // OP - operator 2977 // LCK_ID - lock identifier, used to possibly distinguish lock variable 2978 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2979 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2980 TYPE new_value; \ 2981 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2982 /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \ 2983 OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \ 2984 OP_CRITICAL_CPT_REV(OP, LCK_ID) \ 2985 } 2986 2987 /* ------------------------------------------------------------------------- */ 2988 // routines for long double type 2989 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r, 2990 1) // __kmpc_atomic_float10_sub_cpt_rev 2991 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 
10r, 2992 1) // __kmpc_atomic_float10_div_cpt_rev 2993 #if KMP_HAVE_QUAD 2994 // routines for _Quad type 2995 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 2996 1) // __kmpc_atomic_float16_sub_cpt_rev 2997 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 2998 1) // __kmpc_atomic_float16_div_cpt_rev 2999 #if (KMP_ARCH_X86) 3000 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 3001 1) // __kmpc_atomic_float16_sub_a16_cpt_rev 3002 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 3003 1) // __kmpc_atomic_float16_div_a16_cpt_rev 3004 #endif 3005 #endif 3006 3007 // routines for complex types 3008 3009 // ------------------------------------------------------------------------ 3010 // Workaround for cmplx4. Regular routines with return value don't work 3011 // on Win_32e. Let's return captured values through the additional parameter. 3012 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \ 3013 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3014 \ 3015 if (flag) { \ 3016 (*lhs) = (rhs)OP(*lhs); \ 3017 (*out) = (*lhs); \ 3018 } else { \ 3019 (*out) = (*lhs); \ 3020 (*lhs) = (rhs)OP(*lhs); \ 3021 } \ 3022 \ 3023 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3024 return; 3025 // ------------------------------------------------------------------------ 3026 3027 #ifdef KMP_GOMP_COMPAT 3028 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \ 3029 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3030 KMP_CHECK_GTID; \ 3031 OP_CRITICAL_CPT_REV_WRK(OP, 0); \ 3032 } 3033 #else 3034 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) 3035 #endif /* KMP_GOMP_COMPAT */ 3036 // ------------------------------------------------------------------------ 3037 3038 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \ 3039 GOMP_FLAG) \ 3040 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 3041 OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \ 3042 OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \ 3043 } 3044 // The end of workaround for cmplx4 3045 3046 // !!! 
TODO: check if we need to return void for cmplx4 routines 3047 // cmplx4 routines to return void 3048 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 3049 1) // __kmpc_atomic_cmplx4_sub_cpt_rev 3050 ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 3051 1) // __kmpc_atomic_cmplx4_div_cpt_rev 3052 3053 ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 3054 1) // __kmpc_atomic_cmplx8_sub_cpt_rev 3055 ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 3056 1) // __kmpc_atomic_cmplx8_div_cpt_rev 3057 ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 3058 1) // __kmpc_atomic_cmplx10_sub_cpt_rev 3059 ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 3060 1) // __kmpc_atomic_cmplx10_div_cpt_rev 3061 #if KMP_HAVE_QUAD 3062 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 3063 1) // __kmpc_atomic_cmplx16_sub_cpt_rev 3064 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 3065 1) // __kmpc_atomic_cmplx16_div_cpt_rev 3066 #if (KMP_ARCH_X86) 3067 ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 3068 1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev 3069 ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 3070 1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev 3071 #endif 3072 #endif 3073 3074 // Capture reverse for mixed type: RHS=float16 3075 #if KMP_HAVE_QUAD 3076 3077 // Beginning of a definition (provides name, parameters, debug trace) 3078 // TYPE_ID - operands' type and size (fixed*, fixed*u for signed, unsigned 3079 // fixed) 3080 // OP_ID - operation identifier (add, sub, mul, ...) 3081 // TYPE - operands' type 3082 // ------------------------------------------------------------------------- 3083 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ 3084 RTYPE, LCK_ID, MASK, GOMP_FLAG) \ 3085 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 3086 TYPE new_value; \ 3087 OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \ 3088 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ 3089 } 3090 3091 // ------------------------------------------------------------------------- 3092 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ 3093 LCK_ID, GOMP_FLAG) \ 3094 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 3095 TYPE new_value; \ 3096 OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) /* send assignment */ \ 3097 OP_CRITICAL_CPT_REV(OP, LCK_ID) /* send assignment */ \ 3098 } 3099 3100 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0, 3101 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp 3102 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0, 3103 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp 3104 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0, 3105 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp 3106 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0, 3107 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp 3108 3109 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1, 3110 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp 3111 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i, 3112 1, 3113 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp 3114 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1, 3115 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp 3116
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i, 3117 1, 3118 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp 3119 3120 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i, 3121 3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp 3122 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad, 3123 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp 3124 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i, 3125 3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp 3126 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad, 3127 4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp 3128 3129 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i, 3130 7, 3131 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp 3132 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad, 3133 8i, 7, 3134 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp 3135 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i, 3136 7, 3137 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp 3138 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad, 3139 8i, 7, 3140 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp 3141 3142 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad, 3143 4r, 3, 3144 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp 3145 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad, 3146 4r, 3, 3147 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp 3148 3149 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad, 3150 8r, 7, 3151 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp 3152 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad, 3153 8r, 7, 3154 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp 3155 3156 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad, 3157 10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp 3158 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad, 3159 10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp 3160 3161 #endif // KMP_HAVE_QUAD 3162 3163 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;} 3164 3165 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3166 TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \ 3167 TYPE rhs) { \ 3168 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 3169 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid)); 3170 3171 #define CRITICAL_SWP(LCK_ID) \ 3172 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3173 \ 3174 old_value = (*lhs); \ 3175 (*lhs) = rhs; \ 3176 \ 3177 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3178 return old_value; 3179 3180 // ------------------------------------------------------------------------ 3181 #ifdef KMP_GOMP_COMPAT 3182 #define GOMP_CRITICAL_SWP(FLAG) \ 3183 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3184 KMP_CHECK_GTID; \ 3185 CRITICAL_SWP(0); \ 3186 } 3187 #else 3188 #define GOMP_CRITICAL_SWP(FLAG) 3189 #endif /* KMP_GOMP_COMPAT */ 3190 3191 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ 3192 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3193 TYPE old_value; \ 3194 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3195 old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \ 3196 return old_value; \ 3197 } 3198 // ------------------------------------------------------------------------ 3199 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, 
GOMP_FLAG) \ 3200 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3201 TYPE old_value; \ 3202 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3203 old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \ 3204 return old_value; \ 3205 } 3206 3207 // ------------------------------------------------------------------------ 3208 #define CMPXCHG_SWP(TYPE, BITS) \ 3209 { \ 3210 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 3211 TYPE old_value, new_value; \ 3212 temp_val = *lhs; \ 3213 old_value = temp_val; \ 3214 new_value = rhs; \ 3215 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 3216 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 3217 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 3218 KMP_CPU_PAUSE(); \ 3219 \ 3220 temp_val = *lhs; \ 3221 old_value = temp_val; \ 3222 new_value = rhs; \ 3223 } \ 3224 return old_value; \ 3225 } 3226 3227 // ------------------------------------------------------------------------- 3228 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ 3229 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3230 TYPE old_value; \ 3231 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3232 CMPXCHG_SWP(TYPE, BITS) \ 3233 } 3234 3235 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp 3236 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp 3237 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp 3238 3239 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32, 3240 KMP_ARCH_X86) // __kmpc_atomic_float4_swp 3241 3242 #if (KMP_ARCH_X86) 3243 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64, 3244 KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp 3245 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64, 3246 KMP_ARCH_X86) // __kmpc_atomic_float8_swp 3247 #else 3248 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp 3249 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64, 3250 KMP_ARCH_X86) // __kmpc_atomic_float8_swp 3251 #endif 3252 3253 // ------------------------------------------------------------------------ 3254 // Routines for Extended types: long double, _Quad, complex flavours (use 3255 // critical section) 3256 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \ 3257 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3258 TYPE old_value; \ 3259 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3260 CRITICAL_SWP(LCK_ID) \ 3261 } 3262 3263 // ------------------------------------------------------------------------ 3264 // !!! TODO: check if we need to return void for cmplx4 routines 3265 // Workaround for cmplx4. Regular routines with return value don't work 3266 // on Win_32e. Let's return captured values through the additional parameter. 
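// Illustrative sketch (not part of the build): assuming the usual
// ATOMIC_LOCK##LCK_ID -> __kmp_atomic_lock_* mapping and omitting the
// GOMP-compat branch and debug assert/trace, the cmplx4 swap routine
// generated below expands to roughly:
//
//   void __kmpc_atomic_cmplx4_swp(ident_t *id_ref, int gtid,
//                                 kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                 kmp_cmplx32 *out) {
//     kmp_cmplx32 tmp;
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8c, gtid);
//     tmp = (*lhs);   // capture the old value
//     (*lhs) = (rhs); // store the new one
//     (*out) = tmp;   // old value goes back via the extra parameter
//     __kmp_release_atomic_lock(&__kmp_atomic_lock_8c, gtid);
//   }
//
// The workaround macros themselves: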
3267 3268 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \ 3269 void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \ 3270 TYPE rhs, TYPE *out) { \ 3271 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 3272 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid)); 3273 3274 #define CRITICAL_SWP_WRK(LCK_ID) \ 3275 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3276 \ 3277 tmp = (*lhs); \ 3278 (*lhs) = (rhs); \ 3279 (*out) = tmp; \ 3280 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3281 return; 3282 // ------------------------------------------------------------------------ 3283 3284 #ifdef KMP_GOMP_COMPAT 3285 #define GOMP_CRITICAL_SWP_WRK(FLAG) \ 3286 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3287 KMP_CHECK_GTID; \ 3288 CRITICAL_SWP_WRK(0); \ 3289 } 3290 #else 3291 #define GOMP_CRITICAL_SWP_WRK(FLAG) 3292 #endif /* KMP_GOMP_COMPAT */ 3293 // ------------------------------------------------------------------------ 3294 3295 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \ 3296 ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \ 3297 TYPE tmp; \ 3298 GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \ 3299 CRITICAL_SWP_WRK(LCK_ID) \ 3300 } 3301 // The end of workaround for cmplx4 3302 3303 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp 3304 #if KMP_HAVE_QUAD 3305 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp 3306 #endif 3307 // cmplx4 routine to return void 3308 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp 3309 3310 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // 3311 // __kmpc_atomic_cmplx4_swp 3312 3313 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp 3314 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp 3315 #if KMP_HAVE_QUAD 3316 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp 3317 #if (KMP_ARCH_X86) 3318 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r, 3319 1) // __kmpc_atomic_float16_a16_swp 3320 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c, 3321 1) // __kmpc_atomic_cmplx16_a16_swp 3322 #endif 3323 #endif 3324 3325 // End of OpenMP 4.0 Capture 3326 3327 #endif // OMP_40_ENABLED 3328 3329 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 3330 3331 #undef OP_CRITICAL 3332 3333 /* ------------------------------------------------------------------------ */ 3334 /* Generic atomic routines */ 3335 3336 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3337 void (*f)(void *, void *, void *)) { 3338 KMP_DEBUG_ASSERT(__kmp_init_serial); 3339 3340 if ( 3341 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3342 FALSE /* must use lock */ 3343 #else 3344 TRUE 3345 #endif 3346 ) { 3347 kmp_int8 old_value, new_value; 3348 3349 old_value = *(kmp_int8 *)lhs; 3350 (*f)(&new_value, &old_value, rhs); 3351 3352 /* TODO: Should this be acquire or release? */ 3353 while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value, 3354 *(kmp_int8 *)&new_value)) { 3355 KMP_CPU_PAUSE(); 3356 3357 old_value = *(kmp_int8 *)lhs; 3358 (*f)(&new_value, &old_value, rhs); 3359 } 3360 3361 return; 3362 } else { 3363 // All 1-byte data is of integer data type. 
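// Under KMP_GOMP_COMPAT, __kmp_atomic_mode == 2 routes every atomic through
// the single global __kmp_atomic_lock (matching libgomp's one-big-lock
// behavior); otherwise the per-size lock (__kmp_atomic_lock_1i here) is used.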
3364 3365 #ifdef KMP_GOMP_COMPAT 3366 if (__kmp_atomic_mode == 2) { 3367 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3368 } else 3369 #endif /* KMP_GOMP_COMPAT */ 3370 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid); 3371 3372 (*f)(lhs, lhs, rhs); 3373 3374 #ifdef KMP_GOMP_COMPAT 3375 if (__kmp_atomic_mode == 2) { 3376 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3377 } else 3378 #endif /* KMP_GOMP_COMPAT */ 3379 __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid); 3380 } 3381 } 3382 3383 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3384 void (*f)(void *, void *, void *)) { 3385 if ( 3386 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3387 FALSE /* must use lock */ 3388 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 3389 TRUE /* no alignment problems */ 3390 #else 3391 !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */ 3392 #endif 3393 ) { 3394 kmp_int16 old_value, new_value; 3395 3396 old_value = *(kmp_int16 *)lhs; 3397 (*f)(&new_value, &old_value, rhs); 3398 3399 /* TODO: Should this be acquire or release? */ 3400 while (!KMP_COMPARE_AND_STORE_ACQ16( 3401 (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) { 3402 KMP_CPU_PAUSE(); 3403 3404 old_value = *(kmp_int16 *)lhs; 3405 (*f)(&new_value, &old_value, rhs); 3406 } 3407 3408 return; 3409 } else { 3410 // All 2-byte data is of integer data type. 3411 3412 #ifdef KMP_GOMP_COMPAT 3413 if (__kmp_atomic_mode == 2) { 3414 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3415 } else 3416 #endif /* KMP_GOMP_COMPAT */ 3417 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid); 3418 3419 (*f)(lhs, lhs, rhs); 3420 3421 #ifdef KMP_GOMP_COMPAT 3422 if (__kmp_atomic_mode == 2) { 3423 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3424 } else 3425 #endif /* KMP_GOMP_COMPAT */ 3426 __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid); 3427 } 3428 } 3429 3430 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3431 void (*f)(void *, void *, void *)) { 3432 KMP_DEBUG_ASSERT(__kmp_init_serial); 3433 3434 if ( 3435 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints. 3436 // Gomp compatibility is broken if this routine is called for floats. 3437 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 3438 TRUE /* no alignment problems */ 3439 #else 3440 !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */ 3441 #endif 3442 ) { 3443 kmp_int32 old_value, new_value; 3444 3445 old_value = *(kmp_int32 *)lhs; 3446 (*f)(&new_value, &old_value, rhs); 3447 3448 /* TODO: Should this be acquire or release? */ 3449 while (!KMP_COMPARE_AND_STORE_ACQ32( 3450 (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) { 3451 KMP_CPU_PAUSE(); 3452 3453 old_value = *(kmp_int32 *)lhs; 3454 (*f)(&new_value, &old_value, rhs); 3455 } 3456 3457 return; 3458 } else { 3459 // Use __kmp_atomic_lock_4i for all 4-byte data, 3460 // even if it isn't of integer data type. 
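// On this locked path the callback writes its result directly into *lhs
// ((*f)(lhs, lhs, rhs) below), so the whole read-modify-write is protected
// by the lock and no compare-and-store retry loop is needed, unlike the
// lock-free path above.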
3461 3462 #ifdef KMP_GOMP_COMPAT 3463 if (__kmp_atomic_mode == 2) { 3464 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3465 } else 3466 #endif /* KMP_GOMP_COMPAT */ 3467 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid); 3468 3469 (*f)(lhs, lhs, rhs); 3470 3471 #ifdef KMP_GOMP_COMPAT 3472 if (__kmp_atomic_mode == 2) { 3473 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3474 } else 3475 #endif /* KMP_GOMP_COMPAT */ 3476 __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid); 3477 } 3478 } 3479 3480 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3481 void (*f)(void *, void *, void *)) { 3482 KMP_DEBUG_ASSERT(__kmp_init_serial); 3483 if ( 3484 3485 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3486 FALSE /* must use lock */ 3487 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 3488 TRUE /* no alignment problems */ 3489 #else 3490 !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */ 3491 #endif 3492 ) { 3493 kmp_int64 old_value, new_value; 3494 3495 old_value = *(kmp_int64 *)lhs; 3496 (*f)(&new_value, &old_value, rhs); 3497 /* TODO: Should this be acquire or release? */ 3498 while (!KMP_COMPARE_AND_STORE_ACQ64( 3499 (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) { 3500 KMP_CPU_PAUSE(); 3501 3502 old_value = *(kmp_int64 *)lhs; 3503 (*f)(&new_value, &old_value, rhs); 3504 } 3505 3506 return; 3507 } else { 3508 // Use __kmp_atomic_lock_8i for all 8-byte data, 3509 // even if it isn't of integer data type. 3510 3511 #ifdef KMP_GOMP_COMPAT 3512 if (__kmp_atomic_mode == 2) { 3513 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3514 } else 3515 #endif /* KMP_GOMP_COMPAT */ 3516 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid); 3517 3518 (*f)(lhs, lhs, rhs); 3519 3520 #ifdef KMP_GOMP_COMPAT 3521 if (__kmp_atomic_mode == 2) { 3522 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3523 } else 3524 #endif /* KMP_GOMP_COMPAT */ 3525 __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid); 3526 } 3527 } 3528 3529 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3530 void (*f)(void *, void *, void *)) { 3531 KMP_DEBUG_ASSERT(__kmp_init_serial); 3532 3533 #ifdef KMP_GOMP_COMPAT 3534 if (__kmp_atomic_mode == 2) { 3535 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3536 } else 3537 #endif /* KMP_GOMP_COMPAT */ 3538 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid); 3539 3540 (*f)(lhs, lhs, rhs); 3541 3542 #ifdef KMP_GOMP_COMPAT 3543 if (__kmp_atomic_mode == 2) { 3544 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3545 } else 3546 #endif /* KMP_GOMP_COMPAT */ 3547 __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid); 3548 } 3549 3550 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3551 void (*f)(void *, void *, void *)) { 3552 KMP_DEBUG_ASSERT(__kmp_init_serial); 3553 3554 #ifdef KMP_GOMP_COMPAT 3555 if (__kmp_atomic_mode == 2) { 3556 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3557 } else 3558 #endif /* KMP_GOMP_COMPAT */ 3559 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid); 3560 3561 (*f)(lhs, lhs, rhs); 3562 3563 #ifdef KMP_GOMP_COMPAT 3564 if (__kmp_atomic_mode == 2) { 3565 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3566 } else 3567 #endif /* KMP_GOMP_COMPAT */ 3568 __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid); 3569 } 3570 3571 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3572 void (*f)(void *, void *, void *)) { 3573 KMP_DEBUG_ASSERT(__kmp_init_serial); 3574 3575 #ifdef KMP_GOMP_COMPAT 3576 if 
(__kmp_atomic_mode == 2) { 3577 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3578 } else 3579 #endif /* KMP_GOMP_COMPAT */ 3580 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid); 3581 3582 (*f)(lhs, lhs, rhs); 3583 3584 #ifdef KMP_GOMP_COMPAT 3585 if (__kmp_atomic_mode == 2) { 3586 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3587 } else 3588 #endif /* KMP_GOMP_COMPAT */ 3589 __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid); 3590 } 3591 3592 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3593 void (*f)(void *, void *, void *)) { 3594 KMP_DEBUG_ASSERT(__kmp_init_serial); 3595 3596 #ifdef KMP_GOMP_COMPAT 3597 if (__kmp_atomic_mode == 2) { 3598 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3599 } else 3600 #endif /* KMP_GOMP_COMPAT */ 3601 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid); 3602 3603 (*f)(lhs, lhs, rhs); 3604 3605 #ifdef KMP_GOMP_COMPAT 3606 if (__kmp_atomic_mode == 2) { 3607 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3608 } else 3609 #endif /* KMP_GOMP_COMPAT */ 3610 __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid); 3611 } 3612 3613 // AC: same two routines as GOMP_atomic_start/end, but will be called by our 3614 // compiler; duplicated so as not to use third-party names in pure Intel code 3615 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin. 3616 void __kmpc_atomic_start(void) { 3617 int gtid = __kmp_entry_gtid(); 3618 KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid)); 3619 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3620 } 3621 3622 void __kmpc_atomic_end(void) { 3623 int gtid = __kmp_get_gtid(); 3624 KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid)); 3625 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3626 } 3627 3628 /*! 3629 @} 3630 */ 3631 3632 // end of file 3633