1 /* 2 * kmp_atomic.cpp -- ATOMIC implementation routines 3 */ 4 5 //===----------------------------------------------------------------------===// 6 // 7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 8 // See https://llvm.org/LICENSE.txt for license information. 9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "kmp_atomic.h" 14 #include "kmp.h" // TRUE, asm routines prototypes 15 16 typedef unsigned char uchar; 17 typedef unsigned short ushort; 18 19 /*! 20 @defgroup ATOMIC_OPS Atomic Operations 21 These functions are used for implementing the many different varieties of atomic 22 operations. 23 24 The compiler is at liberty to inline atomic operations that are naturally 25 supported by the target architecture. For instance, on IA-32 architecture an 26 atomic like this can be inlined 27 @code 28 static int s = 0; 29 #pragma omp atomic 30 s++; 31 @endcode 32 using the single instruction: `lock; incl s` 33 34 However, the runtime does provide entrypoints for these operations to support 35 compilers that choose not to inline them. (For instance, 36 `__kmpc_atomic_fixed4_add` could be used to perform the increment above.) 37 38 The names of the functions are encoded by using the data type name and the 39 operation name, as in these tables. 40 41 Data Type | Data type encoding 42 -----------|--------------- 43 int8_t | `fixed1` 44 uint8_t | `fixed1u` 45 int16_t | `fixed2` 46 uint16_t | `fixed2u` 47 int32_t | `fixed4` 48 uint32_t | `fixed4u` 49 int64_t | `fixed8` 50 uint64_t | `fixed8u` 51 float | `float4` 52 double | `float8` 53 float 10 (8087 eighty bit float) | `float10` 54 complex<float> | `cmplx4` 55 complex<double> | `cmplx8` 56 complex<float10> | `cmplx10` 57 <br> 58 59 Operation | Operation encoding 60 ----------|------------------- 61 + | add 62 - | sub 63 \* | mul 64 / | div 65 & | andb 66 << | shl 67 \>\> | shr 68 \| | orb 69 ^ | xor 70 && | andl 71 \|\| | orl 72 maximum | max 73 minimum | min 74 .eqv. | eqv 75 .neqv. | neqv 76 77 <br> 78 For non-commutative operations, `_rev` can also be added for the reversed 79 operation. For the functions that capture the result, the suffix `_cpt` is 80 added. 81 82 Update Functions 83 ================ 84 The general form of an atomic function that just performs an update (without a 85 `capture`) is 86 @code 87 void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE * 88 lhs, TYPE rhs ); 89 @endcode 90 @param ident_t a pointer to source location 91 @param gtid the global thread id 92 @param lhs a pointer to the left operand 93 @param rhs the right operand 94 95 `capture` functions 96 =================== 97 The capture functions perform an atomic update and return a result, which is 98 either the value before the operation, or the value after it. They take an additional 99 argument to determine which result is returned. 100 Their general form is therefore 101 @code 102 TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE * 103 lhs, TYPE rhs, int flag ); 104 @endcode 105 @param ident_t a pointer to source location 106 @param gtid the global thread id 107 @param lhs a pointer to the left operand 108 @param rhs the right operand 109 @param flag one if the result is to be captured *after* the operation, zero if 110 captured *before*.
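As an illustration only (the exact lowering is compiler- and target-dependent, and the variable names here are invented for the example), a capture construct such as
@code
int v;
#pragma omp atomic capture
{ v = s; s += 5; }
@endcode
might be lowered by the compiler into a call like
@code
v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &s, 5, 0); // flag==0: return the value *before* the addition
@endcode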
111 112 The one set of exceptions to this is the `complex<float>` type, where the value 113 is not returned; instead, an extra argument pointer is passed. 114 115 They look like 116 @code 117 void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * 118 lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag ); 119 @endcode 120 121 Read and Write Operations 122 ========================= 123 The OpenMP<sup>*</sup> standard now supports atomic operations that simply 124 ensure that the value is read or written atomically, with no modification 125 performed. In many cases on IA-32 architecture these operations can be inlined 126 since the architecture guarantees that no tearing occurs on aligned objects 127 accessed with a single memory operation of up to 64 bits in size. 128 129 The general form of the read operations is 130 @code 131 TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc ); 132 @endcode 133 134 For the write operations the form is 135 @code 136 void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs 137 ); 138 @endcode 139 140 Full list of functions 141 ====================== 142 This leads to the generation of 376 atomic functions, as follows. 143 144 Functions for integers 145 --------------------- 146 There are versions here for integers of size 1, 2, 4 and 8 bytes, both signed and 147 unsigned (where that matters). 148 @code 149 __kmpc_atomic_fixed1_add 150 __kmpc_atomic_fixed1_add_cpt 151 __kmpc_atomic_fixed1_add_fp 152 __kmpc_atomic_fixed1_andb 153 __kmpc_atomic_fixed1_andb_cpt 154 __kmpc_atomic_fixed1_andl 155 __kmpc_atomic_fixed1_andl_cpt 156 __kmpc_atomic_fixed1_div 157 __kmpc_atomic_fixed1_div_cpt 158 __kmpc_atomic_fixed1_div_cpt_rev 159 __kmpc_atomic_fixed1_div_float8 160 __kmpc_atomic_fixed1_div_fp 161 __kmpc_atomic_fixed1_div_rev 162 __kmpc_atomic_fixed1_eqv 163 __kmpc_atomic_fixed1_eqv_cpt 164 __kmpc_atomic_fixed1_max 165 __kmpc_atomic_fixed1_max_cpt 166 __kmpc_atomic_fixed1_min 167 __kmpc_atomic_fixed1_min_cpt 168 __kmpc_atomic_fixed1_mul 169 __kmpc_atomic_fixed1_mul_cpt 170 __kmpc_atomic_fixed1_mul_float8 171 __kmpc_atomic_fixed1_mul_fp 172 __kmpc_atomic_fixed1_neqv 173 __kmpc_atomic_fixed1_neqv_cpt 174 __kmpc_atomic_fixed1_orb 175 __kmpc_atomic_fixed1_orb_cpt 176 __kmpc_atomic_fixed1_orl 177 __kmpc_atomic_fixed1_orl_cpt 178 __kmpc_atomic_fixed1_rd 179 __kmpc_atomic_fixed1_shl 180 __kmpc_atomic_fixed1_shl_cpt 181 __kmpc_atomic_fixed1_shl_cpt_rev 182 __kmpc_atomic_fixed1_shl_rev 183 __kmpc_atomic_fixed1_shr 184 __kmpc_atomic_fixed1_shr_cpt 185 __kmpc_atomic_fixed1_shr_cpt_rev 186 __kmpc_atomic_fixed1_shr_rev 187 __kmpc_atomic_fixed1_sub 188 __kmpc_atomic_fixed1_sub_cpt 189 __kmpc_atomic_fixed1_sub_cpt_rev 190 __kmpc_atomic_fixed1_sub_fp 191 __kmpc_atomic_fixed1_sub_rev 192 __kmpc_atomic_fixed1_swp 193 __kmpc_atomic_fixed1_wr 194 __kmpc_atomic_fixed1_xor 195 __kmpc_atomic_fixed1_xor_cpt 196 __kmpc_atomic_fixed1u_add_fp 197 __kmpc_atomic_fixed1u_sub_fp 198 __kmpc_atomic_fixed1u_mul_fp 199 __kmpc_atomic_fixed1u_div 200 __kmpc_atomic_fixed1u_div_cpt 201 __kmpc_atomic_fixed1u_div_cpt_rev 202 __kmpc_atomic_fixed1u_div_fp 203 __kmpc_atomic_fixed1u_div_rev 204 __kmpc_atomic_fixed1u_shr 205 __kmpc_atomic_fixed1u_shr_cpt 206 __kmpc_atomic_fixed1u_shr_cpt_rev 207 __kmpc_atomic_fixed1u_shr_rev 208 __kmpc_atomic_fixed2_add 209 __kmpc_atomic_fixed2_add_cpt 210 __kmpc_atomic_fixed2_add_fp 211 __kmpc_atomic_fixed2_andb 212 __kmpc_atomic_fixed2_andb_cpt 213 __kmpc_atomic_fixed2_andl 214 __kmpc_atomic_fixed2_andl_cpt 215
__kmpc_atomic_fixed2_div 216 __kmpc_atomic_fixed2_div_cpt 217 __kmpc_atomic_fixed2_div_cpt_rev 218 __kmpc_atomic_fixed2_div_float8 219 __kmpc_atomic_fixed2_div_fp 220 __kmpc_atomic_fixed2_div_rev 221 __kmpc_atomic_fixed2_eqv 222 __kmpc_atomic_fixed2_eqv_cpt 223 __kmpc_atomic_fixed2_max 224 __kmpc_atomic_fixed2_max_cpt 225 __kmpc_atomic_fixed2_min 226 __kmpc_atomic_fixed2_min_cpt 227 __kmpc_atomic_fixed2_mul 228 __kmpc_atomic_fixed2_mul_cpt 229 __kmpc_atomic_fixed2_mul_float8 230 __kmpc_atomic_fixed2_mul_fp 231 __kmpc_atomic_fixed2_neqv 232 __kmpc_atomic_fixed2_neqv_cpt 233 __kmpc_atomic_fixed2_orb 234 __kmpc_atomic_fixed2_orb_cpt 235 __kmpc_atomic_fixed2_orl 236 __kmpc_atomic_fixed2_orl_cpt 237 __kmpc_atomic_fixed2_rd 238 __kmpc_atomic_fixed2_shl 239 __kmpc_atomic_fixed2_shl_cpt 240 __kmpc_atomic_fixed2_shl_cpt_rev 241 __kmpc_atomic_fixed2_shl_rev 242 __kmpc_atomic_fixed2_shr 243 __kmpc_atomic_fixed2_shr_cpt 244 __kmpc_atomic_fixed2_shr_cpt_rev 245 __kmpc_atomic_fixed2_shr_rev 246 __kmpc_atomic_fixed2_sub 247 __kmpc_atomic_fixed2_sub_cpt 248 __kmpc_atomic_fixed2_sub_cpt_rev 249 __kmpc_atomic_fixed2_sub_fp 250 __kmpc_atomic_fixed2_sub_rev 251 __kmpc_atomic_fixed2_swp 252 __kmpc_atomic_fixed2_wr 253 __kmpc_atomic_fixed2_xor 254 __kmpc_atomic_fixed2_xor_cpt 255 __kmpc_atomic_fixed2u_add_fp 256 __kmpc_atomic_fixed2u_sub_fp 257 __kmpc_atomic_fixed2u_mul_fp 258 __kmpc_atomic_fixed2u_div 259 __kmpc_atomic_fixed2u_div_cpt 260 __kmpc_atomic_fixed2u_div_cpt_rev 261 __kmpc_atomic_fixed2u_div_fp 262 __kmpc_atomic_fixed2u_div_rev 263 __kmpc_atomic_fixed2u_shr 264 __kmpc_atomic_fixed2u_shr_cpt 265 __kmpc_atomic_fixed2u_shr_cpt_rev 266 __kmpc_atomic_fixed2u_shr_rev 267 __kmpc_atomic_fixed4_add 268 __kmpc_atomic_fixed4_add_cpt 269 __kmpc_atomic_fixed4_add_fp 270 __kmpc_atomic_fixed4_andb 271 __kmpc_atomic_fixed4_andb_cpt 272 __kmpc_atomic_fixed4_andl 273 __kmpc_atomic_fixed4_andl_cpt 274 __kmpc_atomic_fixed4_div 275 __kmpc_atomic_fixed4_div_cpt 276 __kmpc_atomic_fixed4_div_cpt_rev 277 __kmpc_atomic_fixed4_div_float8 278 __kmpc_atomic_fixed4_div_fp 279 __kmpc_atomic_fixed4_div_rev 280 __kmpc_atomic_fixed4_eqv 281 __kmpc_atomic_fixed4_eqv_cpt 282 __kmpc_atomic_fixed4_max 283 __kmpc_atomic_fixed4_max_cpt 284 __kmpc_atomic_fixed4_min 285 __kmpc_atomic_fixed4_min_cpt 286 __kmpc_atomic_fixed4_mul 287 __kmpc_atomic_fixed4_mul_cpt 288 __kmpc_atomic_fixed4_mul_float8 289 __kmpc_atomic_fixed4_mul_fp 290 __kmpc_atomic_fixed4_neqv 291 __kmpc_atomic_fixed4_neqv_cpt 292 __kmpc_atomic_fixed4_orb 293 __kmpc_atomic_fixed4_orb_cpt 294 __kmpc_atomic_fixed4_orl 295 __kmpc_atomic_fixed4_orl_cpt 296 __kmpc_atomic_fixed4_rd 297 __kmpc_atomic_fixed4_shl 298 __kmpc_atomic_fixed4_shl_cpt 299 __kmpc_atomic_fixed4_shl_cpt_rev 300 __kmpc_atomic_fixed4_shl_rev 301 __kmpc_atomic_fixed4_shr 302 __kmpc_atomic_fixed4_shr_cpt 303 __kmpc_atomic_fixed4_shr_cpt_rev 304 __kmpc_atomic_fixed4_shr_rev 305 __kmpc_atomic_fixed4_sub 306 __kmpc_atomic_fixed4_sub_cpt 307 __kmpc_atomic_fixed4_sub_cpt_rev 308 __kmpc_atomic_fixed4_sub_fp 309 __kmpc_atomic_fixed4_sub_rev 310 __kmpc_atomic_fixed4_swp 311 __kmpc_atomic_fixed4_wr 312 __kmpc_atomic_fixed4_xor 313 __kmpc_atomic_fixed4_xor_cpt 314 __kmpc_atomic_fixed4u_add_fp 315 __kmpc_atomic_fixed4u_sub_fp 316 __kmpc_atomic_fixed4u_mul_fp 317 __kmpc_atomic_fixed4u_div 318 __kmpc_atomic_fixed4u_div_cpt 319 __kmpc_atomic_fixed4u_div_cpt_rev 320 __kmpc_atomic_fixed4u_div_fp 321 __kmpc_atomic_fixed4u_div_rev 322 __kmpc_atomic_fixed4u_shr 323 __kmpc_atomic_fixed4u_shr_cpt 324 __kmpc_atomic_fixed4u_shr_cpt_rev 325 
__kmpc_atomic_fixed4u_shr_rev 326 __kmpc_atomic_fixed8_add 327 __kmpc_atomic_fixed8_add_cpt 328 __kmpc_atomic_fixed8_add_fp 329 __kmpc_atomic_fixed8_andb 330 __kmpc_atomic_fixed8_andb_cpt 331 __kmpc_atomic_fixed8_andl 332 __kmpc_atomic_fixed8_andl_cpt 333 __kmpc_atomic_fixed8_div 334 __kmpc_atomic_fixed8_div_cpt 335 __kmpc_atomic_fixed8_div_cpt_rev 336 __kmpc_atomic_fixed8_div_float8 337 __kmpc_atomic_fixed8_div_fp 338 __kmpc_atomic_fixed8_div_rev 339 __kmpc_atomic_fixed8_eqv 340 __kmpc_atomic_fixed8_eqv_cpt 341 __kmpc_atomic_fixed8_max 342 __kmpc_atomic_fixed8_max_cpt 343 __kmpc_atomic_fixed8_min 344 __kmpc_atomic_fixed8_min_cpt 345 __kmpc_atomic_fixed8_mul 346 __kmpc_atomic_fixed8_mul_cpt 347 __kmpc_atomic_fixed8_mul_float8 348 __kmpc_atomic_fixed8_mul_fp 349 __kmpc_atomic_fixed8_neqv 350 __kmpc_atomic_fixed8_neqv_cpt 351 __kmpc_atomic_fixed8_orb 352 __kmpc_atomic_fixed8_orb_cpt 353 __kmpc_atomic_fixed8_orl 354 __kmpc_atomic_fixed8_orl_cpt 355 __kmpc_atomic_fixed8_rd 356 __kmpc_atomic_fixed8_shl 357 __kmpc_atomic_fixed8_shl_cpt 358 __kmpc_atomic_fixed8_shl_cpt_rev 359 __kmpc_atomic_fixed8_shl_rev 360 __kmpc_atomic_fixed8_shr 361 __kmpc_atomic_fixed8_shr_cpt 362 __kmpc_atomic_fixed8_shr_cpt_rev 363 __kmpc_atomic_fixed8_shr_rev 364 __kmpc_atomic_fixed8_sub 365 __kmpc_atomic_fixed8_sub_cpt 366 __kmpc_atomic_fixed8_sub_cpt_rev 367 __kmpc_atomic_fixed8_sub_fp 368 __kmpc_atomic_fixed8_sub_rev 369 __kmpc_atomic_fixed8_swp 370 __kmpc_atomic_fixed8_wr 371 __kmpc_atomic_fixed8_xor 372 __kmpc_atomic_fixed8_xor_cpt 373 __kmpc_atomic_fixed8u_add_fp 374 __kmpc_atomic_fixed8u_sub_fp 375 __kmpc_atomic_fixed8u_mul_fp 376 __kmpc_atomic_fixed8u_div 377 __kmpc_atomic_fixed8u_div_cpt 378 __kmpc_atomic_fixed8u_div_cpt_rev 379 __kmpc_atomic_fixed8u_div_fp 380 __kmpc_atomic_fixed8u_div_rev 381 __kmpc_atomic_fixed8u_shr 382 __kmpc_atomic_fixed8u_shr_cpt 383 __kmpc_atomic_fixed8u_shr_cpt_rev 384 __kmpc_atomic_fixed8u_shr_rev 385 @endcode 386 387 Functions for floating point 388 ---------------------------- 389 There are versions here for floating point numbers of size 4, 8, 10 and 16 390 bytes. (Ten byte floats are used by X87, but are now rare). 
391 @code 392 __kmpc_atomic_float4_add 393 __kmpc_atomic_float4_add_cpt 394 __kmpc_atomic_float4_add_float8 395 __kmpc_atomic_float4_add_fp 396 __kmpc_atomic_float4_div 397 __kmpc_atomic_float4_div_cpt 398 __kmpc_atomic_float4_div_cpt_rev 399 __kmpc_atomic_float4_div_float8 400 __kmpc_atomic_float4_div_fp 401 __kmpc_atomic_float4_div_rev 402 __kmpc_atomic_float4_max 403 __kmpc_atomic_float4_max_cpt 404 __kmpc_atomic_float4_min 405 __kmpc_atomic_float4_min_cpt 406 __kmpc_atomic_float4_mul 407 __kmpc_atomic_float4_mul_cpt 408 __kmpc_atomic_float4_mul_float8 409 __kmpc_atomic_float4_mul_fp 410 __kmpc_atomic_float4_rd 411 __kmpc_atomic_float4_sub 412 __kmpc_atomic_float4_sub_cpt 413 __kmpc_atomic_float4_sub_cpt_rev 414 __kmpc_atomic_float4_sub_float8 415 __kmpc_atomic_float4_sub_fp 416 __kmpc_atomic_float4_sub_rev 417 __kmpc_atomic_float4_swp 418 __kmpc_atomic_float4_wr 419 __kmpc_atomic_float8_add 420 __kmpc_atomic_float8_add_cpt 421 __kmpc_atomic_float8_add_fp 422 __kmpc_atomic_float8_div 423 __kmpc_atomic_float8_div_cpt 424 __kmpc_atomic_float8_div_cpt_rev 425 __kmpc_atomic_float8_div_fp 426 __kmpc_atomic_float8_div_rev 427 __kmpc_atomic_float8_max 428 __kmpc_atomic_float8_max_cpt 429 __kmpc_atomic_float8_min 430 __kmpc_atomic_float8_min_cpt 431 __kmpc_atomic_float8_mul 432 __kmpc_atomic_float8_mul_cpt 433 __kmpc_atomic_float8_mul_fp 434 __kmpc_atomic_float8_rd 435 __kmpc_atomic_float8_sub 436 __kmpc_atomic_float8_sub_cpt 437 __kmpc_atomic_float8_sub_cpt_rev 438 __kmpc_atomic_float8_sub_fp 439 __kmpc_atomic_float8_sub_rev 440 __kmpc_atomic_float8_swp 441 __kmpc_atomic_float8_wr 442 __kmpc_atomic_float10_add 443 __kmpc_atomic_float10_add_cpt 444 __kmpc_atomic_float10_add_fp 445 __kmpc_atomic_float10_div 446 __kmpc_atomic_float10_div_cpt 447 __kmpc_atomic_float10_div_cpt_rev 448 __kmpc_atomic_float10_div_fp 449 __kmpc_atomic_float10_div_rev 450 __kmpc_atomic_float10_mul 451 __kmpc_atomic_float10_mul_cpt 452 __kmpc_atomic_float10_mul_fp 453 __kmpc_atomic_float10_rd 454 __kmpc_atomic_float10_sub 455 __kmpc_atomic_float10_sub_cpt 456 __kmpc_atomic_float10_sub_cpt_rev 457 __kmpc_atomic_float10_sub_fp 458 __kmpc_atomic_float10_sub_rev 459 __kmpc_atomic_float10_swp 460 __kmpc_atomic_float10_wr 461 __kmpc_atomic_float16_add 462 __kmpc_atomic_float16_add_cpt 463 __kmpc_atomic_float16_div 464 __kmpc_atomic_float16_div_cpt 465 __kmpc_atomic_float16_div_cpt_rev 466 __kmpc_atomic_float16_div_rev 467 __kmpc_atomic_float16_max 468 __kmpc_atomic_float16_max_cpt 469 __kmpc_atomic_float16_min 470 __kmpc_atomic_float16_min_cpt 471 __kmpc_atomic_float16_mul 472 __kmpc_atomic_float16_mul_cpt 473 __kmpc_atomic_float16_rd 474 __kmpc_atomic_float16_sub 475 __kmpc_atomic_float16_sub_cpt 476 __kmpc_atomic_float16_sub_cpt_rev 477 __kmpc_atomic_float16_sub_rev 478 __kmpc_atomic_float16_swp 479 __kmpc_atomic_float16_wr 480 @endcode 481 482 Functions for Complex types 483 --------------------------- 484 Functions for complex types whose component floating point variables are of size 485 4, 8, 10 or 16 bytes. The names here are based on the size of the component float, 486 *not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an operation 487 on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`.
488 489 @code 490 __kmpc_atomic_cmplx4_add 491 __kmpc_atomic_cmplx4_add_cmplx8 492 __kmpc_atomic_cmplx4_add_cpt 493 __kmpc_atomic_cmplx4_div 494 __kmpc_atomic_cmplx4_div_cmplx8 495 __kmpc_atomic_cmplx4_div_cpt 496 __kmpc_atomic_cmplx4_div_cpt_rev 497 __kmpc_atomic_cmplx4_div_rev 498 __kmpc_atomic_cmplx4_mul 499 __kmpc_atomic_cmplx4_mul_cmplx8 500 __kmpc_atomic_cmplx4_mul_cpt 501 __kmpc_atomic_cmplx4_rd 502 __kmpc_atomic_cmplx4_sub 503 __kmpc_atomic_cmplx4_sub_cmplx8 504 __kmpc_atomic_cmplx4_sub_cpt 505 __kmpc_atomic_cmplx4_sub_cpt_rev 506 __kmpc_atomic_cmplx4_sub_rev 507 __kmpc_atomic_cmplx4_swp 508 __kmpc_atomic_cmplx4_wr 509 __kmpc_atomic_cmplx8_add 510 __kmpc_atomic_cmplx8_add_cpt 511 __kmpc_atomic_cmplx8_div 512 __kmpc_atomic_cmplx8_div_cpt 513 __kmpc_atomic_cmplx8_div_cpt_rev 514 __kmpc_atomic_cmplx8_div_rev 515 __kmpc_atomic_cmplx8_mul 516 __kmpc_atomic_cmplx8_mul_cpt 517 __kmpc_atomic_cmplx8_rd 518 __kmpc_atomic_cmplx8_sub 519 __kmpc_atomic_cmplx8_sub_cpt 520 __kmpc_atomic_cmplx8_sub_cpt_rev 521 __kmpc_atomic_cmplx8_sub_rev 522 __kmpc_atomic_cmplx8_swp 523 __kmpc_atomic_cmplx8_wr 524 __kmpc_atomic_cmplx10_add 525 __kmpc_atomic_cmplx10_add_cpt 526 __kmpc_atomic_cmplx10_div 527 __kmpc_atomic_cmplx10_div_cpt 528 __kmpc_atomic_cmplx10_div_cpt_rev 529 __kmpc_atomic_cmplx10_div_rev 530 __kmpc_atomic_cmplx10_mul 531 __kmpc_atomic_cmplx10_mul_cpt 532 __kmpc_atomic_cmplx10_rd 533 __kmpc_atomic_cmplx10_sub 534 __kmpc_atomic_cmplx10_sub_cpt 535 __kmpc_atomic_cmplx10_sub_cpt_rev 536 __kmpc_atomic_cmplx10_sub_rev 537 __kmpc_atomic_cmplx10_swp 538 __kmpc_atomic_cmplx10_wr 539 __kmpc_atomic_cmplx16_add 540 __kmpc_atomic_cmplx16_add_cpt 541 __kmpc_atomic_cmplx16_div 542 __kmpc_atomic_cmplx16_div_cpt 543 __kmpc_atomic_cmplx16_div_cpt_rev 544 __kmpc_atomic_cmplx16_div_rev 545 __kmpc_atomic_cmplx16_mul 546 __kmpc_atomic_cmplx16_mul_cpt 547 __kmpc_atomic_cmplx16_rd 548 __kmpc_atomic_cmplx16_sub 549 __kmpc_atomic_cmplx16_sub_cpt 550 __kmpc_atomic_cmplx16_sub_cpt_rev 551 __kmpc_atomic_cmplx16_swp 552 __kmpc_atomic_cmplx16_wr 553 @endcode 554 */ 555 556 /*! 
557 @ingroup ATOMIC_OPS 558 @{ 559 */ 560 561 /* 562 * Global vars 563 */ 564 565 #ifndef KMP_GOMP_COMPAT 566 int __kmp_atomic_mode = 1; // Intel perf 567 #else 568 int __kmp_atomic_mode = 2; // GOMP compatibility 569 #endif /* KMP_GOMP_COMPAT */ 570 571 KMP_ALIGN(128) 572 573 // Control access to all user coded atomics in Gnu compat mode 574 kmp_atomic_lock_t __kmp_atomic_lock; 575 // Control access to all user coded atomics for 1-byte fixed data types 576 kmp_atomic_lock_t __kmp_atomic_lock_1i; 577 // Control access to all user coded atomics for 2-byte fixed data types 578 kmp_atomic_lock_t __kmp_atomic_lock_2i; 579 // Control access to all user coded atomics for 4-byte fixed data types 580 kmp_atomic_lock_t __kmp_atomic_lock_4i; 581 // Control access to all user coded atomics for kmp_real32 data type 582 kmp_atomic_lock_t __kmp_atomic_lock_4r; 583 // Control access to all user coded atomics for 8-byte fixed data types 584 kmp_atomic_lock_t __kmp_atomic_lock_8i; 585 // Control access to all user coded atomics for kmp_real64 data type 586 kmp_atomic_lock_t __kmp_atomic_lock_8r; 587 // Control access to all user coded atomics for complex byte data type 588 kmp_atomic_lock_t __kmp_atomic_lock_8c; 589 // Control access to all user coded atomics for long double data type 590 kmp_atomic_lock_t __kmp_atomic_lock_10r; 591 // Control access to all user coded atomics for _Quad data type 592 kmp_atomic_lock_t __kmp_atomic_lock_16r; 593 // Control access to all user coded atomics for double complex data type 594 kmp_atomic_lock_t __kmp_atomic_lock_16c; 595 // Control access to all user coded atomics for long double complex type 596 kmp_atomic_lock_t __kmp_atomic_lock_20c; 597 // Control access to all user coded atomics for _Quad complex data type 598 kmp_atomic_lock_t __kmp_atomic_lock_32c; 599 600 /* 2007-03-02: 601 Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug 602 on *_32 and *_32e. This is just a temporary workaround for the problem. It 603 seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines 604 in assembler language. 
*/ 605 #define KMP_ATOMIC_VOLATILE volatile 606 607 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD 608 609 static inline void operator+=(Quad_a4_t &lhs, Quad_a4_t &rhs) { 610 lhs.q += rhs.q; 611 } 612 static inline void operator-=(Quad_a4_t &lhs, Quad_a4_t &rhs) { 613 lhs.q -= rhs.q; 614 } 615 static inline void operator*=(Quad_a4_t &lhs, Quad_a4_t &rhs) { 616 lhs.q *= rhs.q; 617 } 618 static inline void operator/=(Quad_a4_t &lhs, Quad_a4_t &rhs) { 619 lhs.q /= rhs.q; 620 } 621 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) { 622 return lhs.q < rhs.q; 623 } 624 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) { 625 return lhs.q > rhs.q; 626 } 627 628 static inline void operator+=(Quad_a16_t &lhs, Quad_a16_t &rhs) { 629 lhs.q += rhs.q; 630 } 631 static inline void operator-=(Quad_a16_t &lhs, Quad_a16_t &rhs) { 632 lhs.q -= rhs.q; 633 } 634 static inline void operator*=(Quad_a16_t &lhs, Quad_a16_t &rhs) { 635 lhs.q *= rhs.q; 636 } 637 static inline void operator/=(Quad_a16_t &lhs, Quad_a16_t &rhs) { 638 lhs.q /= rhs.q; 639 } 640 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) { 641 return lhs.q < rhs.q; 642 } 643 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) { 644 return lhs.q > rhs.q; 645 } 646 647 static inline void operator+=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) { 648 lhs.q += rhs.q; 649 } 650 static inline void operator-=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) { 651 lhs.q -= rhs.q; 652 } 653 static inline void operator*=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) { 654 lhs.q *= rhs.q; 655 } 656 static inline void operator/=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) { 657 lhs.q /= rhs.q; 658 } 659 660 static inline void operator+=(kmp_cmplx128_a16_t &lhs, 661 kmp_cmplx128_a16_t &rhs) { 662 lhs.q += rhs.q; 663 } 664 static inline void operator-=(kmp_cmplx128_a16_t &lhs, 665 kmp_cmplx128_a16_t &rhs) { 666 lhs.q -= rhs.q; 667 } 668 static inline void operator*=(kmp_cmplx128_a16_t &lhs, 669 kmp_cmplx128_a16_t &rhs) { 670 lhs.q *= rhs.q; 671 } 672 static inline void operator/=(kmp_cmplx128_a16_t &lhs, 673 kmp_cmplx128_a16_t &rhs) { 674 lhs.q /= rhs.q; 675 } 676 677 #endif 678 679 // ATOMIC implementation routines ----------------------------------------- 680 // One routine for each operation and operand type. 681 // All routines declarations looks like 682 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs ); 683 684 #define KMP_CHECK_GTID \ 685 if (gtid == KMP_GTID_UNKNOWN) { \ 686 gtid = __kmp_entry_gtid(); \ 687 } // check and get gtid when needed 688 689 // Beginning of a definition (provides name, parameters, gebug trace) 690 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 691 // fixed) 692 // OP_ID - operation identifier (add, sub, mul, ...) 
693 // TYPE - operands' type 694 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ 695 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \ 696 TYPE *lhs, TYPE rhs) { \ 697 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 698 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 699 700 // ------------------------------------------------------------------------ 701 // Lock variables used for critical sections for various size operands 702 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat 703 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char 704 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short 705 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int 706 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float 707 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int 708 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double 709 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex 710 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double 711 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad 712 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex 713 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex 714 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex 715 716 // ------------------------------------------------------------------------ 717 // Operation on *lhs, rhs bound by critical section 718 // OP - operator (it's supposed to contain an assignment) 719 // LCK_ID - lock identifier 720 // Note: don't check gtid as it should always be valid 721 // 1, 2-byte - expect valid parameter, other - check before this macro 722 #define OP_CRITICAL(OP, LCK_ID) \ 723 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 724 \ 725 (*lhs) OP(rhs); \ 726 \ 727 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 728 729 // ------------------------------------------------------------------------ 730 // For GNU compatibility, we may need to use a critical section, 731 // even though it is not required by the ISA. 732 // 733 // On IA-32 architecture, all atomic operations except for fixed 4 byte add, 734 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common 735 // critical section. On Intel(R) 64, all atomic operations are done with fetch 736 // and add or compare and exchange. Therefore, the FLAG parameter to this 737 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which 738 // require a critical section, where we predict that they will be implemented 739 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()). 740 // 741 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct, 742 // the FLAG parameter should always be 1. If we know that we will be using 743 // a critical section, then we want to make certain that we use the generic 744 // lock __kmp_atomic_lock to protect the atomic update, and not one of the 745 // locks that are specialized based upon the size or type of the data. 746 // 747 // If FLAG is 0, then we are relying on dead code elimination by the build 748 // compiler to get rid of the useless block of code, and save a needless 749 // branch at runtime.
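// Illustration (a sketch, not compiled on its own): a generator invocation such as
//   ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86)
// passes GOMP_FLAG == KMP_ARCH_X86, so an IA-32 build running with
// __kmp_atomic_mode == 2 takes the OP_GOMP_CRITICAL branch below, performs the
// update under the generic __kmp_atomic_lock, and returns. When GOMP_FLAG is 0,
// the condition is a compile-time constant false and the branch is expected to
// be removed entirely by dead code elimination.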
750 751 #ifdef KMP_GOMP_COMPAT 752 #define OP_GOMP_CRITICAL(OP, FLAG) \ 753 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 754 KMP_CHECK_GTID; \ 755 OP_CRITICAL(OP, 0); \ 756 return; \ 757 } 758 #else 759 #define OP_GOMP_CRITICAL(OP, FLAG) 760 #endif /* KMP_GOMP_COMPAT */ 761 762 #if KMP_MIC 763 #define KMP_DO_PAUSE _mm_delay_32(1) 764 #else 765 #define KMP_DO_PAUSE KMP_CPU_PAUSE() 766 #endif /* KMP_MIC */ 767 768 // ------------------------------------------------------------------------ 769 // Operation on *lhs, rhs using "compare_and_store" routine 770 // TYPE - operands' type 771 // BITS - size in bits, used to distinguish low level calls 772 // OP - operator 773 #define OP_CMPXCHG(TYPE, BITS, OP) \ 774 { \ 775 TYPE old_value, new_value; \ 776 old_value = *(TYPE volatile *)lhs; \ 777 new_value = old_value OP rhs; \ 778 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 779 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 780 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 781 KMP_DO_PAUSE; \ 782 \ 783 old_value = *(TYPE volatile *)lhs; \ 784 new_value = old_value OP rhs; \ 785 } \ 786 } 787 788 #if USE_CMPXCHG_FIX 789 // 2007-06-25: 790 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32 791 // and win_32e are affected (I verified the asm). Compiler ignores the volatile 792 // qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the 793 // compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of 794 // the workaround. 795 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ 796 { \ 797 struct _sss { \ 798 TYPE cmp; \ 799 kmp_int##BITS *vvv; \ 800 }; \ 801 struct _sss old_value, new_value; \ 802 old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \ 803 new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \ 804 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ 805 new_value.cmp = old_value.cmp OP rhs; \ 806 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 807 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \ 808 *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \ 809 KMP_DO_PAUSE; \ 810 \ 811 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ 812 new_value.cmp = old_value.cmp OP rhs; \ 813 } \ 814 } 815 // end of the first part of the workaround for C78287 816 #endif // USE_CMPXCHG_FIX 817 818 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 819 820 // ------------------------------------------------------------------------ 821 // X86 or X86_64: no alignment problems ==================================== 822 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 823 GOMP_FLAG) \ 824 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 825 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ 826 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ 827 KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ 828 } 829 // ------------------------------------------------------------------------- 830 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 831 GOMP_FLAG) \ 832 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 833 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ 834 OP_CMPXCHG(TYPE, BITS, OP) \ 835 } 836 #if USE_CMPXCHG_FIX 837 // ------------------------------------------------------------------------- 838 // workaround for C78287 (complex(kind=4) data type) 839 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \ 840 MASK, GOMP_FLAG) \ 841 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 842 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ 843 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ 844 } 845 // end of the second 
part of the workaround for C78287 846 #endif 847 848 #else 849 // ------------------------------------------------------------------------- 850 // Code for other architectures that don't handle unaligned accesses. 851 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 852 GOMP_FLAG) \ 853 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 854 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ 855 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 856 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ 857 KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ 858 } else { \ 859 KMP_CHECK_GTID; \ 860 OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ 861 } \ 862 } 863 // ------------------------------------------------------------------------- 864 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 865 GOMP_FLAG) \ 866 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 867 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ 868 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 869 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 870 } else { \ 871 KMP_CHECK_GTID; \ 872 OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ 873 } \ 874 } 875 #if USE_CMPXCHG_FIX 876 // ------------------------------------------------------------------------- 877 // workaround for C78287 (complex(kind=4) data type) 878 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \ 879 MASK, GOMP_FLAG) \ 880 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 881 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ 882 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 883 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 884 } else { \ 885 KMP_CHECK_GTID; \ 886 OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ 887 } \ 888 } 889 // end of the second part of the workaround for C78287 890 #endif // USE_CMPXCHG_FIX 891 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 892 893 // Routines for ATOMIC 4-byte operands addition and subtraction 894 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, 895 0) // __kmpc_atomic_fixed4_add 896 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3, 897 0) // __kmpc_atomic_fixed4_sub 898 899 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3, 900 KMP_ARCH_X86) // __kmpc_atomic_float4_add 901 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3, 902 KMP_ARCH_X86) // __kmpc_atomic_float4_sub 903 904 // Routines for ATOMIC 8-byte operands addition and subtraction 905 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7, 906 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add 907 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7, 908 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub 909 910 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7, 911 KMP_ARCH_X86) // __kmpc_atomic_float8_add 912 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7, 913 KMP_ARCH_X86) // __kmpc_atomic_float8_sub 914 915 // ------------------------------------------------------------------------ 916 // Entries definition for integer operands 917 // TYPE_ID - operands type and size (fixed4, float4) 918 // OP_ID - operation identifier (add, sub, mul, ...) 
919 // TYPE - operand type 920 // BITS - size in bits, used to distinguish low level calls 921 // OP - operator (used in critical section) 922 // LCK_ID - lock identifier, used to possibly distinguish lock variable 923 // MASK - used for alignment check 924 925 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG 926 // ------------------------------------------------------------------------ 927 // Routines for ATOMIC integer operands, other operators 928 // ------------------------------------------------------------------------ 929 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG 930 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0, 931 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add 932 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0, 933 0) // __kmpc_atomic_fixed1_andb 934 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0, 935 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div 936 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0, 937 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div 938 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0, 939 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul 940 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0, 941 0) // __kmpc_atomic_fixed1_orb 942 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0, 943 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl 944 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0, 945 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr 946 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, 947 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr 948 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0, 949 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub 950 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0, 951 0) // __kmpc_atomic_fixed1_xor 952 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1, 953 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add 954 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1, 955 0) // __kmpc_atomic_fixed2_andb 956 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1, 957 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div 958 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1, 959 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div 960 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1, 961 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul 962 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1, 963 0) // __kmpc_atomic_fixed2_orb 964 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1, 965 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl 966 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1, 967 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr 968 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, 969 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr 970 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1, 971 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub 972 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1, 973 0) // __kmpc_atomic_fixed2_xor 974 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3, 975 0) // __kmpc_atomic_fixed4_andb 976 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3, 977 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div 978 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3, 979 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div 980 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3, 981 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul 982 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3, 983 0) // __kmpc_atomic_fixed4_orb 984 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3, 985 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl 986 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3, 987 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr 988 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, 
989 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr 990 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3, 991 0) // __kmpc_atomic_fixed4_xor 992 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7, 993 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb 994 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7, 995 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div 996 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7, 997 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div 998 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7, 999 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul 1000 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7, 1001 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb 1002 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7, 1003 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl 1004 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7, 1005 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr 1006 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, 1007 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr 1008 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7, 1009 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor 1010 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3, 1011 KMP_ARCH_X86) // __kmpc_atomic_float4_div 1012 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3, 1013 KMP_ARCH_X86) // __kmpc_atomic_float4_mul 1014 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7, 1015 KMP_ARCH_X86) // __kmpc_atomic_float8_div 1016 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7, 1017 KMP_ARCH_X86) // __kmpc_atomic_float8_mul 1018 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG 1019 1020 /* ------------------------------------------------------------------------ */ 1021 /* Routines for C/C++ Reduction operators && and || */ 1022 1023 // ------------------------------------------------------------------------ 1024 // Need separate macros for &&, || because there is no combined assignment 1025 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used 1026 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1027 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1028 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ 1029 OP_CRITICAL(= *lhs OP, LCK_ID) \ 1030 } 1031 1032 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1033 1034 // ------------------------------------------------------------------------ 1035 // X86 or X86_64: no alignment problems =================================== 1036 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \ 1037 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1038 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ 1039 OP_CMPXCHG(TYPE, BITS, OP) \ 1040 } 1041 1042 #else 1043 // ------------------------------------------------------------------------ 1044 // Code for other architectures that don't handle unaligned accesses. 
1045 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \ 1046 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1047 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ 1048 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1049 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1050 } else { \ 1051 KMP_CHECK_GTID; \ 1052 OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \ 1053 } \ 1054 } 1055 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1056 1057 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0, 1058 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl 1059 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0, 1060 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl 1061 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1, 1062 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl 1063 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1, 1064 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl 1065 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3, 1066 0) // __kmpc_atomic_fixed4_andl 1067 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3, 1068 0) // __kmpc_atomic_fixed4_orl 1069 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7, 1070 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl 1071 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7, 1072 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl 1073 1074 /* ------------------------------------------------------------------------- */ 1075 /* Routines for Fortran operators that matched no one in C: */ 1076 /* MAX, MIN, .EQV., .NEQV. */ 1077 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */ 1078 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */ 1079 1080 // ------------------------------------------------------------------------- 1081 // MIN and MAX need separate macros 1082 // OP - operator to check if we need any actions? 1083 #define MIN_MAX_CRITSECT(OP, LCK_ID) \ 1084 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 1085 \ 1086 if (*lhs OP rhs) { /* still need actions? */ \ 1087 *lhs = rhs; \ 1088 } \ 1089 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 1090 1091 // ------------------------------------------------------------------------- 1092 #ifdef KMP_GOMP_COMPAT 1093 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \ 1094 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 1095 KMP_CHECK_GTID; \ 1096 MIN_MAX_CRITSECT(OP, 0); \ 1097 return; \ 1098 } 1099 #else 1100 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) 1101 #endif /* KMP_GOMP_COMPAT */ 1102 1103 // ------------------------------------------------------------------------- 1104 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \ 1105 { \ 1106 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 1107 TYPE old_value; \ 1108 temp_val = *lhs; \ 1109 old_value = temp_val; \ 1110 while (old_value OP rhs && /* still need actions? */ \ 1111 !KMP_COMPARE_AND_STORE_ACQ##BITS( \ 1112 (kmp_int##BITS *)lhs, \ 1113 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 1114 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \ 1115 KMP_CPU_PAUSE(); \ 1116 temp_val = *lhs; \ 1117 old_value = temp_val; \ 1118 } \ 1119 } 1120 1121 // ------------------------------------------------------------------------- 1122 // 1-byte, 2-byte operands - use critical section 1123 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1124 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1125 if (*lhs OP rhs) { /* need actions? 
*/ \ 1126 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ 1127 MIN_MAX_CRITSECT(OP, LCK_ID) \ 1128 } \ 1129 } 1130 1131 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1132 1133 // ------------------------------------------------------------------------- 1134 // X86 or X86_64: no alignment problems ==================================== 1135 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 1136 GOMP_FLAG) \ 1137 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1138 if (*lhs OP rhs) { \ 1139 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ 1140 MIN_MAX_CMPXCHG(TYPE, BITS, OP) \ 1141 } \ 1142 } 1143 1144 #else 1145 // ------------------------------------------------------------------------- 1146 // Code for other architectures that don't handle unaligned accesses. 1147 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 1148 GOMP_FLAG) \ 1149 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1150 if (*lhs OP rhs) { \ 1151 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ 1152 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1153 MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1154 } else { \ 1155 KMP_CHECK_GTID; \ 1156 MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \ 1157 } \ 1158 } \ 1159 } 1160 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1161 1162 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0, 1163 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max 1164 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0, 1165 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min 1166 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1, 1167 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max 1168 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1, 1169 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min 1170 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3, 1171 0) // __kmpc_atomic_fixed4_max 1172 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3, 1173 0) // __kmpc_atomic_fixed4_min 1174 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7, 1175 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max 1176 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7, 1177 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min 1178 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3, 1179 KMP_ARCH_X86) // __kmpc_atomic_float4_max 1180 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3, 1181 KMP_ARCH_X86) // __kmpc_atomic_float4_min 1182 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7, 1183 KMP_ARCH_X86) // __kmpc_atomic_float8_max 1184 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7, 1185 KMP_ARCH_X86) // __kmpc_atomic_float8_min 1186 #if KMP_HAVE_QUAD 1187 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r, 1188 1) // __kmpc_atomic_float16_max 1189 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r, 1190 1) // __kmpc_atomic_float16_min 1191 #if (KMP_ARCH_X86) 1192 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r, 1193 1) // __kmpc_atomic_float16_max_a16 1194 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r, 1195 1) // __kmpc_atomic_float16_min_a16 1196 #endif 1197 #endif 1198 // ------------------------------------------------------------------------ 1199 // Need separate macros for .EQV. 
because of the need of complement (~) 1200 // OP ignored for critical sections, ^=~ used instead 1201 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1202 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1203 OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \ 1204 OP_CRITICAL(^= ~, LCK_ID) /* send assignment and complement */ \ 1205 } 1206 1207 // ------------------------------------------------------------------------ 1208 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1209 // ------------------------------------------------------------------------ 1210 // X86 or X86_64: no alignment problems =================================== 1211 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 1212 GOMP_FLAG) \ 1213 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1214 OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \ 1215 OP_CMPXCHG(TYPE, BITS, OP) \ 1216 } 1217 // ------------------------------------------------------------------------ 1218 #else 1219 // ------------------------------------------------------------------------ 1220 // Code for other architectures that don't handle unaligned accesses. 1221 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 1222 GOMP_FLAG) \ 1223 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1224 OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) \ 1225 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1226 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1227 } else { \ 1228 KMP_CHECK_GTID; \ 1229 OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */ \ 1230 } \ 1231 } 1232 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1233 1234 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0, 1235 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv 1236 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1, 1237 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv 1238 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3, 1239 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv 1240 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7, 1241 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv 1242 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, 1243 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv 1244 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, 1245 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv 1246 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, 1247 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv 1248 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, 1249 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv 1250 1251 // ------------------------------------------------------------------------ 1252 // Routines for Extended types: long double, _Quad, complex flavours (use 1253 // critical section) 1254 // TYPE_ID, OP_ID, TYPE - detailed above 1255 // OP - operator 1256 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1257 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1258 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1259 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \ 1260 OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \ 1261 } 1262 1263 /* ------------------------------------------------------------------------- */ 1264 // routines for long double type 1265 ATOMIC_CRITICAL(float10, add, long double, +, 10r, 1266 1) // __kmpc_atomic_float10_add 1267 ATOMIC_CRITICAL(float10, sub, long double, -, 10r, 1268 1) // __kmpc_atomic_float10_sub 1269 ATOMIC_CRITICAL(float10, mul, long double, *, 10r, 1270 1) // __kmpc_atomic_float10_mul 1271 ATOMIC_CRITICAL(float10, div, long double, /, 10r, 1272 1) // __kmpc_atomic_float10_div 
1273 #if KMP_HAVE_QUAD 1274 // routines for _Quad type 1275 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r, 1276 1) // __kmpc_atomic_float16_add 1277 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r, 1278 1) // __kmpc_atomic_float16_sub 1279 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r, 1280 1) // __kmpc_atomic_float16_mul 1281 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r, 1282 1) // __kmpc_atomic_float16_div 1283 #if (KMP_ARCH_X86) 1284 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r, 1285 1) // __kmpc_atomic_float16_add_a16 1286 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r, 1287 1) // __kmpc_atomic_float16_sub_a16 1288 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r, 1289 1) // __kmpc_atomic_float16_mul_a16 1290 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r, 1291 1) // __kmpc_atomic_float16_div_a16 1292 #endif 1293 #endif 1294 // routines for complex types 1295 1296 #if USE_CMPXCHG_FIX 1297 // workaround for C78287 (complex(kind=4) data type) 1298 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1299 1) // __kmpc_atomic_cmplx4_add 1300 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1301 1) // __kmpc_atomic_cmplx4_sub 1302 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1303 1) // __kmpc_atomic_cmplx4_mul 1304 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1305 1) // __kmpc_atomic_cmplx4_div 1306 // end of the workaround for C78287 1307 #else 1308 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add 1309 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub 1310 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul 1311 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div 1312 #endif // USE_CMPXCHG_FIX 1313 1314 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add 1315 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub 1316 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul 1317 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div 1318 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c, 1319 1) // __kmpc_atomic_cmplx10_add 1320 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c, 1321 1) // __kmpc_atomic_cmplx10_sub 1322 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c, 1323 1) // __kmpc_atomic_cmplx10_mul 1324 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c, 1325 1) // __kmpc_atomic_cmplx10_div 1326 #if KMP_HAVE_QUAD 1327 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c, 1328 1) // __kmpc_atomic_cmplx16_add 1329 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c, 1330 1) // __kmpc_atomic_cmplx16_sub 1331 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c, 1332 1) // __kmpc_atomic_cmplx16_mul 1333 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c, 1334 1) // __kmpc_atomic_cmplx16_div 1335 #if (KMP_ARCH_X86) 1336 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1337 1) // __kmpc_atomic_cmplx16_add_a16 1338 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1339 1) // __kmpc_atomic_cmplx16_sub_a16 1340 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1341 1) // __kmpc_atomic_cmplx16_mul_a16 1342 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1343 1) // __kmpc_atomic_cmplx16_div_a16 1344 #endif 1345 #endif 1346 1347 #if OMP_40_ENABLED 1348 1349 // OpenMP 4.0: x = expr binop x for non-commutative operations. 
1350 // Supported only on IA-32 architecture and Intel(R) 64 1351 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1352 1353 // ------------------------------------------------------------------------ 1354 // Operation on *lhs, rhs bound by critical section 1355 // OP - operator (it's supposed to contain an assignment) 1356 // LCK_ID - lock identifier 1357 // Note: don't check gtid as it should always be valid 1358 // 1, 2-byte - expect valid parameter, other - check before this macro 1359 #define OP_CRITICAL_REV(OP, LCK_ID) \ 1360 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 1361 \ 1362 (*lhs) = (rhs)OP(*lhs); \ 1363 \ 1364 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 1365 1366 #ifdef KMP_GOMP_COMPAT 1367 #define OP_GOMP_CRITICAL_REV(OP, FLAG) \ 1368 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 1369 KMP_CHECK_GTID; \ 1370 OP_CRITICAL_REV(OP, 0); \ 1371 return; \ 1372 } 1373 #else 1374 #define OP_GOMP_CRITICAL_REV(OP, FLAG) 1375 #endif /* KMP_GOMP_COMPAT */ 1376 1377 // Beginning of a definition (provides name, parameters, gebug trace) 1378 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 1379 // fixed) 1380 // OP_ID - operation identifier (add, sub, mul, ...) 1381 // TYPE - operands' type 1382 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ 1383 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \ 1384 TYPE *lhs, TYPE rhs) { \ 1385 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 1386 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid)); 1387 1388 // ------------------------------------------------------------------------ 1389 // Operation on *lhs, rhs using "compare_and_store" routine 1390 // TYPE - operands' type 1391 // BITS - size in bits, used to distinguish low level calls 1392 // OP - operator 1393 // Note: temp_val introduced in order to force the compiler to read 1394 // *lhs only once (w/o it the compiler reads *lhs twice) 1395 #define OP_CMPXCHG_REV(TYPE, BITS, OP) \ 1396 { \ 1397 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 1398 TYPE old_value, new_value; \ 1399 temp_val = *lhs; \ 1400 old_value = temp_val; \ 1401 new_value = rhs OP old_value; \ 1402 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 1403 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 1404 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 1405 KMP_DO_PAUSE; \ 1406 \ 1407 temp_val = *lhs; \ 1408 old_value = temp_val; \ 1409 new_value = rhs OP old_value; \ 1410 } \ 1411 } 1412 1413 // ------------------------------------------------------------------------- 1414 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \ 1415 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \ 1416 OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ 1417 OP_CMPXCHG_REV(TYPE, BITS, OP) \ 1418 } 1419 1420 // ------------------------------------------------------------------------ 1421 // Entries definition for integer operands 1422 // TYPE_ID - operands type and size (fixed4, float4) 1423 // OP_ID - operation identifier (add, sub, mul, ...) 
1424 // TYPE - operand type 1425 // BITS - size in bits, used to distinguish low level calls 1426 // OP - operator (used in critical section) 1427 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1428 1429 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG 1430 // ------------------------------------------------------------------------ 1431 // Routines for ATOMIC integer operands, other operators 1432 // ------------------------------------------------------------------------ 1433 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG 1434 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i, 1435 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev 1436 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i, 1437 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev 1438 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i, 1439 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev 1440 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i, 1441 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev 1442 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i, 1443 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev 1444 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i, 1445 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev 1446 1447 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i, 1448 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev 1449 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i, 1450 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev 1451 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i, 1452 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev 1453 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i, 1454 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev 1455 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1456 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev 1457 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i, 1458 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev 1459 1460 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i, 1461 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev 1462 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i, 1463 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev 1464 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i, 1465 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev 1466 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i, 1467 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev 1468 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i, 1469 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev 1470 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i, 1471 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev 1472 1473 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i, 1474 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev 1475 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i, 1476 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev 1477 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i, 1478 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev 1479 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i, 1480 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev 1481 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i, 1482 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev 1483 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i, 1484 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev 1485 1486 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r, 1487 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev 1488 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r, 1489 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev 1490 1491 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r, 1492 KMP_ARCH_X86) // 
__kmpc_atomic_float8_div_rev 1493 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r, 1494 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev 1495 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG 1496 1497 // ------------------------------------------------------------------------ 1498 // Routines for Extended types: long double, _Quad, complex flavours (use 1499 // critical section) 1500 // TYPE_ID, OP_ID, TYPE - detailed above 1501 // OP - operator 1502 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1503 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1504 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \ 1505 OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ 1506 OP_CRITICAL_REV(OP, LCK_ID) \ 1507 } 1508 1509 /* ------------------------------------------------------------------------- */ 1510 // routines for long double type 1511 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r, 1512 1) // __kmpc_atomic_float10_sub_rev 1513 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r, 1514 1) // __kmpc_atomic_float10_div_rev 1515 #if KMP_HAVE_QUAD 1516 // routines for _Quad type 1517 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r, 1518 1) // __kmpc_atomic_float16_sub_rev 1519 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r, 1520 1) // __kmpc_atomic_float16_div_rev 1521 #if (KMP_ARCH_X86) 1522 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r, 1523 1) // __kmpc_atomic_float16_sub_a16_rev 1524 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r, 1525 1) // __kmpc_atomic_float16_div_a16_rev 1526 #endif 1527 #endif 1528 1529 // routines for complex types 1530 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c, 1531 1) // __kmpc_atomic_cmplx4_sub_rev 1532 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c, 1533 1) // __kmpc_atomic_cmplx4_div_rev 1534 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c, 1535 1) // __kmpc_atomic_cmplx8_sub_rev 1536 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c, 1537 1) // __kmpc_atomic_cmplx8_div_rev 1538 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c, 1539 1) // __kmpc_atomic_cmplx10_sub_rev 1540 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c, 1541 1) // __kmpc_atomic_cmplx10_div_rev 1542 #if KMP_HAVE_QUAD 1543 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c, 1544 1) // __kmpc_atomic_cmplx16_sub_rev 1545 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c, 1546 1) // __kmpc_atomic_cmplx16_div_rev 1547 #if (KMP_ARCH_X86) 1548 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1549 1) // __kmpc_atomic_cmplx16_sub_a16_rev 1550 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1551 1) // __kmpc_atomic_cmplx16_div_a16_rev 1552 #endif 1553 #endif 1554 1555 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 1556 // End of OpenMP 4.0: x = expr binop x for non-commutative operations. 
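// For illustration only (comment, not compiled code): a sketch of roughly
// what ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i, KMP_ARCH_X86)
// above generates. The GOMP compatibility path and the exact
// VOLATILE_CAST/argument forms of the compare-and-store call are simplified
// here; the macros above are authoritative.
//
//   void __kmpc_atomic_fixed4_sub_rev(ident_t *id_ref, int gtid,
//                                     kmp_int32 *lhs, kmp_int32 rhs) {
//     kmp_int32 old_value, new_value;
//     old_value = *lhs;
//     new_value = rhs - old_value; // reversed form: x = expr - x
//     while (!KMP_COMPARE_AND_STORE_ACQ32((kmp_int32 *)lhs, old_value,
//                                         new_value)) {
//       KMP_DO_PAUSE; // another thread changed *lhs; re-read and retry
//       old_value = *lhs;
//       new_value = rhs - old_value;
//     }
//   }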

#endif // OMP_40_ENABLED

/* ------------------------------------------------------------------------ */
/* Routines for mixed types of LHS and RHS, when RHS is "larger".            */
/* Note: to reduce the total number of type combinations it is assumed that  */
/*       the compiler converts RHS to the longest floating type, i.e. _Quad, */
/*       before calling any of these routines.                               */
/* The conversion to _Quad is done by the compiler during the calculation;   */
/*       the conversion back to TYPE happens right before the assignment:    */
/*       *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                               */
/* A performance penalty is expected because of software emulation.          */
/* ------------------------------------------------------------------------ */

#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
              gtid));

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
                           GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
  }

// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                           LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
// -------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
1601 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1602 LCK_ID, MASK, GOMP_FLAG) \ 1603 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1604 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ 1605 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1606 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1607 } else { \ 1608 KMP_CHECK_GTID; \ 1609 OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ 1610 } \ 1611 } 1612 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1613 1614 // ------------------------------------------------------------------------- 1615 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1616 // ------------------------------------------------------------------------- 1617 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ 1618 RTYPE, LCK_ID, MASK, GOMP_FLAG) \ 1619 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1620 OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ 1621 OP_CMPXCHG_REV(TYPE, BITS, OP) \ 1622 } 1623 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ 1624 LCK_ID, GOMP_FLAG) \ 1625 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1626 OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ 1627 OP_CRITICAL_REV(OP, LCK_ID) \ 1628 } 1629 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1630 1631 // RHS=float8 1632 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, 1633 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8 1634 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, 1635 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8 1636 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, 1637 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8 1638 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, 1639 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8 1640 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 1641 0) // __kmpc_atomic_fixed4_mul_float8 1642 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 1643 0) // __kmpc_atomic_fixed4_div_float8 1644 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, 1645 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8 1646 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, 1647 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8 1648 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, 1649 KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8 1650 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, 1651 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8 1652 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, 1653 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8 1654 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, 1655 KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8 1656 1657 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not 1658 // use them) 1659 #if KMP_HAVE_QUAD 1660 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0, 1661 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp 1662 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0, 1663 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp 1664 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, 1665 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp 1666 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0, 1667 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp 1668 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 
8, *, fp, _Quad, 1i, 0, 1669 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp 1670 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0, 1671 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp 1672 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0, 1673 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp 1674 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, 1675 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp 1676 1677 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1, 1678 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp 1679 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1, 1680 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp 1681 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, 1682 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp 1683 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1, 1684 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp 1685 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, 1686 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp 1687 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1, 1688 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp 1689 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1, 1690 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp 1691 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, 1692 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp 1693 1694 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 1695 0) // __kmpc_atomic_fixed4_add_fp 1696 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3, 1697 0) // __kmpc_atomic_fixed4u_add_fp 1698 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 1699 0) // __kmpc_atomic_fixed4_sub_fp 1700 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3, 1701 0) // __kmpc_atomic_fixed4u_sub_fp 1702 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 1703 0) // __kmpc_atomic_fixed4_mul_fp 1704 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3, 1705 0) // __kmpc_atomic_fixed4u_mul_fp 1706 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 1707 0) // __kmpc_atomic_fixed4_div_fp 1708 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 1709 0) // __kmpc_atomic_fixed4u_div_fp 1710 1711 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, 1712 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp 1713 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7, 1714 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp 1715 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, 1716 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp 1717 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7, 1718 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp 1719 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, 1720 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp 1721 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7, 1722 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp 1723 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, 1724 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp 1725 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, 1726 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp 1727 1728 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, 1729 KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp 1730 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, 1731 KMP_ARCH_X86) // 
__kmpc_atomic_float4_sub_fp 1732 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, 1733 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp 1734 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, 1735 KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp 1736 1737 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, 1738 KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp 1739 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, 1740 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp 1741 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, 1742 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp 1743 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, 1744 KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp 1745 1746 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r, 1747 1) // __kmpc_atomic_float10_add_fp 1748 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r, 1749 1) // __kmpc_atomic_float10_sub_fp 1750 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r, 1751 1) // __kmpc_atomic_float10_mul_fp 1752 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r, 1753 1) // __kmpc_atomic_float10_div_fp 1754 1755 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1756 // Reverse operations 1757 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0, 1758 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp 1759 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0, 1760 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp 1761 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0, 1762 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp 1763 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0, 1764 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp 1765 1766 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1, 1767 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp 1768 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1, 1769 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp 1770 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1, 1771 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp 1772 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1, 1773 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp 1774 1775 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3, 1776 0) // __kmpc_atomic_fixed4_sub_rev_fp 1777 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3, 1778 0) // __kmpc_atomic_fixed4u_sub_rev_fp 1779 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3, 1780 0) // __kmpc_atomic_fixed4_div_rev_fp 1781 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3, 1782 0) // __kmpc_atomic_fixed4u_div_rev_fp 1783 1784 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7, 1785 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp 1786 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7, 1787 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp 1788 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7, 1789 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp 1790 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7, 1791 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp 1792 1793 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3, 1794 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp 
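// Illustrative note (hypothetical caller, not part of this file): given
//   float x; _Quad q;
// the reversed atomic form x = q - x could be lowered by the compiler to
//   __kmpc_atomic_float4_sub_rev_fp(&loc, gtid, &x, q);
// (loc being the ident_t for the source location), which atomically performs
// x = (float)(q - (_Quad)x): the arithmetic is done in _Quad and the result
// is narrowed back to float on the store, as in OP_CMPXCHG_REV above.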
1795 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3, 1796 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp 1797 1798 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7, 1799 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp 1800 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7, 1801 KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp 1802 1803 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r, 1804 1) // __kmpc_atomic_float10_sub_rev_fp 1805 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r, 1806 1) // __kmpc_atomic_float10_div_rev_fp 1807 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1808 1809 #endif 1810 1811 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1812 // ------------------------------------------------------------------------ 1813 // X86 or X86_64: no alignment problems ==================================== 1814 #if USE_CMPXCHG_FIX 1815 // workaround for C78287 (complex(kind=4) data type) 1816 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1817 LCK_ID, MASK, GOMP_FLAG) \ 1818 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1819 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ 1820 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ 1821 } 1822 // end of the second part of the workaround for C78287 1823 #else 1824 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1825 LCK_ID, MASK, GOMP_FLAG) \ 1826 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1827 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ 1828 OP_CMPXCHG(TYPE, BITS, OP) \ 1829 } 1830 #endif // USE_CMPXCHG_FIX 1831 #else 1832 // ------------------------------------------------------------------------ 1833 // Code for other architectures that don't handle unaligned accesses. 1834 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1835 LCK_ID, MASK, GOMP_FLAG) \ 1836 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1837 OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ 1838 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1839 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1840 } else { \ 1841 KMP_CHECK_GTID; \ 1842 OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ 1843 } \ 1844 } 1845 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1846 1847 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 1848 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8 1849 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 1850 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8 1851 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 1852 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8 1853 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 1854 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8 1855 1856 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64 1857 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1858 1859 // ------------------------------------------------------------------------ 1860 // Atomic READ routines 1861 1862 // ------------------------------------------------------------------------ 1863 // Beginning of a definition (provides name, parameters, gebug trace) 1864 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 1865 // fixed) 1866 // OP_ID - operation identifier (add, sub, mul, ...) 
1867 // TYPE - operands' type 1868 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ 1869 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \ 1870 TYPE *loc) { \ 1871 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 1872 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 1873 1874 // ------------------------------------------------------------------------ 1875 // Operation on *lhs, rhs using "compare_and_store_ret" routine 1876 // TYPE - operands' type 1877 // BITS - size in bits, used to distinguish low level calls 1878 // OP - operator 1879 // Note: temp_val introduced in order to force the compiler to read 1880 // *lhs only once (w/o it the compiler reads *lhs twice) 1881 // TODO: check if it is still necessary 1882 // Return old value regardless of the result of "compare & swap# operation 1883 #define OP_CMPXCHG_READ(TYPE, BITS, OP) \ 1884 { \ 1885 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 1886 union f_i_union { \ 1887 TYPE f_val; \ 1888 kmp_int##BITS i_val; \ 1889 }; \ 1890 union f_i_union old_value; \ 1891 temp_val = *loc; \ 1892 old_value.f_val = temp_val; \ 1893 old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \ 1894 (kmp_int##BITS *)loc, \ 1895 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \ 1896 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \ 1897 new_value = old_value.f_val; \ 1898 return new_value; \ 1899 } 1900 1901 // ------------------------------------------------------------------------- 1902 // Operation on *lhs, rhs bound by critical section 1903 // OP - operator (it's supposed to contain an assignment) 1904 // LCK_ID - lock identifier 1905 // Note: don't check gtid as it should always be valid 1906 // 1, 2-byte - expect valid parameter, other - check before this macro 1907 #define OP_CRITICAL_READ(OP, LCK_ID) \ 1908 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 1909 \ 1910 new_value = (*loc); \ 1911 \ 1912 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 1913 1914 // ------------------------------------------------------------------------- 1915 #ifdef KMP_GOMP_COMPAT 1916 #define OP_GOMP_CRITICAL_READ(OP, FLAG) \ 1917 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 1918 KMP_CHECK_GTID; \ 1919 OP_CRITICAL_READ(OP, 0); \ 1920 return new_value; \ 1921 } 1922 #else 1923 #define OP_GOMP_CRITICAL_READ(OP, FLAG) 1924 #endif /* KMP_GOMP_COMPAT */ 1925 1926 // ------------------------------------------------------------------------- 1927 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 1928 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \ 1929 TYPE new_value; \ 1930 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \ 1931 new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \ 1932 return new_value; \ 1933 } 1934 // ------------------------------------------------------------------------- 1935 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 1936 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \ 1937 TYPE new_value; \ 1938 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \ 1939 OP_CMPXCHG_READ(TYPE, BITS, OP) \ 1940 } 1941 // ------------------------------------------------------------------------ 1942 // Routines for Extended types: long double, _Quad, complex flavours (use 1943 // critical section) 1944 // TYPE_ID, OP_ID, TYPE - detailed above 1945 // OP - operator 1946 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1947 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1948 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \ 1949 TYPE new_value; \ 
1950 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \ 1951 OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \ 1952 return new_value; \ 1953 } 1954 1955 // ------------------------------------------------------------------------ 1956 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return 1957 // value doesn't work. 1958 // Let's return the read value through the additional parameter. 1959 #if (KMP_OS_WINDOWS) 1960 1961 #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \ 1962 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 1963 \ 1964 (*out) = (*loc); \ 1965 \ 1966 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 1967 // ------------------------------------------------------------------------ 1968 #ifdef KMP_GOMP_COMPAT 1969 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \ 1970 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 1971 KMP_CHECK_GTID; \ 1972 OP_CRITICAL_READ_WRK(OP, 0); \ 1973 } 1974 #else 1975 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) 1976 #endif /* KMP_GOMP_COMPAT */ 1977 // ------------------------------------------------------------------------ 1978 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \ 1979 void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \ 1980 TYPE *loc) { \ 1981 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 1982 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 1983 1984 // ------------------------------------------------------------------------ 1985 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1986 ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \ 1987 OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \ 1988 OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \ 1989 } 1990 1991 #endif // KMP_OS_WINDOWS 1992 1993 // ------------------------------------------------------------------------ 1994 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 1995 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd 1996 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +, 1997 KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd 1998 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +, 1999 KMP_ARCH_X86) // __kmpc_atomic_float4_rd 2000 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +, 2001 KMP_ARCH_X86) // __kmpc_atomic_float8_rd 2002 2003 // !!! 
TODO: Remove lock operations for "char" since it can't be non-atomic 2004 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +, 2005 KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd 2006 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +, 2007 KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd 2008 2009 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r, 2010 1) // __kmpc_atomic_float10_rd 2011 #if KMP_HAVE_QUAD 2012 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r, 2013 1) // __kmpc_atomic_float16_rd 2014 #endif // KMP_HAVE_QUAD 2015 2016 // Fix for CQ220361 on Windows* OS 2017 #if (KMP_OS_WINDOWS) 2018 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c, 2019 1) // __kmpc_atomic_cmplx4_rd 2020 #else 2021 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c, 2022 1) // __kmpc_atomic_cmplx4_rd 2023 #endif 2024 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c, 2025 1) // __kmpc_atomic_cmplx8_rd 2026 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c, 2027 1) // __kmpc_atomic_cmplx10_rd 2028 #if KMP_HAVE_QUAD 2029 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c, 2030 1) // __kmpc_atomic_cmplx16_rd 2031 #if (KMP_ARCH_X86) 2032 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r, 2033 1) // __kmpc_atomic_float16_a16_rd 2034 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 2035 1) // __kmpc_atomic_cmplx16_a16_rd 2036 #endif 2037 #endif 2038 2039 // ------------------------------------------------------------------------ 2040 // Atomic WRITE routines 2041 2042 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2043 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2044 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2045 KMP_XCHG_FIXED##BITS(lhs, rhs); \ 2046 } 2047 // ------------------------------------------------------------------------ 2048 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2049 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2050 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2051 KMP_XCHG_REAL##BITS(lhs, rhs); \ 2052 } 2053 2054 // ------------------------------------------------------------------------ 2055 // Operation on *lhs, rhs using "compare_and_store" routine 2056 // TYPE - operands' type 2057 // BITS - size in bits, used to distinguish low level calls 2058 // OP - operator 2059 // Note: temp_val introduced in order to force the compiler to read 2060 // *lhs only once (w/o it the compiler reads *lhs twice) 2061 #define OP_CMPXCHG_WR(TYPE, BITS, OP) \ 2062 { \ 2063 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2064 TYPE old_value, new_value; \ 2065 temp_val = *lhs; \ 2066 old_value = temp_val; \ 2067 new_value = rhs; \ 2068 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2069 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2070 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 2071 KMP_CPU_PAUSE(); \ 2072 \ 2073 temp_val = *lhs; \ 2074 old_value = temp_val; \ 2075 new_value = rhs; \ 2076 } \ 2077 } 2078 2079 // ------------------------------------------------------------------------- 2080 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2081 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2082 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2083 OP_CMPXCHG_WR(TYPE, BITS, OP) \ 2084 } 2085 2086 // ------------------------------------------------------------------------ 2087 // Routines for Extended types: long double, _Quad, complex flavours (use 2088 // critical section) 2089 // TYPE_ID, OP_ID, TYPE - detailed above 2090 // OP - operator 2091 // LCK_ID - lock identifier, used to possibly distinguish lock variable 2092 #define 
ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2093 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2094 OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \ 2095 OP_CRITICAL(OP, LCK_ID) /* send assignment */ \ 2096 } 2097 // ------------------------------------------------------------------------- 2098 2099 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =, 2100 KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr 2101 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =, 2102 KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr 2103 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =, 2104 KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr 2105 #if (KMP_ARCH_X86) 2106 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =, 2107 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr 2108 #else 2109 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =, 2110 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr 2111 #endif 2112 2113 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =, 2114 KMP_ARCH_X86) // __kmpc_atomic_float4_wr 2115 #if (KMP_ARCH_X86) 2116 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =, 2117 KMP_ARCH_X86) // __kmpc_atomic_float8_wr 2118 #else 2119 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =, 2120 KMP_ARCH_X86) // __kmpc_atomic_float8_wr 2121 #endif 2122 2123 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r, 2124 1) // __kmpc_atomic_float10_wr 2125 #if KMP_HAVE_QUAD 2126 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r, 2127 1) // __kmpc_atomic_float16_wr 2128 #endif 2129 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr 2130 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c, 2131 1) // __kmpc_atomic_cmplx8_wr 2132 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c, 2133 1) // __kmpc_atomic_cmplx10_wr 2134 #if KMP_HAVE_QUAD 2135 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c, 2136 1) // __kmpc_atomic_cmplx16_wr 2137 #if (KMP_ARCH_X86) 2138 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r, 2139 1) // __kmpc_atomic_float16_a16_wr 2140 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 2141 1) // __kmpc_atomic_cmplx16_a16_wr 2142 #endif 2143 #endif 2144 2145 // ------------------------------------------------------------------------ 2146 // Atomic CAPTURE routines 2147 2148 // Beginning of a definition (provides name, parameters, gebug trace) 2149 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 2150 // fixed) 2151 // OP_ID - operation identifier (add, sub, mul, ...) 
2152 // TYPE - operands' type 2153 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ 2154 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \ 2155 TYPE *lhs, TYPE rhs, int flag) { \ 2156 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2157 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 2158 2159 // ------------------------------------------------------------------------- 2160 // Operation on *lhs, rhs bound by critical section 2161 // OP - operator (it's supposed to contain an assignment) 2162 // LCK_ID - lock identifier 2163 // Note: don't check gtid as it should always be valid 2164 // 1, 2-byte - expect valid parameter, other - check before this macro 2165 #define OP_CRITICAL_CPT(OP, LCK_ID) \ 2166 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2167 \ 2168 if (flag) { \ 2169 (*lhs) OP rhs; \ 2170 new_value = (*lhs); \ 2171 } else { \ 2172 new_value = (*lhs); \ 2173 (*lhs) OP rhs; \ 2174 } \ 2175 \ 2176 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2177 return new_value; 2178 2179 // ------------------------------------------------------------------------ 2180 #ifdef KMP_GOMP_COMPAT 2181 #define OP_GOMP_CRITICAL_CPT(OP, FLAG) \ 2182 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2183 KMP_CHECK_GTID; \ 2184 OP_CRITICAL_CPT(OP## =, 0); \ 2185 } 2186 #else 2187 #define OP_GOMP_CRITICAL_CPT(OP, FLAG) 2188 #endif /* KMP_GOMP_COMPAT */ 2189 2190 // ------------------------------------------------------------------------ 2191 // Operation on *lhs, rhs using "compare_and_store" routine 2192 // TYPE - operands' type 2193 // BITS - size in bits, used to distinguish low level calls 2194 // OP - operator 2195 // Note: temp_val introduced in order to force the compiler to read 2196 // *lhs only once (w/o it the compiler reads *lhs twice) 2197 #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2198 { \ 2199 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2200 TYPE old_value, new_value; \ 2201 temp_val = *lhs; \ 2202 old_value = temp_val; \ 2203 new_value = old_value OP rhs; \ 2204 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2205 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2206 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 2207 KMP_CPU_PAUSE(); \ 2208 \ 2209 temp_val = *lhs; \ 2210 old_value = temp_val; \ 2211 new_value = old_value OP rhs; \ 2212 } \ 2213 if (flag) { \ 2214 return new_value; \ 2215 } else \ 2216 return old_value; \ 2217 } 2218 2219 // ------------------------------------------------------------------------- 2220 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2221 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2222 TYPE new_value; \ 2223 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \ 2224 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2225 } 2226 2227 // ------------------------------------------------------------------------- 2228 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2229 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2230 TYPE old_value, new_value; \ 2231 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \ 2232 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ 2233 old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ 2234 if (flag) { \ 2235 return old_value OP rhs; \ 2236 } else \ 2237 return old_value; \ 2238 } 2239 // ------------------------------------------------------------------------- 2240 2241 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +, 2242 0) // __kmpc_atomic_fixed4_add_cpt 2243 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -, 2244 0) // 
__kmpc_atomic_fixed4_sub_cpt 2245 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +, 2246 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt 2247 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -, 2248 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt 2249 2250 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +, 2251 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt 2252 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -, 2253 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt 2254 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +, 2255 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt 2256 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -, 2257 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt 2258 2259 // ------------------------------------------------------------------------ 2260 // Entries definition for integer operands 2261 // TYPE_ID - operands type and size (fixed4, float4) 2262 // OP_ID - operation identifier (add, sub, mul, ...) 2263 // TYPE - operand type 2264 // BITS - size in bits, used to distinguish low level calls 2265 // OP - operator (used in critical section) 2266 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG 2267 // ------------------------------------------------------------------------ 2268 // Routines for ATOMIC integer operands, other operators 2269 // ------------------------------------------------------------------------ 2270 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 2271 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +, 2272 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt 2273 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &, 2274 0) // __kmpc_atomic_fixed1_andb_cpt 2275 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /, 2276 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt 2277 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /, 2278 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt 2279 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *, 2280 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt 2281 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |, 2282 0) // __kmpc_atomic_fixed1_orb_cpt 2283 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<, 2284 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt 2285 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>, 2286 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt 2287 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>, 2288 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt 2289 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -, 2290 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt 2291 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^, 2292 0) // __kmpc_atomic_fixed1_xor_cpt 2293 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +, 2294 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt 2295 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &, 2296 0) // __kmpc_atomic_fixed2_andb_cpt 2297 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /, 2298 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt 2299 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /, 2300 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt 2301 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *, 2302 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt 2303 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |, 2304 0) // __kmpc_atomic_fixed2_orb_cpt 2305 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<, 2306 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt 2307 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>, 2308 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt 2309 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>, 2310 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt 
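// Illustrative note (hypothetical caller, not part of this file): per
// OP_CMPXCHG_CPT above, the _cpt entries return the new value when flag is
// nonzero and the old value when it is zero, e.g. for int x, v;
//   v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &x, 5, 1); // v = x after  += 5
//   v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &x, 5, 0); // v = x before += 5
// (loc being the ident_t for the source location).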
2311 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -, 2312 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt 2313 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^, 2314 0) // __kmpc_atomic_fixed2_xor_cpt 2315 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &, 2316 0) // __kmpc_atomic_fixed4_andb_cpt 2317 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /, 2318 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt 2319 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /, 2320 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt 2321 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *, 2322 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt 2323 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |, 2324 0) // __kmpc_atomic_fixed4_orb_cpt 2325 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<, 2326 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt 2327 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>, 2328 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt 2329 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>, 2330 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt 2331 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^, 2332 0) // __kmpc_atomic_fixed4_xor_cpt 2333 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &, 2334 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt 2335 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /, 2336 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt 2337 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /, 2338 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt 2339 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *, 2340 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt 2341 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |, 2342 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt 2343 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<, 2344 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt 2345 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>, 2346 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt 2347 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>, 2348 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt 2349 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^, 2350 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt 2351 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /, 2352 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt 2353 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *, 2354 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt 2355 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /, 2356 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt 2357 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *, 2358 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt 2359 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 2360 2361 // CAPTURE routines for mixed types RHS=float16 2362 #if KMP_HAVE_QUAD 2363 2364 // Beginning of a definition (provides name, parameters, gebug trace) 2365 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 2366 // fixed) 2367 // OP_ID - operation identifier (add, sub, mul, ...) 
2368 // TYPE - operands' type 2369 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2370 TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \ 2371 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \ 2372 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2373 KA_TRACE(100, \ 2374 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \ 2375 gtid)); 2376 2377 // ------------------------------------------------------------------------- 2378 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ 2379 RTYPE, LCK_ID, MASK, GOMP_FLAG) \ 2380 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2381 TYPE new_value; \ 2382 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \ 2383 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2384 } 2385 2386 // ------------------------------------------------------------------------- 2387 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ 2388 LCK_ID, GOMP_FLAG) \ 2389 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2390 TYPE new_value; \ 2391 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \ 2392 OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \ 2393 } 2394 2395 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0, 2396 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp 2397 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0, 2398 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp 2399 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0, 2400 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp 2401 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0, 2402 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp 2403 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0, 2404 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp 2405 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0, 2406 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp 2407 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0, 2408 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp 2409 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0, 2410 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp 2411 2412 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1, 2413 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp 2414 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1, 2415 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp 2416 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1, 2417 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp 2418 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1, 2419 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp 2420 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1, 2421 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp 2422 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1, 2423 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp 2424 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1, 2425 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp 2426 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1, 2427 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp 2428 2429 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3, 2430 0) // __kmpc_atomic_fixed4_add_cpt_fp 2431 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3, 2432 0) // __kmpc_atomic_fixed4u_add_cpt_fp 
2433 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 2434 0) // __kmpc_atomic_fixed4_sub_cpt_fp 2435 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 2436 0) // __kmpc_atomic_fixed4u_sub_cpt_fp 2437 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 2438 0) // __kmpc_atomic_fixed4_mul_cpt_fp 2439 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 2440 0) // __kmpc_atomic_fixed4u_mul_cpt_fp 2441 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3, 2442 0) // __kmpc_atomic_fixed4_div_cpt_fp 2443 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3, 2444 0) // __kmpc_atomic_fixed4u_div_cpt_fp 2445 2446 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7, 2447 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp 2448 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7, 2449 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp 2450 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7, 2451 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp 2452 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7, 2453 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp 2454 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7, 2455 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp 2456 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7, 2457 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp 2458 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7, 2459 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp 2460 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7, 2461 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp 2462 2463 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3, 2464 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp 2465 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3, 2466 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp 2467 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3, 2468 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp 2469 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3, 2470 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp 2471 2472 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7, 2473 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp 2474 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7, 2475 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp 2476 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7, 2477 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp 2478 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7, 2479 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp 2480 2481 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r, 2482 1) // __kmpc_atomic_float10_add_cpt_fp 2483 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r, 2484 1) // __kmpc_atomic_float10_sub_cpt_fp 2485 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r, 2486 1) // __kmpc_atomic_float10_mul_cpt_fp 2487 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r, 2488 1) // __kmpc_atomic_float10_div_cpt_fp 2489 2490 #endif // KMP_HAVE_QUAD 2491 2492 // 
------------------------------------------------------------------------ 2493 // Routines for C/C++ Reduction operators && and || 2494 2495 // ------------------------------------------------------------------------- 2496 // Operation on *lhs, rhs bound by critical section 2497 // OP - operator (it's supposed to contain an assignment) 2498 // LCK_ID - lock identifier 2499 // Note: don't check gtid as it should always be valid 2500 // 1, 2-byte - expect valid parameter, other - check before this macro 2501 #define OP_CRITICAL_L_CPT(OP, LCK_ID) \ 2502 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2503 \ 2504 if (flag) { \ 2505 new_value OP rhs; \ 2506 } else \ 2507 new_value = (*lhs); \ 2508 \ 2509 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 2510 2511 // ------------------------------------------------------------------------ 2512 #ifdef KMP_GOMP_COMPAT 2513 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \ 2514 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2515 KMP_CHECK_GTID; \ 2516 OP_CRITICAL_L_CPT(OP, 0); \ 2517 return new_value; \ 2518 } 2519 #else 2520 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) 2521 #endif /* KMP_GOMP_COMPAT */ 2522 2523 // ------------------------------------------------------------------------ 2524 // Need separate macros for &&, || because there is no combined assignment 2525 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2526 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2527 TYPE new_value; \ 2528 OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \ 2529 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2530 } 2531 2532 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&, 2533 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt 2534 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||, 2535 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt 2536 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&, 2537 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt 2538 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||, 2539 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt 2540 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&, 2541 0) // __kmpc_atomic_fixed4_andl_cpt 2542 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||, 2543 0) // __kmpc_atomic_fixed4_orl_cpt 2544 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&, 2545 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt 2546 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||, 2547 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt 2548 2549 // ------------------------------------------------------------------------- 2550 // Routines for Fortran operators that matched no one in C: 2551 // MAX, MIN, .EQV., .NEQV. 2552 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt 2553 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt 2554 2555 // ------------------------------------------------------------------------- 2556 // MIN and MAX need separate macros 2557 // OP - operator to check if we need any actions? 2558 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \ 2559 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2560 \ 2561 if (*lhs OP rhs) { /* still need actions? 
*/ \ 2562 old_value = *lhs; \ 2563 *lhs = rhs; \ 2564 if (flag) \ 2565 new_value = rhs; \ 2566 else \ 2567 new_value = old_value; \ 2568 } else { \ 2569 new_value = *lhs; \ 2570 } \ 2571 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2572 return new_value; 2573 2574 // ------------------------------------------------------------------------- 2575 #ifdef KMP_GOMP_COMPAT 2576 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \ 2577 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2578 KMP_CHECK_GTID; \ 2579 MIN_MAX_CRITSECT_CPT(OP, 0); \ 2580 } 2581 #else 2582 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) 2583 #endif /* KMP_GOMP_COMPAT */ 2584 2585 // ------------------------------------------------------------------------- 2586 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \ 2587 { \ 2588 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2589 /*TYPE old_value; */ \ 2590 temp_val = *lhs; \ 2591 old_value = temp_val; \ 2592 while (old_value OP rhs && /* still need actions? */ \ 2593 !KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2594 (kmp_int##BITS *)lhs, \ 2595 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2596 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \ 2597 KMP_CPU_PAUSE(); \ 2598 temp_val = *lhs; \ 2599 old_value = temp_val; \ 2600 } \ 2601 if (flag) \ 2602 return rhs; \ 2603 else \ 2604 return old_value; \ 2605 } 2606 2607 // ------------------------------------------------------------------------- 2608 // 1-byte, 2-byte operands - use critical section 2609 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2610 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2611 TYPE new_value, old_value; \ 2612 if (*lhs OP rhs) { /* need actions? */ \ 2613 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \ 2614 MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \ 2615 } \ 2616 return *lhs; \ 2617 } 2618 2619 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2620 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2621 TYPE new_value, old_value; \ 2622 if (*lhs OP rhs) { \ 2623 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \ 2624 MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \ 2625 } \ 2626 return *lhs; \ 2627 } 2628 2629 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <, 2630 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt 2631 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >, 2632 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt 2633 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <, 2634 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt 2635 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >, 2636 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt 2637 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <, 2638 0) // __kmpc_atomic_fixed4_max_cpt 2639 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >, 2640 0) // __kmpc_atomic_fixed4_min_cpt 2641 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <, 2642 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt 2643 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >, 2644 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt 2645 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <, 2646 KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt 2647 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >, 2648 KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt 2649 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <, 2650 KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt 2651 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >, 2652 KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt 2653 #if KMP_HAVE_QUAD 2654 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r, 2655 1) // __kmpc_atomic_float16_max_cpt 
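// Note (illustrative): in the MIN/MAX capture macros above, OP is the
// comparison that decides whether an update is still needed rather than the
// reduction itself; max_cpt passes `<` so the store is attempted only while
// (*lhs < rhs), and min_cpt passes `>`.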
2656 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r, 2657 1) // __kmpc_atomic_float16_min_cpt 2658 #if (KMP_ARCH_X86) 2659 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r, 2660 1) // __kmpc_atomic_float16_max_a16_cpt 2661 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r, 2662 1) // __kmpc_atomic_float16_mix_a16_cpt 2663 #endif 2664 #endif 2665 2666 // ------------------------------------------------------------------------ 2667 #ifdef KMP_GOMP_COMPAT 2668 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \ 2669 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2670 KMP_CHECK_GTID; \ 2671 OP_CRITICAL_CPT(OP, 0); \ 2672 } 2673 #else 2674 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) 2675 #endif /* KMP_GOMP_COMPAT */ 2676 // ------------------------------------------------------------------------ 2677 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2678 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2679 TYPE new_value; \ 2680 OP_GOMP_CRITICAL_EQV_CPT(^= ~, GOMP_FLAG) /* send assignment */ \ 2681 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2682 } 2683 2684 // ------------------------------------------------------------------------ 2685 2686 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^, 2687 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt 2688 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^, 2689 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt 2690 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^, 2691 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt 2692 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^, 2693 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt 2694 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~, 2695 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt 2696 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~, 2697 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt 2698 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~, 2699 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt 2700 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~, 2701 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt 2702 2703 // ------------------------------------------------------------------------ 2704 // Routines for Extended types: long double, _Quad, complex flavours (use 2705 // critical section) 2706 // TYPE_ID, OP_ID, TYPE - detailed above 2707 // OP - operator 2708 // LCK_ID - lock identifier, used to possibly distinguish lock variable 2709 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2710 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2711 TYPE new_value; \ 2712 OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \ 2713 OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \ 2714 } 2715 2716 // ------------------------------------------------------------------------ 2717 // Workaround for cmplx4. Regular routines with return value don't work 2718 // on Win_32e. Let's return captured values through the additional parameter. 
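// Illustrative call shape (hypothetical caller, not part of this file): with
// this workaround the captured value is written through the extra `out`
// parameter instead of being returned, e.g. for kmp_cmplx32 x, v, rhs;
//   __kmpc_atomic_cmplx4_add_cpt(&loc, gtid, &x, rhs, &v, 1);
// stores the post-update value of x into v (flag == 1), matching
// OP_CRITICAL_CPT_WRK below.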
2719 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \ 2720 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2721 \ 2722 if (flag) { \ 2723 (*lhs) OP rhs; \ 2724 (*out) = (*lhs); \ 2725 } else { \ 2726 (*out) = (*lhs); \ 2727 (*lhs) OP rhs; \ 2728 } \ 2729 \ 2730 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2731 return; 2732 // ------------------------------------------------------------------------ 2733 2734 #ifdef KMP_GOMP_COMPAT 2735 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \ 2736 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2737 KMP_CHECK_GTID; \ 2738 OP_CRITICAL_CPT_WRK(OP## =, 0); \ 2739 } 2740 #else 2741 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) 2742 #endif /* KMP_GOMP_COMPAT */ 2743 // ------------------------------------------------------------------------ 2744 2745 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 2746 void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \ 2747 TYPE rhs, TYPE *out, int flag) { \ 2748 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2749 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 2750 // ------------------------------------------------------------------------ 2751 2752 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2753 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 2754 OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \ 2755 OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \ 2756 } 2757 // The end of workaround for cmplx4 2758 2759 /* ------------------------------------------------------------------------- */ 2760 // routines for long double type 2761 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r, 2762 1) // __kmpc_atomic_float10_add_cpt 2763 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r, 2764 1) // __kmpc_atomic_float10_sub_cpt 2765 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r, 2766 1) // __kmpc_atomic_float10_mul_cpt 2767 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r, 2768 1) // __kmpc_atomic_float10_div_cpt 2769 #if KMP_HAVE_QUAD 2770 // routines for _Quad type 2771 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r, 2772 1) // __kmpc_atomic_float16_add_cpt 2773 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r, 2774 1) // __kmpc_atomic_float16_sub_cpt 2775 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r, 2776 1) // __kmpc_atomic_float16_mul_cpt 2777 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r, 2778 1) // __kmpc_atomic_float16_div_cpt 2779 #if (KMP_ARCH_X86) 2780 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r, 2781 1) // __kmpc_atomic_float16_add_a16_cpt 2782 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r, 2783 1) // __kmpc_atomic_float16_sub_a16_cpt 2784 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r, 2785 1) // __kmpc_atomic_float16_mul_a16_cpt 2786 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r, 2787 1) // __kmpc_atomic_float16_div_a16_cpt 2788 #endif 2789 #endif 2790 2791 // routines for complex types 2792 2793 // cmplx4 routines to return void 2794 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c, 2795 1) // __kmpc_atomic_cmplx4_add_cpt 2796 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 2797 1) // __kmpc_atomic_cmplx4_sub_cpt 2798 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 2799 1) // __kmpc_atomic_cmplx4_mul_cpt 2800 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c, 2801 1) // __kmpc_atomic_cmplx4_div_cpt 2802 2803 ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c, 2804 1) // 
__kmpc_atomic_cmplx8_add_cpt 2805 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 2806 1) // __kmpc_atomic_cmplx8_sub_cpt 2807 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 2808 1) // __kmpc_atomic_cmplx8_mul_cpt 2809 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c, 2810 1) // __kmpc_atomic_cmplx8_div_cpt 2811 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c, 2812 1) // __kmpc_atomic_cmplx10_add_cpt 2813 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 2814 1) // __kmpc_atomic_cmplx10_sub_cpt 2815 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 2816 1) // __kmpc_atomic_cmplx10_mul_cpt 2817 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c, 2818 1) // __kmpc_atomic_cmplx10_div_cpt 2819 #if KMP_HAVE_QUAD 2820 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c, 2821 1) // __kmpc_atomic_cmplx16_add_cpt 2822 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 2823 1) // __kmpc_atomic_cmplx16_sub_cpt 2824 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 2825 1) // __kmpc_atomic_cmplx16_mul_cpt 2826 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c, 2827 1) // __kmpc_atomic_cmplx16_div_cpt 2828 #if (KMP_ARCH_X86) 2829 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 2830 1) // __kmpc_atomic_cmplx16_add_a16_cpt 2831 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 2832 1) // __kmpc_atomic_cmplx16_sub_a16_cpt 2833 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 2834 1) // __kmpc_atomic_cmplx16_mul_a16_cpt 2835 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 2836 1) // __kmpc_atomic_cmplx16_div_a16_cpt 2837 #endif 2838 #endif 2839 2840 #if OMP_40_ENABLED 2841 2842 // OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr 2843 // binop x; v = x; } for non-commutative operations. 
2844 // Supported only on IA-32 architecture and Intel(R) 64 2845 2846 // ------------------------------------------------------------------------- 2847 // Operation on *lhs, rhs bound by critical section 2848 // OP - operator (it's supposed to contain an assignment) 2849 // LCK_ID - lock identifier 2850 // Note: don't check gtid as it should always be valid 2851 // 1, 2-byte - expect valid parameter, other - check before this macro 2852 #define OP_CRITICAL_CPT_REV(OP, LCK_ID) \ 2853 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2854 \ 2855 if (flag) { \ 2856 /*temp_val = (*lhs);*/ \ 2857 (*lhs) = (rhs)OP(*lhs); \ 2858 new_value = (*lhs); \ 2859 } else { \ 2860 new_value = (*lhs); \ 2861 (*lhs) = (rhs)OP(*lhs); \ 2862 } \ 2863 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2864 return new_value; 2865 2866 // ------------------------------------------------------------------------ 2867 #ifdef KMP_GOMP_COMPAT 2868 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG) \ 2869 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2870 KMP_CHECK_GTID; \ 2871 OP_CRITICAL_CPT_REV(OP, 0); \ 2872 } 2873 #else 2874 #define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG) 2875 #endif /* KMP_GOMP_COMPAT */ 2876 2877 // ------------------------------------------------------------------------ 2878 // Operation on *lhs, rhs using "compare_and_store" routine 2879 // TYPE - operands' type 2880 // BITS - size in bits, used to distinguish low level calls 2881 // OP - operator 2882 // Note: temp_val introduced in order to force the compiler to read 2883 // *lhs only once (w/o it the compiler reads *lhs twice) 2884 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ 2885 { \ 2886 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2887 TYPE old_value, new_value; \ 2888 temp_val = *lhs; \ 2889 old_value = temp_val; \ 2890 new_value = rhs OP old_value; \ 2891 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2892 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2893 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 2894 KMP_CPU_PAUSE(); \ 2895 \ 2896 temp_val = *lhs; \ 2897 old_value = temp_val; \ 2898 new_value = rhs OP old_value; \ 2899 } \ 2900 if (flag) { \ 2901 return new_value; \ 2902 } else \ 2903 return old_value; \ 2904 } 2905 2906 // ------------------------------------------------------------------------- 2907 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2908 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2909 TYPE new_value; \ 2910 OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \ 2911 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ 2912 } 2913 2914 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /, 2915 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev 2916 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /, 2917 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev 2918 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<, 2919 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev 2920 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>, 2921 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev 2922 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, 2923 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev 2924 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -, 2925 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev 2926 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /, 2927 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev 2928 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /, 2929 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev 2930 
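// ------------------------------------------------------------------------
// Usage sketch (illustration only): the *_cpt_rev entry points instantiated
// in this section implement the reversed (non-commutative) capture form, e.g.
//   #pragma omp atomic capture
//   { v = x; x = e - x; }
// may be lowered to
//   v = __kmpc_atomic_fixed4_sub_cpt_rev(loc, gtid, &x, e, 0); // flag=0: capture old value
// with flag = 1 requesting the value after the update instead; `loc' and
// `gtid' stand for the usual source-location / thread-id arguments.
// ------------------------------------------------------------------------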
ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<, 2931 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev 2932 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>, 2933 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev 2934 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, 2935 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev 2936 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -, 2937 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev 2938 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /, 2939 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev 2940 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /, 2941 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev 2942 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<, 2943 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev 2944 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>, 2945 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev 2946 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, 2947 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev 2948 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -, 2949 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev 2950 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /, 2951 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev 2952 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /, 2953 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev 2954 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<, 2955 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev 2956 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>, 2957 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev 2958 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, 2959 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev 2960 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -, 2961 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev 2962 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /, 2963 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev 2964 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -, 2965 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev 2966 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /, 2967 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev 2968 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -, 2969 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev 2970 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 2971 2972 // ------------------------------------------------------------------------ 2973 // Routines for Extended types: long double, _Quad, complex flavours (use 2974 // critical section) 2975 // TYPE_ID, OP_ID, TYPE - detailed above 2976 // OP - operator 2977 // LCK_ID - lock identifier, used to possibly distinguish lock variable 2978 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2979 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2980 TYPE new_value; \ 2981 /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \ 2982 OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \ 2983 OP_CRITICAL_CPT_REV(OP, LCK_ID) \ 2984 } 2985 2986 /* ------------------------------------------------------------------------- */ 2987 // routines for long double type 2988 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r, 2989 1) // __kmpc_atomic_float10_sub_cpt_rev 2990 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r, 2991 1) // __kmpc_atomic_float10_div_cpt_rev 2992 #if KMP_HAVE_QUAD 2993 // 
routines for _Quad type 2994 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 2995 1) // __kmpc_atomic_float16_sub_cpt_rev 2996 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 2997 1) // __kmpc_atomic_float16_div_cpt_rev 2998 #if (KMP_ARCH_X86) 2999 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 3000 1) // __kmpc_atomic_float16_sub_a16_cpt_rev 3001 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 3002 1) // __kmpc_atomic_float16_div_a16_cpt_rev 3003 #endif 3004 #endif 3005 3006 // routines for complex types 3007 3008 // ------------------------------------------------------------------------ 3009 // Workaround for cmplx4. Regular routines with return value don't work 3010 // on Win_32e. Let's return captured values through the additional parameter. 3011 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \ 3012 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3013 \ 3014 if (flag) { \ 3015 (*lhs) = (rhs)OP(*lhs); \ 3016 (*out) = (*lhs); \ 3017 } else { \ 3018 (*out) = (*lhs); \ 3019 (*lhs) = (rhs)OP(*lhs); \ 3020 } \ 3021 \ 3022 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3023 return; 3024 // ------------------------------------------------------------------------ 3025 3026 #ifdef KMP_GOMP_COMPAT 3027 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \ 3028 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3029 KMP_CHECK_GTID; \ 3030 OP_CRITICAL_CPT_REV_WRK(OP, 0); \ 3031 } 3032 #else 3033 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) 3034 #endif /* KMP_GOMP_COMPAT */ 3035 // ------------------------------------------------------------------------ 3036 3037 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \ 3038 GOMP_FLAG) \ 3039 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 3040 OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \ 3041 OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \ 3042 } 3043 // The end of workaround for cmplx4 3044 3045 // !!! 
// TODO: check if we need to return void for cmplx4 routines
// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
                            1) // __kmpc_atomic_cmplx4_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
                            1) // __kmpc_atomic_cmplx4_div_cpt_rev

ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
                        1) // __kmpc_atomic_cmplx8_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
                        1) // __kmpc_atomic_cmplx8_div_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
                        1) // __kmpc_atomic_cmplx10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
                        1) // __kmpc_atomic_cmplx10_div_cpt_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
#endif
#endif

// Capture reverse for mixed type: RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
//     OP_ID - operation identifier (add, sub, mul, ...)
//     TYPE - operands' type
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,   \
                                   RTYPE, LCK_ID, MASK, GOMP_FLAG)             \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG)                                      \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                                    LCK_ID, GOMP_FLAG)                         \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) /* send assignment */                \
  OP_CRITICAL_CPT_REV(OP, LCK_ID) /* send assignment */                        \
  }

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i, 3116 1, 3117 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp 3118 3119 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i, 3120 3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp 3121 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad, 3122 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp 3123 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i, 3124 3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp 3125 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad, 3126 4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp 3127 3128 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i, 3129 7, 3130 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp 3131 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad, 3132 8i, 7, 3133 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp 3134 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i, 3135 7, 3136 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp 3137 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad, 3138 8i, 7, 3139 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp 3140 3141 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad, 3142 4r, 3, 3143 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp 3144 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad, 3145 4r, 3, 3146 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp 3147 3148 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad, 3149 8r, 7, 3150 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp 3151 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad, 3152 8r, 7, 3153 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp 3154 3155 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad, 3156 10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp 3157 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad, 3158 10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp 3159 3160 #endif // KMP_HAVE_QUAD 3161 3162 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;} 3163 3164 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3165 TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \ 3166 TYPE rhs) { \ 3167 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 3168 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid)); 3169 3170 #define CRITICAL_SWP(LCK_ID) \ 3171 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3172 \ 3173 old_value = (*lhs); \ 3174 (*lhs) = rhs; \ 3175 \ 3176 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3177 return old_value; 3178 3179 // ------------------------------------------------------------------------ 3180 #ifdef KMP_GOMP_COMPAT 3181 #define GOMP_CRITICAL_SWP(FLAG) \ 3182 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3183 KMP_CHECK_GTID; \ 3184 CRITICAL_SWP(0); \ 3185 } 3186 #else 3187 #define GOMP_CRITICAL_SWP(FLAG) 3188 #endif /* KMP_GOMP_COMPAT */ 3189 3190 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ 3191 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3192 TYPE old_value; \ 3193 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3194 old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \ 3195 return old_value; \ 3196 } 3197 // ------------------------------------------------------------------------ 3198 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, 
GOMP_FLAG) \ 3199 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3200 TYPE old_value; \ 3201 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3202 old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \ 3203 return old_value; \ 3204 } 3205 3206 // ------------------------------------------------------------------------ 3207 #define CMPXCHG_SWP(TYPE, BITS) \ 3208 { \ 3209 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 3210 TYPE old_value, new_value; \ 3211 temp_val = *lhs; \ 3212 old_value = temp_val; \ 3213 new_value = rhs; \ 3214 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 3215 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 3216 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 3217 KMP_CPU_PAUSE(); \ 3218 \ 3219 temp_val = *lhs; \ 3220 old_value = temp_val; \ 3221 new_value = rhs; \ 3222 } \ 3223 return old_value; \ 3224 } 3225 3226 // ------------------------------------------------------------------------- 3227 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ 3228 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3229 TYPE old_value; \ 3230 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3231 CMPXCHG_SWP(TYPE, BITS) \ 3232 } 3233 3234 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp 3235 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp 3236 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp 3237 3238 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32, 3239 KMP_ARCH_X86) // __kmpc_atomic_float4_swp 3240 3241 #if (KMP_ARCH_X86) 3242 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64, 3243 KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp 3244 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64, 3245 KMP_ARCH_X86) // __kmpc_atomic_float8_swp 3246 #else 3247 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp 3248 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64, 3249 KMP_ARCH_X86) // __kmpc_atomic_float8_swp 3250 #endif 3251 3252 // ------------------------------------------------------------------------ 3253 // Routines for Extended types: long double, _Quad, complex flavours (use 3254 // critical section) 3255 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \ 3256 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3257 TYPE old_value; \ 3258 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3259 CRITICAL_SWP(LCK_ID) \ 3260 } 3261 3262 // ------------------------------------------------------------------------ 3263 // !!! TODO: check if we need to return void for cmplx4 routines 3264 // Workaround for cmplx4. Regular routines with return value don't work 3265 // on Win_32e. Let's return captured values through the additional parameter. 
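// ------------------------------------------------------------------------
// Usage sketch (illustration only): the _swp entry points implement the
// OpenMP capture-write form
//   #pragma omp atomic capture
//   { v = x; x = e; }
// for which a compiler may emit, e.g.,
//   v = __kmpc_atomic_fixed4_swp(loc, gtid, &x, e);
// For cmplx4 the captured value is instead returned through the extra `out'
// argument of the void routine defined below, as noted in the workaround
// comment above; `loc' and `gtid' stand for the usual arguments.
// ------------------------------------------------------------------------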
3266 3267 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \ 3268 void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \ 3269 TYPE rhs, TYPE *out) { \ 3270 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 3271 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid)); 3272 3273 #define CRITICAL_SWP_WRK(LCK_ID) \ 3274 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3275 \ 3276 tmp = (*lhs); \ 3277 (*lhs) = (rhs); \ 3278 (*out) = tmp; \ 3279 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3280 return; 3281 // ------------------------------------------------------------------------ 3282 3283 #ifdef KMP_GOMP_COMPAT 3284 #define GOMP_CRITICAL_SWP_WRK(FLAG) \ 3285 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3286 KMP_CHECK_GTID; \ 3287 CRITICAL_SWP_WRK(0); \ 3288 } 3289 #else 3290 #define GOMP_CRITICAL_SWP_WRK(FLAG) 3291 #endif /* KMP_GOMP_COMPAT */ 3292 // ------------------------------------------------------------------------ 3293 3294 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \ 3295 ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \ 3296 TYPE tmp; \ 3297 GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \ 3298 CRITICAL_SWP_WRK(LCK_ID) \ 3299 } 3300 // The end of workaround for cmplx4 3301 3302 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp 3303 #if KMP_HAVE_QUAD 3304 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp 3305 #endif 3306 // cmplx4 routine to return void 3307 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp 3308 3309 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // 3310 // __kmpc_atomic_cmplx4_swp 3311 3312 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp 3313 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp 3314 #if KMP_HAVE_QUAD 3315 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp 3316 #if (KMP_ARCH_X86) 3317 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r, 3318 1) // __kmpc_atomic_float16_a16_swp 3319 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c, 3320 1) // __kmpc_atomic_cmplx16_a16_swp 3321 #endif 3322 #endif 3323 3324 // End of OpenMP 4.0 Capture 3325 3326 #endif // OMP_40_ENABLED 3327 3328 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 3329 3330 #undef OP_CRITICAL 3331 3332 /* ------------------------------------------------------------------------ */ 3333 /* Generic atomic routines */ 3334 3335 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3336 void (*f)(void *, void *, void *)) { 3337 KMP_DEBUG_ASSERT(__kmp_init_serial); 3338 3339 if ( 3340 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3341 FALSE /* must use lock */ 3342 #else 3343 TRUE 3344 #endif 3345 ) { 3346 kmp_int8 old_value, new_value; 3347 3348 old_value = *(kmp_int8 *)lhs; 3349 (*f)(&new_value, &old_value, rhs); 3350 3351 /* TODO: Should this be acquire or release? */ 3352 while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value, 3353 *(kmp_int8 *)&new_value)) { 3354 KMP_CPU_PAUSE(); 3355 3356 old_value = *(kmp_int8 *)lhs; 3357 (*f)(&new_value, &old_value, rhs); 3358 } 3359 3360 return; 3361 } else { 3362 // All 1-byte data is of integer data type. 
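    // Descriptive note: on this fall-back path the update is applied by the
    // caller-supplied routine (*f) while holding a lock -- normally the
    // per-size lock __kmp_atomic_lock_1i, or the single global
    // __kmp_atomic_lock when GOMP compatibility mode (__kmp_atomic_mode == 2)
    // is active, so that the update stays mutually exclusive with atomics
    // taken under the GOMP-compatible global lock.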
3363 3364 #ifdef KMP_GOMP_COMPAT 3365 if (__kmp_atomic_mode == 2) { 3366 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3367 } else 3368 #endif /* KMP_GOMP_COMPAT */ 3369 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid); 3370 3371 (*f)(lhs, lhs, rhs); 3372 3373 #ifdef KMP_GOMP_COMPAT 3374 if (__kmp_atomic_mode == 2) { 3375 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3376 } else 3377 #endif /* KMP_GOMP_COMPAT */ 3378 __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid); 3379 } 3380 } 3381 3382 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3383 void (*f)(void *, void *, void *)) { 3384 if ( 3385 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3386 FALSE /* must use lock */ 3387 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 3388 TRUE /* no alignment problems */ 3389 #else 3390 !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */ 3391 #endif 3392 ) { 3393 kmp_int16 old_value, new_value; 3394 3395 old_value = *(kmp_int16 *)lhs; 3396 (*f)(&new_value, &old_value, rhs); 3397 3398 /* TODO: Should this be acquire or release? */ 3399 while (!KMP_COMPARE_AND_STORE_ACQ16( 3400 (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) { 3401 KMP_CPU_PAUSE(); 3402 3403 old_value = *(kmp_int16 *)lhs; 3404 (*f)(&new_value, &old_value, rhs); 3405 } 3406 3407 return; 3408 } else { 3409 // All 2-byte data is of integer data type. 3410 3411 #ifdef KMP_GOMP_COMPAT 3412 if (__kmp_atomic_mode == 2) { 3413 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3414 } else 3415 #endif /* KMP_GOMP_COMPAT */ 3416 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid); 3417 3418 (*f)(lhs, lhs, rhs); 3419 3420 #ifdef KMP_GOMP_COMPAT 3421 if (__kmp_atomic_mode == 2) { 3422 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3423 } else 3424 #endif /* KMP_GOMP_COMPAT */ 3425 __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid); 3426 } 3427 } 3428 3429 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3430 void (*f)(void *, void *, void *)) { 3431 KMP_DEBUG_ASSERT(__kmp_init_serial); 3432 3433 if ( 3434 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints. 3435 // Gomp compatibility is broken if this routine is called for floats. 3436 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 3437 TRUE /* no alignment problems */ 3438 #else 3439 !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */ 3440 #endif 3441 ) { 3442 kmp_int32 old_value, new_value; 3443 3444 old_value = *(kmp_int32 *)lhs; 3445 (*f)(&new_value, &old_value, rhs); 3446 3447 /* TODO: Should this be acquire or release? */ 3448 while (!KMP_COMPARE_AND_STORE_ACQ32( 3449 (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) { 3450 KMP_CPU_PAUSE(); 3451 3452 old_value = *(kmp_int32 *)lhs; 3453 (*f)(&new_value, &old_value, rhs); 3454 } 3455 3456 return; 3457 } else { 3458 // Use __kmp_atomic_lock_4i for all 4-byte data, 3459 // even if it isn't of integer data type. 
3460 3461 #ifdef KMP_GOMP_COMPAT 3462 if (__kmp_atomic_mode == 2) { 3463 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3464 } else 3465 #endif /* KMP_GOMP_COMPAT */ 3466 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid); 3467 3468 (*f)(lhs, lhs, rhs); 3469 3470 #ifdef KMP_GOMP_COMPAT 3471 if (__kmp_atomic_mode == 2) { 3472 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3473 } else 3474 #endif /* KMP_GOMP_COMPAT */ 3475 __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid); 3476 } 3477 } 3478 3479 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3480 void (*f)(void *, void *, void *)) { 3481 KMP_DEBUG_ASSERT(__kmp_init_serial); 3482 if ( 3483 3484 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3485 FALSE /* must use lock */ 3486 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 3487 TRUE /* no alignment problems */ 3488 #else 3489 !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */ 3490 #endif 3491 ) { 3492 kmp_int64 old_value, new_value; 3493 3494 old_value = *(kmp_int64 *)lhs; 3495 (*f)(&new_value, &old_value, rhs); 3496 /* TODO: Should this be acquire or release? */ 3497 while (!KMP_COMPARE_AND_STORE_ACQ64( 3498 (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) { 3499 KMP_CPU_PAUSE(); 3500 3501 old_value = *(kmp_int64 *)lhs; 3502 (*f)(&new_value, &old_value, rhs); 3503 } 3504 3505 return; 3506 } else { 3507 // Use __kmp_atomic_lock_8i for all 8-byte data, 3508 // even if it isn't of integer data type. 3509 3510 #ifdef KMP_GOMP_COMPAT 3511 if (__kmp_atomic_mode == 2) { 3512 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3513 } else 3514 #endif /* KMP_GOMP_COMPAT */ 3515 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid); 3516 3517 (*f)(lhs, lhs, rhs); 3518 3519 #ifdef KMP_GOMP_COMPAT 3520 if (__kmp_atomic_mode == 2) { 3521 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3522 } else 3523 #endif /* KMP_GOMP_COMPAT */ 3524 __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid); 3525 } 3526 } 3527 3528 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3529 void (*f)(void *, void *, void *)) { 3530 KMP_DEBUG_ASSERT(__kmp_init_serial); 3531 3532 #ifdef KMP_GOMP_COMPAT 3533 if (__kmp_atomic_mode == 2) { 3534 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3535 } else 3536 #endif /* KMP_GOMP_COMPAT */ 3537 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid); 3538 3539 (*f)(lhs, lhs, rhs); 3540 3541 #ifdef KMP_GOMP_COMPAT 3542 if (__kmp_atomic_mode == 2) { 3543 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3544 } else 3545 #endif /* KMP_GOMP_COMPAT */ 3546 __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid); 3547 } 3548 3549 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3550 void (*f)(void *, void *, void *)) { 3551 KMP_DEBUG_ASSERT(__kmp_init_serial); 3552 3553 #ifdef KMP_GOMP_COMPAT 3554 if (__kmp_atomic_mode == 2) { 3555 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3556 } else 3557 #endif /* KMP_GOMP_COMPAT */ 3558 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid); 3559 3560 (*f)(lhs, lhs, rhs); 3561 3562 #ifdef KMP_GOMP_COMPAT 3563 if (__kmp_atomic_mode == 2) { 3564 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3565 } else 3566 #endif /* KMP_GOMP_COMPAT */ 3567 __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid); 3568 } 3569 3570 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3571 void (*f)(void *, void *, void *)) { 3572 KMP_DEBUG_ASSERT(__kmp_init_serial); 3573 3574 #ifdef KMP_GOMP_COMPAT 3575 if 
(__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
}

void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
}

// AC: same two routines as GOMP_atomic_start/end, but will be called by our
// compiler; duplicated in order to avoid using third-party names in pure
// Intel code
// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
void __kmpc_atomic_start(void) {
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
}

void __kmpc_atomic_end(void) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
}

/*!
@}
*/

// end of file