/*
 * kmp_atomic.cpp -- ATOMIC implementation routines
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp_atomic.h"
#include "kmp.h" // TRUE, asm routines prototypes

typedef unsigned char uchar;
typedef unsigned short ushort;

/*!
@defgroup ATOMIC_OPS Atomic Operations
These functions are used for implementing the many different varieties of
atomic operations.

The compiler is at liberty to inline atomic operations that are naturally
supported by the target architecture. For instance on IA-32 architecture an
atomic like this can be inlined
@code
static int s = 0;
#pragma omp atomic
s++;
@endcode
using the single instruction: `lock; incl s`

However the runtime does provide entrypoints for these operations to support
compilers that choose not to inline them. (For instance,
`__kmpc_atomic_fixed4_add` could be used to perform the increment above.)

The names of the functions are encoded by using the data type name and the
operation name, as in these tables.

Data Type  | Data type encoding
-----------|-------------------
int8_t     | `fixed1`
uint8_t    | `fixed1u`
int16_t    | `fixed2`
uint16_t   | `fixed2u`
int32_t    | `fixed4`
uint32_t   | `fixed4u`
int64_t    | `fixed8`
uint64_t   | `fixed8u`
float      | `float4`
double     | `float8`
float 10 (8087 eighty bit float) | `float10`
complex<float>   | `cmplx4`
complex<double>  | `cmplx8`
complex<float10> | `cmplx10`
<br>

Operation | Operation encoding
----------|-------------------
+ | add
- | sub
\* | mul
/ | div
& | andb
<< | shl
\>\> | shr
\| | orb
^ | xor
&& | andl
\|\| | orl
maximum | max
minimum | min
.eqv. | eqv
.neqv. | neqv

<br>
For non-commutative operations, `_rev` can also be added for the reversed
operation. For the functions that capture the result, the suffix `_cpt` is
added.

Update Functions
================
The general form of an atomic function that just performs an update (without a
`capture`) is
@code
void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs );
@endcode
@param ident_t a pointer to source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand

`capture` functions
===================
The capture functions perform an atomic update and return a result, which is
either the value before the capture, or that after. They take an additional
argument to determine which result is returned.
Their general form is therefore
@code
TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs, int flag );
@endcode
@param ident_t a pointer to source location
@param gtid the global thread id
@param lhs a pointer to the left operand
@param rhs the right operand
@param flag one if the result is to be captured *after* the operation, zero if
captured *before*.
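As a sketch of how a compiler might use the capture entrypoints (here `loc`
and `gtid` stand for whatever source location and global thread id the
calling code already has), a capture such as
@code
#pragma omp atomic capture
{ x += 5; v = x; }
@endcode
for a 32-bit `x` could become
@code
v = __kmpc_atomic_fixed4_add_cpt( &loc, gtid, &x, 5, 1 ); // flag==1: after
@endcode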
The one set of exceptions to the form above is the `complex<float>` type where
the value is not returned; rather an extra argument pointer is passed.

They look like
@code
void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 *
lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
@endcode

Read and Write Operations
=========================
The OpenMP<sup>*</sup> standard now supports atomic operations that simply
ensure that the value is read or written atomically, with no modification
performed. In many cases on IA-32 architecture these operations can be inlined
since the architecture guarantees that no tearing occurs on aligned objects
accessed with a single memory operation of up to 64 bits in size.

The general form of the read operations is
@code
TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
@endcode

For the write operations the form is
@code
void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs
);
@endcode

Full list of functions
======================
This leads to the generation of 376 atomic functions, as follows.

Functions for integers
----------------------
There are versions here for integers of size 1, 2, 4 and 8 bytes, both signed
and unsigned (where that matters).
@code
__kmpc_atomic_fixed1_add
__kmpc_atomic_fixed1_add_cpt
__kmpc_atomic_fixed1_add_fp
__kmpc_atomic_fixed1_andb
__kmpc_atomic_fixed1_andb_cpt
__kmpc_atomic_fixed1_andl
__kmpc_atomic_fixed1_andl_cpt
__kmpc_atomic_fixed1_div
__kmpc_atomic_fixed1_div_cpt
__kmpc_atomic_fixed1_div_cpt_rev
__kmpc_atomic_fixed1_div_float8
__kmpc_atomic_fixed1_div_fp
__kmpc_atomic_fixed1_div_rev
__kmpc_atomic_fixed1_eqv
__kmpc_atomic_fixed1_eqv_cpt
__kmpc_atomic_fixed1_max
__kmpc_atomic_fixed1_max_cpt
__kmpc_atomic_fixed1_min
__kmpc_atomic_fixed1_min_cpt
__kmpc_atomic_fixed1_mul
__kmpc_atomic_fixed1_mul_cpt
__kmpc_atomic_fixed1_mul_float8
__kmpc_atomic_fixed1_mul_fp
__kmpc_atomic_fixed1_neqv
__kmpc_atomic_fixed1_neqv_cpt
__kmpc_atomic_fixed1_orb
__kmpc_atomic_fixed1_orb_cpt
__kmpc_atomic_fixed1_orl
__kmpc_atomic_fixed1_orl_cpt
__kmpc_atomic_fixed1_rd
__kmpc_atomic_fixed1_shl
__kmpc_atomic_fixed1_shl_cpt
__kmpc_atomic_fixed1_shl_cpt_rev
__kmpc_atomic_fixed1_shl_rev
__kmpc_atomic_fixed1_shr
__kmpc_atomic_fixed1_shr_cpt
__kmpc_atomic_fixed1_shr_cpt_rev
__kmpc_atomic_fixed1_shr_rev
__kmpc_atomic_fixed1_sub
__kmpc_atomic_fixed1_sub_cpt
__kmpc_atomic_fixed1_sub_cpt_rev
__kmpc_atomic_fixed1_sub_fp
__kmpc_atomic_fixed1_sub_rev
__kmpc_atomic_fixed1_swp
__kmpc_atomic_fixed1_wr
__kmpc_atomic_fixed1_xor
__kmpc_atomic_fixed1_xor_cpt
__kmpc_atomic_fixed1u_add_fp
__kmpc_atomic_fixed1u_sub_fp
__kmpc_atomic_fixed1u_mul_fp
__kmpc_atomic_fixed1u_div
__kmpc_atomic_fixed1u_div_cpt
__kmpc_atomic_fixed1u_div_cpt_rev
__kmpc_atomic_fixed1u_div_fp
__kmpc_atomic_fixed1u_div_rev
__kmpc_atomic_fixed1u_shr
__kmpc_atomic_fixed1u_shr_cpt
__kmpc_atomic_fixed1u_shr_cpt_rev
__kmpc_atomic_fixed1u_shr_rev
__kmpc_atomic_fixed2_add
__kmpc_atomic_fixed2_add_cpt
__kmpc_atomic_fixed2_add_fp
__kmpc_atomic_fixed2_andb
__kmpc_atomic_fixed2_andb_cpt
__kmpc_atomic_fixed2_andl
__kmpc_atomic_fixed2_andl_cpt
__kmpc_atomic_fixed2_div
__kmpc_atomic_fixed2_div_cpt
__kmpc_atomic_fixed2_div_cpt_rev
__kmpc_atomic_fixed2_div_float8
__kmpc_atomic_fixed2_div_fp
__kmpc_atomic_fixed2_div_rev
__kmpc_atomic_fixed2_eqv
__kmpc_atomic_fixed2_eqv_cpt
__kmpc_atomic_fixed2_max
__kmpc_atomic_fixed2_max_cpt
__kmpc_atomic_fixed2_min
__kmpc_atomic_fixed2_min_cpt
__kmpc_atomic_fixed2_mul
__kmpc_atomic_fixed2_mul_cpt
__kmpc_atomic_fixed2_mul_float8
__kmpc_atomic_fixed2_mul_fp
__kmpc_atomic_fixed2_neqv
__kmpc_atomic_fixed2_neqv_cpt
__kmpc_atomic_fixed2_orb
__kmpc_atomic_fixed2_orb_cpt
__kmpc_atomic_fixed2_orl
__kmpc_atomic_fixed2_orl_cpt
__kmpc_atomic_fixed2_rd
__kmpc_atomic_fixed2_shl
__kmpc_atomic_fixed2_shl_cpt
__kmpc_atomic_fixed2_shl_cpt_rev
__kmpc_atomic_fixed2_shl_rev
__kmpc_atomic_fixed2_shr
__kmpc_atomic_fixed2_shr_cpt
__kmpc_atomic_fixed2_shr_cpt_rev
__kmpc_atomic_fixed2_shr_rev
__kmpc_atomic_fixed2_sub
__kmpc_atomic_fixed2_sub_cpt
__kmpc_atomic_fixed2_sub_cpt_rev
__kmpc_atomic_fixed2_sub_fp
__kmpc_atomic_fixed2_sub_rev
__kmpc_atomic_fixed2_swp
__kmpc_atomic_fixed2_wr
__kmpc_atomic_fixed2_xor
__kmpc_atomic_fixed2_xor_cpt
__kmpc_atomic_fixed2u_add_fp
__kmpc_atomic_fixed2u_sub_fp
__kmpc_atomic_fixed2u_mul_fp
__kmpc_atomic_fixed2u_div
__kmpc_atomic_fixed2u_div_cpt
__kmpc_atomic_fixed2u_div_cpt_rev
__kmpc_atomic_fixed2u_div_fp
__kmpc_atomic_fixed2u_div_rev
__kmpc_atomic_fixed2u_shr
__kmpc_atomic_fixed2u_shr_cpt
__kmpc_atomic_fixed2u_shr_cpt_rev
__kmpc_atomic_fixed2u_shr_rev
__kmpc_atomic_fixed4_add
__kmpc_atomic_fixed4_add_cpt
__kmpc_atomic_fixed4_add_fp
__kmpc_atomic_fixed4_andb
__kmpc_atomic_fixed4_andb_cpt
__kmpc_atomic_fixed4_andl
__kmpc_atomic_fixed4_andl_cpt
__kmpc_atomic_fixed4_div
__kmpc_atomic_fixed4_div_cpt
__kmpc_atomic_fixed4_div_cpt_rev
__kmpc_atomic_fixed4_div_float8
__kmpc_atomic_fixed4_div_fp
__kmpc_atomic_fixed4_div_rev
__kmpc_atomic_fixed4_eqv
__kmpc_atomic_fixed4_eqv_cpt
__kmpc_atomic_fixed4_max
__kmpc_atomic_fixed4_max_cpt
__kmpc_atomic_fixed4_min
__kmpc_atomic_fixed4_min_cpt
__kmpc_atomic_fixed4_mul
__kmpc_atomic_fixed4_mul_cpt
__kmpc_atomic_fixed4_mul_float8
__kmpc_atomic_fixed4_mul_fp
__kmpc_atomic_fixed4_neqv
__kmpc_atomic_fixed4_neqv_cpt
__kmpc_atomic_fixed4_orb
__kmpc_atomic_fixed4_orb_cpt
__kmpc_atomic_fixed4_orl
__kmpc_atomic_fixed4_orl_cpt
__kmpc_atomic_fixed4_rd
__kmpc_atomic_fixed4_shl
__kmpc_atomic_fixed4_shl_cpt
__kmpc_atomic_fixed4_shl_cpt_rev
__kmpc_atomic_fixed4_shl_rev
__kmpc_atomic_fixed4_shr
__kmpc_atomic_fixed4_shr_cpt
__kmpc_atomic_fixed4_shr_cpt_rev
__kmpc_atomic_fixed4_shr_rev
__kmpc_atomic_fixed4_sub
__kmpc_atomic_fixed4_sub_cpt
__kmpc_atomic_fixed4_sub_cpt_rev
__kmpc_atomic_fixed4_sub_fp
__kmpc_atomic_fixed4_sub_rev
__kmpc_atomic_fixed4_swp
__kmpc_atomic_fixed4_wr
__kmpc_atomic_fixed4_xor
__kmpc_atomic_fixed4_xor_cpt
__kmpc_atomic_fixed4u_add_fp
__kmpc_atomic_fixed4u_sub_fp
__kmpc_atomic_fixed4u_mul_fp
__kmpc_atomic_fixed4u_div
__kmpc_atomic_fixed4u_div_cpt
__kmpc_atomic_fixed4u_div_cpt_rev
__kmpc_atomic_fixed4u_div_fp
__kmpc_atomic_fixed4u_div_rev
__kmpc_atomic_fixed4u_shr
__kmpc_atomic_fixed4u_shr_cpt
__kmpc_atomic_fixed4u_shr_cpt_rev
__kmpc_atomic_fixed4u_shr_rev
__kmpc_atomic_fixed8_add
__kmpc_atomic_fixed8_add_cpt
__kmpc_atomic_fixed8_add_fp
__kmpc_atomic_fixed8_andb
__kmpc_atomic_fixed8_andb_cpt
__kmpc_atomic_fixed8_andl
__kmpc_atomic_fixed8_andl_cpt
__kmpc_atomic_fixed8_div
__kmpc_atomic_fixed8_div_cpt
__kmpc_atomic_fixed8_div_cpt_rev
__kmpc_atomic_fixed8_div_float8
__kmpc_atomic_fixed8_div_fp
__kmpc_atomic_fixed8_div_rev
__kmpc_atomic_fixed8_eqv
__kmpc_atomic_fixed8_eqv_cpt
__kmpc_atomic_fixed8_max
__kmpc_atomic_fixed8_max_cpt
__kmpc_atomic_fixed8_min
__kmpc_atomic_fixed8_min_cpt
__kmpc_atomic_fixed8_mul
__kmpc_atomic_fixed8_mul_cpt
__kmpc_atomic_fixed8_mul_float8
__kmpc_atomic_fixed8_mul_fp
__kmpc_atomic_fixed8_neqv
__kmpc_atomic_fixed8_neqv_cpt
__kmpc_atomic_fixed8_orb
__kmpc_atomic_fixed8_orb_cpt
__kmpc_atomic_fixed8_orl
__kmpc_atomic_fixed8_orl_cpt
__kmpc_atomic_fixed8_rd
__kmpc_atomic_fixed8_shl
__kmpc_atomic_fixed8_shl_cpt
__kmpc_atomic_fixed8_shl_cpt_rev
__kmpc_atomic_fixed8_shl_rev
__kmpc_atomic_fixed8_shr
__kmpc_atomic_fixed8_shr_cpt
__kmpc_atomic_fixed8_shr_cpt_rev
__kmpc_atomic_fixed8_shr_rev
__kmpc_atomic_fixed8_sub
__kmpc_atomic_fixed8_sub_cpt
__kmpc_atomic_fixed8_sub_cpt_rev
__kmpc_atomic_fixed8_sub_fp
__kmpc_atomic_fixed8_sub_rev
__kmpc_atomic_fixed8_swp
__kmpc_atomic_fixed8_wr
__kmpc_atomic_fixed8_xor
__kmpc_atomic_fixed8_xor_cpt
__kmpc_atomic_fixed8u_add_fp
__kmpc_atomic_fixed8u_sub_fp
__kmpc_atomic_fixed8u_mul_fp
__kmpc_atomic_fixed8u_div
__kmpc_atomic_fixed8u_div_cpt
__kmpc_atomic_fixed8u_div_cpt_rev
__kmpc_atomic_fixed8u_div_fp
__kmpc_atomic_fixed8u_div_rev
__kmpc_atomic_fixed8u_shr
__kmpc_atomic_fixed8u_shr_cpt
__kmpc_atomic_fixed8u_shr_cpt_rev
__kmpc_atomic_fixed8u_shr_rev
@endcode

Functions for floating point
----------------------------
There are versions here for floating point numbers of size 4, 8, 10 and 16
bytes. (Ten byte floats are used by X87, but are now rare.)
@code
__kmpc_atomic_float4_add
__kmpc_atomic_float4_add_cpt
__kmpc_atomic_float4_add_float8
__kmpc_atomic_float4_add_fp
__kmpc_atomic_float4_div
__kmpc_atomic_float4_div_cpt
__kmpc_atomic_float4_div_cpt_rev
__kmpc_atomic_float4_div_float8
__kmpc_atomic_float4_div_fp
__kmpc_atomic_float4_div_rev
__kmpc_atomic_float4_max
__kmpc_atomic_float4_max_cpt
__kmpc_atomic_float4_min
__kmpc_atomic_float4_min_cpt
__kmpc_atomic_float4_mul
__kmpc_atomic_float4_mul_cpt
__kmpc_atomic_float4_mul_float8
__kmpc_atomic_float4_mul_fp
__kmpc_atomic_float4_rd
__kmpc_atomic_float4_sub
__kmpc_atomic_float4_sub_cpt
__kmpc_atomic_float4_sub_cpt_rev
__kmpc_atomic_float4_sub_float8
__kmpc_atomic_float4_sub_fp
__kmpc_atomic_float4_sub_rev
__kmpc_atomic_float4_swp
__kmpc_atomic_float4_wr
__kmpc_atomic_float8_add
__kmpc_atomic_float8_add_cpt
__kmpc_atomic_float8_add_fp
__kmpc_atomic_float8_div
__kmpc_atomic_float8_div_cpt
__kmpc_atomic_float8_div_cpt_rev
__kmpc_atomic_float8_div_fp
__kmpc_atomic_float8_div_rev
__kmpc_atomic_float8_max
__kmpc_atomic_float8_max_cpt
__kmpc_atomic_float8_min
__kmpc_atomic_float8_min_cpt
__kmpc_atomic_float8_mul
__kmpc_atomic_float8_mul_cpt
__kmpc_atomic_float8_mul_fp
__kmpc_atomic_float8_rd
__kmpc_atomic_float8_sub
__kmpc_atomic_float8_sub_cpt
__kmpc_atomic_float8_sub_cpt_rev
__kmpc_atomic_float8_sub_fp
__kmpc_atomic_float8_sub_rev
__kmpc_atomic_float8_swp
__kmpc_atomic_float8_wr
__kmpc_atomic_float10_add
__kmpc_atomic_float10_add_cpt
__kmpc_atomic_float10_add_fp
__kmpc_atomic_float10_div
__kmpc_atomic_float10_div_cpt
__kmpc_atomic_float10_div_cpt_rev
__kmpc_atomic_float10_div_fp
__kmpc_atomic_float10_div_rev
__kmpc_atomic_float10_mul
__kmpc_atomic_float10_mul_cpt
__kmpc_atomic_float10_mul_fp
__kmpc_atomic_float10_rd
__kmpc_atomic_float10_sub
__kmpc_atomic_float10_sub_cpt
__kmpc_atomic_float10_sub_cpt_rev
__kmpc_atomic_float10_sub_fp
__kmpc_atomic_float10_sub_rev
__kmpc_atomic_float10_swp
__kmpc_atomic_float10_wr
__kmpc_atomic_float16_add
__kmpc_atomic_float16_add_cpt
__kmpc_atomic_float16_div
__kmpc_atomic_float16_div_cpt
__kmpc_atomic_float16_div_cpt_rev
__kmpc_atomic_float16_div_rev
__kmpc_atomic_float16_max
__kmpc_atomic_float16_max_cpt
__kmpc_atomic_float16_min
__kmpc_atomic_float16_min_cpt
__kmpc_atomic_float16_mul
__kmpc_atomic_float16_mul_cpt
__kmpc_atomic_float16_rd
__kmpc_atomic_float16_sub
__kmpc_atomic_float16_sub_cpt
__kmpc_atomic_float16_sub_cpt_rev
__kmpc_atomic_float16_sub_rev
__kmpc_atomic_float16_swp
__kmpc_atomic_float16_wr
@endcode

Functions for Complex types
---------------------------
Functions for complex types whose component floating point variables are of
size 4, 8, 10 or 16 bytes. The names here are based on the size of the
component float, *not* the size of the complex type. So
`__kmpc_atomic_cmplx8_add` is an operation on a `complex<double>` or
`complex(kind=8)`, *not* `complex<float>`.
@code
__kmpc_atomic_cmplx4_add
__kmpc_atomic_cmplx4_add_cmplx8
__kmpc_atomic_cmplx4_add_cpt
__kmpc_atomic_cmplx4_div
__kmpc_atomic_cmplx4_div_cmplx8
__kmpc_atomic_cmplx4_div_cpt
__kmpc_atomic_cmplx4_div_cpt_rev
__kmpc_atomic_cmplx4_div_rev
__kmpc_atomic_cmplx4_mul
__kmpc_atomic_cmplx4_mul_cmplx8
__kmpc_atomic_cmplx4_mul_cpt
__kmpc_atomic_cmplx4_rd
__kmpc_atomic_cmplx4_sub
__kmpc_atomic_cmplx4_sub_cmplx8
__kmpc_atomic_cmplx4_sub_cpt
__kmpc_atomic_cmplx4_sub_cpt_rev
__kmpc_atomic_cmplx4_sub_rev
__kmpc_atomic_cmplx4_swp
__kmpc_atomic_cmplx4_wr
__kmpc_atomic_cmplx8_add
__kmpc_atomic_cmplx8_add_cpt
__kmpc_atomic_cmplx8_div
__kmpc_atomic_cmplx8_div_cpt
__kmpc_atomic_cmplx8_div_cpt_rev
__kmpc_atomic_cmplx8_div_rev
__kmpc_atomic_cmplx8_mul
__kmpc_atomic_cmplx8_mul_cpt
__kmpc_atomic_cmplx8_rd
__kmpc_atomic_cmplx8_sub
__kmpc_atomic_cmplx8_sub_cpt
__kmpc_atomic_cmplx8_sub_cpt_rev
__kmpc_atomic_cmplx8_sub_rev
__kmpc_atomic_cmplx8_swp
__kmpc_atomic_cmplx8_wr
__kmpc_atomic_cmplx10_add
__kmpc_atomic_cmplx10_add_cpt
__kmpc_atomic_cmplx10_div
__kmpc_atomic_cmplx10_div_cpt
__kmpc_atomic_cmplx10_div_cpt_rev
__kmpc_atomic_cmplx10_div_rev
__kmpc_atomic_cmplx10_mul
__kmpc_atomic_cmplx10_mul_cpt
__kmpc_atomic_cmplx10_rd
__kmpc_atomic_cmplx10_sub
__kmpc_atomic_cmplx10_sub_cpt
__kmpc_atomic_cmplx10_sub_cpt_rev
__kmpc_atomic_cmplx10_sub_rev
__kmpc_atomic_cmplx10_swp
__kmpc_atomic_cmplx10_wr
__kmpc_atomic_cmplx16_add
__kmpc_atomic_cmplx16_add_cpt
__kmpc_atomic_cmplx16_div
__kmpc_atomic_cmplx16_div_cpt
__kmpc_atomic_cmplx16_div_cpt_rev
__kmpc_atomic_cmplx16_div_rev
__kmpc_atomic_cmplx16_mul
__kmpc_atomic_cmplx16_mul_cpt
__kmpc_atomic_cmplx16_rd
__kmpc_atomic_cmplx16_sub
__kmpc_atomic_cmplx16_sub_cpt
__kmpc_atomic_cmplx16_sub_cpt_rev
__kmpc_atomic_cmplx16_swp
__kmpc_atomic_cmplx16_wr
@endcode
*/
/*!
@ingroup ATOMIC_OPS
@{
*/

/*
 * Global vars
 */

#ifndef KMP_GOMP_COMPAT
int __kmp_atomic_mode = 1; // Intel perf
#else
int __kmp_atomic_mode = 2; // GOMP compatibility
#endif /* KMP_GOMP_COMPAT */

KMP_ALIGN(128)

// Control access to all user coded atomics in Gnu compat mode
kmp_atomic_lock_t __kmp_atomic_lock;
// Control access to all user coded atomics for 1-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_1i;
// Control access to all user coded atomics for 2-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_2i;
// Control access to all user coded atomics for 4-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_4i;
// Control access to all user coded atomics for kmp_real32 data type
kmp_atomic_lock_t __kmp_atomic_lock_4r;
// Control access to all user coded atomics for 8-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_8i;
// Control access to all user coded atomics for kmp_real64 data type
kmp_atomic_lock_t __kmp_atomic_lock_8r;
// Control access to all user coded atomics for float complex data type
kmp_atomic_lock_t __kmp_atomic_lock_8c;
// Control access to all user coded atomics for long double data type
kmp_atomic_lock_t __kmp_atomic_lock_10r;
// Control access to all user coded atomics for _Quad data type
kmp_atomic_lock_t __kmp_atomic_lock_16r;
// Control access to all user coded atomics for double complex data type
kmp_atomic_lock_t __kmp_atomic_lock_16c;
// Control access to all user coded atomics for long double complex type
kmp_atomic_lock_t __kmp_atomic_lock_20c;
// Control access to all user coded atomics for _Quad complex data type
kmp_atomic_lock_t __kmp_atomic_lock_32c;

/* 2007-03-02:
   Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
   on *_32 and *_32e. This is just a temporary workaround for the problem. It
   seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
   in assembler language.
*/
#define KMP_ATOMIC_VOLATILE volatile

#if (KMP_ARCH_X86) && KMP_HAVE_QUAD

static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q + rhs.q;
}
static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q - rhs.q;
}
static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q * rhs.q;
}
static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q / rhs.q;
}
static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q > rhs.q;
}

static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q + rhs.q;
}
static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q - rhs.q;
}
static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q * rhs.q;
}
static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q / rhs.q;
}
static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q > rhs.q;
}

static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q + rhs.q;
}
static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q - rhs.q;
}
static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q * rhs.q;
}
static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs,
                                          kmp_cmplx128_a4_t &rhs) {
  return lhs.q / rhs.q;
}

static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q + rhs.q;
}
static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q - rhs.q;
}
static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q * rhs.q;
}
static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs,
                                           kmp_cmplx128_a16_t &rhs) {
  return lhs.q / rhs.q;
}

#endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD
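// Note (illustration): these overloads exist so that the generic macro bodies
// below, e.g. (*lhs) = (TYPE)((*lhs) OP ((TYPE)rhs)) with OP '+', compile
// unchanged when TYPE is one of the aligned wrapper types (Quad_a4_t,
// Quad_a16_t, kmp_cmplx128_a4_t, kmp_cmplx128_a16_t) rather than a bare
// _Quad or kmp_cmplx128.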
// ATOMIC implementation routines -----------------------------------------
// One routine for each operation and operand type.
// All routine declarations look like
// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );

#define KMP_CHECK_GTID \
  if (gtid == KMP_GTID_UNKNOWN) { \
    gtid = __kmp_entry_gtid(); \
  } // check and get gtid when needed

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//           fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *lhs, TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
// Lock variables used for critical sections for various size operands
#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  (*lhs) OP(rhs); \
\
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  (*lhs) = (TYPE)((*lhs)OP((TYPE)rhs)); \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
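// For illustration only (not compiled): OP_UPDATE_CRITICAL(long double, +, 10r),
// as used by __kmpc_atomic_float10_add below, expands to a plain
// lock-protected update:
//
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);
//   (*lhs) = (long double)((*lhs) + ((long double)rhs));
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);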
// ------------------------------------------------------------------------
// For GNU compatibility, we may need to use a critical section,
// even though it is not required by the ISA.
//
// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
// critical section. On Intel(R) 64, all atomic operations are done with fetch
// and add or compare and exchange. Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions
// which require a critical section, where we predict that they will be
// implemented in the Gnu codegen by calling GOMP_atomic_start() /
// GOMP_atomic_end()).
//
// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
// the FLAG parameter should always be 1. If we know that we will be using
// a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
// locks that are specialized based upon the size or type of the data.
//
// If FLAG is 0, then we are relying on dead code elimination by the build
// compiler to get rid of the useless block of code, and save a needless
// branch at runtime.

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP, 0); \
    return; \
  }

#define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL(TYPE, OP, 0); \
    return; \
  }
#else
#define OP_GOMP_CRITICAL(OP, FLAG)
#define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

#if KMP_MIC
#define KMP_DO_PAUSE _mm_delay_32(1)
#else
#define KMP_DO_PAUSE
#endif /* KMP_MIC */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
#define OP_CMPXCHG(TYPE, BITS, OP) \
  { \
    TYPE old_value, new_value; \
    old_value = *(TYPE volatile *)lhs; \
    new_value = (TYPE)(old_value OP((TYPE)rhs)); \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_DO_PAUSE; \
\
      old_value = *(TYPE volatile *)lhs; \
      new_value = (TYPE)(old_value OP((TYPE)rhs)); \
    } \
  }

#if USE_CMPXCHG_FIX
// 2007-06-25:
// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
// and win_32e are affected (I verified the asm). Compiler ignores the volatile
// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
// compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
// the workaround.
#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  { \
    struct _sss { \
      TYPE cmp; \
      kmp_int##BITS *vvv; \
    }; \
    struct _sss old_value, new_value; \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
    new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
      KMP_DO_PAUSE; \
\
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
      new_value.cmp = (TYPE)(old_value.cmp OP rhs); \
    } \
  }
// end of the first part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                       GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
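// For illustration only (not compiled): the invocation
// ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86) below
// expands (GOMP-compatibility branch and trace/assert preamble elided) to
// roughly:
//
//   void __kmpc_atomic_float4_add(ident_t *id_ref, int gtid, kmp_real32 *lhs,
//                                 kmp_real32 rhs) {
//     kmp_real32 old_value, new_value;
//     old_value = *(kmp_real32 volatile *)lhs;
//     new_value = (kmp_real32)(old_value + ((kmp_real32)rhs));
//     while (!KMP_COMPARE_AND_STORE_ACQ32(
//         (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
//         *VOLATILE_CAST(kmp_int32 *) & new_value)) {
//       KMP_DO_PAUSE;
//       old_value = *(kmp_real32 volatile *)lhs;
//       new_value = (kmp_real32)(old_value + ((kmp_real32)rhs));
//     }
//   }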
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
                                  MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
    KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  } else { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL(TYPE, OP, \
                       LCK_ID) /* unaligned address - use critical */ \
  } \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                       GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL(TYPE, OP, \
                       LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
                                  MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL(TYPE, OP, \
                       LCK_ID) /* unaligned address - use critical */ \
  } \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// Routines for ATOMIC 4-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
                 0) // __kmpc_atomic_fixed4_add
ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
                 0) // __kmpc_atomic_fixed4_sub

ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_add
ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_sub

// Routines for ATOMIC 8-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub

ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_add
ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_sub
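// For illustration: given the entry points above, a compiler could lower
//   #pragma omp atomic
//   x += expr;            // x of type kmp_int32
// to the call
//   __kmpc_atomic_fixed4_add(&loc, gtid, &x, expr);
// where loc and gtid stand for the ident_t and global thread id the caller
// already has (hypothetical names, for the sketch only).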
// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable
// MASK - used for alignment check

// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
               0) // __kmpc_atomic_fixed1_andb
ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
               0) // __kmpc_atomic_fixed1_orb
ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
               0) // __kmpc_atomic_fixed1_xor
ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
               0) // __kmpc_atomic_fixed2_andb
ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
               0) // __kmpc_atomic_fixed2_orb
ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
               0) // __kmpc_atomic_fixed2_xor
ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
               0) // __kmpc_atomic_fixed4_andb
ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
               0) // __kmpc_atomic_fixed4_orb
ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
               0) // __kmpc_atomic_fixed4_xor
ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_div
ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_mul
ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_div
ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_mul
// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG

/* ------------------------------------------------------------------------ */
/* Routines for C/C++ Reduction operators && and ||                          */

// ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no combined assignment
// TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
#define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  OP_CRITICAL(= *lhs OP, LCK_ID) \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }

#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
              0) // __kmpc_atomic_fixed4_andl
ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
              0) // __kmpc_atomic_fixed4_orl
ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl

/* ------------------------------------------------------------------------- */
/* Routines for Fortran operators that have no exact match in C:              */
/* MAX, MIN, .EQV., .NEQV.                                                     */
/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}             */
/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}    */

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - operator to check if we need any actions at all
#define MIN_MAX_CRITSECT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  if (*lhs OP rhs) { /* still need actions? */ \
    *lhs = rhs; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    MIN_MAX_CRITSECT(OP, 0); \
    return; \
  }
#else
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    while (old_value OP rhs && /* still need actions? */ \
           !KMP_COMPARE_AND_STORE_ACQ##BITS( \
               (kmp_int##BITS *)lhs, \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
    } \
  }
// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { /* need actions? */ \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    MIN_MAX_CRITSECT(OP, LCK_ID) \
  } \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
  } \
  }

#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
      MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
    } else { \
      KMP_CHECK_GTID; \
      MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
    } \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
                 0) // __kmpc_atomic_fixed4_max
MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
                 0) // __kmpc_atomic_fixed4_min
MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_max
MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_min
MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_max
MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_min
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
                 1) // __kmpc_atomic_float16_max
MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
                 1) // __kmpc_atomic_float16_min
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
                 1) // __kmpc_atomic_float16_max_a16
MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
                 1) // __kmpc_atomic_float16_min_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
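// For illustration: __kmpc_atomic_fixed4_max(&loc, gtid, &x, expr) implements
// the Fortran-style update x = max(x, expr). The guarding test (*lhs < rhs)
// means the CAS loop (or critical section) is entered only when the stored
// value actually needs to change; loc and gtid are hypothetical
// caller-provided arguments, as in the sketches above.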
// ------------------------------------------------------------------------
// Need separate macros for .EQV. because of the need of complement (~)
// OP ignored for critical sections, ^=~ used instead
#define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */ \
  }

// ------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                        GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
// ------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                        GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
                KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
                KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
                KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
                KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv
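// For illustration: .neqv. maps onto plain exclusive-or (OP '^'), while .eqv.
// passes the operator pair '^~' so that OP_CMPXCHG computes
// old_value ^ ~rhs, i.e. the bitwise complement of exclusive-or, which is
// Fortran's .EQV. on full-width logicals.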
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL(float10, add, long double, +, 10r,
                1) // __kmpc_atomic_float10_add
ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
                1) // __kmpc_atomic_float10_sub
ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
                1) // __kmpc_atomic_float10_mul
ATOMIC_CRITICAL(float10, div, long double, /, 10r,
                1) // __kmpc_atomic_float10_div
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
                1) // __kmpc_atomic_float16_add
ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
                1) // __kmpc_atomic_float16_sub
ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
                1) // __kmpc_atomic_float16_mul
ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
                1) // __kmpc_atomic_float16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
                1) // __kmpc_atomic_float16_add_a16
ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
                1) // __kmpc_atomic_float16_sub_a16
ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
                1) // __kmpc_atomic_float16_mul_a16
ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
                1) // __kmpc_atomic_float16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// routines for complex types

#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_add
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_div
// end of the workaround for C78287
#else
ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
#endif // USE_CMPXCHG_FIX

ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
                1) // __kmpc_atomic_cmplx10_add
ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
                1) // __kmpc_atomic_cmplx10_sub
ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
                1) // __kmpc_atomic_cmplx10_mul
ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
                1) // __kmpc_atomic_cmplx10_div
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
                1) // __kmpc_atomic_cmplx16_add
ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub
ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul
ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
                1) // __kmpc_atomic_cmplx16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
                1) // __kmpc_atomic_cmplx16_add_a16
ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub_a16
ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul_a16
ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                1) // __kmpc_atomic_cmplx16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// OpenMP 4.0: x = expr binop x for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  (*lhs) = (TYPE)((rhs)OP(*lhs)); \
\
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_REV(TYPE, OP, 0); \
    return; \
  }

#else
#define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//           fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
                                                   TYPE *lhs, TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_REV(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = (TYPE)(rhs OP old_value); \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_DO_PAUSE; \
\
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = (TYPE)(rhs OP old_value); \
    } \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_REV(TYPE, BITS, OP) \
  }
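// For illustration: a _rev entry point computes *lhs = rhs OP *lhs rather
// than *lhs = *lhs OP rhs, so a compiler could, as a sketch, lower
//   #pragma omp atomic
//   x = expr / x;        // x of type kmp_real64
// to __kmpc_atomic_float8_div_rev(&loc, gtid, &x, expr) (defined below,
// with loc and gtid standing for the caller's ident_t and thread id). Note
// that OP_CMPXCHG_REV computes (rhs OP old_value) where OP_CMPXCHG computes
// (old_value OP rhs).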
// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable

// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev

ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev

ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev

ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev

ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev
__kmpc_atomic_float8_div_rev 1511 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r, 1512 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev 1513 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG 1514 1515 // ------------------------------------------------------------------------ 1516 // Routines for Extended types: long double, _Quad, complex flavours (use 1517 // critical section) 1518 // TYPE_ID, OP_ID, TYPE - detailed above 1519 // OP - operator 1520 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1521 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1522 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \ 1523 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \ 1524 OP_CRITICAL_REV(TYPE, OP, LCK_ID) \ 1525 } 1526 1527 /* ------------------------------------------------------------------------- */ 1528 // routines for long double type 1529 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r, 1530 1) // __kmpc_atomic_float10_sub_rev 1531 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r, 1532 1) // __kmpc_atomic_float10_div_rev 1533 #if KMP_HAVE_QUAD 1534 // routines for _Quad type 1535 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r, 1536 1) // __kmpc_atomic_float16_sub_rev 1537 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r, 1538 1) // __kmpc_atomic_float16_div_rev 1539 #if (KMP_ARCH_X86) 1540 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r, 1541 1) // __kmpc_atomic_float16_sub_a16_rev 1542 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r, 1543 1) // __kmpc_atomic_float16_div_a16_rev 1544 #endif // KMP_ARCH_X86 1545 #endif // KMP_HAVE_QUAD 1546 1547 // routines for complex types 1548 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c, 1549 1) // __kmpc_atomic_cmplx4_sub_rev 1550 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c, 1551 1) // __kmpc_atomic_cmplx4_div_rev 1552 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c, 1553 1) // __kmpc_atomic_cmplx8_sub_rev 1554 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c, 1555 1) // __kmpc_atomic_cmplx8_div_rev 1556 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c, 1557 1) // __kmpc_atomic_cmplx10_sub_rev 1558 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c, 1559 1) // __kmpc_atomic_cmplx10_div_rev 1560 #if KMP_HAVE_QUAD 1561 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c, 1562 1) // __kmpc_atomic_cmplx16_sub_rev 1563 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c, 1564 1) // __kmpc_atomic_cmplx16_div_rev 1565 #if (KMP_ARCH_X86) 1566 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1567 1) // __kmpc_atomic_cmplx16_sub_a16_rev 1568 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1569 1) // __kmpc_atomic_cmplx16_div_a16_rev 1570 #endif // KMP_ARCH_X86 1571 #endif // KMP_HAVE_QUAD 1572 1573 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 1574 // End of OpenMP 4.0: x = expr binop x for non-commutative operations. 
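/*
  Illustration (not part of the runtime): the reversed forms compute
  *lhs = rhs OP *lhs rather than *lhs = *lhs OP rhs, which only matters for
  non-commutative operators. A minimal standalone sketch of the same
  compare-and-swap retry loop that OP_CMPXCHG_REV expands to, written here
  with std::atomic; the function name is hypothetical:
  @code
  #include <atomic>

  // Hypothetical stand-in for __kmpc_atomic_fixed4_sub_rev: *lhs = rhs - *lhs.
  void atomic_sub_rev(std::atomic<int> *lhs, int rhs) {
    int old_value = lhs->load();
    // Retry until no other thread changed *lhs between the read and the CAS;
    // compare_exchange_weak refreshes old_value on failure.
    while (!lhs->compare_exchange_weak(old_value, rhs - old_value)) {
    }
  }
  @endcode
*/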
/* ------------------------------------------------------------------------ */
/* Routines for mixed types of LHS and RHS, when RHS is "larger".            */
/* Note: to reduce the total number of type combinations, the compiler is    */
/* expected to convert the RHS to the longest floating type, i.e. _Quad,     */
/* before calling any of these routines.                                     */
/* The conversion to _Quad is done by the compiler during the calculation,   */
/* and the conversion back to TYPE happens before the assignment, like:      */
/*   *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                   */
/* A performance penalty is expected because of the SW emulation involved.   */
/* ------------------------------------------------------------------------ */

#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
              gtid));

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
                           GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
  }
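/*
  Illustration (not part of the runtime): per the comment block above, a
  mixed-type update is meant to be computed in _Quad and only narrowed back
  on the store. A hedged sketch of the update each *_fp routine performs,
  with the locking elided; the helper name is hypothetical and compiling it
  requires a compiler that provides _Quad (KMP_HAVE_QUAD):
  @code
  // Shape of the update inside __kmpc_atomic_float8_add_fp:
  void add_fp_update(double *lhs, _Quad rhs) {
    *lhs = (double)((_Quad)(*lhs) + rhs); // compute in _Quad, narrow on store
  }
  @endcode
*/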
// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                           LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
// -------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                           LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
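/*
  Illustration (not part of the runtime): on targets that cannot do an
  unaligned compare-and-swap, the MASK argument selects between the lock-free
  path and the critical-section fallback. A minimal standalone sketch of
  that alignment test; the helper name is hypothetical:
  @code
  #include <cstdint>

  // MASK is size-1 for a naturally aligned type (e.g. 3 for a 4-byte int):
  // the low bits of an aligned address must all be zero.
  bool is_naturally_aligned(const void *p, std::uintptr_t mask) {
    return (reinterpret_cast<std::uintptr_t>(p) & mask) == 0;
  }
  @endcode
  For example, the fixed2 entries below pass MASK=1 and the fixed8 entries
  pass MASK=7.
*/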
// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_REV(TYPE, BITS, OP) \
  }
#define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                               LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \
  OP_CRITICAL_REV(TYPE, OP, LCK_ID) \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// RHS=float8
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 0) // __kmpc_atomic_fixed4_mul_float8
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 0) // __kmpc_atomic_fixed4_div_float8
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8

// RHS=float16 (deprecated, to be removed when we are sure the compiler does
// not use them)
#if KMP_HAVE_QUAD
ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp

ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_add_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_add_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_div_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_div_fp

ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp

ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp

ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp

ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_add_fp
ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_sub_fp
ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_mul_fp
ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_div_fp
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Reverse operations
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp

ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_sub_rev_fp
ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_div_rev_fp
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#endif // KMP_HAVE_QUAD

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  }
// end of the second part of the workaround for C78287
#else
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
#endif // USE_CMPXCHG_FIX
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Atomic READ routines

// ------------------------------------------------------------------------
// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands' type and size (fixed* for signed, fixed*u for unsigned
// fixed-size integers)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *loc) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store_ret" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *lhs only once (w/o it the compiler reads *lhs twice)
// TODO: check if it is still necessary
// Return the old value regardless of the result of the "compare & swap"
// operation
#define OP_CMPXCHG_READ(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    union f_i_union { \
      TYPE f_val; \
      kmp_int##BITS i_val; \
    }; \
    union f_i_union old_value; \
    temp_val = *loc; \
    old_value.f_val = temp_val; \
    old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
        (kmp_int##BITS *)loc, \
        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
    new_value = old_value.f_val; \
    return new_value; \
  }

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_READ(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  new_value = (*loc); \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_READ(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_READ(OP, 0); \
    return new_value; \
  }
#else
#define OP_GOMP_CRITICAL_READ(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
  return new_value; \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
  OP_CMPXCHG_READ(TYPE, BITS, OP) \
  }
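/*
  Illustration (not part of the runtime): OP_CMPXCHG_READ reads a floating
  value atomically by issuing a compare-and-swap that stores back the value
  it just read; the union reinterprets the bits so the integer CAS primitive
  can be used. A standalone sketch of the same idea using a GCC/Clang
  builtin; the function name is hypothetical:
  @code
  #include <cstdint>
  #include <cstring>

  float atomic_read_float(float *loc) {
    std::uint32_t expected;
    std::memcpy(&expected, loc, sizeof(expected)); // type-pun like the union
    // CAS with desired == expected: memory is left unchanged, but the
    // primitive returns the current value atomically.
    std::uint32_t observed = __sync_val_compare_and_swap(
        reinterpret_cast<std::uint32_t *>(loc), expected, expected);
    float result;
    std::memcpy(&result, &observed, sizeof(result));
    return result;
  }
  @endcode
*/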
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
  return new_value; \
  }

// ------------------------------------------------------------------------
// Fix for cmplx4 read (CQ220361) on Windows* OS. A regular routine with a
// return value doesn't work there, so the read value is returned through an
// additional parameter instead.
#if (KMP_OS_WINDOWS)

#define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  (*out) = (*loc); \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_READ_WRK(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
#define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
                                         TYPE *loc) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
  OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
  }

#endif // KMP_OS_WINDOWS

// ------------------------------------------------------------------------
// TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG
ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +, KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +, KMP_ARCH_X86) // __kmpc_atomic_float4_rd
ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +, KMP_ARCH_X86) // __kmpc_atomic_float8_rd
// !!! TODO: Remove lock operations for "char" since it can't be non-atomic
ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +, KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +, KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd

ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r, 1) // __kmpc_atomic_float10_rd
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r, 1) // __kmpc_atomic_float16_rd
#endif // KMP_HAVE_QUAD

// Fix for CQ220361 on Windows* OS
#if (KMP_OS_WINDOWS)
ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_rd
#else
ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_rd
#endif // (KMP_OS_WINDOWS)
ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_rd
ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c, 1) // __kmpc_atomic_cmplx10_rd
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c, 1) // __kmpc_atomic_cmplx16_rd
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r, 1) // __kmpc_atomic_float16_a16_rd
ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1) // __kmpc_atomic_cmplx16_a16_rd
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
// Atomic WRITE routines

#define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
  KMP_XCHG_FIXED##BITS(lhs, rhs); \
  }
// ------------------------------------------------------------------------
#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
  KMP_XCHG_REAL##BITS(lhs, rhs); \
  }

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_WR(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs; \
    } \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
  OP_CMPXCHG_WR(TYPE, BITS, OP) \
  }
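/*
  Illustration (not part of the runtime): an atomic write is done either
  with a plain atomic exchange (the KMP_XCHG_* paths) or, where no
  wide-enough exchange exists (e.g. 64-bit data on 32-bit IA-32, see the
  entries below), with the same CAS retry loop but new_value pinned to rhs.
  A standalone sketch of the CAS-based write using std::atomic; the
  function name is hypothetical:
  @code
  #include <atomic>
  #include <cstdint>

  void atomic_write64(std::atomic<std::uint64_t> *lhs, std::uint64_t rhs) {
    std::uint64_t old_value = lhs->load();
    // The loop degenerates to "store rhs"; CAS is used only because it is
    // the widest atomic primitive available on such targets.
    while (!lhs->compare_exchange_weak(old_value, rhs)) {
    }
  }
  @endcode
*/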
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
  }
// -------------------------------------------------------------------------

ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =, KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =, KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#else
ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#endif // (KMP_ARCH_X86)

ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =, KMP_ARCH_X86) // __kmpc_atomic_float4_wr
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =, KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#else
ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =, KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#endif // (KMP_ARCH_X86)

ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r, 1) // __kmpc_atomic_float10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r, 1) // __kmpc_atomic_float16_wr
#endif // KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c, 1) // __kmpc_atomic_cmplx8_wr
ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c, 1) // __kmpc_atomic_cmplx10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c, 1) // __kmpc_atomic_cmplx16_wr
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r, 1) // __kmpc_atomic_float16_a16_wr
ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1) // __kmpc_atomic_cmplx16_a16_wr
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// ------------------------------------------------------------------------
// Atomic CAPTURE routines

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands' type and size (fixed* for signed, fixed*u for unsigned
// fixed-size integers)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *lhs, TYPE rhs, int flag) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_CPT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (flag) { \
    (*lhs) OP rhs; \
    new_value = (*lhs); \
  } else { \
    new_value = (*lhs); \
    (*lhs) OP rhs; \
  } \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

#define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (flag) { \
    (*lhs) = (TYPE)((*lhs)OP rhs); \
    new_value = (*lhs); \
  } else { \
    new_value = (*lhs); \
    (*lhs) = (TYPE)((*lhs)OP rhs); \
  } \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = (TYPE)(old_value OP rhs); \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = (TYPE)(old_value OP rhs); \
    } \
    if (flag) { \
      return new_value; \
    } else \
      return old_value; \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }
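/*
  Illustration (not part of the runtime): in every capture routine the flag
  argument selects which value is handed back: flag != 0 returns the value
  after the update, flag == 0 the value before it. A hedged sketch of how a
  compiler might lower the two OpenMP capture forms; the lowering details
  are an assumption, not taken from this file:
  @code
  // { v = x; x = x + 5; }  -- capture the old value:
  //   v = __kmpc_atomic_fixed4_add_cpt(id_ref, gtid, &x, 5, 0);
  // { x = x + 5; v = x; }  -- capture the new value:
  //   v = __kmpc_atomic_fixed4_add_cpt(id_ref, gtid, &x, 5, 1);
  @endcode
*/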
// -------------------------------------------------------------------------
#define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE old_value, new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  if (flag) { \
    return old_value OP rhs; \
  } else \
    return old_value; \
  }
// -------------------------------------------------------------------------

ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_add_cpt
ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -, 0) // __kmpc_atomic_fixed4_sub_cpt
ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +, KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -, KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt

ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt

// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands' type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG
ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +, KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &, 0) // __kmpc_atomic_fixed1_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /, KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *, KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |, 0) // __kmpc_atomic_fixed1_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<, KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -, KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^, 0) // __kmpc_atomic_fixed1_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +, KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &, 0) // __kmpc_atomic_fixed2_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /, KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *, KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |, 0) // __kmpc_atomic_fixed2_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<, KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -, KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^, 0) // __kmpc_atomic_fixed2_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &, 0) // __kmpc_atomic_fixed4_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /, KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /, KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *, KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |, 0) // __kmpc_atomic_fixed4_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<, KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^, 0) // __kmpc_atomic_fixed4_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &, KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /, KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *, KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |, KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<, KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^, KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /, KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *, KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /, KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *, KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
// TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG

// CAPTURE routines for mixed types RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands' type and size (fixed* for signed, fixed*u for unsigned
// fixed-size integers)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
              gtid));

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                                LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
  }

ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp

ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_add_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_sub_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_mul_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r, 1) // __kmpc_atomic_float10_div_cpt_fp

#endif // KMP_HAVE_QUAD
// ------------------------------------------------------------------------
// Routines for C/C++ Reduction operators && and ||

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_L_CPT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (flag) { \
    new_value OP rhs; \
    (*lhs) = new_value; \
  } else { \
    new_value = (*lhs); \
    (*lhs) OP rhs; \
  } \
  \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_L_CPT(OP, 0); \
    return new_value; \
  }
#else
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no combined assignment
#define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }

ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&, KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||, KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&, KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||, KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&, 0) // __kmpc_atomic_fixed4_andl_cpt
ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||, 0) // __kmpc_atomic_fixed4_orl_cpt
ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&, KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||, KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
// -------------------------------------------------------------------------
// Routines for Fortran operators that have no exact match in C:
// MAX, MIN, .EQV., .NEQV.
// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - operator used to check whether any action is still needed
#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  \
  if (*lhs OP rhs) { /* still need actions? */ \
    old_value = *lhs; \
    *lhs = rhs; \
    if (flag) \
      new_value = rhs; \
    else \
      new_value = old_value; \
  } else { \
    new_value = *lhs; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    MIN_MAX_CRITSECT_CPT(OP, 0); \
  }
#else
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    /*TYPE old_value; */ \
    temp_val = *lhs; \
    old_value = temp_val; \
    while (old_value OP rhs && /* still need actions? */ \
           !KMP_COMPARE_AND_STORE_ACQ##BITS( \
               (kmp_int##BITS *)lhs, \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
    } \
    if (flag) \
      return rhs; \
    else \
      return old_value; \
  }

// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value, old_value; \
  if (*lhs OP rhs) { /* need actions? */ \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
    MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
  } \
  return *lhs; \
  }

#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value, old_value; \
  (void)new_value; \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
    MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
  } \
  return *lhs; \
  }

MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <, KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >, KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <, KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >, KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <, 0) // __kmpc_atomic_fixed4_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >, 0) // __kmpc_atomic_fixed4_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <, KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >, KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <, KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >, KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <, KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >, KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r, 1) // __kmpc_atomic_float16_max_cpt
MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r, 1) // __kmpc_atomic_float16_min_cpt
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r, 1) // __kmpc_atomic_float16_max_a16_cpt
MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r, 1) // __kmpc_atomic_float16_min_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
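/*
  Illustration (not part of the runtime): MIN_MAX_CMPXCHG_CPT only attempts
  the swap while the new value would still win the comparison; another
  thread may install an even better value, in which case the loop exits
  without writing. A standalone sketch with std::atomic, capture-after
  semantics; the function name is hypothetical:
  @code
  #include <atomic>

  // Atomic max that returns the resulting maximum (flag == 1 behaviour).
  int atomic_max_cpt(std::atomic<int> *lhs, int rhs) {
    int old_value = lhs->load();
    while (old_value < rhs && /* still need actions? */
           !lhs->compare_exchange_weak(old_value, rhs)) {
    }
    return old_value < rhs ? rhs : old_value;
  }
  @endcode
*/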
// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }

// ------------------------------------------------------------------------

ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^, KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^, KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^, KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^, KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~, KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~, KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~, KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~, KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
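/*
  Illustration (not part of the runtime): .NEQV. is plain bitwise XOR, while
  .EQV. is its complement, which is why the eqv entries above pass the
  two-token operator "^~" so that OP_CMPXCHG_CPT computes old_value ^ ~rhs.
  The identity being relied on, as a checkable snippet; the function name is
  hypothetical:
  @code
  #include <cassert>
  #include <cstdint>

  void eqv_identity(std::int8_t a, std::int8_t b) {
    // a EQV b == NOT (a XOR b) == a XOR (NOT b), bit by bit.
    assert((std::int8_t)(~(a ^ b)) == (std::int8_t)(a ^ ~b));
  }
  @endcode
*/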
// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
#define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  if (flag) { \
    (*lhs) OP rhs; \
    (*out) = (*lhs); \
  } else { \
    (*out) = (*lhs); \
    (*lhs) OP rhs; \
  } \
\
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_WRK(OP## =, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
                                         TYPE rhs, TYPE *out, int flag) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
  }
// The end of workaround for cmplx4

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
                    1) // __kmpc_atomic_float10_add_cpt
ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
                    1) // __kmpc_atomic_float10_sub_cpt
ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
                    1) // __kmpc_atomic_float10_mul_cpt
ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
                    1) // __kmpc_atomic_float10_div_cpt
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
                    1) // __kmpc_atomic_float16_add_cpt
ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
                    1) // __kmpc_atomic_float16_sub_cpt
ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
                    1) // __kmpc_atomic_float16_mul_cpt
ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
                    1) // __kmpc_atomic_float16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
                    1) // __kmpc_atomic_float16_add_a16_cpt
ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
                    1) // __kmpc_atomic_float16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
                    1) // __kmpc_atomic_float16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
                    1) // __kmpc_atomic_float16_div_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// routines for complex types

// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
                        1) // __kmpc_atomic_cmplx4_add_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
                        1) // __kmpc_atomic_cmplx4_sub_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
                        1) // __kmpc_atomic_cmplx4_mul_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
                        1) // __kmpc_atomic_cmplx4_div_cpt
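// Usage sketch (illustrative, not compiled into the runtime): for cmplx4 the
// captured value comes back through the extra 'out' parameter rather than a
// return value, so a compiler lowering
//   #pragma omp atomic capture
//   { v = x; x = x + e; }
// for a 'float _Complex' x could emit something like
//   __kmpc_atomic_cmplx4_add_cpt(&loc, gtid, &x, e, &v, 0 /* capture old */);
// passing flag = 1 instead when the captured value is the updated one; 'loc',
// 'x', 'e', and 'v' here are hypothetical names for the source location,
// target, operand, and capture variable.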
ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
                    1) // __kmpc_atomic_cmplx8_add_cpt
ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
                    1) // __kmpc_atomic_cmplx8_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
                    1) // __kmpc_atomic_cmplx8_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
                    1) // __kmpc_atomic_cmplx8_div_cpt
ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
                    1) // __kmpc_atomic_cmplx10_add_cpt
ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
                    1) // __kmpc_atomic_cmplx10_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
                    1) // __kmpc_atomic_cmplx10_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
                    1) // __kmpc_atomic_cmplx10_div_cpt
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
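// Note: the wider complex flavours (cmplx8, cmplx10, cmplx16) return the
// captured value directly; only cmplx4 goes through the out-parameter
// workaround, per the Win_32e note above.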
// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr
// binop x; v = x; } for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  if (flag) { \
    /*temp_val = (*lhs);*/ \
    (*lhs) = (TYPE)((rhs)OP(*lhs)); \
    new_value = (*lhs); \
  } else { \
    new_value = (*lhs); \
    (*lhs) = (TYPE)((rhs)OP(*lhs)); \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_REV(TYPE, OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
// *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = (TYPE)(rhs OP old_value); \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = (TYPE)(rhs OP old_value); \
    } \
    if (flag) { \
      return new_value; \
    } else \
      return old_value; \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  (void)new_value; \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  }

ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
// TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
// TYPE_ID, OP_ID, TYPE - detailed above
// OP - operator
// LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
                        1) // __kmpc_atomic_float10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
                        1) // __kmpc_atomic_float10_div_cpt_rev
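// Illustrative sketch (not compiled into the runtime): reverse capture swaps
// the operand order, so __kmpc_atomic_float10_sub_cpt_rev performs, under
// the 10r lock,
//   flag == 1:  *lhs = rhs - *lhs; return *lhs;          // capture new value
//   flag == 0:  v = *lhs; *lhs = rhs - *lhs; return v;   // capture old value
// matching the OpenMP forms { x = expr - x; v = x; } and
// { v = x; x = expr - x; } respectively.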
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
                        1) // __kmpc_atomic_float16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
                        1) // __kmpc_atomic_float16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
                        1) // __kmpc_atomic_float16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
                        1) // __kmpc_atomic_float16_div_a16_cpt_rev
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// routines for complex types

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.
#define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  if (flag) { \
    (*lhs) = (rhs)OP(*lhs); \
    (*out) = (*lhs); \
  } else { \
    (*out) = (*lhs); \
    (*lhs) = (rhs)OP(*lhs); \
  } \
\
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_REV_WRK(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
                                    GOMP_FLAG) \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
  }
// The end of workaround for cmplx4
// !!! TODO: check if we need to return void for cmplx4 routines
// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
                            1) // __kmpc_atomic_cmplx4_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
                            1) // __kmpc_atomic_cmplx4_div_cpt_rev

ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
                        1) // __kmpc_atomic_cmplx8_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
                        1) // __kmpc_atomic_cmplx8_div_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
                        1) // __kmpc_atomic_cmplx10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
                        1) // __kmpc_atomic_cmplx10_div_cpt_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// Capture reverse for mixed type: RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                                   RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                                    LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */ \
  }
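// Illustrative note (not compiled into the runtime): the _fp variants take
// the right-hand operand as _Quad while the updated location keeps its own
// type, so the generated signature looks roughly like
//   char __kmpc_atomic_fixed1_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
//                                            char *lhs, _Quad rhs, int flag);
// which atomically computes *lhs = (char)(rhs - *lhs) and returns the old or
// new value depending on flag.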
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp

ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp

#endif // KMP_HAVE_QUAD

// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}

#define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
                                     TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP(LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  old_value = (*lhs); \
  (*lhs) = rhs; \
\
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return old_value;

// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP(FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    CRITICAL_SWP(0); \
  }
#else
#define GOMP_CRITICAL_SWP(FLAG)
#endif /* KMP_GOMP_COMPAT */

#define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
  return old_value; \
  }
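// Illustrative sketch (not compiled into the runtime): for natively supported
// sizes the swap is a single atomic exchange, so
// ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) yields roughly
//   kmp_int32 __kmpc_atomic_fixed4_swp(ident_t *id_ref, int gtid,
//                                      kmp_int32 *lhs, kmp_int32 rhs) {
//     return KMP_XCHG_FIXED32(lhs, rhs); // returns previous value of *lhs
//   }
// implementing the OpenMP capture-write form { v = x; x = expr; }.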
// ------------------------------------------------------------------------
#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
  return old_value; \
  }

// ------------------------------------------------------------------------
#define CMPXCHG_SWP(TYPE, BITS) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs; \
    } \
    return old_value; \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  (void)old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  CMPXCHG_SWP(TYPE, BITS) \
  }

ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp

ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
                      KMP_ARCH_X86) // __kmpc_atomic_float4_swp

#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#else
ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
                      KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#endif // (KMP_ARCH_X86)

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
#define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  CRITICAL_SWP(LCK_ID) \
  }
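// Note: on 32-bit IA-32 an 8-byte operand cannot be exchanged with a single
// atomic exchange, so the 64-bit swaps fall back to the CMPXCHG_SWP loop;
// other targets use the plain 64-bit exchange instead, as selected by the
// #if (KMP_ARCH_X86) block above.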
// ------------------------------------------------------------------------
// !!! TODO: check if we need to return void for cmplx4 routines
// Workaround for cmplx4. Regular routines with return value don't work
// on Win_32e. Let's return captured values through the additional parameter.

#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
                                     TYPE rhs, TYPE *out) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP_WRK(LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
\
  tmp = (*lhs); \
  (*lhs) = (rhs); \
  (*out) = tmp; \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP_WRK(FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    CRITICAL_SWP_WRK(0); \
  }
#else
#define GOMP_CRITICAL_SWP_WRK(FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
  TYPE tmp; \
  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
  CRITICAL_SWP_WRK(LCK_ID) \
  }
// The end of workaround for cmplx4

ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
#endif // KMP_HAVE_QUAD
// cmplx4 routine to return void
ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp

// ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
// __kmpc_atomic_cmplx4_swp

ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
                    1) // __kmpc_atomic_float16_a16_swp
ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
                    1) // __kmpc_atomic_cmplx16_a16_swp
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// End of OpenMP 4.0 Capture

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64

#undef OP_CRITICAL

/* ------------------------------------------------------------------------ */
/* Generic atomic routines */
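// Usage sketch (illustrative, not compiled into the runtime): the generic
// entry points below take the operation as a callback, so a compiler can
// handle opaque operands of any of the supported sizes. Assuming a
// hypothetical helper
//   static void add_int8(void *result, void *a, void *b) {
//     *(kmp_int8 *)result = *(kmp_int8 *)a + *(kmp_int8 *)b;
//   }
// an atomic 1-byte add could be issued as
//   __kmpc_atomic_1(&loc, gtid, &x, &e, add_int8);
// where 'loc', 'x', and 'e' are hypothetical names; the runtime either
// retries with compare-and-store or serializes under the size-specific lock,
// as the definitions below show.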
void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#else
      TRUE
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int8 old_value, new_value;

    old_value = *(kmp_int8 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ8(
        (kmp_int8 *)lhs, *(kmp_int8 *)&old_value, *(kmp_int8 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int8 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 1-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
  }
}

void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int16 old_value, new_value;

    old_value = *(kmp_int16 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ16(
        (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int16 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 2-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
  }
}
void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
  // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
  // Gomp compatibility is broken if this routine is called for floats.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
  ) {
    kmp_int32 old_value, new_value;

    old_value = *(kmp_int32 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ32(
        (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int32 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_4i for all 4-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
  }
}

void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (

#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int64 old_value, new_value;

    old_value = *(kmp_int64 *)lhs;
    (*f)(&new_value, &old_value, rhs);
    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ64(
        (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int64 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_8i for all 8-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
  }
}

void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
}

void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
}
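// Note: the size-specific locks used here follow the LCK_ID naming seen
// above: __kmp_atomic_lock_10r for 10-byte reals, __kmp_atomic_lock_16c for
// 16-byte complex, and __kmp_atomic_lock_20c / __kmp_atomic_lock_32c for the
// wider complex flavours serialized by __kmpc_atomic_20 and __kmpc_atomic_32
// below.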
void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
}

void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
}

// AC: same two routines as GOMP_atomic_start/end, but will be called by our
// compiler; duplicated in order to not use third-party names in pure Intel
// code
// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
void __kmpc_atomic_start(void) {
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
}

void __kmpc_atomic_end(void) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
}

/*!
@}
*/

// end of file