1 /* 2 * kmp_atomic.cpp -- ATOMIC implementation routines 3 */ 4 5 //===----------------------------------------------------------------------===// 6 // 7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 8 // See https://llvm.org/LICENSE.txt for license information. 9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "kmp_atomic.h" 14 #include "kmp.h" // TRUE, asm routines prototypes 15 16 typedef unsigned char uchar; 17 typedef unsigned short ushort; 18 19 /*! 20 @defgroup ATOMIC_OPS Atomic Operations 21 These functions are used for implementing the many different varieties of atomic 22 operations. 23 24 The compiler is at liberty to inline atomic operations that are naturally 25 supported by the target architecture. For instance on IA-32 architecture an 26 atomic like this can be inlined 27 @code 28 static int s = 0; 29 #pragma omp atomic 30 s++; 31 @endcode 32 using the single instruction: `lock; incl s` 33 34 However the runtime does provide entrypoints for these operations to support 35 compilers that choose not to inline them. (For instance, 36 `__kmpc_atomic_fixed4_add` could be used to perform the increment above.) 37 38 The names of the functions are encoded by using the data type name and the 39 operation name, as in these tables. 40 41 Data Type | Data type encoding 42 -----------|--------------- 43 int8_t | `fixed1` 44 uint8_t | `fixed1u` 45 int16_t | `fixed2` 46 uint16_t | `fixed2u` 47 int32_t | `fixed4` 48 uint32_t | `fixed4u` 49 int64_t | `fixed8` 50 uint64_t | `fixed8u` 51 float | `float4` 52 double | `float8` 53 float 10 (8087 eighty bit float) | `float10` _Quad (128 bit float) | `float16` 54 complex<float> | `cmplx4` 55 complex<double> | `cmplx8` 56 complex<float10> | `cmplx10` complex<_Quad> | `cmplx16` 57 <br> 58 59 Operation | Operation encoding 60 ----------|------------------- 61 + | add 62 - | sub 63 \* | mul 64 / | div 65 & | andb 66 << | shl 67 \>\> | shr 68 \| | orb 69 ^ | xor 70 && | andl 71 \|\| | orl 72 maximum | max 73 minimum | min 74 .eqv. | eqv 75 .neqv. | neqv 76 77 <br> 78 For non-commutative operations, `_rev` can also be added for the reversed 79 operation. For the functions that capture the result, the suffix `_cpt` is 80 added. 81 82 Update Functions 83 ================ 84 The general form of an atomic function that just performs an update (without a 85 `capture`) is 86 @code 87 void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE * 88 lhs, TYPE rhs ); 89 @endcode 90 @param ident_t a pointer to source location 91 @param gtid the global thread id 92 @param lhs a pointer to the left operand 93 @param rhs the right operand 94 95 `capture` functions 96 =================== 97 The capture functions perform an atomic update and return a result, which is 98 either the value before the update, or the value after it. They take an additional 99 argument to determine which result is returned. 100 Their general form is therefore 101 @code 102 TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE * 103 lhs, TYPE rhs, int flag ); 104 @endcode 105 @param ident_t a pointer to source location 106 @param gtid the global thread id 107 @param lhs a pointer to the left operand 108 @param rhs the right operand 109 @param flag one if the result is to be captured *after* the operation, zero if 110 captured *before*.
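For example (illustrative only; the actual lowering is chosen by the compiler), a capture such as
@code
int v;
#pragma omp atomic capture
{ s += 5; v = s; }
@endcode
could, when not inlined, become a call like
@code
v = __kmpc_atomic_fixed4_add_cpt( &loc, gtid, &s, 5, 1 ); // flag==1: capture the value *after* the add
@endcode
where `loc` and `gtid` stand for the source-location descriptor and global thread id supplied by the compiler and runtime.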
111 112 The one set of exceptions to this is the `complex<float>` type where the value 113 is not returned, rather an extra argument pointer is passed. 114 115 They look like 116 @code 117 void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * 118 lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag ); 119 @endcode 120 121 Read and Write Operations 122 ========================= 123 The OpenMP<sup>*</sup> standard now supports atomic operations that simply 124 ensure that the value is read or written atomically, with no modification 125 performed. In many cases on IA-32 architecture these operations can be inlined 126 since the architecture guarantees that no tearing occurs on aligned objects 127 accessed with a single memory operation of up to 64 bits in size. 128 129 The general form of the read operations is 130 @code 131 TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc ); 132 @endcode 133 134 For the write operations the form is 135 @code 136 void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs 137 ); 138 @endcode 139 140 Full list of functions 141 ====================== 142 This leads to the generation of 376 atomic functions, as follows. 143 144 Functions for integers 145 --------------------- 146 There are versions here for integers of size 1,2,4 and 8 bytes both signed and 147 unsigned (where that matters). 148 @code 149 __kmpc_atomic_fixed1_add 150 __kmpc_atomic_fixed1_add_cpt 151 __kmpc_atomic_fixed1_add_fp 152 __kmpc_atomic_fixed1_andb 153 __kmpc_atomic_fixed1_andb_cpt 154 __kmpc_atomic_fixed1_andl 155 __kmpc_atomic_fixed1_andl_cpt 156 __kmpc_atomic_fixed1_div 157 __kmpc_atomic_fixed1_div_cpt 158 __kmpc_atomic_fixed1_div_cpt_rev 159 __kmpc_atomic_fixed1_div_float8 160 __kmpc_atomic_fixed1_div_fp 161 __kmpc_atomic_fixed1_div_rev 162 __kmpc_atomic_fixed1_eqv 163 __kmpc_atomic_fixed1_eqv_cpt 164 __kmpc_atomic_fixed1_max 165 __kmpc_atomic_fixed1_max_cpt 166 __kmpc_atomic_fixed1_min 167 __kmpc_atomic_fixed1_min_cpt 168 __kmpc_atomic_fixed1_mul 169 __kmpc_atomic_fixed1_mul_cpt 170 __kmpc_atomic_fixed1_mul_float8 171 __kmpc_atomic_fixed1_mul_fp 172 __kmpc_atomic_fixed1_neqv 173 __kmpc_atomic_fixed1_neqv_cpt 174 __kmpc_atomic_fixed1_orb 175 __kmpc_atomic_fixed1_orb_cpt 176 __kmpc_atomic_fixed1_orl 177 __kmpc_atomic_fixed1_orl_cpt 178 __kmpc_atomic_fixed1_rd 179 __kmpc_atomic_fixed1_shl 180 __kmpc_atomic_fixed1_shl_cpt 181 __kmpc_atomic_fixed1_shl_cpt_rev 182 __kmpc_atomic_fixed1_shl_rev 183 __kmpc_atomic_fixed1_shr 184 __kmpc_atomic_fixed1_shr_cpt 185 __kmpc_atomic_fixed1_shr_cpt_rev 186 __kmpc_atomic_fixed1_shr_rev 187 __kmpc_atomic_fixed1_sub 188 __kmpc_atomic_fixed1_sub_cpt 189 __kmpc_atomic_fixed1_sub_cpt_rev 190 __kmpc_atomic_fixed1_sub_fp 191 __kmpc_atomic_fixed1_sub_rev 192 __kmpc_atomic_fixed1_swp 193 __kmpc_atomic_fixed1_wr 194 __kmpc_atomic_fixed1_xor 195 __kmpc_atomic_fixed1_xor_cpt 196 __kmpc_atomic_fixed1u_add_fp 197 __kmpc_atomic_fixed1u_sub_fp 198 __kmpc_atomic_fixed1u_mul_fp 199 __kmpc_atomic_fixed1u_div 200 __kmpc_atomic_fixed1u_div_cpt 201 __kmpc_atomic_fixed1u_div_cpt_rev 202 __kmpc_atomic_fixed1u_div_fp 203 __kmpc_atomic_fixed1u_div_rev 204 __kmpc_atomic_fixed1u_shr 205 __kmpc_atomic_fixed1u_shr_cpt 206 __kmpc_atomic_fixed1u_shr_cpt_rev 207 __kmpc_atomic_fixed1u_shr_rev 208 __kmpc_atomic_fixed2_add 209 __kmpc_atomic_fixed2_add_cpt 210 __kmpc_atomic_fixed2_add_fp 211 __kmpc_atomic_fixed2_andb 212 __kmpc_atomic_fixed2_andb_cpt 213 __kmpc_atomic_fixed2_andl 214 __kmpc_atomic_fixed2_andl_cpt 215 
__kmpc_atomic_fixed2_div 216 __kmpc_atomic_fixed2_div_cpt 217 __kmpc_atomic_fixed2_div_cpt_rev 218 __kmpc_atomic_fixed2_div_float8 219 __kmpc_atomic_fixed2_div_fp 220 __kmpc_atomic_fixed2_div_rev 221 __kmpc_atomic_fixed2_eqv 222 __kmpc_atomic_fixed2_eqv_cpt 223 __kmpc_atomic_fixed2_max 224 __kmpc_atomic_fixed2_max_cpt 225 __kmpc_atomic_fixed2_min 226 __kmpc_atomic_fixed2_min_cpt 227 __kmpc_atomic_fixed2_mul 228 __kmpc_atomic_fixed2_mul_cpt 229 __kmpc_atomic_fixed2_mul_float8 230 __kmpc_atomic_fixed2_mul_fp 231 __kmpc_atomic_fixed2_neqv 232 __kmpc_atomic_fixed2_neqv_cpt 233 __kmpc_atomic_fixed2_orb 234 __kmpc_atomic_fixed2_orb_cpt 235 __kmpc_atomic_fixed2_orl 236 __kmpc_atomic_fixed2_orl_cpt 237 __kmpc_atomic_fixed2_rd 238 __kmpc_atomic_fixed2_shl 239 __kmpc_atomic_fixed2_shl_cpt 240 __kmpc_atomic_fixed2_shl_cpt_rev 241 __kmpc_atomic_fixed2_shl_rev 242 __kmpc_atomic_fixed2_shr 243 __kmpc_atomic_fixed2_shr_cpt 244 __kmpc_atomic_fixed2_shr_cpt_rev 245 __kmpc_atomic_fixed2_shr_rev 246 __kmpc_atomic_fixed2_sub 247 __kmpc_atomic_fixed2_sub_cpt 248 __kmpc_atomic_fixed2_sub_cpt_rev 249 __kmpc_atomic_fixed2_sub_fp 250 __kmpc_atomic_fixed2_sub_rev 251 __kmpc_atomic_fixed2_swp 252 __kmpc_atomic_fixed2_wr 253 __kmpc_atomic_fixed2_xor 254 __kmpc_atomic_fixed2_xor_cpt 255 __kmpc_atomic_fixed2u_add_fp 256 __kmpc_atomic_fixed2u_sub_fp 257 __kmpc_atomic_fixed2u_mul_fp 258 __kmpc_atomic_fixed2u_div 259 __kmpc_atomic_fixed2u_div_cpt 260 __kmpc_atomic_fixed2u_div_cpt_rev 261 __kmpc_atomic_fixed2u_div_fp 262 __kmpc_atomic_fixed2u_div_rev 263 __kmpc_atomic_fixed2u_shr 264 __kmpc_atomic_fixed2u_shr_cpt 265 __kmpc_atomic_fixed2u_shr_cpt_rev 266 __kmpc_atomic_fixed2u_shr_rev 267 __kmpc_atomic_fixed4_add 268 __kmpc_atomic_fixed4_add_cpt 269 __kmpc_atomic_fixed4_add_fp 270 __kmpc_atomic_fixed4_andb 271 __kmpc_atomic_fixed4_andb_cpt 272 __kmpc_atomic_fixed4_andl 273 __kmpc_atomic_fixed4_andl_cpt 274 __kmpc_atomic_fixed4_div 275 __kmpc_atomic_fixed4_div_cpt 276 __kmpc_atomic_fixed4_div_cpt_rev 277 __kmpc_atomic_fixed4_div_float8 278 __kmpc_atomic_fixed4_div_fp 279 __kmpc_atomic_fixed4_div_rev 280 __kmpc_atomic_fixed4_eqv 281 __kmpc_atomic_fixed4_eqv_cpt 282 __kmpc_atomic_fixed4_max 283 __kmpc_atomic_fixed4_max_cpt 284 __kmpc_atomic_fixed4_min 285 __kmpc_atomic_fixed4_min_cpt 286 __kmpc_atomic_fixed4_mul 287 __kmpc_atomic_fixed4_mul_cpt 288 __kmpc_atomic_fixed4_mul_float8 289 __kmpc_atomic_fixed4_mul_fp 290 __kmpc_atomic_fixed4_neqv 291 __kmpc_atomic_fixed4_neqv_cpt 292 __kmpc_atomic_fixed4_orb 293 __kmpc_atomic_fixed4_orb_cpt 294 __kmpc_atomic_fixed4_orl 295 __kmpc_atomic_fixed4_orl_cpt 296 __kmpc_atomic_fixed4_rd 297 __kmpc_atomic_fixed4_shl 298 __kmpc_atomic_fixed4_shl_cpt 299 __kmpc_atomic_fixed4_shl_cpt_rev 300 __kmpc_atomic_fixed4_shl_rev 301 __kmpc_atomic_fixed4_shr 302 __kmpc_atomic_fixed4_shr_cpt 303 __kmpc_atomic_fixed4_shr_cpt_rev 304 __kmpc_atomic_fixed4_shr_rev 305 __kmpc_atomic_fixed4_sub 306 __kmpc_atomic_fixed4_sub_cpt 307 __kmpc_atomic_fixed4_sub_cpt_rev 308 __kmpc_atomic_fixed4_sub_fp 309 __kmpc_atomic_fixed4_sub_rev 310 __kmpc_atomic_fixed4_swp 311 __kmpc_atomic_fixed4_wr 312 __kmpc_atomic_fixed4_xor 313 __kmpc_atomic_fixed4_xor_cpt 314 __kmpc_atomic_fixed4u_add_fp 315 __kmpc_atomic_fixed4u_sub_fp 316 __kmpc_atomic_fixed4u_mul_fp 317 __kmpc_atomic_fixed4u_div 318 __kmpc_atomic_fixed4u_div_cpt 319 __kmpc_atomic_fixed4u_div_cpt_rev 320 __kmpc_atomic_fixed4u_div_fp 321 __kmpc_atomic_fixed4u_div_rev 322 __kmpc_atomic_fixed4u_shr 323 __kmpc_atomic_fixed4u_shr_cpt 324 __kmpc_atomic_fixed4u_shr_cpt_rev 325 
__kmpc_atomic_fixed4u_shr_rev 326 __kmpc_atomic_fixed8_add 327 __kmpc_atomic_fixed8_add_cpt 328 __kmpc_atomic_fixed8_add_fp 329 __kmpc_atomic_fixed8_andb 330 __kmpc_atomic_fixed8_andb_cpt 331 __kmpc_atomic_fixed8_andl 332 __kmpc_atomic_fixed8_andl_cpt 333 __kmpc_atomic_fixed8_div 334 __kmpc_atomic_fixed8_div_cpt 335 __kmpc_atomic_fixed8_div_cpt_rev 336 __kmpc_atomic_fixed8_div_float8 337 __kmpc_atomic_fixed8_div_fp 338 __kmpc_atomic_fixed8_div_rev 339 __kmpc_atomic_fixed8_eqv 340 __kmpc_atomic_fixed8_eqv_cpt 341 __kmpc_atomic_fixed8_max 342 __kmpc_atomic_fixed8_max_cpt 343 __kmpc_atomic_fixed8_min 344 __kmpc_atomic_fixed8_min_cpt 345 __kmpc_atomic_fixed8_mul 346 __kmpc_atomic_fixed8_mul_cpt 347 __kmpc_atomic_fixed8_mul_float8 348 __kmpc_atomic_fixed8_mul_fp 349 __kmpc_atomic_fixed8_neqv 350 __kmpc_atomic_fixed8_neqv_cpt 351 __kmpc_atomic_fixed8_orb 352 __kmpc_atomic_fixed8_orb_cpt 353 __kmpc_atomic_fixed8_orl 354 __kmpc_atomic_fixed8_orl_cpt 355 __kmpc_atomic_fixed8_rd 356 __kmpc_atomic_fixed8_shl 357 __kmpc_atomic_fixed8_shl_cpt 358 __kmpc_atomic_fixed8_shl_cpt_rev 359 __kmpc_atomic_fixed8_shl_rev 360 __kmpc_atomic_fixed8_shr 361 __kmpc_atomic_fixed8_shr_cpt 362 __kmpc_atomic_fixed8_shr_cpt_rev 363 __kmpc_atomic_fixed8_shr_rev 364 __kmpc_atomic_fixed8_sub 365 __kmpc_atomic_fixed8_sub_cpt 366 __kmpc_atomic_fixed8_sub_cpt_rev 367 __kmpc_atomic_fixed8_sub_fp 368 __kmpc_atomic_fixed8_sub_rev 369 __kmpc_atomic_fixed8_swp 370 __kmpc_atomic_fixed8_wr 371 __kmpc_atomic_fixed8_xor 372 __kmpc_atomic_fixed8_xor_cpt 373 __kmpc_atomic_fixed8u_add_fp 374 __kmpc_atomic_fixed8u_sub_fp 375 __kmpc_atomic_fixed8u_mul_fp 376 __kmpc_atomic_fixed8u_div 377 __kmpc_atomic_fixed8u_div_cpt 378 __kmpc_atomic_fixed8u_div_cpt_rev 379 __kmpc_atomic_fixed8u_div_fp 380 __kmpc_atomic_fixed8u_div_rev 381 __kmpc_atomic_fixed8u_shr 382 __kmpc_atomic_fixed8u_shr_cpt 383 __kmpc_atomic_fixed8u_shr_cpt_rev 384 __kmpc_atomic_fixed8u_shr_rev 385 @endcode 386 387 Functions for floating point 388 ---------------------------- 389 There are versions here for floating point numbers of size 4, 8, 10 and 16 390 bytes. (Ten byte floats are used by X87, but are now rare). 
391 @code 392 __kmpc_atomic_float4_add 393 __kmpc_atomic_float4_add_cpt 394 __kmpc_atomic_float4_add_float8 395 __kmpc_atomic_float4_add_fp 396 __kmpc_atomic_float4_div 397 __kmpc_atomic_float4_div_cpt 398 __kmpc_atomic_float4_div_cpt_rev 399 __kmpc_atomic_float4_div_float8 400 __kmpc_atomic_float4_div_fp 401 __kmpc_atomic_float4_div_rev 402 __kmpc_atomic_float4_max 403 __kmpc_atomic_float4_max_cpt 404 __kmpc_atomic_float4_min 405 __kmpc_atomic_float4_min_cpt 406 __kmpc_atomic_float4_mul 407 __kmpc_atomic_float4_mul_cpt 408 __kmpc_atomic_float4_mul_float8 409 __kmpc_atomic_float4_mul_fp 410 __kmpc_atomic_float4_rd 411 __kmpc_atomic_float4_sub 412 __kmpc_atomic_float4_sub_cpt 413 __kmpc_atomic_float4_sub_cpt_rev 414 __kmpc_atomic_float4_sub_float8 415 __kmpc_atomic_float4_sub_fp 416 __kmpc_atomic_float4_sub_rev 417 __kmpc_atomic_float4_swp 418 __kmpc_atomic_float4_wr 419 __kmpc_atomic_float8_add 420 __kmpc_atomic_float8_add_cpt 421 __kmpc_atomic_float8_add_fp 422 __kmpc_atomic_float8_div 423 __kmpc_atomic_float8_div_cpt 424 __kmpc_atomic_float8_div_cpt_rev 425 __kmpc_atomic_float8_div_fp 426 __kmpc_atomic_float8_div_rev 427 __kmpc_atomic_float8_max 428 __kmpc_atomic_float8_max_cpt 429 __kmpc_atomic_float8_min 430 __kmpc_atomic_float8_min_cpt 431 __kmpc_atomic_float8_mul 432 __kmpc_atomic_float8_mul_cpt 433 __kmpc_atomic_float8_mul_fp 434 __kmpc_atomic_float8_rd 435 __kmpc_atomic_float8_sub 436 __kmpc_atomic_float8_sub_cpt 437 __kmpc_atomic_float8_sub_cpt_rev 438 __kmpc_atomic_float8_sub_fp 439 __kmpc_atomic_float8_sub_rev 440 __kmpc_atomic_float8_swp 441 __kmpc_atomic_float8_wr 442 __kmpc_atomic_float10_add 443 __kmpc_atomic_float10_add_cpt 444 __kmpc_atomic_float10_add_fp 445 __kmpc_atomic_float10_div 446 __kmpc_atomic_float10_div_cpt 447 __kmpc_atomic_float10_div_cpt_rev 448 __kmpc_atomic_float10_div_fp 449 __kmpc_atomic_float10_div_rev 450 __kmpc_atomic_float10_mul 451 __kmpc_atomic_float10_mul_cpt 452 __kmpc_atomic_float10_mul_fp 453 __kmpc_atomic_float10_rd 454 __kmpc_atomic_float10_sub 455 __kmpc_atomic_float10_sub_cpt 456 __kmpc_atomic_float10_sub_cpt_rev 457 __kmpc_atomic_float10_sub_fp 458 __kmpc_atomic_float10_sub_rev 459 __kmpc_atomic_float10_swp 460 __kmpc_atomic_float10_wr 461 __kmpc_atomic_float16_add 462 __kmpc_atomic_float16_add_cpt 463 __kmpc_atomic_float16_div 464 __kmpc_atomic_float16_div_cpt 465 __kmpc_atomic_float16_div_cpt_rev 466 __kmpc_atomic_float16_div_rev 467 __kmpc_atomic_float16_max 468 __kmpc_atomic_float16_max_cpt 469 __kmpc_atomic_float16_min 470 __kmpc_atomic_float16_min_cpt 471 __kmpc_atomic_float16_mul 472 __kmpc_atomic_float16_mul_cpt 473 __kmpc_atomic_float16_rd 474 __kmpc_atomic_float16_sub 475 __kmpc_atomic_float16_sub_cpt 476 __kmpc_atomic_float16_sub_cpt_rev 477 __kmpc_atomic_float16_sub_rev 478 __kmpc_atomic_float16_swp 479 __kmpc_atomic_float16_wr 480 @endcode 481 482 Functions for Complex types 483 --------------------------- 484 Functions for complex types whose component floating point variables are of size 485 4,8,10 or 16 bytes. The names here are based on the size of the component float, 486 *not* the size of the complex type. So `__kmpc_atomic_cmplx8_add` is an 487 operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`. 
488 489 @code 490 __kmpc_atomic_cmplx4_add 491 __kmpc_atomic_cmplx4_add_cmplx8 492 __kmpc_atomic_cmplx4_add_cpt 493 __kmpc_atomic_cmplx4_div 494 __kmpc_atomic_cmplx4_div_cmplx8 495 __kmpc_atomic_cmplx4_div_cpt 496 __kmpc_atomic_cmplx4_div_cpt_rev 497 __kmpc_atomic_cmplx4_div_rev 498 __kmpc_atomic_cmplx4_mul 499 __kmpc_atomic_cmplx4_mul_cmplx8 500 __kmpc_atomic_cmplx4_mul_cpt 501 __kmpc_atomic_cmplx4_rd 502 __kmpc_atomic_cmplx4_sub 503 __kmpc_atomic_cmplx4_sub_cmplx8 504 __kmpc_atomic_cmplx4_sub_cpt 505 __kmpc_atomic_cmplx4_sub_cpt_rev 506 __kmpc_atomic_cmplx4_sub_rev 507 __kmpc_atomic_cmplx4_swp 508 __kmpc_atomic_cmplx4_wr 509 __kmpc_atomic_cmplx8_add 510 __kmpc_atomic_cmplx8_add_cpt 511 __kmpc_atomic_cmplx8_div 512 __kmpc_atomic_cmplx8_div_cpt 513 __kmpc_atomic_cmplx8_div_cpt_rev 514 __kmpc_atomic_cmplx8_div_rev 515 __kmpc_atomic_cmplx8_mul 516 __kmpc_atomic_cmplx8_mul_cpt 517 __kmpc_atomic_cmplx8_rd 518 __kmpc_atomic_cmplx8_sub 519 __kmpc_atomic_cmplx8_sub_cpt 520 __kmpc_atomic_cmplx8_sub_cpt_rev 521 __kmpc_atomic_cmplx8_sub_rev 522 __kmpc_atomic_cmplx8_swp 523 __kmpc_atomic_cmplx8_wr 524 __kmpc_atomic_cmplx10_add 525 __kmpc_atomic_cmplx10_add_cpt 526 __kmpc_atomic_cmplx10_div 527 __kmpc_atomic_cmplx10_div_cpt 528 __kmpc_atomic_cmplx10_div_cpt_rev 529 __kmpc_atomic_cmplx10_div_rev 530 __kmpc_atomic_cmplx10_mul 531 __kmpc_atomic_cmplx10_mul_cpt 532 __kmpc_atomic_cmplx10_rd 533 __kmpc_atomic_cmplx10_sub 534 __kmpc_atomic_cmplx10_sub_cpt 535 __kmpc_atomic_cmplx10_sub_cpt_rev 536 __kmpc_atomic_cmplx10_sub_rev 537 __kmpc_atomic_cmplx10_swp 538 __kmpc_atomic_cmplx10_wr 539 __kmpc_atomic_cmplx16_add 540 __kmpc_atomic_cmplx16_add_cpt 541 __kmpc_atomic_cmplx16_div 542 __kmpc_atomic_cmplx16_div_cpt 543 __kmpc_atomic_cmplx16_div_cpt_rev 544 __kmpc_atomic_cmplx16_div_rev 545 __kmpc_atomic_cmplx16_mul 546 __kmpc_atomic_cmplx16_mul_cpt 547 __kmpc_atomic_cmplx16_rd 548 __kmpc_atomic_cmplx16_sub 549 __kmpc_atomic_cmplx16_sub_cpt 550 __kmpc_atomic_cmplx16_sub_cpt_rev 551 __kmpc_atomic_cmplx16_swp 552 __kmpc_atomic_cmplx16_wr 553 @endcode 554 */ 555 556 /*! 
557 @ingroup ATOMIC_OPS 558 @{ 559 */ 560 561 /* 562 * Global vars 563 */ 564 565 #ifndef KMP_GOMP_COMPAT 566 int __kmp_atomic_mode = 1; // Intel perf 567 #else 568 int __kmp_atomic_mode = 2; // GOMP compatibility 569 #endif /* KMP_GOMP_COMPAT */ 570 571 KMP_ALIGN(128) 572 573 // Control access to all user coded atomics in Gnu compat mode 574 kmp_atomic_lock_t __kmp_atomic_lock; 575 // Control access to all user coded atomics for 1-byte fixed data types 576 kmp_atomic_lock_t __kmp_atomic_lock_1i; 577 // Control access to all user coded atomics for 2-byte fixed data types 578 kmp_atomic_lock_t __kmp_atomic_lock_2i; 579 // Control access to all user coded atomics for 4-byte fixed data types 580 kmp_atomic_lock_t __kmp_atomic_lock_4i; 581 // Control access to all user coded atomics for kmp_real32 data type 582 kmp_atomic_lock_t __kmp_atomic_lock_4r; 583 // Control access to all user coded atomics for 8-byte fixed data types 584 kmp_atomic_lock_t __kmp_atomic_lock_8i; 585 // Control access to all user coded atomics for kmp_real64 data type 586 kmp_atomic_lock_t __kmp_atomic_lock_8r; 587 // Control access to all user coded atomics for complex byte data type 588 kmp_atomic_lock_t __kmp_atomic_lock_8c; 589 // Control access to all user coded atomics for long double data type 590 kmp_atomic_lock_t __kmp_atomic_lock_10r; 591 // Control access to all user coded atomics for _Quad data type 592 kmp_atomic_lock_t __kmp_atomic_lock_16r; 593 // Control access to all user coded atomics for double complex data type 594 kmp_atomic_lock_t __kmp_atomic_lock_16c; 595 // Control access to all user coded atomics for long double complex type 596 kmp_atomic_lock_t __kmp_atomic_lock_20c; 597 // Control access to all user coded atomics for _Quad complex data type 598 kmp_atomic_lock_t __kmp_atomic_lock_32c; 599 600 /* 2007-03-02: 601 Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug 602 on *_32 and *_32e. This is just a temporary workaround for the problem. It 603 seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines 604 in assembler language. 
*/ 605 #define KMP_ATOMIC_VOLATILE volatile 606 607 #if (KMP_ARCH_X86) && KMP_HAVE_QUAD 608 609 static inline Quad_a4_t operator+(Quad_a4_t &lhs, Quad_a4_t &rhs) { 610 return lhs.q + rhs.q; 611 } 612 static inline Quad_a4_t operator-(Quad_a4_t &lhs, Quad_a4_t &rhs) { 613 return lhs.q - rhs.q; 614 } 615 static inline Quad_a4_t operator*(Quad_a4_t &lhs, Quad_a4_t &rhs) { 616 return lhs.q * rhs.q; 617 } 618 static inline Quad_a4_t operator/(Quad_a4_t &lhs, Quad_a4_t &rhs) { 619 return lhs.q / rhs.q; 620 } 621 static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) { 622 return lhs.q < rhs.q; 623 } 624 static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) { 625 return lhs.q > rhs.q; 626 } 627 628 static inline Quad_a16_t operator+(Quad_a16_t &lhs, Quad_a16_t &rhs) { 629 return lhs.q + rhs.q; 630 } 631 static inline Quad_a16_t operator-(Quad_a16_t &lhs, Quad_a16_t &rhs) { 632 return lhs.q - rhs.q; 633 } 634 static inline Quad_a16_t operator*(Quad_a16_t &lhs, Quad_a16_t &rhs) { 635 return lhs.q * rhs.q; 636 } 637 static inline Quad_a16_t operator/(Quad_a16_t &lhs, Quad_a16_t &rhs) { 638 return lhs.q / rhs.q; 639 } 640 static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) { 641 return lhs.q < rhs.q; 642 } 643 static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) { 644 return lhs.q > rhs.q; 645 } 646 647 static inline kmp_cmplx128_a4_t operator+(kmp_cmplx128_a4_t &lhs, 648 kmp_cmplx128_a4_t &rhs) { 649 return lhs.q + rhs.q; 650 } 651 static inline kmp_cmplx128_a4_t operator-(kmp_cmplx128_a4_t &lhs, 652 kmp_cmplx128_a4_t &rhs) { 653 return lhs.q - rhs.q; 654 } 655 static inline kmp_cmplx128_a4_t operator*(kmp_cmplx128_a4_t &lhs, 656 kmp_cmplx128_a4_t &rhs) { 657 return lhs.q * rhs.q; 658 } 659 static inline kmp_cmplx128_a4_t operator/(kmp_cmplx128_a4_t &lhs, 660 kmp_cmplx128_a4_t &rhs) { 661 return lhs.q / rhs.q; 662 } 663 664 static inline kmp_cmplx128_a16_t operator+(kmp_cmplx128_a16_t &lhs, 665 kmp_cmplx128_a16_t &rhs) { 666 return lhs.q + rhs.q; 667 } 668 static inline kmp_cmplx128_a16_t operator-(kmp_cmplx128_a16_t &lhs, 669 kmp_cmplx128_a16_t &rhs) { 670 return lhs.q - rhs.q; 671 } 672 static inline kmp_cmplx128_a16_t operator*(kmp_cmplx128_a16_t &lhs, 673 kmp_cmplx128_a16_t &rhs) { 674 return lhs.q * rhs.q; 675 } 676 static inline kmp_cmplx128_a16_t operator/(kmp_cmplx128_a16_t &lhs, 677 kmp_cmplx128_a16_t &rhs) { 678 return lhs.q / rhs.q; 679 } 680 681 #endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD 682 683 // ATOMIC implementation routines ----------------------------------------- 684 // One routine for each operation and operand type. 685 // All routine declarations look like 686 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs ); 687 688 #define KMP_CHECK_GTID \ 689 if (gtid == KMP_GTID_UNKNOWN) { \ 690 gtid = __kmp_entry_gtid(); \ 691 } // check and get gtid when needed 692
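// For illustration (a sketch, not the literal preprocessor output): on IA-32 and
// Intel(R) 64 the instantiation ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, 0)
// further below generates an entry point roughly equivalent to
//
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs,
//                                 kmp_int32 rhs) {
//     KMP_DEBUG_ASSERT(__kmp_init_serial);
//     KA_TRACE(100, ("__kmpc_atomic_fixed4_add: T#%d\n", gtid));
//     KMP_TEST_THEN_ADD32(lhs, rhs); // single atomic fetch-and-add
//   }
//
// The macros below assemble such routines piece by piece.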
693 // Beginning of a definition (provides name, parameters, debug trace) 694 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 695 // fixed) 696 // OP_ID - operation identifier (add, sub, mul, ...) 697 // TYPE - operands' type 698 #define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ 699 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \ 700 TYPE *lhs, TYPE rhs) { \ 701 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 702 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 703 704 // ------------------------------------------------------------------------ 705 // Lock variables used for critical sections for various size operands 706 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat 707 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char 708 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short 709 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int 710 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float 711 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int 712 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double 713 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex 714 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double 715 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad 716 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex 717 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex 718 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex 719 720 // ------------------------------------------------------------------------ 721 // Operation on *lhs, rhs bound by critical section 722 // OP - operator (it's supposed to contain an assignment) 723 // LCK_ID - lock identifier 724 // Note: don't check gtid as it should always be valid 725 // 1, 2-byte - expect valid parameter, other - check before this macro 726 #define OP_CRITICAL(OP, LCK_ID) \ 727 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 728 \ 729 (*lhs) OP(rhs); \ 730 \ 731 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 732 733 #define OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) \ 734 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 735 (*lhs) = (TYPE)((*lhs)OP((TYPE)rhs)); \ 736 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 737 738 // ------------------------------------------------------------------------ 739 // For GNU compatibility, we may need to use a critical section, 740 // even though it is not required by the ISA. 741 // 742 // On IA-32 architecture, all atomic operations except for fixed 4 byte add, 743 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common 744 // critical section. On Intel(R) 64, all atomic operations are done with fetch 745 // and add or compare and exchange. Therefore, the FLAG parameter to this 746 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which 747 // require a critical section, where we predict that they will be implemented 748 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()). 749 // 750 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct, 751 // the FLAG parameter should always be 1. If we know that we will be using 752 // a critical section, then we want to make certain that we use the generic 753 // lock __kmp_atomic_lock to protect the atomic update, and not one of the 754 // locks that are specialized based upon the size or type of the data. 755 // 756 // If FLAG is 0, then we are relying on dead code elimination by the build 757 // compiler to get rid of the useless block of code, and save a needless 758 // branch at runtime.
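// As an illustrative sketch (not the literal expansion): when a routine is
// instantiated with a non-zero GOMP flag and libgomp compatibility is active,
// the update is performed under the generic lock, conceptually:
//
//   if (__kmp_atomic_mode == 2) {                 // GOMP compatibility mode
//     if (gtid == KMP_GTID_UNKNOWN)
//       gtid = __kmp_entry_gtid();                // KMP_CHECK_GTID
//     __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); // generic lock (LCK_ID 0)
//     *lhs = (TYPE)((*lhs) OP ((TYPE)rhs));
//     __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
//     return;
//   }
//   // ...otherwise fall through to the lock-free path (locked add / cmpxchg) below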
759 760 #ifdef KMP_GOMP_COMPAT 761 #define OP_GOMP_CRITICAL(OP, FLAG) \ 762 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 763 KMP_CHECK_GTID; \ 764 OP_CRITICAL(OP, 0); \ 765 return; \ 766 } 767 768 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) \ 769 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 770 KMP_CHECK_GTID; \ 771 OP_UPDATE_CRITICAL(TYPE, OP, 0); \ 772 return; \ 773 } 774 #else 775 #define OP_GOMP_CRITICAL(OP, FLAG) 776 #define OP_UPDATE_GOMP_CRITICAL(TYPE, OP, FLAG) 777 #endif /* KMP_GOMP_COMPAT */ 778 779 #if KMP_MIC 780 #define KMP_DO_PAUSE _mm_delay_32(1) 781 #else 782 #define KMP_DO_PAUSE KMP_CPU_PAUSE() 783 #endif /* KMP_MIC */ 784 785 // ------------------------------------------------------------------------ 786 // Operation on *lhs, rhs using "compare_and_store" routine 787 // TYPE - operands' type 788 // BITS - size in bits, used to distinguish low level calls 789 // OP - operator 790 #define OP_CMPXCHG(TYPE, BITS, OP) \ 791 { \ 792 TYPE old_value, new_value; \ 793 old_value = *(TYPE volatile *)lhs; \ 794 new_value = (TYPE)(old_value OP((TYPE)rhs)); \ 795 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 796 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 797 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 798 KMP_DO_PAUSE; \ 799 \ 800 old_value = *(TYPE volatile *)lhs; \ 801 new_value = (TYPE)(old_value OP((TYPE)rhs)); \ 802 } \ 803 } 804 805 #if USE_CMPXCHG_FIX 806 // 2007-06-25: 807 // workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32 808 // and win_32e are affected (I verified the asm). Compiler ignores the volatile 809 // qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the 810 // compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of 811 // the workaround. 
812 #define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ 813 { \ 814 struct _sss { \ 815 TYPE cmp; \ 816 kmp_int##BITS *vvv; \ 817 }; \ 818 struct _sss old_value, new_value; \ 819 old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \ 820 new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \ 821 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ 822 new_value.cmp = (TYPE)(old_value.cmp OP rhs); \ 823 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 824 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \ 825 *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \ 826 KMP_DO_PAUSE; \ 827 \ 828 *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ 829 new_value.cmp = (TYPE)(old_value.cmp OP rhs); \ 830 } \ 831 } 832 // end of the first part of the workaround for C78287 833 #endif // USE_CMPXCHG_FIX 834 835 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 836 837 // ------------------------------------------------------------------------ 838 // X86 or X86_64: no alignment problems ==================================== 839 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 840 GOMP_FLAG) \ 841 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 842 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 843 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ 844 KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ 845 } 846 // ------------------------------------------------------------------------- 847 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 848 GOMP_FLAG) \ 849 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 850 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 851 OP_CMPXCHG(TYPE, BITS, OP) \ 852 } 853 #if USE_CMPXCHG_FIX 854 // ------------------------------------------------------------------------- 855 // workaround for C78287 (complex(kind=4) data type) 856 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \ 857 MASK, GOMP_FLAG) \ 858 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 859 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 860 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ 861 } 862 // end of the second part of the workaround for C78287 863 #endif // USE_CMPXCHG_FIX 864 865 #else 866 // ------------------------------------------------------------------------- 867 // Code for other architectures that don't handle unaligned accesses. 
868 #define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 869 GOMP_FLAG) \ 870 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 871 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 872 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 873 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ 874 KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ 875 } else { \ 876 KMP_CHECK_GTID; \ 877 OP_UPDATE_CRITICAL(TYPE, OP, \ 878 LCK_ID) /* unaligned address - use critical */ \ 879 } \ 880 } 881 // ------------------------------------------------------------------------- 882 #define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 883 GOMP_FLAG) \ 884 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 885 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 886 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 887 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 888 } else { \ 889 KMP_CHECK_GTID; \ 890 OP_UPDATE_CRITICAL(TYPE, OP, \ 891 LCK_ID) /* unaligned address - use critical */ \ 892 } \ 893 } 894 #if USE_CMPXCHG_FIX 895 // ------------------------------------------------------------------------- 896 // workaround for C78287 (complex(kind=4) data type) 897 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \ 898 MASK, GOMP_FLAG) \ 899 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 900 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 901 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 902 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 903 } else { \ 904 KMP_CHECK_GTID; \ 905 OP_UPDATE_CRITICAL(TYPE, OP, \ 906 LCK_ID) /* unaligned address - use critical */ \ 907 } \ 908 } 909 // end of the second part of the workaround for C78287 910 #endif // USE_CMPXCHG_FIX 911 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 912 913 // Routines for ATOMIC 4-byte operands addition and subtraction 914 ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, 915 0) // __kmpc_atomic_fixed4_add 916 ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3, 917 0) // __kmpc_atomic_fixed4_sub 918 919 ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3, 920 KMP_ARCH_X86) // __kmpc_atomic_float4_add 921 ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3, 922 KMP_ARCH_X86) // __kmpc_atomic_float4_sub 923 924 // Routines for ATOMIC 8-byte operands addition and subtraction 925 ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7, 926 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add 927 ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7, 928 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub 929 930 ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7, 931 KMP_ARCH_X86) // __kmpc_atomic_float8_add 932 ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7, 933 KMP_ARCH_X86) // __kmpc_atomic_float8_sub 934 935 // ------------------------------------------------------------------------ 936 // Entries definition for integer operands 937 // TYPE_ID - operands type and size (fixed4, float4) 938 // OP_ID - operation identifier (add, sub, mul, ...) 
939 // TYPE - operand type 940 // BITS - size in bits, used to distinguish low level calls 941 // OP - operator (used in critical section) 942 // LCK_ID - lock identifier, used to possibly distinguish lock variable 943 // MASK - used for alignment check 944 945 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG 946 // ------------------------------------------------------------------------ 947 // Routines for ATOMIC integer operands, other operators 948 // ------------------------------------------------------------------------ 949 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG 950 ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0, 951 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add 952 ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0, 953 0) // __kmpc_atomic_fixed1_andb 954 ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0, 955 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div 956 ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0, 957 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div 958 ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0, 959 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul 960 ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0, 961 0) // __kmpc_atomic_fixed1_orb 962 ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0, 963 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl 964 ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0, 965 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr 966 ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, 967 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr 968 ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0, 969 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub 970 ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0, 971 0) // __kmpc_atomic_fixed1_xor 972 ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1, 973 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add 974 ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1, 975 0) // __kmpc_atomic_fixed2_andb 976 ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1, 977 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div 978 ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1, 979 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div 980 ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1, 981 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul 982 ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1, 983 0) // __kmpc_atomic_fixed2_orb 984 ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1, 985 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl 986 ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1, 987 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr 988 ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, 989 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr 990 ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1, 991 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub 992 ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1, 993 0) // __kmpc_atomic_fixed2_xor 994 ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3, 995 0) // __kmpc_atomic_fixed4_andb 996 ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3, 997 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div 998 ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3, 999 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div 1000 ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3, 1001 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul 1002 ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3, 1003 0) // __kmpc_atomic_fixed4_orb 1004 ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3, 1005 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl 1006 ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3, 1007 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr 1008 ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 
4i, 3, 1009 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr 1010 ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3, 1011 0) // __kmpc_atomic_fixed4_xor 1012 ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7, 1013 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb 1014 ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7, 1015 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div 1016 ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7, 1017 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div 1018 ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7, 1019 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul 1020 ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7, 1021 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb 1022 ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7, 1023 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl 1024 ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7, 1025 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr 1026 ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, 1027 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr 1028 ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7, 1029 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor 1030 ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3, 1031 KMP_ARCH_X86) // __kmpc_atomic_float4_div 1032 ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3, 1033 KMP_ARCH_X86) // __kmpc_atomic_float4_mul 1034 ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7, 1035 KMP_ARCH_X86) // __kmpc_atomic_float8_div 1036 ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7, 1037 KMP_ARCH_X86) // __kmpc_atomic_float8_mul 1038 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG 1039 1040 /* ------------------------------------------------------------------------ */ 1041 /* Routines for C/C++ Reduction operators && and || */ 1042 1043 // ------------------------------------------------------------------------ 1044 // Need separate macros for &&, || because there is no combined assignment 1045 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used 1046 #define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1047 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1048 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ 1049 OP_CRITICAL(= *lhs OP, LCK_ID) \ 1050 } 1051 1052 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1053 1054 // ------------------------------------------------------------------------ 1055 // X86 or X86_64: no alignment problems =================================== 1056 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \ 1057 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1058 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ 1059 OP_CMPXCHG(TYPE, BITS, OP) \ 1060 } 1061 1062 #else 1063 // ------------------------------------------------------------------------ 1064 // Code for other architectures that don't handle unaligned accesses. 
1065 #define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \ 1066 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1067 OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ 1068 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1069 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1070 } else { \ 1071 KMP_CHECK_GTID; \ 1072 OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \ 1073 } \ 1074 } 1075 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1076 1077 ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0, 1078 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl 1079 ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0, 1080 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl 1081 ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1, 1082 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl 1083 ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1, 1084 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl 1085 ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3, 1086 0) // __kmpc_atomic_fixed4_andl 1087 ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3, 1088 0) // __kmpc_atomic_fixed4_orl 1089 ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7, 1090 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl 1091 ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7, 1092 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl 1093 1094 /* ------------------------------------------------------------------------- */ 1095 /* Routines for Fortran operators that matched no one in C: */ 1096 /* MAX, MIN, .EQV., .NEQV. */ 1097 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */ 1098 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */ 1099 1100 // ------------------------------------------------------------------------- 1101 // MIN and MAX need separate macros 1102 // OP - operator to check if we need any actions? 1103 #define MIN_MAX_CRITSECT(OP, LCK_ID) \ 1104 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 1105 \ 1106 if (*lhs OP rhs) { /* still need actions? */ \ 1107 *lhs = rhs; \ 1108 } \ 1109 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 1110 1111 // ------------------------------------------------------------------------- 1112 #ifdef KMP_GOMP_COMPAT 1113 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \ 1114 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 1115 KMP_CHECK_GTID; \ 1116 MIN_MAX_CRITSECT(OP, 0); \ 1117 return; \ 1118 } 1119 #else 1120 #define GOMP_MIN_MAX_CRITSECT(OP, FLAG) 1121 #endif /* KMP_GOMP_COMPAT */ 1122 1123 // ------------------------------------------------------------------------- 1124 #define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \ 1125 { \ 1126 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 1127 TYPE old_value; \ 1128 temp_val = *lhs; \ 1129 old_value = temp_val; \ 1130 while (old_value OP rhs && /* still need actions? */ \ 1131 !KMP_COMPARE_AND_STORE_ACQ##BITS( \ 1132 (kmp_int##BITS *)lhs, \ 1133 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 1134 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \ 1135 KMP_CPU_PAUSE(); \ 1136 temp_val = *lhs; \ 1137 old_value = temp_val; \ 1138 } \ 1139 } 1140 1141 // ------------------------------------------------------------------------- 1142 // 1-byte, 2-byte operands - use critical section 1143 #define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1144 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1145 if (*lhs OP rhs) { /* need actions? 
*/ \ 1146 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ 1147 MIN_MAX_CRITSECT(OP, LCK_ID) \ 1148 } \ 1149 } 1150 1151 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1152 1153 // ------------------------------------------------------------------------- 1154 // X86 or X86_64: no alignment problems ==================================== 1155 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 1156 GOMP_FLAG) \ 1157 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1158 if (*lhs OP rhs) { \ 1159 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ 1160 MIN_MAX_CMPXCHG(TYPE, BITS, OP) \ 1161 } \ 1162 } 1163 1164 #else 1165 // ------------------------------------------------------------------------- 1166 // Code for other architectures that don't handle unaligned accesses. 1167 #define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 1168 GOMP_FLAG) \ 1169 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1170 if (*lhs OP rhs) { \ 1171 GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ 1172 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1173 MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1174 } else { \ 1175 KMP_CHECK_GTID; \ 1176 MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \ 1177 } \ 1178 } \ 1179 } 1180 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1181 1182 MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0, 1183 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max 1184 MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0, 1185 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min 1186 MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1, 1187 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max 1188 MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1, 1189 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min 1190 MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3, 1191 0) // __kmpc_atomic_fixed4_max 1192 MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3, 1193 0) // __kmpc_atomic_fixed4_min 1194 MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7, 1195 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max 1196 MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7, 1197 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min 1198 MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3, 1199 KMP_ARCH_X86) // __kmpc_atomic_float4_max 1200 MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3, 1201 KMP_ARCH_X86) // __kmpc_atomic_float4_min 1202 MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7, 1203 KMP_ARCH_X86) // __kmpc_atomic_float8_max 1204 MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7, 1205 KMP_ARCH_X86) // __kmpc_atomic_float8_min 1206 #if KMP_HAVE_QUAD 1207 MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r, 1208 1) // __kmpc_atomic_float16_max 1209 MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r, 1210 1) // __kmpc_atomic_float16_min 1211 #if (KMP_ARCH_X86) 1212 MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r, 1213 1) // __kmpc_atomic_float16_max_a16 1214 MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r, 1215 1) // __kmpc_atomic_float16_min_a16 1216 #endif // (KMP_ARCH_X86) 1217 #endif // KMP_HAVE_QUAD 1218 // ------------------------------------------------------------------------ 1219 // Need separate macros for .EQV. 
because of the need of complement (~) 1220 // OP ignored for critical sections, ^=~ used instead 1221 #define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1222 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1223 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \ 1224 OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* send assignment and complement */ \ 1225 } 1226 1227 // ------------------------------------------------------------------------ 1228 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1229 // ------------------------------------------------------------------------ 1230 // X86 or X86_64: no alignment problems =================================== 1231 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 1232 GOMP_FLAG) \ 1233 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1234 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \ 1235 OP_CMPXCHG(TYPE, BITS, OP) \ 1236 } 1237 // ------------------------------------------------------------------------ 1238 #else 1239 // ------------------------------------------------------------------------ 1240 // Code for other architectures that don't handle unaligned accesses. 1241 #define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ 1242 GOMP_FLAG) \ 1243 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1244 OP_GOMP_CRITICAL(^= (TYPE) ~, GOMP_FLAG) \ 1245 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1246 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1247 } else { \ 1248 KMP_CHECK_GTID; \ 1249 OP_CRITICAL(^= (TYPE) ~, LCK_ID) /* unaligned address - use critical */ \ 1250 } \ 1251 } 1252 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1253 1254 ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0, 1255 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv 1256 ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1, 1257 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv 1258 ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3, 1259 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv 1260 ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7, 1261 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv 1262 ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, 1263 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv 1264 ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, 1265 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv 1266 ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, 1267 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv 1268 ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, 1269 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv 1270 1271 // ------------------------------------------------------------------------ 1272 // Routines for Extended types: long double, _Quad, complex flavours (use 1273 // critical section) 1274 // TYPE_ID, OP_ID, TYPE - detailed above 1275 // OP - operator 1276 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1277 #define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1278 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 1279 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \ 1280 OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \ 1281 } 1282 1283 /* ------------------------------------------------------------------------- */ 1284 // routines for long double type 1285 ATOMIC_CRITICAL(float10, add, long double, +, 10r, 1286 1) // __kmpc_atomic_float10_add 1287 ATOMIC_CRITICAL(float10, sub, long double, -, 10r, 1288 1) // __kmpc_atomic_float10_sub 1289 ATOMIC_CRITICAL(float10, mul, long double, *, 10r, 1290 1) // __kmpc_atomic_float10_mul 1291 ATOMIC_CRITICAL(float10, div, long 
double, /, 10r, 1292 1) // __kmpc_atomic_float10_div 1293 #if KMP_HAVE_QUAD 1294 // routines for _Quad type 1295 ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r, 1296 1) // __kmpc_atomic_float16_add 1297 ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r, 1298 1) // __kmpc_atomic_float16_sub 1299 ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r, 1300 1) // __kmpc_atomic_float16_mul 1301 ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r, 1302 1) // __kmpc_atomic_float16_div 1303 #if (KMP_ARCH_X86) 1304 ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r, 1305 1) // __kmpc_atomic_float16_add_a16 1306 ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r, 1307 1) // __kmpc_atomic_float16_sub_a16 1308 ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r, 1309 1) // __kmpc_atomic_float16_mul_a16 1310 ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r, 1311 1) // __kmpc_atomic_float16_div_a16 1312 #endif // (KMP_ARCH_X86) 1313 #endif // KMP_HAVE_QUAD 1314 // routines for complex types 1315 1316 #if USE_CMPXCHG_FIX 1317 // workaround for C78287 (complex(kind=4) data type) 1318 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1319 1) // __kmpc_atomic_cmplx4_add 1320 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1321 1) // __kmpc_atomic_cmplx4_sub 1322 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1323 1) // __kmpc_atomic_cmplx4_mul 1324 ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1325 1) // __kmpc_atomic_cmplx4_div 1326 // end of the workaround for C78287 1327 #else 1328 ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add 1329 ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub 1330 ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul 1331 ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div 1332 #endif // USE_CMPXCHG_FIX 1333 1334 ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add 1335 ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub 1336 ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul 1337 ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div 1338 ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c, 1339 1) // __kmpc_atomic_cmplx10_add 1340 ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c, 1341 1) // __kmpc_atomic_cmplx10_sub 1342 ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c, 1343 1) // __kmpc_atomic_cmplx10_mul 1344 ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c, 1345 1) // __kmpc_atomic_cmplx10_div 1346 #if KMP_HAVE_QUAD 1347 ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c, 1348 1) // __kmpc_atomic_cmplx16_add 1349 ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c, 1350 1) // __kmpc_atomic_cmplx16_sub 1351 ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c, 1352 1) // __kmpc_atomic_cmplx16_mul 1353 ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c, 1354 1) // __kmpc_atomic_cmplx16_div 1355 #if (KMP_ARCH_X86) 1356 ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1357 1) // __kmpc_atomic_cmplx16_add_a16 1358 ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1359 1) // __kmpc_atomic_cmplx16_sub_a16 1360 ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1361 1) // __kmpc_atomic_cmplx16_mul_a16 1362 ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1363 1) // __kmpc_atomic_cmplx16_div_a16 1364 #endif // (KMP_ARCH_X86) 1365 #endif // 
KMP_HAVE_QUAD 1366 1367 // OpenMP 4.0: x = expr binop x for non-commutative operations. 1368 // Supported only on IA-32 architecture and Intel(R) 64 1369 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1370 1371 // ------------------------------------------------------------------------ 1372 // Operation on *lhs, rhs bound by critical section 1373 // OP - operator (it's supposed to contain an assignment) 1374 // LCK_ID - lock identifier 1375 // Note: don't check gtid as it should always be valid 1376 // 1, 2-byte - expect valid parameter, other - check before this macro 1377 #define OP_CRITICAL_REV(TYPE, OP, LCK_ID) \ 1378 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 1379 \ 1380 (*lhs) = (TYPE)((rhs)OP(*lhs)); \ 1381 \ 1382 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 1383 1384 #ifdef KMP_GOMP_COMPAT 1385 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) \ 1386 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 1387 KMP_CHECK_GTID; \ 1388 OP_CRITICAL_REV(TYPE, OP, 0); \ 1389 return; \ 1390 } 1391 1392 #else 1393 #define OP_GOMP_CRITICAL_REV(TYPE, OP, FLAG) 1394 #endif /* KMP_GOMP_COMPAT */ 1395 1396 // Beginning of a definition (provides name, parameters, debug trace) 1397 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned 1398 // fixed) 1399 // OP_ID - operation identifier (add, sub, mul, ...) 1400 // TYPE - operands' type 1401 #define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ 1402 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \ 1403 TYPE *lhs, TYPE rhs) { \ 1404 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 1405 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid)); 1406 1407 // ------------------------------------------------------------------------ 1408 // Operation on *lhs, rhs using "compare_and_store" routine 1409 // TYPE - operands' type 1410 // BITS - size in bits, used to distinguish low level calls 1411 // OP - operator 1412 // Note: temp_val introduced in order to force the compiler to read 1413 // *lhs only once (w/o it the compiler reads *lhs twice) 1414 #define OP_CMPXCHG_REV(TYPE, BITS, OP) \ 1415 { \ 1416 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 1417 TYPE old_value, new_value; \ 1418 temp_val = *lhs; \ 1419 old_value = temp_val; \ 1420 new_value = (TYPE)(rhs OP old_value); \ 1421 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 1422 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 1423 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 1424 KMP_DO_PAUSE; \ 1425 \ 1426 temp_val = *lhs; \ 1427 old_value = temp_val; \ 1428 new_value = (TYPE)(rhs OP old_value); \ 1429 } \ 1430 } 1431 1432 // ------------------------------------------------------------------------- 1433 #define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \ 1434 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \ 1435 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \ 1436 OP_CMPXCHG_REV(TYPE, BITS, OP) \ 1437 } 1438 1439 // ------------------------------------------------------------------------ 1440 // Entries definition for integer operands 1441 // TYPE_ID - operands type and size (fixed4, float4) 1442 // OP_ID - operation identifier (add, sub, mul, ...)
1443 // TYPE - operand type 1444 // BITS - size in bits, used to distinguish low level calls 1445 // OP - operator (used in critical section) 1446 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1447 1448 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG 1449 // ------------------------------------------------------------------------ 1450 // Routines for ATOMIC integer operands, other operators 1451 // ------------------------------------------------------------------------ 1452 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG 1453 ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i, 1454 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev 1455 ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i, 1456 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev 1457 ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i, 1458 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev 1459 ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i, 1460 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev 1461 ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i, 1462 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev 1463 ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i, 1464 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev 1465 1466 ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i, 1467 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev 1468 ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i, 1469 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev 1470 ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i, 1471 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev 1472 ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i, 1473 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev 1474 ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1475 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev 1476 ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i, 1477 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev 1478 1479 ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i, 1480 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev 1481 ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i, 1482 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev 1483 ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i, 1484 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev 1485 ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i, 1486 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev 1487 ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i, 1488 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev 1489 ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i, 1490 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev 1491 1492 ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i, 1493 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev 1494 ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i, 1495 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev 1496 ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i, 1497 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev 1498 ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i, 1499 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev 1500 ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i, 1501 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev 1502 ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i, 1503 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev 1504 1505 ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r, 1506 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev 1507 ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r, 1508 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev 1509 1510 ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r, 1511 KMP_ARCH_X86) // 
__kmpc_atomic_float8_div_rev 1512 ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r, 1513 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev 1514 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG 1515 1516 // ------------------------------------------------------------------------ 1517 // Routines for Extended types: long double, _Quad, complex flavours (use 1518 // critical section) 1519 // TYPE_ID, OP_ID, TYPE - detailed above 1520 // OP - operator 1521 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1522 #define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 1523 ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \ 1524 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \ 1525 OP_CRITICAL_REV(TYPE, OP, LCK_ID) \ 1526 } 1527 1528 /* ------------------------------------------------------------------------- */ 1529 // routines for long double type 1530 ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r, 1531 1) // __kmpc_atomic_float10_sub_rev 1532 ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r, 1533 1) // __kmpc_atomic_float10_div_rev 1534 #if KMP_HAVE_QUAD 1535 // routines for _Quad type 1536 ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r, 1537 1) // __kmpc_atomic_float16_sub_rev 1538 ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r, 1539 1) // __kmpc_atomic_float16_div_rev 1540 #if (KMP_ARCH_X86) 1541 ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r, 1542 1) // __kmpc_atomic_float16_sub_a16_rev 1543 ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r, 1544 1) // __kmpc_atomic_float16_div_a16_rev 1545 #endif // KMP_ARCH_X86 1546 #endif // KMP_HAVE_QUAD 1547 1548 // routines for complex types 1549 ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c, 1550 1) // __kmpc_atomic_cmplx4_sub_rev 1551 ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c, 1552 1) // __kmpc_atomic_cmplx4_div_rev 1553 ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c, 1554 1) // __kmpc_atomic_cmplx8_sub_rev 1555 ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c, 1556 1) // __kmpc_atomic_cmplx8_div_rev 1557 ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c, 1558 1) // __kmpc_atomic_cmplx10_sub_rev 1559 ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c, 1560 1) // __kmpc_atomic_cmplx10_div_rev 1561 #if KMP_HAVE_QUAD 1562 ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c, 1563 1) // __kmpc_atomic_cmplx16_sub_rev 1564 ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c, 1565 1) // __kmpc_atomic_cmplx16_div_rev 1566 #if (KMP_ARCH_X86) 1567 ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1568 1) // __kmpc_atomic_cmplx16_sub_a16_rev 1569 ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1570 1) // __kmpc_atomic_cmplx16_div_a16_rev 1571 #endif // KMP_ARCH_X86 1572 #endif // KMP_HAVE_QUAD 1573 1574 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 1575 // End of OpenMP 4.0: x = expr binop x for non-commutative operations. 
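/* A minimal standalone sketch, assuming only standard C++11 <atomic>, of the
   retry loop that OP_CMPXCHG_REV expands to for the reversed (x = expr binop x)
   forms above. It is illustrative only: the real macros operate on a raw
   TYPE * through the KMP_COMPARE_AND_STORE_ACQ## primitives rather than
   std::atomic, and the helper name below is not part of the runtime.
@code
#include <atomic>

template <typename T, typename BinOp>
void atomic_update_rev_sketch(std::atomic<T> &lhs, T rhs, BinOp op) {
  T old_value = lhs.load();
  T new_value = op(rhs, old_value); // reversed operand order: rhs OP old
  // Retry until no other thread changed lhs between the read and the CAS;
  // on failure, compare_exchange_weak reloads old_value for us.
  while (!lhs.compare_exchange_weak(old_value, new_value)) {
    new_value = op(rhs, old_value);
  }
}
@endcode
   For example, __kmpc_atomic_fixed4_sub_rev effectively performs
   *lhs = rhs - *lhs with this pattern, taking the critical-section path only
   through the GOMP-compatibility branch. */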
1576
1577 /* ------------------------------------------------------------------------ */
1578 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */
1579 /* Note: in order to reduce the total number of type combinations, */
1580 /* it is assumed that the compiler converts RHS to the longest floating */
1581 /* type, that is _Quad, before calling any of these routines. */
1582 /* The conversion to _Quad is done by the compiler during the calculation, */
1583 /* and the conversion back to TYPE happens before the assignment, like: */
1584 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */
1585 /* A performance penalty is expected because of software emulation. */
1586 /* ------------------------------------------------------------------------ */
1587
1588 #define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1589 void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
1590 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
1591 KMP_DEBUG_ASSERT(__kmp_init_serial); \
1592 KA_TRACE(100, \
1593 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
1594 gtid));
1595
1596 // -------------------------------------------------------------------------
1597 #define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
1598 GOMP_FLAG) \
1599 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1600 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) /* send assignment */ \
1601 OP_UPDATE_CRITICAL(TYPE, OP, LCK_ID) /* send assignment */ \
1602 }
1603
1604 // -------------------------------------------------------------------------
1605 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1606 // -------------------------------------------------------------------------
1607 // X86 or X86_64: no alignment problems ====================================
1608 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1609 LCK_ID, MASK, GOMP_FLAG) \
1610 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1611 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1612 OP_CMPXCHG(TYPE, BITS, OP) \
1613 }
1614 // -------------------------------------------------------------------------
1615 #else
1616 // ------------------------------------------------------------------------
1617 // Code for other architectures that don't handle unaligned accesses.
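/* On those targets the macro below first checks whether the target address is
   naturally aligned and only then uses the lock-free compare-and-store path;
   unaligned operands fall back to the critical section. A minimal sketch of
   that alignment test, assuming nothing beyond <cstdint> (the helper name is
   illustrative and not part of the runtime): MASK is the operand size minus
   one, written in hex in the instantiations below (0 for 1-byte, 1 for
   2-byte, 3 for 4-byte, 7 for 8-byte operands).
@code
#include <cstdint>

// True when addr is suitably aligned for a lock-free compare-and-store,
// i.e. when the low address bits selected by mask are all zero.
inline bool aligned_for_cmpxchg_sketch(const void *addr, std::uintptr_t mask) {
  return (reinterpret_cast<std::uintptr_t>(addr) & mask) == 0;
}
@endcode
*/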
1618 #define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ 1619 LCK_ID, MASK, GOMP_FLAG) \ 1620 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1621 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \ 1622 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ 1623 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ 1624 } else { \ 1625 KMP_CHECK_GTID; \ 1626 OP_UPDATE_CRITICAL(TYPE, OP, \ 1627 LCK_ID) /* unaligned address - use critical */ \ 1628 } \ 1629 } 1630 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1631 1632 // ------------------------------------------------------------------------- 1633 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1634 // ------------------------------------------------------------------------- 1635 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ 1636 RTYPE, LCK_ID, MASK, GOMP_FLAG) \ 1637 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1638 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \ 1639 OP_CMPXCHG_REV(TYPE, BITS, OP) \ 1640 } 1641 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ 1642 LCK_ID, GOMP_FLAG) \ 1643 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ 1644 OP_GOMP_CRITICAL_REV(TYPE, OP, GOMP_FLAG) \ 1645 OP_CRITICAL_REV(TYPE, OP, LCK_ID) \ 1646 } 1647 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1648 1649 // RHS=float8 1650 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, 1651 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8 1652 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, 1653 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8 1654 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, 1655 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8 1656 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, 1657 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8 1658 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 1659 0) // __kmpc_atomic_fixed4_mul_float8 1660 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 1661 0) // __kmpc_atomic_fixed4_div_float8 1662 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, 1663 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8 1664 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, 1665 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8 1666 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, 1667 KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8 1668 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, 1669 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8 1670 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, 1671 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8 1672 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, 1673 KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8 1674 1675 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not 1676 // use them) 1677 #if KMP_HAVE_QUAD 1678 ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0, 1679 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp 1680 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0, 1681 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp 1682 ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, 1683 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp 1684 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0, 1685 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp 
1686 ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, 1687 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp 1688 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0, 1689 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp 1690 ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0, 1691 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp 1692 ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, 1693 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp 1694 1695 ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1, 1696 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp 1697 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1, 1698 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp 1699 ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, 1700 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp 1701 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1, 1702 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp 1703 ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, 1704 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp 1705 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1, 1706 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp 1707 ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1, 1708 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp 1709 ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, 1710 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp 1711 1712 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 1713 0) // __kmpc_atomic_fixed4_add_fp 1714 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3, 1715 0) // __kmpc_atomic_fixed4u_add_fp 1716 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 1717 0) // __kmpc_atomic_fixed4_sub_fp 1718 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3, 1719 0) // __kmpc_atomic_fixed4u_sub_fp 1720 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 1721 0) // __kmpc_atomic_fixed4_mul_fp 1722 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3, 1723 0) // __kmpc_atomic_fixed4u_mul_fp 1724 ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 1725 0) // __kmpc_atomic_fixed4_div_fp 1726 ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 1727 0) // __kmpc_atomic_fixed4u_div_fp 1728 1729 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, 1730 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp 1731 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7, 1732 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp 1733 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, 1734 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp 1735 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7, 1736 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp 1737 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, 1738 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp 1739 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7, 1740 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp 1741 ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, 1742 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp 1743 ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, 1744 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp 1745 1746 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, 1747 KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp 1748 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 
4r, 3, 1749 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp 1750 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, 1751 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp 1752 ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, 1753 KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp 1754 1755 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, 1756 KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp 1757 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, 1758 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp 1759 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, 1760 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp 1761 ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, 1762 KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp 1763 1764 ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r, 1765 1) // __kmpc_atomic_float10_add_fp 1766 ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r, 1767 1) // __kmpc_atomic_float10_sub_fp 1768 ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r, 1769 1) // __kmpc_atomic_float10_mul_fp 1770 ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r, 1771 1) // __kmpc_atomic_float10_div_fp 1772 1773 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1774 // Reverse operations 1775 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0, 1776 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp 1777 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0, 1778 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp 1779 ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0, 1780 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp 1781 ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0, 1782 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp 1783 1784 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1, 1785 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp 1786 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1, 1787 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp 1788 ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1, 1789 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp 1790 ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1, 1791 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp 1792 1793 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3, 1794 0) // __kmpc_atomic_fixed4_sub_rev_fp 1795 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3, 1796 0) // __kmpc_atomic_fixed4u_sub_rev_fp 1797 ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3, 1798 0) // __kmpc_atomic_fixed4_div_rev_fp 1799 ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3, 1800 0) // __kmpc_atomic_fixed4u_div_rev_fp 1801 1802 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7, 1803 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp 1804 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7, 1805 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp 1806 ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7, 1807 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp 1808 ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7, 1809 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp 1810 1811 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3, 1812 KMP_ARCH_X86) // 
__kmpc_atomic_float4_sub_rev_fp
1813 ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
1814 KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp
1815
1816 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
1817 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
1818 ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
1819 KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp
1820
1821 ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
1822 1) // __kmpc_atomic_float10_sub_rev_fp
1823 ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
1824 1) // __kmpc_atomic_float10_div_rev_fp
1825 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1826
1827 #endif // KMP_HAVE_QUAD
1828
1829 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1830 // ------------------------------------------------------------------------
1831 // X86 or X86_64: no alignment problems ====================================
1832 #if USE_CMPXCHG_FIX
1833 // workaround for C78287 (complex(kind=4) data type)
1834 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1835 LCK_ID, MASK, GOMP_FLAG) \
1836 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1837 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1838 OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
1839 }
1840 // end of the second part of the workaround for C78287
1841 #else
1842 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1843 LCK_ID, MASK, GOMP_FLAG) \
1844 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1845 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1846 OP_CMPXCHG(TYPE, BITS, OP) \
1847 }
1848 #endif // USE_CMPXCHG_FIX
1849 #else
1850 // ------------------------------------------------------------------------
1851 // Code for other architectures that don't handle unaligned accesses.
1852 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
1853 LCK_ID, MASK, GOMP_FLAG) \
1854 ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
1855 OP_UPDATE_GOMP_CRITICAL(TYPE, OP, GOMP_FLAG) \
1856 if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
1857 OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
1858 } else { \
1859 KMP_CHECK_GTID; \
1860 OP_UPDATE_CRITICAL(TYPE, OP, \
1861 LCK_ID) /* unaligned address - use critical */ \
1862 } \
1863 }
1864 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1865
1866 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
1867 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
1868 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
1869 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
1870 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
1871 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
1872 ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
1873 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
1874
1875 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
1876 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1877
1878 // ------------------------------------------------------------------------
1879 // Atomic READ routines
1880
1881 // ------------------------------------------------------------------------
1882 // Beginning of a definition (provides name, parameters, debug trace)
1883 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
1884 // fixed)
1885 // OP_ID - operation identifier (add, sub, mul, ...)
1886 // TYPE - operands' type
1887 #define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
1888 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
1889 TYPE *loc) { \
1890 KMP_DEBUG_ASSERT(__kmp_init_serial); \
1891 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
1892
1893 // ------------------------------------------------------------------------
1894 // Operation on *lhs, rhs using "compare_and_store_ret" routine
1895 // TYPE - operands' type
1896 // BITS - size in bits, used to distinguish low level calls
1897 // OP - operator
1898 // Note: temp_val introduced in order to force the compiler to read
1899 // *lhs only once (w/o it the compiler reads *lhs twice)
1900 // TODO: check if it is still necessary
1901 // Return old value regardless of the result of the "compare & swap" operation
1902 #define OP_CMPXCHG_READ(TYPE, BITS, OP) \
1903 { \
1904 TYPE KMP_ATOMIC_VOLATILE temp_val; \
1905 union f_i_union { \
1906 TYPE f_val; \
1907 kmp_int##BITS i_val; \
1908 }; \
1909 union f_i_union old_value; \
1910 temp_val = *loc; \
1911 old_value.f_val = temp_val; \
1912 old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
1913 (kmp_int##BITS *)loc, \
1914 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
1915 *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
1916 new_value = old_value.f_val; \
1917 return new_value; \
1918 }
1919
1920 // -------------------------------------------------------------------------
1921 // Operation on *lhs, rhs bound by critical section
1922 // OP - operator (it's supposed to contain an assignment)
1923 // LCK_ID - lock identifier
1924 // Note: don't check gtid as it should always be valid
1925 // 1, 2-byte - expect valid parameter, other - check before this macro
1926 #define OP_CRITICAL_READ(OP, LCK_ID) \
1927 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
1928 \
1929 new_value = (*loc); \
1930 \
1931 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
1932
1933 // -------------------------------------------------------------------------
1934 #ifdef KMP_GOMP_COMPAT
1935 #define OP_GOMP_CRITICAL_READ(OP, FLAG) \
1936 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
1937 KMP_CHECK_GTID; \
1938 OP_CRITICAL_READ(OP, 0); \
1939 return new_value; \
1940 }
1941 #else
1942 #define OP_GOMP_CRITICAL_READ(OP, FLAG)
1943 #endif /* KMP_GOMP_COMPAT */
1944
1945 // -------------------------------------------------------------------------
1946 #define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1947 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1948 TYPE new_value; \
1949 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1950 new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
1951 return new_value; \
1952 }
1953 // -------------------------------------------------------------------------
1954 #define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
1955 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1956 TYPE new_value; \
1957 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
1958 OP_CMPXCHG_READ(TYPE, BITS, OP) \
1959 }
1960 // ------------------------------------------------------------------------
1961 // Routines for Extended types: long double, _Quad, complex flavours (use
1962 // critical section)
1963 // TYPE_ID, OP_ID, TYPE - detailed above
1964 // OP - operator
1965 // LCK_ID - lock identifier, used to possibly distinguish lock variable
1966 #define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
1967 ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
1968 TYPE new_value; \
1969 OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \ 1970 OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \ 1971 return new_value; \ 1972 } 1973 1974 // ------------------------------------------------------------------------ 1975 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return 1976 // value doesn't work. 1977 // Let's return the read value through the additional parameter. 1978 #if (KMP_OS_WINDOWS) 1979 1980 #define OP_CRITICAL_READ_WRK(OP, LCK_ID) \ 1981 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 1982 \ 1983 (*out) = (*loc); \ 1984 \ 1985 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 1986 // ------------------------------------------------------------------------ 1987 #ifdef KMP_GOMP_COMPAT 1988 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \ 1989 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 1990 KMP_CHECK_GTID; \ 1991 OP_CRITICAL_READ_WRK(OP, 0); \ 1992 } 1993 #else 1994 #define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) 1995 #endif /* KMP_GOMP_COMPAT */ 1996 // ------------------------------------------------------------------------ 1997 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \ 1998 void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \ 1999 TYPE *loc) { \ 2000 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2001 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 2002 2003 // ------------------------------------------------------------------------ 2004 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2005 ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \ 2006 OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \ 2007 OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \ 2008 } 2009 2010 #endif // KMP_OS_WINDOWS 2011 2012 // ------------------------------------------------------------------------ 2013 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 2014 ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd 2015 ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +, 2016 KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd 2017 ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +, 2018 KMP_ARCH_X86) // __kmpc_atomic_float4_rd 2019 ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +, 2020 KMP_ARCH_X86) // __kmpc_atomic_float8_rd 2021 2022 // !!! 
TODO: Remove lock operations for "char" since it can't be non-atomic 2023 ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +, 2024 KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd 2025 ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +, 2026 KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd 2027 2028 ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r, 2029 1) // __kmpc_atomic_float10_rd 2030 #if KMP_HAVE_QUAD 2031 ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r, 2032 1) // __kmpc_atomic_float16_rd 2033 #endif // KMP_HAVE_QUAD 2034 2035 // Fix for CQ220361 on Windows* OS 2036 #if (KMP_OS_WINDOWS) 2037 ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c, 2038 1) // __kmpc_atomic_cmplx4_rd 2039 #else 2040 ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c, 2041 1) // __kmpc_atomic_cmplx4_rd 2042 #endif // (KMP_OS_WINDOWS) 2043 ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c, 2044 1) // __kmpc_atomic_cmplx8_rd 2045 ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c, 2046 1) // __kmpc_atomic_cmplx10_rd 2047 #if KMP_HAVE_QUAD 2048 ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c, 2049 1) // __kmpc_atomic_cmplx16_rd 2050 #if (KMP_ARCH_X86) 2051 ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r, 2052 1) // __kmpc_atomic_float16_a16_rd 2053 ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 2054 1) // __kmpc_atomic_cmplx16_a16_rd 2055 #endif // (KMP_ARCH_X86) 2056 #endif // KMP_HAVE_QUAD 2057 2058 // ------------------------------------------------------------------------ 2059 // Atomic WRITE routines 2060 2061 #define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2062 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2063 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2064 KMP_XCHG_FIXED##BITS(lhs, rhs); \ 2065 } 2066 // ------------------------------------------------------------------------ 2067 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2068 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2069 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2070 KMP_XCHG_REAL##BITS(lhs, rhs); \ 2071 } 2072 2073 // ------------------------------------------------------------------------ 2074 // Operation on *lhs, rhs using "compare_and_store" routine 2075 // TYPE - operands' type 2076 // BITS - size in bits, used to distinguish low level calls 2077 // OP - operator 2078 // Note: temp_val introduced in order to force the compiler to read 2079 // *lhs only once (w/o it the compiler reads *lhs twice) 2080 #define OP_CMPXCHG_WR(TYPE, BITS, OP) \ 2081 { \ 2082 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2083 TYPE old_value, new_value; \ 2084 temp_val = *lhs; \ 2085 old_value = temp_val; \ 2086 new_value = rhs; \ 2087 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2088 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2089 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 2090 KMP_CPU_PAUSE(); \ 2091 \ 2092 temp_val = *lhs; \ 2093 old_value = temp_val; \ 2094 new_value = rhs; \ 2095 } \ 2096 } 2097 2098 // ------------------------------------------------------------------------- 2099 #define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2100 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ 2101 OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ 2102 OP_CMPXCHG_WR(TYPE, BITS, OP) \ 2103 } 2104 2105 // ------------------------------------------------------------------------ 2106 // Routines for Extended types: long double, _Quad, complex flavours (use 2107 // critical section) 2108 // TYPE_ID, OP_ID, TYPE - detailed above 2109 // OP - operator 2110 // LCK_ID - lock identifier, used to 
possibly distinguish lock variable
2111 #define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2112 ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
2113 OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
2114 OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
2115 }
2116 // -------------------------------------------------------------------------
2117
2118 ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
2119 KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
2120 ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
2121 KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
2122 ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
2123 KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
2124 #if (KMP_ARCH_X86)
2125 ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
2126 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2127 #else
2128 ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
2129 KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
2130 #endif // (KMP_ARCH_X86)
2131
2132 ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
2133 KMP_ARCH_X86) // __kmpc_atomic_float4_wr
2134 #if (KMP_ARCH_X86)
2135 ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
2136 KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2137 #else
2138 ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
2139 KMP_ARCH_X86) // __kmpc_atomic_float8_wr
2140 #endif // (KMP_ARCH_X86)
2141
2142 ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
2143 1) // __kmpc_atomic_float10_wr
2144 #if KMP_HAVE_QUAD
2145 ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
2146 1) // __kmpc_atomic_float16_wr
2147 #endif // KMP_HAVE_QUAD
2148 ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
2149 ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
2150 1) // __kmpc_atomic_cmplx8_wr
2151 ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
2152 1) // __kmpc_atomic_cmplx10_wr
2153 #if KMP_HAVE_QUAD
2154 ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
2155 1) // __kmpc_atomic_cmplx16_wr
2156 #if (KMP_ARCH_X86)
2157 ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
2158 1) // __kmpc_atomic_float16_a16_wr
2159 ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
2160 1) // __kmpc_atomic_cmplx16_a16_wr
2161 #endif // (KMP_ARCH_X86)
2162 #endif // KMP_HAVE_QUAD
2163
2164 // ------------------------------------------------------------------------
2165 // Atomic CAPTURE routines
2166
2167 // Beginning of a definition (provides name, parameters, debug trace)
2168 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2169 // fixed)
2170 // OP_ID - operation identifier (add, sub, mul, ...)
2171 // TYPE - operands' type 2172 #define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ 2173 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \ 2174 TYPE *lhs, TYPE rhs, int flag) { \ 2175 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2176 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 2177 2178 // ------------------------------------------------------------------------- 2179 // Operation on *lhs, rhs bound by critical section 2180 // OP - operator (it's supposed to contain an assignment) 2181 // LCK_ID - lock identifier 2182 // Note: don't check gtid as it should always be valid 2183 // 1, 2-byte - expect valid parameter, other - check before this macro 2184 #define OP_CRITICAL_CPT(OP, LCK_ID) \ 2185 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2186 \ 2187 if (flag) { \ 2188 (*lhs) OP rhs; \ 2189 new_value = (*lhs); \ 2190 } else { \ 2191 new_value = (*lhs); \ 2192 (*lhs) OP rhs; \ 2193 } \ 2194 \ 2195 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2196 return new_value; 2197 2198 #define OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) \ 2199 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2200 \ 2201 if (flag) { \ 2202 (*lhs) = (TYPE)((*lhs)OP rhs); \ 2203 new_value = (*lhs); \ 2204 } else { \ 2205 new_value = (*lhs); \ 2206 (*lhs) = (TYPE)((*lhs)OP rhs); \ 2207 } \ 2208 \ 2209 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2210 return new_value; 2211 2212 // ------------------------------------------------------------------------ 2213 #ifdef KMP_GOMP_COMPAT 2214 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) \ 2215 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2216 KMP_CHECK_GTID; \ 2217 OP_UPDATE_CRITICAL_CPT(TYPE, OP, 0); \ 2218 } 2219 #else 2220 #define OP_GOMP_CRITICAL_CPT(TYPE, OP, FLAG) 2221 #endif /* KMP_GOMP_COMPAT */ 2222 2223 // ------------------------------------------------------------------------ 2224 // Operation on *lhs, rhs using "compare_and_store" routine 2225 // TYPE - operands' type 2226 // BITS - size in bits, used to distinguish low level calls 2227 // OP - operator 2228 // Note: temp_val introduced in order to force the compiler to read 2229 // *lhs only once (w/o it the compiler reads *lhs twice) 2230 #define OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2231 { \ 2232 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2233 TYPE old_value, new_value; \ 2234 temp_val = *lhs; \ 2235 old_value = temp_val; \ 2236 new_value = (TYPE)(old_value OP rhs); \ 2237 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2238 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2239 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 2240 KMP_CPU_PAUSE(); \ 2241 \ 2242 temp_val = *lhs; \ 2243 old_value = temp_val; \ 2244 new_value = (TYPE)(old_value OP rhs); \ 2245 } \ 2246 if (flag) { \ 2247 return new_value; \ 2248 } else \ 2249 return old_value; \ 2250 } 2251 2252 // ------------------------------------------------------------------------- 2253 #define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2254 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2255 TYPE new_value; \ 2256 (void)new_value; \ 2257 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \ 2258 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2259 } 2260 2261 // ------------------------------------------------------------------------- 2262 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2263 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2264 TYPE old_value, new_value; \ 2265 (void)new_value; \ 2266 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \ 
2267 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ 2268 old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ 2269 if (flag) { \ 2270 return old_value OP rhs; \ 2271 } else \ 2272 return old_value; \ 2273 } 2274 // ------------------------------------------------------------------------- 2275 2276 ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +, 2277 0) // __kmpc_atomic_fixed4_add_cpt 2278 ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -, 2279 0) // __kmpc_atomic_fixed4_sub_cpt 2280 ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +, 2281 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt 2282 ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -, 2283 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt 2284 2285 ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +, 2286 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt 2287 ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -, 2288 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt 2289 ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +, 2290 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt 2291 ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -, 2292 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt 2293 2294 // ------------------------------------------------------------------------ 2295 // Entries definition for integer operands 2296 // TYPE_ID - operands type and size (fixed4, float4) 2297 // OP_ID - operation identifier (add, sub, mul, ...) 2298 // TYPE - operand type 2299 // BITS - size in bits, used to distinguish low level calls 2300 // OP - operator (used in critical section) 2301 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG 2302 // ------------------------------------------------------------------------ 2303 // Routines for ATOMIC integer operands, other operators 2304 // ------------------------------------------------------------------------ 2305 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 2306 ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +, 2307 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt 2308 ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &, 2309 0) // __kmpc_atomic_fixed1_andb_cpt 2310 ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /, 2311 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt 2312 ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /, 2313 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt 2314 ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *, 2315 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt 2316 ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |, 2317 0) // __kmpc_atomic_fixed1_orb_cpt 2318 ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<, 2319 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt 2320 ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>, 2321 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt 2322 ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>, 2323 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt 2324 ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -, 2325 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt 2326 ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^, 2327 0) // __kmpc_atomic_fixed1_xor_cpt 2328 ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +, 2329 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt 2330 ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &, 2331 0) // __kmpc_atomic_fixed2_andb_cpt 2332 ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /, 2333 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt 2334 ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /, 2335 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt 2336 ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *, 
2337 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
2338 ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
2339 0) // __kmpc_atomic_fixed2_orb_cpt
2340 ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
2341 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
2342 ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
2343 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
2344 ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
2345 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
2346 ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
2347 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
2348 ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
2349 0) // __kmpc_atomic_fixed2_xor_cpt
2350 ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
2351 0) // __kmpc_atomic_fixed4_andb_cpt
2352 ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
2353 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
2354 ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
2355 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
2356 ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
2357 KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
2358 ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
2359 0) // __kmpc_atomic_fixed4_orb_cpt
2360 ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
2361 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
2362 ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
2363 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
2364 ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
2365 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
2366 ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
2367 0) // __kmpc_atomic_fixed4_xor_cpt
2368 ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
2369 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
2370 ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
2371 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
2372 ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
2373 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
2374 ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
2375 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
2376 ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
2377 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
2378 ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
2379 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
2380 ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
2381 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
2382 ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
2383 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
2384 ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
2385 KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
2386 ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
2387 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
2388 ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
2389 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
2390 ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
2391 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
2392 ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
2393 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
2394 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
2395
2396 // CAPTURE routines for mixed types RHS=float16
2397 #if KMP_HAVE_QUAD
2398
2399 // Beginning of a definition (provides name, parameters, debug trace)
2400 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
2401 // fixed)
2402 // OP_ID - operation identifier (add, sub, mul, ...)
2403 // TYPE - operands' type 2404 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2405 TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \ 2406 ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \ 2407 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2408 KA_TRACE(100, \ 2409 ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \ 2410 gtid)); 2411 2412 // ------------------------------------------------------------------------- 2413 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ 2414 RTYPE, LCK_ID, MASK, GOMP_FLAG) \ 2415 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2416 TYPE new_value; \ 2417 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) \ 2418 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2419 } 2420 2421 // ------------------------------------------------------------------------- 2422 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ 2423 LCK_ID, GOMP_FLAG) \ 2424 ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ 2425 TYPE new_value; \ 2426 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \ 2427 OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \ 2428 } 2429 2430 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0, 2431 KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp 2432 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0, 2433 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp 2434 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0, 2435 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp 2436 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0, 2437 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp 2438 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0, 2439 KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp 2440 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0, 2441 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp 2442 ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0, 2443 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp 2444 ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0, 2445 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp 2446 2447 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1, 2448 KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp 2449 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1, 2450 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp 2451 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1, 2452 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp 2453 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1, 2454 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp 2455 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1, 2456 KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp 2457 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1, 2458 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp 2459 ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1, 2460 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp 2461 ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1, 2462 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp 2463 2464 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3, 2465 0) // __kmpc_atomic_fixed4_add_cpt_fp 2466 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3, 2467 0) // 
__kmpc_atomic_fixed4u_add_cpt_fp 2468 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 2469 0) // __kmpc_atomic_fixed4_sub_cpt_fp 2470 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 2471 0) // __kmpc_atomic_fixed4u_sub_cpt_fp 2472 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 2473 0) // __kmpc_atomic_fixed4_mul_cpt_fp 2474 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 2475 0) // __kmpc_atomic_fixed4u_mul_cpt_fp 2476 ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3, 2477 0) // __kmpc_atomic_fixed4_div_cpt_fp 2478 ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3, 2479 0) // __kmpc_atomic_fixed4u_div_cpt_fp 2480 2481 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7, 2482 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp 2483 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7, 2484 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp 2485 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7, 2486 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp 2487 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7, 2488 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp 2489 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7, 2490 KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp 2491 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7, 2492 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp 2493 ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7, 2494 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp 2495 ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7, 2496 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp 2497 2498 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3, 2499 KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp 2500 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3, 2501 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp 2502 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3, 2503 KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp 2504 ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3, 2505 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp 2506 2507 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7, 2508 KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp 2509 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7, 2510 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp 2511 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7, 2512 KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp 2513 ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7, 2514 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp 2515 2516 ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r, 2517 1) // __kmpc_atomic_float10_add_cpt_fp 2518 ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r, 2519 1) // __kmpc_atomic_float10_sub_cpt_fp 2520 ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r, 2521 1) // __kmpc_atomic_float10_mul_cpt_fp 2522 ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r, 2523 1) // __kmpc_atomic_float10_div_cpt_fp 2524 2525 #endif // KMP_HAVE_QUAD 2526 2527 // 
------------------------------------------------------------------------ 2528 // Routines for C/C++ Reduction operators && and || 2529 2530 // ------------------------------------------------------------------------- 2531 // Operation on *lhs, rhs bound by critical section 2532 // OP - operator (it's supposed to contain an assignment) 2533 // LCK_ID - lock identifier 2534 // Note: don't check gtid as it should always be valid 2535 // 1, 2-byte - expect valid parameter, other - check before this macro 2536 #define OP_CRITICAL_L_CPT(OP, LCK_ID) \ 2537 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2538 \ 2539 if (flag) { \ 2540 new_value OP rhs; \ 2541 (*lhs) = new_value; \ 2542 } else { \ 2543 new_value = (*lhs); \ 2544 (*lhs) OP rhs; \ 2545 } \ 2546 \ 2547 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); 2548 2549 // ------------------------------------------------------------------------ 2550 #ifdef KMP_GOMP_COMPAT 2551 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \ 2552 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2553 KMP_CHECK_GTID; \ 2554 OP_CRITICAL_L_CPT(OP, 0); \ 2555 return new_value; \ 2556 } 2557 #else 2558 #define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) 2559 #endif /* KMP_GOMP_COMPAT */ 2560 2561 // ------------------------------------------------------------------------ 2562 // Need separate macros for &&, || because there is no combined assignment 2563 #define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2564 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2565 TYPE new_value; \ 2566 (void)new_value; \ 2567 OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \ 2568 OP_CMPXCHG_CPT(TYPE, BITS, OP) \ 2569 } 2570 2571 ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&, 2572 KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt 2573 ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||, 2574 KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt 2575 ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&, 2576 KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt 2577 ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||, 2578 KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt 2579 ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&, 2580 0) // __kmpc_atomic_fixed4_andl_cpt 2581 ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||, 2582 0) // __kmpc_atomic_fixed4_orl_cpt 2583 ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&, 2584 KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt 2585 ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||, 2586 KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt 2587 2588 // ------------------------------------------------------------------------- 2589 // Routines for Fortran operators that matched no one in C: 2590 // MAX, MIN, .EQV., .NEQV. 2591 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt 2592 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt 2593 2594 // ------------------------------------------------------------------------- 2595 // MIN and MAX need separate macros 2596 // OP - operator to check if we need any actions? 2597 #define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \ 2598 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2599 \ 2600 if (*lhs OP rhs) { /* still need actions? 
*/ \ 2601 old_value = *lhs; \ 2602 *lhs = rhs; \ 2603 if (flag) \ 2604 new_value = rhs; \ 2605 else \ 2606 new_value = old_value; \ 2607 } else { \ 2608 new_value = *lhs; \ 2609 } \ 2610 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2611 return new_value; 2612 2613 // ------------------------------------------------------------------------- 2614 #ifdef KMP_GOMP_COMPAT 2615 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \ 2616 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2617 KMP_CHECK_GTID; \ 2618 MIN_MAX_CRITSECT_CPT(OP, 0); \ 2619 } 2620 #else 2621 #define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) 2622 #endif /* KMP_GOMP_COMPAT */ 2623 2624 // ------------------------------------------------------------------------- 2625 #define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \ 2626 { \ 2627 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2628 /*TYPE old_value; */ \ 2629 temp_val = *lhs; \ 2630 old_value = temp_val; \ 2631 while (old_value OP rhs && /* still need actions? */ \ 2632 !KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2633 (kmp_int##BITS *)lhs, \ 2634 *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2635 *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \ 2636 KMP_CPU_PAUSE(); \ 2637 temp_val = *lhs; \ 2638 old_value = temp_val; \ 2639 } \ 2640 if (flag) \ 2641 return rhs; \ 2642 else \ 2643 return old_value; \ 2644 } 2645 2646 // ------------------------------------------------------------------------- 2647 // 1-byte, 2-byte operands - use critical section 2648 #define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2649 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2650 TYPE new_value, old_value; \ 2651 if (*lhs OP rhs) { /* need actions? */ \ 2652 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \ 2653 MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \ 2654 } \ 2655 return *lhs; \ 2656 } 2657 2658 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2659 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2660 TYPE new_value, old_value; \ 2661 (void)new_value; \ 2662 if (*lhs OP rhs) { \ 2663 GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \ 2664 MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \ 2665 } \ 2666 return *lhs; \ 2667 } 2668 2669 MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <, 2670 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt 2671 MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >, 2672 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt 2673 MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <, 2674 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt 2675 MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >, 2676 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt 2677 MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <, 2678 0) // __kmpc_atomic_fixed4_max_cpt 2679 MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >, 2680 0) // __kmpc_atomic_fixed4_min_cpt 2681 MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <, 2682 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt 2683 MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >, 2684 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt 2685 MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <, 2686 KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt 2687 MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >, 2688 KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt 2689 MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <, 2690 KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt 2691 MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >, 2692 KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt 2693 #if KMP_HAVE_QUAD 2694 MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r, 2695 1) // 
__kmpc_atomic_float16_max_cpt
2696 MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
2697 1) // __kmpc_atomic_float16_min_cpt
2698 #if (KMP_ARCH_X86)
2699 MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
2700 1) // __kmpc_atomic_float16_max_a16_cpt
2701 MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
2702 1) // __kmpc_atomic_float16_min_a16_cpt
2703 #endif // (KMP_ARCH_X86)
2704 #endif // KMP_HAVE_QUAD
2705
2706 // ------------------------------------------------------------------------
2707 #ifdef KMP_GOMP_COMPAT
2708 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
2709 if ((FLAG) && (__kmp_atomic_mode == 2)) { \
2710 KMP_CHECK_GTID; \
2711 OP_CRITICAL_CPT(OP, 0); \
2712 }
2713 #else
2714 #define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
2715 #endif /* KMP_GOMP_COMPAT */
2716 // ------------------------------------------------------------------------
2717 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
2718 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2719 TYPE new_value; \
2720 (void)new_value; \
2721 OP_GOMP_CRITICAL_EQV_CPT(^= (TYPE) ~, GOMP_FLAG) /* send assignment */ \
2722 OP_CMPXCHG_CPT(TYPE, BITS, OP) \
2723 }
2724
2725 // ------------------------------------------------------------------------
2726
2727 ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
2728 KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
2729 ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
2730 KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
2731 ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
2732 KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
2733 ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
2734 KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
2735 ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
2736 KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
2737 ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
2738 KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
2739 ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
2740 KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
2741 ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
2742 KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
2743
2744 // ------------------------------------------------------------------------
2745 // Routines for Extended types: long double, _Quad, complex flavours (use
2746 // critical section)
2747 // TYPE_ID, OP_ID, TYPE - detailed above
2748 // OP - operator
2749 // LCK_ID - lock identifier, used to possibly distinguish lock variable
2750 #define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
2751 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
2752 TYPE new_value; \
2753 OP_GOMP_CRITICAL_CPT(TYPE, OP, GOMP_FLAG) /* send assignment */ \
2754 OP_UPDATE_CRITICAL_CPT(TYPE, OP, LCK_ID) /* send assignment */ \
2755 }
2756
2757 // ------------------------------------------------------------------------
2758 // Workaround for cmplx4. Regular routines with return value don't work
2759 // on Win_32e. Let's return captured values through the additional parameter.
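/* A minimal sketch of that capture-through-out-parameter shape, with
   std::complex<float> standing in for kmp_cmplx32; the helper name is not
   part of the runtime, and the locking that OP_CRITICAL_CPT_WRK provides is
   omitted here. `flag` selects whether the value written to *out is taken
   after (non-zero) or before (zero) the update, exactly as in the
   value-returning capture routines.
@code
#include <complex>

void cmplx4_add_cpt_sketch(std::complex<float> *lhs, std::complex<float> rhs,
                           std::complex<float> *out, int flag) {
  if (flag) {
    *lhs += rhs;  // update first ...
    *out = *lhs;  // ... then capture the new value
  } else {
    *out = *lhs;  // capture the old value ...
    *lhs += rhs;  // ... then update
  }
}
@endcode
*/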
2760 #define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \ 2761 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2762 \ 2763 if (flag) { \ 2764 (*lhs) OP rhs; \ 2765 (*out) = (*lhs); \ 2766 } else { \ 2767 (*out) = (*lhs); \ 2768 (*lhs) OP rhs; \ 2769 } \ 2770 \ 2771 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2772 return; 2773 // ------------------------------------------------------------------------ 2774 2775 #ifdef KMP_GOMP_COMPAT 2776 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \ 2777 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2778 KMP_CHECK_GTID; \ 2779 OP_CRITICAL_CPT_WRK(OP## =, 0); \ 2780 } 2781 #else 2782 #define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) 2783 #endif /* KMP_GOMP_COMPAT */ 2784 // ------------------------------------------------------------------------ 2785 2786 #define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 2787 void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \ 2788 TYPE rhs, TYPE *out, int flag) { \ 2789 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 2790 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); 2791 // ------------------------------------------------------------------------ 2792 2793 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 2794 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 2795 OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \ 2796 OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \ 2797 } 2798 // The end of workaround for cmplx4 2799 2800 /* ------------------------------------------------------------------------- */ 2801 // routines for long double type 2802 ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r, 2803 1) // __kmpc_atomic_float10_add_cpt 2804 ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r, 2805 1) // __kmpc_atomic_float10_sub_cpt 2806 ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r, 2807 1) // __kmpc_atomic_float10_mul_cpt 2808 ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r, 2809 1) // __kmpc_atomic_float10_div_cpt 2810 #if KMP_HAVE_QUAD 2811 // routines for _Quad type 2812 ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r, 2813 1) // __kmpc_atomic_float16_add_cpt 2814 ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r, 2815 1) // __kmpc_atomic_float16_sub_cpt 2816 ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r, 2817 1) // __kmpc_atomic_float16_mul_cpt 2818 ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r, 2819 1) // __kmpc_atomic_float16_div_cpt 2820 #if (KMP_ARCH_X86) 2821 ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r, 2822 1) // __kmpc_atomic_float16_add_a16_cpt 2823 ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r, 2824 1) // __kmpc_atomic_float16_sub_a16_cpt 2825 ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r, 2826 1) // __kmpc_atomic_float16_mul_a16_cpt 2827 ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r, 2828 1) // __kmpc_atomic_float16_div_a16_cpt 2829 #endif // (KMP_ARCH_X86) 2830 #endif // KMP_HAVE_QUAD 2831 2832 // routines for complex types 2833 2834 // cmplx4 routines to return void 2835 ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c, 2836 1) // __kmpc_atomic_cmplx4_add_cpt 2837 ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 2838 1) // __kmpc_atomic_cmplx4_sub_cpt 2839 ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 2840 1) // __kmpc_atomic_cmplx4_mul_cpt 2841 ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c, 2842 1) // __kmpc_atomic_cmplx4_div_cpt 2843 2844 ATOMIC_CRITICAL_CPT(cmplx8, 
add_cpt, kmp_cmplx64, +, 16c, 2845 1) // __kmpc_atomic_cmplx8_add_cpt 2846 ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 2847 1) // __kmpc_atomic_cmplx8_sub_cpt 2848 ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 2849 1) // __kmpc_atomic_cmplx8_mul_cpt 2850 ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c, 2851 1) // __kmpc_atomic_cmplx8_div_cpt 2852 ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c, 2853 1) // __kmpc_atomic_cmplx10_add_cpt 2854 ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 2855 1) // __kmpc_atomic_cmplx10_sub_cpt 2856 ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 2857 1) // __kmpc_atomic_cmplx10_mul_cpt 2858 ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c, 2859 1) // __kmpc_atomic_cmplx10_div_cpt 2860 #if KMP_HAVE_QUAD 2861 ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c, 2862 1) // __kmpc_atomic_cmplx16_add_cpt 2863 ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 2864 1) // __kmpc_atomic_cmplx16_sub_cpt 2865 ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 2866 1) // __kmpc_atomic_cmplx16_mul_cpt 2867 ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c, 2868 1) // __kmpc_atomic_cmplx16_div_cpt 2869 #if (KMP_ARCH_X86) 2870 ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 2871 1) // __kmpc_atomic_cmplx16_add_a16_cpt 2872 ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 2873 1) // __kmpc_atomic_cmplx16_sub_a16_cpt 2874 ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 2875 1) // __kmpc_atomic_cmplx16_mul_a16_cpt 2876 ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 2877 1) // __kmpc_atomic_cmplx16_div_a16_cpt 2878 #endif // (KMP_ARCH_X86) 2879 #endif // KMP_HAVE_QUAD 2880 2881 // OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr 2882 // binop x; v = x; } for non-commutative operations. 
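// For example (an illustrative reading of the reversed forms): a capture of
//   { v = x; x = expr - x; }
// maps onto the *_sub_cpt_rev entry points; they compute
//   new_value = rhs OP old_value   (operands reversed, here rhs - old_value)
// and return new_value when 'flag' is nonzero, or old_value when it is zero.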
2883 // Supported only on IA-32 architecture and Intel(R) 64 2884 2885 // ------------------------------------------------------------------------- 2886 // Operation on *lhs, rhs bound by critical section 2887 // OP - operator (it's supposed to contain an assignment) 2888 // LCK_ID - lock identifier 2889 // Note: don't check gtid as it should always be valid 2890 // 1, 2-byte - expect valid parameter, other - check before this macro 2891 #define OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \ 2892 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2893 \ 2894 if (flag) { \ 2895 /*temp_val = (*lhs);*/ \ 2896 (*lhs) = (TYPE)((rhs)OP(*lhs)); \ 2897 new_value = (*lhs); \ 2898 } else { \ 2899 new_value = (*lhs); \ 2900 (*lhs) = (TYPE)((rhs)OP(*lhs)); \ 2901 } \ 2902 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 2903 return new_value; 2904 2905 // ------------------------------------------------------------------------ 2906 #ifdef KMP_GOMP_COMPAT 2907 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) \ 2908 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 2909 KMP_CHECK_GTID; \ 2910 OP_CRITICAL_CPT_REV(TYPE, OP, 0); \ 2911 } 2912 #else 2913 #define OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, FLAG) 2914 #endif /* KMP_GOMP_COMPAT */ 2915 2916 // ------------------------------------------------------------------------ 2917 // Operation on *lhs, rhs using "compare_and_store" routine 2918 // TYPE - operands' type 2919 // BITS - size in bits, used to distinguish low level calls 2920 // OP - operator 2921 // Note: temp_val introduced in order to force the compiler to read 2922 // *lhs only once (w/o it the compiler reads *lhs twice) 2923 #define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ 2924 { \ 2925 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2926 TYPE old_value, new_value; \ 2927 temp_val = *lhs; \ 2928 old_value = temp_val; \ 2929 new_value = (TYPE)(rhs OP old_value); \ 2930 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 2931 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 2932 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 2933 KMP_CPU_PAUSE(); \ 2934 \ 2935 temp_val = *lhs; \ 2936 old_value = temp_val; \ 2937 new_value = (TYPE)(rhs OP old_value); \ 2938 } \ 2939 if (flag) { \ 2940 return new_value; \ 2941 } else \ 2942 return old_value; \ 2943 } 2944 2945 // ------------------------------------------------------------------------- 2946 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ 2947 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 2948 TYPE new_value; \ 2949 (void)new_value; \ 2950 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \ 2951 OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ 2952 } 2953 2954 ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /, 2955 KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev 2956 ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /, 2957 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev 2958 ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<, 2959 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev 2960 ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>, 2961 KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev 2962 ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, 2963 KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev 2964 ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -, 2965 KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev 2966 ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /, 2967 KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev 2968 ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, 
kmp_uint16, 16, /, 2969 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev 2970 ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<, 2971 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev 2972 ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>, 2973 KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev 2974 ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, 2975 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev 2976 ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -, 2977 KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev 2978 ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /, 2979 KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev 2980 ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /, 2981 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev 2982 ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<, 2983 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev 2984 ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>, 2985 KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev 2986 ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, 2987 KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev 2988 ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -, 2989 KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev 2990 ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /, 2991 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev 2992 ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /, 2993 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev 2994 ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<, 2995 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev 2996 ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>, 2997 KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev 2998 ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, 2999 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev 3000 ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -, 3001 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev 3002 ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /, 3003 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev 3004 ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -, 3005 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev 3006 ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /, 3007 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev 3008 ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -, 3009 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev 3010 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 3011 3012 // ------------------------------------------------------------------------ 3013 // Routines for Extended types: long double, _Quad, complex flavours (use 3014 // critical section) 3015 // TYPE_ID, OP_ID, TYPE - detailed above 3016 // OP - operator 3017 // LCK_ID - lock identifier, used to possibly distinguish lock variable 3018 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ 3019 ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ 3020 TYPE new_value; \ 3021 /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \ 3022 OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) \ 3023 OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) \ 3024 } 3025 3026 /* ------------------------------------------------------------------------- */ 3027 // routines for long double type 3028 ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r, 3029 1) // __kmpc_atomic_float10_sub_cpt_rev 3030 ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long 
double, /, 10r, 3031 1) // __kmpc_atomic_float10_div_cpt_rev 3032 #if KMP_HAVE_QUAD 3033 // routines for _Quad type 3034 ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 3035 1) // __kmpc_atomic_float16_sub_cpt_rev 3036 ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 3037 1) // __kmpc_atomic_float16_div_cpt_rev 3038 #if (KMP_ARCH_X86) 3039 ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 3040 1) // __kmpc_atomic_float16_sub_a16_cpt_rev 3041 ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 3042 1) // __kmpc_atomic_float16_div_a16_cpt_rev 3043 #endif // (KMP_ARCH_X86) 3044 #endif // KMP_HAVE_QUAD 3045 3046 // routines for complex types 3047 3048 // ------------------------------------------------------------------------ 3049 // Workaround for cmplx4. Regular routines with return value don't work 3050 // on Win_32e. Let's return captured values through the additional parameter. 3051 #define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \ 3052 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3053 \ 3054 if (flag) { \ 3055 (*lhs) = (rhs)OP(*lhs); \ 3056 (*out) = (*lhs); \ 3057 } else { \ 3058 (*out) = (*lhs); \ 3059 (*lhs) = (rhs)OP(*lhs); \ 3060 } \ 3061 \ 3062 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3063 return; 3064 // ------------------------------------------------------------------------ 3065 3066 #ifdef KMP_GOMP_COMPAT 3067 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \ 3068 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3069 KMP_CHECK_GTID; \ 3070 OP_CRITICAL_CPT_REV_WRK(OP, 0); \ 3071 } 3072 #else 3073 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) 3074 #endif /* KMP_GOMP_COMPAT */ 3075 // ------------------------------------------------------------------------ 3076 3077 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \ 3078 GOMP_FLAG) \ 3079 ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ 3080 OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \ 3081 OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \ 3082 } 3083 // The end of workaround for cmplx4 3084 3085 // !!! 
// TODO: check if we need to return void for cmplx4 routines
// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
                            1) // __kmpc_atomic_cmplx4_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
                            1) // __kmpc_atomic_cmplx4_div_cpt_rev

ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
                        1) // __kmpc_atomic_cmplx8_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
                        1) // __kmpc_atomic_cmplx8_div_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
                        1) // __kmpc_atomic_cmplx10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
                        1) // __kmpc_atomic_cmplx10_div_cpt_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// Capture reverse for mixed type: RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
// fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID,   \
                                   RTYPE, LCK_ID, MASK, GOMP_FLAG)             \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG)                                \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP)                                           \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                                    LCK_ID, GOMP_FLAG)                         \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE)                  \
  TYPE new_value;                                                              \
  OP_GOMP_CRITICAL_CPT_REV(TYPE, OP, GOMP_FLAG) /* send assignment */          \
  OP_CRITICAL_CPT_REV(TYPE, OP, LCK_ID) /* send assignment */                  \
  }

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp 3155 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i, 3156 1, 3157 KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp 3158 3159 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i, 3160 3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp 3161 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad, 3162 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp 3163 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i, 3164 3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp 3165 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad, 3166 4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp 3167 3168 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i, 3169 7, 3170 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp 3171 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad, 3172 8i, 7, 3173 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp 3174 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i, 3175 7, 3176 KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp 3177 ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad, 3178 8i, 7, 3179 KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp 3180 3181 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad, 3182 4r, 3, 3183 KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp 3184 ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad, 3185 4r, 3, 3186 KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp 3187 3188 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad, 3189 8r, 7, 3190 KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp 3191 ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad, 3192 8r, 7, 3193 KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp 3194 3195 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad, 3196 10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp 3197 ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad, 3198 10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp 3199 3200 #endif // KMP_HAVE_QUAD 3201 3202 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;} 3203 3204 #define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3205 TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \ 3206 TYPE rhs) { \ 3207 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 3208 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid)); 3209 3210 #define CRITICAL_SWP(LCK_ID) \ 3211 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3212 \ 3213 old_value = (*lhs); \ 3214 (*lhs) = rhs; \ 3215 \ 3216 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3217 return old_value; 3218 3219 // ------------------------------------------------------------------------ 3220 #ifdef KMP_GOMP_COMPAT 3221 #define GOMP_CRITICAL_SWP(FLAG) \ 3222 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3223 KMP_CHECK_GTID; \ 3224 CRITICAL_SWP(0); \ 3225 } 3226 #else 3227 #define GOMP_CRITICAL_SWP(FLAG) 3228 #endif /* KMP_GOMP_COMPAT */ 3229 3230 #define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ 3231 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3232 TYPE old_value; \ 3233 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3234 old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \ 3235 return old_value; \ 3236 } 3237 // 
------------------------------------------------------------------------ 3238 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ 3239 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3240 TYPE old_value; \ 3241 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3242 old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \ 3243 return old_value; \ 3244 } 3245 3246 // ------------------------------------------------------------------------ 3247 #define CMPXCHG_SWP(TYPE, BITS) \ 3248 { \ 3249 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 3250 TYPE old_value, new_value; \ 3251 temp_val = *lhs; \ 3252 old_value = temp_val; \ 3253 new_value = rhs; \ 3254 while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ 3255 (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ 3256 *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ 3257 KMP_CPU_PAUSE(); \ 3258 \ 3259 temp_val = *lhs; \ 3260 old_value = temp_val; \ 3261 new_value = rhs; \ 3262 } \ 3263 return old_value; \ 3264 } 3265 3266 // ------------------------------------------------------------------------- 3267 #define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ 3268 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3269 TYPE old_value; \ 3270 (void)old_value; \ 3271 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3272 CMPXCHG_SWP(TYPE, BITS) \ 3273 } 3274 3275 ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp 3276 ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp 3277 ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp 3278 3279 ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32, 3280 KMP_ARCH_X86) // __kmpc_atomic_float4_swp 3281 3282 #if (KMP_ARCH_X86) 3283 ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64, 3284 KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp 3285 ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64, 3286 KMP_ARCH_X86) // __kmpc_atomic_float8_swp 3287 #else 3288 ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp 3289 ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64, 3290 KMP_ARCH_X86) // __kmpc_atomic_float8_swp 3291 #endif // (KMP_ARCH_X86) 3292 3293 // ------------------------------------------------------------------------ 3294 // Routines for Extended types: long double, _Quad, complex flavours (use 3295 // critical section) 3296 #define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \ 3297 ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ 3298 TYPE old_value; \ 3299 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 3300 CRITICAL_SWP(LCK_ID) \ 3301 } 3302 3303 // ------------------------------------------------------------------------ 3304 // !!! TODO: check if we need to return void for cmplx4 routines 3305 // Workaround for cmplx4. Regular routines with return value don't work 3306 // on Win_32e. Let's return captured values through the additional parameter. 
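// A short sketch of the swap semantics in this block (illustrative): for an
// OpenMP capture-write
//   { v = x; x = expr; }
// each _swp entry point atomically performs { old = *lhs; *lhs = rhs; } and
// hands the old value back to the caller; for cmplx4 it is written through
// the extra 'out' parameter instead of being returned.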
3307 3308 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \ 3309 void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \ 3310 TYPE rhs, TYPE *out) { \ 3311 KMP_DEBUG_ASSERT(__kmp_init_serial); \ 3312 KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid)); 3313 3314 #define CRITICAL_SWP_WRK(LCK_ID) \ 3315 __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3316 \ 3317 tmp = (*lhs); \ 3318 (*lhs) = (rhs); \ 3319 (*out) = tmp; \ 3320 __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ 3321 return; 3322 // ------------------------------------------------------------------------ 3323 3324 #ifdef KMP_GOMP_COMPAT 3325 #define GOMP_CRITICAL_SWP_WRK(FLAG) \ 3326 if ((FLAG) && (__kmp_atomic_mode == 2)) { \ 3327 KMP_CHECK_GTID; \ 3328 CRITICAL_SWP_WRK(0); \ 3329 } 3330 #else 3331 #define GOMP_CRITICAL_SWP_WRK(FLAG) 3332 #endif /* KMP_GOMP_COMPAT */ 3333 // ------------------------------------------------------------------------ 3334 3335 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \ 3336 ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \ 3337 TYPE tmp; \ 3338 GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \ 3339 CRITICAL_SWP_WRK(LCK_ID) \ 3340 } 3341 // The end of workaround for cmplx4 3342 3343 ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp 3344 #if KMP_HAVE_QUAD 3345 ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp 3346 #endif // KMP_HAVE_QUAD 3347 // cmplx4 routine to return void 3348 ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp 3349 3350 // ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // 3351 // __kmpc_atomic_cmplx4_swp 3352 3353 ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp 3354 ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp 3355 #if KMP_HAVE_QUAD 3356 ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp 3357 #if (KMP_ARCH_X86) 3358 ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r, 3359 1) // __kmpc_atomic_float16_a16_swp 3360 ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c, 3361 1) // __kmpc_atomic_cmplx16_a16_swp 3362 #endif // (KMP_ARCH_X86) 3363 #endif // KMP_HAVE_QUAD 3364 3365 // End of OpenMP 4.0 Capture 3366 3367 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 3368 3369 #undef OP_CRITICAL 3370 3371 /* ------------------------------------------------------------------------ */ 3372 /* Generic atomic routines */ 3373 3374 void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3375 void (*f)(void *, void *, void *)) { 3376 KMP_DEBUG_ASSERT(__kmp_init_serial); 3377 3378 if ( 3379 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3380 FALSE /* must use lock */ 3381 #else 3382 TRUE 3383 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3384 ) { 3385 kmp_int8 old_value, new_value; 3386 3387 old_value = *(kmp_int8 *)lhs; 3388 (*f)(&new_value, &old_value, rhs); 3389 3390 /* TODO: Should this be acquire or release? */ 3391 while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value, 3392 *(kmp_int8 *)&new_value)) { 3393 KMP_CPU_PAUSE(); 3394 3395 old_value = *(kmp_int8 *)lhs; 3396 (*f)(&new_value, &old_value, rhs); 3397 } 3398 3399 return; 3400 } else { 3401 // All 1-byte data is of integer data type. 
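    // When the lock-free path cannot be used, the update runs under a lock:
    // with KMP_GOMP_COMPAT and __kmp_atomic_mode == 2 the single global
    // __kmp_atomic_lock is acquired (the same lock taken by
    // __kmpc_atomic_start/__kmpc_atomic_end), so these updates serialize with
    // GOMP-style atomic regions; otherwise the size-specific
    // __kmp_atomic_lock_1i is used.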
3402 3403 #ifdef KMP_GOMP_COMPAT 3404 if (__kmp_atomic_mode == 2) { 3405 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3406 } else 3407 #endif /* KMP_GOMP_COMPAT */ 3408 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid); 3409 3410 (*f)(lhs, lhs, rhs); 3411 3412 #ifdef KMP_GOMP_COMPAT 3413 if (__kmp_atomic_mode == 2) { 3414 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3415 } else 3416 #endif /* KMP_GOMP_COMPAT */ 3417 __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid); 3418 } 3419 } 3420 3421 void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3422 void (*f)(void *, void *, void *)) { 3423 if ( 3424 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3425 FALSE /* must use lock */ 3426 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 3427 TRUE /* no alignment problems */ 3428 #else 3429 !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */ 3430 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3431 ) { 3432 kmp_int16 old_value, new_value; 3433 3434 old_value = *(kmp_int16 *)lhs; 3435 (*f)(&new_value, &old_value, rhs); 3436 3437 /* TODO: Should this be acquire or release? */ 3438 while (!KMP_COMPARE_AND_STORE_ACQ16( 3439 (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) { 3440 KMP_CPU_PAUSE(); 3441 3442 old_value = *(kmp_int16 *)lhs; 3443 (*f)(&new_value, &old_value, rhs); 3444 } 3445 3446 return; 3447 } else { 3448 // All 2-byte data is of integer data type. 3449 3450 #ifdef KMP_GOMP_COMPAT 3451 if (__kmp_atomic_mode == 2) { 3452 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3453 } else 3454 #endif /* KMP_GOMP_COMPAT */ 3455 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid); 3456 3457 (*f)(lhs, lhs, rhs); 3458 3459 #ifdef KMP_GOMP_COMPAT 3460 if (__kmp_atomic_mode == 2) { 3461 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3462 } else 3463 #endif /* KMP_GOMP_COMPAT */ 3464 __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid); 3465 } 3466 } 3467 3468 void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3469 void (*f)(void *, void *, void *)) { 3470 KMP_DEBUG_ASSERT(__kmp_init_serial); 3471 3472 if ( 3473 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints. 3474 // Gomp compatibility is broken if this routine is called for floats. 3475 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 3476 TRUE /* no alignment problems */ 3477 #else 3478 !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */ 3479 #endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 3480 ) { 3481 kmp_int32 old_value, new_value; 3482 3483 old_value = *(kmp_int32 *)lhs; 3484 (*f)(&new_value, &old_value, rhs); 3485 3486 /* TODO: Should this be acquire or release? */ 3487 while (!KMP_COMPARE_AND_STORE_ACQ32( 3488 (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) { 3489 KMP_CPU_PAUSE(); 3490 3491 old_value = *(kmp_int32 *)lhs; 3492 (*f)(&new_value, &old_value, rhs); 3493 } 3494 3495 return; 3496 } else { 3497 // Use __kmp_atomic_lock_4i for all 4-byte data, 3498 // even if it isn't of integer data type. 
3499 3500 #ifdef KMP_GOMP_COMPAT 3501 if (__kmp_atomic_mode == 2) { 3502 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3503 } else 3504 #endif /* KMP_GOMP_COMPAT */ 3505 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid); 3506 3507 (*f)(lhs, lhs, rhs); 3508 3509 #ifdef KMP_GOMP_COMPAT 3510 if (__kmp_atomic_mode == 2) { 3511 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3512 } else 3513 #endif /* KMP_GOMP_COMPAT */ 3514 __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid); 3515 } 3516 } 3517 3518 void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3519 void (*f)(void *, void *, void *)) { 3520 KMP_DEBUG_ASSERT(__kmp_init_serial); 3521 if ( 3522 3523 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3524 FALSE /* must use lock */ 3525 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 3526 TRUE /* no alignment problems */ 3527 #else 3528 !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */ 3529 #endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 3530 ) { 3531 kmp_int64 old_value, new_value; 3532 3533 old_value = *(kmp_int64 *)lhs; 3534 (*f)(&new_value, &old_value, rhs); 3535 /* TODO: Should this be acquire or release? */ 3536 while (!KMP_COMPARE_AND_STORE_ACQ64( 3537 (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) { 3538 KMP_CPU_PAUSE(); 3539 3540 old_value = *(kmp_int64 *)lhs; 3541 (*f)(&new_value, &old_value, rhs); 3542 } 3543 3544 return; 3545 } else { 3546 // Use __kmp_atomic_lock_8i for all 8-byte data, 3547 // even if it isn't of integer data type. 3548 3549 #ifdef KMP_GOMP_COMPAT 3550 if (__kmp_atomic_mode == 2) { 3551 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3552 } else 3553 #endif /* KMP_GOMP_COMPAT */ 3554 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid); 3555 3556 (*f)(lhs, lhs, rhs); 3557 3558 #ifdef KMP_GOMP_COMPAT 3559 if (__kmp_atomic_mode == 2) { 3560 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3561 } else 3562 #endif /* KMP_GOMP_COMPAT */ 3563 __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid); 3564 } 3565 } 3566 3567 void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3568 void (*f)(void *, void *, void *)) { 3569 KMP_DEBUG_ASSERT(__kmp_init_serial); 3570 3571 #ifdef KMP_GOMP_COMPAT 3572 if (__kmp_atomic_mode == 2) { 3573 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3574 } else 3575 #endif /* KMP_GOMP_COMPAT */ 3576 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid); 3577 3578 (*f)(lhs, lhs, rhs); 3579 3580 #ifdef KMP_GOMP_COMPAT 3581 if (__kmp_atomic_mode == 2) { 3582 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3583 } else 3584 #endif /* KMP_GOMP_COMPAT */ 3585 __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid); 3586 } 3587 3588 void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3589 void (*f)(void *, void *, void *)) { 3590 KMP_DEBUG_ASSERT(__kmp_init_serial); 3591 3592 #ifdef KMP_GOMP_COMPAT 3593 if (__kmp_atomic_mode == 2) { 3594 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3595 } else 3596 #endif /* KMP_GOMP_COMPAT */ 3597 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid); 3598 3599 (*f)(lhs, lhs, rhs); 3600 3601 #ifdef KMP_GOMP_COMPAT 3602 if (__kmp_atomic_mode == 2) { 3603 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3604 } else 3605 #endif /* KMP_GOMP_COMPAT */ 3606 __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid); 3607 } 3608 3609 void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3610 void (*f)(void *, void *, void *)) { 3611 KMP_DEBUG_ASSERT(__kmp_init_serial); 
3612 3613 #ifdef KMP_GOMP_COMPAT 3614 if (__kmp_atomic_mode == 2) { 3615 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3616 } else 3617 #endif /* KMP_GOMP_COMPAT */ 3618 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid); 3619 3620 (*f)(lhs, lhs, rhs); 3621 3622 #ifdef KMP_GOMP_COMPAT 3623 if (__kmp_atomic_mode == 2) { 3624 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3625 } else 3626 #endif /* KMP_GOMP_COMPAT */ 3627 __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid); 3628 } 3629 3630 void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs, 3631 void (*f)(void *, void *, void *)) { 3632 KMP_DEBUG_ASSERT(__kmp_init_serial); 3633 3634 #ifdef KMP_GOMP_COMPAT 3635 if (__kmp_atomic_mode == 2) { 3636 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3637 } else 3638 #endif /* KMP_GOMP_COMPAT */ 3639 __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid); 3640 3641 (*f)(lhs, lhs, rhs); 3642 3643 #ifdef KMP_GOMP_COMPAT 3644 if (__kmp_atomic_mode == 2) { 3645 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3646 } else 3647 #endif /* KMP_GOMP_COMPAT */ 3648 __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid); 3649 } 3650 3651 // AC: same two routines as GOMP_atomic_start/end, but will be called by our 3652 // compiler; duplicated in order to not use 3-party names in pure Intel code 3653 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin. 3654 void __kmpc_atomic_start(void) { 3655 int gtid = __kmp_entry_gtid(); 3656 KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid)); 3657 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3658 } 3659 3660 void __kmpc_atomic_end(void) { 3661 int gtid = __kmp_get_gtid(); 3662 KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid)); 3663 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3664 } 3665 3666 /*! 3667 @} 3668 */ 3669 3670 // end of file 3671