1 /* 2 * kmp_atomic.cpp -- ATOMIC implementation routines 3 */ 4 5 6 //===----------------------------------------------------------------------===// 7 // 8 // The LLVM Compiler Infrastructure 9 // 10 // This file is dual licensed under the MIT and the University of Illinois Open 11 // Source Licenses. See LICENSE.txt for details. 12 // 13 //===----------------------------------------------------------------------===// 14 15 16 #include "kmp_atomic.h" 17 #include "kmp.h" // TRUE, asm routines prototypes 18 19 typedef unsigned char uchar; 20 typedef unsigned short ushort; 21 22 /*! 23 @defgroup ATOMIC_OPS Atomic Operations 24 These functions are used for implementing the many different varieties of atomic operations. 25 26 The compiler is at liberty to inline atomic operations that are naturally supported 27 by the target architecture. For instance on IA-32 architecture an atomic like this can be inlined 28 @code 29 static int s = 0; 30 #pragma omp atomic 31 s++; 32 @endcode 33 using the single instruction: `lock; incl s` 34 35 However the runtime does provide entrypoints for these operations to support compilers that choose 36 not to inline them. (For instance, `__kmpc_atomic_fixed4_add` could be used to perform the 37 increment above.) 38 39 The names of the functions are encoded by using the data type name and the operation name, as in these tables. 40 41 Data Type | Data type encoding 42 -----------|--------------- 43 int8_t | `fixed1` 44 uint8_t | `fixed1u` 45 int16_t | `fixed2` 46 uint16_t | `fixed2u` 47 int32_t | `fixed4` 48 uint32_t | `fixed4u` 49 int32_t | `fixed8` 50 uint32_t | `fixed8u` 51 float | `float4` 52 double | `float8` 53 float 10 (8087 eighty bit float) | `float10` 54 complex<float> | `cmplx4` 55 complex<double> | `cmplx8` 56 complex<float10> | `cmplx10` 57 <br> 58 59 Operation | Operation encoding 60 ----------|------------------- 61 + | add 62 - | sub 63 \* | mul 64 / | div 65 & | andb 66 << | shl 67 \>\> | shr 68 \| | orb 69 ^ | xor 70 && | andl 71 \|\| | orl 72 maximum | max 73 minimum | min 74 .eqv. | eqv 75 .neqv. | neqv 76 77 <br> 78 For non-commutative operations, `_rev` can also be added for the reversed operation. 79 For the functions that capture the result, the suffix `_cpt` is added. 80 81 Update Functions 82 ================ 83 The general form of an atomic function that just performs an update (without a `capture`) 84 @code 85 void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ); 86 @endcode 87 @param ident_t a pointer to source location 88 @param gtid the global thread id 89 @param lhs a pointer to the left operand 90 @param rhs the right operand 91 92 `capture` functions 93 =================== 94 The capture functions perform an atomic update and return a result, which is either the value 95 before the capture, or that after. They take an additional argument to determine which result is returned. 96 Their general form is therefore 97 @code 98 TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ); 99 @endcode 100 @param ident_t a pointer to source location 101 @param gtid the global thread id 102 @param lhs a pointer to the left operand 103 @param rhs the right operand 104 @param flag one if the result is to be captured *after* the operation, zero if captured *before*. 105 106 The one set of exceptions to this is the `complex<float>` type where the value is not returned, 107 rather an extra argument pointer is passed. 108 109 They look like 110 @code 111 void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag ); 112 @endcode 113 114 Read and Write Operations 115 ========================= 116 The OpenMP<sup>*</sup> standard now supports atomic operations that simply ensure that the 117 value is read or written atomically, with no modification 118 performed. In many cases on IA-32 architecture these operations can be inlined since 119 the architecture guarantees that no tearing occurs on aligned objects 120 accessed with a single memory operation of up to 64 bits in size. 121 122 The general form of the read operations is 123 @code 124 TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc ); 125 @endcode 126 127 For the write operations the form is 128 @code 129 void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ); 130 @endcode 131 132 Full list of functions 133 ====================== 134 This leads to the generation of 376 atomic functions, as follows. 135 136 Functons for integers 137 --------------------- 138 There are versions here for integers of size 1,2,4 and 8 bytes both signed and unsigned (where that matters). 139 @code 140 __kmpc_atomic_fixed1_add 141 __kmpc_atomic_fixed1_add_cpt 142 __kmpc_atomic_fixed1_add_fp 143 __kmpc_atomic_fixed1_andb 144 __kmpc_atomic_fixed1_andb_cpt 145 __kmpc_atomic_fixed1_andl 146 __kmpc_atomic_fixed1_andl_cpt 147 __kmpc_atomic_fixed1_div 148 __kmpc_atomic_fixed1_div_cpt 149 __kmpc_atomic_fixed1_div_cpt_rev 150 __kmpc_atomic_fixed1_div_float8 151 __kmpc_atomic_fixed1_div_fp 152 __kmpc_atomic_fixed1_div_rev 153 __kmpc_atomic_fixed1_eqv 154 __kmpc_atomic_fixed1_eqv_cpt 155 __kmpc_atomic_fixed1_max 156 __kmpc_atomic_fixed1_max_cpt 157 __kmpc_atomic_fixed1_min 158 __kmpc_atomic_fixed1_min_cpt 159 __kmpc_atomic_fixed1_mul 160 __kmpc_atomic_fixed1_mul_cpt 161 __kmpc_atomic_fixed1_mul_float8 162 __kmpc_atomic_fixed1_mul_fp 163 __kmpc_atomic_fixed1_neqv 164 __kmpc_atomic_fixed1_neqv_cpt 165 __kmpc_atomic_fixed1_orb 166 __kmpc_atomic_fixed1_orb_cpt 167 __kmpc_atomic_fixed1_orl 168 __kmpc_atomic_fixed1_orl_cpt 169 __kmpc_atomic_fixed1_rd 170 __kmpc_atomic_fixed1_shl 171 __kmpc_atomic_fixed1_shl_cpt 172 __kmpc_atomic_fixed1_shl_cpt_rev 173 __kmpc_atomic_fixed1_shl_rev 174 __kmpc_atomic_fixed1_shr 175 __kmpc_atomic_fixed1_shr_cpt 176 __kmpc_atomic_fixed1_shr_cpt_rev 177 __kmpc_atomic_fixed1_shr_rev 178 __kmpc_atomic_fixed1_sub 179 __kmpc_atomic_fixed1_sub_cpt 180 __kmpc_atomic_fixed1_sub_cpt_rev 181 __kmpc_atomic_fixed1_sub_fp 182 __kmpc_atomic_fixed1_sub_rev 183 __kmpc_atomic_fixed1_swp 184 __kmpc_atomic_fixed1_wr 185 __kmpc_atomic_fixed1_xor 186 __kmpc_atomic_fixed1_xor_cpt 187 __kmpc_atomic_fixed1u_add_fp 188 __kmpc_atomic_fixed1u_sub_fp 189 __kmpc_atomic_fixed1u_mul_fp 190 __kmpc_atomic_fixed1u_div 191 __kmpc_atomic_fixed1u_div_cpt 192 __kmpc_atomic_fixed1u_div_cpt_rev 193 __kmpc_atomic_fixed1u_div_fp 194 __kmpc_atomic_fixed1u_div_rev 195 __kmpc_atomic_fixed1u_shr 196 __kmpc_atomic_fixed1u_shr_cpt 197 __kmpc_atomic_fixed1u_shr_cpt_rev 198 __kmpc_atomic_fixed1u_shr_rev 199 __kmpc_atomic_fixed2_add 200 __kmpc_atomic_fixed2_add_cpt 201 __kmpc_atomic_fixed2_add_fp 202 __kmpc_atomic_fixed2_andb 203 __kmpc_atomic_fixed2_andb_cpt 204 __kmpc_atomic_fixed2_andl 205 __kmpc_atomic_fixed2_andl_cpt 206 __kmpc_atomic_fixed2_div 207 __kmpc_atomic_fixed2_div_cpt 208 __kmpc_atomic_fixed2_div_cpt_rev 209 __kmpc_atomic_fixed2_div_float8 210 __kmpc_atomic_fixed2_div_fp 211 __kmpc_atomic_fixed2_div_rev 212 __kmpc_atomic_fixed2_eqv 213 __kmpc_atomic_fixed2_eqv_cpt 214 __kmpc_atomic_fixed2_max 215 __kmpc_atomic_fixed2_max_cpt 216 __kmpc_atomic_fixed2_min 217 __kmpc_atomic_fixed2_min_cpt 218 __kmpc_atomic_fixed2_mul 219 __kmpc_atomic_fixed2_mul_cpt 220 __kmpc_atomic_fixed2_mul_float8 221 __kmpc_atomic_fixed2_mul_fp 222 __kmpc_atomic_fixed2_neqv 223 __kmpc_atomic_fixed2_neqv_cpt 224 __kmpc_atomic_fixed2_orb 225 __kmpc_atomic_fixed2_orb_cpt 226 __kmpc_atomic_fixed2_orl 227 __kmpc_atomic_fixed2_orl_cpt 228 __kmpc_atomic_fixed2_rd 229 __kmpc_atomic_fixed2_shl 230 __kmpc_atomic_fixed2_shl_cpt 231 __kmpc_atomic_fixed2_shl_cpt_rev 232 __kmpc_atomic_fixed2_shl_rev 233 __kmpc_atomic_fixed2_shr 234 __kmpc_atomic_fixed2_shr_cpt 235 __kmpc_atomic_fixed2_shr_cpt_rev 236 __kmpc_atomic_fixed2_shr_rev 237 __kmpc_atomic_fixed2_sub 238 __kmpc_atomic_fixed2_sub_cpt 239 __kmpc_atomic_fixed2_sub_cpt_rev 240 __kmpc_atomic_fixed2_sub_fp 241 __kmpc_atomic_fixed2_sub_rev 242 __kmpc_atomic_fixed2_swp 243 __kmpc_atomic_fixed2_wr 244 __kmpc_atomic_fixed2_xor 245 __kmpc_atomic_fixed2_xor_cpt 246 __kmpc_atomic_fixed2u_add_fp 247 __kmpc_atomic_fixed2u_sub_fp 248 __kmpc_atomic_fixed2u_mul_fp 249 __kmpc_atomic_fixed2u_div 250 __kmpc_atomic_fixed2u_div_cpt 251 __kmpc_atomic_fixed2u_div_cpt_rev 252 __kmpc_atomic_fixed2u_div_fp 253 __kmpc_atomic_fixed2u_div_rev 254 __kmpc_atomic_fixed2u_shr 255 __kmpc_atomic_fixed2u_shr_cpt 256 __kmpc_atomic_fixed2u_shr_cpt_rev 257 __kmpc_atomic_fixed2u_shr_rev 258 __kmpc_atomic_fixed4_add 259 __kmpc_atomic_fixed4_add_cpt 260 __kmpc_atomic_fixed4_add_fp 261 __kmpc_atomic_fixed4_andb 262 __kmpc_atomic_fixed4_andb_cpt 263 __kmpc_atomic_fixed4_andl 264 __kmpc_atomic_fixed4_andl_cpt 265 __kmpc_atomic_fixed4_div 266 __kmpc_atomic_fixed4_div_cpt 267 __kmpc_atomic_fixed4_div_cpt_rev 268 __kmpc_atomic_fixed4_div_float8 269 __kmpc_atomic_fixed4_div_fp 270 __kmpc_atomic_fixed4_div_rev 271 __kmpc_atomic_fixed4_eqv 272 __kmpc_atomic_fixed4_eqv_cpt 273 __kmpc_atomic_fixed4_max 274 __kmpc_atomic_fixed4_max_cpt 275 __kmpc_atomic_fixed4_min 276 __kmpc_atomic_fixed4_min_cpt 277 __kmpc_atomic_fixed4_mul 278 __kmpc_atomic_fixed4_mul_cpt 279 __kmpc_atomic_fixed4_mul_float8 280 __kmpc_atomic_fixed4_mul_fp 281 __kmpc_atomic_fixed4_neqv 282 __kmpc_atomic_fixed4_neqv_cpt 283 __kmpc_atomic_fixed4_orb 284 __kmpc_atomic_fixed4_orb_cpt 285 __kmpc_atomic_fixed4_orl 286 __kmpc_atomic_fixed4_orl_cpt 287 __kmpc_atomic_fixed4_rd 288 __kmpc_atomic_fixed4_shl 289 __kmpc_atomic_fixed4_shl_cpt 290 __kmpc_atomic_fixed4_shl_cpt_rev 291 __kmpc_atomic_fixed4_shl_rev 292 __kmpc_atomic_fixed4_shr 293 __kmpc_atomic_fixed4_shr_cpt 294 __kmpc_atomic_fixed4_shr_cpt_rev 295 __kmpc_atomic_fixed4_shr_rev 296 __kmpc_atomic_fixed4_sub 297 __kmpc_atomic_fixed4_sub_cpt 298 __kmpc_atomic_fixed4_sub_cpt_rev 299 __kmpc_atomic_fixed4_sub_fp 300 __kmpc_atomic_fixed4_sub_rev 301 __kmpc_atomic_fixed4_swp 302 __kmpc_atomic_fixed4_wr 303 __kmpc_atomic_fixed4_xor 304 __kmpc_atomic_fixed4_xor_cpt 305 __kmpc_atomic_fixed4u_add_fp 306 __kmpc_atomic_fixed4u_sub_fp 307 __kmpc_atomic_fixed4u_mul_fp 308 __kmpc_atomic_fixed4u_div 309 __kmpc_atomic_fixed4u_div_cpt 310 __kmpc_atomic_fixed4u_div_cpt_rev 311 __kmpc_atomic_fixed4u_div_fp 312 __kmpc_atomic_fixed4u_div_rev 313 __kmpc_atomic_fixed4u_shr 314 __kmpc_atomic_fixed4u_shr_cpt 315 __kmpc_atomic_fixed4u_shr_cpt_rev 316 __kmpc_atomic_fixed4u_shr_rev 317 __kmpc_atomic_fixed8_add 318 __kmpc_atomic_fixed8_add_cpt 319 __kmpc_atomic_fixed8_add_fp 320 __kmpc_atomic_fixed8_andb 321 __kmpc_atomic_fixed8_andb_cpt 322 __kmpc_atomic_fixed8_andl 323 __kmpc_atomic_fixed8_andl_cpt 324 __kmpc_atomic_fixed8_div 325 __kmpc_atomic_fixed8_div_cpt 326 __kmpc_atomic_fixed8_div_cpt_rev 327 __kmpc_atomic_fixed8_div_float8 328 __kmpc_atomic_fixed8_div_fp 329 __kmpc_atomic_fixed8_div_rev 330 __kmpc_atomic_fixed8_eqv 331 __kmpc_atomic_fixed8_eqv_cpt 332 __kmpc_atomic_fixed8_max 333 __kmpc_atomic_fixed8_max_cpt 334 __kmpc_atomic_fixed8_min 335 __kmpc_atomic_fixed8_min_cpt 336 __kmpc_atomic_fixed8_mul 337 __kmpc_atomic_fixed8_mul_cpt 338 __kmpc_atomic_fixed8_mul_float8 339 __kmpc_atomic_fixed8_mul_fp 340 __kmpc_atomic_fixed8_neqv 341 __kmpc_atomic_fixed8_neqv_cpt 342 __kmpc_atomic_fixed8_orb 343 __kmpc_atomic_fixed8_orb_cpt 344 __kmpc_atomic_fixed8_orl 345 __kmpc_atomic_fixed8_orl_cpt 346 __kmpc_atomic_fixed8_rd 347 __kmpc_atomic_fixed8_shl 348 __kmpc_atomic_fixed8_shl_cpt 349 __kmpc_atomic_fixed8_shl_cpt_rev 350 __kmpc_atomic_fixed8_shl_rev 351 __kmpc_atomic_fixed8_shr 352 __kmpc_atomic_fixed8_shr_cpt 353 __kmpc_atomic_fixed8_shr_cpt_rev 354 __kmpc_atomic_fixed8_shr_rev 355 __kmpc_atomic_fixed8_sub 356 __kmpc_atomic_fixed8_sub_cpt 357 __kmpc_atomic_fixed8_sub_cpt_rev 358 __kmpc_atomic_fixed8_sub_fp 359 __kmpc_atomic_fixed8_sub_rev 360 __kmpc_atomic_fixed8_swp 361 __kmpc_atomic_fixed8_wr 362 __kmpc_atomic_fixed8_xor 363 __kmpc_atomic_fixed8_xor_cpt 364 __kmpc_atomic_fixed8u_add_fp 365 __kmpc_atomic_fixed8u_sub_fp 366 __kmpc_atomic_fixed8u_mul_fp 367 __kmpc_atomic_fixed8u_div 368 __kmpc_atomic_fixed8u_div_cpt 369 __kmpc_atomic_fixed8u_div_cpt_rev 370 __kmpc_atomic_fixed8u_div_fp 371 __kmpc_atomic_fixed8u_div_rev 372 __kmpc_atomic_fixed8u_shr 373 __kmpc_atomic_fixed8u_shr_cpt 374 __kmpc_atomic_fixed8u_shr_cpt_rev 375 __kmpc_atomic_fixed8u_shr_rev 376 @endcode 377 378 Functions for floating point 379 ---------------------------- 380 There are versions here for floating point numbers of size 4, 8, 10 and 16 bytes. 381 (Ten byte floats are used by X87, but are now rare). 382 @code 383 __kmpc_atomic_float4_add 384 __kmpc_atomic_float4_add_cpt 385 __kmpc_atomic_float4_add_float8 386 __kmpc_atomic_float4_add_fp 387 __kmpc_atomic_float4_div 388 __kmpc_atomic_float4_div_cpt 389 __kmpc_atomic_float4_div_cpt_rev 390 __kmpc_atomic_float4_div_float8 391 __kmpc_atomic_float4_div_fp 392 __kmpc_atomic_float4_div_rev 393 __kmpc_atomic_float4_max 394 __kmpc_atomic_float4_max_cpt 395 __kmpc_atomic_float4_min 396 __kmpc_atomic_float4_min_cpt 397 __kmpc_atomic_float4_mul 398 __kmpc_atomic_float4_mul_cpt 399 __kmpc_atomic_float4_mul_float8 400 __kmpc_atomic_float4_mul_fp 401 __kmpc_atomic_float4_rd 402 __kmpc_atomic_float4_sub 403 __kmpc_atomic_float4_sub_cpt 404 __kmpc_atomic_float4_sub_cpt_rev 405 __kmpc_atomic_float4_sub_float8 406 __kmpc_atomic_float4_sub_fp 407 __kmpc_atomic_float4_sub_rev 408 __kmpc_atomic_float4_swp 409 __kmpc_atomic_float4_wr 410 __kmpc_atomic_float8_add 411 __kmpc_atomic_float8_add_cpt 412 __kmpc_atomic_float8_add_fp 413 __kmpc_atomic_float8_div 414 __kmpc_atomic_float8_div_cpt 415 __kmpc_atomic_float8_div_cpt_rev 416 __kmpc_atomic_float8_div_fp 417 __kmpc_atomic_float8_div_rev 418 __kmpc_atomic_float8_max 419 __kmpc_atomic_float8_max_cpt 420 __kmpc_atomic_float8_min 421 __kmpc_atomic_float8_min_cpt 422 __kmpc_atomic_float8_mul 423 __kmpc_atomic_float8_mul_cpt 424 __kmpc_atomic_float8_mul_fp 425 __kmpc_atomic_float8_rd 426 __kmpc_atomic_float8_sub 427 __kmpc_atomic_float8_sub_cpt 428 __kmpc_atomic_float8_sub_cpt_rev 429 __kmpc_atomic_float8_sub_fp 430 __kmpc_atomic_float8_sub_rev 431 __kmpc_atomic_float8_swp 432 __kmpc_atomic_float8_wr 433 __kmpc_atomic_float10_add 434 __kmpc_atomic_float10_add_cpt 435 __kmpc_atomic_float10_add_fp 436 __kmpc_atomic_float10_div 437 __kmpc_atomic_float10_div_cpt 438 __kmpc_atomic_float10_div_cpt_rev 439 __kmpc_atomic_float10_div_fp 440 __kmpc_atomic_float10_div_rev 441 __kmpc_atomic_float10_mul 442 __kmpc_atomic_float10_mul_cpt 443 __kmpc_atomic_float10_mul_fp 444 __kmpc_atomic_float10_rd 445 __kmpc_atomic_float10_sub 446 __kmpc_atomic_float10_sub_cpt 447 __kmpc_atomic_float10_sub_cpt_rev 448 __kmpc_atomic_float10_sub_fp 449 __kmpc_atomic_float10_sub_rev 450 __kmpc_atomic_float10_swp 451 __kmpc_atomic_float10_wr 452 __kmpc_atomic_float16_add 453 __kmpc_atomic_float16_add_cpt 454 __kmpc_atomic_float16_div 455 __kmpc_atomic_float16_div_cpt 456 __kmpc_atomic_float16_div_cpt_rev 457 __kmpc_atomic_float16_div_rev 458 __kmpc_atomic_float16_max 459 __kmpc_atomic_float16_max_cpt 460 __kmpc_atomic_float16_min 461 __kmpc_atomic_float16_min_cpt 462 __kmpc_atomic_float16_mul 463 __kmpc_atomic_float16_mul_cpt 464 __kmpc_atomic_float16_rd 465 __kmpc_atomic_float16_sub 466 __kmpc_atomic_float16_sub_cpt 467 __kmpc_atomic_float16_sub_cpt_rev 468 __kmpc_atomic_float16_sub_rev 469 __kmpc_atomic_float16_swp 470 __kmpc_atomic_float16_wr 471 @endcode 472 473 Functions for Complex types 474 --------------------------- 475 Functions for complex types whose component floating point variables are of size 4,8,10 or 16 bytes. 476 The names here are based on the size of the component float, *not* the size of the complex type. So 477 `__kmpc_atomc_cmplx8_add` is an operation on a `complex<double>` or `complex(kind=8)`, *not* `complex<float>`. 478 479 @code 480 __kmpc_atomic_cmplx4_add 481 __kmpc_atomic_cmplx4_add_cmplx8 482 __kmpc_atomic_cmplx4_add_cpt 483 __kmpc_atomic_cmplx4_div 484 __kmpc_atomic_cmplx4_div_cmplx8 485 __kmpc_atomic_cmplx4_div_cpt 486 __kmpc_atomic_cmplx4_div_cpt_rev 487 __kmpc_atomic_cmplx4_div_rev 488 __kmpc_atomic_cmplx4_mul 489 __kmpc_atomic_cmplx4_mul_cmplx8 490 __kmpc_atomic_cmplx4_mul_cpt 491 __kmpc_atomic_cmplx4_rd 492 __kmpc_atomic_cmplx4_sub 493 __kmpc_atomic_cmplx4_sub_cmplx8 494 __kmpc_atomic_cmplx4_sub_cpt 495 __kmpc_atomic_cmplx4_sub_cpt_rev 496 __kmpc_atomic_cmplx4_sub_rev 497 __kmpc_atomic_cmplx4_swp 498 __kmpc_atomic_cmplx4_wr 499 __kmpc_atomic_cmplx8_add 500 __kmpc_atomic_cmplx8_add_cpt 501 __kmpc_atomic_cmplx8_div 502 __kmpc_atomic_cmplx8_div_cpt 503 __kmpc_atomic_cmplx8_div_cpt_rev 504 __kmpc_atomic_cmplx8_div_rev 505 __kmpc_atomic_cmplx8_mul 506 __kmpc_atomic_cmplx8_mul_cpt 507 __kmpc_atomic_cmplx8_rd 508 __kmpc_atomic_cmplx8_sub 509 __kmpc_atomic_cmplx8_sub_cpt 510 __kmpc_atomic_cmplx8_sub_cpt_rev 511 __kmpc_atomic_cmplx8_sub_rev 512 __kmpc_atomic_cmplx8_swp 513 __kmpc_atomic_cmplx8_wr 514 __kmpc_atomic_cmplx10_add 515 __kmpc_atomic_cmplx10_add_cpt 516 __kmpc_atomic_cmplx10_div 517 __kmpc_atomic_cmplx10_div_cpt 518 __kmpc_atomic_cmplx10_div_cpt_rev 519 __kmpc_atomic_cmplx10_div_rev 520 __kmpc_atomic_cmplx10_mul 521 __kmpc_atomic_cmplx10_mul_cpt 522 __kmpc_atomic_cmplx10_rd 523 __kmpc_atomic_cmplx10_sub 524 __kmpc_atomic_cmplx10_sub_cpt 525 __kmpc_atomic_cmplx10_sub_cpt_rev 526 __kmpc_atomic_cmplx10_sub_rev 527 __kmpc_atomic_cmplx10_swp 528 __kmpc_atomic_cmplx10_wr 529 __kmpc_atomic_cmplx16_add 530 __kmpc_atomic_cmplx16_add_cpt 531 __kmpc_atomic_cmplx16_div 532 __kmpc_atomic_cmplx16_div_cpt 533 __kmpc_atomic_cmplx16_div_cpt_rev 534 __kmpc_atomic_cmplx16_div_rev 535 __kmpc_atomic_cmplx16_mul 536 __kmpc_atomic_cmplx16_mul_cpt 537 __kmpc_atomic_cmplx16_rd 538 __kmpc_atomic_cmplx16_sub 539 __kmpc_atomic_cmplx16_sub_cpt 540 __kmpc_atomic_cmplx16_sub_cpt_rev 541 __kmpc_atomic_cmplx16_swp 542 __kmpc_atomic_cmplx16_wr 543 @endcode 544 */ 545 546 /*! 547 @ingroup ATOMIC_OPS 548 @{ 549 */ 550 551 /* 552 * Global vars 553 */ 554 555 #ifndef KMP_GOMP_COMPAT 556 int __kmp_atomic_mode = 1; // Intel perf 557 #else 558 int __kmp_atomic_mode = 2; // GOMP compatibility 559 #endif /* KMP_GOMP_COMPAT */ 560 561 KMP_ALIGN(128) 562 563 kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */ 564 kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */ 565 kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */ 566 kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */ 567 kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */ 568 kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */ 569 kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */ 570 kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for complex byte data type */ 571 kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */ 572 kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */ 573 kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/ 574 kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/ 575 kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */ 576 577 578 /* 579 2007-03-02: 580 Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a 581 bug on *_32 and *_32e. This is just a temporary workaround for the problem. 582 It seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG 583 routines in assembler language. 584 */ 585 #define KMP_ATOMIC_VOLATILE volatile 586 587 #if ( KMP_ARCH_X86 ) && KMP_HAVE_QUAD 588 589 static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; }; 590 static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; }; 591 static inline void operator *=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q *= rhs.q; }; 592 static inline void operator /=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q /= rhs.q; }; 593 static inline bool operator < ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q < rhs.q; } 594 static inline bool operator > ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q > rhs.q; } 595 596 static inline void operator +=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q += rhs.q; }; 597 static inline void operator -=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q -= rhs.q; }; 598 static inline void operator *=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q *= rhs.q; }; 599 static inline void operator /=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q /= rhs.q; }; 600 static inline bool operator < ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q < rhs.q; } 601 static inline bool operator > ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q > rhs.q; } 602 603 static inline void operator +=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q += rhs.q; }; 604 static inline void operator -=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q -= rhs.q; }; 605 static inline void operator *=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q *= rhs.q; }; 606 static inline void operator /=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q /= rhs.q; }; 607 608 static inline void operator +=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q += rhs.q; }; 609 static inline void operator -=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q -= rhs.q; }; 610 static inline void operator *=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q *= rhs.q; }; 611 static inline void operator /=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q /= rhs.q; }; 612 613 #endif 614 615 /* ------------------------------------------------------------------------ */ 616 /* ATOMIC implementation routines */ 617 /* one routine for each operation and operand type */ 618 /* ------------------------------------------------------------------------ */ 619 620 // All routines declarations looks like 621 // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs ); 622 // ------------------------------------------------------------------------ 623 624 #define KMP_CHECK_GTID \ 625 if ( gtid == KMP_GTID_UNKNOWN ) { \ 626 gtid = __kmp_entry_gtid(); \ 627 } // check and get gtid when needed 628 629 // Beginning of a definition (provides name, parameters, gebug trace) 630 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) 631 // OP_ID - operation identifier (add, sub, mul, ...) 632 // TYPE - operands' type 633 #define ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE, RET_TYPE) \ 634 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \ 635 { \ 636 KMP_DEBUG_ASSERT( __kmp_init_serial ); \ 637 KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); 638 639 // ------------------------------------------------------------------------ 640 // Lock variables used for critical sections for various size operands 641 #define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat 642 #define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char 643 #define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short 644 #define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int 645 #define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float 646 #define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int 647 #define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double 648 #define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex 649 #define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double 650 #define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad 651 #define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex 652 #define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex 653 #define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex 654 655 // ------------------------------------------------------------------------ 656 // Operation on *lhs, rhs bound by critical section 657 // OP - operator (it's supposed to contain an assignment) 658 // LCK_ID - lock identifier 659 // Note: don't check gtid as it should always be valid 660 // 1, 2-byte - expect valid parameter, other - check before this macro 661 #define OP_CRITICAL(OP,LCK_ID) \ 662 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ 663 \ 664 (*lhs) OP (rhs); \ 665 \ 666 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); 667 668 // ------------------------------------------------------------------------ 669 // For GNU compatibility, we may need to use a critical section, 670 // even though it is not required by the ISA. 671 // 672 // On IA-32 architecture, all atomic operations except for fixed 4 byte add, 673 // sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common 674 // critical section. On Intel(R) 64, all atomic operations are done with fetch 675 // and add or compare and exchange. Therefore, the FLAG parameter to this 676 // macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extension which 677 // require a critical section, where we predict that they will be implemented 678 // in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()). 679 // 680 // When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct, 681 // the FLAG parameter should always be 1. If we know that we will be using 682 // a critical section, then we want to make certain that we use the generic 683 // lock __kmp_atomic_lock to protect the atomic update, and not of of the 684 // locks that are specialized based upon the size or type of the data. 685 // 686 // If FLAG is 0, then we are relying on dead code elimination by the build 687 // compiler to get rid of the useless block of code, and save a needless 688 // branch at runtime. 689 // 690 691 #ifdef KMP_GOMP_COMPAT 692 # define OP_GOMP_CRITICAL(OP,FLAG) \ 693 if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ 694 KMP_CHECK_GTID; \ 695 OP_CRITICAL( OP, 0 ); \ 696 return; \ 697 } 698 # else 699 # define OP_GOMP_CRITICAL(OP,FLAG) 700 #endif /* KMP_GOMP_COMPAT */ 701 702 #if KMP_MIC 703 # define KMP_DO_PAUSE _mm_delay_32( 1 ) 704 #else 705 # define KMP_DO_PAUSE KMP_CPU_PAUSE() 706 #endif /* KMP_MIC */ 707 708 // ------------------------------------------------------------------------ 709 // Operation on *lhs, rhs using "compare_and_store" routine 710 // TYPE - operands' type 711 // BITS - size in bits, used to distinguish low level calls 712 // OP - operator 713 #define OP_CMPXCHG(TYPE,BITS,OP) \ 714 { \ 715 TYPE old_value, new_value; \ 716 old_value = *(TYPE volatile *)lhs; \ 717 new_value = old_value OP rhs; \ 718 while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ 719 *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ 720 *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ 721 { \ 722 KMP_DO_PAUSE; \ 723 \ 724 old_value = *(TYPE volatile *)lhs; \ 725 new_value = old_value OP rhs; \ 726 } \ 727 } 728 729 #if USE_CMPXCHG_FIX 730 // 2007-06-25: 731 // workaround for C78287 (complex(kind=4) data type) 732 // lin_32, lin_32e, win_32 and win_32e are affected (I verified the asm) 733 // Compiler ignores the volatile qualifier of the temp_val in the OP_CMPXCHG macro. 734 // This is a problem of the compiler. 735 // Related tracker is C76005, targeted to 11.0. 736 // I verified the asm of the workaround. 737 #define OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \ 738 { \ 739 struct _sss { \ 740 TYPE cmp; \ 741 kmp_int##BITS *vvv; \ 742 }; \ 743 struct _sss old_value, new_value; \ 744 old_value.vvv = ( kmp_int##BITS * )&old_value.cmp; \ 745 new_value.vvv = ( kmp_int##BITS * )&new_value.cmp; \ 746 *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \ 747 new_value.cmp = old_value.cmp OP rhs; \ 748 while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ 749 *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \ 750 *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv ) ) \ 751 { \ 752 KMP_DO_PAUSE; \ 753 \ 754 *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \ 755 new_value.cmp = old_value.cmp OP rhs; \ 756 } \ 757 } 758 // end of the first part of the workaround for C78287 759 #endif // USE_CMPXCHG_FIX 760 761 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 762 763 // ------------------------------------------------------------------------ 764 // X86 or X86_64: no alignment problems ==================================== 765 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ 766 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ 767 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ 768 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ 769 KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \ 770 } 771 // ------------------------------------------------------------------------- 772 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ 773 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ 774 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ 775 OP_CMPXCHG(TYPE,BITS,OP) \ 776 } 777 #if USE_CMPXCHG_FIX 778 // ------------------------------------------------------------------------- 779 // workaround for C78287 (complex(kind=4) data type) 780 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ 781 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ 782 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ 783 OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \ 784 } 785 // end of the second part of the workaround for C78287 786 #endif 787 788 #else 789 // ------------------------------------------------------------------------- 790 // Code for other architectures that don't handle unaligned accesses. 791 #define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ 792 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ 793 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ 794 if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ 795 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ 796 KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \ 797 } else { \ 798 KMP_CHECK_GTID; \ 799 OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ 800 } \ 801 } 802 // ------------------------------------------------------------------------- 803 #define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ 804 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ 805 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ 806 if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ 807 OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ 808 } else { \ 809 KMP_CHECK_GTID; \ 810 OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ 811 } \ 812 } 813 #if USE_CMPXCHG_FIX 814 // ------------------------------------------------------------------------- 815 // workaround for C78287 (complex(kind=4) data type) 816 #define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ 817 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ 818 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ 819 if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ 820 OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ 821 } else { \ 822 KMP_CHECK_GTID; \ 823 OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ 824 } \ 825 } 826 // end of the second part of the workaround for C78287 827 #endif // USE_CMPXCHG_FIX 828 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 829 830 // Routines for ATOMIC 4-byte operands addition and subtraction 831 ATOMIC_FIXED_ADD( fixed4, add, kmp_int32, 32, +, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add 832 ATOMIC_FIXED_ADD( fixed4, sub, kmp_int32, 32, -, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub 833 834 ATOMIC_CMPXCHG( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add 835 ATOMIC_CMPXCHG( float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub 836 837 // Routines for ATOMIC 8-byte operands addition and subtraction 838 ATOMIC_FIXED_ADD( fixed8, add, kmp_int64, 64, +, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add 839 ATOMIC_FIXED_ADD( fixed8, sub, kmp_int64, 64, -, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub 840 841 ATOMIC_CMPXCHG( float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add 842 ATOMIC_CMPXCHG( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub 843 844 // ------------------------------------------------------------------------ 845 // Entries definition for integer operands 846 // TYPE_ID - operands type and size (fixed4, float4) 847 // OP_ID - operation identifier (add, sub, mul, ...) 848 // TYPE - operand type 849 // BITS - size in bits, used to distinguish low level calls 850 // OP - operator (used in critical section) 851 // LCK_ID - lock identifier, used to possibly distinguish lock variable 852 // MASK - used for alignment check 853 854 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG 855 // ------------------------------------------------------------------------ 856 // Routines for ATOMIC integer operands, other operators 857 // ------------------------------------------------------------------------ 858 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG 859 ATOMIC_CMPXCHG( fixed1, add, kmp_int8, 8, +, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add 860 ATOMIC_CMPXCHG( fixed1, andb, kmp_int8, 8, &, 1i, 0, 0 ) // __kmpc_atomic_fixed1_andb 861 ATOMIC_CMPXCHG( fixed1, div, kmp_int8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div 862 ATOMIC_CMPXCHG( fixed1u, div, kmp_uint8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div 863 ATOMIC_CMPXCHG( fixed1, mul, kmp_int8, 8, *, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul 864 ATOMIC_CMPXCHG( fixed1, orb, kmp_int8, 8, |, 1i, 0, 0 ) // __kmpc_atomic_fixed1_orb 865 ATOMIC_CMPXCHG( fixed1, shl, kmp_int8, 8, <<, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl 866 ATOMIC_CMPXCHG( fixed1, shr, kmp_int8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr 867 ATOMIC_CMPXCHG( fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr 868 ATOMIC_CMPXCHG( fixed1, sub, kmp_int8, 8, -, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub 869 ATOMIC_CMPXCHG( fixed1, xor, kmp_int8, 8, ^, 1i, 0, 0 ) // __kmpc_atomic_fixed1_xor 870 ATOMIC_CMPXCHG( fixed2, add, kmp_int16, 16, +, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add 871 ATOMIC_CMPXCHG( fixed2, andb, kmp_int16, 16, &, 2i, 1, 0 ) // __kmpc_atomic_fixed2_andb 872 ATOMIC_CMPXCHG( fixed2, div, kmp_int16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div 873 ATOMIC_CMPXCHG( fixed2u, div, kmp_uint16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div 874 ATOMIC_CMPXCHG( fixed2, mul, kmp_int16, 16, *, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul 875 ATOMIC_CMPXCHG( fixed2, orb, kmp_int16, 16, |, 2i, 1, 0 ) // __kmpc_atomic_fixed2_orb 876 ATOMIC_CMPXCHG( fixed2, shl, kmp_int16, 16, <<, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl 877 ATOMIC_CMPXCHG( fixed2, shr, kmp_int16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr 878 ATOMIC_CMPXCHG( fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr 879 ATOMIC_CMPXCHG( fixed2, sub, kmp_int16, 16, -, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub 880 ATOMIC_CMPXCHG( fixed2, xor, kmp_int16, 16, ^, 2i, 1, 0 ) // __kmpc_atomic_fixed2_xor 881 ATOMIC_CMPXCHG( fixed4, andb, kmp_int32, 32, &, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andb 882 ATOMIC_CMPXCHG( fixed4, div, kmp_int32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div 883 ATOMIC_CMPXCHG( fixed4u, div, kmp_uint32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div 884 ATOMIC_CMPXCHG( fixed4, mul, kmp_int32, 32, *, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul 885 ATOMIC_CMPXCHG( fixed4, orb, kmp_int32, 32, |, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orb 886 ATOMIC_CMPXCHG( fixed4, shl, kmp_int32, 32, <<, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl 887 ATOMIC_CMPXCHG( fixed4, shr, kmp_int32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr 888 ATOMIC_CMPXCHG( fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr 889 ATOMIC_CMPXCHG( fixed4, xor, kmp_int32, 32, ^, 4i, 3, 0 ) // __kmpc_atomic_fixed4_xor 890 ATOMIC_CMPXCHG( fixed8, andb, kmp_int64, 64, &, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb 891 ATOMIC_CMPXCHG( fixed8, div, kmp_int64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div 892 ATOMIC_CMPXCHG( fixed8u, div, kmp_uint64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div 893 ATOMIC_CMPXCHG( fixed8, mul, kmp_int64, 64, *, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul 894 ATOMIC_CMPXCHG( fixed8, orb, kmp_int64, 64, |, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb 895 ATOMIC_CMPXCHG( fixed8, shl, kmp_int64, 64, <<, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl 896 ATOMIC_CMPXCHG( fixed8, shr, kmp_int64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr 897 ATOMIC_CMPXCHG( fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr 898 ATOMIC_CMPXCHG( fixed8, xor, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor 899 ATOMIC_CMPXCHG( float4, div, kmp_real32, 32, /, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div 900 ATOMIC_CMPXCHG( float4, mul, kmp_real32, 32, *, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul 901 ATOMIC_CMPXCHG( float8, div, kmp_real64, 64, /, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div 902 ATOMIC_CMPXCHG( float8, mul, kmp_real64, 64, *, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul 903 // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG 904 905 906 /* ------------------------------------------------------------------------ */ 907 /* Routines for C/C++ Reduction operators && and || */ 908 /* ------------------------------------------------------------------------ */ 909 910 // ------------------------------------------------------------------------ 911 // Need separate macros for &&, || because there is no combined assignment 912 // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used 913 #define ATOMIC_CRIT_L(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ 914 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ 915 OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \ 916 OP_CRITICAL( = *lhs OP, LCK_ID ) \ 917 } 918 919 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 920 921 // ------------------------------------------------------------------------ 922 // X86 or X86_64: no alignment problems =================================== 923 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ 924 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ 925 OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \ 926 OP_CMPXCHG(TYPE,BITS,OP) \ 927 } 928 929 #else 930 // ------------------------------------------------------------------------ 931 // Code for other architectures that don't handle unaligned accesses. 932 #define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ 933 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ 934 OP_GOMP_CRITICAL(= *lhs OP,GOMP_FLAG) \ 935 if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ 936 OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ 937 } else { \ 938 KMP_CHECK_GTID; \ 939 OP_CRITICAL(= *lhs OP,LCK_ID) /* unaligned - use critical */ \ 940 } \ 941 } 942 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 943 944 ATOMIC_CMPX_L( fixed1, andl, char, 8, &&, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl 945 ATOMIC_CMPX_L( fixed1, orl, char, 8, ||, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl 946 ATOMIC_CMPX_L( fixed2, andl, short, 16, &&, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl 947 ATOMIC_CMPX_L( fixed2, orl, short, 16, ||, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl 948 ATOMIC_CMPX_L( fixed4, andl, kmp_int32, 32, &&, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andl 949 ATOMIC_CMPX_L( fixed4, orl, kmp_int32, 32, ||, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orl 950 ATOMIC_CMPX_L( fixed8, andl, kmp_int64, 64, &&, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl 951 ATOMIC_CMPX_L( fixed8, orl, kmp_int64, 64, ||, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl 952 953 954 /* ------------------------------------------------------------------------- */ 955 /* Routines for Fortran operators that matched no one in C: */ 956 /* MAX, MIN, .EQV., .NEQV. */ 957 /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */ 958 /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */ 959 /* ------------------------------------------------------------------------- */ 960 961 // ------------------------------------------------------------------------- 962 // MIN and MAX need separate macros 963 // OP - operator to check if we need any actions? 964 #define MIN_MAX_CRITSECT(OP,LCK_ID) \ 965 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ 966 \ 967 if ( *lhs OP rhs ) { /* still need actions? */ \ 968 *lhs = rhs; \ 969 } \ 970 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); 971 972 // ------------------------------------------------------------------------- 973 #ifdef KMP_GOMP_COMPAT 974 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG) \ 975 if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \ 976 KMP_CHECK_GTID; \ 977 MIN_MAX_CRITSECT( OP, 0 ); \ 978 return; \ 979 } 980 #else 981 #define GOMP_MIN_MAX_CRITSECT(OP,FLAG) 982 #endif /* KMP_GOMP_COMPAT */ 983 984 // ------------------------------------------------------------------------- 985 #define MIN_MAX_CMPXCHG(TYPE,BITS,OP) \ 986 { \ 987 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 988 TYPE old_value; \ 989 temp_val = *lhs; \ 990 old_value = temp_val; \ 991 while ( old_value OP rhs && /* still need actions? */ \ 992 ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ 993 *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ 994 *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \ 995 { \ 996 KMP_CPU_PAUSE(); \ 997 temp_val = *lhs; \ 998 old_value = temp_val; \ 999 } \ 1000 } 1001 1002 // ------------------------------------------------------------------------- 1003 // 1-byte, 2-byte operands - use critical section 1004 #define MIN_MAX_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ 1005 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ 1006 if ( *lhs OP rhs ) { /* need actions? */ \ 1007 GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \ 1008 MIN_MAX_CRITSECT(OP,LCK_ID) \ 1009 } \ 1010 } 1011 1012 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1013 1014 // ------------------------------------------------------------------------- 1015 // X86 or X86_64: no alignment problems ==================================== 1016 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ 1017 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ 1018 if ( *lhs OP rhs ) { \ 1019 GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \ 1020 MIN_MAX_CMPXCHG(TYPE,BITS,OP) \ 1021 } \ 1022 } 1023 1024 #else 1025 // ------------------------------------------------------------------------- 1026 // Code for other architectures that don't handle unaligned accesses. 1027 #define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ 1028 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ 1029 if ( *lhs OP rhs ) { \ 1030 GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \ 1031 if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ 1032 MIN_MAX_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ 1033 } else { \ 1034 KMP_CHECK_GTID; \ 1035 MIN_MAX_CRITSECT(OP,LCK_ID) /* unaligned address */ \ 1036 } \ 1037 } \ 1038 } 1039 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1040 1041 MIN_MAX_COMPXCHG( fixed1, max, char, 8, <, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max 1042 MIN_MAX_COMPXCHG( fixed1, min, char, 8, >, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min 1043 MIN_MAX_COMPXCHG( fixed2, max, short, 16, <, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max 1044 MIN_MAX_COMPXCHG( fixed2, min, short, 16, >, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min 1045 MIN_MAX_COMPXCHG( fixed4, max, kmp_int32, 32, <, 4i, 3, 0 ) // __kmpc_atomic_fixed4_max 1046 MIN_MAX_COMPXCHG( fixed4, min, kmp_int32, 32, >, 4i, 3, 0 ) // __kmpc_atomic_fixed4_min 1047 MIN_MAX_COMPXCHG( fixed8, max, kmp_int64, 64, <, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max 1048 MIN_MAX_COMPXCHG( fixed8, min, kmp_int64, 64, >, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min 1049 MIN_MAX_COMPXCHG( float4, max, kmp_real32, 32, <, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max 1050 MIN_MAX_COMPXCHG( float4, min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min 1051 MIN_MAX_COMPXCHG( float8, max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max 1052 MIN_MAX_COMPXCHG( float8, min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min 1053 #if KMP_HAVE_QUAD 1054 MIN_MAX_CRITICAL( float16, max, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max 1055 MIN_MAX_CRITICAL( float16, min, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min 1056 #if ( KMP_ARCH_X86 ) 1057 MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16 1058 MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16 1059 #endif 1060 #endif 1061 // ------------------------------------------------------------------------ 1062 // Need separate macros for .EQV. because of the need of complement (~) 1063 // OP ignored for critical sections, ^=~ used instead 1064 #define ATOMIC_CRIT_EQV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ 1065 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ 1066 OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \ 1067 OP_CRITICAL(^=~,LCK_ID) /* send assignment and complement */ \ 1068 } 1069 1070 // ------------------------------------------------------------------------ 1071 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1072 // ------------------------------------------------------------------------ 1073 // X86 or X86_64: no alignment problems =================================== 1074 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ 1075 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ 1076 OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \ 1077 OP_CMPXCHG(TYPE,BITS,OP) \ 1078 } 1079 // ------------------------------------------------------------------------ 1080 #else 1081 // ------------------------------------------------------------------------ 1082 // Code for other architectures that don't handle unaligned accesses. 1083 #define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ 1084 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ 1085 OP_GOMP_CRITICAL(^=~,GOMP_FLAG) \ 1086 if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ 1087 OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ 1088 } else { \ 1089 KMP_CHECK_GTID; \ 1090 OP_CRITICAL(^=~,LCK_ID) /* unaligned address - use critical */ \ 1091 } \ 1092 } 1093 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1094 1095 ATOMIC_CMPXCHG( fixed1, neqv, kmp_int8, 8, ^, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv 1096 ATOMIC_CMPXCHG( fixed2, neqv, kmp_int16, 16, ^, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv 1097 ATOMIC_CMPXCHG( fixed4, neqv, kmp_int32, 32, ^, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv 1098 ATOMIC_CMPXCHG( fixed8, neqv, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv 1099 ATOMIC_CMPX_EQV( fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv 1100 ATOMIC_CMPX_EQV( fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv 1101 ATOMIC_CMPX_EQV( fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv 1102 ATOMIC_CMPX_EQV( fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv 1103 1104 1105 // ------------------------------------------------------------------------ 1106 // Routines for Extended types: long double, _Quad, complex flavours (use critical section) 1107 // TYPE_ID, OP_ID, TYPE - detailed above 1108 // OP - operator 1109 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1110 #define ATOMIC_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ 1111 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ 1112 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \ 1113 OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \ 1114 } 1115 1116 /* ------------------------------------------------------------------------- */ 1117 // routines for long double type 1118 ATOMIC_CRITICAL( float10, add, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add 1119 ATOMIC_CRITICAL( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub 1120 ATOMIC_CRITICAL( float10, mul, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul 1121 ATOMIC_CRITICAL( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div 1122 #if KMP_HAVE_QUAD 1123 // routines for _Quad type 1124 ATOMIC_CRITICAL( float16, add, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add 1125 ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub 1126 ATOMIC_CRITICAL( float16, mul, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul 1127 ATOMIC_CRITICAL( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div 1128 #if ( KMP_ARCH_X86 ) 1129 ATOMIC_CRITICAL( float16, add_a16, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16 1130 ATOMIC_CRITICAL( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16 1131 ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16 1132 ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16 1133 #endif 1134 #endif 1135 // routines for complex types 1136 1137 #if USE_CMPXCHG_FIX 1138 // workaround for C78287 (complex(kind=4) data type) 1139 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_add 1140 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_sub 1141 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_mul 1142 ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_div 1143 // end of the workaround for C78287 1144 #else 1145 ATOMIC_CRITICAL( cmplx4, add, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add 1146 ATOMIC_CRITICAL( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub 1147 ATOMIC_CRITICAL( cmplx4, mul, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul 1148 ATOMIC_CRITICAL( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div 1149 #endif // USE_CMPXCHG_FIX 1150 1151 ATOMIC_CRITICAL( cmplx8, add, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add 1152 ATOMIC_CRITICAL( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub 1153 ATOMIC_CRITICAL( cmplx8, mul, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul 1154 ATOMIC_CRITICAL( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div 1155 ATOMIC_CRITICAL( cmplx10, add, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add 1156 ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub 1157 ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul 1158 ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div 1159 #if KMP_HAVE_QUAD 1160 ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add 1161 ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub 1162 ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul 1163 ATOMIC_CRITICAL( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div 1164 #if ( KMP_ARCH_X86 ) 1165 ATOMIC_CRITICAL( cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16 1166 ATOMIC_CRITICAL( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16 1167 ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16 1168 ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16 1169 #endif 1170 #endif 1171 1172 #if OMP_40_ENABLED 1173 1174 // OpenMP 4.0: x = expr binop x for non-commutative operations. 1175 // Supported only on IA-32 architecture and Intel(R) 64 1176 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1177 1178 // ------------------------------------------------------------------------ 1179 // Operation on *lhs, rhs bound by critical section 1180 // OP - operator (it's supposed to contain an assignment) 1181 // LCK_ID - lock identifier 1182 // Note: don't check gtid as it should always be valid 1183 // 1, 2-byte - expect valid parameter, other - check before this macro 1184 #define OP_CRITICAL_REV(OP,LCK_ID) \ 1185 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ 1186 \ 1187 (*lhs) = (rhs) OP (*lhs); \ 1188 \ 1189 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); 1190 1191 #ifdef KMP_GOMP_COMPAT 1192 #define OP_GOMP_CRITICAL_REV(OP,FLAG) \ 1193 if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ 1194 KMP_CHECK_GTID; \ 1195 OP_CRITICAL_REV( OP, 0 ); \ 1196 return; \ 1197 } 1198 #else 1199 #define OP_GOMP_CRITICAL_REV(OP,FLAG) 1200 #endif /* KMP_GOMP_COMPAT */ 1201 1202 1203 // Beginning of a definition (provides name, parameters, gebug trace) 1204 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) 1205 // OP_ID - operation identifier (add, sub, mul, ...) 1206 // TYPE - operands' type 1207 #define ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE, RET_TYPE) \ 1208 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \ 1209 { \ 1210 KMP_DEBUG_ASSERT( __kmp_init_serial ); \ 1211 KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid )); 1212 1213 // ------------------------------------------------------------------------ 1214 // Operation on *lhs, rhs using "compare_and_store" routine 1215 // TYPE - operands' type 1216 // BITS - size in bits, used to distinguish low level calls 1217 // OP - operator 1218 // Note: temp_val introduced in order to force the compiler to read 1219 // *lhs only once (w/o it the compiler reads *lhs twice) 1220 #define OP_CMPXCHG_REV(TYPE,BITS,OP) \ 1221 { \ 1222 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 1223 TYPE old_value, new_value; \ 1224 temp_val = *lhs; \ 1225 old_value = temp_val; \ 1226 new_value = rhs OP old_value; \ 1227 while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ 1228 *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ 1229 *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ 1230 { \ 1231 KMP_DO_PAUSE; \ 1232 \ 1233 temp_val = *lhs; \ 1234 old_value = temp_val; \ 1235 new_value = rhs OP old_value; \ 1236 } \ 1237 } 1238 1239 // ------------------------------------------------------------------------- 1240 #define ATOMIC_CMPXCHG_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,GOMP_FLAG) \ 1241 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \ 1242 OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \ 1243 OP_CMPXCHG_REV(TYPE,BITS,OP) \ 1244 } 1245 1246 // ------------------------------------------------------------------------ 1247 // Entries definition for integer operands 1248 // TYPE_ID - operands type and size (fixed4, float4) 1249 // OP_ID - operation identifier (add, sub, mul, ...) 1250 // TYPE - operand type 1251 // BITS - size in bits, used to distinguish low level calls 1252 // OP - operator (used in critical section) 1253 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1254 1255 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG 1256 // ------------------------------------------------------------------------ 1257 // Routines for ATOMIC integer operands, other operators 1258 // ------------------------------------------------------------------------ 1259 // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG 1260 ATOMIC_CMPXCHG_REV( fixed1, div, kmp_int8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_rev 1261 ATOMIC_CMPXCHG_REV( fixed1u, div, kmp_uint8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_rev 1262 ATOMIC_CMPXCHG_REV( fixed1, shl, kmp_int8, 8, <<, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_rev 1263 ATOMIC_CMPXCHG_REV( fixed1, shr, kmp_int8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_rev 1264 ATOMIC_CMPXCHG_REV( fixed1u, shr, kmp_uint8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_rev 1265 ATOMIC_CMPXCHG_REV( fixed1, sub, kmp_int8, 8, -, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_rev 1266 1267 ATOMIC_CMPXCHG_REV( fixed2, div, kmp_int16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_rev 1268 ATOMIC_CMPXCHG_REV( fixed2u, div, kmp_uint16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_rev 1269 ATOMIC_CMPXCHG_REV( fixed2, shl, kmp_int16, 16, <<, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_rev 1270 ATOMIC_CMPXCHG_REV( fixed2, shr, kmp_int16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_rev 1271 ATOMIC_CMPXCHG_REV( fixed2u, shr, kmp_uint16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_rev 1272 ATOMIC_CMPXCHG_REV( fixed2, sub, kmp_int16, 16, -, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_rev 1273 1274 ATOMIC_CMPXCHG_REV( fixed4, div, kmp_int32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_rev 1275 ATOMIC_CMPXCHG_REV( fixed4u, div, kmp_uint32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_rev 1276 ATOMIC_CMPXCHG_REV( fixed4, shl, kmp_int32, 32, <<, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_rev 1277 ATOMIC_CMPXCHG_REV( fixed4, shr, kmp_int32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_rev 1278 ATOMIC_CMPXCHG_REV( fixed4u, shr, kmp_uint32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_rev 1279 ATOMIC_CMPXCHG_REV( fixed4, sub, kmp_int32, 32, -, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_rev 1280 1281 ATOMIC_CMPXCHG_REV( fixed8, div, kmp_int64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_rev 1282 ATOMIC_CMPXCHG_REV( fixed8u, div, kmp_uint64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_rev 1283 ATOMIC_CMPXCHG_REV( fixed8, shl, kmp_int64, 64, <<, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_rev 1284 ATOMIC_CMPXCHG_REV( fixed8, shr, kmp_int64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_rev 1285 ATOMIC_CMPXCHG_REV( fixed8u, shr, kmp_uint64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_rev 1286 ATOMIC_CMPXCHG_REV( fixed8, sub, kmp_int64, 64, -, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_rev 1287 1288 ATOMIC_CMPXCHG_REV( float4, div, kmp_real32, 32, /, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_rev 1289 ATOMIC_CMPXCHG_REV( float4, sub, kmp_real32, 32, -, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_rev 1290 1291 ATOMIC_CMPXCHG_REV( float8, div, kmp_real64, 64, /, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_rev 1292 ATOMIC_CMPXCHG_REV( float8, sub, kmp_real64, 64, -, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_rev 1293 // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG 1294 1295 // ------------------------------------------------------------------------ 1296 // Routines for Extended types: long double, _Quad, complex flavours (use critical section) 1297 // TYPE_ID, OP_ID, TYPE - detailed above 1298 // OP - operator 1299 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1300 #define ATOMIC_CRITICAL_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ 1301 ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \ 1302 OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \ 1303 OP_CRITICAL_REV(OP,LCK_ID) \ 1304 } 1305 1306 /* ------------------------------------------------------------------------- */ 1307 // routines for long double type 1308 ATOMIC_CRITICAL_REV( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_rev 1309 ATOMIC_CRITICAL_REV( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_rev 1310 #if KMP_HAVE_QUAD 1311 // routines for _Quad type 1312 ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_rev 1313 ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_rev 1314 #if ( KMP_ARCH_X86 ) 1315 ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_rev 1316 ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_rev 1317 #endif 1318 #endif 1319 1320 // routines for complex types 1321 ATOMIC_CRITICAL_REV( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_rev 1322 ATOMIC_CRITICAL_REV( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_rev 1323 ATOMIC_CRITICAL_REV( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_rev 1324 ATOMIC_CRITICAL_REV( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_rev 1325 ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_rev 1326 ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_rev 1327 #if KMP_HAVE_QUAD 1328 ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_rev 1329 ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_rev 1330 #if ( KMP_ARCH_X86 ) 1331 ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_rev 1332 ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_rev 1333 #endif 1334 #endif 1335 1336 1337 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64 1338 // End of OpenMP 4.0: x = expr binop x for non-commutative operations. 1339 1340 #endif //OMP_40_ENABLED 1341 1342 1343 /* ------------------------------------------------------------------------ */ 1344 /* Routines for mixed types of LHS and RHS, when RHS is "larger" */ 1345 /* Note: in order to reduce the total number of types combinations */ 1346 /* it is supposed that compiler converts RHS to longest floating type,*/ 1347 /* that is _Quad, before call to any of these routines */ 1348 /* Conversion to _Quad will be done by the compiler during calculation, */ 1349 /* conversion back to TYPE - before the assignment, like: */ 1350 /* *lhs = (TYPE)( (_Quad)(*lhs) OP rhs ) */ 1351 /* Performance penalty expected because of SW emulation use */ 1352 /* ------------------------------------------------------------------------ */ 1353 1354 #define ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ 1355 void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs ) \ 1356 { \ 1357 KMP_DEBUG_ASSERT( __kmp_init_serial ); \ 1358 KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid )); 1359 1360 // ------------------------------------------------------------------------- 1361 #define ATOMIC_CRITICAL_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \ 1362 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ 1363 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \ 1364 OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \ 1365 } 1366 1367 // ------------------------------------------------------------------------- 1368 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1369 // ------------------------------------------------------------------------- 1370 // X86 or X86_64: no alignment problems ==================================== 1371 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ 1372 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ 1373 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ 1374 OP_CMPXCHG(TYPE,BITS,OP) \ 1375 } 1376 // ------------------------------------------------------------------------- 1377 #else 1378 // ------------------------------------------------------------------------ 1379 // Code for other architectures that don't handle unaligned accesses. 1380 #define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ 1381 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ 1382 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ 1383 if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ 1384 OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ 1385 } else { \ 1386 KMP_CHECK_GTID; \ 1387 OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ 1388 } \ 1389 } 1390 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1391 1392 // ------------------------------------------------------------------------- 1393 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1394 // ------------------------------------------------------------------------- 1395 #define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ 1396 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ 1397 OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \ 1398 OP_CMPXCHG_REV(TYPE,BITS,OP) \ 1399 } 1400 #define ATOMIC_CRITICAL_REV_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \ 1401 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ 1402 OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \ 1403 OP_CRITICAL_REV(OP,LCK_ID) \ 1404 } 1405 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1406 1407 // RHS=float8 1408 ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_float8 1409 ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_float8 1410 ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_float8 1411 ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_float8 1412 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_float8 1413 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_float8 1414 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_float8 1415 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_float8 1416 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_float8 1417 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_float8 1418 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_float8 1419 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8 1420 1421 // RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them) 1422 #if KMP_HAVE_QUAD 1423 ATOMIC_CMPXCHG_MIX( fixed1, char, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp 1424 ATOMIC_CMPXCHG_MIX( fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_add_fp 1425 ATOMIC_CMPXCHG_MIX( fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp 1426 ATOMIC_CMPXCHG_MIX( fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_sub_fp 1427 ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp 1428 ATOMIC_CMPXCHG_MIX( fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_mul_fp 1429 ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_fp 1430 ATOMIC_CMPXCHG_MIX( fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_fp 1431 1432 ATOMIC_CMPXCHG_MIX( fixed2, short, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_fp 1433 ATOMIC_CMPXCHG_MIX( fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_add_fp 1434 ATOMIC_CMPXCHG_MIX( fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_fp 1435 ATOMIC_CMPXCHG_MIX( fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_sub_fp 1436 ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_fp 1437 ATOMIC_CMPXCHG_MIX( fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_mul_fp 1438 ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_fp 1439 ATOMIC_CMPXCHG_MIX( fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_fp 1440 1441 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add_fp 1442 ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_add_fp 1443 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_fp 1444 ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_sub_fp 1445 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_fp 1446 ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_mul_fp 1447 ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_fp 1448 ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_fp 1449 1450 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_fp 1451 ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_add_fp 1452 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_fp 1453 ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_sub_fp 1454 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_fp 1455 ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_mul_fp 1456 ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_fp 1457 ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_fp 1458 1459 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_fp 1460 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_fp 1461 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_fp 1462 ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_fp 1463 1464 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_fp 1465 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_fp 1466 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_fp 1467 ATOMIC_CMPXCHG_MIX( float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_fp 1468 1469 ATOMIC_CRITICAL_FP( float10, long double, add, +, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_add_fp 1470 ATOMIC_CRITICAL_FP( float10, long double, sub, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_fp 1471 ATOMIC_CRITICAL_FP( float10, long double, mul, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_fp 1472 ATOMIC_CRITICAL_FP( float10, long double, div, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_fp 1473 1474 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1475 // Reverse operations 1476 ATOMIC_CMPXCHG_REV_MIX( fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_rev_fp 1477 ATOMIC_CMPXCHG_REV_MIX( fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_sub_rev_fp 1478 ATOMIC_CMPXCHG_REV_MIX( fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_rev_fp 1479 ATOMIC_CMPXCHG_REV_MIX( fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_rev_fp 1480 1481 ATOMIC_CMPXCHG_REV_MIX( fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_rev_fp 1482 ATOMIC_CMPXCHG_REV_MIX( fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_sub_rev_fp 1483 ATOMIC_CMPXCHG_REV_MIX( fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_rev_fp 1484 ATOMIC_CMPXCHG_REV_MIX( fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_rev_fp 1485 1486 ATOMIC_CMPXCHG_REV_MIX( fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_rev_fp 1487 ATOMIC_CMPXCHG_REV_MIX( fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_sub_rev_fp 1488 ATOMIC_CMPXCHG_REV_MIX( fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_rev_fp 1489 ATOMIC_CMPXCHG_REV_MIX( fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_rev_fp 1490 1491 ATOMIC_CMPXCHG_REV_MIX( fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_rev_fp 1492 ATOMIC_CMPXCHG_REV_MIX( fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_sub_rev_fp 1493 ATOMIC_CMPXCHG_REV_MIX( fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_rev_fp 1494 ATOMIC_CMPXCHG_REV_MIX( fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_rev_fp 1495 1496 ATOMIC_CMPXCHG_REV_MIX( float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_rev_fp 1497 ATOMIC_CMPXCHG_REV_MIX( float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_rev_fp 1498 1499 ATOMIC_CMPXCHG_REV_MIX( float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_rev_fp 1500 ATOMIC_CMPXCHG_REV_MIX( float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_rev_fp 1501 1502 ATOMIC_CRITICAL_REV_FP( float10, long double, sub_rev, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_rev_fp 1503 ATOMIC_CRITICAL_REV_FP( float10, long double, div_rev, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_rev_fp 1504 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1505 1506 #endif 1507 1508 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1509 // ------------------------------------------------------------------------ 1510 // X86 or X86_64: no alignment problems ==================================== 1511 #if USE_CMPXCHG_FIX 1512 // workaround for C78287 (complex(kind=4) data type) 1513 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ 1514 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ 1515 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ 1516 OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \ 1517 } 1518 // end of the second part of the workaround for C78287 1519 #else 1520 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ 1521 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ 1522 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ 1523 OP_CMPXCHG(TYPE,BITS,OP) \ 1524 } 1525 #endif // USE_CMPXCHG_FIX 1526 #else 1527 // ------------------------------------------------------------------------ 1528 // Code for other architectures that don't handle unaligned accesses. 1529 #define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ 1530 ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ 1531 OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ 1532 if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ 1533 OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ 1534 } else { \ 1535 KMP_CHECK_GTID; \ 1536 OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ 1537 } \ 1538 } 1539 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ 1540 1541 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_add_cmplx8 1542 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_sub_cmplx8 1543 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_mul_cmplx8 1544 ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_div_cmplx8 1545 1546 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64 1547 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 1548 1549 ////////////////////////////////////////////////////////////////////////////////////////////////////// 1550 // ------------------------------------------------------------------------ 1551 // Atomic READ routines 1552 // ------------------------------------------------------------------------ 1553 1554 // ------------------------------------------------------------------------ 1555 // Beginning of a definition (provides name, parameters, gebug trace) 1556 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) 1557 // OP_ID - operation identifier (add, sub, mul, ...) 1558 // TYPE - operands' type 1559 #define ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE, RET_TYPE) \ 1560 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * loc ) \ 1561 { \ 1562 KMP_DEBUG_ASSERT( __kmp_init_serial ); \ 1563 KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); 1564 1565 // ------------------------------------------------------------------------ 1566 // Operation on *lhs, rhs using "compare_and_store_ret" routine 1567 // TYPE - operands' type 1568 // BITS - size in bits, used to distinguish low level calls 1569 // OP - operator 1570 // Note: temp_val introduced in order to force the compiler to read 1571 // *lhs only once (w/o it the compiler reads *lhs twice) 1572 // TODO: check if it is still necessary 1573 // Return old value regardless of the result of "compare & swap# operation 1574 1575 #define OP_CMPXCHG_READ(TYPE,BITS,OP) \ 1576 { \ 1577 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 1578 union f_i_union { \ 1579 TYPE f_val; \ 1580 kmp_int##BITS i_val; \ 1581 }; \ 1582 union f_i_union old_value; \ 1583 temp_val = *loc; \ 1584 old_value.f_val = temp_val; \ 1585 old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( (kmp_int##BITS *) loc, \ 1586 *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val, \ 1587 *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val ); \ 1588 new_value = old_value.f_val; \ 1589 return new_value; \ 1590 } 1591 1592 // ------------------------------------------------------------------------- 1593 // Operation on *lhs, rhs bound by critical section 1594 // OP - operator (it's supposed to contain an assignment) 1595 // LCK_ID - lock identifier 1596 // Note: don't check gtid as it should always be valid 1597 // 1, 2-byte - expect valid parameter, other - check before this macro 1598 #define OP_CRITICAL_READ(OP,LCK_ID) \ 1599 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ 1600 \ 1601 new_value = (*loc); \ 1602 \ 1603 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); 1604 1605 // ------------------------------------------------------------------------- 1606 #ifdef KMP_GOMP_COMPAT 1607 #define OP_GOMP_CRITICAL_READ(OP,FLAG) \ 1608 if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ 1609 KMP_CHECK_GTID; \ 1610 OP_CRITICAL_READ( OP, 0 ); \ 1611 return new_value; \ 1612 } 1613 #else 1614 #define OP_GOMP_CRITICAL_READ(OP,FLAG) 1615 #endif /* KMP_GOMP_COMPAT */ 1616 1617 // ------------------------------------------------------------------------- 1618 #define ATOMIC_FIXED_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ 1619 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \ 1620 TYPE new_value; \ 1621 OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \ 1622 new_value = KMP_TEST_THEN_ADD##BITS( loc, OP 0 ); \ 1623 return new_value; \ 1624 } 1625 // ------------------------------------------------------------------------- 1626 #define ATOMIC_CMPXCHG_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ 1627 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \ 1628 TYPE new_value; \ 1629 OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \ 1630 OP_CMPXCHG_READ(TYPE,BITS,OP) \ 1631 } 1632 // ------------------------------------------------------------------------ 1633 // Routines for Extended types: long double, _Quad, complex flavours (use critical section) 1634 // TYPE_ID, OP_ID, TYPE - detailed above 1635 // OP - operator 1636 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1637 #define ATOMIC_CRITICAL_READ(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ 1638 ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \ 1639 TYPE new_value; \ 1640 OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) /* send assignment */ \ 1641 OP_CRITICAL_READ(OP,LCK_ID) /* send assignment */ \ 1642 return new_value; \ 1643 } 1644 1645 // ------------------------------------------------------------------------ 1646 // Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return value doesn't work. 1647 // Let's return the read value through the additional parameter. 1648 1649 #if ( KMP_OS_WINDOWS ) 1650 1651 #define OP_CRITICAL_READ_WRK(OP,LCK_ID) \ 1652 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ 1653 \ 1654 (*out) = (*loc); \ 1655 \ 1656 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); 1657 // ------------------------------------------------------------------------ 1658 #ifdef KMP_GOMP_COMPAT 1659 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) \ 1660 if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ 1661 KMP_CHECK_GTID; \ 1662 OP_CRITICAL_READ_WRK( OP, 0 ); \ 1663 } 1664 #else 1665 #define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) 1666 #endif /* KMP_GOMP_COMPAT */ 1667 // ------------------------------------------------------------------------ 1668 #define ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \ 1669 void __kmpc_atomic_##TYPE_ID##_##OP_ID( TYPE * out, ident_t *id_ref, int gtid, TYPE * loc ) \ 1670 { \ 1671 KMP_DEBUG_ASSERT( __kmp_init_serial ); \ 1672 KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); 1673 1674 // ------------------------------------------------------------------------ 1675 #define ATOMIC_CRITICAL_READ_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ 1676 ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \ 1677 OP_GOMP_CRITICAL_READ_WRK(OP##=,GOMP_FLAG) /* send assignment */ \ 1678 OP_CRITICAL_READ_WRK(OP,LCK_ID) /* send assignment */ \ 1679 } 1680 1681 #endif // KMP_OS_WINDOWS 1682 1683 // ------------------------------------------------------------------------ 1684 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 1685 ATOMIC_FIXED_READ( fixed4, rd, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_rd 1686 ATOMIC_FIXED_READ( fixed8, rd, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_rd 1687 ATOMIC_CMPXCHG_READ( float4, rd, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_rd 1688 ATOMIC_CMPXCHG_READ( float8, rd, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_rd 1689 1690 // !!! TODO: Remove lock operations for "char" since it can't be non-atomic 1691 ATOMIC_CMPXCHG_READ( fixed1, rd, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_rd 1692 ATOMIC_CMPXCHG_READ( fixed2, rd, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_rd 1693 1694 ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r, 1 ) // __kmpc_atomic_float10_rd 1695 #if KMP_HAVE_QUAD 1696 ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_rd 1697 #endif // KMP_HAVE_QUAD 1698 1699 // Fix for CQ220361 on Windows* OS 1700 #if ( KMP_OS_WINDOWS ) 1701 ATOMIC_CRITICAL_READ_WRK( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd 1702 #else 1703 ATOMIC_CRITICAL_READ( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd 1704 #endif 1705 ATOMIC_CRITICAL_READ( cmplx8, rd, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_rd 1706 ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_rd 1707 #if KMP_HAVE_QUAD 1708 ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_rd 1709 #if ( KMP_ARCH_X86 ) 1710 ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_a16_rd 1711 ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd 1712 #endif 1713 #endif 1714 1715 1716 // ------------------------------------------------------------------------ 1717 // Atomic WRITE routines 1718 // ------------------------------------------------------------------------ 1719 1720 #define ATOMIC_XCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ 1721 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ 1722 OP_GOMP_CRITICAL(OP,GOMP_FLAG) \ 1723 KMP_XCHG_FIXED##BITS( lhs, rhs ); \ 1724 } 1725 // ------------------------------------------------------------------------ 1726 #define ATOMIC_XCHG_FLOAT_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ 1727 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ 1728 OP_GOMP_CRITICAL(OP,GOMP_FLAG) \ 1729 KMP_XCHG_REAL##BITS( lhs, rhs ); \ 1730 } 1731 1732 1733 // ------------------------------------------------------------------------ 1734 // Operation on *lhs, rhs using "compare_and_store" routine 1735 // TYPE - operands' type 1736 // BITS - size in bits, used to distinguish low level calls 1737 // OP - operator 1738 // Note: temp_val introduced in order to force the compiler to read 1739 // *lhs only once (w/o it the compiler reads *lhs twice) 1740 #define OP_CMPXCHG_WR(TYPE,BITS,OP) \ 1741 { \ 1742 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 1743 TYPE old_value, new_value; \ 1744 temp_val = *lhs; \ 1745 old_value = temp_val; \ 1746 new_value = rhs; \ 1747 while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ 1748 *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ 1749 *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ 1750 { \ 1751 KMP_CPU_PAUSE(); \ 1752 \ 1753 temp_val = *lhs; \ 1754 old_value = temp_val; \ 1755 new_value = rhs; \ 1756 } \ 1757 } 1758 1759 // ------------------------------------------------------------------------- 1760 #define ATOMIC_CMPXCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ 1761 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ 1762 OP_GOMP_CRITICAL(OP,GOMP_FLAG) \ 1763 OP_CMPXCHG_WR(TYPE,BITS,OP) \ 1764 } 1765 1766 // ------------------------------------------------------------------------ 1767 // Routines for Extended types: long double, _Quad, complex flavours (use critical section) 1768 // TYPE_ID, OP_ID, TYPE - detailed above 1769 // OP - operator 1770 // LCK_ID - lock identifier, used to possibly distinguish lock variable 1771 #define ATOMIC_CRITICAL_WR(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ 1772 ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ 1773 OP_GOMP_CRITICAL(OP,GOMP_FLAG) /* send assignment */ \ 1774 OP_CRITICAL(OP,LCK_ID) /* send assignment */ \ 1775 } 1776 // ------------------------------------------------------------------------- 1777 1778 ATOMIC_XCHG_WR( fixed1, wr, kmp_int8, 8, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_wr 1779 ATOMIC_XCHG_WR( fixed2, wr, kmp_int16, 16, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_wr 1780 ATOMIC_XCHG_WR( fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_wr 1781 #if ( KMP_ARCH_X86 ) 1782 ATOMIC_CMPXCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr 1783 #else 1784 ATOMIC_XCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr 1785 #endif 1786 1787 ATOMIC_XCHG_FLOAT_WR( float4, wr, kmp_real32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_float4_wr 1788 #if ( KMP_ARCH_X86 ) 1789 ATOMIC_CMPXCHG_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr 1790 #else 1791 ATOMIC_XCHG_FLOAT_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr 1792 #endif 1793 1794 ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r, 1 ) // __kmpc_atomic_float10_wr 1795 #if KMP_HAVE_QUAD 1796 ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r, 1 ) // __kmpc_atomic_float16_wr 1797 #endif 1798 ATOMIC_CRITICAL_WR( cmplx4, wr, kmp_cmplx32, =, 8c, 1 ) // __kmpc_atomic_cmplx4_wr 1799 ATOMIC_CRITICAL_WR( cmplx8, wr, kmp_cmplx64, =, 16c, 1 ) // __kmpc_atomic_cmplx8_wr 1800 ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c, 1 ) // __kmpc_atomic_cmplx10_wr 1801 #if KMP_HAVE_QUAD 1802 ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c, 1 ) // __kmpc_atomic_cmplx16_wr 1803 #if ( KMP_ARCH_X86 ) 1804 ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t, =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr 1805 ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr 1806 #endif 1807 #endif 1808 1809 1810 // ------------------------------------------------------------------------ 1811 // Atomic CAPTURE routines 1812 // ------------------------------------------------------------------------ 1813 1814 // Beginning of a definition (provides name, parameters, gebug trace) 1815 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) 1816 // OP_ID - operation identifier (add, sub, mul, ...) 1817 // TYPE - operands' type 1818 #define ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,RET_TYPE) \ 1819 RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ) \ 1820 { \ 1821 KMP_DEBUG_ASSERT( __kmp_init_serial ); \ 1822 KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); 1823 1824 // ------------------------------------------------------------------------- 1825 // Operation on *lhs, rhs bound by critical section 1826 // OP - operator (it's supposed to contain an assignment) 1827 // LCK_ID - lock identifier 1828 // Note: don't check gtid as it should always be valid 1829 // 1, 2-byte - expect valid parameter, other - check before this macro 1830 #define OP_CRITICAL_CPT(OP,LCK_ID) \ 1831 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ 1832 \ 1833 if( flag ) { \ 1834 (*lhs) OP rhs; \ 1835 new_value = (*lhs); \ 1836 } else { \ 1837 new_value = (*lhs); \ 1838 (*lhs) OP rhs; \ 1839 } \ 1840 \ 1841 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ 1842 return new_value; 1843 1844 // ------------------------------------------------------------------------ 1845 #ifdef KMP_GOMP_COMPAT 1846 #define OP_GOMP_CRITICAL_CPT(OP,FLAG) \ 1847 if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ 1848 KMP_CHECK_GTID; \ 1849 OP_CRITICAL_CPT( OP##=, 0 ); \ 1850 } 1851 #else 1852 #define OP_GOMP_CRITICAL_CPT(OP,FLAG) 1853 #endif /* KMP_GOMP_COMPAT */ 1854 1855 // ------------------------------------------------------------------------ 1856 // Operation on *lhs, rhs using "compare_and_store" routine 1857 // TYPE - operands' type 1858 // BITS - size in bits, used to distinguish low level calls 1859 // OP - operator 1860 // Note: temp_val introduced in order to force the compiler to read 1861 // *lhs only once (w/o it the compiler reads *lhs twice) 1862 #define OP_CMPXCHG_CPT(TYPE,BITS,OP) \ 1863 { \ 1864 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 1865 TYPE old_value, new_value; \ 1866 temp_val = *lhs; \ 1867 old_value = temp_val; \ 1868 new_value = old_value OP rhs; \ 1869 while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ 1870 *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ 1871 *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ 1872 { \ 1873 KMP_CPU_PAUSE(); \ 1874 \ 1875 temp_val = *lhs; \ 1876 old_value = temp_val; \ 1877 new_value = old_value OP rhs; \ 1878 } \ 1879 if( flag ) { \ 1880 return new_value; \ 1881 } else \ 1882 return old_value; \ 1883 } 1884 1885 // ------------------------------------------------------------------------- 1886 #define ATOMIC_CMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ 1887 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ 1888 TYPE new_value; \ 1889 OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \ 1890 OP_CMPXCHG_CPT(TYPE,BITS,OP) \ 1891 } 1892 1893 // ------------------------------------------------------------------------- 1894 #define ATOMIC_FIXED_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ 1895 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ 1896 TYPE old_value, new_value; \ 1897 OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \ 1898 /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ 1899 old_value = KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \ 1900 if( flag ) { \ 1901 return old_value OP rhs; \ 1902 } else \ 1903 return old_value; \ 1904 } 1905 // ------------------------------------------------------------------------- 1906 1907 ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_add_cpt 1908 ATOMIC_FIXED_ADD_CPT( fixed4, sub_cpt, kmp_int32, 32, -, 0 ) // __kmpc_atomic_fixed4_sub_cpt 1909 ATOMIC_FIXED_ADD_CPT( fixed8, add_cpt, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_cpt 1910 ATOMIC_FIXED_ADD_CPT( fixed8, sub_cpt, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt 1911 1912 ATOMIC_CMPXCHG_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt 1913 ATOMIC_CMPXCHG_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt 1914 ATOMIC_CMPXCHG_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt 1915 ATOMIC_CMPXCHG_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt 1916 1917 // ------------------------------------------------------------------------ 1918 // Entries definition for integer operands 1919 // TYPE_ID - operands type and size (fixed4, float4) 1920 // OP_ID - operation identifier (add, sub, mul, ...) 1921 // TYPE - operand type 1922 // BITS - size in bits, used to distinguish low level calls 1923 // OP - operator (used in critical section) 1924 // TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG 1925 // ------------------------------------------------------------------------ 1926 // Routines for ATOMIC integer operands, other operators 1927 // ------------------------------------------------------------------------ 1928 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 1929 ATOMIC_CMPXCHG_CPT( fixed1, add_cpt, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_cpt 1930 ATOMIC_CMPXCHG_CPT( fixed1, andb_cpt, kmp_int8, 8, &, 0 ) // __kmpc_atomic_fixed1_andb_cpt 1931 ATOMIC_CMPXCHG_CPT( fixed1, div_cpt, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt 1932 ATOMIC_CMPXCHG_CPT( fixed1u, div_cpt, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt 1933 ATOMIC_CMPXCHG_CPT( fixed1, mul_cpt, kmp_int8, 8, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_cpt 1934 ATOMIC_CMPXCHG_CPT( fixed1, orb_cpt, kmp_int8, 8, |, 0 ) // __kmpc_atomic_fixed1_orb_cpt 1935 ATOMIC_CMPXCHG_CPT( fixed1, shl_cpt, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt 1936 ATOMIC_CMPXCHG_CPT( fixed1, shr_cpt, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt 1937 ATOMIC_CMPXCHG_CPT( fixed1u, shr_cpt, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt 1938 ATOMIC_CMPXCHG_CPT( fixed1, sub_cpt, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt 1939 ATOMIC_CMPXCHG_CPT( fixed1, xor_cpt, kmp_int8, 8, ^, 0 ) // __kmpc_atomic_fixed1_xor_cpt 1940 ATOMIC_CMPXCHG_CPT( fixed2, add_cpt, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_cpt 1941 ATOMIC_CMPXCHG_CPT( fixed2, andb_cpt, kmp_int16, 16, &, 0 ) // __kmpc_atomic_fixed2_andb_cpt 1942 ATOMIC_CMPXCHG_CPT( fixed2, div_cpt, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt 1943 ATOMIC_CMPXCHG_CPT( fixed2u, div_cpt, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt 1944 ATOMIC_CMPXCHG_CPT( fixed2, mul_cpt, kmp_int16, 16, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_cpt 1945 ATOMIC_CMPXCHG_CPT( fixed2, orb_cpt, kmp_int16, 16, |, 0 ) // __kmpc_atomic_fixed2_orb_cpt 1946 ATOMIC_CMPXCHG_CPT( fixed2, shl_cpt, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt 1947 ATOMIC_CMPXCHG_CPT( fixed2, shr_cpt, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt 1948 ATOMIC_CMPXCHG_CPT( fixed2u, shr_cpt, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt 1949 ATOMIC_CMPXCHG_CPT( fixed2, sub_cpt, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt 1950 ATOMIC_CMPXCHG_CPT( fixed2, xor_cpt, kmp_int16, 16, ^, 0 ) // __kmpc_atomic_fixed2_xor_cpt 1951 ATOMIC_CMPXCHG_CPT( fixed4, andb_cpt, kmp_int32, 32, &, 0 ) // __kmpc_atomic_fixed4_andb_cpt 1952 ATOMIC_CMPXCHG_CPT( fixed4, div_cpt, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt 1953 ATOMIC_CMPXCHG_CPT( fixed4u, div_cpt, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt 1954 ATOMIC_CMPXCHG_CPT( fixed4, mul_cpt, kmp_int32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul_cpt 1955 ATOMIC_CMPXCHG_CPT( fixed4, orb_cpt, kmp_int32, 32, |, 0 ) // __kmpc_atomic_fixed4_orb_cpt 1956 ATOMIC_CMPXCHG_CPT( fixed4, shl_cpt, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt 1957 ATOMIC_CMPXCHG_CPT( fixed4, shr_cpt, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt 1958 ATOMIC_CMPXCHG_CPT( fixed4u, shr_cpt, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt 1959 ATOMIC_CMPXCHG_CPT( fixed4, xor_cpt, kmp_int32, 32, ^, 0 ) // __kmpc_atomic_fixed4_xor_cpt 1960 ATOMIC_CMPXCHG_CPT( fixed8, andb_cpt, kmp_int64, 64, &, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb_cpt 1961 ATOMIC_CMPXCHG_CPT( fixed8, div_cpt, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt 1962 ATOMIC_CMPXCHG_CPT( fixed8u, div_cpt, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt 1963 ATOMIC_CMPXCHG_CPT( fixed8, mul_cpt, kmp_int64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_cpt 1964 ATOMIC_CMPXCHG_CPT( fixed8, orb_cpt, kmp_int64, 64, |, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb_cpt 1965 ATOMIC_CMPXCHG_CPT( fixed8, shl_cpt, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt 1966 ATOMIC_CMPXCHG_CPT( fixed8, shr_cpt, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt 1967 ATOMIC_CMPXCHG_CPT( fixed8u, shr_cpt, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt 1968 ATOMIC_CMPXCHG_CPT( fixed8, xor_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor_cpt 1969 ATOMIC_CMPXCHG_CPT( float4, div_cpt, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt 1970 ATOMIC_CMPXCHG_CPT( float4, mul_cpt, kmp_real32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_cpt 1971 ATOMIC_CMPXCHG_CPT( float8, div_cpt, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt 1972 ATOMIC_CMPXCHG_CPT( float8, mul_cpt, kmp_real64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_cpt 1973 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 1974 1975 ////////////////////////////////// 1976 1977 // CAPTURE routines for mixed types RHS=float16 1978 #if KMP_HAVE_QUAD 1979 1980 // Beginning of a definition (provides name, parameters, gebug trace) 1981 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) 1982 // OP_ID - operation identifier (add, sub, mul, ...) 1983 // TYPE - operands' type 1984 #define ATOMIC_BEGIN_CPT_MIX(TYPE_ID,OP_ID,TYPE,RTYPE_ID,RTYPE) \ 1985 TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs, int flag ) \ 1986 { \ 1987 KMP_DEBUG_ASSERT( __kmp_init_serial ); \ 1988 KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid )); 1989 1990 // ------------------------------------------------------------------------- 1991 #define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ 1992 ATOMIC_BEGIN_CPT_MIX(TYPE_ID,OP_ID,TYPE,RTYPE_ID,RTYPE) \ 1993 TYPE new_value; \ 1994 OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \ 1995 OP_CMPXCHG_CPT(TYPE,BITS,OP) \ 1996 } 1997 1998 // ------------------------------------------------------------------------- 1999 #define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \ 2000 ATOMIC_BEGIN_CPT_MIX(TYPE_ID,OP_ID,TYPE,RTYPE_ID,RTYPE) \ 2001 TYPE new_value; \ 2002 OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) /* send assignment */ \ 2003 OP_CRITICAL_CPT(OP##=,LCK_ID) /* send assignment */ \ 2004 } 2005 2006 ATOMIC_CMPXCHG_CPT_MIX( fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_cpt_fp 2007 ATOMIC_CMPXCHG_CPT_MIX( fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_add_cpt_fp 2008 ATOMIC_CMPXCHG_CPT_MIX( fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_fp 2009 ATOMIC_CMPXCHG_CPT_MIX( fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_sub_cpt_fp 2010 ATOMIC_CMPXCHG_CPT_MIX( fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_cpt_fp 2011 ATOMIC_CMPXCHG_CPT_MIX( fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_mul_cpt_fp 2012 ATOMIC_CMPXCHG_CPT_MIX( fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_fp 2013 ATOMIC_CMPXCHG_CPT_MIX( fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_fp 2014 2015 ATOMIC_CMPXCHG_CPT_MIX( fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_cpt_fp 2016 ATOMIC_CMPXCHG_CPT_MIX( fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_add_cpt_fp 2017 ATOMIC_CMPXCHG_CPT_MIX( fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_fp 2018 ATOMIC_CMPXCHG_CPT_MIX( fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_sub_cpt_fp 2019 ATOMIC_CMPXCHG_CPT_MIX( fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_cpt_fp 2020 ATOMIC_CMPXCHG_CPT_MIX( fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_mul_cpt_fp 2021 ATOMIC_CMPXCHG_CPT_MIX( fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_fp 2022 ATOMIC_CMPXCHG_CPT_MIX( fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_fp 2023 2024 ATOMIC_CMPXCHG_CPT_MIX( fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add_cpt_fp 2025 ATOMIC_CMPXCHG_CPT_MIX( fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_add_cpt_fp 2026 ATOMIC_CMPXCHG_CPT_MIX( fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_cpt_fp 2027 ATOMIC_CMPXCHG_CPT_MIX( fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_sub_cpt_fp 2028 ATOMIC_CMPXCHG_CPT_MIX( fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_cpt_fp 2029 ATOMIC_CMPXCHG_CPT_MIX( fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_mul_cpt_fp 2030 ATOMIC_CMPXCHG_CPT_MIX( fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_cpt_fp 2031 ATOMIC_CMPXCHG_CPT_MIX( fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_cpt_fp 2032 2033 ATOMIC_CMPXCHG_CPT_MIX( fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_cpt_fp 2034 ATOMIC_CMPXCHG_CPT_MIX( fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_add_cpt_fp 2035 ATOMIC_CMPXCHG_CPT_MIX( fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_fp 2036 ATOMIC_CMPXCHG_CPT_MIX( fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_sub_cpt_fp 2037 ATOMIC_CMPXCHG_CPT_MIX( fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_cpt_fp 2038 ATOMIC_CMPXCHG_CPT_MIX( fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_mul_cpt_fp 2039 ATOMIC_CMPXCHG_CPT_MIX( fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_fp 2040 ATOMIC_CMPXCHG_CPT_MIX( fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_fp 2041 2042 ATOMIC_CMPXCHG_CPT_MIX( float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt_fp 2043 ATOMIC_CMPXCHG_CPT_MIX( float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_fp 2044 ATOMIC_CMPXCHG_CPT_MIX( float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_cpt_fp 2045 ATOMIC_CMPXCHG_CPT_MIX( float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_fp 2046 2047 ATOMIC_CMPXCHG_CPT_MIX( float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt_fp 2048 ATOMIC_CMPXCHG_CPT_MIX( float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_fp 2049 ATOMIC_CMPXCHG_CPT_MIX( float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_cpt_fp 2050 ATOMIC_CMPXCHG_CPT_MIX( float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_fp 2051 2052 ATOMIC_CRITICAL_CPT_MIX( float10, long double, add_cpt, +, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_add_cpt_fp 2053 ATOMIC_CRITICAL_CPT_MIX( float10, long double, sub_cpt, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_fp 2054 ATOMIC_CRITICAL_CPT_MIX( float10, long double, mul_cpt, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt_fp 2055 ATOMIC_CRITICAL_CPT_MIX( float10, long double, div_cpt, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_fp 2056 2057 #endif //KMP_HAVE_QUAD 2058 2059 /////////////////////////////////// 2060 2061 // ------------------------------------------------------------------------ 2062 // Routines for C/C++ Reduction operators && and || 2063 // ------------------------------------------------------------------------ 2064 2065 // ------------------------------------------------------------------------- 2066 // Operation on *lhs, rhs bound by critical section 2067 // OP - operator (it's supposed to contain an assignment) 2068 // LCK_ID - lock identifier 2069 // Note: don't check gtid as it should always be valid 2070 // 1, 2-byte - expect valid parameter, other - check before this macro 2071 #define OP_CRITICAL_L_CPT(OP,LCK_ID) \ 2072 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ 2073 \ 2074 if( flag ) { \ 2075 new_value OP rhs; \ 2076 } else \ 2077 new_value = (*lhs); \ 2078 \ 2079 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); 2080 2081 // ------------------------------------------------------------------------ 2082 #ifdef KMP_GOMP_COMPAT 2083 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG) \ 2084 if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ 2085 KMP_CHECK_GTID; \ 2086 OP_CRITICAL_L_CPT( OP, 0 ); \ 2087 return new_value; \ 2088 } 2089 #else 2090 #define OP_GOMP_CRITICAL_L_CPT(OP,FLAG) 2091 #endif /* KMP_GOMP_COMPAT */ 2092 2093 // ------------------------------------------------------------------------ 2094 // Need separate macros for &&, || because there is no combined assignment 2095 #define ATOMIC_CMPX_L_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ 2096 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ 2097 TYPE new_value; \ 2098 OP_GOMP_CRITICAL_L_CPT( = *lhs OP, GOMP_FLAG ) \ 2099 OP_CMPXCHG_CPT(TYPE,BITS,OP) \ 2100 } 2101 2102 ATOMIC_CMPX_L_CPT( fixed1, andl_cpt, char, 8, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl_cpt 2103 ATOMIC_CMPX_L_CPT( fixed1, orl_cpt, char, 8, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl_cpt 2104 ATOMIC_CMPX_L_CPT( fixed2, andl_cpt, short, 16, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl_cpt 2105 ATOMIC_CMPX_L_CPT( fixed2, orl_cpt, short, 16, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl_cpt 2106 ATOMIC_CMPX_L_CPT( fixed4, andl_cpt, kmp_int32, 32, &&, 0 ) // __kmpc_atomic_fixed4_andl_cpt 2107 ATOMIC_CMPX_L_CPT( fixed4, orl_cpt, kmp_int32, 32, ||, 0 ) // __kmpc_atomic_fixed4_orl_cpt 2108 ATOMIC_CMPX_L_CPT( fixed8, andl_cpt, kmp_int64, 64, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl_cpt 2109 ATOMIC_CMPX_L_CPT( fixed8, orl_cpt, kmp_int64, 64, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl_cpt 2110 2111 2112 // ------------------------------------------------------------------------- 2113 // Routines for Fortran operators that matched no one in C: 2114 // MAX, MIN, .EQV., .NEQV. 2115 // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt 2116 // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt 2117 // ------------------------------------------------------------------------- 2118 2119 // ------------------------------------------------------------------------- 2120 // MIN and MAX need separate macros 2121 // OP - operator to check if we need any actions? 2122 #define MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \ 2123 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ 2124 \ 2125 if ( *lhs OP rhs ) { /* still need actions? */ \ 2126 old_value = *lhs; \ 2127 *lhs = rhs; \ 2128 if ( flag ) \ 2129 new_value = rhs; \ 2130 else \ 2131 new_value = old_value; \ 2132 } \ 2133 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ 2134 return new_value; \ 2135 2136 // ------------------------------------------------------------------------- 2137 #ifdef KMP_GOMP_COMPAT 2138 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) \ 2139 if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \ 2140 KMP_CHECK_GTID; \ 2141 MIN_MAX_CRITSECT_CPT( OP, 0 ); \ 2142 } 2143 #else 2144 #define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) 2145 #endif /* KMP_GOMP_COMPAT */ 2146 2147 // ------------------------------------------------------------------------- 2148 #define MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \ 2149 { \ 2150 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2151 /*TYPE old_value; */ \ 2152 temp_val = *lhs; \ 2153 old_value = temp_val; \ 2154 while ( old_value OP rhs && /* still need actions? */ \ 2155 ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ 2156 *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ 2157 *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \ 2158 { \ 2159 KMP_CPU_PAUSE(); \ 2160 temp_val = *lhs; \ 2161 old_value = temp_val; \ 2162 } \ 2163 if( flag ) \ 2164 return rhs; \ 2165 else \ 2166 return old_value; \ 2167 } 2168 2169 // ------------------------------------------------------------------------- 2170 // 1-byte, 2-byte operands - use critical section 2171 #define MIN_MAX_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ 2172 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ 2173 TYPE new_value, old_value; \ 2174 if ( *lhs OP rhs ) { /* need actions? */ \ 2175 GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \ 2176 MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \ 2177 } \ 2178 return *lhs; \ 2179 } 2180 2181 #define MIN_MAX_COMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ 2182 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ 2183 TYPE new_value, old_value; \ 2184 if ( *lhs OP rhs ) { \ 2185 GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \ 2186 MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \ 2187 } \ 2188 return *lhs; \ 2189 } 2190 2191 2192 MIN_MAX_COMPXCHG_CPT( fixed1, max_cpt, char, 8, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max_cpt 2193 MIN_MAX_COMPXCHG_CPT( fixed1, min_cpt, char, 8, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min_cpt 2194 MIN_MAX_COMPXCHG_CPT( fixed2, max_cpt, short, 16, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max_cpt 2195 MIN_MAX_COMPXCHG_CPT( fixed2, min_cpt, short, 16, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min_cpt 2196 MIN_MAX_COMPXCHG_CPT( fixed4, max_cpt, kmp_int32, 32, <, 0 ) // __kmpc_atomic_fixed4_max_cpt 2197 MIN_MAX_COMPXCHG_CPT( fixed4, min_cpt, kmp_int32, 32, >, 0 ) // __kmpc_atomic_fixed4_min_cpt 2198 MIN_MAX_COMPXCHG_CPT( fixed8, max_cpt, kmp_int64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max_cpt 2199 MIN_MAX_COMPXCHG_CPT( fixed8, min_cpt, kmp_int64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min_cpt 2200 MIN_MAX_COMPXCHG_CPT( float4, max_cpt, kmp_real32, 32, <, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max_cpt 2201 MIN_MAX_COMPXCHG_CPT( float4, min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt 2202 MIN_MAX_COMPXCHG_CPT( float8, max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt 2203 MIN_MAX_COMPXCHG_CPT( float8, min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt 2204 #if KMP_HAVE_QUAD 2205 MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max_cpt 2206 MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min_cpt 2207 #if ( KMP_ARCH_X86 ) 2208 MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16_cpt 2209 MIN_MAX_CRITICAL_CPT( float16, min_a16_cpt, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_mix_a16_cpt 2210 #endif 2211 #endif 2212 2213 // ------------------------------------------------------------------------ 2214 #ifdef KMP_GOMP_COMPAT 2215 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) \ 2216 if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ 2217 KMP_CHECK_GTID; \ 2218 OP_CRITICAL_CPT( OP, 0 ); \ 2219 } 2220 #else 2221 #define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) 2222 #endif /* KMP_GOMP_COMPAT */ 2223 // ------------------------------------------------------------------------ 2224 #define ATOMIC_CMPX_EQV_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ 2225 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ 2226 TYPE new_value; \ 2227 OP_GOMP_CRITICAL_EQV_CPT(^=~,GOMP_FLAG) /* send assignment */ \ 2228 OP_CMPXCHG_CPT(TYPE,BITS,OP) \ 2229 } 2230 2231 // ------------------------------------------------------------------------ 2232 2233 ATOMIC_CMPXCHG_CPT( fixed1, neqv_cpt, kmp_int8, 8, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv_cpt 2234 ATOMIC_CMPXCHG_CPT( fixed2, neqv_cpt, kmp_int16, 16, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv_cpt 2235 ATOMIC_CMPXCHG_CPT( fixed4, neqv_cpt, kmp_int32, 32, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv_cpt 2236 ATOMIC_CMPXCHG_CPT( fixed8, neqv_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv_cpt 2237 ATOMIC_CMPX_EQV_CPT( fixed1, eqv_cpt, kmp_int8, 8, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv_cpt 2238 ATOMIC_CMPX_EQV_CPT( fixed2, eqv_cpt, kmp_int16, 16, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv_cpt 2239 ATOMIC_CMPX_EQV_CPT( fixed4, eqv_cpt, kmp_int32, 32, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv_cpt 2240 ATOMIC_CMPX_EQV_CPT( fixed8, eqv_cpt, kmp_int64, 64, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv_cpt 2241 2242 // ------------------------------------------------------------------------ 2243 // Routines for Extended types: long double, _Quad, complex flavours (use critical section) 2244 // TYPE_ID, OP_ID, TYPE - detailed above 2245 // OP - operator 2246 // LCK_ID - lock identifier, used to possibly distinguish lock variable 2247 #define ATOMIC_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ 2248 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ 2249 TYPE new_value; \ 2250 OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) /* send assignment */ \ 2251 OP_CRITICAL_CPT(OP##=,LCK_ID) /* send assignment */ \ 2252 } 2253 2254 // ------------------------------------------------------------------------ 2255 2256 // Workaround for cmplx4. Regular routines with return value don't work 2257 // on Win_32e. Let's return captured values through the additional parameter. 2258 #define OP_CRITICAL_CPT_WRK(OP,LCK_ID) \ 2259 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ 2260 \ 2261 if( flag ) { \ 2262 (*lhs) OP rhs; \ 2263 (*out) = (*lhs); \ 2264 } else { \ 2265 (*out) = (*lhs); \ 2266 (*lhs) OP rhs; \ 2267 } \ 2268 \ 2269 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ 2270 return; 2271 // ------------------------------------------------------------------------ 2272 2273 #ifdef KMP_GOMP_COMPAT 2274 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) \ 2275 if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ 2276 KMP_CHECK_GTID; \ 2277 OP_CRITICAL_CPT_WRK( OP##=, 0 ); \ 2278 } 2279 #else 2280 #define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) 2281 #endif /* KMP_GOMP_COMPAT */ 2282 // ------------------------------------------------------------------------ 2283 2284 #define ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \ 2285 void __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out, int flag ) \ 2286 { \ 2287 KMP_DEBUG_ASSERT( __kmp_init_serial ); \ 2288 KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); 2289 // ------------------------------------------------------------------------ 2290 2291 #define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ 2292 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \ 2293 OP_GOMP_CRITICAL_CPT_WRK(OP,GOMP_FLAG) \ 2294 OP_CRITICAL_CPT_WRK(OP##=,LCK_ID) \ 2295 } 2296 // The end of workaround for cmplx4 2297 2298 /* ------------------------------------------------------------------------- */ 2299 // routines for long double type 2300 ATOMIC_CRITICAL_CPT( float10, add_cpt, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add_cpt 2301 ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt 2302 ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt 2303 ATOMIC_CRITICAL_CPT( float10, div_cpt, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt 2304 #if KMP_HAVE_QUAD 2305 // routines for _Quad type 2306 ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add_cpt 2307 ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt 2308 ATOMIC_CRITICAL_CPT( float16, mul_cpt, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul_cpt 2309 ATOMIC_CRITICAL_CPT( float16, div_cpt, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt 2310 #if ( KMP_ARCH_X86 ) 2311 ATOMIC_CRITICAL_CPT( float16, add_a16_cpt, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16_cpt 2312 ATOMIC_CRITICAL_CPT( float16, sub_a16_cpt, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt 2313 ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16_cpt 2314 ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt 2315 #endif 2316 #endif 2317 2318 // routines for complex types 2319 2320 // cmplx4 routines to return void 2321 ATOMIC_CRITICAL_CPT_WRK( cmplx4, add_cpt, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add_cpt 2322 ATOMIC_CRITICAL_CPT_WRK( cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt 2323 ATOMIC_CRITICAL_CPT_WRK( cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul_cpt 2324 ATOMIC_CRITICAL_CPT_WRK( cmplx4, div_cpt, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt 2325 2326 ATOMIC_CRITICAL_CPT( cmplx8, add_cpt, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add_cpt 2327 ATOMIC_CRITICAL_CPT( cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt 2328 ATOMIC_CRITICAL_CPT( cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul_cpt 2329 ATOMIC_CRITICAL_CPT( cmplx8, div_cpt, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt 2330 ATOMIC_CRITICAL_CPT( cmplx10, add_cpt, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add_cpt 2331 ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt 2332 ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul_cpt 2333 ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt 2334 #if KMP_HAVE_QUAD 2335 ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_cpt 2336 ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt 2337 ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_cpt 2338 ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt 2339 #if ( KMP_ARCH_X86 ) 2340 ATOMIC_CRITICAL_CPT( cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16_cpt 2341 ATOMIC_CRITICAL_CPT( cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt 2342 ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16_cpt 2343 ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt 2344 #endif 2345 #endif 2346 2347 #if OMP_40_ENABLED 2348 2349 // OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr binop x; v = x; } for non-commutative operations. 2350 // Supported only on IA-32 architecture and Intel(R) 64 2351 2352 // ------------------------------------------------------------------------- 2353 // Operation on *lhs, rhs bound by critical section 2354 // OP - operator (it's supposed to contain an assignment) 2355 // LCK_ID - lock identifier 2356 // Note: don't check gtid as it should always be valid 2357 // 1, 2-byte - expect valid parameter, other - check before this macro 2358 #define OP_CRITICAL_CPT_REV(OP,LCK_ID) \ 2359 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ 2360 \ 2361 if( flag ) { \ 2362 /*temp_val = (*lhs);*/\ 2363 (*lhs) = (rhs) OP (*lhs); \ 2364 new_value = (*lhs); \ 2365 } else { \ 2366 new_value = (*lhs);\ 2367 (*lhs) = (rhs) OP (*lhs); \ 2368 } \ 2369 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ 2370 return new_value; 2371 2372 // ------------------------------------------------------------------------ 2373 #ifdef KMP_GOMP_COMPAT 2374 #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG) \ 2375 if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ 2376 KMP_CHECK_GTID; \ 2377 OP_CRITICAL_CPT_REV( OP, 0 ); \ 2378 } 2379 #else 2380 #define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG) 2381 #endif /* KMP_GOMP_COMPAT */ 2382 2383 // ------------------------------------------------------------------------ 2384 // Operation on *lhs, rhs using "compare_and_store" routine 2385 // TYPE - operands' type 2386 // BITS - size in bits, used to distinguish low level calls 2387 // OP - operator 2388 // Note: temp_val introduced in order to force the compiler to read 2389 // *lhs only once (w/o it the compiler reads *lhs twice) 2390 #define OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \ 2391 { \ 2392 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2393 TYPE old_value, new_value; \ 2394 temp_val = *lhs; \ 2395 old_value = temp_val; \ 2396 new_value = rhs OP old_value; \ 2397 while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ 2398 *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ 2399 *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ 2400 { \ 2401 KMP_CPU_PAUSE(); \ 2402 \ 2403 temp_val = *lhs; \ 2404 old_value = temp_val; \ 2405 new_value = rhs OP old_value; \ 2406 } \ 2407 if( flag ) { \ 2408 return new_value; \ 2409 } else \ 2410 return old_value; \ 2411 } 2412 2413 // ------------------------------------------------------------------------- 2414 #define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ 2415 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ 2416 TYPE new_value; \ 2417 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2418 OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \ 2419 OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \ 2420 } 2421 2422 2423 ATOMIC_CMPXCHG_CPT_REV( fixed1, div_cpt_rev, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_rev 2424 ATOMIC_CMPXCHG_CPT_REV( fixed1u, div_cpt_rev, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_rev 2425 ATOMIC_CMPXCHG_CPT_REV( fixed1, shl_cpt_rev, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt_rev 2426 ATOMIC_CMPXCHG_CPT_REV( fixed1, shr_cpt_rev, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt_rev 2427 ATOMIC_CMPXCHG_CPT_REV( fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt_rev 2428 ATOMIC_CMPXCHG_CPT_REV( fixed1, sub_cpt_rev, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_rev 2429 ATOMIC_CMPXCHG_CPT_REV( fixed2, div_cpt_rev, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_rev 2430 ATOMIC_CMPXCHG_CPT_REV( fixed2u, div_cpt_rev, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_rev 2431 ATOMIC_CMPXCHG_CPT_REV( fixed2, shl_cpt_rev, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt_rev 2432 ATOMIC_CMPXCHG_CPT_REV( fixed2, shr_cpt_rev, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt_rev 2433 ATOMIC_CMPXCHG_CPT_REV( fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt_rev 2434 ATOMIC_CMPXCHG_CPT_REV( fixed2, sub_cpt_rev, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_rev 2435 ATOMIC_CMPXCHG_CPT_REV( fixed4, div_cpt_rev, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt_rev 2436 ATOMIC_CMPXCHG_CPT_REV( fixed4u, div_cpt_rev, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt_rev 2437 ATOMIC_CMPXCHG_CPT_REV( fixed4, shl_cpt_rev, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt_rev 2438 ATOMIC_CMPXCHG_CPT_REV( fixed4, shr_cpt_rev, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt_rev 2439 ATOMIC_CMPXCHG_CPT_REV( fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt_rev 2440 ATOMIC_CMPXCHG_CPT_REV( fixed4, sub_cpt_rev, kmp_int32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_cpt_rev 2441 ATOMIC_CMPXCHG_CPT_REV( fixed8, div_cpt_rev, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_rev 2442 ATOMIC_CMPXCHG_CPT_REV( fixed8u, div_cpt_rev, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_rev 2443 ATOMIC_CMPXCHG_CPT_REV( fixed8, shl_cpt_rev, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt_rev 2444 ATOMIC_CMPXCHG_CPT_REV( fixed8, shr_cpt_rev, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt_rev 2445 ATOMIC_CMPXCHG_CPT_REV( fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt_rev 2446 ATOMIC_CMPXCHG_CPT_REV( fixed8, sub_cpt_rev, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_rev 2447 ATOMIC_CMPXCHG_CPT_REV( float4, div_cpt_rev, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_rev 2448 ATOMIC_CMPXCHG_CPT_REV( float4, sub_cpt_rev, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_rev 2449 ATOMIC_CMPXCHG_CPT_REV( float8, div_cpt_rev, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_rev 2450 ATOMIC_CMPXCHG_CPT_REV( float8, sub_cpt_rev, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_rev 2451 // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG 2452 2453 2454 // ------------------------------------------------------------------------ 2455 // Routines for Extended types: long double, _Quad, complex flavours (use critical section) 2456 // TYPE_ID, OP_ID, TYPE - detailed above 2457 // OP - operator 2458 // LCK_ID - lock identifier, used to possibly distinguish lock variable 2459 #define ATOMIC_CRITICAL_CPT_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ 2460 ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ 2461 TYPE new_value; \ 2462 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2463 /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/\ 2464 OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \ 2465 OP_CRITICAL_CPT_REV(OP,LCK_ID) \ 2466 } 2467 2468 2469 /* ------------------------------------------------------------------------- */ 2470 // routines for long double type 2471 ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_rev 2472 ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_rev 2473 #if KMP_HAVE_QUAD 2474 // routines for _Quad type 2475 ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt_rev 2476 ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt_rev 2477 #if ( KMP_ARCH_X86 ) 2478 ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt_rev 2479 ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt_rev 2480 #endif 2481 #endif 2482 2483 // routines for complex types 2484 2485 // ------------------------------------------------------------------------ 2486 2487 // Workaround for cmplx4. Regular routines with return value don't work 2488 // on Win_32e. Let's return captured values through the additional parameter. 2489 #define OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \ 2490 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ 2491 \ 2492 if( flag ) { \ 2493 (*lhs) = (rhs) OP (*lhs); \ 2494 (*out) = (*lhs); \ 2495 } else { \ 2496 (*out) = (*lhs); \ 2497 (*lhs) = (rhs) OP (*lhs); \ 2498 } \ 2499 \ 2500 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ 2501 return; 2502 // ------------------------------------------------------------------------ 2503 2504 #ifdef KMP_GOMP_COMPAT 2505 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG) \ 2506 if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ 2507 KMP_CHECK_GTID; \ 2508 OP_CRITICAL_CPT_REV_WRK( OP, 0 ); \ 2509 } 2510 #else 2511 #define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG) 2512 #endif /* KMP_GOMP_COMPAT */ 2513 // ------------------------------------------------------------------------ 2514 2515 #define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ 2516 ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \ 2517 OP_GOMP_CRITICAL_CPT_REV_WRK(OP,GOMP_FLAG) \ 2518 OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \ 2519 } 2520 // The end of workaround for cmplx4 2521 2522 2523 // !!! TODO: check if we need to return void for cmplx4 routines 2524 // cmplx4 routines to return void 2525 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt_rev 2526 ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt_rev 2527 2528 ATOMIC_CRITICAL_CPT_REV( cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt_rev 2529 ATOMIC_CRITICAL_CPT_REV( cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt_rev 2530 ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt_rev 2531 ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt_rev 2532 #if KMP_HAVE_QUAD 2533 ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt_rev 2534 ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt_rev 2535 #if ( KMP_ARCH_X86 ) 2536 ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev 2537 ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt_rev 2538 #endif 2539 #endif 2540 2541 // Capture reverse for mixed type: RHS=float16 2542 #if KMP_HAVE_QUAD 2543 2544 // Beginning of a definition (provides name, parameters, gebug trace) 2545 // TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) 2546 // OP_ID - operation identifier (add, sub, mul, ...) 2547 // TYPE - operands' type 2548 // ------------------------------------------------------------------------- 2549 #define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ 2550 ATOMIC_BEGIN_CPT_MIX(TYPE_ID,OP_ID,TYPE,RTYPE_ID,RTYPE) \ 2551 TYPE new_value; \ 2552 OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \ 2553 OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \ 2554 } 2555 2556 // ------------------------------------------------------------------------- 2557 #define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \ 2558 ATOMIC_BEGIN_CPT_MIX(TYPE_ID,OP_ID,TYPE,RTYPE_ID,RTYPE) \ 2559 TYPE new_value; \ 2560 OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) /* send assignment */ \ 2561 OP_CRITICAL_CPT_REV(OP,LCK_ID) /* send assignment */ \ 2562 } 2563 2564 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_rev_fp 2565 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp 2566 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_rev_fp 2567 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_rev_fp 2568 2569 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_rev_fp 2570 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp 2571 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_rev_fp 2572 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_rev_fp 2573 2574 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_cpt_rev_fp 2575 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp 2576 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_cpt_rev_fp 2577 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_cpt_rev_fp 2578 2579 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_rev_fp 2580 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp 2581 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_rev_fp 2582 ATOMIC_CMPXCHG_CPT_REV_MIX( fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_rev_fp 2583 2584 ATOMIC_CMPXCHG_CPT_REV_MIX( float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_rev_fp 2585 ATOMIC_CMPXCHG_CPT_REV_MIX( float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_rev_fp 2586 2587 ATOMIC_CMPXCHG_CPT_REV_MIX( float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_rev_fp 2588 ATOMIC_CMPXCHG_CPT_REV_MIX( float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_rev_fp 2589 2590 ATOMIC_CRITICAL_CPT_REV_MIX( float10, long double, sub_cpt_rev, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_rev_fp 2591 ATOMIC_CRITICAL_CPT_REV_MIX( float10, long double, div_cpt_rev, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_rev_fp 2592 2593 #endif //KMP_HAVE_QUAD 2594 2595 2596 // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;} 2597 2598 #define ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ 2599 TYPE __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \ 2600 { \ 2601 KMP_DEBUG_ASSERT( __kmp_init_serial ); \ 2602 KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid )); 2603 2604 #define CRITICAL_SWP(LCK_ID) \ 2605 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ 2606 \ 2607 old_value = (*lhs); \ 2608 (*lhs) = rhs; \ 2609 \ 2610 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ 2611 return old_value; 2612 2613 // ------------------------------------------------------------------------ 2614 #ifdef KMP_GOMP_COMPAT 2615 #define GOMP_CRITICAL_SWP(FLAG) \ 2616 if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ 2617 KMP_CHECK_GTID; \ 2618 CRITICAL_SWP( 0 ); \ 2619 } 2620 #else 2621 #define GOMP_CRITICAL_SWP(FLAG) 2622 #endif /* KMP_GOMP_COMPAT */ 2623 2624 2625 #define ATOMIC_XCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \ 2626 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ 2627 TYPE old_value; \ 2628 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 2629 old_value = KMP_XCHG_FIXED##BITS( lhs, rhs ); \ 2630 return old_value; \ 2631 } 2632 // ------------------------------------------------------------------------ 2633 #define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \ 2634 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ 2635 TYPE old_value; \ 2636 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 2637 old_value = KMP_XCHG_REAL##BITS( lhs, rhs ); \ 2638 return old_value; \ 2639 } 2640 2641 // ------------------------------------------------------------------------ 2642 #define CMPXCHG_SWP(TYPE,BITS) \ 2643 { \ 2644 TYPE KMP_ATOMIC_VOLATILE temp_val; \ 2645 TYPE old_value, new_value; \ 2646 temp_val = *lhs; \ 2647 old_value = temp_val; \ 2648 new_value = rhs; \ 2649 while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ 2650 *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ 2651 *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ 2652 { \ 2653 KMP_CPU_PAUSE(); \ 2654 \ 2655 temp_val = *lhs; \ 2656 old_value = temp_val; \ 2657 new_value = rhs; \ 2658 } \ 2659 return old_value; \ 2660 } 2661 2662 // ------------------------------------------------------------------------- 2663 #define ATOMIC_CMPXCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \ 2664 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ 2665 TYPE old_value; \ 2666 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 2667 CMPXCHG_SWP(TYPE,BITS) \ 2668 } 2669 2670 ATOMIC_XCHG_SWP( fixed1, kmp_int8, 8, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_swp 2671 ATOMIC_XCHG_SWP( fixed2, kmp_int16, 16, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_swp 2672 ATOMIC_XCHG_SWP( fixed4, kmp_int32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_swp 2673 2674 ATOMIC_XCHG_FLOAT_SWP( float4, kmp_real32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_float4_swp 2675 2676 #if ( KMP_ARCH_X86 ) 2677 ATOMIC_CMPXCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp 2678 ATOMIC_CMPXCHG_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp 2679 #else 2680 ATOMIC_XCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp 2681 ATOMIC_XCHG_FLOAT_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp 2682 #endif 2683 2684 // ------------------------------------------------------------------------ 2685 // Routines for Extended types: long double, _Quad, complex flavours (use critical section) 2686 #define ATOMIC_CRITICAL_SWP(TYPE_ID,TYPE,LCK_ID,GOMP_FLAG) \ 2687 ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ 2688 TYPE old_value; \ 2689 GOMP_CRITICAL_SWP(GOMP_FLAG) \ 2690 CRITICAL_SWP(LCK_ID) \ 2691 } 2692 2693 // ------------------------------------------------------------------------ 2694 2695 // !!! TODO: check if we need to return void for cmplx4 routines 2696 // Workaround for cmplx4. Regular routines with return value don't work 2697 // on Win_32e. Let's return captured values through the additional parameter. 2698 2699 #define ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \ 2700 void __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out ) \ 2701 { \ 2702 KMP_DEBUG_ASSERT( __kmp_init_serial ); \ 2703 KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid )); 2704 2705 2706 #define CRITICAL_SWP_WRK(LCK_ID) \ 2707 __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ 2708 \ 2709 tmp = (*lhs); \ 2710 (*lhs) = (rhs); \ 2711 (*out) = tmp; \ 2712 __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ 2713 return; 2714 2715 // ------------------------------------------------------------------------ 2716 2717 #ifdef KMP_GOMP_COMPAT 2718 #define GOMP_CRITICAL_SWP_WRK(FLAG) \ 2719 if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ 2720 KMP_CHECK_GTID; \ 2721 CRITICAL_SWP_WRK( 0 ); \ 2722 } 2723 #else 2724 #define GOMP_CRITICAL_SWP_WRK(FLAG) 2725 #endif /* KMP_GOMP_COMPAT */ 2726 // ------------------------------------------------------------------------ 2727 2728 #define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE,LCK_ID,GOMP_FLAG) \ 2729 ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \ 2730 TYPE tmp; \ 2731 GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \ 2732 CRITICAL_SWP_WRK(LCK_ID) \ 2733 } 2734 // The end of workaround for cmplx4 2735 2736 2737 ATOMIC_CRITICAL_SWP( float10, long double, 10r, 1 ) // __kmpc_atomic_float10_swp 2738 #if KMP_HAVE_QUAD 2739 ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r, 1 ) // __kmpc_atomic_float16_swp 2740 #endif 2741 // cmplx4 routine to return void 2742 ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp 2743 2744 //ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp 2745 2746 2747 ATOMIC_CRITICAL_SWP( cmplx8, kmp_cmplx64, 16c, 1 ) // __kmpc_atomic_cmplx8_swp 2748 ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c, 1 ) // __kmpc_atomic_cmplx10_swp 2749 #if KMP_HAVE_QUAD 2750 ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c, 1 ) // __kmpc_atomic_cmplx16_swp 2751 #if ( KMP_ARCH_X86 ) 2752 ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t, 16r, 1 ) // __kmpc_atomic_float16_a16_swp 2753 ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_swp 2754 #endif 2755 #endif 2756 2757 2758 // End of OpenMP 4.0 Capture 2759 2760 #endif //OMP_40_ENABLED 2761 2762 #endif //KMP_ARCH_X86 || KMP_ARCH_X86_64 2763 2764 2765 #undef OP_CRITICAL 2766 2767 /* ------------------------------------------------------------------------ */ 2768 /* Generic atomic routines */ 2769 /* ------------------------------------------------------------------------ */ 2770 2771 void 2772 __kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) 2773 { 2774 KMP_DEBUG_ASSERT( __kmp_init_serial ); 2775 2776 if ( 2777 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 2778 FALSE /* must use lock */ 2779 #else 2780 TRUE 2781 #endif 2782 ) 2783 { 2784 kmp_int8 old_value, new_value; 2785 2786 old_value = *(kmp_int8 *) lhs; 2787 (*f)( &new_value, &old_value, rhs ); 2788 2789 /* TODO: Should this be acquire or release? */ 2790 while ( ! KMP_COMPARE_AND_STORE_ACQ8 ( (kmp_int8 *) lhs, 2791 *(kmp_int8 *) &old_value, *(kmp_int8 *) &new_value ) ) 2792 { 2793 KMP_CPU_PAUSE(); 2794 2795 old_value = *(kmp_int8 *) lhs; 2796 (*f)( &new_value, &old_value, rhs ); 2797 } 2798 2799 return; 2800 } 2801 else { 2802 // 2803 // All 1-byte data is of integer data type. 2804 // 2805 2806 #ifdef KMP_GOMP_COMPAT 2807 if ( __kmp_atomic_mode == 2 ) { 2808 __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); 2809 } 2810 else 2811 #endif /* KMP_GOMP_COMPAT */ 2812 __kmp_acquire_atomic_lock( & __kmp_atomic_lock_1i, gtid ); 2813 2814 (*f)( lhs, lhs, rhs ); 2815 2816 #ifdef KMP_GOMP_COMPAT 2817 if ( __kmp_atomic_mode == 2 ) { 2818 __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); 2819 } 2820 else 2821 #endif /* KMP_GOMP_COMPAT */ 2822 __kmp_release_atomic_lock( & __kmp_atomic_lock_1i, gtid ); 2823 } 2824 } 2825 2826 void 2827 __kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) 2828 { 2829 if ( 2830 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 2831 FALSE /* must use lock */ 2832 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 2833 TRUE /* no alignment problems */ 2834 #else 2835 ! ( (kmp_uintptr_t) lhs & 0x1) /* make sure address is 2-byte aligned */ 2836 #endif 2837 ) 2838 { 2839 kmp_int16 old_value, new_value; 2840 2841 old_value = *(kmp_int16 *) lhs; 2842 (*f)( &new_value, &old_value, rhs ); 2843 2844 /* TODO: Should this be acquire or release? */ 2845 while ( ! KMP_COMPARE_AND_STORE_ACQ16 ( (kmp_int16 *) lhs, 2846 *(kmp_int16 *) &old_value, *(kmp_int16 *) &new_value ) ) 2847 { 2848 KMP_CPU_PAUSE(); 2849 2850 old_value = *(kmp_int16 *) lhs; 2851 (*f)( &new_value, &old_value, rhs ); 2852 } 2853 2854 return; 2855 } 2856 else { 2857 // 2858 // All 2-byte data is of integer data type. 2859 // 2860 2861 #ifdef KMP_GOMP_COMPAT 2862 if ( __kmp_atomic_mode == 2 ) { 2863 __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); 2864 } 2865 else 2866 #endif /* KMP_GOMP_COMPAT */ 2867 __kmp_acquire_atomic_lock( & __kmp_atomic_lock_2i, gtid ); 2868 2869 (*f)( lhs, lhs, rhs ); 2870 2871 #ifdef KMP_GOMP_COMPAT 2872 if ( __kmp_atomic_mode == 2 ) { 2873 __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); 2874 } 2875 else 2876 #endif /* KMP_GOMP_COMPAT */ 2877 __kmp_release_atomic_lock( & __kmp_atomic_lock_2i, gtid ); 2878 } 2879 } 2880 2881 void 2882 __kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) 2883 { 2884 KMP_DEBUG_ASSERT( __kmp_init_serial ); 2885 2886 if ( 2887 // 2888 // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints. 2889 // Gomp compatibility is broken if this routine is called for floats. 2890 // 2891 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 2892 TRUE /* no alignment problems */ 2893 #else 2894 ! ( (kmp_uintptr_t) lhs & 0x3) /* make sure address is 4-byte aligned */ 2895 #endif 2896 ) 2897 { 2898 kmp_int32 old_value, new_value; 2899 2900 old_value = *(kmp_int32 *) lhs; 2901 (*f)( &new_value, &old_value, rhs ); 2902 2903 /* TODO: Should this be acquire or release? */ 2904 while ( ! KMP_COMPARE_AND_STORE_ACQ32 ( (kmp_int32 *) lhs, 2905 *(kmp_int32 *) &old_value, *(kmp_int32 *) &new_value ) ) 2906 { 2907 KMP_CPU_PAUSE(); 2908 2909 old_value = *(kmp_int32 *) lhs; 2910 (*f)( &new_value, &old_value, rhs ); 2911 } 2912 2913 return; 2914 } 2915 else { 2916 // 2917 // Use __kmp_atomic_lock_4i for all 4-byte data, 2918 // even if it isn't of integer data type. 2919 // 2920 2921 #ifdef KMP_GOMP_COMPAT 2922 if ( __kmp_atomic_mode == 2 ) { 2923 __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); 2924 } 2925 else 2926 #endif /* KMP_GOMP_COMPAT */ 2927 __kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid ); 2928 2929 (*f)( lhs, lhs, rhs ); 2930 2931 #ifdef KMP_GOMP_COMPAT 2932 if ( __kmp_atomic_mode == 2 ) { 2933 __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); 2934 } 2935 else 2936 #endif /* KMP_GOMP_COMPAT */ 2937 __kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid ); 2938 } 2939 } 2940 2941 void 2942 __kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) 2943 { 2944 KMP_DEBUG_ASSERT( __kmp_init_serial ); 2945 if ( 2946 2947 #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) 2948 FALSE /* must use lock */ 2949 #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 2950 TRUE /* no alignment problems */ 2951 #else 2952 ! ( (kmp_uintptr_t) lhs & 0x7) /* make sure address is 8-byte aligned */ 2953 #endif 2954 ) 2955 { 2956 kmp_int64 old_value, new_value; 2957 2958 old_value = *(kmp_int64 *) lhs; 2959 (*f)( &new_value, &old_value, rhs ); 2960 /* TODO: Should this be acquire or release? */ 2961 while ( ! KMP_COMPARE_AND_STORE_ACQ64 ( (kmp_int64 *) lhs, 2962 *(kmp_int64 *) &old_value, 2963 *(kmp_int64 *) &new_value ) ) 2964 { 2965 KMP_CPU_PAUSE(); 2966 2967 old_value = *(kmp_int64 *) lhs; 2968 (*f)( &new_value, &old_value, rhs ); 2969 } 2970 2971 return; 2972 } else { 2973 // 2974 // Use __kmp_atomic_lock_8i for all 8-byte data, 2975 // even if it isn't of integer data type. 2976 // 2977 2978 #ifdef KMP_GOMP_COMPAT 2979 if ( __kmp_atomic_mode == 2 ) { 2980 __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); 2981 } 2982 else 2983 #endif /* KMP_GOMP_COMPAT */ 2984 __kmp_acquire_atomic_lock( & __kmp_atomic_lock_8i, gtid ); 2985 2986 (*f)( lhs, lhs, rhs ); 2987 2988 #ifdef KMP_GOMP_COMPAT 2989 if ( __kmp_atomic_mode == 2 ) { 2990 __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); 2991 } 2992 else 2993 #endif /* KMP_GOMP_COMPAT */ 2994 __kmp_release_atomic_lock( & __kmp_atomic_lock_8i, gtid ); 2995 } 2996 } 2997 2998 void 2999 __kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) 3000 { 3001 KMP_DEBUG_ASSERT( __kmp_init_serial ); 3002 3003 #ifdef KMP_GOMP_COMPAT 3004 if ( __kmp_atomic_mode == 2 ) { 3005 __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); 3006 } 3007 else 3008 #endif /* KMP_GOMP_COMPAT */ 3009 __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid ); 3010 3011 (*f)( lhs, lhs, rhs ); 3012 3013 #ifdef KMP_GOMP_COMPAT 3014 if ( __kmp_atomic_mode == 2 ) { 3015 __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); 3016 } 3017 else 3018 #endif /* KMP_GOMP_COMPAT */ 3019 __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid ); 3020 } 3021 3022 void 3023 __kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) 3024 { 3025 KMP_DEBUG_ASSERT( __kmp_init_serial ); 3026 3027 #ifdef KMP_GOMP_COMPAT 3028 if ( __kmp_atomic_mode == 2 ) { 3029 __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); 3030 } 3031 else 3032 #endif /* KMP_GOMP_COMPAT */ 3033 __kmp_acquire_atomic_lock( & __kmp_atomic_lock_16c, gtid ); 3034 3035 (*f)( lhs, lhs, rhs ); 3036 3037 #ifdef KMP_GOMP_COMPAT 3038 if ( __kmp_atomic_mode == 2 ) { 3039 __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); 3040 } 3041 else 3042 #endif /* KMP_GOMP_COMPAT */ 3043 __kmp_release_atomic_lock( & __kmp_atomic_lock_16c, gtid ); 3044 } 3045 3046 void 3047 __kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) 3048 { 3049 KMP_DEBUG_ASSERT( __kmp_init_serial ); 3050 3051 #ifdef KMP_GOMP_COMPAT 3052 if ( __kmp_atomic_mode == 2 ) { 3053 __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); 3054 } 3055 else 3056 #endif /* KMP_GOMP_COMPAT */ 3057 __kmp_acquire_atomic_lock( & __kmp_atomic_lock_20c, gtid ); 3058 3059 (*f)( lhs, lhs, rhs ); 3060 3061 #ifdef KMP_GOMP_COMPAT 3062 if ( __kmp_atomic_mode == 2 ) { 3063 __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); 3064 } 3065 else 3066 #endif /* KMP_GOMP_COMPAT */ 3067 __kmp_release_atomic_lock( & __kmp_atomic_lock_20c, gtid ); 3068 } 3069 3070 void 3071 __kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) 3072 { 3073 KMP_DEBUG_ASSERT( __kmp_init_serial ); 3074 3075 #ifdef KMP_GOMP_COMPAT 3076 if ( __kmp_atomic_mode == 2 ) { 3077 __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); 3078 } 3079 else 3080 #endif /* KMP_GOMP_COMPAT */ 3081 __kmp_acquire_atomic_lock( & __kmp_atomic_lock_32c, gtid ); 3082 3083 (*f)( lhs, lhs, rhs ); 3084 3085 #ifdef KMP_GOMP_COMPAT 3086 if ( __kmp_atomic_mode == 2 ) { 3087 __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); 3088 } 3089 else 3090 #endif /* KMP_GOMP_COMPAT */ 3091 __kmp_release_atomic_lock( & __kmp_atomic_lock_32c, gtid ); 3092 } 3093 3094 // AC: same two routines as GOMP_atomic_start/end, but will be called by our compiler 3095 // duplicated in order to not use 3-party names in pure Intel code 3096 // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin. 3097 void 3098 __kmpc_atomic_start(void) 3099 { 3100 int gtid = __kmp_entry_gtid(); 3101 KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid)); 3102 __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); 3103 } 3104 3105 3106 void 3107 __kmpc_atomic_end(void) 3108 { 3109 int gtid = __kmp_get_gtid(); 3110 KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid)); 3111 __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); 3112 } 3113 3114 /* ------------------------------------------------------------------------ */ 3115 /* ------------------------------------------------------------------------ */ 3116 /*! 3117 @} 3118 */ 3119 3120 // end of file 3121