1// z_Linux_asm.S: - microtasking routines specifically 2// written for Intel platforms running Linux* OS 3 4// 5////===----------------------------------------------------------------------===// 6//// 7//// The LLVM Compiler Infrastructure 8//// 9//// This file is dual licensed under the MIT and the University of Illinois Open 10//// Source Licenses. See LICENSE.txt for details. 11//// 12////===----------------------------------------------------------------------===// 13// 14 15// ----------------------------------------------------------------------- 16// macros 17// ----------------------------------------------------------------------- 18 19#include "kmp_config.h" 20 21#if KMP_ARCH_X86 || KMP_ARCH_X86_64 22 23# if KMP_MIC 24// the 'delay r16/r32/r64' should be used instead of the 'pause'. 25// The delay operation has the effect of removing the current thread from 26// the round-robin HT mechanism, and therefore speeds up the issue rate of 27// the other threads on the same core. 28// 29// A value of 0 works fine for <= 2 threads per core, but causes the EPCC 30// barrier time to increase greatly for 3 or more threads per core. 31// 32// A value of 100 works pretty well for up to 4 threads per core, but isn't 33// quite as fast as 0 for 2 threads per core. 34// 35// We need to check what happens for oversubscription / > 4 threads per core. 36// It is possible that we need to pass the delay value in as a parameter 37// that the caller determines based on the total # threads / # cores. 38// 39//.macro pause_op 40// mov $100, %rax 41// delay %rax 42//.endm 43# else 44# define pause_op .byte 0xf3,0x90 45# endif // KMP_MIC 46 47# if KMP_OS_DARWIN 48# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols 49# define KMP_LABEL(x) L_##x // form the name of label 50.macro KMP_CFI_DEF_OFFSET 51.endmacro 52.macro KMP_CFI_OFFSET 53.endmacro 54.macro KMP_CFI_REGISTER 55.endmacro 56.macro KMP_CFI_DEF 57.endmacro 58.macro ALIGN 59 .align $0 60.endmacro 61.macro DEBUG_INFO 62/* Not sure what .size does in icc, not sure if we need to do something 63 similar for OS X*. 64*/ 65.endmacro 66.macro PROC 67 ALIGN 4 68 .globl KMP_PREFIX_UNDERSCORE($0) 69KMP_PREFIX_UNDERSCORE($0): 70.endmacro 71# else // KMP_OS_DARWIN 72# define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols 73// Format labels so that they don't override function names in gdb's backtraces 74// MIC assembler doesn't accept .L syntax, the L works fine there (as well as 75// on OS X*) 76# if KMP_MIC 77# define KMP_LABEL(x) L_##x // local label 78# else 79# define KMP_LABEL(x) .L_##x // local label hidden from backtraces 80# endif // KMP_MIC 81.macro ALIGN size 82 .align 1<<(\size) 83.endm 84.macro DEBUG_INFO proc 85 .cfi_endproc 86// Not sure why we need .type and .size for the functions 87 .align 16 88 .type \proc,@function 89 .size \proc,.-\proc 90.endm 91.macro PROC proc 92 ALIGN 4 93 .globl KMP_PREFIX_UNDERSCORE(\proc) 94KMP_PREFIX_UNDERSCORE(\proc): 95 .cfi_startproc 96.endm 97.macro KMP_CFI_DEF_OFFSET sz 98 .cfi_def_cfa_offset \sz 99.endm 100.macro KMP_CFI_OFFSET reg, sz 101 .cfi_offset \reg,\sz 102.endm 103.macro KMP_CFI_REGISTER reg 104 .cfi_def_cfa_register \reg 105.endm 106.macro KMP_CFI_DEF reg, sz 107 .cfi_def_cfa \reg,\sz 108.endm 109# endif // KMP_OS_DARWIN 110#endif // KMP_ARCH_X86 || KMP_ARCH_x86_64 111 112#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 113 114# if KMP_OS_DARWIN 115# define KMP_PREFIX_UNDERSCORE(x) _##x // extra underscore for OS X* symbols 116# define KMP_LABEL(x) L_##x // form the name of label 117 118.macro ALIGN 119 .align $0 120.endmacro 121 122.macro DEBUG_INFO 123/* Not sure what .size does in icc, not sure if we need to do something 124 similar for OS X*. 125*/ 126.endmacro 127 128.macro PROC 129 ALIGN 4 130 .globl KMP_PREFIX_UNDERSCORE($0) 131KMP_PREFIX_UNDERSCORE($0): 132.endmacro 133# else // KMP_OS_DARWIN 134# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols 135// Format labels so that they don't override function names in gdb's backtraces 136# define KMP_LABEL(x) .L_##x // local label hidden from backtraces 137 138.macro ALIGN size 139 .align 1<<(\size) 140.endm 141 142.macro DEBUG_INFO proc 143 .cfi_endproc 144// Not sure why we need .type and .size for the functions 145 ALIGN 2 146 .type \proc,@function 147 .size \proc,.-\proc 148.endm 149 150.macro PROC proc 151 ALIGN 2 152 .globl KMP_PREFIX_UNDERSCORE(\proc) 153KMP_PREFIX_UNDERSCORE(\proc): 154 .cfi_startproc 155.endm 156# endif // KMP_OS_DARWIN 157 158#endif // (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 159 160// ----------------------------------------------------------------------- 161// data 162// ----------------------------------------------------------------------- 163 164#ifdef KMP_GOMP_COMPAT 165 166// Support for unnamed common blocks. 167// 168// Because the symbol ".gomp_critical_user_" contains a ".", we have to 169// put this stuff in assembly. 170 171# if KMP_ARCH_X86 172# if KMP_OS_DARWIN 173 .data 174 .comm .gomp_critical_user_,32 175 .data 176 .globl ___kmp_unnamed_critical_addr 177___kmp_unnamed_critical_addr: 178 .long .gomp_critical_user_ 179# else /* Linux* OS */ 180 .data 181 .comm .gomp_critical_user_,32,8 182 .data 183 ALIGN 4 184 .global __kmp_unnamed_critical_addr 185__kmp_unnamed_critical_addr: 186 .4byte .gomp_critical_user_ 187 .type __kmp_unnamed_critical_addr,@object 188 .size __kmp_unnamed_critical_addr,4 189# endif /* KMP_OS_DARWIN */ 190# endif /* KMP_ARCH_X86 */ 191 192# if KMP_ARCH_X86_64 193# if KMP_OS_DARWIN 194 .data 195 .comm .gomp_critical_user_,32 196 .data 197 .globl ___kmp_unnamed_critical_addr 198___kmp_unnamed_critical_addr: 199 .quad .gomp_critical_user_ 200# else /* Linux* OS */ 201 .data 202 .comm .gomp_critical_user_,32,8 203 .data 204 ALIGN 8 205 .global __kmp_unnamed_critical_addr 206__kmp_unnamed_critical_addr: 207 .8byte .gomp_critical_user_ 208 .type __kmp_unnamed_critical_addr,@object 209 .size __kmp_unnamed_critical_addr,8 210# endif /* KMP_OS_DARWIN */ 211# endif /* KMP_ARCH_X86_64 */ 212 213#endif /* KMP_GOMP_COMPAT */ 214 215 216#if KMP_ARCH_X86 && !KMP_ARCH_PPC64 217 218// ----------------------------------------------------------------------- 219// microtasking routines specifically written for IA-32 architecture 220// running Linux* OS 221// ----------------------------------------------------------------------- 222 223 .ident "Intel Corporation" 224 .data 225 ALIGN 4 226// void 227// __kmp_x86_pause( void ); 228 229 .text 230 PROC __kmp_x86_pause 231 232 pause_op 233 ret 234 235 DEBUG_INFO __kmp_x86_pause 236 237// void 238// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer ); 239 240 PROC __kmp_x86_cpuid 241 242 pushl %ebp 243 movl %esp,%ebp 244 pushl %edi 245 pushl %ebx 246 pushl %ecx 247 pushl %edx 248 249 movl 8(%ebp), %eax 250 movl 12(%ebp), %ecx 251 cpuid // Query the CPUID for the current processor 252 253 movl 16(%ebp), %edi 254 movl %eax, 0(%edi) 255 movl %ebx, 4(%edi) 256 movl %ecx, 8(%edi) 257 movl %edx, 12(%edi) 258 259 popl %edx 260 popl %ecx 261 popl %ebx 262 popl %edi 263 movl %ebp, %esp 264 popl %ebp 265 ret 266 267 DEBUG_INFO __kmp_x86_cpuid 268 269 270# if !KMP_ASM_INTRINS 271 272//------------------------------------------------------------------------ 273// kmp_int32 274// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); 275 276 PROC __kmp_test_then_add32 277 278 movl 4(%esp), %ecx 279 movl 8(%esp), %eax 280 lock 281 xaddl %eax,(%ecx) 282 ret 283 284 DEBUG_INFO __kmp_test_then_add32 285 286//------------------------------------------------------------------------ 287// FUNCTION __kmp_xchg_fixed8 288// 289// kmp_int32 290// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); 291// 292// parameters: 293// p: 4(%esp) 294// d: 8(%esp) 295// 296// return: %al 297 PROC __kmp_xchg_fixed8 298 299 movl 4(%esp), %ecx // "p" 300 movb 8(%esp), %al // "d" 301 302 lock 303 xchgb %al,(%ecx) 304 ret 305 306 DEBUG_INFO __kmp_xchg_fixed8 307 308 309//------------------------------------------------------------------------ 310// FUNCTION __kmp_xchg_fixed16 311// 312// kmp_int16 313// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); 314// 315// parameters: 316// p: 4(%esp) 317// d: 8(%esp) 318// return: %ax 319 PROC __kmp_xchg_fixed16 320 321 movl 4(%esp), %ecx // "p" 322 movw 8(%esp), %ax // "d" 323 324 lock 325 xchgw %ax,(%ecx) 326 ret 327 328 DEBUG_INFO __kmp_xchg_fixed16 329 330 331//------------------------------------------------------------------------ 332// FUNCTION __kmp_xchg_fixed32 333// 334// kmp_int32 335// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); 336// 337// parameters: 338// p: 4(%esp) 339// d: 8(%esp) 340// 341// return: %eax 342 PROC __kmp_xchg_fixed32 343 344 movl 4(%esp), %ecx // "p" 345 movl 8(%esp), %eax // "d" 346 347 lock 348 xchgl %eax,(%ecx) 349 ret 350 351 DEBUG_INFO __kmp_xchg_fixed32 352 353 354// kmp_int8 355// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); 356 PROC __kmp_compare_and_store8 357 358 movl 4(%esp), %ecx 359 movb 8(%esp), %al 360 movb 12(%esp), %dl 361 lock 362 cmpxchgb %dl,(%ecx) 363 sete %al // if %al == (%ecx) set %al = 1 else set %al = 0 364 and $1, %eax // sign extend previous instruction 365 ret 366 367 DEBUG_INFO __kmp_compare_and_store8 368 369// kmp_int16 370// __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv); 371 PROC __kmp_compare_and_store16 372 373 movl 4(%esp), %ecx 374 movw 8(%esp), %ax 375 movw 12(%esp), %dx 376 lock 377 cmpxchgw %dx,(%ecx) 378 sete %al // if %ax == (%ecx) set %al = 1 else set %al = 0 379 and $1, %eax // sign extend previous instruction 380 ret 381 382 DEBUG_INFO __kmp_compare_and_store16 383 384// kmp_int32 385// __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv); 386 PROC __kmp_compare_and_store32 387 388 movl 4(%esp), %ecx 389 movl 8(%esp), %eax 390 movl 12(%esp), %edx 391 lock 392 cmpxchgl %edx,(%ecx) 393 sete %al // if %eax == (%ecx) set %al = 1 else set %al = 0 394 and $1, %eax // sign extend previous instruction 395 ret 396 397 DEBUG_INFO __kmp_compare_and_store32 398 399// kmp_int32 400// __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 s ); 401 PROC __kmp_compare_and_store64 402 403 pushl %ebp 404 movl %esp, %ebp 405 pushl %ebx 406 pushl %edi 407 movl 8(%ebp), %edi 408 movl 12(%ebp), %eax // "cv" low order word 409 movl 16(%ebp), %edx // "cv" high order word 410 movl 20(%ebp), %ebx // "sv" low order word 411 movl 24(%ebp), %ecx // "sv" high order word 412 lock 413 cmpxchg8b (%edi) 414 sete %al // if %edx:eax == (%edi) set %al = 1 else set %al = 0 415 and $1, %eax // sign extend previous instruction 416 popl %edi 417 popl %ebx 418 movl %ebp, %esp 419 popl %ebp 420 ret 421 422 DEBUG_INFO __kmp_compare_and_store64 423 424// kmp_int8 425// __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv); 426 PROC __kmp_compare_and_store_ret8 427 428 movl 4(%esp), %ecx 429 movb 8(%esp), %al 430 movb 12(%esp), %dl 431 lock 432 cmpxchgb %dl,(%ecx) 433 ret 434 435 DEBUG_INFO __kmp_compare_and_store_ret8 436 437// kmp_int16 438// __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv, 439// kmp_int16 sv); 440 PROC __kmp_compare_and_store_ret16 441 442 movl 4(%esp), %ecx 443 movw 8(%esp), %ax 444 movw 12(%esp), %dx 445 lock 446 cmpxchgw %dx,(%ecx) 447 ret 448 449 DEBUG_INFO __kmp_compare_and_store_ret16 450 451// kmp_int32 452// __kmp_compare_and_store_ret32(volatile kmp_int32 *p, kmp_int32 cv, 453// kmp_int32 sv); 454 PROC __kmp_compare_and_store_ret32 455 456 movl 4(%esp), %ecx 457 movl 8(%esp), %eax 458 movl 12(%esp), %edx 459 lock 460 cmpxchgl %edx,(%ecx) 461 ret 462 463 DEBUG_INFO __kmp_compare_and_store_ret32 464 465// kmp_int64 466// __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv, 467// kmp_int64 sv); 468 PROC __kmp_compare_and_store_ret64 469 470 pushl %ebp 471 movl %esp, %ebp 472 pushl %ebx 473 pushl %edi 474 movl 8(%ebp), %edi 475 movl 12(%ebp), %eax // "cv" low order word 476 movl 16(%ebp), %edx // "cv" high order word 477 movl 20(%ebp), %ebx // "sv" low order word 478 movl 24(%ebp), %ecx // "sv" high order word 479 lock 480 cmpxchg8b (%edi) 481 popl %edi 482 popl %ebx 483 movl %ebp, %esp 484 popl %ebp 485 ret 486 487 DEBUG_INFO __kmp_compare_and_store_ret64 488 489 490//------------------------------------------------------------------------ 491// FUNCTION __kmp_xchg_real32 492// 493// kmp_real32 494// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ); 495// 496// parameters: 497// addr: 4(%esp) 498// data: 8(%esp) 499// 500// return: %eax 501 PROC __kmp_xchg_real32 502 503 pushl %ebp 504 movl %esp, %ebp 505 subl $4, %esp 506 pushl %esi 507 508 movl 4(%ebp), %esi 509 flds (%esi) 510 // load <addr> 511 fsts -4(%ebp) 512 // store old value 513 514 movl 8(%ebp), %eax 515 516 lock 517 xchgl %eax, (%esi) 518 519 flds -4(%ebp) 520 // return old value 521 522 popl %esi 523 movl %ebp, %esp 524 popl %ebp 525 ret 526 527 DEBUG_INFO __kmp_xchg_real32 528 529# endif /* !KMP_ASM_INTRINS */ 530 531 532//------------------------------------------------------------------------ 533// FUNCTION __kmp_load_x87_fpu_control_word 534// 535// void 536// __kmp_load_x87_fpu_control_word( kmp_int16 *p ); 537// 538// parameters: 539// p: 4(%esp) 540 PROC __kmp_load_x87_fpu_control_word 541 542 movl 4(%esp), %eax 543 fldcw (%eax) 544 ret 545 546 DEBUG_INFO __kmp_load_x87_fpu_control_word 547 548 549//------------------------------------------------------------------------ 550// FUNCTION __kmp_store_x87_fpu_control_word 551// 552// void 553// __kmp_store_x87_fpu_control_word( kmp_int16 *p ); 554// 555// parameters: 556// p: 4(%esp) 557 PROC __kmp_store_x87_fpu_control_word 558 559 movl 4(%esp), %eax 560 fstcw (%eax) 561 ret 562 563 DEBUG_INFO __kmp_store_x87_fpu_control_word 564 565 566//------------------------------------------------------------------------ 567// FUNCTION __kmp_clear_x87_fpu_status_word 568// 569// void 570// __kmp_clear_x87_fpu_status_word(); 571 PROC __kmp_clear_x87_fpu_status_word 572 573 fnclex 574 ret 575 576 DEBUG_INFO __kmp_clear_x87_fpu_status_word 577 578 579//------------------------------------------------------------------------ 580// typedef void (*microtask_t)( int *gtid, int *tid, ... ); 581// 582// int 583// __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid, 584// int argc, void *p_argv[] ) { 585// (*pkfn)( & gtid, & gtid, argv[0], ... ); 586// return 1; 587// } 588 589// -- Begin __kmp_invoke_microtask 590// mark_begin; 591 PROC __kmp_invoke_microtask 592 593 pushl %ebp 594 KMP_CFI_DEF_OFFSET 8 595 KMP_CFI_OFFSET ebp,-8 596 movl %esp,%ebp // establish the base pointer for this routine. 597 KMP_CFI_REGISTER ebp 598 subl $8,%esp // allocate space for two local variables. 599 // These varibales are: 600 // argv: -4(%ebp) 601 // temp: -8(%ebp) 602 // 603 pushl %ebx // save %ebx to use during this routine 604 // 605#if OMPT_SUPPORT 606 movl 28(%ebp),%ebx // get exit_frame address 607 movl %ebp,(%ebx) // save exit_frame 608#endif 609 610 movl 20(%ebp),%ebx // Stack alignment - # args 611 addl $2,%ebx // #args +2 Always pass at least 2 args (gtid and tid) 612 shll $2,%ebx // Number of bytes used on stack: (#args+2)*4 613 movl %esp,%eax // 614 subl %ebx,%eax // %esp-((#args+2)*4) -> %eax -- without mods, stack ptr would be this 615 movl %eax,%ebx // Save to %ebx 616 andl $0xFFFFFF80,%eax // mask off 7 bits 617 subl %eax,%ebx // Amount to subtract from %esp 618 subl %ebx,%esp // Prepare the stack ptr -- 619 // now it will be aligned on 128-byte boundary at the call 620 621 movl 24(%ebp),%eax // copy from p_argv[] 622 movl %eax,-4(%ebp) // into the local variable *argv. 623 624 movl 20(%ebp),%ebx // argc is 20(%ebp) 625 shll $2,%ebx 626 627KMP_LABEL(invoke_2): 628 cmpl $0,%ebx 629 jg KMP_LABEL(invoke_4) 630 jmp KMP_LABEL(invoke_3) 631 ALIGN 2 632KMP_LABEL(invoke_4): 633 movl -4(%ebp),%eax 634 subl $4,%ebx // decrement argc. 635 addl %ebx,%eax // index into argv. 636 movl (%eax),%edx 637 pushl %edx 638 639 jmp KMP_LABEL(invoke_2) 640 ALIGN 2 641KMP_LABEL(invoke_3): 642 leal 16(%ebp),%eax // push & tid 643 pushl %eax 644 645 leal 12(%ebp),%eax // push & gtid 646 pushl %eax 647 648 movl 8(%ebp),%ebx 649 call *%ebx // call (*pkfn)(); 650 651 movl $1,%eax // return 1; 652 653 movl -12(%ebp),%ebx // restore %ebx 654 leave 655 KMP_CFI_DEF esp,4 656 ret 657 658 DEBUG_INFO __kmp_invoke_microtask 659// -- End __kmp_invoke_microtask 660 661 662// kmp_uint64 663// __kmp_hardware_timestamp(void) 664 PROC __kmp_hardware_timestamp 665 rdtsc 666 ret 667 668 DEBUG_INFO __kmp_hardware_timestamp 669// -- End __kmp_hardware_timestamp 670 671#endif /* KMP_ARCH_X86 */ 672 673 674#if KMP_ARCH_X86_64 675 676// ----------------------------------------------------------------------- 677// microtasking routines specifically written for IA-32 architecture and 678// Intel(R) 64 running Linux* OS 679// ----------------------------------------------------------------------- 680 681// -- Machine type P 682// mark_description "Intel Corporation"; 683 .ident "Intel Corporation" 684// -- .file "z_Linux_asm.S" 685 .data 686 ALIGN 4 687 688// To prevent getting our code into .data section .text added to every routine 689// definition for x86_64. 690//------------------------------------------------------------------------ 691// FUNCTION __kmp_x86_cpuid 692// 693// void 694// __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer ); 695// 696// parameters: 697// mode: %edi 698// mode2: %esi 699// cpuid_buffer: %rdx 700 .text 701 PROC __kmp_x86_cpuid 702 703 pushq %rbp 704 movq %rsp,%rbp 705 pushq %rbx // callee-save register 706 707 movl %esi, %ecx // "mode2" 708 movl %edi, %eax // "mode" 709 movq %rdx, %rsi // cpuid_buffer 710 cpuid // Query the CPUID for the current processor 711 712 movl %eax, 0(%rsi) // store results into buffer 713 movl %ebx, 4(%rsi) 714 movl %ecx, 8(%rsi) 715 movl %edx, 12(%rsi) 716 717 popq %rbx // callee-save register 718 movq %rbp, %rsp 719 popq %rbp 720 ret 721 722 DEBUG_INFO __kmp_x86_cpuid 723 724 725 726# if !KMP_ASM_INTRINS 727 728//------------------------------------------------------------------------ 729// FUNCTION __kmp_test_then_add32 730// 731// kmp_int32 732// __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); 733// 734// parameters: 735// p: %rdi 736// d: %esi 737// 738// return: %eax 739 .text 740 PROC __kmp_test_then_add32 741 742 movl %esi, %eax // "d" 743 lock 744 xaddl %eax,(%rdi) 745 ret 746 747 DEBUG_INFO __kmp_test_then_add32 748 749 750//------------------------------------------------------------------------ 751// FUNCTION __kmp_test_then_add64 752// 753// kmp_int64 754// __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ); 755// 756// parameters: 757// p: %rdi 758// d: %rsi 759// return: %rax 760 .text 761 PROC __kmp_test_then_add64 762 763 movq %rsi, %rax // "d" 764 lock 765 xaddq %rax,(%rdi) 766 ret 767 768 DEBUG_INFO __kmp_test_then_add64 769 770 771//------------------------------------------------------------------------ 772// FUNCTION __kmp_xchg_fixed8 773// 774// kmp_int32 775// __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); 776// 777// parameters: 778// p: %rdi 779// d: %sil 780// 781// return: %al 782 .text 783 PROC __kmp_xchg_fixed8 784 785 movb %sil, %al // "d" 786 787 lock 788 xchgb %al,(%rdi) 789 ret 790 791 DEBUG_INFO __kmp_xchg_fixed8 792 793 794//------------------------------------------------------------------------ 795// FUNCTION __kmp_xchg_fixed16 796// 797// kmp_int16 798// __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); 799// 800// parameters: 801// p: %rdi 802// d: %si 803// return: %ax 804 .text 805 PROC __kmp_xchg_fixed16 806 807 movw %si, %ax // "d" 808 809 lock 810 xchgw %ax,(%rdi) 811 ret 812 813 DEBUG_INFO __kmp_xchg_fixed16 814 815 816//------------------------------------------------------------------------ 817// FUNCTION __kmp_xchg_fixed32 818// 819// kmp_int32 820// __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); 821// 822// parameters: 823// p: %rdi 824// d: %esi 825// 826// return: %eax 827 .text 828 PROC __kmp_xchg_fixed32 829 830 movl %esi, %eax // "d" 831 832 lock 833 xchgl %eax,(%rdi) 834 ret 835 836 DEBUG_INFO __kmp_xchg_fixed32 837 838 839//------------------------------------------------------------------------ 840// FUNCTION __kmp_xchg_fixed64 841// 842// kmp_int64 843// __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 d ); 844// 845// parameters: 846// p: %rdi 847// d: %rsi 848// return: %rax 849 .text 850 PROC __kmp_xchg_fixed64 851 852 movq %rsi, %rax // "d" 853 854 lock 855 xchgq %rax,(%rdi) 856 ret 857 858 DEBUG_INFO __kmp_xchg_fixed64 859 860 861//------------------------------------------------------------------------ 862// FUNCTION __kmp_compare_and_store8 863// 864// kmp_int8 865// __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); 866// 867// parameters: 868// p: %rdi 869// cv: %esi 870// sv: %edx 871// 872// return: %eax 873 .text 874 PROC __kmp_compare_and_store8 875 876 movb %sil, %al // "cv" 877 lock 878 cmpxchgb %dl,(%rdi) 879 sete %al // if %al == (%rdi) set %al = 1 else set %al = 0 880 andq $1, %rax // sign extend previous instruction for return value 881 ret 882 883 DEBUG_INFO __kmp_compare_and_store8 884 885 886//------------------------------------------------------------------------ 887// FUNCTION __kmp_compare_and_store16 888// 889// kmp_int16 890// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); 891// 892// parameters: 893// p: %rdi 894// cv: %si 895// sv: %dx 896// 897// return: %eax 898 .text 899 PROC __kmp_compare_and_store16 900 901 movw %si, %ax // "cv" 902 lock 903 cmpxchgw %dx,(%rdi) 904 sete %al // if %ax == (%rdi) set %al = 1 else set %al = 0 905 andq $1, %rax // sign extend previous instruction for return value 906 ret 907 908 DEBUG_INFO __kmp_compare_and_store16 909 910 911//------------------------------------------------------------------------ 912// FUNCTION __kmp_compare_and_store32 913// 914// kmp_int32 915// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); 916// 917// parameters: 918// p: %rdi 919// cv: %esi 920// sv: %edx 921// 922// return: %eax 923 .text 924 PROC __kmp_compare_and_store32 925 926 movl %esi, %eax // "cv" 927 lock 928 cmpxchgl %edx,(%rdi) 929 sete %al // if %eax == (%rdi) set %al = 1 else set %al = 0 930 andq $1, %rax // sign extend previous instruction for return value 931 ret 932 933 DEBUG_INFO __kmp_compare_and_store32 934 935 936//------------------------------------------------------------------------ 937// FUNCTION __kmp_compare_and_store64 938// 939// kmp_int32 940// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); 941// 942// parameters: 943// p: %rdi 944// cv: %rsi 945// sv: %rdx 946// return: %eax 947 .text 948 PROC __kmp_compare_and_store64 949 950 movq %rsi, %rax // "cv" 951 lock 952 cmpxchgq %rdx,(%rdi) 953 sete %al // if %rax == (%rdi) set %al = 1 else set %al = 0 954 andq $1, %rax // sign extend previous instruction for return value 955 ret 956 957 DEBUG_INFO __kmp_compare_and_store64 958 959//------------------------------------------------------------------------ 960// FUNCTION __kmp_compare_and_store_ret8 961// 962// kmp_int8 963// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); 964// 965// parameters: 966// p: %rdi 967// cv: %esi 968// sv: %edx 969// 970// return: %eax 971 .text 972 PROC __kmp_compare_and_store_ret8 973 974 movb %sil, %al // "cv" 975 lock 976 cmpxchgb %dl,(%rdi) 977 ret 978 979 DEBUG_INFO __kmp_compare_and_store_ret8 980 981 982//------------------------------------------------------------------------ 983// FUNCTION __kmp_compare_and_store_ret16 984// 985// kmp_int16 986// __kmp_compare_and_store16_ret( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); 987// 988// parameters: 989// p: %rdi 990// cv: %si 991// sv: %dx 992// 993// return: %eax 994 .text 995 PROC __kmp_compare_and_store_ret16 996 997 movw %si, %ax // "cv" 998 lock 999 cmpxchgw %dx,(%rdi) 1000 ret 1001 1002 DEBUG_INFO __kmp_compare_and_store_ret16 1003 1004 1005//------------------------------------------------------------------------ 1006// FUNCTION __kmp_compare_and_store_ret32 1007// 1008// kmp_int32 1009// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); 1010// 1011// parameters: 1012// p: %rdi 1013// cv: %esi 1014// sv: %edx 1015// 1016// return: %eax 1017 .text 1018 PROC __kmp_compare_and_store_ret32 1019 1020 movl %esi, %eax // "cv" 1021 lock 1022 cmpxchgl %edx,(%rdi) 1023 ret 1024 1025 DEBUG_INFO __kmp_compare_and_store_ret32 1026 1027 1028//------------------------------------------------------------------------ 1029// FUNCTION __kmp_compare_and_store_ret64 1030// 1031// kmp_int64 1032// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); 1033// 1034// parameters: 1035// p: %rdi 1036// cv: %rsi 1037// sv: %rdx 1038// return: %eax 1039 .text 1040 PROC __kmp_compare_and_store_ret64 1041 1042 movq %rsi, %rax // "cv" 1043 lock 1044 cmpxchgq %rdx,(%rdi) 1045 ret 1046 1047 DEBUG_INFO __kmp_compare_and_store_ret64 1048 1049# endif /* !KMP_ASM_INTRINS */ 1050 1051 1052# if !KMP_MIC 1053 1054# if !KMP_ASM_INTRINS 1055 1056//------------------------------------------------------------------------ 1057// FUNCTION __kmp_xchg_real32 1058// 1059// kmp_real32 1060// __kmp_xchg_real32( volatile kmp_real32 *addr, kmp_real32 data ); 1061// 1062// parameters: 1063// addr: %rdi 1064// data: %xmm0 (lower 4 bytes) 1065// 1066// return: %xmm0 (lower 4 bytes) 1067 .text 1068 PROC __kmp_xchg_real32 1069 1070 movd %xmm0, %eax // load "data" to eax 1071 1072 lock 1073 xchgl %eax, (%rdi) 1074 1075 movd %eax, %xmm0 // load old value into return register 1076 1077 ret 1078 1079 DEBUG_INFO __kmp_xchg_real32 1080 1081 1082//------------------------------------------------------------------------ 1083// FUNCTION __kmp_xchg_real64 1084// 1085// kmp_real64 1086// __kmp_xchg_real64( volatile kmp_real64 *addr, kmp_real64 data ); 1087// 1088// parameters: 1089// addr: %rdi 1090// data: %xmm0 (lower 8 bytes) 1091// return: %xmm0 (lower 8 bytes) 1092 .text 1093 PROC __kmp_xchg_real64 1094 1095 movd %xmm0, %rax // load "data" to rax 1096 1097 lock 1098 xchgq %rax, (%rdi) 1099 1100 movd %rax, %xmm0 // load old value into return register 1101 ret 1102 1103 DEBUG_INFO __kmp_xchg_real64 1104 1105 1106# endif /* !KMP_MIC */ 1107 1108# endif /* !KMP_ASM_INTRINS */ 1109 1110 1111//------------------------------------------------------------------------ 1112// FUNCTION __kmp_load_x87_fpu_control_word 1113// 1114// void 1115// __kmp_load_x87_fpu_control_word( kmp_int16 *p ); 1116// 1117// parameters: 1118// p: %rdi 1119 .text 1120 PROC __kmp_load_x87_fpu_control_word 1121 1122 fldcw (%rdi) 1123 ret 1124 1125 DEBUG_INFO __kmp_load_x87_fpu_control_word 1126 1127 1128//------------------------------------------------------------------------ 1129// FUNCTION __kmp_store_x87_fpu_control_word 1130// 1131// void 1132// __kmp_store_x87_fpu_control_word( kmp_int16 *p ); 1133// 1134// parameters: 1135// p: %rdi 1136 .text 1137 PROC __kmp_store_x87_fpu_control_word 1138 1139 fstcw (%rdi) 1140 ret 1141 1142 DEBUG_INFO __kmp_store_x87_fpu_control_word 1143 1144 1145//------------------------------------------------------------------------ 1146// FUNCTION __kmp_clear_x87_fpu_status_word 1147// 1148// void 1149// __kmp_clear_x87_fpu_status_word(); 1150 .text 1151 PROC __kmp_clear_x87_fpu_status_word 1152 1153#if KMP_MIC 1154// TODO: remove the workaround for problem with fnclex instruction (no CQ known) 1155 fstenv -32(%rsp) // store FP env 1156 andw $~0x80ff, 4-32(%rsp) // clear 0-7,15 bits of FP SW 1157 fldenv -32(%rsp) // load FP env back 1158 ret 1159#else 1160 fnclex 1161 ret 1162#endif 1163 1164 DEBUG_INFO __kmp_clear_x87_fpu_status_word 1165 1166 1167//------------------------------------------------------------------------ 1168// typedef void (*microtask_t)( int *gtid, int *tid, ... ); 1169// 1170// int 1171// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), 1172// int gtid, int tid, 1173// int argc, void *p_argv[] ) { 1174// (*pkfn)( & gtid, & tid, argv[0], ... ); 1175// return 1; 1176// } 1177// 1178// note: at call to pkfn must have %rsp 128-byte aligned for compiler 1179// 1180// parameters: 1181// %rdi: pkfn 1182// %esi: gtid 1183// %edx: tid 1184// %ecx: argc 1185// %r8: p_argv 1186// %r9: &exit_frame 1187// 1188// locals: 1189// __gtid: gtid parm pushed on stack so can pass >id to pkfn 1190// __tid: tid parm pushed on stack so can pass &tid to pkfn 1191// 1192// reg temps: 1193// %rax: used all over the place 1194// %rdx: used in stack pointer alignment calculation 1195// %r11: used to traverse p_argv array 1196// %rsi: used as temporary for stack parameters 1197// used as temporary for number of pkfn parms to push 1198// %rbx: used to hold pkfn address, and zero constant, callee-save 1199// 1200// return: %eax (always 1/TRUE) 1201__gtid = -16 1202__tid = -24 1203 1204// -- Begin __kmp_invoke_microtask 1205// mark_begin; 1206 .text 1207 PROC __kmp_invoke_microtask 1208 1209 pushq %rbp // save base pointer 1210 KMP_CFI_DEF_OFFSET 16 1211 KMP_CFI_OFFSET rbp,-16 1212 movq %rsp,%rbp // establish the base pointer for this routine. 1213 KMP_CFI_REGISTER rbp 1214 1215#if OMPT_SUPPORT 1216 movq %rbp, (%r9) // save exit_frame 1217#endif 1218 1219 pushq %rbx // %rbx is callee-saved register 1220 pushq %rsi // Put gtid on stack so can pass &tgid to pkfn 1221 pushq %rdx // Put tid on stack so can pass &tid to pkfn 1222 1223 movq %rcx, %rax // Stack alignment calculation begins; argc -> %rax 1224 movq $0, %rbx // constant for cmovs later 1225 subq $4, %rax // subtract four args passed in registers to pkfn 1226#if KMP_MIC 1227 js KMP_LABEL(kmp_0) // jump to movq 1228 jmp KMP_LABEL(kmp_0_exit) // jump ahead 1229KMP_LABEL(kmp_0): 1230 movq %rbx, %rax // zero negative value in %rax <- max(0, argc-4) 1231KMP_LABEL(kmp_0_exit): 1232#else 1233 cmovsq %rbx, %rax // zero negative value in %rax <- max(0, argc-4) 1234#endif // KMP_MIC 1235 1236 movq %rax, %rsi // save max(0, argc-4) -> %rsi for later 1237 shlq $3, %rax // Number of bytes used on stack: max(0, argc-4)*8 1238 1239 movq %rsp, %rdx // 1240 subq %rax, %rdx // %rsp-(max(0,argc-4)*8) -> %rdx -- 1241 // without align, stack ptr would be this 1242 movq %rdx, %rax // Save to %rax 1243 1244 andq $0xFFFFFFFFFFFFFF80, %rax // mask off lower 7 bits (128 bytes align) 1245 subq %rax, %rdx // Amount to subtract from %rsp 1246 subq %rdx, %rsp // Prepare the stack ptr -- 1247 // now %rsp will align to 128-byte boundary at call site 1248 1249 // setup pkfn parameter reg and stack 1250 movq %rcx, %rax // argc -> %rax 1251 cmpq $0, %rsi 1252 je KMP_LABEL(kmp_invoke_pass_parms) // jump ahead if no parms to push 1253 shlq $3, %rcx // argc*8 -> %rcx 1254 movq %r8, %rdx // p_argv -> %rdx 1255 addq %rcx, %rdx // &p_argv[argc] -> %rdx 1256 1257 movq %rsi, %rcx // max (0, argc-4) -> %rcx 1258 1259KMP_LABEL(kmp_invoke_push_parms): 1260 // push nth - 7th parms to pkfn on stack 1261 subq $8, %rdx // decrement p_argv pointer to previous parm 1262 movq (%rdx), %rsi // p_argv[%rcx-1] -> %rsi 1263 pushq %rsi // push p_argv[%rcx-1] onto stack (reverse order) 1264 subl $1, %ecx 1265 1266// C69570: "X86_64_RELOC_BRANCH not supported" error at linking on mac_32e 1267// if the name of the label that is an operand of this jecxz starts with a dot ("."); 1268// Apple's linker does not support 1-byte length relocation; 1269// Resolution: replace all .labelX entries with L_labelX. 1270 1271 jecxz KMP_LABEL(kmp_invoke_pass_parms) // stop when four p_argv[] parms left 1272 jmp KMP_LABEL(kmp_invoke_push_parms) 1273 ALIGN 3 1274KMP_LABEL(kmp_invoke_pass_parms): // put 1st - 6th parms to pkfn in registers. 1275 // order here is important to avoid trashing 1276 // registers used for both input and output parms! 1277 movq %rdi, %rbx // pkfn -> %rbx 1278 leaq __gtid(%rbp), %rdi // >id -> %rdi (store 1st parm to pkfn) 1279 leaq __tid(%rbp), %rsi // &tid -> %rsi (store 2nd parm to pkfn) 1280 1281 movq %r8, %r11 // p_argv -> %r11 1282 1283#if KMP_MIC 1284 cmpq $4, %rax // argc >= 4? 1285 jns KMP_LABEL(kmp_4) // jump to movq 1286 jmp KMP_LABEL(kmp_4_exit) // jump ahead 1287KMP_LABEL(kmp_4): 1288 movq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn) 1289KMP_LABEL(kmp_4_exit): 1290 1291 cmpq $3, %rax // argc >= 3? 1292 jns KMP_LABEL(kmp_3) // jump to movq 1293 jmp KMP_LABEL(kmp_3_exit) // jump ahead 1294KMP_LABEL(kmp_3): 1295 movq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn) 1296KMP_LABEL(kmp_3_exit): 1297 1298 cmpq $2, %rax // argc >= 2? 1299 jns KMP_LABEL(kmp_2) // jump to movq 1300 jmp KMP_LABEL(kmp_2_exit) // jump ahead 1301KMP_LABEL(kmp_2): 1302 movq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn) 1303KMP_LABEL(kmp_2_exit): 1304 1305 cmpq $1, %rax // argc >= 1? 1306 jns KMP_LABEL(kmp_1) // jump to movq 1307 jmp KMP_LABEL(kmp_1_exit) // jump ahead 1308KMP_LABEL(kmp_1): 1309 movq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn) 1310KMP_LABEL(kmp_1_exit): 1311#else 1312 cmpq $4, %rax // argc >= 4? 1313 cmovnsq 24(%r11), %r9 // p_argv[3] -> %r9 (store 6th parm to pkfn) 1314 1315 cmpq $3, %rax // argc >= 3? 1316 cmovnsq 16(%r11), %r8 // p_argv[2] -> %r8 (store 5th parm to pkfn) 1317 1318 cmpq $2, %rax // argc >= 2? 1319 cmovnsq 8(%r11), %rcx // p_argv[1] -> %rcx (store 4th parm to pkfn) 1320 1321 cmpq $1, %rax // argc >= 1? 1322 cmovnsq (%r11), %rdx // p_argv[0] -> %rdx (store 3rd parm to pkfn) 1323#endif // KMP_MIC 1324 1325 call *%rbx // call (*pkfn)(); 1326 movq $1, %rax // move 1 into return register; 1327 1328 movq -8(%rbp), %rbx // restore %rbx using %rbp since %rsp was modified 1329 movq %rbp, %rsp // restore stack pointer 1330 popq %rbp // restore frame pointer 1331 KMP_CFI_DEF rsp,8 1332 ret 1333 1334 DEBUG_INFO __kmp_invoke_microtask 1335// -- End __kmp_invoke_microtask 1336 1337// kmp_uint64 1338// __kmp_hardware_timestamp(void) 1339 .text 1340 PROC __kmp_hardware_timestamp 1341 rdtsc 1342 shlq $32, %rdx 1343 orq %rdx, %rax 1344 ret 1345 1346 DEBUG_INFO __kmp_hardware_timestamp 1347// -- End __kmp_hardware_timestamp 1348 1349//------------------------------------------------------------------------ 1350// FUNCTION __kmp_bsr32 1351// 1352// int 1353// __kmp_bsr32( int ); 1354 .text 1355 PROC __kmp_bsr32 1356 1357 bsr %edi,%eax 1358 ret 1359 1360 DEBUG_INFO __kmp_bsr32 1361 1362 1363// ----------------------------------------------------------------------- 1364#endif /* KMP_ARCH_X86_64 */ 1365 1366// ' 1367#if (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 1368 1369//------------------------------------------------------------------------ 1370// 1371// typedef void (*microtask_t)( int *gtid, int *tid, ... ); 1372// 1373// int 1374// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), 1375// int gtid, int tid, 1376// int argc, void *p_argv[] ) { 1377// (*pkfn)( & gtid, & tid, argv[0], ... ); 1378// return 1; 1379// } 1380// 1381// parameters: 1382// x0: pkfn 1383// w1: gtid 1384// w2: tid 1385// w3: argc 1386// x4: p_argv 1387// x5: &exit_frame 1388// 1389// locals: 1390// __gtid: gtid parm pushed on stack so can pass >id to pkfn 1391// __tid: tid parm pushed on stack so can pass &tid to pkfn 1392// 1393// reg temps: 1394// x8: used to hold pkfn address 1395// w9: used as temporary for number of pkfn parms 1396// x10: used to traverse p_argv array 1397// x11: used as temporary for stack placement calculation 1398// x12: used as temporary for stack parameters 1399// x19: used to preserve exit_frame_ptr, callee-save 1400// 1401// return: w0 (always 1/TRUE) 1402// 1403 1404__gtid = 4 1405__tid = 8 1406 1407// -- Begin __kmp_invoke_microtask 1408// mark_begin; 1409 .text 1410 PROC __kmp_invoke_microtask 1411 1412 stp x29, x30, [sp, #-16]! 1413# if OMPT_SUPPORT 1414 stp x19, x20, [sp, #-16]! 1415# endif 1416 mov x29, sp 1417 1418 orr w9, wzr, #1 1419 add w9, w9, w3, lsr #1 1420 sub sp, sp, w9, lsl #4 1421 mov x11, sp 1422 1423 mov x8, x0 1424 str w1, [x29, #-__gtid] 1425 str w2, [x29, #-__tid] 1426 mov w9, w3 1427 mov x10, x4 1428# if OMPT_SUPPORT 1429 mov x19, x5 1430 str x29, [x19] 1431# endif 1432 1433 sub x0, x29, #__gtid 1434 sub x1, x29, #__tid 1435 1436 cbz w9, KMP_LABEL(kmp_1) 1437 ldr x2, [x10] 1438 1439 sub w9, w9, #1 1440 cbz w9, KMP_LABEL(kmp_1) 1441 ldr x3, [x10, #8]! 1442 1443 sub w9, w9, #1 1444 cbz w9, KMP_LABEL(kmp_1) 1445 ldr x4, [x10, #8]! 1446 1447 sub w9, w9, #1 1448 cbz w9, KMP_LABEL(kmp_1) 1449 ldr x5, [x10, #8]! 1450 1451 sub w9, w9, #1 1452 cbz w9, KMP_LABEL(kmp_1) 1453 ldr x6, [x10, #8]! 1454 1455 sub w9, w9, #1 1456 cbz w9, KMP_LABEL(kmp_1) 1457 ldr x7, [x10, #8]! 1458 1459KMP_LABEL(kmp_0): 1460 sub w9, w9, #1 1461 cbz w9, KMP_LABEL(kmp_1) 1462 ldr x12, [x10, #8]! 1463 str x12, [x11], #8 1464 b KMP_LABEL(kmp_0) 1465KMP_LABEL(kmp_1): 1466 blr x8 1467 orr w0, wzr, #1 1468 mov sp, x29 1469# if OMPT_SUPPORT 1470 str xzr, [x19] 1471 ldp x19, x20, [sp], #16 1472# endif 1473 ldp x29, x30, [sp], #16 1474 ret 1475 1476 DEBUG_INFO __kmp_invoke_microtask 1477// -- End __kmp_invoke_microtask 1478 1479#endif /* (KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64 */ 1480 1481#if KMP_ARCH_PPC64 1482 1483//------------------------------------------------------------------------ 1484// 1485// typedef void (*microtask_t)( int *gtid, int *tid, ... ); 1486// 1487// int 1488// __kmp_invoke_microtask( void (*pkfn) (int gtid, int tid, ...), 1489// int gtid, int tid, 1490// int argc, void *p_argv[] ) { 1491// (*pkfn)( & gtid, & tid, argv[0], ... ); 1492// return 1; 1493// } 1494// 1495// parameters: 1496// r3: pkfn 1497// r4: gtid 1498// r5: tid 1499// r6: argc 1500// r7: p_argv 1501// r8: &exit_frame 1502// 1503// return: r3 (always 1/TRUE) 1504// 1505 .text 1506# if KMP_ARCH_PPC64_LE 1507 .abiversion 2 1508# endif 1509 .globl __kmp_invoke_microtask 1510 1511# if KMP_ARCH_PPC64_LE 1512 .p2align 4 1513# else 1514 .p2align 2 1515# endif 1516 1517 .type __kmp_invoke_microtask,@function 1518 1519# if KMP_ARCH_PPC64_LE 1520__kmp_invoke_microtask: 1521.Lfunc_begin0: 1522.Lfunc_gep0: 1523 addis 2, 12, .TOC.-.Lfunc_gep0@ha 1524 addi 2, 2, .TOC.-.Lfunc_gep0@l 1525.Lfunc_lep0: 1526 .localentry __kmp_invoke_microtask, .Lfunc_lep0-.Lfunc_gep0 1527# else 1528 .section .opd,"aw",@progbits 1529__kmp_invoke_microtask: 1530 .p2align 3 1531 .quad .Lfunc_begin0 1532 .quad .TOC.@tocbase 1533 .quad 0 1534 .text 1535.Lfunc_begin0: 1536# endif 1537 1538// -- Begin __kmp_invoke_microtask 1539// mark_begin; 1540 1541// We need to allocate a stack frame large enough to hold all of the parameters 1542// on the stack for the microtask plus what this function needs. That's 48 1543// bytes under the ELFv1 ABI (32 bytes under ELFv2), plus 8*(2 + argc) for the 1544// parameters to the microtask, plus 8 bytes to store the values of r4 and r5, 1545// and 8 bytes to store r31. With OMP-T support, we need an additional 8 bytes 1546// to save r30 to hold a copy of r8. 1547 1548 .cfi_startproc 1549 mflr 0 1550 std 31, -8(1) 1551 std 0, 16(1) 1552 1553// This is unusual because normally we'd set r31 equal to r1 after the stack 1554// frame is established. In this case, however, we need to dynamically compute 1555// the stack frame size, and so we keep a direct copy of r1 to access our 1556// register save areas and restore the r1 value before returning. 1557 mr 31, 1 1558 .cfi_def_cfa_register r31 1559 .cfi_offset r31, -8 1560 .cfi_offset lr, 16 1561 1562// Compute the size necessary for the local stack frame. 1563# if KMP_ARCH_PPC64_LE 1564 li 12, 72 1565# else 1566 li 12, 88 1567# endif 1568 sldi 0, 6, 3 1569 add 12, 0, 12 1570 neg 12, 12 1571 1572// We need to make sure that the stack frame stays aligned (to 16 bytes, except 1573// under the BG/Q CNK, where it must be to 32 bytes). 1574# if KMP_OS_CNK 1575 li 0, -32 1576# else 1577 li 0, -16 1578# endif 1579 and 12, 0, 12 1580 1581// Establish the local stack frame. 1582 stdux 1, 1, 12 1583 1584# if OMPT_SUPPORT 1585 .cfi_offset r30, -16 1586 std 30, -16(31) 1587 std 1, 0(8) 1588 mr 30, 8 1589# endif 1590 1591// Store gtid and tid to the stack because they're passed by reference to the microtask. 1592 stw 4, -20(31) 1593 stw 5, -24(31) 1594 1595 mr 12, 6 1596 mr 4, 7 1597 1598 cmpwi 0, 12, 1 1599 blt 0, .Lcall 1600 1601 ld 5, 0(4) 1602 1603 cmpwi 0, 12, 2 1604 blt 0, .Lcall 1605 1606 ld 6, 8(4) 1607 1608 cmpwi 0, 12, 3 1609 blt 0, .Lcall 1610 1611 ld 7, 16(4) 1612 1613 cmpwi 0, 12, 4 1614 blt 0, .Lcall 1615 1616 ld 8, 24(4) 1617 1618 cmpwi 0, 12, 5 1619 blt 0, .Lcall 1620 1621 ld 9, 32(4) 1622 1623 cmpwi 0, 12, 6 1624 blt 0, .Lcall 1625 1626 ld 10, 40(4) 1627 1628 cmpwi 0, 12, 7 1629 blt 0, .Lcall 1630 1631// There are more than 6 microtask parameters, so we need to store the 1632// remainder to the stack. 1633 addi 12, 12, -6 1634 mtctr 12 1635 1636// These are set to 8 bytes before the first desired store address (we're using 1637// pre-increment loads and stores in the loop below). The parameter save area 1638// for the microtask begins 48 + 8*8 == 112 bytes above r1 for ELFv1 and 1639// 32 + 8*8 == 96 bytes above r1 for ELFv2. 1640 addi 4, 4, 40 1641# if KMP_ARCH_PPC64_LE 1642 addi 12, 1, 88 1643# else 1644 addi 12, 1, 104 1645# endif 1646 1647.Lnext: 1648 ldu 0, 8(4) 1649 stdu 0, 8(12) 1650 bdnz .Lnext 1651 1652.Lcall: 1653# if KMP_ARCH_PPC64_LE 1654 std 2, 24(1) 1655 mr 12, 3 1656#else 1657 std 2, 40(1) 1658// For ELFv1, we need to load the actual function address from the function descriptor. 1659 ld 12, 0(3) 1660 ld 2, 8(3) 1661 ld 11, 16(3) 1662#endif 1663 1664 addi 3, 31, -20 1665 addi 4, 31, -24 1666 1667 mtctr 12 1668 bctrl 1669# if KMP_ARCH_PPC64_LE 1670 ld 2, 24(1) 1671# else 1672 ld 2, 40(1) 1673# endif 1674 1675# if OMPT_SUPPORT 1676 li 3, 0 1677 std 3, 0(30) 1678# endif 1679 1680 li 3, 1 1681 1682# if OMPT_SUPPORT 1683 ld 30, -16(31) 1684# endif 1685 1686 mr 1, 31 1687 ld 0, 16(1) 1688 ld 31, -8(1) 1689 mtlr 0 1690 blr 1691 1692 .long 0 1693 .quad 0 1694.Lfunc_end0: 1695 .size __kmp_invoke_microtask, .Lfunc_end0-.Lfunc_begin0 1696 .cfi_endproc 1697 1698// -- End __kmp_invoke_microtask 1699 1700#endif /* KMP_ARCH_PPC64 */ 1701 1702#if KMP_ARCH_ARM || KMP_ARCH_MIPS 1703 .data 1704 .comm .gomp_critical_user_,32,8 1705 .data 1706 .align 4 1707 .global __kmp_unnamed_critical_addr 1708__kmp_unnamed_critical_addr: 1709 .4byte .gomp_critical_user_ 1710 .size __kmp_unnamed_critical_addr,4 1711#endif /* KMP_ARCH_ARM */ 1712 1713#if KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 1714 .data 1715 .comm .gomp_critical_user_,32,8 1716 .data 1717 .align 8 1718 .global __kmp_unnamed_critical_addr 1719__kmp_unnamed_critical_addr: 1720 .8byte .gomp_critical_user_ 1721 .size __kmp_unnamed_critical_addr,8 1722#endif /* KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 */ 1723 1724#if KMP_OS_LINUX 1725# if KMP_ARCH_ARM 1726.section .note.GNU-stack,"",%progbits 1727# else 1728.section .note.GNU-stack,"",@progbits 1729# endif 1730#endif 1731