1 /***************************************************************************** 2 * system include files 3 ****************************************************************************/ 4 5 #include <assert.h> 6 7 #include <stdint.h> 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <string.h> 11 #if KMP_OS_UNIX 12 #include <dlfcn.h> 13 #endif 14 15 /***************************************************************************** 16 * ompt include files 17 ****************************************************************************/ 18 19 #include "ompt-specific.cpp" 20 21 /***************************************************************************** 22 * macros 23 ****************************************************************************/ 24 25 #define ompt_get_callback_success 1 26 #define ompt_get_callback_failure 0 27 28 #define no_tool_present 0 29 30 #define OMPT_API_ROUTINE static 31 32 #ifndef OMPT_STR_MATCH 33 #define OMPT_STR_MATCH(haystack, needle) (!strcasecmp(haystack, needle)) 34 #endif 35 36 /***************************************************************************** 37 * types 38 ****************************************************************************/ 39 40 typedef struct { 41 const char *state_name; 42 omp_state_t state_id; 43 } omp_state_info_t; 44 45 typedef struct { 46 const char *name; 47 ompt_mutex_impl_t id; 48 } ompt_mutex_impl_info_t; 49 50 enum tool_setting_e { 51 omp_tool_error, 52 omp_tool_unset, 53 omp_tool_disabled, 54 omp_tool_enabled 55 }; 56 57 /***************************************************************************** 58 * global variables 59 ****************************************************************************/ 60 61 ompt_callbacks_active_t ompt_enabled; 62 63 omp_state_info_t omp_state_info[] = { 64 #define omp_state_macro(state, code) {#state, state}, 65 FOREACH_OMP_STATE(omp_state_macro) 66 #undef omp_state_macro 67 }; 68 69 ompt_mutex_impl_info_t ompt_mutex_impl_info[] = { 70 #define ompt_mutex_impl_macro(name, id) {#name, name}, 71 FOREACH_OMPT_MUTEX_IMPL(ompt_mutex_impl_macro) 72 #undef ompt_mutex_impl_macro 73 }; 74 75 ompt_callbacks_internal_t ompt_callbacks; 76 77 static ompt_start_tool_result_t *ompt_start_tool_result = NULL; 78 79 /***************************************************************************** 80 * forward declarations 81 ****************************************************************************/ 82 83 static ompt_interface_fn_t ompt_fn_lookup(const char *s); 84 85 OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void); 86 87 /***************************************************************************** 88 * initialization and finalization (private operations) 89 ****************************************************************************/ 90 91 typedef ompt_start_tool_result_t *(*ompt_start_tool_t)(unsigned int, 92 const char *); 93 94 #if KMP_OS_DARWIN 95 96 // While Darwin supports weak symbols, the library that wishes to provide a new 97 // implementation has to link against this runtime which defeats the purpose 98 // of having tools that are agnostic of the underlying runtime implementation. 99 // 100 // Fortunately, the linker includes all symbols of an executable in the global 101 // symbol table by default so dlsym() even finds static implementations of 102 // ompt_start_tool. For this to work on Linux, -Wl,--export-dynamic needs to be 103 // passed when building the application which we don't want to rely on. 104 105 static ompt_start_tool_result_t *ompt_tool_darwin(unsigned int omp_version, 106 const char *runtime_version) { 107 ompt_start_tool_result_t *ret = NULL; 108 // Search symbol in the current address space. 109 ompt_start_tool_t start_tool = 110 (ompt_start_tool_t)dlsym(RTLD_DEFAULT, "ompt_start_tool"); 111 if (start_tool) { 112 ret = start_tool(omp_version, runtime_version); 113 } 114 return ret; 115 } 116 117 #elif OMPT_HAVE_WEAK_ATTRIBUTE 118 119 // On Unix-like systems that support weak symbols the following implementation 120 // of ompt_start_tool() will be used in case no tool-supplied implementation of 121 // this function is present in the address space of a process. 122 123 _OMP_EXTERN OMPT_WEAK_ATTRIBUTE ompt_start_tool_result_t * 124 ompt_start_tool(unsigned int omp_version, const char *runtime_version) { 125 ompt_start_tool_result_t *ret = NULL; 126 // Search next symbol in the current address space. This can happen if the 127 // runtime library is linked before the tool. Since glibc 2.2 strong symbols 128 // don't override weak symbols that have been found before unless the user 129 // sets the environment variable LD_DYNAMIC_WEAK. 130 ompt_start_tool_t next_tool = 131 (ompt_start_tool_t)dlsym(RTLD_NEXT, "ompt_start_tool"); 132 if (next_tool) { 133 ret = next_tool(omp_version, runtime_version); 134 } 135 return ret; 136 } 137 138 #elif OMPT_HAVE_PSAPI 139 140 // On Windows, the ompt_tool_windows function is used to find the 141 // ompt_start_tool symbol across all modules loaded by a process. If 142 // ompt_start_tool is found, ompt_start_tool's return value is used to 143 // initialize the tool. Otherwise, NULL is returned and OMPT won't be enabled. 144 145 #include <psapi.h> 146 #pragma comment(lib, "psapi.lib") 147 148 // The number of loaded modules to start enumeration with EnumProcessModules() 149 #define NUM_MODULES 128 150 151 static ompt_start_tool_result_t * 152 ompt_tool_windows(unsigned int omp_version, const char *runtime_version) { 153 int i; 154 DWORD needed, new_size; 155 HMODULE *modules; 156 HANDLE process = GetCurrentProcess(); 157 modules = (HMODULE *)malloc(NUM_MODULES * sizeof(HMODULE)); 158 ompt_start_tool_t ompt_tool_p = NULL; 159 160 #if OMPT_DEBUG 161 printf("ompt_tool_windows(): looking for ompt_start_tool\n"); 162 #endif 163 if (!EnumProcessModules(process, modules, NUM_MODULES * sizeof(HMODULE), 164 &needed)) { 165 // Regardless of the error reason use the stub initialization function 166 free(modules); 167 return NULL; 168 } 169 // Check if NUM_MODULES is enough to list all modules 170 new_size = needed / sizeof(HMODULE); 171 if (new_size > NUM_MODULES) { 172 #if OMPT_DEBUG 173 printf("ompt_tool_windows(): resize buffer to %d bytes\n", needed); 174 #endif 175 modules = (HMODULE *)realloc(modules, needed); 176 // If resizing failed use the stub function. 177 if (!EnumProcessModules(process, modules, needed, &needed)) { 178 free(modules); 179 return NULL; 180 } 181 } 182 for (i = 0; i < new_size; ++i) { 183 (FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_start_tool"); 184 if (ompt_tool_p) { 185 #if OMPT_DEBUG 186 TCHAR modName[MAX_PATH]; 187 if (GetModuleFileName(modules[i], modName, MAX_PATH)) 188 printf("ompt_tool_windows(): ompt_start_tool found in module %s\n", 189 modName); 190 #endif 191 free(modules); 192 return (*ompt_tool_p)(omp_version, runtime_version); 193 } 194 #if OMPT_DEBUG 195 else { 196 TCHAR modName[MAX_PATH]; 197 if (GetModuleFileName(modules[i], modName, MAX_PATH)) 198 printf("ompt_tool_windows(): ompt_start_tool not found in module %s\n", 199 modName); 200 } 201 #endif 202 } 203 free(modules); 204 return NULL; 205 } 206 #else 207 #error Activation of OMPT is not supported on this platform. 208 #endif 209 210 static ompt_start_tool_result_t * 211 ompt_try_start_tool(unsigned int omp_version, const char *runtime_version) { 212 ompt_start_tool_result_t *ret = NULL; 213 ompt_start_tool_t start_tool = NULL; 214 #if KMP_OS_WINDOWS 215 // Cannot use colon to describe a list of absolute paths on Windows 216 const char *sep = ";"; 217 #else 218 const char *sep = ":"; 219 #endif 220 221 // Try in the current address space 222 #if KMP_OS_DARWIN 223 ret = ompt_tool_darwin(omp_version, runtime_version); 224 #elif OMPT_HAVE_WEAK_ATTRIBUTE 225 ret = ompt_start_tool(omp_version, runtime_version); 226 #elif OMPT_HAVE_PSAPI 227 ret = ompt_tool_windows(omp_version, runtime_version); 228 #else 229 #error Activation of OMPT is not supported on this platform. 230 #endif 231 if (ret) 232 return ret; 233 234 // Try tool-libraries-var ICV 235 const char *tool_libs = getenv("OMP_TOOL_LIBRARIES"); 236 if (tool_libs) { 237 char *libs = __kmp_str_format("%s", tool_libs); 238 char *buf; 239 char *fname = __kmp_str_token(libs, sep, &buf); 240 while (fname) { 241 #if KMP_OS_UNIX 242 void *h = dlopen(fname, RTLD_LAZY); 243 if (h) { 244 start_tool = (ompt_start_tool_t)dlsym(h, "ompt_start_tool"); 245 #elif KMP_OS_WINDOWS 246 HMODULE h = LoadLibrary(fname); 247 if (h) { 248 start_tool = (ompt_start_tool_t)GetProcAddress(h, "ompt_start_tool"); 249 #else 250 #error Activation of OMPT is not supported on this platform. 251 #endif 252 if (start_tool && (ret = (*start_tool)(omp_version, runtime_version))) 253 break; 254 } 255 fname = __kmp_str_token(NULL, sep, &buf); 256 } 257 __kmp_str_free(&libs); 258 } 259 return ret; 260 } 261 262 void ompt_pre_init() { 263 //-------------------------------------------------- 264 // Execute the pre-initialization logic only once. 265 //-------------------------------------------------- 266 static int ompt_pre_initialized = 0; 267 268 if (ompt_pre_initialized) 269 return; 270 271 ompt_pre_initialized = 1; 272 273 //-------------------------------------------------- 274 // Use a tool iff a tool is enabled and available. 275 //-------------------------------------------------- 276 const char *ompt_env_var = getenv("OMP_TOOL"); 277 tool_setting_e tool_setting = omp_tool_error; 278 279 if (!ompt_env_var || !strcmp(ompt_env_var, "")) 280 tool_setting = omp_tool_unset; 281 else if (OMPT_STR_MATCH(ompt_env_var, "disabled")) 282 tool_setting = omp_tool_disabled; 283 else if (OMPT_STR_MATCH(ompt_env_var, "enabled")) 284 tool_setting = omp_tool_enabled; 285 286 #if OMPT_DEBUG 287 printf("ompt_pre_init(): tool_setting = %d\n", tool_setting); 288 #endif 289 switch (tool_setting) { 290 case omp_tool_disabled: 291 break; 292 293 case omp_tool_unset: 294 case omp_tool_enabled: 295 296 //-------------------------------------------------- 297 // Load tool iff specified in environment variable 298 //-------------------------------------------------- 299 ompt_start_tool_result = 300 ompt_try_start_tool(__kmp_openmp_version, ompt_get_runtime_version()); 301 302 memset(&ompt_enabled, 0, sizeof(ompt_enabled)); 303 break; 304 305 case omp_tool_error: 306 fprintf(stderr, "Warning: OMP_TOOL has invalid value \"%s\".\n" 307 " legal values are (NULL,\"\",\"disabled\"," 308 "\"enabled\").\n", 309 ompt_env_var); 310 break; 311 } 312 #if OMPT_DEBUG 313 printf("ompt_pre_init(): ompt_enabled = %d\n", ompt_enabled); 314 #endif 315 } 316 317 void ompt_post_init() { 318 //-------------------------------------------------- 319 // Execute the post-initialization logic only once. 320 //-------------------------------------------------- 321 static int ompt_post_initialized = 0; 322 323 if (ompt_post_initialized) 324 return; 325 326 ompt_post_initialized = 1; 327 328 //-------------------------------------------------- 329 // Initialize the tool if so indicated. 330 //-------------------------------------------------- 331 if (ompt_start_tool_result) { 332 ompt_enabled.enabled = !!ompt_start_tool_result->initialize( 333 ompt_fn_lookup, &(ompt_start_tool_result->tool_data)); 334 335 ompt_thread_t *root_thread = ompt_get_thread(); 336 337 ompt_set_thread_state(root_thread, omp_state_overhead); 338 339 if (ompt_enabled.ompt_callback_thread_begin) { 340 ompt_callbacks.ompt_callback(ompt_callback_thread_begin)( 341 ompt_thread_initial, __ompt_get_thread_data_internal()); 342 } 343 ompt_data_t *task_data; 344 __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL); 345 if (ompt_enabled.ompt_callback_task_create) { 346 ompt_callbacks.ompt_callback(ompt_callback_task_create)( 347 NULL, NULL, task_data, ompt_task_initial, 0, NULL); 348 } 349 350 ompt_set_thread_state(root_thread, omp_state_work_serial); 351 } 352 } 353 354 void ompt_fini() { 355 if (ompt_enabled.enabled) { 356 ompt_start_tool_result->finalize(&(ompt_start_tool_result->tool_data)); 357 } 358 359 memset(&ompt_enabled, 0, sizeof(ompt_enabled)); 360 } 361 362 /***************************************************************************** 363 * interface operations 364 ****************************************************************************/ 365 366 /***************************************************************************** 367 * state 368 ****************************************************************************/ 369 370 OMPT_API_ROUTINE int ompt_enumerate_states(int current_state, int *next_state, 371 const char **next_state_name) { 372 const static int len = sizeof(omp_state_info) / sizeof(omp_state_info_t); 373 int i = 0; 374 375 for (i = 0; i < len - 1; i++) { 376 if (omp_state_info[i].state_id == current_state) { 377 *next_state = omp_state_info[i + 1].state_id; 378 *next_state_name = omp_state_info[i + 1].state_name; 379 return 1; 380 } 381 } 382 383 return 0; 384 } 385 386 OMPT_API_ROUTINE int ompt_enumerate_mutex_impls(int current_impl, 387 int *next_impl, 388 const char **next_impl_name) { 389 const static int len = 390 sizeof(ompt_mutex_impl_info) / sizeof(ompt_mutex_impl_info_t); 391 int i = 0; 392 for (i = 0; i < len - 1; i++) { 393 if (ompt_mutex_impl_info[i].id != current_impl) 394 continue; 395 *next_impl = ompt_mutex_impl_info[i + 1].id; 396 *next_impl_name = ompt_mutex_impl_info[i + 1].name; 397 return 1; 398 } 399 return 0; 400 } 401 402 /***************************************************************************** 403 * callbacks 404 ****************************************************************************/ 405 406 OMPT_API_ROUTINE int ompt_set_callback(ompt_callbacks_t which, 407 ompt_callback_t callback) { 408 switch (which) { 409 410 #define ompt_event_macro(event_name, callback_type, event_id) \ 411 case event_name: \ 412 if (ompt_event_implementation_status(event_name)) { \ 413 ompt_callbacks.ompt_callback(event_name) = (callback_type)callback; \ 414 ompt_enabled.event_name = 1; \ 415 } \ 416 return ompt_event_implementation_status(event_name); 417 418 FOREACH_OMPT_EVENT(ompt_event_macro) 419 420 #undef ompt_event_macro 421 422 default: 423 return ompt_set_error; 424 } 425 } 426 427 OMPT_API_ROUTINE int ompt_get_callback(ompt_callbacks_t which, 428 ompt_callback_t *callback) { 429 switch (which) { 430 431 #define ompt_event_macro(event_name, callback_type, event_id) \ 432 case event_name: \ 433 if (ompt_event_implementation_status(event_name)) { \ 434 ompt_callback_t mycb = \ 435 (ompt_callback_t)ompt_callbacks.ompt_callback(event_name); \ 436 if (mycb) { \ 437 *callback = mycb; \ 438 return ompt_get_callback_success; \ 439 } \ 440 } \ 441 return ompt_get_callback_failure; 442 443 FOREACH_OMPT_EVENT(ompt_event_macro) 444 445 #undef ompt_event_macro 446 447 default: 448 return ompt_get_callback_failure; 449 } 450 } 451 452 /***************************************************************************** 453 * parallel regions 454 ****************************************************************************/ 455 456 OMPT_API_ROUTINE int ompt_get_parallel_info(int ancestor_level, 457 ompt_data_t **parallel_data, 458 int *team_size) { 459 return __ompt_get_parallel_info_internal(ancestor_level, parallel_data, 460 team_size); 461 } 462 463 OMPT_API_ROUTINE omp_state_t ompt_get_state(ompt_wait_id_t *wait_id) { 464 omp_state_t thread_state = __ompt_get_state_internal(wait_id); 465 466 if (thread_state == omp_state_undefined) { 467 thread_state = omp_state_work_serial; 468 } 469 470 return thread_state; 471 } 472 473 /***************************************************************************** 474 * tasks 475 ****************************************************************************/ 476 477 OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void) { 478 return __ompt_get_thread_data_internal(); 479 } 480 481 OMPT_API_ROUTINE int ompt_get_task_info(int ancestor_level, int *type, 482 ompt_data_t **task_data, 483 ompt_frame_t **task_frame, 484 ompt_data_t **parallel_data, 485 int *thread_num) { 486 return __ompt_get_task_info_internal(ancestor_level, type, task_data, 487 task_frame, parallel_data, thread_num); 488 } 489 490 /***************************************************************************** 491 * places 492 ****************************************************************************/ 493 494 OMPT_API_ROUTINE int ompt_get_num_places(void) { 495 // copied from kmp_ftn_entry.h (but modified) 496 #if !KMP_AFFINITY_SUPPORTED 497 return 0; 498 #else 499 if (!KMP_AFFINITY_CAPABLE()) 500 return 0; 501 return __kmp_affinity_num_masks; 502 #endif 503 } 504 505 OMPT_API_ROUTINE int ompt_get_place_proc_ids(int place_num, int ids_size, 506 int *ids) { 507 // copied from kmp_ftn_entry.h (but modified) 508 #if !KMP_AFFINITY_SUPPORTED 509 return 0; 510 #else 511 int i, count; 512 int tmp_ids[ids_size]; 513 if (!KMP_AFFINITY_CAPABLE()) 514 return 0; 515 if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks) 516 return 0; 517 /* TODO: Is this safe for asynchronous call from signal handler during runtime 518 * shutdown? */ 519 kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num); 520 count = 0; 521 KMP_CPU_SET_ITERATE(i, mask) { 522 if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) || 523 (!KMP_CPU_ISSET(i, mask))) { 524 continue; 525 } 526 if (count < ids_size) 527 tmp_ids[count] = i; 528 count++; 529 } 530 if (ids_size >= count) { 531 for (i = 0; i < count; i++) { 532 ids[i] = tmp_ids[i]; 533 } 534 } 535 return count; 536 #endif 537 } 538 539 OMPT_API_ROUTINE int ompt_get_place_num(void) { 540 // copied from kmp_ftn_entry.h (but modified) 541 #if !KMP_AFFINITY_SUPPORTED 542 return -1; 543 #else 544 int gtid; 545 kmp_info_t *thread; 546 if (!KMP_AFFINITY_CAPABLE()) 547 return -1; 548 gtid = __kmp_entry_gtid(); 549 thread = __kmp_thread_from_gtid(gtid); 550 if (thread == NULL || thread->th.th_current_place < 0) 551 return -1; 552 return thread->th.th_current_place; 553 #endif 554 } 555 556 OMPT_API_ROUTINE int ompt_get_partition_place_nums(int place_nums_size, 557 int *place_nums) { 558 // copied from kmp_ftn_entry.h (but modified) 559 #if !KMP_AFFINITY_SUPPORTED 560 return 0; 561 #else 562 int i, gtid, place_num, first_place, last_place, start, end; 563 kmp_info_t *thread; 564 if (!KMP_AFFINITY_CAPABLE()) 565 return 0; 566 gtid = __kmp_entry_gtid(); 567 thread = __kmp_thread_from_gtid(gtid); 568 if (thread == NULL) 569 return 0; 570 first_place = thread->th.th_first_place; 571 last_place = thread->th.th_last_place; 572 if (first_place < 0 || last_place < 0) 573 return 0; 574 if (first_place <= last_place) { 575 start = first_place; 576 end = last_place; 577 } else { 578 start = last_place; 579 end = first_place; 580 } 581 if (end - start <= place_nums_size) 582 for (i = 0, place_num = start; place_num <= end; ++place_num, ++i) { 583 place_nums[i] = place_num; 584 } 585 return end - start; 586 #endif 587 } 588 589 /***************************************************************************** 590 * places 591 ****************************************************************************/ 592 593 OMPT_API_ROUTINE int ompt_get_proc_id(void) { 594 #if KMP_OS_LINUX 595 return sched_getcpu(); 596 #else 597 return -1; 598 #endif 599 } 600 601 /***************************************************************************** 602 * compatability 603 ****************************************************************************/ 604 605 OMPT_API_ROUTINE int ompt_get_ompt_version() { return OMPT_VERSION; } 606 607 /***************************************************************************** 608 * application-facing API 609 ****************************************************************************/ 610 611 /*---------------------------------------------------------------------------- 612 | control 613 ---------------------------------------------------------------------------*/ 614 615 int __kmp_control_tool(uint64_t command, uint64_t modifier, void *arg) { 616 617 if (ompt_enabled.enabled) { 618 if (ompt_enabled.ompt_callback_control_tool) { 619 return ompt_callbacks.ompt_callback(ompt_callback_control_tool)( 620 command, modifier, arg, OMPT_LOAD_RETURN_ADDRESS(__kmp_entry_gtid())); 621 } else { 622 return -1; 623 } 624 } else { 625 return -2; 626 } 627 } 628 629 /***************************************************************************** 630 * misc 631 ****************************************************************************/ 632 633 OMPT_API_ROUTINE uint64_t ompt_get_unique_id(void) { 634 return __ompt_get_unique_id_internal(); 635 } 636 637 /***************************************************************************** 638 * Target 639 ****************************************************************************/ 640 641 OMPT_API_ROUTINE int ompt_get_target_info(uint64_t *device_num, 642 ompt_id_t *target_id, 643 ompt_id_t *host_op_id) { 644 return 0; // thread is not in a target region 645 } 646 647 OMPT_API_ROUTINE int ompt_get_num_devices(void) { 648 return 1; // only one device (the current device) is available 649 } 650 651 /***************************************************************************** 652 * API inquiry for tool 653 ****************************************************************************/ 654 655 static ompt_interface_fn_t ompt_fn_lookup(const char *s) { 656 657 #define ompt_interface_fn(fn) \ 658 fn##_t fn##_f = fn; \ 659 if (strcmp(s, #fn) == 0) \ 660 return (ompt_interface_fn_t)fn##_f; 661 662 FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn) 663 664 return (ompt_interface_fn_t)0; 665 } 666