1 /*****************************************************************************
2  * system include files
3  ****************************************************************************/
4 
5 #include <assert.h>
6 
7 #include <stdint.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #if KMP_OS_UNIX
12 #include <dlfcn.h>
13 #endif
14 
15 /*****************************************************************************
16  * ompt include files
17  ****************************************************************************/
18 
19 #include "ompt-specific.cpp"
20 
21 /*****************************************************************************
22  * macros
23  ****************************************************************************/
24 
// Result codes returned by ompt_get_callback(): success when a registered
// callback was stored through the out-parameter, failure otherwise.
25 #define ompt_get_callback_success 1
26 #define ompt_get_callback_failure 0
27 
// NOTE(review): not referenced anywhere in the visible part of this file.
28 #define no_tool_present 0
29 
// Linkage of the OMPT entry points defined below. They are file-local; the
// only way this file hands them out is as pointers via ompt_fn_lookup().
30 #define OMPT_API_ROUTINE static
31 
// Case-insensitive string equality, used when parsing the OMP_TOOL value.
// A definition supplied before this point takes precedence.
32 #ifndef OMPT_STR_MATCH
33 #define OMPT_STR_MATCH(haystack, needle) (!strcasecmp(haystack, needle))
34 #endif
35 
36 /*****************************************************************************
37  * types
38  ****************************************************************************/
39 
// One row of the thread-state table walked by ompt_enumerate_states().
40 typedef struct {
// Printable name of the state (the stringified enumerator).
41   const char *state_name;
// Numeric identifier of the state.
42   omp_state_t state_id;
43 } omp_state_info_t;
44 
// One row of the mutex-implementation table walked by
// ompt_enumerate_mutex_impls().
45 typedef struct {
// Printable name of the implementation (the stringified enumerator).
46   const char *name;
// Numeric identifier of the implementation.
47   kmp_mutex_impl_t id;
48 } kmp_mutex_impl_info_t;
49 
// Parsed meaning of the OMP_TOOL environment variable (see ompt_pre_init()).
50 enum tool_setting_e {
// Value was set but is neither "enabled" nor "disabled".
51   omp_tool_error,
// Variable is missing or empty.
52   omp_tool_unset,
// Variable is "disabled" (case-insensitive): no tool is looked up.
53   omp_tool_disabled,
// Variable is "enabled" (case-insensitive).
54   omp_tool_enabled
55 };
56 
57 /*****************************************************************************
58  * global variables
59  ****************************************************************************/
60 
// Activation flags: `enabled` is set from the tool initializer's result in
// ompt_post_init(); the per-event members are set by ompt_set_callback().
// Cleared as a whole in ompt_pre_init() and ompt_fini().
61 ompt_callbacks_active_t ompt_enabled;
62 
// Table of all thread states, generated from FOREACH_OMP_STATE: each entry
// pairs the stringified state name with the state value itself.
63 omp_state_info_t omp_state_info[] = {
64 #define omp_state_macro(state, code) {#state, state},
65     FOREACH_OMP_STATE(omp_state_macro)
66 #undef omp_state_macro
67 };
68 
// Table of all mutex implementations, generated from FOREACH_KMP_MUTEX_IMPL:
// each entry pairs the stringified name with the enumerator itself.
69 kmp_mutex_impl_info_t kmp_mutex_impl_info[] = {
70 #define kmp_mutex_impl_macro(name, id) {#name, name},
71     FOREACH_KMP_MUTEX_IMPL(kmp_mutex_impl_macro)
72 #undef kmp_mutex_impl_macro
73 };
74 
// Storage for the callback pointers registered through ompt_set_callback()
// and read back through ompt_get_callback().
75 ompt_callbacks_internal_t ompt_callbacks;
76 
// Result of the tool lookup performed in ompt_pre_init(); stays NULL when no
// tool was found. Supplies the initialize/finalize entry points and the
// tool_data used by ompt_post_init() and ompt_fini().
77 static ompt_start_tool_result_t *ompt_start_tool_result = NULL;
78 
79 /*****************************************************************************
80  * forward declarations
81  ****************************************************************************/
82 
// Name-to-entry-point lookup passed to the tool's initializer in
// ompt_post_init(); defined at the end of this file.
83 static ompt_interface_fn_t ompt_fn_lookup(const char *s);
84 
// Declared ahead of its definition in the "tasks" section below.
85 OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void);
86 
87 /*****************************************************************************
88  * initialization and finalization (private operations)
89  ****************************************************************************/
90 
// Signature of a tool's ompt_start_tool entry point. In this file it is
// called with (omp_version, runtime_version) and its result is treated as
// "tool present" when non-NULL.
91 typedef ompt_start_tool_result_t *(*ompt_start_tool_t)(unsigned int,
92                                                        const char *);
93 
94 #if KMP_OS_DARWIN
95 
96 // While Darwin supports weak symbols, the library that wishes to provide a new
97 // implementation has to link against this runtime which defeats the purpose
98 // of having tools that are agnostic of the underlying runtime implementation.
99 //
100 // Fortunately, the linker includes all symbols of an executable in the global
101 // symbol table by default so dlsym() even finds static implementations of
102 // ompt_start_tool. For this to work on Linux, -Wl,--export-dynamic needs to be
103 // passed when building the application which we don't want to rely on.
104 
105 static ompt_start_tool_result_t *ompt_tool_darwin(unsigned int omp_version,
106                                                   const char *runtime_version) {
107   ompt_start_tool_result_t *ret = NULL;
108   // Search symbol in the current address space.
109   ompt_start_tool_t start_tool =
110       (ompt_start_tool_t)dlsym(RTLD_DEFAULT, "ompt_start_tool");
111   if (start_tool) {
112     ret = start_tool(omp_version, runtime_version);
113   }
114   return ret;
115 }
116 
117 #elif OMPT_HAVE_WEAK_ATTRIBUTE
118 
119 // On Unix-like systems that support weak symbols the following implementation
120 // of ompt_start_tool() will be used in case no tool-supplied implementation of
121 // this function is present in the address space of a process.
122 
123 _OMP_EXTERN OMPT_WEAK_ATTRIBUTE ompt_start_tool_result_t *
124 ompt_start_tool(unsigned int omp_version, const char *runtime_version) {
125   ompt_start_tool_result_t *ret = NULL;
126   // Search next symbol in the current address space. This can happen if the
127   // runtime library is linked before the tool. Since glibc 2.2 strong symbols
128   // don't override weak symbols that have been found before unless the user
129   // sets the environment variable LD_DYNAMIC_WEAK.
130   ompt_start_tool_t next_tool =
131       (ompt_start_tool_t)dlsym(RTLD_NEXT, "ompt_start_tool");
132   if (next_tool) {
133     ret = next_tool(omp_version, runtime_version);
134   }
135   return ret;
136 }
137 
138 #elif OMPT_HAVE_PSAPI
139 
140 // On Windows, the ompt_tool_windows function is used to find the
141 // ompt_start_tool symbol across all modules loaded by a process. If
142 // ompt_start_tool is found, ompt_start_tool's return value is used to
143 // initialize the tool. Otherwise, NULL is returned and OMPT won't be enabled.
144 
145 #include <psapi.h>
146 #pragma comment(lib, "psapi.lib")
147 
148 // The number of loaded modules to start enumeration with EnumProcessModules()
149 #define NUM_MODULES 128
150 
151 static ompt_start_tool_result_t *
152 ompt_tool_windows(unsigned int omp_version, const char *runtime_version) {
153   int i;
154   DWORD needed, new_size;
155   HMODULE *modules;
156   HANDLE process = GetCurrentProcess();
157   modules = (HMODULE *)malloc(NUM_MODULES * sizeof(HMODULE));
158   ompt_start_tool_t ompt_tool_p = NULL;
159 
160 #if OMPT_DEBUG
161   printf("ompt_tool_windows(): looking for ompt_start_tool\n");
162 #endif
163   if (!EnumProcessModules(process, modules, NUM_MODULES * sizeof(HMODULE),
164                           &needed)) {
165     // Regardless of the error reason use the stub initialization function
166     free(modules);
167     return NULL;
168   }
169   // Check if NUM_MODULES is enough to list all modules
170   new_size = needed / sizeof(HMODULE);
171   if (new_size > NUM_MODULES) {
172 #if OMPT_DEBUG
173     printf("ompt_tool_windows(): resize buffer to %d bytes\n", needed);
174 #endif
175     modules = (HMODULE *)realloc(modules, needed);
176     // If resizing failed use the stub function.
177     if (!EnumProcessModules(process, modules, needed, &needed)) {
178       free(modules);
179       return NULL;
180     }
181   }
182   for (i = 0; i < new_size; ++i) {
183     (FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_start_tool");
184     if (ompt_tool_p) {
185 #if OMPT_DEBUG
186       TCHAR modName[MAX_PATH];
187       if (GetModuleFileName(modules[i], modName, MAX_PATH))
188         printf("ompt_tool_windows(): ompt_start_tool found in module %s\n",
189                modName);
190 #endif
191       free(modules);
192       return (*ompt_tool_p)(omp_version, runtime_version);
193     }
194 #if OMPT_DEBUG
195     else {
196       TCHAR modName[MAX_PATH];
197       if (GetModuleFileName(modules[i], modName, MAX_PATH))
198         printf("ompt_tool_windows(): ompt_start_tool not found in module %s\n",
199                modName);
200     }
201 #endif
202   }
203   free(modules);
204   return NULL;
205 }
206 #else
207 #error Activation of OMPT is not supported on this platform.
208 #endif
209 
210 static ompt_start_tool_result_t *
211 ompt_try_start_tool(unsigned int omp_version, const char *runtime_version) {
212   ompt_start_tool_result_t *ret = NULL;
213   ompt_start_tool_t start_tool = NULL;
214 #if KMP_OS_WINDOWS
215   // Cannot use colon to describe a list of absolute paths on Windows
216   const char *sep = ";";
217 #else
218   const char *sep = ":";
219 #endif
220 
221   // Try in the current address space
222 #if KMP_OS_DARWIN
223   ret = ompt_tool_darwin(omp_version, runtime_version);
224 #elif OMPT_HAVE_WEAK_ATTRIBUTE
225   ret = ompt_start_tool(omp_version, runtime_version);
226 #elif OMPT_HAVE_PSAPI
227   ret = ompt_tool_windows(omp_version, runtime_version);
228 #else
229 #error Activation of OMPT is not supported on this platform.
230 #endif
231   if (ret)
232     return ret;
233 
234   // Try tool-libraries-var ICV
235   const char *tool_libs = getenv("OMP_TOOL_LIBRARIES");
236   if (tool_libs) {
237     char *libs = __kmp_str_format("%s", tool_libs);
238     char *buf;
239     char *fname = __kmp_str_token(libs, sep, &buf);
240     while (fname) {
241 #if KMP_OS_UNIX
242       void *h = dlopen(fname, RTLD_LAZY);
243       if (h) {
244         start_tool = (ompt_start_tool_t)dlsym(h, "ompt_start_tool");
245 #elif KMP_OS_WINDOWS
246       HMODULE h = LoadLibrary(fname);
247       if (h) {
248         start_tool = (ompt_start_tool_t)GetProcAddress(h, "ompt_start_tool");
249 #else
250 #error Activation of OMPT is not supported on this platform.
251 #endif
252         if (start_tool && (ret = (*start_tool)(omp_version, runtime_version)))
253           break;
254       }
255       fname = __kmp_str_token(NULL, sep, &buf);
256     }
257     __kmp_str_free(&libs);
258   }
259   return ret;
260 }
261 
262 void ompt_pre_init() {
263   //--------------------------------------------------
264   // Execute the pre-initialization logic only once.
265   //--------------------------------------------------
266   static int ompt_pre_initialized = 0;
267 
268   if (ompt_pre_initialized)
269     return;
270 
271   ompt_pre_initialized = 1;
272 
273   //--------------------------------------------------
274   // Use a tool iff a tool is enabled and available.
275   //--------------------------------------------------
276   const char *ompt_env_var = getenv("OMP_TOOL");
277   tool_setting_e tool_setting = omp_tool_error;
278 
279   if (!ompt_env_var || !strcmp(ompt_env_var, ""))
280     tool_setting = omp_tool_unset;
281   else if (OMPT_STR_MATCH(ompt_env_var, "disabled"))
282     tool_setting = omp_tool_disabled;
283   else if (OMPT_STR_MATCH(ompt_env_var, "enabled"))
284     tool_setting = omp_tool_enabled;
285 
286 #if OMPT_DEBUG
287   printf("ompt_pre_init(): tool_setting = %d\n", tool_setting);
288 #endif
289   switch (tool_setting) {
290   case omp_tool_disabled:
291     break;
292 
293   case omp_tool_unset:
294   case omp_tool_enabled:
295 
296     //--------------------------------------------------
297     // Load tool iff specified in environment variable
298     //--------------------------------------------------
299     ompt_start_tool_result =
300         ompt_try_start_tool(__kmp_openmp_version, ompt_get_runtime_version());
301 
302     memset(&ompt_enabled, 0, sizeof(ompt_enabled));
303     break;
304 
305   case omp_tool_error:
306     fprintf(stderr, "Warning: OMP_TOOL has invalid value \"%s\".\n"
307                     "  legal values are (NULL,\"\",\"disabled\","
308                     "\"enabled\").\n",
309             ompt_env_var);
310     break;
311   }
312 #if OMPT_DEBUG
313   printf("ompt_pre_init(): ompt_enabled = %d\n", ompt_enabled);
314 #endif
315 }
316 
317 void ompt_post_init() {
318   //--------------------------------------------------
319   // Execute the post-initialization logic only once.
320   //--------------------------------------------------
321   static int ompt_post_initialized = 0;
322 
323   if (ompt_post_initialized)
324     return;
325 
326   ompt_post_initialized = 1;
327 
328   //--------------------------------------------------
329   // Initialize the tool if so indicated.
330   //--------------------------------------------------
331   if (ompt_start_tool_result) {
332     ompt_enabled.enabled = !!ompt_start_tool_result->initialize(
333         ompt_fn_lookup, &(ompt_start_tool_result->tool_data));
334 
335     ompt_thread_t *root_thread = ompt_get_thread();
336 
337     ompt_set_thread_state(root_thread, omp_state_overhead);
338 
339     if (ompt_enabled.ompt_callback_thread_begin) {
340       ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
341           ompt_thread_initial, __ompt_get_thread_data_internal());
342     }
343     ompt_data_t *task_data;
344     __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
345     if (ompt_enabled.ompt_callback_task_create) {
346       ompt_callbacks.ompt_callback(ompt_callback_task_create)(
347           NULL, NULL, task_data, ompt_task_initial, 0, NULL);
348     }
349 
350     ompt_set_thread_state(root_thread, omp_state_work_serial);
351   }
352 }
353 
354 void ompt_fini() {
355   if (ompt_enabled.enabled) {
356     ompt_start_tool_result->finalize(&(ompt_start_tool_result->tool_data));
357   }
358 
359   memset(&ompt_enabled, 0, sizeof(ompt_enabled));
360 }
361 
362 /*****************************************************************************
363  * interface operations
364  ****************************************************************************/
365 
366 /*****************************************************************************
367  * state
368  ****************************************************************************/
369 
370 OMPT_API_ROUTINE int ompt_enumerate_states(int current_state, int *next_state,
371                                            const char **next_state_name) {
372   const static int len = sizeof(omp_state_info) / sizeof(omp_state_info_t);
373   int i = 0;
374 
375   for (i = 0; i < len - 1; i++) {
376     if (omp_state_info[i].state_id == current_state) {
377       *next_state = omp_state_info[i + 1].state_id;
378       *next_state_name = omp_state_info[i + 1].state_name;
379       return 1;
380     }
381   }
382 
383   return 0;
384 }
385 
386 OMPT_API_ROUTINE int ompt_enumerate_mutex_impls(int current_impl,
387                                                 int *next_impl,
388                                                 const char **next_impl_name) {
389   const static int len =
390       sizeof(kmp_mutex_impl_info) / sizeof(kmp_mutex_impl_info_t);
391   int i = 0;
392   for (i = 0; i < len - 1; i++) {
393     if (kmp_mutex_impl_info[i].id != current_impl)
394       continue;
395     *next_impl = kmp_mutex_impl_info[i + 1].id;
396     *next_impl_name = kmp_mutex_impl_info[i + 1].name;
397     return 1;
398   }
399   return 0;
400 }
401 
402 /*****************************************************************************
403  * callbacks
404  ****************************************************************************/
405 
406 OMPT_API_ROUTINE int ompt_set_callback(ompt_callbacks_t which,
407                                        ompt_callback_t callback) {
408   switch (which) {
409 
410 #define ompt_event_macro(event_name, callback_type, event_id)                  \
411   case event_name:                                                             \
412     if (ompt_event_implementation_status(event_name)) {                        \
413       ompt_callbacks.ompt_callback(event_name) = (callback_type)callback;      \
414       ompt_enabled.event_name = (callback != 0);                               \
415     }                                                                          \
416     if (callback)                                                              \
417       return ompt_event_implementation_status(event_name);                     \
418     else                                                                       \
419       return ompt_set_always;
420 
421     FOREACH_OMPT_EVENT(ompt_event_macro)
422 
423 #undef ompt_event_macro
424 
425   default:
426     return ompt_set_error;
427   }
428 }
429 
430 OMPT_API_ROUTINE int ompt_get_callback(ompt_callbacks_t which,
431                                        ompt_callback_t *callback) {
432   switch (which) {
433 
434 #define ompt_event_macro(event_name, callback_type, event_id)                  \
435   case event_name:                                                             \
436     if (ompt_event_implementation_status(event_name)) {                        \
437       ompt_callback_t mycb =                                                   \
438           (ompt_callback_t)ompt_callbacks.ompt_callback(event_name);           \
439       if (mycb) {                                                              \
440         *callback = mycb;                                                      \
441         return ompt_get_callback_success;                                      \
442       }                                                                        \
443     }                                                                          \
444     return ompt_get_callback_failure;
445 
446     FOREACH_OMPT_EVENT(ompt_event_macro)
447 
448 #undef ompt_event_macro
449 
450   default:
451     return ompt_get_callback_failure;
452   }
453 }
454 
455 /*****************************************************************************
456  * parallel regions
457  ****************************************************************************/
458 
459 OMPT_API_ROUTINE int ompt_get_parallel_info(int ancestor_level,
460                                             ompt_data_t **parallel_data,
461                                             int *team_size) {
462   return __ompt_get_parallel_info_internal(ancestor_level, parallel_data,
463                                            team_size);
464 }
465 
466 OMPT_API_ROUTINE omp_state_t ompt_get_state(ompt_wait_id_t *wait_id) {
467   omp_state_t thread_state = __ompt_get_state_internal(wait_id);
468 
469   if (thread_state == omp_state_undefined) {
470     thread_state = omp_state_work_serial;
471   }
472 
473   return thread_state;
474 }
475 
476 /*****************************************************************************
477  * tasks
478  ****************************************************************************/
479 
480 OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void) {
481   return __ompt_get_thread_data_internal();
482 }
483 
484 OMPT_API_ROUTINE int ompt_get_task_info(int ancestor_level, int *type,
485                                         ompt_data_t **task_data,
486                                         ompt_frame_t **task_frame,
487                                         ompt_data_t **parallel_data,
488                                         int *thread_num) {
489   return __ompt_get_task_info_internal(ancestor_level, type, task_data,
490                                        task_frame, parallel_data, thread_num);
491 }
492 
493 /*****************************************************************************
494  * num_procs
495  ****************************************************************************/
496 
497 OMPT_API_ROUTINE int ompt_get_num_procs(void) {
498 // copied from kmp_ftn_entry.h (but modified: OMPT can only be called when runtime is initialized)
499   return __kmp_avail_proc;
500 }
501 
502 /*****************************************************************************
503  * places
504  ****************************************************************************/
505 
506 OMPT_API_ROUTINE int ompt_get_num_places(void) {
507 // copied from kmp_ftn_entry.h (but modified)
508 #if !KMP_AFFINITY_SUPPORTED
509   return 0;
510 #else
511   if (!KMP_AFFINITY_CAPABLE())
512     return 0;
513   return __kmp_affinity_num_masks;
514 #endif
515 }
516 
517 OMPT_API_ROUTINE int ompt_get_place_proc_ids(int place_num, int ids_size,
518                                              int *ids) {
519 // copied from kmp_ftn_entry.h (but modified)
520 #if !KMP_AFFINITY_SUPPORTED
521   return 0;
522 #else
523   int i, count;
524   int tmp_ids[ids_size];
525   if (!KMP_AFFINITY_CAPABLE())
526     return 0;
527   if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks)
528     return 0;
529   /* TODO: Is this safe for asynchronous call from signal handler during runtime
530    * shutdown? */
531   kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num);
532   count = 0;
533   KMP_CPU_SET_ITERATE(i, mask) {
534     if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) ||
535         (!KMP_CPU_ISSET(i, mask))) {
536       continue;
537     }
538     if (count < ids_size)
539       tmp_ids[count] = i;
540     count++;
541   }
542   if (ids_size >= count) {
543     for (i = 0; i < count; i++) {
544       ids[i] = tmp_ids[i];
545     }
546   }
547   return count;
548 #endif
549 }
550 
551 OMPT_API_ROUTINE int ompt_get_place_num(void) {
552 // copied from kmp_ftn_entry.h (but modified)
553 #if !KMP_AFFINITY_SUPPORTED
554   return -1;
555 #else
556   if (__kmp_get_gtid() < 0)
557     return -1;
558 
559   int gtid;
560   kmp_info_t *thread;
561   if (!KMP_AFFINITY_CAPABLE())
562     return -1;
563   gtid = __kmp_entry_gtid();
564   thread = __kmp_thread_from_gtid(gtid);
565   if (thread == NULL || thread->th.th_current_place < 0)
566     return -1;
567   return thread->th.th_current_place;
568 #endif
569 }
570 
571 OMPT_API_ROUTINE int ompt_get_partition_place_nums(int place_nums_size,
572                                                    int *place_nums) {
573 // copied from kmp_ftn_entry.h (but modified)
574 #if !KMP_AFFINITY_SUPPORTED
575   return 0;
576 #else
577   if (__kmp_get_gtid() < 0)
578     return 0;
579 
580   int i, gtid, place_num, first_place, last_place, start, end;
581   kmp_info_t *thread;
582   if (!KMP_AFFINITY_CAPABLE())
583     return 0;
584   gtid = __kmp_entry_gtid();
585   thread = __kmp_thread_from_gtid(gtid);
586   if (thread == NULL)
587     return 0;
588   first_place = thread->th.th_first_place;
589   last_place = thread->th.th_last_place;
590   if (first_place < 0 || last_place < 0)
591     return 0;
592   if (first_place <= last_place) {
593     start = first_place;
594     end = last_place;
595   } else {
596     start = last_place;
597     end = first_place;
598   }
599   if (end - start <= place_nums_size)
600     for (i = 0, place_num = start; place_num <= end; ++place_num, ++i) {
601       place_nums[i] = place_num;
602     }
603   return end - start;
604 #endif
605 }
606 
607 /*****************************************************************************
608  * processor id
609  ****************************************************************************/
610 
611 OMPT_API_ROUTINE int ompt_get_proc_id(void) {
612 #if KMP_OS_LINUX
613   if (__kmp_get_gtid() < 0)
614     return -1;
615 
616   return sched_getcpu();
617 #else
618   return -1;
619 #endif
620 }
621 
622 /*****************************************************************************
623  * compatibility
624  ****************************************************************************/
625 
626 OMPT_API_ROUTINE int ompt_get_ompt_version() { return OMPT_VERSION; }
627 
628 /*****************************************************************************
629 * application-facing API
630  ****************************************************************************/
631 
632 /*----------------------------------------------------------------------------
633  | control
634  ---------------------------------------------------------------------------*/
635 
636 int __kmp_control_tool(uint64_t command, uint64_t modifier, void *arg) {
637 
638   if (ompt_enabled.enabled) {
639     if (ompt_enabled.ompt_callback_control_tool) {
640       return ompt_callbacks.ompt_callback(ompt_callback_control_tool)(
641           command, modifier, arg, OMPT_LOAD_RETURN_ADDRESS(__kmp_entry_gtid()));
642     } else {
643       return -1;
644     }
645   } else {
646     return -2;
647   }
648 }
649 
650 /*****************************************************************************
651  * misc
652  ****************************************************************************/
653 
654 OMPT_API_ROUTINE uint64_t ompt_get_unique_id(void) {
655   return __ompt_get_unique_id_internal();
656 }
657 
658 /*****************************************************************************
659  * Target
660  ****************************************************************************/
661 
// Target-region query. This implementation always returns 0 and never writes
// to device_num, target_id or host_op_id.
662 OMPT_API_ROUTINE int ompt_get_target_info(uint64_t *device_num,
663                                           ompt_id_t *target_id,
664                                           ompt_id_t *host_op_id) {
665   return 0; // thread is not in a target region
666 }
667 
// Device-count query. This implementation always returns 1.
668 OMPT_API_ROUTINE int ompt_get_num_devices(void) {
669   return 1; // only one device (the current device) is available
670 }
671 
672 /*****************************************************************************
673  * API inquiry for tool
674  ****************************************************************************/
675 
676 static ompt_interface_fn_t ompt_fn_lookup(const char *s) {
677 
678 #define ompt_interface_fn(fn)                                                  \
679   fn##_t fn##_f = fn;                                                          \
680   if (strcmp(s, #fn) == 0)                                                     \
681     return (ompt_interface_fn_t)fn##_f;
682 
683   FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn)
684 
685   return (ompt_interface_fn_t)0;
686 }
687