1 /*****************************************************************************
2  * system include files
3  ****************************************************************************/
4 
5 #include <assert.h>
6 
7 #include <stdint.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #if KMP_OS_UNIX
12 #include <dlfcn.h>
13 #endif
14 
15 /*****************************************************************************
16  * ompt include files
17  ****************************************************************************/
18 
19 #include "ompt-specific.cpp"
20 
21 /*****************************************************************************
22  * macros
23  ****************************************************************************/
24 
25 #define ompt_get_callback_success 1
26 #define ompt_get_callback_failure 0
27 
28 #define no_tool_present 0
29 
30 #define OMPT_API_ROUTINE static
31 
32 #ifndef OMPT_STR_MATCH
33 #define OMPT_STR_MATCH(haystack, needle) (!strcasecmp(haystack, needle))
34 #endif
35 
36 /*****************************************************************************
37  * types
38  ****************************************************************************/
39 
40 typedef struct {
41   const char *state_name;
42   omp_state_t state_id;
43 } omp_state_info_t;
44 
45 typedef struct {
46   const char *name;
47   kmp_mutex_impl_t id;
48 } kmp_mutex_impl_info_t;
49 
// Outcome of parsing the OMP_TOOL environment variable in ompt_pre_init().
enum tool_setting_e {
  omp_tool_error = 0,    // value present but not recognized
  omp_tool_unset = 1,    // variable missing or empty
  omp_tool_disabled = 2, // value matched "disabled"
  omp_tool_enabled = 3   // value matched "enabled"
};
56 
57 /*****************************************************************************
58  * global variables
59  ****************************************************************************/
60 
61 ompt_callbacks_active_t ompt_enabled;
62 
63 omp_state_info_t omp_state_info[] = {
64 #define omp_state_macro(state, code) {#state, state},
65     FOREACH_OMP_STATE(omp_state_macro)
66 #undef omp_state_macro
67 };
68 
69 kmp_mutex_impl_info_t kmp_mutex_impl_info[] = {
70 #define kmp_mutex_impl_macro(name, id) {#name, name},
71     FOREACH_KMP_MUTEX_IMPL(kmp_mutex_impl_macro)
72 #undef kmp_mutex_impl_macro
73 };
74 
75 ompt_callbacks_internal_t ompt_callbacks;
76 
77 static ompt_start_tool_result_t *ompt_start_tool_result = NULL;
78 
79 /*****************************************************************************
80  * forward declarations
81  ****************************************************************************/
82 
83 static ompt_interface_fn_t ompt_fn_lookup(const char *s);
84 
85 OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void);
86 
87 /*****************************************************************************
88  * initialization and finalization (private operations)
89  ****************************************************************************/
90 
91 typedef ompt_start_tool_result_t *(*ompt_start_tool_t)(unsigned int,
92                                                        const char *);
93 
94 #if KMP_OS_DARWIN
95 
96 // While Darwin supports weak symbols, the library that wishes to provide a new
97 // implementation has to link against this runtime which defeats the purpose
98 // of having tools that are agnostic of the underlying runtime implementation.
99 //
100 // Fortunately, the linker includes all symbols of an executable in the global
101 // symbol table by default so dlsym() even finds static implementations of
102 // ompt_start_tool. For this to work on Linux, -Wl,--export-dynamic needs to be
103 // passed when building the application which we don't want to rely on.
104 
105 static ompt_start_tool_result_t *ompt_tool_darwin(unsigned int omp_version,
106                                                   const char *runtime_version) {
107   ompt_start_tool_result_t *ret = NULL;
108   // Search symbol in the current address space.
109   ompt_start_tool_t start_tool =
110       (ompt_start_tool_t)dlsym(RTLD_DEFAULT, "ompt_start_tool");
111   if (start_tool) {
112     ret = start_tool(omp_version, runtime_version);
113   }
114   return ret;
115 }
116 
117 #elif OMPT_HAVE_WEAK_ATTRIBUTE
118 
119 // On Unix-like systems that support weak symbols the following implementation
120 // of ompt_start_tool() will be used in case no tool-supplied implementation of
121 // this function is present in the address space of a process.
122 
123 _OMP_EXTERN OMPT_WEAK_ATTRIBUTE ompt_start_tool_result_t *
124 ompt_start_tool(unsigned int omp_version, const char *runtime_version) {
125   ompt_start_tool_result_t *ret = NULL;
126   // Search next symbol in the current address space. This can happen if the
127   // runtime library is linked before the tool. Since glibc 2.2 strong symbols
128   // don't override weak symbols that have been found before unless the user
129   // sets the environment variable LD_DYNAMIC_WEAK.
130   ompt_start_tool_t next_tool =
131       (ompt_start_tool_t)dlsym(RTLD_NEXT, "ompt_start_tool");
132   if (next_tool) {
133     ret = next_tool(omp_version, runtime_version);
134   }
135   return ret;
136 }
137 
138 #elif OMPT_HAVE_PSAPI
139 
140 // On Windows, the ompt_tool_windows function is used to find the
141 // ompt_start_tool symbol across all modules loaded by a process. If
142 // ompt_start_tool is found, ompt_start_tool's return value is used to
143 // initialize the tool. Otherwise, NULL is returned and OMPT won't be enabled.
144 
145 #include <psapi.h>
146 #pragma comment(lib, "psapi.lib")
147 
148 // The number of loaded modules to start enumeration with EnumProcessModules()
149 #define NUM_MODULES 128
150 
151 static ompt_start_tool_result_t *
152 ompt_tool_windows(unsigned int omp_version, const char *runtime_version) {
153   int i;
154   DWORD needed, new_size;
155   HMODULE *modules;
156   HANDLE process = GetCurrentProcess();
157   modules = (HMODULE *)malloc(NUM_MODULES * sizeof(HMODULE));
158   ompt_start_tool_t ompt_tool_p = NULL;
159 
160 #if OMPT_DEBUG
161   printf("ompt_tool_windows(): looking for ompt_start_tool\n");
162 #endif
163   if (!EnumProcessModules(process, modules, NUM_MODULES * sizeof(HMODULE),
164                           &needed)) {
165     // Regardless of the error reason use the stub initialization function
166     free(modules);
167     return NULL;
168   }
169   // Check if NUM_MODULES is enough to list all modules
170   new_size = needed / sizeof(HMODULE);
171   if (new_size > NUM_MODULES) {
172 #if OMPT_DEBUG
173     printf("ompt_tool_windows(): resize buffer to %d bytes\n", needed);
174 #endif
175     modules = (HMODULE *)realloc(modules, needed);
176     // If resizing failed use the stub function.
177     if (!EnumProcessModules(process, modules, needed, &needed)) {
178       free(modules);
179       return NULL;
180     }
181   }
182   for (i = 0; i < new_size; ++i) {
183     (FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_start_tool");
184     if (ompt_tool_p) {
185 #if OMPT_DEBUG
186       TCHAR modName[MAX_PATH];
187       if (GetModuleFileName(modules[i], modName, MAX_PATH))
188         printf("ompt_tool_windows(): ompt_start_tool found in module %s\n",
189                modName);
190 #endif
191       free(modules);
192       return (*ompt_tool_p)(omp_version, runtime_version);
193     }
194 #if OMPT_DEBUG
195     else {
196       TCHAR modName[MAX_PATH];
197       if (GetModuleFileName(modules[i], modName, MAX_PATH))
198         printf("ompt_tool_windows(): ompt_start_tool not found in module %s\n",
199                modName);
200     }
201 #endif
202   }
203   free(modules);
204   return NULL;
205 }
206 #else
207 #error Activation of OMPT is not supported on this platform.
208 #endif
209 
210 static ompt_start_tool_result_t *
211 ompt_try_start_tool(unsigned int omp_version, const char *runtime_version) {
212   ompt_start_tool_result_t *ret = NULL;
213   ompt_start_tool_t start_tool = NULL;
214 #if KMP_OS_WINDOWS
215   // Cannot use colon to describe a list of absolute paths on Windows
216   const char *sep = ";";
217 #else
218   const char *sep = ":";
219 #endif
220 
221 #if KMP_OS_DARWIN
222   // Try in the current address space
223   ret = ompt_tool_darwin(omp_version, runtime_version);
224 #elif OMPT_HAVE_WEAK_ATTRIBUTE
225   ret = ompt_start_tool(omp_version, runtime_version);
226 #elif OMPT_HAVE_PSAPI
227   ret = ompt_tool_windows(omp_version, runtime_version);
228 #else
229 #error Activation of OMPT is not supported on this platform.
230 #endif
231   if (ret)
232     return ret;
233 
234   // Try tool-libraries-var ICV
235   const char *tool_libs = getenv("OMP_TOOL_LIBRARIES");
236   if (tool_libs) {
237     char *libs = __kmp_str_format("%s", tool_libs);
238     char *buf;
239     char *fname = __kmp_str_token(libs, sep, &buf);
240     while (fname) {
241 #if KMP_OS_UNIX
242       void *h = dlopen(fname, RTLD_LAZY);
243       if (h) {
244         start_tool = (ompt_start_tool_t)dlsym(h, "ompt_start_tool");
245 #elif KMP_OS_WINDOWS
246       HMODULE h = LoadLibrary(fname);
247       if (h) {
248         start_tool = (ompt_start_tool_t)GetProcAddress(h, "ompt_start_tool");
249 #else
250 #error Activation of OMPT is not supported on this platform.
251 #endif
252         if (start_tool && (ret = (*start_tool)(omp_version, runtime_version)))
253           break;
254       }
255       fname = __kmp_str_token(NULL, sep, &buf);
256     }
257     __kmp_str_free(&libs);
258   }
259   return ret;
260 }
261 
262 void ompt_pre_init() {
263   //--------------------------------------------------
264   // Execute the pre-initialization logic only once.
265   //--------------------------------------------------
266   static int ompt_pre_initialized = 0;
267 
268   if (ompt_pre_initialized)
269     return;
270 
271   ompt_pre_initialized = 1;
272 
273   //--------------------------------------------------
274   // Use a tool iff a tool is enabled and available.
275   //--------------------------------------------------
276   const char *ompt_env_var = getenv("OMP_TOOL");
277   tool_setting_e tool_setting = omp_tool_error;
278 
279   if (!ompt_env_var || !strcmp(ompt_env_var, ""))
280     tool_setting = omp_tool_unset;
281   else if (OMPT_STR_MATCH(ompt_env_var, "disabled"))
282     tool_setting = omp_tool_disabled;
283   else if (OMPT_STR_MATCH(ompt_env_var, "enabled"))
284     tool_setting = omp_tool_enabled;
285 
286 #if OMPT_DEBUG
287   printf("ompt_pre_init(): tool_setting = %d\n", tool_setting);
288 #endif
289   switch (tool_setting) {
290   case omp_tool_disabled:
291     break;
292 
293   case omp_tool_unset:
294   case omp_tool_enabled:
295 
296     //--------------------------------------------------
297     // Load tool iff specified in environment variable
298     //--------------------------------------------------
299     ompt_start_tool_result =
300         ompt_try_start_tool(__kmp_openmp_version, ompt_get_runtime_version());
301 
302     memset(&ompt_enabled, 0, sizeof(ompt_enabled));
303     break;
304 
305   case omp_tool_error:
306     fprintf(stderr, "Warning: OMP_TOOL has invalid value \"%s\".\n"
307                     "  legal values are (NULL,\"\",\"disabled\","
308                     "\"enabled\").\n",
309             ompt_env_var);
310     break;
311   }
312 #if OMPT_DEBUG
313   printf("ompt_pre_init(): ompt_enabled = %d\n", ompt_enabled);
314 #endif
315 }
316 
317 void ompt_post_init() {
318   //--------------------------------------------------
319   // Execute the post-initialization logic only once.
320   //--------------------------------------------------
321   static int ompt_post_initialized = 0;
322 
323   if (ompt_post_initialized)
324     return;
325 
326   ompt_post_initialized = 1;
327 
328   //--------------------------------------------------
329   // Initialize the tool if so indicated.
330   //--------------------------------------------------
331   if (ompt_start_tool_result) {
332     ompt_enabled.enabled = !!ompt_start_tool_result->initialize(
333         ompt_fn_lookup, &(ompt_start_tool_result->tool_data));
334 
335     if (!ompt_enabled.enabled) {
336       // tool not enabled, zero out the bitmap, and done
337       memset(&ompt_enabled, 0, sizeof(ompt_enabled));
338       return;
339     }
340 
341     ompt_thread_t *root_thread = ompt_get_thread();
342 
343     ompt_set_thread_state(root_thread, omp_state_overhead);
344 
345     if (ompt_enabled.ompt_callback_thread_begin) {
346       ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
347           ompt_thread_initial, __ompt_get_thread_data_internal());
348     }
349     ompt_data_t *task_data;
350     __ompt_get_task_info_internal(0, NULL, &task_data, NULL, NULL, NULL);
351     if (ompt_enabled.ompt_callback_task_create) {
352       ompt_callbacks.ompt_callback(ompt_callback_task_create)(
353           NULL, NULL, task_data, ompt_task_initial, 0, NULL);
354     }
355 
356     ompt_set_thread_state(root_thread, omp_state_work_serial);
357   }
358 }
359 
360 void ompt_fini() {
361   if (ompt_enabled.enabled) {
362     ompt_start_tool_result->finalize(&(ompt_start_tool_result->tool_data));
363   }
364 
365   memset(&ompt_enabled, 0, sizeof(ompt_enabled));
366 }
367 
368 /*****************************************************************************
369  * interface operations
370  ****************************************************************************/
371 
372 /*****************************************************************************
373  * state
374  ****************************************************************************/
375 
376 OMPT_API_ROUTINE int ompt_enumerate_states(int current_state, int *next_state,
377                                            const char **next_state_name) {
378   const static int len = sizeof(omp_state_info) / sizeof(omp_state_info_t);
379   int i = 0;
380 
381   for (i = 0; i < len - 1; i++) {
382     if (omp_state_info[i].state_id == current_state) {
383       *next_state = omp_state_info[i + 1].state_id;
384       *next_state_name = omp_state_info[i + 1].state_name;
385       return 1;
386     }
387   }
388 
389   return 0;
390 }
391 
392 OMPT_API_ROUTINE int ompt_enumerate_mutex_impls(int current_impl,
393                                                 int *next_impl,
394                                                 const char **next_impl_name) {
395   const static int len =
396       sizeof(kmp_mutex_impl_info) / sizeof(kmp_mutex_impl_info_t);
397   int i = 0;
398   for (i = 0; i < len - 1; i++) {
399     if (kmp_mutex_impl_info[i].id != current_impl)
400       continue;
401     *next_impl = kmp_mutex_impl_info[i + 1].id;
402     *next_impl_name = kmp_mutex_impl_info[i + 1].name;
403     return 1;
404   }
405   return 0;
406 }
407 
408 /*****************************************************************************
409  * callbacks
410  ****************************************************************************/
411 
412 OMPT_API_ROUTINE int ompt_set_callback(ompt_callbacks_t which,
413                                        ompt_callback_t callback) {
414   switch (which) {
415 
416 #define ompt_event_macro(event_name, callback_type, event_id)                  \
417   case event_name:                                                             \
418     if (ompt_event_implementation_status(event_name)) {                        \
419       ompt_callbacks.ompt_callback(event_name) = (callback_type)callback;      \
420       ompt_enabled.event_name = (callback != 0);                               \
421     }                                                                          \
422     if (callback)                                                              \
423       return ompt_event_implementation_status(event_name);                     \
424     else                                                                       \
425       return ompt_set_always;
426 
427     FOREACH_OMPT_EVENT(ompt_event_macro)
428 
429 #undef ompt_event_macro
430 
431   default:
432     return ompt_set_error;
433   }
434 }
435 
436 OMPT_API_ROUTINE int ompt_get_callback(ompt_callbacks_t which,
437                                        ompt_callback_t *callback) {
438   switch (which) {
439 
440 #define ompt_event_macro(event_name, callback_type, event_id)                  \
441   case event_name:                                                             \
442     if (ompt_event_implementation_status(event_name)) {                        \
443       ompt_callback_t mycb =                                                   \
444           (ompt_callback_t)ompt_callbacks.ompt_callback(event_name);           \
445       if (mycb) {                                                              \
446         *callback = mycb;                                                      \
447         return ompt_get_callback_success;                                      \
448       }                                                                        \
449     }                                                                          \
450     return ompt_get_callback_failure;
451 
452     FOREACH_OMPT_EVENT(ompt_event_macro)
453 
454 #undef ompt_event_macro
455 
456   default:
457     return ompt_get_callback_failure;
458   }
459 }
460 
461 /*****************************************************************************
462  * parallel regions
463  ****************************************************************************/
464 
465 OMPT_API_ROUTINE int ompt_get_parallel_info(int ancestor_level,
466                                             ompt_data_t **parallel_data,
467                                             int *team_size) {
468   return __ompt_get_parallel_info_internal(ancestor_level, parallel_data,
469                                            team_size);
470 }
471 
472 OMPT_API_ROUTINE omp_state_t ompt_get_state(ompt_wait_id_t *wait_id) {
473   omp_state_t thread_state = __ompt_get_state_internal(wait_id);
474 
475   if (thread_state == omp_state_undefined) {
476     thread_state = omp_state_work_serial;
477   }
478 
479   return thread_state;
480 }
481 
482 /*****************************************************************************
483  * tasks
484  ****************************************************************************/
485 
486 OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void) {
487   return __ompt_get_thread_data_internal();
488 }
489 
490 OMPT_API_ROUTINE int ompt_get_task_info(int ancestor_level, int *type,
491                                         ompt_data_t **task_data,
492                                         ompt_frame_t **task_frame,
493                                         ompt_data_t **parallel_data,
494                                         int *thread_num) {
495   return __ompt_get_task_info_internal(ancestor_level, type, task_data,
496                                        task_frame, parallel_data, thread_num);
497 }
498 
499 /*****************************************************************************
500  * num_procs
501  ****************************************************************************/
502 
503 OMPT_API_ROUTINE int ompt_get_num_procs(void) {
504   // copied from kmp_ftn_entry.h (but modified: OMPT can only be called when
505   // runtime is initialized)
506   return __kmp_avail_proc;
507 }
508 
509 /*****************************************************************************
510  * places
511  ****************************************************************************/
512 
513 OMPT_API_ROUTINE int ompt_get_num_places(void) {
514 // copied from kmp_ftn_entry.h (but modified)
515 #if !KMP_AFFINITY_SUPPORTED
516   return 0;
517 #else
518   if (!KMP_AFFINITY_CAPABLE())
519     return 0;
520   return __kmp_affinity_num_masks;
521 #endif
522 }
523 
524 OMPT_API_ROUTINE int ompt_get_place_proc_ids(int place_num, int ids_size,
525                                              int *ids) {
526 // copied from kmp_ftn_entry.h (but modified)
527 #if !KMP_AFFINITY_SUPPORTED
528   return 0;
529 #else
530   int i, count;
531   int tmp_ids[ids_size];
532   if (!KMP_AFFINITY_CAPABLE())
533     return 0;
534   if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks)
535     return 0;
536   /* TODO: Is this safe for asynchronous call from signal handler during runtime
537    * shutdown? */
538   kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num);
539   count = 0;
540   KMP_CPU_SET_ITERATE(i, mask) {
541     if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) ||
542         (!KMP_CPU_ISSET(i, mask))) {
543       continue;
544     }
545     if (count < ids_size)
546       tmp_ids[count] = i;
547     count++;
548   }
549   if (ids_size >= count) {
550     for (i = 0; i < count; i++) {
551       ids[i] = tmp_ids[i];
552     }
553   }
554   return count;
555 #endif
556 }
557 
558 OMPT_API_ROUTINE int ompt_get_place_num(void) {
559 // copied from kmp_ftn_entry.h (but modified)
560 #if !KMP_AFFINITY_SUPPORTED
561   return -1;
562 #else
563   if (__kmp_get_gtid() < 0)
564     return -1;
565 
566   int gtid;
567   kmp_info_t *thread;
568   if (!KMP_AFFINITY_CAPABLE())
569     return -1;
570   gtid = __kmp_entry_gtid();
571   thread = __kmp_thread_from_gtid(gtid);
572   if (thread == NULL || thread->th.th_current_place < 0)
573     return -1;
574   return thread->th.th_current_place;
575 #endif
576 }
577 
578 OMPT_API_ROUTINE int ompt_get_partition_place_nums(int place_nums_size,
579                                                    int *place_nums) {
580 // copied from kmp_ftn_entry.h (but modified)
581 #if !KMP_AFFINITY_SUPPORTED
582   return 0;
583 #else
584   if (__kmp_get_gtid() < 0)
585     return 0;
586 
587   int i, gtid, place_num, first_place, last_place, start, end;
588   kmp_info_t *thread;
589   if (!KMP_AFFINITY_CAPABLE())
590     return 0;
591   gtid = __kmp_entry_gtid();
592   thread = __kmp_thread_from_gtid(gtid);
593   if (thread == NULL)
594     return 0;
595   first_place = thread->th.th_first_place;
596   last_place = thread->th.th_last_place;
597   if (first_place < 0 || last_place < 0)
598     return 0;
599   if (first_place <= last_place) {
600     start = first_place;
601     end = last_place;
602   } else {
603     start = last_place;
604     end = first_place;
605   }
606   if (end - start <= place_nums_size)
607     for (i = 0, place_num = start; place_num <= end; ++place_num, ++i) {
608       place_nums[i] = place_num;
609     }
610   return end - start + 1;
611 #endif
612 }
613 
614 /*****************************************************************************
 * proc_id
616  ****************************************************************************/
617 
618 OMPT_API_ROUTINE int ompt_get_proc_id(void) {
619 #if KMP_OS_LINUX
620   if (__kmp_get_gtid() < 0)
621     return -1;
622 
623   return sched_getcpu();
624 #else
625   return -1;
626 #endif
627 }
628 
629 /*****************************************************************************
 * compatibility
631  ****************************************************************************/
632 
633 OMPT_API_ROUTINE int ompt_get_ompt_version() { return OMPT_VERSION; }
634 
635 /*****************************************************************************
 * application-facing API
637  ****************************************************************************/
638 
639 /*----------------------------------------------------------------------------
640  | control
641  ---------------------------------------------------------------------------*/
642 
643 int __kmp_control_tool(uint64_t command, uint64_t modifier, void *arg) {
644 
645   if (ompt_enabled.enabled) {
646     if (ompt_enabled.ompt_callback_control_tool) {
647       return ompt_callbacks.ompt_callback(ompt_callback_control_tool)(
648           command, modifier, arg, OMPT_LOAD_RETURN_ADDRESS(__kmp_entry_gtid()));
649     } else {
650       return -1;
651     }
652   } else {
653     return -2;
654   }
655 }
656 
657 /*****************************************************************************
658  * misc
659  ****************************************************************************/
660 
661 OMPT_API_ROUTINE uint64_t ompt_get_unique_id(void) {
662   return __ompt_get_unique_id_internal();
663 }
664 
665 /*****************************************************************************
666  * Target
667  ****************************************************************************/
668 
669 OMPT_API_ROUTINE int ompt_get_target_info(uint64_t *device_num,
670                                           ompt_id_t *target_id,
671                                           ompt_id_t *host_op_id) {
672   return 0; // thread is not in a target region
673 }
674 
675 OMPT_API_ROUTINE int ompt_get_num_devices(void) {
676   return 1; // only one device (the current device) is available
677 }
678 
679 /*****************************************************************************
680  * API inquiry for tool
681  ****************************************************************************/
682 
683 static ompt_interface_fn_t ompt_fn_lookup(const char *s) {
684 
685 #define ompt_interface_fn(fn)                                                  \
686   fn##_t fn##_f = fn;                                                          \
687   if (strcmp(s, #fn) == 0)                                                     \
688     return (ompt_interface_fn_t)fn##_f;
689 
690   FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn)
691 
692   return (ompt_interface_fn_t)0;
693 }
694