1 #ifndef _BSD_SOURCE
2 #define _BSD_SOURCE
3 #endif
4 #ifndef _DEFAULT_SOURCE
5 #define _DEFAULT_SOURCE
6 #endif
7 #include <stdio.h>
8 #ifndef __STDC_FORMAT_MACROS
9 #define __STDC_FORMAT_MACROS
10 #endif
11 #include <inttypes.h>
12 #include <omp.h>
13 #include <omp-tools.h>
14 #include "ompt-signal.h"
15 
16 // Used to detect architecture
17 #include "../../src/kmp_platform.h"
18 
19 #ifndef _TOOL_PREFIX
20 #define _TOOL_PREFIX ""
21 // If no _TOOL_PREFIX is set, we assume that we run as part of an OMPT test
22 #define _OMPT_TESTS
23 #endif
24 
// Display names for thread types. Slot 0 is a placeholder so that the named
// entries start at index 1 (presumably matching the ompt_thread_t values --
// confirm against omp-tools.h).
static const char *ompt_thread_t_values[] = {
    "ompt_thread_UNDEFINED", // 0
    "ompt_thread_initial", // 1
    "ompt_thread_worker", // 2
    "ompt_thread_other" // 3
};
28 
// Display names for task status values. The trailing comment on each entry is
// its array index; slot 0 is a placeholder for "no valid status".
static const char *ompt_task_status_t_values[] = {
    "ompt_task_UNDEFINED", // 0
    "ompt_task_complete", // 1
    "ompt_task_yield", // 2
    "ompt_task_cancel", // 3
    "ompt_task_detach", // 4
    "ompt_task_early_fulfill", // 5
    "ompt_task_late_fulfill", // 6
    "ompt_task_switch", // 7
    "ompt_taskwait_complete" // 8
};
// Display names for cancel flags. The entries are looked up by fixed position
// (not by flag value): 0-3 name the cancelled construct, 4-6 name the
// cancellation state.
static const char *ompt_cancel_flag_t_values[] = {
    "ompt_cancel_parallel", // 0
    "ompt_cancel_sections", // 1
    "ompt_cancel_loop", // 2
    "ompt_cancel_taskgroup", // 3
    "ompt_cancel_activated", // 4
    "ompt_cancel_detected", // 5
    "ompt_cancel_discarded_task" // 6
};
49 
// Display names for task dependence types. The trailing comment on each entry
// is its array index; slot 0 is a placeholder for "no valid type".
static const char *ompt_dependence_type_t_values[] = {
    "ompt_dependence_type_UNDEFINED", // 0
    "ompt_dependence_type_in", // 1
    "ompt_dependence_type_out", // 2
    "ompt_dependence_type_inout", // 3
    "ompt_dependence_type_mutexinoutset", // 4
    "ompt_dependence_type_source", // 5
    "ompt_dependence_type_sink", // 6
    "ompt_dependence_type_inoutset" // 7
};
60 
format_task_type(int type,char * buffer)61 static void format_task_type(int type, char *buffer) {
62   char *progress = buffer;
63   if (type & ompt_task_initial)
64     progress += sprintf(progress, "ompt_task_initial");
65   if (type & ompt_task_implicit)
66     progress += sprintf(progress, "ompt_task_implicit");
67   if (type & ompt_task_explicit)
68     progress += sprintf(progress, "ompt_task_explicit");
69   if (type & ompt_task_target)
70     progress += sprintf(progress, "ompt_task_target");
71   if (type & ompt_task_taskwait)
72     progress += sprintf(progress, "ompt_task_taskwait");
73   if (type & ompt_task_undeferred)
74     progress += sprintf(progress, "|ompt_task_undeferred");
75   if (type & ompt_task_untied)
76     progress += sprintf(progress, "|ompt_task_untied");
77   if (type & ompt_task_final)
78     progress += sprintf(progress, "|ompt_task_final");
79   if (type & ompt_task_mergeable)
80     progress += sprintf(progress, "|ompt_task_mergeable");
81   if (type & ompt_task_merged)
82     progress += sprintf(progress, "|ompt_task_merged");
83 }
84 
85 static ompt_set_callback_t ompt_set_callback;
86 static ompt_get_callback_t ompt_get_callback;
87 static ompt_get_state_t ompt_get_state;
88 static ompt_get_task_info_t ompt_get_task_info;
89 static ompt_get_task_memory_t ompt_get_task_memory;
90 static ompt_get_thread_data_t ompt_get_thread_data;
91 static ompt_get_parallel_info_t ompt_get_parallel_info;
92 static ompt_get_unique_id_t ompt_get_unique_id;
93 static ompt_finalize_tool_t ompt_finalize_tool;
94 static ompt_get_num_procs_t ompt_get_num_procs;
95 static ompt_get_num_places_t ompt_get_num_places;
96 static ompt_get_place_proc_ids_t ompt_get_place_proc_ids;
97 static ompt_get_place_num_t ompt_get_place_num;
98 static ompt_get_partition_place_nums_t ompt_get_partition_place_nums;
99 static ompt_get_proc_id_t ompt_get_proc_id;
100 static ompt_enumerate_states_t ompt_enumerate_states;
101 static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls;
102 
print_ids(int level)103 static void print_ids(int level)
104 {
105   int task_type, thread_num;
106   ompt_frame_t *frame;
107   ompt_data_t *task_parallel_data;
108   ompt_data_t *task_data;
109   int exists_task = ompt_get_task_info(level, &task_type, &task_data, &frame,
110                                        &task_parallel_data, &thread_num);
111   char buffer[2048];
112   format_task_type(task_type, buffer);
113   if (frame)
114     printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64
115            ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p, "
116            "task_type=%s=%d, thread_num=%d\n",
117            ompt_get_thread_data()->value, level,
118            exists_task ? task_parallel_data->value : 0,
119            exists_task ? task_data->value : 0, frame->exit_frame.ptr,
120            frame->enter_frame.ptr, buffer, task_type, thread_num);
121 }
122 
// Alias for the compiler builtin that returns the frame address `level` frames
// up the call stack.
#define get_frame_address(level) __builtin_frame_address(level)

// Prints "<thread id>: __builtin_frame_address(<level>)=<address>" for the
// calling thread.
#define print_frame(level)                                                     \
  printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n",                      \
         ompt_get_thread_data()->value,                                        \
         level,                                                                \
         get_frame_address(level))
128 
// print_frame_from_outlined_fn(level) is only provided when a test defines
// TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN. clang 5.0 and newer add an
// intermediate function call when compiling with the debug flag (-g), so in
// that configuration (signalled by DEBUG) the frame one level further up is
// printed instead.
#if defined(TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN)
#if defined(DEBUG) && defined(__clang__) && __clang_major__ >= 5
#define print_frame_from_outlined_fn(level) print_frame(level+1)
#else
#define print_frame_from_outlined_fn(level) print_frame(level)
#endif

#if defined(__clang__) && __clang_major__ >= 5
#warning "Clang 5.0 and later add an additional wrapper for outlined functions when compiling with debug information."
#warning "Please define -DDEBUG iff you manually pass in -g to make the tests succeed!"
#endif
#endif
142 
// define_ompt_label(id) places the label ompt_label_<id> at the current
// position so that the address of this spot in the code can be taken.
//
// For print_current_address():
//   A NOP instruction is emitted right before the label to act as the jump
//   target. Without it the compiler would insert an instruction of its own
//   choosing, and the distance between the label and the actual return address
//   could not be determined reliably. The length of the NOP depends on the
//   target and is explained next to print_possible_return_addresses().
//
// (The empty block between "#pragma omp ..." and the __asm__ statement works
// around a bug in the Intel Compiler.)
#define define_ompt_label(id)                                                  \
  {}                                                                           \
  __asm__("nop");                                                              \
  ompt_label_##id:

// get_ompt_label_address(id) yields the address of the label created by
// define_ompt_label(id). It relies on the GNU "&&label" extension, which has
// been tested with gcc, clang and icc.
#define get_ompt_label_address(id) (&& ompt_label_##id)

// print_current_address(id) prints the exact address that a previously called
// runtime function returns to.
#define print_current_address(id)                                              \
  define_ompt_label(id)                                                        \
  print_possible_return_addresses(get_ompt_label_address(id))
169 
170 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
171 // On X86 the NOP instruction is 1 byte long. In addition, the compiler inserts
172 // a MOV instruction for non-void runtime functions which is 3 bytes long.
173 #define print_possible_return_addresses(addr) \
174   printf("%" PRIu64 ": current_address=%p or %p for non-void functions\n", \
175          ompt_get_thread_data()->value, ((char *)addr) - 1, ((char *)addr) - 4)
176 #elif KMP_ARCH_PPC64
177 // On Power the NOP instruction is 4 bytes long. In addition, the compiler
178 // inserts a second NOP instruction (another 4 bytes). For non-void runtime
179 // functions Clang inserts a STW instruction (but only if compiling under
180 // -fno-PIC which will be the default with Clang 8.0, another 4 bytes).
181 #define print_possible_return_addresses(addr) \
182   printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, \
183          ((char *)addr) - 8, ((char *)addr) - 12)
184 #elif KMP_ARCH_AARCH64
185 // On AArch64 the NOP instruction is 4 bytes long, can be followed by inserted
186 // store instruction (another 4 bytes long).
187 #define print_possible_return_addresses(addr) \
188   printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, \
189          ((char *)addr) - 4, ((char *)addr) - 8)
190 #elif KMP_ARCH_RISCV64
191 #if __riscv_compressed
192 // On RV64GC the C.NOP instruction is 2 byte long. In addition, the compiler
193 // inserts a J instruction (targeting the successor basic block), which
194 // accounts for another 4 bytes. Finally, an additional J instruction may
195 // appear (adding 4 more bytes) when the C.NOP is referenced elsewhere (ie.
196 // another branch).
197 #define print_possible_return_addresses(addr) \
198   printf("%" PRIu64 ": current_address=%p or %p\n", \
199          ompt_get_thread_data()->value, ((char *)addr) - 6, ((char *)addr) - 10)
200 #else
201 // On RV64G the NOP instruction is 4 byte long. In addition, the compiler
202 // inserts a J instruction (targeting the successor basic block), which
203 // accounts for another 4 bytes. Finally, an additional J instruction may
204 // appear (adding 4 more bytes) when the NOP is referenced elsewhere (ie.
205 // another branch).
206 #define print_possible_return_addresses(addr) \
207   printf("%" PRIu64 ": current_address=%p or %p\n", \
208          ompt_get_thread_data()->value, ((char *)addr) - 8, ((char *)addr) - 12)
209 #endif
210 #else
211 #error Unsupported target architecture, cannot determine address offset!
212 #endif
213 
214 
// print_fuzzy_address(id) does a job similar to print_current_address(), but
// it discards a certain number of nibbles from the address and prints only the
// most significant bits / nibbles. Use it where the return address can only be
// approximated.
//
// To account for overflows (i.e. the most significant bits / nibbles have just
// changed because we are a few bytes above the relevant power of two), the
// block just before the current one, the current block and the two blocks
// after it are all printed.
#define print_fuzzy_address(id) \
  define_ompt_label(id) \
  print_fuzzy_address_blocks(get_ompt_label_address(id))

// If you change this define you need to adapt all capture patterns in the tests
// to include or discard the new number of nibbles!
#define FUZZY_ADDRESS_DISCARD_NIBBLES 2
#define FUZZY_ADDRESS_DISCARD_BYTES (1 << ((FUZZY_ADDRESS_DISCARD_NIBBLES) * 4))

// The pointer is converted through uintptr_t before being widened to uint64_t:
// a direct pointer-to-uint64_t cast has mismatched sizes on 32-bit targets.
// `addr` is evaluated several times, so pass an expression without side
// effects.
#define print_fuzzy_address_blocks(addr)                                       \
  printf("%" PRIu64 ": fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64              \
         " or 0x%" PRIx64 " or 0x%" PRIx64 " (%p)\n",                          \
         ompt_get_thread_data()->value,                                        \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES - 1,      \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES,          \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES + 1,      \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES + 2,      \
         (addr))
239 
// register_ompt_callback_t(name, type) registers the handler on_<name> for the
// event `name`. The handler is first stored in a local of the given `type`,
// then passed to ompt_set_callback(); a diagnostic line is printed when the
// runtime answers ompt_set_never.
#define register_ompt_callback_t(name, type)                                   \
  do {                                                                         \
    type f_##name = &on_##name;                                                \
    if (ompt_set_callback(name, (ompt_callback_t)f_##name) ==                  \
        ompt_set_never)                                                        \
      printf("0: Could not register callback '" #name "'\n");                  \
  } while (0)

// Shorthand for events whose callback type is named <name>_t.
#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t)
248 
249 #ifndef USE_PRIVATE_TOOL
250 static void
on_ompt_callback_mutex_acquire(ompt_mutex_t kind,unsigned int hint,unsigned int impl,ompt_wait_id_t wait_id,const void * codeptr_ra)251 on_ompt_callback_mutex_acquire(
252   ompt_mutex_t kind,
253   unsigned int hint,
254   unsigned int impl,
255   ompt_wait_id_t wait_id,
256   const void *codeptr_ra)
257 {
258   switch(kind)
259   {
260     case ompt_mutex_lock:
261       printf("%" PRIu64 ":" _TOOL_PREFIX
262              " ompt_event_wait_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
263              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
264              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
265       break;
266     case ompt_mutex_nest_lock:
267       printf("%" PRIu64 ":" _TOOL_PREFIX
268              " ompt_event_wait_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
269              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
270              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
271       break;
272     case ompt_mutex_critical:
273       printf("%" PRIu64 ":" _TOOL_PREFIX
274              " ompt_event_wait_critical: wait_id=%" PRIu64 ", hint=%" PRIu32
275              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
276              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
277       break;
278     case ompt_mutex_atomic:
279       printf("%" PRIu64 ":" _TOOL_PREFIX
280              " ompt_event_wait_atomic: wait_id=%" PRIu64 ", hint=%" PRIu32
281              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
282              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
283       break;
284     case ompt_mutex_ordered:
285       printf("%" PRIu64 ":" _TOOL_PREFIX
286              " ompt_event_wait_ordered: wait_id=%" PRIu64 ", hint=%" PRIu32
287              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
288              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
289       break;
290     default:
291       break;
292   }
293 }
294 
295 static void
on_ompt_callback_mutex_acquired(ompt_mutex_t kind,ompt_wait_id_t wait_id,const void * codeptr_ra)296 on_ompt_callback_mutex_acquired(
297   ompt_mutex_t kind,
298   ompt_wait_id_t wait_id,
299   const void *codeptr_ra)
300 {
301   switch(kind)
302   {
303     case ompt_mutex_lock:
304       printf("%" PRIu64 ":" _TOOL_PREFIX
305              " ompt_event_acquired_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
306              ompt_get_thread_data()->value, wait_id, codeptr_ra);
307       break;
308     case ompt_mutex_nest_lock:
309       printf("%" PRIu64 ":" _TOOL_PREFIX
310              " ompt_event_acquired_nest_lock_first: wait_id=%" PRIu64
311              ", codeptr_ra=%p \n",
312              ompt_get_thread_data()->value, wait_id, codeptr_ra);
313       break;
314     case ompt_mutex_critical:
315       printf("%" PRIu64 ":" _TOOL_PREFIX
316              " ompt_event_acquired_critical: wait_id=%" PRIu64
317              ", codeptr_ra=%p \n",
318              ompt_get_thread_data()->value, wait_id, codeptr_ra);
319       break;
320     case ompt_mutex_atomic:
321       printf("%" PRIu64 ":" _TOOL_PREFIX
322              " ompt_event_acquired_atomic: wait_id=%" PRIu64
323              ", codeptr_ra=%p \n",
324              ompt_get_thread_data()->value, wait_id, codeptr_ra);
325       break;
326     case ompt_mutex_ordered:
327       printf("%" PRIu64 ":" _TOOL_PREFIX
328              " ompt_event_acquired_ordered: wait_id=%" PRIu64
329              ", codeptr_ra=%p \n",
330              ompt_get_thread_data()->value, wait_id, codeptr_ra);
331       break;
332     default:
333       break;
334   }
335 }
336 
337 static void
on_ompt_callback_mutex_released(ompt_mutex_t kind,ompt_wait_id_t wait_id,const void * codeptr_ra)338 on_ompt_callback_mutex_released(
339   ompt_mutex_t kind,
340   ompt_wait_id_t wait_id,
341   const void *codeptr_ra)
342 {
343   switch(kind)
344   {
345     case ompt_mutex_lock:
346       printf("%" PRIu64 ":" _TOOL_PREFIX
347              " ompt_event_release_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
348              ompt_get_thread_data()->value, wait_id, codeptr_ra);
349       break;
350     case ompt_mutex_nest_lock:
351       printf("%" PRIu64 ":" _TOOL_PREFIX
352              " ompt_event_release_nest_lock_last: wait_id=%" PRIu64
353              ", codeptr_ra=%p \n",
354              ompt_get_thread_data()->value, wait_id, codeptr_ra);
355       break;
356     case ompt_mutex_critical:
357       printf("%" PRIu64 ":" _TOOL_PREFIX
358              " ompt_event_release_critical: wait_id=%" PRIu64
359              ", codeptr_ra=%p \n",
360              ompt_get_thread_data()->value, wait_id, codeptr_ra);
361       break;
362     case ompt_mutex_atomic:
363       printf("%" PRIu64 ":" _TOOL_PREFIX
364              " ompt_event_release_atomic: wait_id=%" PRIu64
365              ", codeptr_ra=%p \n",
366              ompt_get_thread_data()->value, wait_id, codeptr_ra);
367       break;
368     case ompt_mutex_ordered:
369       printf("%" PRIu64 ":" _TOOL_PREFIX
370              " ompt_event_release_ordered: wait_id=%" PRIu64
371              ", codeptr_ra=%p \n",
372              ompt_get_thread_data()->value, wait_id, codeptr_ra);
373       break;
374     default:
375       break;
376   }
377 }
378 
379 static void
on_ompt_callback_nest_lock(ompt_scope_endpoint_t endpoint,ompt_wait_id_t wait_id,const void * codeptr_ra)380 on_ompt_callback_nest_lock(
381     ompt_scope_endpoint_t endpoint,
382     ompt_wait_id_t wait_id,
383     const void *codeptr_ra)
384 {
385   switch(endpoint)
386   {
387     case ompt_scope_begin:
388       printf("%" PRIu64 ":" _TOOL_PREFIX
389              " ompt_event_acquired_nest_lock_next: wait_id=%" PRIu64
390              ", codeptr_ra=%p \n",
391              ompt_get_thread_data()->value, wait_id, codeptr_ra);
392       break;
393     case ompt_scope_end:
394       printf("%" PRIu64 ":" _TOOL_PREFIX
395              " ompt_event_release_nest_lock_prev: wait_id=%" PRIu64
396              ", codeptr_ra=%p \n",
397              ompt_get_thread_data()->value, wait_id, codeptr_ra);
398       break;
399     case ompt_scope_beginend:
400       printf("ompt_scope_beginend should never be passed to %s\n", __func__);
401       exit(-1);
402   }
403 }
404 
405 static void
on_ompt_callback_sync_region(ompt_sync_region_t kind,ompt_scope_endpoint_t endpoint,ompt_data_t * parallel_data,ompt_data_t * task_data,const void * codeptr_ra)406 on_ompt_callback_sync_region(
407   ompt_sync_region_t kind,
408   ompt_scope_endpoint_t endpoint,
409   ompt_data_t *parallel_data,
410   ompt_data_t *task_data,
411   const void *codeptr_ra)
412 {
413   switch(endpoint)
414   {
415     case ompt_scope_begin:
416       switch(kind)
417       {
418         case ompt_sync_region_barrier:
419         case ompt_sync_region_barrier_implicit:
420         case ompt_sync_region_barrier_implicit_workshare:
421         case ompt_sync_region_barrier_implicit_parallel:
422         case ompt_sync_region_barrier_teams:
423         case ompt_sync_region_barrier_explicit:
424         case ompt_sync_region_barrier_implementation:
425           printf("%" PRIu64 ":" _TOOL_PREFIX
426                  " ompt_event_barrier_begin: parallel_id=%" PRIu64
427                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
428                  ompt_get_thread_data()->value, parallel_data->value,
429                  task_data->value, codeptr_ra);
430           print_ids(0);
431           break;
432         case ompt_sync_region_taskwait:
433           printf("%" PRIu64 ":" _TOOL_PREFIX
434                  " ompt_event_taskwait_begin: parallel_id=%" PRIu64
435                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
436                  ompt_get_thread_data()->value, parallel_data->value,
437                  task_data->value, codeptr_ra);
438           break;
439         case ompt_sync_region_taskgroup:
440           printf("%" PRIu64 ":" _TOOL_PREFIX
441                  " ompt_event_taskgroup_begin: parallel_id=%" PRIu64
442                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
443                  ompt_get_thread_data()->value, parallel_data->value,
444                  task_data->value, codeptr_ra);
445           break;
446         case ompt_sync_region_reduction:
447           printf("ompt_sync_region_reduction should never be passed to "
448                  "on_ompt_callback_sync_region\n");
449           exit(-1);
450           break;
451       }
452       break;
453     case ompt_scope_end:
454       switch(kind)
455       {
456         case ompt_sync_region_barrier:
457         case ompt_sync_region_barrier_implicit:
458         case ompt_sync_region_barrier_explicit:
459         case ompt_sync_region_barrier_implicit_workshare:
460         case ompt_sync_region_barrier_implicit_parallel:
461         case ompt_sync_region_barrier_teams:
462         case ompt_sync_region_barrier_implementation:
463           printf("%" PRIu64 ":" _TOOL_PREFIX
464                  " ompt_event_barrier_end: parallel_id=%" PRIu64
465                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
466                  ompt_get_thread_data()->value,
467                  (parallel_data) ? parallel_data->value : 0, task_data->value,
468                  codeptr_ra);
469           break;
470         case ompt_sync_region_taskwait:
471           printf("%" PRIu64 ":" _TOOL_PREFIX
472                  " ompt_event_taskwait_end: parallel_id=%" PRIu64
473                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
474                  ompt_get_thread_data()->value,
475                  (parallel_data) ? parallel_data->value : 0, task_data->value,
476                  codeptr_ra);
477           break;
478         case ompt_sync_region_taskgroup:
479           printf("%" PRIu64 ":" _TOOL_PREFIX
480                  " ompt_event_taskgroup_end: parallel_id=%" PRIu64
481                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
482                  ompt_get_thread_data()->value,
483                  (parallel_data) ? parallel_data->value : 0, task_data->value,
484                  codeptr_ra);
485           break;
486         case ompt_sync_region_reduction:
487           printf("ompt_sync_region_reduction should never be passed to "
488                  "on_ompt_callback_sync_region\n");
489           exit(-1);
490           break;
491       }
492       break;
493     case ompt_scope_beginend:
494       printf("ompt_scope_beginend should never be passed to %s\n", __func__);
495       exit(-1);
496   }
497 }
498 
499 static void
on_ompt_callback_sync_region_wait(ompt_sync_region_t kind,ompt_scope_endpoint_t endpoint,ompt_data_t * parallel_data,ompt_data_t * task_data,const void * codeptr_ra)500 on_ompt_callback_sync_region_wait(
501   ompt_sync_region_t kind,
502   ompt_scope_endpoint_t endpoint,
503   ompt_data_t *parallel_data,
504   ompt_data_t *task_data,
505   const void *codeptr_ra)
506 {
507   switch(endpoint)
508   {
509     case ompt_scope_begin:
510       switch(kind)
511       {
512         case ompt_sync_region_barrier:
513         case ompt_sync_region_barrier_implicit:
514         case ompt_sync_region_barrier_implicit_workshare:
515         case ompt_sync_region_barrier_implicit_parallel:
516         case ompt_sync_region_barrier_teams:
517         case ompt_sync_region_barrier_explicit:
518         case ompt_sync_region_barrier_implementation:
519           printf("%" PRIu64 ":" _TOOL_PREFIX
520                  " ompt_event_wait_barrier_begin: parallel_id=%" PRIu64
521                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
522                  ompt_get_thread_data()->value, parallel_data->value,
523                  task_data->value, codeptr_ra);
524           break;
525         case ompt_sync_region_taskwait:
526           printf("%" PRIu64 ":" _TOOL_PREFIX
527                  " ompt_event_wait_taskwait_begin: parallel_id=%" PRIu64
528                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
529                  ompt_get_thread_data()->value, parallel_data->value,
530                  task_data->value, codeptr_ra);
531           break;
532         case ompt_sync_region_taskgroup:
533           printf("%" PRIu64 ":" _TOOL_PREFIX
534                  " ompt_event_wait_taskgroup_begin: parallel_id=%" PRIu64
535                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
536                  ompt_get_thread_data()->value, parallel_data->value,
537                  task_data->value, codeptr_ra);
538           break;
539         case ompt_sync_region_reduction:
540           printf("ompt_sync_region_reduction should never be passed to "
541                  "on_ompt_callback_sync_region_wait\n");
542           exit(-1);
543           break;
544       }
545       break;
546     case ompt_scope_end:
547       switch(kind)
548       {
549         case ompt_sync_region_barrier:
550         case ompt_sync_region_barrier_implicit:
551         case ompt_sync_region_barrier_implicit_workshare:
552         case ompt_sync_region_barrier_implicit_parallel:
553         case ompt_sync_region_barrier_teams:
554         case ompt_sync_region_barrier_explicit:
555         case ompt_sync_region_barrier_implementation:
556           printf("%" PRIu64 ":" _TOOL_PREFIX
557                  " ompt_event_wait_barrier_end: parallel_id=%" PRIu64
558                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
559                  ompt_get_thread_data()->value,
560                  (parallel_data) ? parallel_data->value : 0, task_data->value,
561                  codeptr_ra);
562           break;
563         case ompt_sync_region_taskwait:
564           printf("%" PRIu64 ":" _TOOL_PREFIX
565                  " ompt_event_wait_taskwait_end: parallel_id=%" PRIu64
566                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
567                  ompt_get_thread_data()->value,
568                  (parallel_data) ? parallel_data->value : 0, task_data->value,
569                  codeptr_ra);
570           break;
571         case ompt_sync_region_taskgroup:
572           printf("%" PRIu64 ":" _TOOL_PREFIX
573                  " ompt_event_wait_taskgroup_end: parallel_id=%" PRIu64
574                  ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
575                  ompt_get_thread_data()->value,
576                  (parallel_data) ? parallel_data->value : 0, task_data->value,
577                  codeptr_ra);
578           break;
579         case ompt_sync_region_reduction:
580           printf("ompt_sync_region_reduction should never be passed to "
581                  "on_ompt_callback_sync_region_wait\n");
582           exit(-1);
583           break;
584       }
585       break;
586     case ompt_scope_beginend:
587       printf("ompt_scope_beginend should never be passed to %s\n", __func__);
588       exit(-1);
589   }
590 }
591 
// Handler for the reduction event: prints a begin or end line with the
// parallel id (0 when `parallel_data` is NULL), the task id and the return
// address. `kind` is not inspected. The combined begin/end endpoint is not
// expected here and terminates the process.
static void on_ompt_callback_reduction(ompt_sync_region_t kind,
                                       ompt_scope_endpoint_t endpoint,
                                       ompt_data_t *parallel_data,
                                       ompt_data_t *task_data,
                                       const void *codeptr_ra) {
  if (endpoint == ompt_scope_begin) {
    printf("%" PRIu64 ":" _TOOL_PREFIX
           " ompt_event_reduction_begin: parallel_id=%" PRIu64
           ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
           ompt_get_thread_data()->value,
           (parallel_data) ? parallel_data->value : 0, task_data->value,
           codeptr_ra);
  } else if (endpoint == ompt_scope_end) {
    printf("%" PRIu64 ":" _TOOL_PREFIX
           " ompt_event_reduction_end: parallel_id=%" PRIu64
           ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
           ompt_get_thread_data()->value,
           (parallel_data) ? parallel_data->value : 0, task_data->value,
           codeptr_ra);
  } else if (endpoint == ompt_scope_beginend) {
    // Not a valid endpoint for this callback: report and abort the test.
    printf("ompt_scope_beginend should never be passed to %s\n", __func__);
    exit(-1);
  }
}
619 
620 static void
on_ompt_callback_flush(ompt_data_t * thread_data,const void * codeptr_ra)621 on_ompt_callback_flush(
622     ompt_data_t *thread_data,
623     const void *codeptr_ra)
624 {
625   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_flush: codeptr_ra=%p\n",
626          thread_data->value, codeptr_ra);
627 }
628 
629 static void
on_ompt_callback_cancel(ompt_data_t * task_data,int flags,const void * codeptr_ra)630 on_ompt_callback_cancel(
631     ompt_data_t *task_data,
632     int flags,
633     const void *codeptr_ra)
634 {
635   const char* first_flag_value;
636   const char* second_flag_value;
637   if(flags & ompt_cancel_parallel)
638     first_flag_value = ompt_cancel_flag_t_values[0];
639   else if(flags & ompt_cancel_sections)
640     first_flag_value = ompt_cancel_flag_t_values[1];
641   else if(flags & ompt_cancel_loop)
642     first_flag_value = ompt_cancel_flag_t_values[2];
643   else if(flags & ompt_cancel_taskgroup)
644     first_flag_value = ompt_cancel_flag_t_values[3];
645 
646   if(flags & ompt_cancel_activated)
647     second_flag_value = ompt_cancel_flag_t_values[4];
648   else if(flags & ompt_cancel_detected)
649     second_flag_value = ompt_cancel_flag_t_values[5];
650   else if(flags & ompt_cancel_discarded_task)
651     second_flag_value = ompt_cancel_flag_t_values[6];
652 
653   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_cancel: task_data=%" PRIu64
654          ", flags=%s|%s=%" PRIu32 ", codeptr_ra=%p\n",
655          ompt_get_thread_data()->value, task_data->value, first_flag_value,
656          second_flag_value, flags, codeptr_ra);
657 }
658 
659 static void
on_ompt_callback_implicit_task(ompt_scope_endpoint_t endpoint,ompt_data_t * parallel_data,ompt_data_t * task_data,unsigned int team_size,unsigned int thread_num,int flags)660 on_ompt_callback_implicit_task(
661     ompt_scope_endpoint_t endpoint,
662     ompt_data_t *parallel_data,
663     ompt_data_t *task_data,
664     unsigned int team_size,
665     unsigned int thread_num,
666     int flags)
667 {
668   switch(endpoint)
669   {
670     case ompt_scope_begin:
671       if(task_data->ptr)
672         printf("%s\n", "0: task_data initially not null");
673       task_data->value = ompt_get_unique_id();
674 
675       //there is no parallel_begin callback for implicit parallel region
676       //thus it is initialized in initial task
677       if(flags & ompt_task_initial)
678       {
679         char buffer[2048];
680 
681         format_task_type(flags, buffer);
682         // Only check initial task not created by teams construct
683         if (team_size == 1 && thread_num == 1 && parallel_data->ptr)
684           printf("%s\n", "0: parallel_data initially not null");
685         parallel_data->value = ompt_get_unique_id();
686         printf("%" PRIu64 ":" _TOOL_PREFIX
687                " ompt_event_initial_task_begin: parallel_id=%" PRIu64
688                ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32
689                ", index=%" PRIu32 ", flags=%" PRIu32 "\n",
690                ompt_get_thread_data()->value, parallel_data->value,
691                task_data->value, team_size, thread_num, flags);
692       } else {
693         printf("%" PRIu64 ":" _TOOL_PREFIX
694                " ompt_event_implicit_task_begin: parallel_id=%" PRIu64
695                ", task_id=%" PRIu64 ", team_size=%" PRIu32
696                ", thread_num=%" PRIu32 "\n",
697                ompt_get_thread_data()->value, parallel_data->value,
698                task_data->value, team_size, thread_num);
699       }
700 
701       break;
702     case ompt_scope_end:
703       if(flags & ompt_task_initial){
704         printf("%" PRIu64 ":" _TOOL_PREFIX
705                " ompt_event_initial_task_end: parallel_id=%" PRIu64
706                ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32
707                ", index=%" PRIu32 "\n",
708                ompt_get_thread_data()->value,
709                (parallel_data) ? parallel_data->value : 0, task_data->value,
710                team_size, thread_num);
711       } else {
712         printf("%" PRIu64 ":" _TOOL_PREFIX
713                " ompt_event_implicit_task_end: parallel_id=%" PRIu64
714                ", task_id=%" PRIu64 ", team_size=%" PRIu32
715                ", thread_num=%" PRIu32 "\n",
716                ompt_get_thread_data()->value,
717                (parallel_data) ? parallel_data->value : 0, task_data->value,
718                team_size, thread_num);
719       }
720       break;
721     case ompt_scope_beginend:
722       printf("ompt_scope_beginend should never be passed to %s\n", __func__);
723       exit(-1);
724   }
725 }
726 
727 static void
on_ompt_callback_lock_init(ompt_mutex_t kind,unsigned int hint,unsigned int impl,ompt_wait_id_t wait_id,const void * codeptr_ra)728 on_ompt_callback_lock_init(
729   ompt_mutex_t kind,
730   unsigned int hint,
731   unsigned int impl,
732   ompt_wait_id_t wait_id,
733   const void *codeptr_ra)
734 {
735   switch(kind)
736   {
737     case ompt_mutex_lock:
738       printf("%" PRIu64 ":" _TOOL_PREFIX
739              " ompt_event_init_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
740              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
741              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
742       break;
743     case ompt_mutex_nest_lock:
744       printf("%" PRIu64 ":" _TOOL_PREFIX
745              " ompt_event_init_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
746              ", impl=%" PRIu32 ", codeptr_ra=%p \n",
747              ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
748       break;
749     default:
750       break;
751   }
752 }
753 
754 static void
on_ompt_callback_lock_destroy(ompt_mutex_t kind,ompt_wait_id_t wait_id,const void * codeptr_ra)755 on_ompt_callback_lock_destroy(
756   ompt_mutex_t kind,
757   ompt_wait_id_t wait_id,
758   const void *codeptr_ra)
759 {
760   switch(kind)
761   {
762     case ompt_mutex_lock:
763       printf("%" PRIu64 ":" _TOOL_PREFIX
764              " ompt_event_destroy_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
765              ompt_get_thread_data()->value, wait_id, codeptr_ra);
766       break;
767     case ompt_mutex_nest_lock:
768       printf("%" PRIu64 ":" _TOOL_PREFIX
769              " ompt_event_destroy_nest_lock: wait_id=%" PRIu64
770              ", codeptr_ra=%p \n",
771              ompt_get_thread_data()->value, wait_id, codeptr_ra);
772       break;
773     default:
774       break;
775   }
776 }
777 
778 static void
on_ompt_callback_work(ompt_work_t wstype,ompt_scope_endpoint_t endpoint,ompt_data_t * parallel_data,ompt_data_t * task_data,uint64_t count,const void * codeptr_ra)779 on_ompt_callback_work(
780   ompt_work_t wstype,
781   ompt_scope_endpoint_t endpoint,
782   ompt_data_t *parallel_data,
783   ompt_data_t *task_data,
784   uint64_t count,
785   const void *codeptr_ra)
786 {
787   switch(endpoint)
788   {
789     case ompt_scope_begin:
790       switch(wstype)
791       {
792         case ompt_work_loop:
793         case ompt_work_loop_static:
794         case ompt_work_loop_dynamic:
795         case ompt_work_loop_guided:
796         case ompt_work_loop_other:
797         // TODO: add schedule attribute for the different work_loop types.
798         // e.g., ", schedule=%s", ..., ompt_schedule_values[wstype]
799           printf("%" PRIu64 ":" _TOOL_PREFIX
800                  " ompt_event_loop_begin: parallel_id=%" PRIu64
801                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
802                  "\n",
803                  ompt_get_thread_data()->value, parallel_data->value,
804                  task_data->value, codeptr_ra, count);
805           break;
806         case ompt_work_sections:
807           printf("%" PRIu64 ":" _TOOL_PREFIX
808                  " ompt_event_sections_begin: parallel_id=%" PRIu64
809                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
810                  "\n",
811                  ompt_get_thread_data()->value, parallel_data->value,
812                  task_data->value, codeptr_ra, count);
813           break;
814         case ompt_work_single_executor:
815           printf("%" PRIu64 ":" _TOOL_PREFIX
816                  " ompt_event_single_in_block_begin: parallel_id=%" PRIu64
817                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
818                  "\n",
819                  ompt_get_thread_data()->value, parallel_data->value,
820                  task_data->value, codeptr_ra, count);
821           break;
822         case ompt_work_single_other:
823           printf("%" PRIu64 ":" _TOOL_PREFIX
824                  " ompt_event_single_others_begin: parallel_id=%" PRIu64
825                  ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
826                  ompt_get_thread_data()->value, parallel_data->value,
827                  task_data->value, codeptr_ra, count);
828           break;
829         case ompt_work_workshare:
830           //impl
831           break;
832         case ompt_work_distribute:
833           printf("%" PRIu64 ":" _TOOL_PREFIX
834                  " ompt_event_distribute_begin: parallel_id=%" PRIu64
835                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
836                  "\n",
837                  ompt_get_thread_data()->value, parallel_data->value,
838                  task_data->value, codeptr_ra, count);
839           break;
840         case ompt_work_taskloop:
841           //impl
842           printf("%" PRIu64 ":" _TOOL_PREFIX
843                  " ompt_event_taskloop_begin: parallel_id=%" PRIu64
844                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
845                  "\n",
846                  ompt_get_thread_data()->value, parallel_data->value,
847                  task_data->value, codeptr_ra, count);
848           break;
849         case ompt_work_scope:
850           printf("%" PRIu64 ":" _TOOL_PREFIX
851                  " ompt_event_scope_begin: parallel_id=%" PRIu64
852                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
853                  "\n",
854                  ompt_get_thread_data()->value, parallel_data->value,
855                  task_data->value, codeptr_ra, count);
856           break;
857       }
858       break;
859     case ompt_scope_end:
860       switch(wstype)
861       {
862         case ompt_work_loop:
863         case ompt_work_loop_static:
864         case ompt_work_loop_dynamic:
865         case ompt_work_loop_guided:
866         case ompt_work_loop_other:
867           printf("%" PRIu64 ":" _TOOL_PREFIX
868                  " ompt_event_loop_end: parallel_id=%" PRIu64
869                  ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
870                  ompt_get_thread_data()->value, parallel_data->value,
871                  task_data->value, codeptr_ra, count);
872           break;
873         case ompt_work_sections:
874           printf("%" PRIu64 ":" _TOOL_PREFIX
875                  " ompt_event_sections_end: parallel_id=%" PRIu64
876                  ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
877                  ompt_get_thread_data()->value, parallel_data->value,
878                  task_data->value, codeptr_ra, count);
879           break;
880         case ompt_work_single_executor:
881           printf("%" PRIu64 ":" _TOOL_PREFIX
882                  " ompt_event_single_in_block_end: parallel_id=%" PRIu64
883                  ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
884                  ompt_get_thread_data()->value, parallel_data->value,
885                  task_data->value, codeptr_ra, count);
886           break;
887         case ompt_work_single_other:
888           printf("%" PRIu64 ":" _TOOL_PREFIX
889                  " ompt_event_single_others_end: parallel_id=%" PRIu64
890                  ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
891                  ompt_get_thread_data()->value, parallel_data->value,
892                  task_data->value, codeptr_ra, count);
893           break;
894         case ompt_work_workshare:
895           //impl
896           break;
897         case ompt_work_distribute:
898           printf("%" PRIu64 ":" _TOOL_PREFIX
899                  " ompt_event_distribute_end: parallel_id=%" PRIu64
900                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
901                  "\n",
902                  ompt_get_thread_data()->value, parallel_data->value,
903                  task_data->value, codeptr_ra, count);
904           break;
905         case ompt_work_taskloop:
906           //impl
907           printf("%" PRIu64 ":" _TOOL_PREFIX
908                  " ompt_event_taskloop_end: parallel_id=%" PRIu64
909                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
910                  "\n",
911                  ompt_get_thread_data()->value, parallel_data->value,
912                  task_data->value, codeptr_ra, count);
913           break;
914         case ompt_work_scope:
915           printf("%" PRIu64 ":" _TOOL_PREFIX
916                  " ompt_event_scope_end: parallel_id=%" PRIu64
917                  ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
918                  "\n",
919                  ompt_get_thread_data()->value, parallel_data->value,
920                  task_data->value, codeptr_ra, count);
921           break;
922       }
923       break;
924     case ompt_scope_beginend:
925       printf("ompt_scope_beginend should never be passed to %s\n", __func__);
926       exit(-1);
927   }
928 }
929 
// Dispatch callback: prints one trace line naming the dispatched unit of work,
// prefixed with the calling thread's id.
//   parallel_data - its value is printed as parallel_id
//   task_data     - its value is printed as task_id
//   kind          - selects the event name and how `instance` is interpreted
//   instance      - for ompt_dispatch_section, instance.ptr is printed as
//                   codeptr_ra; for the three *_chunk kinds, instance.ptr is
//                   read as an ompt_dispatch_chunk_t whose start/iterations
//                   are printed; for any other kind it is not inspected
// Fields that do not apply to the given kind are printed as NULL / 0.
static void on_ompt_callback_dispatch(
    ompt_data_t *parallel_data,
    ompt_data_t *task_data,
    ompt_dispatch_t kind,
    ompt_data_t instance) {
  // The names are string literals, so the pointer must be const-qualified
  // (required for this header to build as C++, and safer in C).
  const char *event_name = NULL;
  void *codeptr_ra = NULL;
  ompt_dispatch_chunk_t *dispatch_chunk = NULL;
  switch (kind) {
  case ompt_dispatch_section:
    event_name = "ompt_event_section_begin";
    codeptr_ra = instance.ptr;
    break;
  case ompt_dispatch_ws_loop_chunk:
    event_name = "ompt_event_ws_loop_chunk_begin";
    dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
    break;
  case ompt_dispatch_taskloop_chunk:
    event_name = "ompt_event_taskloop_chunk_begin";
    dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
    break;
  case ompt_dispatch_distribute_chunk:
    event_name = "ompt_event_distribute_chunk_begin";
    dispatch_chunk = (ompt_dispatch_chunk_t *)instance.ptr;
    break;
  default:
    // every remaining kind is reported as a loop-iteration dispatch
    event_name = "ompt_ws_loop_iteration_begin";
  }
  printf("%" PRIu64 ":" _TOOL_PREFIX
         " %s: parallel_id=%" PRIu64 ", task_id=%" PRIu64
         ", codeptr_ra=%p, chunk_start=%" PRIu64 ", chunk_iterations=%" PRIu64
         "\n", ompt_get_thread_data()->value, event_name, parallel_data->value,
         task_data->value, codeptr_ra,
         dispatch_chunk ? dispatch_chunk->start : 0,
         dispatch_chunk ? dispatch_chunk->iterations : 0);
}
966 
// Masked-region callback: prints a masked_begin or masked_end trace line,
// prefixed with the calling thread's id.
//   endpoint      - ompt_scope_begin / ompt_scope_end select the line;
//                   ompt_scope_beginend prints a diagnostic and exits the
//                   process with status -1
//   parallel_data - its value is printed as parallel_id
//   task_data     - its value is printed as task_id
//   codeptr_ra    - echoed with %p
static void on_ompt_callback_masked(ompt_scope_endpoint_t endpoint,
                                    ompt_data_t *parallel_data,
                                    ompt_data_t *task_data,
                                    const void *codeptr_ra) {
  if (endpoint == ompt_scope_begin) {
    printf("%" PRIu64 ":" _TOOL_PREFIX
           " ompt_event_masked_begin: parallel_id=%" PRIu64
           ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
           ompt_get_thread_data()->value, parallel_data->value,
           task_data->value, codeptr_ra);
  } else if (endpoint == ompt_scope_end) {
    printf("%" PRIu64 ":" _TOOL_PREFIX
           " ompt_event_masked_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64
           ", codeptr_ra=%p\n",
           ompt_get_thread_data()->value, parallel_data->value,
           task_data->value, codeptr_ra);
  } else if (endpoint == ompt_scope_beginend) {
    printf("ompt_scope_beginend should never be passed to %s\n", __func__);
    exit(-1);
  }
}
992 
// Parallel/teams begin callback: tags the new region with a fresh unique id
// and prints a "parallel_begin" or "teams_begin" trace line.
//   encountering_task_data  - its value is printed as parent_task_id
//   encountering_task_frame - exit/enter frame pointers are printed
//   parallel_data           - warned about if already non-null, then
//                             overwritten with the new id
//   requested_team_size     - printed as requested_team_size or
//                             requested_num_teams
//   flag                    - ompt_parallel_team bit picks parallel vs teams;
//                             the low four bits are printed as invoker
//   codeptr_ra              - echoed with %p
static void on_ompt_callback_parallel_begin(
    ompt_data_t *encountering_task_data,
    const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data,
    uint32_t requested_team_size, int flag, const void *codeptr_ra) {
  if (parallel_data->ptr) {
    printf("0: parallel_data initially not null\n");
  }
  parallel_data->value = ompt_get_unique_id();

  const int is_team = (flag & ompt_parallel_team) != 0;
  const int invoker = flag & 0xF;
  const char *region_name = "teams";
  const char *size_label = "num_teams";
  if (is_team) {
    region_name = "parallel";
    size_label = "team_size";
  }

  printf("%" PRIu64 ":" _TOOL_PREFIX
         " ompt_event_%s_begin: parent_task_id=%" PRIu64
         ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
         "parallel_id=%" PRIu64 ", requested_%s=%" PRIu32
         ", codeptr_ra=%p, invoker=%d\n",
         ompt_get_thread_data()->value, region_name,
         encountering_task_data->value,
         encountering_task_frame->exit_frame.ptr,
         encountering_task_frame->enter_frame.ptr, parallel_data->value,
         size_label, requested_team_size, codeptr_ra, invoker);
}
1013 
// Parallel/teams end callback: prints a "parallel_end" or "teams_end" trace
// line, prefixed with the calling thread's id.
//   parallel_data          - its value is printed as parallel_id
//   encountering_task_data - its value is printed as task_id
//   flag                   - ompt_parallel_team bit picks parallel vs teams;
//                            the low four bits are printed as invoker
//   codeptr_ra             - echoed with %p
static void on_ompt_callback_parallel_end(ompt_data_t *parallel_data,
                                          ompt_data_t *encountering_task_data,
                                          int flag, const void *codeptr_ra) {
  const char *region_name = "teams";
  if (flag & ompt_parallel_team) {
    region_name = "parallel";
  }
  const int invoker = flag & 0xF;

  printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_%s_end: parallel_id=%" PRIu64
         ", task_id=%" PRIu64 ", invoker=%d, codeptr_ra=%p\n",
         ompt_get_thread_data()->value, region_name, parallel_data->value,
         encountering_task_data->value, invoker, codeptr_ra);
}
1024 
1025 static void
on_ompt_callback_task_create(ompt_data_t * encountering_task_data,const ompt_frame_t * encountering_task_frame,ompt_data_t * new_task_data,int type,int has_dependences,const void * codeptr_ra)1026 on_ompt_callback_task_create(
1027     ompt_data_t *encountering_task_data,
1028     const ompt_frame_t *encountering_task_frame,
1029     ompt_data_t* new_task_data,
1030     int type,
1031     int has_dependences,
1032     const void *codeptr_ra)
1033 {
1034   if(new_task_data->ptr)
1035     printf("0: new_task_data initially not null\n");
1036   new_task_data->value = ompt_get_unique_id();
1037   char buffer[2048];
1038 
1039   format_task_type(type, buffer);
1040 
1041   printf(
1042       "%" PRIu64 ":" _TOOL_PREFIX
1043       " ompt_event_task_create: parent_task_id=%" PRIu64
1044       ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
1045       "new_task_id=%" PRIu64
1046       ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n",
1047       ompt_get_thread_data()->value,
1048       encountering_task_data ? encountering_task_data->value : 0,
1049       encountering_task_frame ? encountering_task_frame->exit_frame.ptr : NULL,
1050       encountering_task_frame ? encountering_task_frame->enter_frame.ptr : NULL,
1051       new_task_data->value, codeptr_ra, buffer, type,
1052       has_dependences ? "yes" : "no");
1053 }
1054 
1055 static void
on_ompt_callback_task_schedule(ompt_data_t * first_task_data,ompt_task_status_t prior_task_status,ompt_data_t * second_task_data)1056 on_ompt_callback_task_schedule(
1057     ompt_data_t *first_task_data,
1058     ompt_task_status_t prior_task_status,
1059     ompt_data_t *second_task_data)
1060 {
1061   printf("%" PRIu64 ":" _TOOL_PREFIX
1062          " ompt_event_task_schedule: first_task_id=%" PRIu64
1063          ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n",
1064          ompt_get_thread_data()->value, first_task_data->value,
1065          (second_task_data ? second_task_data->value : -1),
1066          ompt_task_status_t_values[prior_task_status], prior_task_status);
1067   if (prior_task_status == ompt_task_complete ||
1068       prior_task_status == ompt_task_late_fulfill ||
1069       prior_task_status == ompt_taskwait_complete) {
1070     printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_task_end: task_id=%" PRIu64
1071            "\n", ompt_get_thread_data()->value, first_task_data->value);
1072   }
1073 }
1074 
1075 static void
on_ompt_callback_dependences(ompt_data_t * task_data,const ompt_dependence_t * deps,int ndeps)1076 on_ompt_callback_dependences(
1077   ompt_data_t *task_data,
1078   const ompt_dependence_t *deps,
1079   int ndeps)
1080 {
1081   char buffer[2048];
1082   char *progress = buffer;
1083   int i;
1084   for (i = 0; i < ndeps && progress < buffer + 2000; i++) {
1085     if (deps[i].dependence_type == ompt_dependence_type_source ||
1086         deps[i].dependence_type == ompt_dependence_type_sink)
1087       progress +=
1088           sprintf(progress, "(%" PRIu64 ", %s), ", deps[i].variable.value,
1089                   ompt_dependence_type_t_values[deps[i].dependence_type]);
1090     else
1091       progress +=
1092           sprintf(progress, "(%p, %s), ", deps[i].variable.ptr,
1093                   ompt_dependence_type_t_values[deps[i].dependence_type]);
1094   }
1095   if (ndeps > 0)
1096     progress[-2] = 0;
1097   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_dependences: task_id=%" PRIu64
1098          ", deps=[%s], ndeps=%d\n",
1099          ompt_get_thread_data()->value, task_data->value, buffer, ndeps);
1100 }
1101 
1102 static void
on_ompt_callback_task_dependence(ompt_data_t * first_task_data,ompt_data_t * second_task_data)1103 on_ompt_callback_task_dependence(
1104   ompt_data_t *first_task_data,
1105   ompt_data_t *second_task_data)
1106 {
1107   printf("%" PRIu64 ":" _TOOL_PREFIX
1108          " ompt_event_task_dependence_pair: first_task_id=%" PRIu64
1109          ", second_task_id=%" PRIu64 "\n",
1110          ompt_get_thread_data()->value, first_task_data->value,
1111          second_task_data->value);
1112 }
1113 
1114 static void
on_ompt_callback_thread_begin(ompt_thread_t thread_type,ompt_data_t * thread_data)1115 on_ompt_callback_thread_begin(
1116   ompt_thread_t thread_type,
1117   ompt_data_t *thread_data)
1118 {
1119   if(thread_data->ptr)
1120     printf("%s\n", "0: thread_data initially not null");
1121   thread_data->value = ompt_get_unique_id();
1122   printf("%" PRIu64 ":" _TOOL_PREFIX
1123          " ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n",
1124          ompt_get_thread_data()->value, ompt_thread_t_values[thread_type],
1125          thread_type, thread_data->value);
1126 }
1127 
1128 static void
on_ompt_callback_thread_end(ompt_data_t * thread_data)1129 on_ompt_callback_thread_end(
1130   ompt_data_t *thread_data)
1131 {
1132   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_thread_end: thread_id=%" PRIu64
1133          "\n",
1134          ompt_get_thread_data()->value, thread_data->value);
1135 }
1136 
1137 static int
on_ompt_callback_control_tool(uint64_t command,uint64_t modifier,void * arg,const void * codeptr_ra)1138 on_ompt_callback_control_tool(
1139   uint64_t command,
1140   uint64_t modifier,
1141   void *arg,
1142   const void *codeptr_ra)
1143 {
1144   ompt_frame_t* omptTaskFrame;
1145   ompt_get_task_info(0, NULL, (ompt_data_t**) NULL, &omptTaskFrame, NULL, NULL);
1146   printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_control_tool: command=%" PRIu64
1147          ", modifier=%" PRIu64
1148          ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, "
1149          "current_task_frame.reenter=%p \n",
1150          ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra,
1151          omptTaskFrame->exit_frame.ptr, omptTaskFrame->enter_frame.ptr);
1152 
1153   // the following would interfere with expected output for OMPT tests, so skip
1154 #ifndef _OMPT_TESTS
1155   // print task data
1156   int task_level = 0;
1157   ompt_data_t *task_data;
1158   while (ompt_get_task_info(task_level, NULL, (ompt_data_t **)&task_data, NULL,
1159                             NULL, NULL)) {
1160     printf("%" PRIu64 ":" _TOOL_PREFIX " task level %d: task_id=%" PRIu64 "\n",
1161            ompt_get_thread_data()->value, task_level, task_data->value);
1162     task_level++;
1163   }
1164 
1165   // print parallel data
1166   int parallel_level = 0;
1167   ompt_data_t *parallel_data;
1168   while (ompt_get_parallel_info(parallel_level, (ompt_data_t **)&parallel_data,
1169                                 NULL)) {
1170     printf("%" PRIu64 ":" _TOOL_PREFIX " parallel level %d: parallel_id=%" PRIu64
1171            "\n",
1172            ompt_get_thread_data()->value, parallel_level, parallel_data->value);
1173     parallel_level++;
1174   }
1175 #endif
1176   return 0; //success
1177 }
1178 
// Error callback: prints an ompt_event_runtime_error trace line, prefixed with
// the calling thread's id. Unlike the other callbacks, this line does not
// include _TOOL_PREFIX.
//   severity   - printed as an unsigned integer
//   message    - printed with %s
//   length     - printed as a 64-bit unsigned integer
//   codeptr_ra - echoed with %p
static void on_ompt_callback_error(ompt_severity_t severity,
                                   const char *message, size_t length,
                                   const void *codeptr_ra) {
  const uint64_t message_length = (uint64_t)length;

  printf("%" PRIu64 ": ompt_event_runtime_error: severity=%" PRIu32
         ", message=%s, length=%" PRIu64 ", codeptr_ra=%p\n",
         ompt_get_thread_data()->value, severity, message, message_length,
         codeptr_ra);
}
1187 
// Tool initializer: resolves the runtime entry points by name through
// `lookup`, registers this file's callbacks, and prints a line showing how a
// null pointer is formatted by %p.
//   lookup             - called with each entry point's name; the result is
//                        cast and stored in the like-named function pointer
//                        (those pointers are declared outside this block)
//   initial_device_num - not used here
//   tool_data          - not used here
// Returns 1 (success).
int ompt_initialize(
  ompt_function_lookup_t lookup,
  int initial_device_num,
  ompt_data_t *tool_data)
{
  // Entry points used by the callbacks in this file.
  ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback");
  ompt_get_callback = (ompt_get_callback_t) lookup("ompt_get_callback");
  ompt_get_state = (ompt_get_state_t) lookup("ompt_get_state");
  ompt_get_task_info = (ompt_get_task_info_t) lookup("ompt_get_task_info");
  ompt_get_task_memory = (ompt_get_task_memory_t)lookup("ompt_get_task_memory");
  ompt_get_thread_data = (ompt_get_thread_data_t) lookup("ompt_get_thread_data");
  ompt_get_parallel_info = (ompt_get_parallel_info_t) lookup("ompt_get_parallel_info");
  ompt_get_unique_id = (ompt_get_unique_id_t) lookup("ompt_get_unique_id");
  ompt_finalize_tool = (ompt_finalize_tool_t)lookup("ompt_finalize_tool");

  // The result is deliberately discarded.
  // NOTE(review): presumably this consumes the first id so that ids handed
  // out later are never 0 - confirm against the runtime.
  ompt_get_unique_id();

  // Entry points for processor/place queries and enumeration.
  ompt_get_num_procs = (ompt_get_num_procs_t) lookup("ompt_get_num_procs");
  ompt_get_num_places = (ompt_get_num_places_t) lookup("ompt_get_num_places");
  ompt_get_place_proc_ids = (ompt_get_place_proc_ids_t) lookup("ompt_get_place_proc_ids");
  ompt_get_place_num = (ompt_get_place_num_t) lookup("ompt_get_place_num");
  ompt_get_partition_place_nums = (ompt_get_partition_place_nums_t) lookup("ompt_get_partition_place_nums");
  ompt_get_proc_id = (ompt_get_proc_id_t) lookup("ompt_get_proc_id");
  ompt_enumerate_states = (ompt_enumerate_states_t) lookup("ompt_enumerate_states");
  ompt_enumerate_mutex_impls = (ompt_enumerate_mutex_impls_t) lookup("ompt_enumerate_mutex_impls");

  // Callback registration. The register_ompt_callback* macros are defined
  // outside this block; the _t variant takes the callback type explicitly for
  // events whose handler uses a type named differently from the event.
  register_ompt_callback(ompt_callback_mutex_acquire);
  register_ompt_callback_t(ompt_callback_mutex_acquired, ompt_callback_mutex_t);
  register_ompt_callback_t(ompt_callback_mutex_released, ompt_callback_mutex_t);
  register_ompt_callback(ompt_callback_nest_lock);
  register_ompt_callback(ompt_callback_sync_region);
  register_ompt_callback_t(ompt_callback_sync_region_wait, ompt_callback_sync_region_t);
  register_ompt_callback_t(ompt_callback_reduction, ompt_callback_sync_region_t);
  register_ompt_callback(ompt_callback_control_tool);
  register_ompt_callback(ompt_callback_flush);
  register_ompt_callback(ompt_callback_cancel);
  register_ompt_callback(ompt_callback_implicit_task);
  register_ompt_callback_t(ompt_callback_lock_init, ompt_callback_mutex_acquire_t);
  register_ompt_callback_t(ompt_callback_lock_destroy, ompt_callback_mutex_t);
  register_ompt_callback(ompt_callback_work);
  register_ompt_callback(ompt_callback_dispatch);
  register_ompt_callback(ompt_callback_masked);
  register_ompt_callback(ompt_callback_parallel_begin);
  register_ompt_callback(ompt_callback_parallel_end);
  register_ompt_callback(ompt_callback_task_create);
  register_ompt_callback(ompt_callback_task_schedule);
  register_ompt_callback(ompt_callback_dependences);
  register_ompt_callback(ompt_callback_task_dependence);
  register_ompt_callback(ompt_callback_thread_begin);
  register_ompt_callback(ompt_callback_thread_end);
  register_ompt_callback(ompt_callback_error);
  // Shows how this platform's printf renders a null pointer with %p.
  // NOTE(review): presumably consumed by the test checkers - confirm.
  printf("0: NULL_POINTER=%p\n", (void*)NULL);
  return 1; //success
}
1242 
// Tool finalizer: prints the runtime-shutdown trace line. tool_data is not
// used.
void ompt_finalize(ompt_data_t *tool_data)
{
  static const char shutdown_line[] = "0: ompt_event_runtime_shutdown\n";

  (void)tool_data;
  printf("%s", shutdown_line);
}
1247 
1248 #ifdef __cplusplus
1249 extern "C" {
1250 #endif
ompt_start_tool(unsigned int omp_version,const char * runtime_version)1251 ompt_start_tool_result_t* ompt_start_tool(
1252   unsigned int omp_version,
1253   const char *runtime_version)
1254 {
1255   static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0};
1256   return &ompt_start_tool_result;
1257 }
1258 #ifdef __cplusplus
1259 }
1260 #endif
1261 #endif // ifndef USE_PRIVATE_TOOL
1262 #ifdef _OMPT_TESTS
1263 #undef _OMPT_TESTS
1264 #endif
1265