1 #ifndef _BSD_SOURCE
2 #define _BSD_SOURCE
3 #endif
4 #ifndef _DEFAULT_SOURCE
5 #define _DEFAULT_SOURCE
6 #endif
7 #include <stdio.h>
8 #ifndef __STDC_FORMAT_MACROS
9 #define __STDC_FORMAT_MACROS
10 #endif
11 #include <inttypes.h>
12 #include <omp.h>
13 #include <omp-tools.h>
14 #include "ompt-signal.h"
15
16 // Used to detect architecture
17 #include "../../src/kmp_platform.h"
18
19 #ifndef _TOOL_PREFIX
20 #define _TOOL_PREFIX ""
21 // If no _TOOL_PREFIX is set, we assume that we run as part of an OMPT test
22 #define _OMPT_TESTS
23 #endif
24
// String names for ompt_thread_t values; the number in each trailing comment
// is the entry's array index. Slot 0 is a placeholder because the OMPT
// enumerators start at 1. ompt_thread_unknown (4) is included so that every
// ompt_thread_t enumerator has an entry in the table.
static const char *ompt_thread_t_values[] = {
    "ompt_thread_UNDEFINED", // 0
    "ompt_thread_initial", // 1
    "ompt_thread_worker", // 2
    "ompt_thread_other", // 3
    "ompt_thread_unknown" // 4
};
28
// String names for task status values. The number in each trailing comment is
// the entry's array index; slot 0 is a placeholder.
static const char *ompt_task_status_t_values[] = {
    /* 0 */ "ompt_task_UNDEFINED",
    /* 1 */ "ompt_task_complete",
    /* 2 */ "ompt_task_yield",
    /* 3 */ "ompt_task_cancel",
    /* 4 */ "ompt_task_detach",
    /* 5 */ "ompt_task_early_fulfill",
    /* 6 */ "ompt_task_late_fulfill",
    /* 7 */ "ompt_task_switch",
    /* 8 */ "ompt_taskwait_complete",
};
// String names for the cancel flags. The table is addressed by position
// (0..6), not by flag value: on_ompt_callback_cancel() picks slots 0-3 for the
// cancelled construct and slots 4-6 for the cancellation state.
static const char *ompt_cancel_flag_t_values[] = {
    /* 0 */ "ompt_cancel_parallel",
    /* 1 */ "ompt_cancel_sections",
    /* 2 */ "ompt_cancel_loop",
    /* 3 */ "ompt_cancel_taskgroup",
    /* 4 */ "ompt_cancel_activated",
    /* 5 */ "ompt_cancel_detected",
    /* 6 */ "ompt_cancel_discarded_task",
};
49
// String names for dependence types. The number in each leading comment is
// the entry's array index; slot 0 is a placeholder.
static const char *ompt_dependence_type_t_values[] = {
    /* 0 */ "ompt_dependence_type_UNDEFINED",
    /* 1 */ "ompt_dependence_type_in",
    /* 2 */ "ompt_dependence_type_out",
    /* 3 */ "ompt_dependence_type_inout",
    /* 4 */ "ompt_dependence_type_mutexinoutset",
    /* 5 */ "ompt_dependence_type_source",
    /* 6 */ "ompt_dependence_type_sink",
    /* 7 */ "ompt_dependence_type_inoutset",
};
60
format_task_type(int type,char * buffer)61 static void format_task_type(int type, char *buffer) {
62 char *progress = buffer;
63 if (type & ompt_task_initial)
64 progress += sprintf(progress, "ompt_task_initial");
65 if (type & ompt_task_implicit)
66 progress += sprintf(progress, "ompt_task_implicit");
67 if (type & ompt_task_explicit)
68 progress += sprintf(progress, "ompt_task_explicit");
69 if (type & ompt_task_target)
70 progress += sprintf(progress, "ompt_task_target");
71 if (type & ompt_task_taskwait)
72 progress += sprintf(progress, "ompt_task_taskwait");
73 if (type & ompt_task_undeferred)
74 progress += sprintf(progress, "|ompt_task_undeferred");
75 if (type & ompt_task_untied)
76 progress += sprintf(progress, "|ompt_task_untied");
77 if (type & ompt_task_final)
78 progress += sprintf(progress, "|ompt_task_final");
79 if (type & ompt_task_mergeable)
80 progress += sprintf(progress, "|ompt_task_mergeable");
81 if (type & ompt_task_merged)
82 progress += sprintf(progress, "|ompt_task_merged");
83 }
84
// Function pointers to the OMPT runtime entry points used by the helpers,
// macros and callbacks in this file. They are only declared here; nothing in
// this part of the file assigns them.
// NOTE(review): presumably filled in during tool initialization through the
// runtime's lookup function -- confirm against the initializer (not shown).
static ompt_set_callback_t ompt_set_callback;
static ompt_get_callback_t ompt_get_callback;
static ompt_get_state_t ompt_get_state;
static ompt_get_task_info_t ompt_get_task_info;
static ompt_get_task_memory_t ompt_get_task_memory;
static ompt_get_thread_data_t ompt_get_thread_data;
static ompt_get_parallel_info_t ompt_get_parallel_info;
static ompt_get_unique_id_t ompt_get_unique_id;
static ompt_finalize_tool_t ompt_finalize_tool;
static ompt_get_num_procs_t ompt_get_num_procs;
static ompt_get_num_places_t ompt_get_num_places;
static ompt_get_place_proc_ids_t ompt_get_place_proc_ids;
static ompt_get_place_num_t ompt_get_place_num;
static ompt_get_partition_place_nums_t ompt_get_partition_place_nums;
static ompt_get_proc_id_t ompt_get_proc_id;
static ompt_enumerate_states_t ompt_enumerate_states;
static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls;
102
print_ids(int level)103 static void print_ids(int level)
104 {
105 int task_type, thread_num;
106 ompt_frame_t *frame;
107 ompt_data_t *task_parallel_data;
108 ompt_data_t *task_data;
109 int exists_task = ompt_get_task_info(level, &task_type, &task_data, &frame,
110 &task_parallel_data, &thread_num);
111 char buffer[2048];
112 format_task_type(task_type, buffer);
113 if (frame)
114 printf("%" PRIu64 ": task level %d: parallel_id=%" PRIu64
115 ", task_id=%" PRIu64 ", exit_frame=%p, reenter_frame=%p, "
116 "task_type=%s=%d, thread_num=%d\n",
117 ompt_get_thread_data()->value, level,
118 exists_task ? task_parallel_data->value : 0,
119 exists_task ? task_data->value : 0, frame->exit_frame.ptr,
120 frame->enter_frame.ptr, buffer, task_type, thread_num);
121 }
122
// Thin wrapper around the compiler builtin; `level` is forwarded unchanged.
#define get_frame_address(level) __builtin_frame_address(level)

// Prints the calling thread's OMPT thread-data value followed by `level` and
// the frame address obtained for it. `level` is expanded twice.
#define print_frame(level) \
  printf("%" PRIu64 ": __builtin_frame_address(%d)=%p\n", \
         ompt_get_thread_data()->value, level, get_frame_address(level))
128
// clang (version 5.0 and above) adds an intermediate function call with debug
// flag (-g).
// print_frame_from_outlined_fn() is only provided when
// TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN is defined. With DEBUG defined on
// clang >= 5 it prints the frame one level further up (level+1); otherwise it
// is the same as print_frame(level).
#if defined(TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN)
#if defined(DEBUG) && defined(__clang__) && __clang_major__ >= 5
#define print_frame_from_outlined_fn(level) print_frame(level+1)
#else
#define print_frame_from_outlined_fn(level) print_frame(level)
#endif

// Remind clang >= 5 users that DEBUG has to match the use of -g.
#if defined(__clang__) && __clang_major__ >= 5
#warning "Clang 5.0 and later add an additional wrapper for outlined functions when compiling with debug information."
#warning "Please define -DDEBUG iff you manually pass in -g to make the tests succeed!"
#endif
#endif
142
143 // This macro helps to define a label at the current position that can be used
144 // to get the current address in the code.
145 //
146 // For print_current_address():
147 // To reliably determine the offset between the address of the label and the
148 // actual return address, we insert a NOP instruction as a jump target as the
149 // compiler would otherwise insert an instruction that we can't control. The
150 // instruction length is target dependent and is explained below.
151 //
152 // (The empty block between "#pragma omp ..." and the __asm__ statement is a
153 // workaround for a bug in the Intel Compiler.)
// Note: the expansion ends in a label definition (ompt_label_<id>:), so the
// macro can only be used inside a function body and `id` has to be unique
// within that function.
#define define_ompt_label(id) \
  {} \
  __asm__("nop"); \
ompt_label_##id:

// This macro helps to get the address of a label that is inserted by the above
// macro define_ompt_label(). The address is obtained with a GNU extension
// (&&label) that has been tested with gcc, clang and icc.
#define get_ompt_label_address(id) (&& ompt_label_##id)

// This macro prints the exact address that a previously called runtime function
// returns to.
// It first places the label for `id` at the current position and then passes
// the label's address to print_possible_return_addresses().
#define print_current_address(id) \
  define_ompt_label(id) \
  print_possible_return_addresses(get_ompt_label_address(id))
169
170 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
171 // On X86 the NOP instruction is 1 byte long. In addition, the compiler inserts
172 // a MOV instruction for non-void runtime functions which is 3 bytes long.
173 #define print_possible_return_addresses(addr) \
174 printf("%" PRIu64 ": current_address=%p or %p for non-void functions\n", \
175 ompt_get_thread_data()->value, ((char *)addr) - 1, ((char *)addr) - 4)
176 #elif KMP_ARCH_PPC64
177 // On Power the NOP instruction is 4 bytes long. In addition, the compiler
178 // inserts a second NOP instruction (another 4 bytes). For non-void runtime
179 // functions Clang inserts a STW instruction (but only if compiling under
180 // -fno-PIC which will be the default with Clang 8.0, another 4 bytes).
181 #define print_possible_return_addresses(addr) \
182 printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, \
183 ((char *)addr) - 8, ((char *)addr) - 12)
184 #elif KMP_ARCH_AARCH64
185 // On AArch64 the NOP instruction is 4 bytes long, can be followed by inserted
186 // store instruction (another 4 bytes long).
187 #define print_possible_return_addresses(addr) \
188 printf("%" PRIu64 ": current_address=%p or %p\n", ompt_get_thread_data()->value, \
189 ((char *)addr) - 4, ((char *)addr) - 8)
190 #elif KMP_ARCH_RISCV64
191 #if __riscv_compressed
192 // On RV64GC the C.NOP instruction is 2 byte long. In addition, the compiler
193 // inserts a J instruction (targeting the successor basic block), which
194 // accounts for another 4 bytes. Finally, an additional J instruction may
195 // appear (adding 4 more bytes) when the C.NOP is referenced elsewhere (ie.
196 // another branch).
197 #define print_possible_return_addresses(addr) \
198 printf("%" PRIu64 ": current_address=%p or %p\n", \
199 ompt_get_thread_data()->value, ((char *)addr) - 6, ((char *)addr) - 10)
200 #else
201 // On RV64G the NOP instruction is 4 byte long. In addition, the compiler
202 // inserts a J instruction (targeting the successor basic block), which
203 // accounts for another 4 bytes. Finally, an additional J instruction may
204 // appear (adding 4 more bytes) when the NOP is referenced elsewhere (ie.
205 // another branch).
206 #define print_possible_return_addresses(addr) \
207 printf("%" PRIu64 ": current_address=%p or %p\n", \
208 ompt_get_thread_data()->value, ((char *)addr) - 8, ((char *)addr) - 12)
209 #endif
210 #else
211 #error Unsupported target architecture, cannot determine address offset!
212 #endif
213
214
// This macro performs a somewhat similar job to print_current_address(), except
// that it discards a certain number of nibbles from the address and only prints
// the most significant bits / nibbles. This can be used for cases where the
// return address can only be approximated.
//
// To account for overflows (i.e., the most significant bits / nibbles have just
// changed because we are a few bytes above the relevant power of two), the
// neighbouring blocks are printed along with the "current" block; see
// print_fuzzy_address_blocks() for the exact set.
#define print_fuzzy_address(id) \
  define_ompt_label(id) \
  print_fuzzy_address_blocks(get_ompt_label_address(id))
226
// If you change this define you need to adapt all capture patterns in the tests
// to include or discard the new number of nibbles!
#define FUZZY_ADDRESS_DISCARD_NIBBLES 2
// Size of one address block: 16^FUZZY_ADDRESS_DISCARD_NIBBLES bytes.
#define FUZZY_ADDRESS_DISCARD_BYTES (1 << ((FUZZY_ADDRESS_DISCARD_NIBBLES) * 4))
// Prints the calling thread's OMPT thread-data value, then the block index
// (address / FUZZY_ADDRESS_DISCARD_BYTES) of the block before `addr`, of the
// block containing `addr` and of the two following blocks, and finally `addr`
// itself. The pointer is converted through uintptr_t before being widened to
// uint64_t so the conversion is also clean on targets with 32-bit pointers.
// `addr` is expanded several times.
#define print_fuzzy_address_blocks(addr)                                       \
  printf("%" PRIu64 ": fuzzy_address=0x%" PRIx64 " or 0x%" PRIx64              \
         " or 0x%" PRIx64 " or 0x%" PRIx64 " (%p)\n",                          \
         ompt_get_thread_data()->value,                                        \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES - 1,      \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES,          \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES + 1,      \
         ((uint64_t)(uintptr_t)(addr)) / FUZZY_ADDRESS_DISCARD_BYTES + 2,      \
         (addr))
239
// Registers the function on_<name> as the handler for the OMPT callback
// `name`. The handler is first assigned to a local of function-pointer type
// `type`, so the compiler checks its signature before it is cast to the
// generic ompt_callback_t. A message is printed when ompt_set_callback()
// returns ompt_set_never.
#define register_ompt_callback_t(name, type) \
  do { \
    type f_##name = &on_##name; \
    if (ompt_set_callback(name, (ompt_callback_t)f_##name) == ompt_set_never) \
      printf("0: Could not register callback '" #name "'\n"); \
  } while (0)

// Shorthand for the case where the handler's type is named <name>_t.
#define register_ompt_callback(name) register_ompt_callback_t(name, name##_t)
248
249 #ifndef USE_PRIVATE_TOOL
250 static void
on_ompt_callback_mutex_acquire(ompt_mutex_t kind,unsigned int hint,unsigned int impl,ompt_wait_id_t wait_id,const void * codeptr_ra)251 on_ompt_callback_mutex_acquire(
252 ompt_mutex_t kind,
253 unsigned int hint,
254 unsigned int impl,
255 ompt_wait_id_t wait_id,
256 const void *codeptr_ra)
257 {
258 switch(kind)
259 {
260 case ompt_mutex_lock:
261 printf("%" PRIu64 ":" _TOOL_PREFIX
262 " ompt_event_wait_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
263 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
264 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
265 break;
266 case ompt_mutex_nest_lock:
267 printf("%" PRIu64 ":" _TOOL_PREFIX
268 " ompt_event_wait_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
269 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
270 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
271 break;
272 case ompt_mutex_critical:
273 printf("%" PRIu64 ":" _TOOL_PREFIX
274 " ompt_event_wait_critical: wait_id=%" PRIu64 ", hint=%" PRIu32
275 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
276 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
277 break;
278 case ompt_mutex_atomic:
279 printf("%" PRIu64 ":" _TOOL_PREFIX
280 " ompt_event_wait_atomic: wait_id=%" PRIu64 ", hint=%" PRIu32
281 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
282 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
283 break;
284 case ompt_mutex_ordered:
285 printf("%" PRIu64 ":" _TOOL_PREFIX
286 " ompt_event_wait_ordered: wait_id=%" PRIu64 ", hint=%" PRIu32
287 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
288 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
289 break;
290 default:
291 break;
292 }
293 }
294
295 static void
on_ompt_callback_mutex_acquired(ompt_mutex_t kind,ompt_wait_id_t wait_id,const void * codeptr_ra)296 on_ompt_callback_mutex_acquired(
297 ompt_mutex_t kind,
298 ompt_wait_id_t wait_id,
299 const void *codeptr_ra)
300 {
301 switch(kind)
302 {
303 case ompt_mutex_lock:
304 printf("%" PRIu64 ":" _TOOL_PREFIX
305 " ompt_event_acquired_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
306 ompt_get_thread_data()->value, wait_id, codeptr_ra);
307 break;
308 case ompt_mutex_nest_lock:
309 printf("%" PRIu64 ":" _TOOL_PREFIX
310 " ompt_event_acquired_nest_lock_first: wait_id=%" PRIu64
311 ", codeptr_ra=%p \n",
312 ompt_get_thread_data()->value, wait_id, codeptr_ra);
313 break;
314 case ompt_mutex_critical:
315 printf("%" PRIu64 ":" _TOOL_PREFIX
316 " ompt_event_acquired_critical: wait_id=%" PRIu64
317 ", codeptr_ra=%p \n",
318 ompt_get_thread_data()->value, wait_id, codeptr_ra);
319 break;
320 case ompt_mutex_atomic:
321 printf("%" PRIu64 ":" _TOOL_PREFIX
322 " ompt_event_acquired_atomic: wait_id=%" PRIu64
323 ", codeptr_ra=%p \n",
324 ompt_get_thread_data()->value, wait_id, codeptr_ra);
325 break;
326 case ompt_mutex_ordered:
327 printf("%" PRIu64 ":" _TOOL_PREFIX
328 " ompt_event_acquired_ordered: wait_id=%" PRIu64
329 ", codeptr_ra=%p \n",
330 ompt_get_thread_data()->value, wait_id, codeptr_ra);
331 break;
332 default:
333 break;
334 }
335 }
336
337 static void
on_ompt_callback_mutex_released(ompt_mutex_t kind,ompt_wait_id_t wait_id,const void * codeptr_ra)338 on_ompt_callback_mutex_released(
339 ompt_mutex_t kind,
340 ompt_wait_id_t wait_id,
341 const void *codeptr_ra)
342 {
343 switch(kind)
344 {
345 case ompt_mutex_lock:
346 printf("%" PRIu64 ":" _TOOL_PREFIX
347 " ompt_event_release_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
348 ompt_get_thread_data()->value, wait_id, codeptr_ra);
349 break;
350 case ompt_mutex_nest_lock:
351 printf("%" PRIu64 ":" _TOOL_PREFIX
352 " ompt_event_release_nest_lock_last: wait_id=%" PRIu64
353 ", codeptr_ra=%p \n",
354 ompt_get_thread_data()->value, wait_id, codeptr_ra);
355 break;
356 case ompt_mutex_critical:
357 printf("%" PRIu64 ":" _TOOL_PREFIX
358 " ompt_event_release_critical: wait_id=%" PRIu64
359 ", codeptr_ra=%p \n",
360 ompt_get_thread_data()->value, wait_id, codeptr_ra);
361 break;
362 case ompt_mutex_atomic:
363 printf("%" PRIu64 ":" _TOOL_PREFIX
364 " ompt_event_release_atomic: wait_id=%" PRIu64
365 ", codeptr_ra=%p \n",
366 ompt_get_thread_data()->value, wait_id, codeptr_ra);
367 break;
368 case ompt_mutex_ordered:
369 printf("%" PRIu64 ":" _TOOL_PREFIX
370 " ompt_event_release_ordered: wait_id=%" PRIu64
371 ", codeptr_ra=%p \n",
372 ompt_get_thread_data()->value, wait_id, codeptr_ra);
373 break;
374 default:
375 break;
376 }
377 }
378
379 static void
on_ompt_callback_nest_lock(ompt_scope_endpoint_t endpoint,ompt_wait_id_t wait_id,const void * codeptr_ra)380 on_ompt_callback_nest_lock(
381 ompt_scope_endpoint_t endpoint,
382 ompt_wait_id_t wait_id,
383 const void *codeptr_ra)
384 {
385 switch(endpoint)
386 {
387 case ompt_scope_begin:
388 printf("%" PRIu64 ":" _TOOL_PREFIX
389 " ompt_event_acquired_nest_lock_next: wait_id=%" PRIu64
390 ", codeptr_ra=%p \n",
391 ompt_get_thread_data()->value, wait_id, codeptr_ra);
392 break;
393 case ompt_scope_end:
394 printf("%" PRIu64 ":" _TOOL_PREFIX
395 " ompt_event_release_nest_lock_prev: wait_id=%" PRIu64
396 ", codeptr_ra=%p \n",
397 ompt_get_thread_data()->value, wait_id, codeptr_ra);
398 break;
399 case ompt_scope_beginend:
400 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
401 exit(-1);
402 }
403 }
404
405 static void
on_ompt_callback_sync_region(ompt_sync_region_t kind,ompt_scope_endpoint_t endpoint,ompt_data_t * parallel_data,ompt_data_t * task_data,const void * codeptr_ra)406 on_ompt_callback_sync_region(
407 ompt_sync_region_t kind,
408 ompt_scope_endpoint_t endpoint,
409 ompt_data_t *parallel_data,
410 ompt_data_t *task_data,
411 const void *codeptr_ra)
412 {
413 switch(endpoint)
414 {
415 case ompt_scope_begin:
416 switch(kind)
417 {
418 case ompt_sync_region_barrier:
419 case ompt_sync_region_barrier_implicit:
420 case ompt_sync_region_barrier_implicit_workshare:
421 case ompt_sync_region_barrier_implicit_parallel:
422 case ompt_sync_region_barrier_teams:
423 case ompt_sync_region_barrier_explicit:
424 case ompt_sync_region_barrier_implementation:
425 printf("%" PRIu64 ":" _TOOL_PREFIX
426 " ompt_event_barrier_begin: parallel_id=%" PRIu64
427 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
428 ompt_get_thread_data()->value, parallel_data->value,
429 task_data->value, codeptr_ra);
430 print_ids(0);
431 break;
432 case ompt_sync_region_taskwait:
433 printf("%" PRIu64 ":" _TOOL_PREFIX
434 " ompt_event_taskwait_begin: parallel_id=%" PRIu64
435 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
436 ompt_get_thread_data()->value, parallel_data->value,
437 task_data->value, codeptr_ra);
438 break;
439 case ompt_sync_region_taskgroup:
440 printf("%" PRIu64 ":" _TOOL_PREFIX
441 " ompt_event_taskgroup_begin: parallel_id=%" PRIu64
442 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
443 ompt_get_thread_data()->value, parallel_data->value,
444 task_data->value, codeptr_ra);
445 break;
446 case ompt_sync_region_reduction:
447 printf("ompt_sync_region_reduction should never be passed to "
448 "on_ompt_callback_sync_region\n");
449 exit(-1);
450 break;
451 }
452 break;
453 case ompt_scope_end:
454 switch(kind)
455 {
456 case ompt_sync_region_barrier:
457 case ompt_sync_region_barrier_implicit:
458 case ompt_sync_region_barrier_explicit:
459 case ompt_sync_region_barrier_implicit_workshare:
460 case ompt_sync_region_barrier_implicit_parallel:
461 case ompt_sync_region_barrier_teams:
462 case ompt_sync_region_barrier_implementation:
463 printf("%" PRIu64 ":" _TOOL_PREFIX
464 " ompt_event_barrier_end: parallel_id=%" PRIu64
465 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
466 ompt_get_thread_data()->value,
467 (parallel_data) ? parallel_data->value : 0, task_data->value,
468 codeptr_ra);
469 break;
470 case ompt_sync_region_taskwait:
471 printf("%" PRIu64 ":" _TOOL_PREFIX
472 " ompt_event_taskwait_end: parallel_id=%" PRIu64
473 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
474 ompt_get_thread_data()->value,
475 (parallel_data) ? parallel_data->value : 0, task_data->value,
476 codeptr_ra);
477 break;
478 case ompt_sync_region_taskgroup:
479 printf("%" PRIu64 ":" _TOOL_PREFIX
480 " ompt_event_taskgroup_end: parallel_id=%" PRIu64
481 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
482 ompt_get_thread_data()->value,
483 (parallel_data) ? parallel_data->value : 0, task_data->value,
484 codeptr_ra);
485 break;
486 case ompt_sync_region_reduction:
487 printf("ompt_sync_region_reduction should never be passed to "
488 "on_ompt_callback_sync_region\n");
489 exit(-1);
490 break;
491 }
492 break;
493 case ompt_scope_beginend:
494 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
495 exit(-1);
496 }
497 }
498
499 static void
on_ompt_callback_sync_region_wait(ompt_sync_region_t kind,ompt_scope_endpoint_t endpoint,ompt_data_t * parallel_data,ompt_data_t * task_data,const void * codeptr_ra)500 on_ompt_callback_sync_region_wait(
501 ompt_sync_region_t kind,
502 ompt_scope_endpoint_t endpoint,
503 ompt_data_t *parallel_data,
504 ompt_data_t *task_data,
505 const void *codeptr_ra)
506 {
507 switch(endpoint)
508 {
509 case ompt_scope_begin:
510 switch(kind)
511 {
512 case ompt_sync_region_barrier:
513 case ompt_sync_region_barrier_implicit:
514 case ompt_sync_region_barrier_implicit_workshare:
515 case ompt_sync_region_barrier_implicit_parallel:
516 case ompt_sync_region_barrier_teams:
517 case ompt_sync_region_barrier_explicit:
518 case ompt_sync_region_barrier_implementation:
519 printf("%" PRIu64 ":" _TOOL_PREFIX
520 " ompt_event_wait_barrier_begin: parallel_id=%" PRIu64
521 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
522 ompt_get_thread_data()->value, parallel_data->value,
523 task_data->value, codeptr_ra);
524 break;
525 case ompt_sync_region_taskwait:
526 printf("%" PRIu64 ":" _TOOL_PREFIX
527 " ompt_event_wait_taskwait_begin: parallel_id=%" PRIu64
528 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
529 ompt_get_thread_data()->value, parallel_data->value,
530 task_data->value, codeptr_ra);
531 break;
532 case ompt_sync_region_taskgroup:
533 printf("%" PRIu64 ":" _TOOL_PREFIX
534 " ompt_event_wait_taskgroup_begin: parallel_id=%" PRIu64
535 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
536 ompt_get_thread_data()->value, parallel_data->value,
537 task_data->value, codeptr_ra);
538 break;
539 case ompt_sync_region_reduction:
540 printf("ompt_sync_region_reduction should never be passed to "
541 "on_ompt_callback_sync_region_wait\n");
542 exit(-1);
543 break;
544 }
545 break;
546 case ompt_scope_end:
547 switch(kind)
548 {
549 case ompt_sync_region_barrier:
550 case ompt_sync_region_barrier_implicit:
551 case ompt_sync_region_barrier_implicit_workshare:
552 case ompt_sync_region_barrier_implicit_parallel:
553 case ompt_sync_region_barrier_teams:
554 case ompt_sync_region_barrier_explicit:
555 case ompt_sync_region_barrier_implementation:
556 printf("%" PRIu64 ":" _TOOL_PREFIX
557 " ompt_event_wait_barrier_end: parallel_id=%" PRIu64
558 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
559 ompt_get_thread_data()->value,
560 (parallel_data) ? parallel_data->value : 0, task_data->value,
561 codeptr_ra);
562 break;
563 case ompt_sync_region_taskwait:
564 printf("%" PRIu64 ":" _TOOL_PREFIX
565 " ompt_event_wait_taskwait_end: parallel_id=%" PRIu64
566 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
567 ompt_get_thread_data()->value,
568 (parallel_data) ? parallel_data->value : 0, task_data->value,
569 codeptr_ra);
570 break;
571 case ompt_sync_region_taskgroup:
572 printf("%" PRIu64 ":" _TOOL_PREFIX
573 " ompt_event_wait_taskgroup_end: parallel_id=%" PRIu64
574 ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
575 ompt_get_thread_data()->value,
576 (parallel_data) ? parallel_data->value : 0, task_data->value,
577 codeptr_ra);
578 break;
579 case ompt_sync_region_reduction:
580 printf("ompt_sync_region_reduction should never be passed to "
581 "on_ompt_callback_sync_region_wait\n");
582 exit(-1);
583 break;
584 }
585 break;
586 case ompt_scope_beginend:
587 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
588 exit(-1);
589 }
590 }
591
// Handler for the reduction callback. Logs "reduction_begin" or
// "reduction_end" with the parallel id (0 when parallel_data is NULL), the
// task id and the return address. `kind` is not inspected.
// ompt_scope_beginend is fatal: a message is printed and the process exits
// with status -1.
static void on_ompt_callback_reduction(ompt_sync_region_t kind,
                                       ompt_scope_endpoint_t endpoint,
                                       ompt_data_t *parallel_data,
                                       ompt_data_t *task_data,
                                       const void *codeptr_ra) {
  if (endpoint == ompt_scope_begin) {
    printf("%" PRIu64 ":" _TOOL_PREFIX
           " ompt_event_reduction_begin: parallel_id=%" PRIu64
           ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
           ompt_get_thread_data()->value,
           (parallel_data) ? parallel_data->value : 0, task_data->value,
           codeptr_ra);
  } else if (endpoint == ompt_scope_end) {
    printf("%" PRIu64 ":" _TOOL_PREFIX
           " ompt_event_reduction_end: parallel_id=%" PRIu64
           ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
           ompt_get_thread_data()->value,
           (parallel_data) ? parallel_data->value : 0, task_data->value,
           codeptr_ra);
  } else if (endpoint == ompt_scope_beginend) {
    printf("ompt_scope_beginend should never be passed to %s\n", __func__);
    exit(-1);
  }
}
619
620 static void
on_ompt_callback_flush(ompt_data_t * thread_data,const void * codeptr_ra)621 on_ompt_callback_flush(
622 ompt_data_t *thread_data,
623 const void *codeptr_ra)
624 {
625 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_flush: codeptr_ra=%p\n",
626 thread_data->value, codeptr_ra);
627 }
628
629 static void
on_ompt_callback_cancel(ompt_data_t * task_data,int flags,const void * codeptr_ra)630 on_ompt_callback_cancel(
631 ompt_data_t *task_data,
632 int flags,
633 const void *codeptr_ra)
634 {
635 const char* first_flag_value;
636 const char* second_flag_value;
637 if(flags & ompt_cancel_parallel)
638 first_flag_value = ompt_cancel_flag_t_values[0];
639 else if(flags & ompt_cancel_sections)
640 first_flag_value = ompt_cancel_flag_t_values[1];
641 else if(flags & ompt_cancel_loop)
642 first_flag_value = ompt_cancel_flag_t_values[2];
643 else if(flags & ompt_cancel_taskgroup)
644 first_flag_value = ompt_cancel_flag_t_values[3];
645
646 if(flags & ompt_cancel_activated)
647 second_flag_value = ompt_cancel_flag_t_values[4];
648 else if(flags & ompt_cancel_detected)
649 second_flag_value = ompt_cancel_flag_t_values[5];
650 else if(flags & ompt_cancel_discarded_task)
651 second_flag_value = ompt_cancel_flag_t_values[6];
652
653 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_cancel: task_data=%" PRIu64
654 ", flags=%s|%s=%" PRIu32 ", codeptr_ra=%p\n",
655 ompt_get_thread_data()->value, task_data->value, first_flag_value,
656 second_flag_value, flags, codeptr_ra);
657 }
658
659 static void
on_ompt_callback_implicit_task(ompt_scope_endpoint_t endpoint,ompt_data_t * parallel_data,ompt_data_t * task_data,unsigned int team_size,unsigned int thread_num,int flags)660 on_ompt_callback_implicit_task(
661 ompt_scope_endpoint_t endpoint,
662 ompt_data_t *parallel_data,
663 ompt_data_t *task_data,
664 unsigned int team_size,
665 unsigned int thread_num,
666 int flags)
667 {
668 switch(endpoint)
669 {
670 case ompt_scope_begin:
671 if(task_data->ptr)
672 printf("%s\n", "0: task_data initially not null");
673 task_data->value = ompt_get_unique_id();
674
675 //there is no parallel_begin callback for implicit parallel region
676 //thus it is initialized in initial task
677 if(flags & ompt_task_initial)
678 {
679 char buffer[2048];
680
681 format_task_type(flags, buffer);
682 // Only check initial task not created by teams construct
683 if (team_size == 1 && thread_num == 1 && parallel_data->ptr)
684 printf("%s\n", "0: parallel_data initially not null");
685 parallel_data->value = ompt_get_unique_id();
686 printf("%" PRIu64 ":" _TOOL_PREFIX
687 " ompt_event_initial_task_begin: parallel_id=%" PRIu64
688 ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32
689 ", index=%" PRIu32 ", flags=%" PRIu32 "\n",
690 ompt_get_thread_data()->value, parallel_data->value,
691 task_data->value, team_size, thread_num, flags);
692 } else {
693 printf("%" PRIu64 ":" _TOOL_PREFIX
694 " ompt_event_implicit_task_begin: parallel_id=%" PRIu64
695 ", task_id=%" PRIu64 ", team_size=%" PRIu32
696 ", thread_num=%" PRIu32 "\n",
697 ompt_get_thread_data()->value, parallel_data->value,
698 task_data->value, team_size, thread_num);
699 }
700
701 break;
702 case ompt_scope_end:
703 if(flags & ompt_task_initial){
704 printf("%" PRIu64 ":" _TOOL_PREFIX
705 " ompt_event_initial_task_end: parallel_id=%" PRIu64
706 ", task_id=%" PRIu64 ", actual_parallelism=%" PRIu32
707 ", index=%" PRIu32 "\n",
708 ompt_get_thread_data()->value,
709 (parallel_data) ? parallel_data->value : 0, task_data->value,
710 team_size, thread_num);
711 } else {
712 printf("%" PRIu64 ":" _TOOL_PREFIX
713 " ompt_event_implicit_task_end: parallel_id=%" PRIu64
714 ", task_id=%" PRIu64 ", team_size=%" PRIu32
715 ", thread_num=%" PRIu32 "\n",
716 ompt_get_thread_data()->value,
717 (parallel_data) ? parallel_data->value : 0, task_data->value,
718 team_size, thread_num);
719 }
720 break;
721 case ompt_scope_beginend:
722 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
723 exit(-1);
724 }
725 }
726
727 static void
on_ompt_callback_lock_init(ompt_mutex_t kind,unsigned int hint,unsigned int impl,ompt_wait_id_t wait_id,const void * codeptr_ra)728 on_ompt_callback_lock_init(
729 ompt_mutex_t kind,
730 unsigned int hint,
731 unsigned int impl,
732 ompt_wait_id_t wait_id,
733 const void *codeptr_ra)
734 {
735 switch(kind)
736 {
737 case ompt_mutex_lock:
738 printf("%" PRIu64 ":" _TOOL_PREFIX
739 " ompt_event_init_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
740 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
741 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
742 break;
743 case ompt_mutex_nest_lock:
744 printf("%" PRIu64 ":" _TOOL_PREFIX
745 " ompt_event_init_nest_lock: wait_id=%" PRIu64 ", hint=%" PRIu32
746 ", impl=%" PRIu32 ", codeptr_ra=%p \n",
747 ompt_get_thread_data()->value, wait_id, hint, impl, codeptr_ra);
748 break;
749 default:
750 break;
751 }
752 }
753
754 static void
on_ompt_callback_lock_destroy(ompt_mutex_t kind,ompt_wait_id_t wait_id,const void * codeptr_ra)755 on_ompt_callback_lock_destroy(
756 ompt_mutex_t kind,
757 ompt_wait_id_t wait_id,
758 const void *codeptr_ra)
759 {
760 switch(kind)
761 {
762 case ompt_mutex_lock:
763 printf("%" PRIu64 ":" _TOOL_PREFIX
764 " ompt_event_destroy_lock: wait_id=%" PRIu64 ", codeptr_ra=%p \n",
765 ompt_get_thread_data()->value, wait_id, codeptr_ra);
766 break;
767 case ompt_mutex_nest_lock:
768 printf("%" PRIu64 ":" _TOOL_PREFIX
769 " ompt_event_destroy_nest_lock: wait_id=%" PRIu64
770 ", codeptr_ra=%p \n",
771 ompt_get_thread_data()->value, wait_id, codeptr_ra);
772 break;
773 default:
774 break;
775 }
776 }
777
778 static void
on_ompt_callback_work(ompt_work_t wstype,ompt_scope_endpoint_t endpoint,ompt_data_t * parallel_data,ompt_data_t * task_data,uint64_t count,const void * codeptr_ra)779 on_ompt_callback_work(
780 ompt_work_t wstype,
781 ompt_scope_endpoint_t endpoint,
782 ompt_data_t *parallel_data,
783 ompt_data_t *task_data,
784 uint64_t count,
785 const void *codeptr_ra)
786 {
787 switch(endpoint)
788 {
789 case ompt_scope_begin:
790 switch(wstype)
791 {
792 case ompt_work_loop:
793 case ompt_work_loop_static:
794 case ompt_work_loop_dynamic:
795 case ompt_work_loop_guided:
796 case ompt_work_loop_other:
797 // TODO: add schedule attribute for the different work_loop types.
798 // e.g., ", schedule=%s", ..., ompt_schedule_values[wstype]
799 printf("%" PRIu64 ":" _TOOL_PREFIX
800 " ompt_event_loop_begin: parallel_id=%" PRIu64
801 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
802 "\n",
803 ompt_get_thread_data()->value, parallel_data->value,
804 task_data->value, codeptr_ra, count);
805 break;
806 case ompt_work_sections:
807 printf("%" PRIu64 ":" _TOOL_PREFIX
808 " ompt_event_sections_begin: parallel_id=%" PRIu64
809 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
810 "\n",
811 ompt_get_thread_data()->value, parallel_data->value,
812 task_data->value, codeptr_ra, count);
813 break;
814 case ompt_work_single_executor:
815 printf("%" PRIu64 ":" _TOOL_PREFIX
816 " ompt_event_single_in_block_begin: parallel_id=%" PRIu64
817 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
818 "\n",
819 ompt_get_thread_data()->value, parallel_data->value,
820 task_data->value, codeptr_ra, count);
821 break;
822 case ompt_work_single_other:
823 printf("%" PRIu64 ":" _TOOL_PREFIX
824 " ompt_event_single_others_begin: parallel_id=%" PRIu64
825 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
826 ompt_get_thread_data()->value, parallel_data->value,
827 task_data->value, codeptr_ra, count);
828 break;
829 case ompt_work_workshare:
830 //impl
831 break;
832 case ompt_work_distribute:
833 printf("%" PRIu64 ":" _TOOL_PREFIX
834 " ompt_event_distribute_begin: parallel_id=%" PRIu64
835 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
836 "\n",
837 ompt_get_thread_data()->value, parallel_data->value,
838 task_data->value, codeptr_ra, count);
839 break;
840 case ompt_work_taskloop:
841 //impl
842 printf("%" PRIu64 ":" _TOOL_PREFIX
843 " ompt_event_taskloop_begin: parallel_id=%" PRIu64
844 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
845 "\n",
846 ompt_get_thread_data()->value, parallel_data->value,
847 task_data->value, codeptr_ra, count);
848 break;
849 case ompt_work_scope:
850 printf("%" PRIu64 ":" _TOOL_PREFIX
851 " ompt_event_scope_begin: parallel_id=%" PRIu64
852 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
853 "\n",
854 ompt_get_thread_data()->value, parallel_data->value,
855 task_data->value, codeptr_ra, count);
856 break;
857 }
858 break;
859 case ompt_scope_end:
860 switch(wstype)
861 {
862 case ompt_work_loop:
863 case ompt_work_loop_static:
864 case ompt_work_loop_dynamic:
865 case ompt_work_loop_guided:
866 case ompt_work_loop_other:
867 printf("%" PRIu64 ":" _TOOL_PREFIX
868 " ompt_event_loop_end: parallel_id=%" PRIu64
869 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
870 ompt_get_thread_data()->value, parallel_data->value,
871 task_data->value, codeptr_ra, count);
872 break;
873 case ompt_work_sections:
874 printf("%" PRIu64 ":" _TOOL_PREFIX
875 " ompt_event_sections_end: parallel_id=%" PRIu64
876 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
877 ompt_get_thread_data()->value, parallel_data->value,
878 task_data->value, codeptr_ra, count);
879 break;
880 case ompt_work_single_executor:
881 printf("%" PRIu64 ":" _TOOL_PREFIX
882 " ompt_event_single_in_block_end: parallel_id=%" PRIu64
883 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
884 ompt_get_thread_data()->value, parallel_data->value,
885 task_data->value, codeptr_ra, count);
886 break;
887 case ompt_work_single_other:
888 printf("%" PRIu64 ":" _TOOL_PREFIX
889 " ompt_event_single_others_end: parallel_id=%" PRIu64
890 ", task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64 "\n",
891 ompt_get_thread_data()->value, parallel_data->value,
892 task_data->value, codeptr_ra, count);
893 break;
894 case ompt_work_workshare:
895 //impl
896 break;
897 case ompt_work_distribute:
898 printf("%" PRIu64 ":" _TOOL_PREFIX
899 " ompt_event_distribute_end: parallel_id=%" PRIu64
900 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
901 "\n",
902 ompt_get_thread_data()->value, parallel_data->value,
903 task_data->value, codeptr_ra, count);
904 break;
905 case ompt_work_taskloop:
906 //impl
907 printf("%" PRIu64 ":" _TOOL_PREFIX
908 " ompt_event_taskloop_end: parallel_id=%" PRIu64
909 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
910 "\n",
911 ompt_get_thread_data()->value, parallel_data->value,
912 task_data->value, codeptr_ra, count);
913 break;
914 case ompt_work_scope:
915 printf("%" PRIu64 ":" _TOOL_PREFIX
916 " ompt_event_scope_end: parallel_id=%" PRIu64
917 ", parent_task_id=%" PRIu64 ", codeptr_ra=%p, count=%" PRIu64
918 "\n",
919 ompt_get_thread_data()->value, parallel_data->value,
920 task_data->value, codeptr_ra, count);
921 break;
922 }
923 break;
924 case ompt_scope_beginend:
925 printf("ompt_scope_beginend should never be passed to %s\n", __func__);
926 exit(-1);
927 }
928 }
929
// Dispatch callback: prints one line for the work item identified by `kind`.
//   parallel_data / task_data - ids of the enclosing parallel region and task
//   kind                      - which kind of work item is being dispatched
//   instance                  - for sections its ptr member is printed as
//                               codeptr_ra; for the three chunk kinds its ptr
//                               member is read as an ompt_dispatch_chunk_t
// Any other kind is reported as "ompt_ws_loop_iteration_begin" with a NULL
// codeptr_ra and a zero chunk start / iteration count.
static void on_ompt_callback_dispatch(
    ompt_data_t *parallel_data,
    ompt_data_t *task_data,
    ompt_dispatch_t kind,
    ompt_data_t instance) {
  // Points at string literals only, hence const (also required for this to be
  // valid when the file is compiled as C++).
  const char *event_name = NULL;
  const void *codeptr_ra = NULL;
  const ompt_dispatch_chunk_t *dispatch_chunk = NULL;

  switch (kind) {
  case ompt_dispatch_section:
    event_name = "ompt_event_section_begin";
    codeptr_ra = instance.ptr;
    break;
  case ompt_dispatch_ws_loop_chunk:
    event_name = "ompt_event_ws_loop_chunk_begin";
    dispatch_chunk = (const ompt_dispatch_chunk_t *)instance.ptr;
    break;
  case ompt_dispatch_taskloop_chunk:
    event_name = "ompt_event_taskloop_chunk_begin";
    dispatch_chunk = (const ompt_dispatch_chunk_t *)instance.ptr;
    break;
  case ompt_dispatch_distribute_chunk:
    event_name = "ompt_event_distribute_chunk_begin";
    dispatch_chunk = (const ompt_dispatch_chunk_t *)instance.ptr;
    break;
  default:
    event_name = "ompt_ws_loop_iteration_begin";
    break;
  }

  printf("%" PRIu64 ":" _TOOL_PREFIX
         " %s: parallel_id=%" PRIu64 ", task_id=%" PRIu64
         ", codeptr_ra=%p, chunk_start=%" PRIu64 ", chunk_iterations=%" PRIu64
         "\n",
         ompt_get_thread_data()->value, event_name, parallel_data->value,
         task_data->value, codeptr_ra,
         dispatch_chunk ? dispatch_chunk->start : 0,
         dispatch_chunk ? dispatch_chunk->iterations : 0);
}
966
// Masked-region callback: prints a begin or end event line carrying the
// parallel id, task id and return address. Receiving ompt_scope_beginend is
// treated as a fatal error: a diagnostic is printed and the process exits.
static void on_ompt_callback_masked(ompt_scope_endpoint_t endpoint,
                                    ompt_data_t *parallel_data,
                                    ompt_data_t *task_data,
                                    const void *codeptr_ra) {
  if (endpoint == ompt_scope_begin) {
    printf("%" PRIu64 ":" _TOOL_PREFIX
           " ompt_event_masked_begin: parallel_id=%" PRIu64
           ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
           ompt_get_thread_data()->value, parallel_data->value,
           task_data->value, codeptr_ra);
    return;
  }

  if (endpoint == ompt_scope_end) {
    printf("%" PRIu64 ":" _TOOL_PREFIX
           " ompt_event_masked_end: parallel_id=%" PRIu64
           ", task_id=%" PRIu64 ", codeptr_ra=%p\n",
           ompt_get_thread_data()->value, parallel_data->value,
           task_data->value, codeptr_ra);
    return;
  }

  if (endpoint == ompt_scope_beginend) {
    printf("ompt_scope_beginend should never be passed to %s\n", __func__);
    exit(-1);
  }
}
992
// Parallel/teams begin callback. Warns if the runtime handed over non-null
// parallel_data, stores a fresh unique id in it, and prints the begin event
// with the encountering task's id and frame pointers. The event is named
// "parallel" when ompt_parallel_team is set in `flag`, otherwise "teams";
// the low four bits of `flag` are printed as the invoker.
static void on_ompt_callback_parallel_begin(
    ompt_data_t *encountering_task_data,
    const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data,
    uint32_t requested_team_size, int flag, const void *codeptr_ra) {
  const char *event;
  const char *size;
  int invoker;

  if (parallel_data->ptr != NULL) {
    printf("0: parallel_data initially not null\n");
  }
  parallel_data->value = ompt_get_unique_id();

  invoker = flag & 0xF;
  if (flag & ompt_parallel_team) {
    event = "parallel";
    size = "team_size";
  } else {
    event = "teams";
    size = "num_teams";
  }

  printf("%" PRIu64 ":" _TOOL_PREFIX
         " ompt_event_%s_begin: parent_task_id=%" PRIu64
         ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
         "parallel_id=%" PRIu64 ", requested_%s=%" PRIu32
         ", codeptr_ra=%p, invoker=%d\n",
         ompt_get_thread_data()->value, event, encountering_task_data->value,
         encountering_task_frame->exit_frame.ptr,
         encountering_task_frame->enter_frame.ptr, parallel_data->value, size,
         requested_team_size, codeptr_ra, invoker);
}
1013
// Parallel/teams end callback: prints the end event with the region id, the
// encountering task id, the invoker (low four bits of `flag`) and the return
// address. The event is "parallel" when ompt_parallel_team is set in `flag`,
// otherwise "teams".
static void on_ompt_callback_parallel_end(ompt_data_t *parallel_data,
                                          ompt_data_t *encountering_task_data,
                                          int flag, const void *codeptr_ra) {
  const char *event = "teams";
  const int invoker = flag & 0xF;

  if (flag & ompt_parallel_team) {
    event = "parallel";
  }

  printf("%" PRIu64 ":" _TOOL_PREFIX
         " ompt_event_%s_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64
         ", invoker=%d, codeptr_ra=%p\n",
         ompt_get_thread_data()->value, event, parallel_data->value,
         encountering_task_data->value, invoker, codeptr_ra);
}
1024
1025 static void
on_ompt_callback_task_create(ompt_data_t * encountering_task_data,const ompt_frame_t * encountering_task_frame,ompt_data_t * new_task_data,int type,int has_dependences,const void * codeptr_ra)1026 on_ompt_callback_task_create(
1027 ompt_data_t *encountering_task_data,
1028 const ompt_frame_t *encountering_task_frame,
1029 ompt_data_t* new_task_data,
1030 int type,
1031 int has_dependences,
1032 const void *codeptr_ra)
1033 {
1034 if(new_task_data->ptr)
1035 printf("0: new_task_data initially not null\n");
1036 new_task_data->value = ompt_get_unique_id();
1037 char buffer[2048];
1038
1039 format_task_type(type, buffer);
1040
1041 printf(
1042 "%" PRIu64 ":" _TOOL_PREFIX
1043 " ompt_event_task_create: parent_task_id=%" PRIu64
1044 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, "
1045 "new_task_id=%" PRIu64
1046 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n",
1047 ompt_get_thread_data()->value,
1048 encountering_task_data ? encountering_task_data->value : 0,
1049 encountering_task_frame ? encountering_task_frame->exit_frame.ptr : NULL,
1050 encountering_task_frame ? encountering_task_frame->enter_frame.ptr : NULL,
1051 new_task_data->value, codeptr_ra, buffer, type,
1052 has_dependences ? "yes" : "no");
1053 }
1054
1055 static void
on_ompt_callback_task_schedule(ompt_data_t * first_task_data,ompt_task_status_t prior_task_status,ompt_data_t * second_task_data)1056 on_ompt_callback_task_schedule(
1057 ompt_data_t *first_task_data,
1058 ompt_task_status_t prior_task_status,
1059 ompt_data_t *second_task_data)
1060 {
1061 printf("%" PRIu64 ":" _TOOL_PREFIX
1062 " ompt_event_task_schedule: first_task_id=%" PRIu64
1063 ", second_task_id=%" PRIu64 ", prior_task_status=%s=%d\n",
1064 ompt_get_thread_data()->value, first_task_data->value,
1065 (second_task_data ? second_task_data->value : -1),
1066 ompt_task_status_t_values[prior_task_status], prior_task_status);
1067 if (prior_task_status == ompt_task_complete ||
1068 prior_task_status == ompt_task_late_fulfill ||
1069 prior_task_status == ompt_taskwait_complete) {
1070 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_task_end: task_id=%" PRIu64
1071 "\n", ompt_get_thread_data()->value, first_task_data->value);
1072 }
1073 }
1074
1075 static void
on_ompt_callback_dependences(ompt_data_t * task_data,const ompt_dependence_t * deps,int ndeps)1076 on_ompt_callback_dependences(
1077 ompt_data_t *task_data,
1078 const ompt_dependence_t *deps,
1079 int ndeps)
1080 {
1081 char buffer[2048];
1082 char *progress = buffer;
1083 int i;
1084 for (i = 0; i < ndeps && progress < buffer + 2000; i++) {
1085 if (deps[i].dependence_type == ompt_dependence_type_source ||
1086 deps[i].dependence_type == ompt_dependence_type_sink)
1087 progress +=
1088 sprintf(progress, "(%" PRIu64 ", %s), ", deps[i].variable.value,
1089 ompt_dependence_type_t_values[deps[i].dependence_type]);
1090 else
1091 progress +=
1092 sprintf(progress, "(%p, %s), ", deps[i].variable.ptr,
1093 ompt_dependence_type_t_values[deps[i].dependence_type]);
1094 }
1095 if (ndeps > 0)
1096 progress[-2] = 0;
1097 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_dependences: task_id=%" PRIu64
1098 ", deps=[%s], ndeps=%d\n",
1099 ompt_get_thread_data()->value, task_data->value, buffer, ndeps);
1100 }
1101
1102 static void
on_ompt_callback_task_dependence(ompt_data_t * first_task_data,ompt_data_t * second_task_data)1103 on_ompt_callback_task_dependence(
1104 ompt_data_t *first_task_data,
1105 ompt_data_t *second_task_data)
1106 {
1107 printf("%" PRIu64 ":" _TOOL_PREFIX
1108 " ompt_event_task_dependence_pair: first_task_id=%" PRIu64
1109 ", second_task_id=%" PRIu64 "\n",
1110 ompt_get_thread_data()->value, first_task_data->value,
1111 second_task_data->value);
1112 }
1113
1114 static void
on_ompt_callback_thread_begin(ompt_thread_t thread_type,ompt_data_t * thread_data)1115 on_ompt_callback_thread_begin(
1116 ompt_thread_t thread_type,
1117 ompt_data_t *thread_data)
1118 {
1119 if(thread_data->ptr)
1120 printf("%s\n", "0: thread_data initially not null");
1121 thread_data->value = ompt_get_unique_id();
1122 printf("%" PRIu64 ":" _TOOL_PREFIX
1123 " ompt_event_thread_begin: thread_type=%s=%d, thread_id=%" PRIu64 "\n",
1124 ompt_get_thread_data()->value, ompt_thread_t_values[thread_type],
1125 thread_type, thread_data->value);
1126 }
1127
1128 static void
on_ompt_callback_thread_end(ompt_data_t * thread_data)1129 on_ompt_callback_thread_end(
1130 ompt_data_t *thread_data)
1131 {
1132 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_thread_end: thread_id=%" PRIu64
1133 "\n",
1134 ompt_get_thread_data()->value, thread_data->value);
1135 }
1136
1137 static int
on_ompt_callback_control_tool(uint64_t command,uint64_t modifier,void * arg,const void * codeptr_ra)1138 on_ompt_callback_control_tool(
1139 uint64_t command,
1140 uint64_t modifier,
1141 void *arg,
1142 const void *codeptr_ra)
1143 {
1144 ompt_frame_t* omptTaskFrame;
1145 ompt_get_task_info(0, NULL, (ompt_data_t**) NULL, &omptTaskFrame, NULL, NULL);
1146 printf("%" PRIu64 ":" _TOOL_PREFIX " ompt_event_control_tool: command=%" PRIu64
1147 ", modifier=%" PRIu64
1148 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, "
1149 "current_task_frame.reenter=%p \n",
1150 ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra,
1151 omptTaskFrame->exit_frame.ptr, omptTaskFrame->enter_frame.ptr);
1152
1153 // the following would interfere with expected output for OMPT tests, so skip
1154 #ifndef _OMPT_TESTS
1155 // print task data
1156 int task_level = 0;
1157 ompt_data_t *task_data;
1158 while (ompt_get_task_info(task_level, NULL, (ompt_data_t **)&task_data, NULL,
1159 NULL, NULL)) {
1160 printf("%" PRIu64 ":" _TOOL_PREFIX " task level %d: task_id=%" PRIu64 "\n",
1161 ompt_get_thread_data()->value, task_level, task_data->value);
1162 task_level++;
1163 }
1164
1165 // print parallel data
1166 int parallel_level = 0;
1167 ompt_data_t *parallel_data;
1168 while (ompt_get_parallel_info(parallel_level, (ompt_data_t **)¶llel_data,
1169 NULL)) {
1170 printf("%" PRIu64 ":" _TOOL_PREFIX " parallel level %d: parallel_id=%" PRIu64
1171 "\n",
1172 ompt_get_thread_data()->value, parallel_level, parallel_data->value);
1173 parallel_level++;
1174 }
1175 #endif
1176 return 0; //success
1177 }
1178
// Error callback: prints the severity, message text, message length and
// return address of a runtime error event.
// The line carries _TOOL_PREFIX like every other event line in this file;
// with the default empty prefix the output is unchanged.
static void on_ompt_callback_error(ompt_severity_t severity,
                                   const char *message, size_t length,
                                   const void *codeptr_ra) {
  // Cast the enum and size_t explicitly so each argument matches the width
  // of its PRIu32 / PRIu64 conversion.
  printf("%" PRIu64 ":" _TOOL_PREFIX
         " ompt_event_runtime_error: severity=%" PRIu32
         ", message=%s, length=%" PRIu64 ", codeptr_ra=%p\n",
         ompt_get_thread_data()->value, (uint32_t)severity, message,
         (uint64_t)length, codeptr_ra);
}
1187
// Tool initializer: resolves the runtime entry points through `lookup`,
// registers every callback defined in this file and prints the platform's
// textual form of a NULL pointer. Returns 1 (success).
// `initial_device_num` and `tool_data` are not used here.
int ompt_initialize(
  ompt_function_lookup_t lookup,
  int initial_device_num,
  ompt_data_t *tool_data)
{
  // Core entry points.
  ompt_set_callback = (ompt_set_callback_t)lookup("ompt_set_callback");
  ompt_get_callback = (ompt_get_callback_t)lookup("ompt_get_callback");
  ompt_get_state = (ompt_get_state_t)lookup("ompt_get_state");
  ompt_get_task_info = (ompt_get_task_info_t)lookup("ompt_get_task_info");
  ompt_get_task_memory =
      (ompt_get_task_memory_t)lookup("ompt_get_task_memory");
  ompt_get_thread_data =
      (ompt_get_thread_data_t)lookup("ompt_get_thread_data");
  ompt_get_parallel_info =
      (ompt_get_parallel_info_t)lookup("ompt_get_parallel_info");
  ompt_get_unique_id = (ompt_get_unique_id_t)lookup("ompt_get_unique_id");
  ompt_finalize_tool = (ompt_finalize_tool_t)lookup("ompt_finalize_tool");

  // One id is requested and discarded before any callback can ask for one.
  // NOTE(review): presumably so ids stored later never equal the first value
  // the runtime hands out - confirm.
  ompt_get_unique_id();

  // Processor / place queries and enumeration entry points.
  ompt_get_num_procs = (ompt_get_num_procs_t)lookup("ompt_get_num_procs");
  ompt_get_num_places = (ompt_get_num_places_t)lookup("ompt_get_num_places");
  ompt_get_place_proc_ids =
      (ompt_get_place_proc_ids_t)lookup("ompt_get_place_proc_ids");
  ompt_get_place_num = (ompt_get_place_num_t)lookup("ompt_get_place_num");
  ompt_get_partition_place_nums =
      (ompt_get_partition_place_nums_t)lookup("ompt_get_partition_place_nums");
  ompt_get_proc_id = (ompt_get_proc_id_t)lookup("ompt_get_proc_id");
  ompt_enumerate_states =
      (ompt_enumerate_states_t)lookup("ompt_enumerate_states");
  ompt_enumerate_mutex_impls =
      (ompt_enumerate_mutex_impls_t)lookup("ompt_enumerate_mutex_impls");

  // Callback registration; the _t form names the callback signature type
  // explicitly for events that share a handler signature with another event.
  register_ompt_callback(ompt_callback_mutex_acquire);
  register_ompt_callback_t(ompt_callback_mutex_acquired,
                           ompt_callback_mutex_t);
  register_ompt_callback_t(ompt_callback_mutex_released,
                           ompt_callback_mutex_t);
  register_ompt_callback(ompt_callback_nest_lock);
  register_ompt_callback(ompt_callback_sync_region);
  register_ompt_callback_t(ompt_callback_sync_region_wait,
                           ompt_callback_sync_region_t);
  register_ompt_callback_t(ompt_callback_reduction,
                           ompt_callback_sync_region_t);
  register_ompt_callback(ompt_callback_control_tool);
  register_ompt_callback(ompt_callback_flush);
  register_ompt_callback(ompt_callback_cancel);
  register_ompt_callback(ompt_callback_implicit_task);
  register_ompt_callback_t(ompt_callback_lock_init,
                           ompt_callback_mutex_acquire_t);
  register_ompt_callback_t(ompt_callback_lock_destroy,
                           ompt_callback_mutex_t);
  register_ompt_callback(ompt_callback_work);
  register_ompt_callback(ompt_callback_dispatch);
  register_ompt_callback(ompt_callback_masked);
  register_ompt_callback(ompt_callback_parallel_begin);
  register_ompt_callback(ompt_callback_parallel_end);
  register_ompt_callback(ompt_callback_task_create);
  register_ompt_callback(ompt_callback_task_schedule);
  register_ompt_callback(ompt_callback_dependences);
  register_ompt_callback(ompt_callback_task_dependence);
  register_ompt_callback(ompt_callback_thread_begin);
  register_ompt_callback(ompt_callback_thread_end);
  register_ompt_callback(ompt_callback_error);

  printf("0: NULL_POINTER=%p\n", (void *)NULL);
  return 1; // success
}
1242
// Tool finalizer: writes the runtime-shutdown marker line to stdout.
// `tool_data` is not used.
void ompt_finalize(ompt_data_t *tool_data)
{
  (void)tool_data;
  fputs("0: ompt_event_runtime_shutdown\n", stdout);
}
1247
1248 #ifdef __cplusplus
1249 extern "C" {
1250 #endif
ompt_start_tool(unsigned int omp_version,const char * runtime_version)1251 ompt_start_tool_result_t* ompt_start_tool(
1252 unsigned int omp_version,
1253 const char *runtime_version)
1254 {
1255 static ompt_start_tool_result_t ompt_start_tool_result = {&ompt_initialize,&ompt_finalize, 0};
1256 return &ompt_start_tool_result;
1257 }
1258 #ifdef __cplusplus
1259 }
1260 #endif
1261 #endif // ifndef USE_PRIVATE_TOOL
1262 #ifdef _OMPT_TESTS
1263 #undef _OMPT_TESTS
1264 #endif
1265