1 /** @file kmp_stats.cpp
2  * Statistics gathering and processing.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 //                     The LLVM Compiler Infrastructure
8 //
9 // This file is dual licensed under the MIT and the University of Illinois Open
10 // Source Licenses. See LICENSE.txt for details.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "kmp.h"
15 #include "kmp_lock.h"
16 #include "kmp_stats.h"
17 #include "kmp_str.h"
18 
19 #include <algorithm>
20 #include <ctime>
21 #include <iomanip>
22 #include <sstream>
23 #include <stdlib.h> // for atexit
24 
25 #define STRINGIZE2(x) #x
26 #define STRINGIZE(x) STRINGIZE2(x)
27 
28 #define expandName(name, flags, ignore) {STRINGIZE(name), flags},
29 statInfo timeStat::timerInfo[] = {
30     KMP_FOREACH_TIMER(expandName, 0){"TIMER_LAST", 0}};
31 const statInfo counter::counterInfo[] = {
32     KMP_FOREACH_COUNTER(expandName, 0){"COUNTER_LAST", 0}};
33 #undef expandName
34 
35 #define expandName(ignore1, ignore2, ignore3) {0.0, 0.0, 0.0},
36 kmp_stats_output_module::rgb_color kmp_stats_output_module::timerColorInfo[] = {
37     KMP_FOREACH_TIMER(expandName, 0){0.0, 0.0, 0.0}};
38 #undef expandName
39 
40 const kmp_stats_output_module::rgb_color
41     kmp_stats_output_module::globalColorArray[] = {
42         {1.0, 0.0, 0.0}, // red
43         {1.0, 0.6, 0.0}, // orange
44         {1.0, 1.0, 0.0}, // yellow
45         {0.0, 1.0, 0.0}, // green
46         {0.0, 0.0, 1.0}, // blue
47         {0.6, 0.2, 0.8}, // purple
48         {1.0, 0.0, 1.0}, // magenta
49         {0.0, 0.4, 0.2}, // dark green
50         {1.0, 1.0, 0.6}, // light yellow
51         {0.6, 0.4, 0.6}, // dirty purple
52         {0.0, 1.0, 1.0}, // cyan
53         {1.0, 0.4, 0.8}, // pink
54         {0.5, 0.5, 0.5}, // grey
55         {0.8, 0.7, 0.5}, // brown
56         {0.6, 0.6, 1.0}, // light blue
57         {1.0, 0.7, 0.5}, // peach
58         {0.8, 0.5, 1.0}, // lavender
59         {0.6, 0.0, 0.0}, // dark red
60         {0.7, 0.6, 0.0}, // gold
61         {0.0, 0.0, 0.0} // black
62 };
63 
64 // Ensure that the atexit handler only runs once.
65 static uint32_t statsPrinted = 0;
66 
67 // output interface
68 static kmp_stats_output_module *__kmp_stats_global_output = NULL;
69 
70 /* ************* statistic member functions ************* */
71 
72 void statistic::addSample(double sample) {
73   double delta = sample - meanVal;
74 
75   sampleCount = sampleCount + 1;
76   meanVal = meanVal + delta / sampleCount;
77   m2 = m2 + delta * (sample - meanVal);
78 
79   minVal = std::min(minVal, sample);
80   maxVal = std::max(maxVal, sample);
81 }
82 
83 statistic &statistic::operator+=(const statistic &other) {
84   if (sampleCount == 0) {
85     *this = other;
86     return *this;
87   }
88 
89   uint64_t newSampleCount = sampleCount + other.sampleCount;
90   double dnsc = double(newSampleCount);
91   double dsc = double(sampleCount);
92   double dscBydnsc = dsc / dnsc;
93   double dosc = double(other.sampleCount);
94   double delta = other.meanVal - meanVal;
95 
96   // Try to order these calculations to avoid overflows. If this were Fortran,
97   // then the compiler would not be able to re-order over brackets. In C++ it
98   // may be legal to do that (we certainly hope it doesn't, and CC+ Programming
99   // Language 2nd edition suggests it shouldn't, since it says that exploitation
100   // of associativity can only be made if the operation really is associative
101   // (which floating addition isn't...)).
102   meanVal = meanVal * dscBydnsc + other.meanVal * (1 - dscBydnsc);
103   m2 = m2 + other.m2 + dscBydnsc * dosc * delta * delta;
104   minVal = std::min(minVal, other.minVal);
105   maxVal = std::max(maxVal, other.maxVal);
106   sampleCount = newSampleCount;
107 
108   return *this;
109 }
110 
111 void statistic::scale(double factor) {
112   minVal = minVal * factor;
113   maxVal = maxVal * factor;
114   meanVal = meanVal * factor;
115   m2 = m2 * factor * factor;
116   return;
117 }
118 
119 std::string statistic::format(char unit, bool total) const {
120   std::string result = formatSI(sampleCount, 9, ' ');
121 
122   if (sampleCount == 0) {
123     result = result + std::string(", ") + formatSI(0.0, 9, unit);
124     result = result + std::string(", ") + formatSI(0.0, 9, unit);
125     result = result + std::string(", ") + formatSI(0.0, 9, unit);
126     if (total)
127       result = result + std::string(", ") + formatSI(0.0, 9, unit);
128     result = result + std::string(", ") + formatSI(0.0, 9, unit);
129   } else {
130     result = result + std::string(", ") + formatSI(minVal, 9, unit);
131     result = result + std::string(", ") + formatSI(meanVal, 9, unit);
132     result = result + std::string(", ") + formatSI(maxVal, 9, unit);
133     if (total)
134       result =
135           result + std::string(", ") + formatSI(meanVal * sampleCount, 9, unit);
136     result = result + std::string(", ") + formatSI(getSD(), 9, unit);
137   }
138   return result;
139 }
140 
141 /* ************* explicitTimer member functions ************* */
142 
143 void explicitTimer::start(timer_e timerEnumValue) {
144   startTime = tsc_tick_count::now();
145   totalPauseTime = 0;
146   if (timeStat::logEvent(timerEnumValue)) {
147     __kmp_stats_thread_ptr->incrementNestValue();
148   }
149   return;
150 }
151 
152 void explicitTimer::stop(timer_e timerEnumValue,
153                          kmp_stats_list *stats_ptr /* = nullptr */) {
154   if (startTime.getValue() == 0)
155     return;
156 
157   tsc_tick_count finishTime = tsc_tick_count::now();
158 
159   // stat->addSample ((tsc_tick_count::now() - startTime).ticks());
160   stat->addSample(((finishTime - startTime) - totalPauseTime).ticks());
161 
162   if (timeStat::logEvent(timerEnumValue)) {
163     if (!stats_ptr)
164       stats_ptr = __kmp_stats_thread_ptr;
165     stats_ptr->push_event(
166         startTime.getValue() - __kmp_stats_start_time.getValue(),
167         finishTime.getValue() - __kmp_stats_start_time.getValue(),
168         __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
169     stats_ptr->decrementNestValue();
170   }
171 
172   /* We accept the risk that we drop a sample because it really did start at
173      t==0. */
174   startTime = 0;
175   return;
176 }
177 
178 /* ************* partitionedTimers member functions ************* */
179 partitionedTimers::partitionedTimers() { timer_stack.reserve(8); }
180 
181 // add a timer to this collection of partitioned timers.
182 void partitionedTimers::add_timer(explicit_timer_e timer_index,
183                                   explicitTimer *timer_pointer) {
184   KMP_DEBUG_ASSERT((int)timer_index < (int)EXPLICIT_TIMER_LAST + 1);
185   timers[timer_index] = timer_pointer;
186 }
187 
188 // initialize the paritioned timers to an initial timer
189 void partitionedTimers::init(timerPair init_timer_pair) {
190   KMP_DEBUG_ASSERT(this->timer_stack.size() == 0);
191   timer_stack.push_back(init_timer_pair);
192   timers[init_timer_pair.get_index()]->start(init_timer_pair.get_timer());
193 }
194 
195 // stop/save the current timer, and start the new timer (timer_pair)
196 // There is a special condition where if the current timer is equal to
197 // the one you are trying to push, then it only manipulates the stack,
198 // and it won't stop/start the currently running timer.
199 void partitionedTimers::push(timerPair timer_pair) {
200   // get the current timer
201   // stop current timer
202   // push new timer
203   // start the new timer
204   KMP_DEBUG_ASSERT(this->timer_stack.size() > 0);
205   timerPair current_timer = timer_stack.back();
206   timer_stack.push_back(timer_pair);
207   if (current_timer != timer_pair) {
208     timers[current_timer.get_index()]->pause();
209     timers[timer_pair.get_index()]->start(timer_pair.get_timer());
210   }
211 }
212 
213 // stop/discard the current timer, and start the previously saved timer
214 void partitionedTimers::pop() {
215   // get the current timer
216   // stop current timer
217   // pop current timer
218   // get the new current timer and start it back up
219   KMP_DEBUG_ASSERT(this->timer_stack.size() > 1);
220   timerPair current_timer = timer_stack.back();
221   timer_stack.pop_back();
222   timerPair new_timer = timer_stack.back();
223   if (current_timer != new_timer) {
224     timers[current_timer.get_index()]->stop(current_timer.get_timer());
225     timers[new_timer.get_index()]->resume();
226   }
227 }
228 
229 // Wind up all the currently running timers.
230 // This pops off all the timers from the stack and clears the stack
231 // After this is called, init() must be run again to initialize the
232 // stack of timers
233 void partitionedTimers::windup() {
234   while (timer_stack.size() > 1) {
235     this->pop();
236   }
237   if (timer_stack.size() > 0) {
238     timerPair last_timer = timer_stack.back();
239     timer_stack.pop_back();
240     timers[last_timer.get_index()]->stop(last_timer.get_timer());
241   }
242 }
243 
244 /* ************* kmp_stats_event_vector member functions ************* */
245 
246 void kmp_stats_event_vector::deallocate() {
247   __kmp_free(events);
248   internal_size = 0;
249   allocated_size = 0;
250   events = NULL;
251 }
252 
253 // This function is for qsort() which requires the compare function to return
254 // either a negative number if event1 < event2, a positive number if event1 >
255 // event2 or zero if event1 == event2. This sorts by start time (lowest to
256 // highest).
257 int compare_two_events(const void *event1, const void *event2) {
258   kmp_stats_event *ev1 = (kmp_stats_event *)event1;
259   kmp_stats_event *ev2 = (kmp_stats_event *)event2;
260 
261   if (ev1->getStart() < ev2->getStart())
262     return -1;
263   else if (ev1->getStart() > ev2->getStart())
264     return 1;
265   else
266     return 0;
267 }
268 
269 void kmp_stats_event_vector::sort() {
270   qsort(events, internal_size, sizeof(kmp_stats_event), compare_two_events);
271 }
272 
273 /* ************* kmp_stats_list member functions ************* */
274 
275 // returns a pointer to newly created stats node
276 kmp_stats_list *kmp_stats_list::push_back(int gtid) {
277   kmp_stats_list *newnode =
278       (kmp_stats_list *)__kmp_allocate(sizeof(kmp_stats_list));
279   // placement new, only requires space and pointer and initializes (so
280   // __kmp_allocate instead of C++ new[] is used)
281   new (newnode) kmp_stats_list();
282   newnode->setGtid(gtid);
283   newnode->prev = this->prev;
284   newnode->next = this;
285   newnode->prev->next = newnode;
286   newnode->next->prev = newnode;
287   return newnode;
288 }
289 void kmp_stats_list::deallocate() {
290   kmp_stats_list *ptr = this->next;
291   kmp_stats_list *delptr = this->next;
292   while (ptr != this) {
293     delptr = ptr;
294     ptr = ptr->next;
295     // placement new means we have to explicitly call destructor.
296     delptr->_event_vector.deallocate();
297     delptr->~kmp_stats_list();
298     __kmp_free(delptr);
299   }
300 }
301 kmp_stats_list::iterator kmp_stats_list::begin() {
302   kmp_stats_list::iterator it;
303   it.ptr = this->next;
304   return it;
305 }
306 kmp_stats_list::iterator kmp_stats_list::end() {
307   kmp_stats_list::iterator it;
308   it.ptr = this;
309   return it;
310 }
311 int kmp_stats_list::size() {
312   int retval;
313   kmp_stats_list::iterator it;
314   for (retval = 0, it = begin(); it != end(); it++, retval++) {
315   }
316   return retval;
317 }
318 
319 /* ************* kmp_stats_list::iterator member functions ************* */
320 
321 kmp_stats_list::iterator::iterator() : ptr(NULL) {}
322 kmp_stats_list::iterator::~iterator() {}
323 kmp_stats_list::iterator kmp_stats_list::iterator::operator++() {
324   this->ptr = this->ptr->next;
325   return *this;
326 }
327 kmp_stats_list::iterator kmp_stats_list::iterator::operator++(int dummy) {
328   this->ptr = this->ptr->next;
329   return *this;
330 }
331 kmp_stats_list::iterator kmp_stats_list::iterator::operator--() {
332   this->ptr = this->ptr->prev;
333   return *this;
334 }
335 kmp_stats_list::iterator kmp_stats_list::iterator::operator--(int dummy) {
336   this->ptr = this->ptr->prev;
337   return *this;
338 }
339 bool kmp_stats_list::iterator::operator!=(const kmp_stats_list::iterator &rhs) {
340   return this->ptr != rhs.ptr;
341 }
342 bool kmp_stats_list::iterator::operator==(const kmp_stats_list::iterator &rhs) {
343   return this->ptr == rhs.ptr;
344 }
345 kmp_stats_list *kmp_stats_list::iterator::operator*() const {
346   return this->ptr;
347 }
348 
349 /* *************  kmp_stats_output_module functions ************** */
350 
351 const char *kmp_stats_output_module::eventsFileName = NULL;
352 const char *kmp_stats_output_module::plotFileName = NULL;
353 int kmp_stats_output_module::printPerThreadFlag = 0;
354 int kmp_stats_output_module::printPerThreadEventsFlag = 0;
355 
356 // init() is called very near the beginning of execution time in the constructor
357 // of __kmp_stats_global_output
358 void kmp_stats_output_module::init() {
359   char *statsFileName = getenv("KMP_STATS_FILE");
360   eventsFileName = getenv("KMP_STATS_EVENTS_FILE");
361   plotFileName = getenv("KMP_STATS_PLOT_FILE");
362   char *threadStats = getenv("KMP_STATS_THREADS");
363   char *threadEvents = getenv("KMP_STATS_EVENTS");
364 
365   // set the stats output filenames based on environment variables and defaults
366   if (statsFileName) {
367     // append the process id to the output filename
368     // events.csv --> events-pid.csv
369     size_t index;
370     std::string baseFileName, pid, suffix;
371     std::stringstream ss;
372     outputFileName = std::string(statsFileName);
373     index = outputFileName.find_last_of('.');
374     if (index == std::string::npos) {
375       baseFileName = outputFileName;
376     } else {
377       baseFileName = outputFileName.substr(0, index);
378       suffix = outputFileName.substr(index);
379     }
380     ss << getpid();
381     pid = ss.str();
382     outputFileName = baseFileName + "-" + pid + suffix;
383   }
384   eventsFileName = eventsFileName ? eventsFileName : "events.dat";
385   plotFileName = plotFileName ? plotFileName : "events.plt";
386 
387   // set the flags based on environment variables matching: true, on, 1, .true.
388   // , .t. , yes
389   printPerThreadFlag = __kmp_str_match_true(threadStats);
390   printPerThreadEventsFlag = __kmp_str_match_true(threadEvents);
391 
392   if (printPerThreadEventsFlag) {
393     // assigns a color to each timer for printing
394     setupEventColors();
395   } else {
396     // will clear flag so that no event will be logged
397     timeStat::clearEventFlags();
398   }
399 
400   return;
401 }
402 
403 void kmp_stats_output_module::setupEventColors() {
404   int i;
405   int globalColorIndex = 0;
406   int numGlobalColors = sizeof(globalColorArray) / sizeof(rgb_color);
407   for (i = 0; i < TIMER_LAST; i++) {
408     if (timeStat::logEvent((timer_e)i)) {
409       timerColorInfo[i] = globalColorArray[globalColorIndex];
410       globalColorIndex = (globalColorIndex + 1) % numGlobalColors;
411     }
412   }
413   return;
414 }
415 
416 void kmp_stats_output_module::printTimerStats(FILE *statsOut,
417                                               statistic const *theStats,
418                                               statistic const *totalStats) {
419   fprintf(statsOut, "Timer,                      SampleCount,    Min,      "
420                     "Mean,       Max,     Total,        SD\n");
421   for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
422     statistic const *stat = &theStats[s];
423     char tag = timeStat::noUnits(s) ? ' ' : 'T';
424 
425     fprintf(statsOut, "%-28s, %s\n", timeStat::name(s),
426             stat->format(tag, true).c_str());
427   }
428   // Also print the Total_ versions of times.
429   for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
430     char tag = timeStat::noUnits(s) ? ' ' : 'T';
431     if (totalStats && !timeStat::noTotal(s))
432       fprintf(statsOut, "Total_%-22s, %s\n", timeStat::name(s),
433               totalStats[s].format(tag, true).c_str());
434   }
435 }
436 
437 void kmp_stats_output_module::printCounterStats(FILE *statsOut,
438                                                 statistic const *theStats) {
439   fprintf(statsOut, "Counter,                 ThreadCount,    Min,      Mean,  "
440                     "     Max,     Total,        SD\n");
441   for (int s = 0; s < COUNTER_LAST; s++) {
442     statistic const *stat = &theStats[s];
443     fprintf(statsOut, "%-25s, %s\n", counter::name(counter_e(s)),
444             stat->format(' ', true).c_str());
445   }
446 }
447 
448 void kmp_stats_output_module::printCounters(FILE *statsOut,
449                                             counter const *theCounters) {
450   // We print all the counters even if they are zero.
451   // That makes it easier to slice them into a spreadsheet if you need to.
452   fprintf(statsOut, "\nCounter,                    Count\n");
453   for (int c = 0; c < COUNTER_LAST; c++) {
454     counter const *stat = &theCounters[c];
455     fprintf(statsOut, "%-25s, %s\n", counter::name(counter_e(c)),
456             formatSI(stat->getValue(), 9, ' ').c_str());
457   }
458 }
459 
460 void kmp_stats_output_module::printEvents(FILE *eventsOut,
461                                           kmp_stats_event_vector *theEvents,
462                                           int gtid) {
463   // sort by start time before printing
464   theEvents->sort();
465   for (int i = 0; i < theEvents->size(); i++) {
466     kmp_stats_event ev = theEvents->at(i);
467     rgb_color color = getEventColor(ev.getTimerName());
468     fprintf(eventsOut, "%d %lu %lu %1.1f rgb(%1.1f,%1.1f,%1.1f) %s\n", gtid,
469             ev.getStart(), ev.getStop(), 1.2 - (ev.getNestLevel() * 0.2),
470             color.r, color.g, color.b, timeStat::name(ev.getTimerName()));
471   }
472   return;
473 }
474 
475 void kmp_stats_output_module::windupExplicitTimers() {
476   // Wind up any explicit timers. We assume that it's fair at this point to just
477   // walk all the explcit timers in all threads and say "it's over".
478   // If the timer wasn't running, this won't record anything anyway.
479   kmp_stats_list::iterator it;
480   for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
481     kmp_stats_list *ptr = *it;
482     ptr->getPartitionedTimers()->windup();
483     for (int timer = 0; timer < EXPLICIT_TIMER_LAST; timer++) {
484       ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer, ptr);
485     }
486   }
487 }
488 
489 void kmp_stats_output_module::printPloticusFile() {
490   int i;
491   int size = __kmp_stats_list->size();
492   FILE *plotOut = fopen(plotFileName, "w+");
493 
494   fprintf(plotOut, "#proc page\n"
495                    "   pagesize: 15 10\n"
496                    "   scale: 1.0\n\n");
497 
498   fprintf(plotOut, "#proc getdata\n"
499                    "   file: %s\n\n",
500           eventsFileName);
501 
502   fprintf(plotOut, "#proc areadef\n"
503                    "   title: OpenMP Sampling Timeline\n"
504                    "   titledetails: align=center size=16\n"
505                    "   rectangle: 1 1 13 9\n"
506                    "   xautorange: datafield=2,3\n"
507                    "   yautorange: -1 %d\n\n",
508           size);
509 
510   fprintf(plotOut, "#proc xaxis\n"
511                    "   stubs: inc\n"
512                    "   stubdetails: size=12\n"
513                    "   label: Time (ticks)\n"
514                    "   labeldetails: size=14\n\n");
515 
516   fprintf(plotOut, "#proc yaxis\n"
517                    "   stubs: inc 1\n"
518                    "   stubrange: 0 %d\n"
519                    "   stubdetails: size=12\n"
520                    "   label: Thread #\n"
521                    "   labeldetails: size=14\n\n",
522           size - 1);
523 
524   fprintf(plotOut, "#proc bars\n"
525                    "   exactcolorfield: 5\n"
526                    "   axis: x\n"
527                    "   locfield: 1\n"
528                    "   segmentfields: 2 3\n"
529                    "   barwidthfield: 4\n\n");
530 
531   // create legend entries corresponding to the timer color
532   for (i = 0; i < TIMER_LAST; i++) {
533     if (timeStat::logEvent((timer_e)i)) {
534       rgb_color c = getEventColor((timer_e)i);
535       fprintf(plotOut, "#proc legendentry\n"
536                        "   sampletype: color\n"
537                        "   label: %s\n"
538                        "   details: rgb(%1.1f,%1.1f,%1.1f)\n\n",
539               timeStat::name((timer_e)i), c.r, c.g, c.b);
540     }
541   }
542 
543   fprintf(plotOut, "#proc legend\n"
544                    "   format: down\n"
545                    "   location: max max\n\n");
546   fclose(plotOut);
547   return;
548 }
549 
550 /* Print some useful information about
551    * the date and time this experiment ran.
552    * the machine on which it ran.
553    We output all of this as stylised comments, though we may decide to parse
554    some of it. */
555 void kmp_stats_output_module::printHeaderInfo(FILE *statsOut) {
556   std::time_t now = std::time(0);
557   char buffer[40];
558   char hostName[80];
559 
560   std::strftime(&buffer[0], sizeof(buffer), "%c", std::localtime(&now));
561   fprintf(statsOut, "# Time of run: %s\n", &buffer[0]);
562   if (gethostname(&hostName[0], sizeof(hostName)) == 0)
563     fprintf(statsOut, "# Hostname: %s\n", &hostName[0]);
564 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
565   fprintf(statsOut, "# CPU:  %s\n", &__kmp_cpuinfo.name[0]);
566   fprintf(statsOut, "# Family: %d, Model: %d, Stepping: %d\n",
567           __kmp_cpuinfo.family, __kmp_cpuinfo.model, __kmp_cpuinfo.stepping);
568   if (__kmp_cpuinfo.frequency == 0)
569     fprintf(statsOut, "# Nominal frequency: Unknown\n");
570   else
571     fprintf(statsOut, "# Nominal frequency: %sz\n",
572             formatSI(double(__kmp_cpuinfo.frequency), 9, 'H').c_str());
573 #endif
574 }
575 
576 void kmp_stats_output_module::outputStats(const char *heading) {
577   // Stop all the explicit timers in all threads
578   // Do this before declaring the local statistics because thay have
579   // constructors so will take time to create.
580   windupExplicitTimers();
581 
582   statistic allStats[TIMER_LAST];
583   statistic totalStats[TIMER_LAST]; /* Synthesized, cross threads versions of
584                                        normal timer stats */
585   statistic allCounters[COUNTER_LAST];
586 
587   FILE *statsOut =
588       !outputFileName.empty() ? fopen(outputFileName.c_str(), "a+") : stderr;
589   if (!statsOut)
590     statsOut = stderr;
591 
592   FILE *eventsOut;
593   if (eventPrintingEnabled()) {
594     eventsOut = fopen(eventsFileName, "w+");
595   }
596 
597   printHeaderInfo(statsOut);
598   fprintf(statsOut, "%s\n", heading);
599   // Accumulate across threads.
600   kmp_stats_list::iterator it;
601   for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
602     int t = (*it)->getGtid();
603     // Output per thread stats if requested.
604     if (printPerThreadFlag) {
605       fprintf(statsOut, "Thread %d\n", t);
606       printTimerStats(statsOut, (*it)->getTimers(), 0);
607       printCounters(statsOut, (*it)->getCounters());
608       fprintf(statsOut, "\n");
609     }
610     // Output per thread events if requested.
611     if (eventPrintingEnabled()) {
612       kmp_stats_event_vector events = (*it)->getEventVector();
613       printEvents(eventsOut, &events, t);
614     }
615 
616     // Accumulate timers.
617     for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
618       // See if we should ignore this timer when aggregating
619       if ((timeStat::masterOnly(s) && (t != 0)) || // Timer only valid on master
620           // and this thread is worker
621           (timeStat::workerOnly(s) && (t == 0)) // Timer only valid on worker
622           // and this thread is the master
623           ) {
624         continue;
625       }
626 
627       statistic *threadStat = (*it)->getTimer(s);
628       allStats[s] += *threadStat;
629 
630       // Add Total stats for timers that are valid in more than one thread
631       if (!timeStat::noTotal(s))
632         totalStats[s].addSample(threadStat->getTotal());
633     }
634 
635     // Accumulate counters.
636     for (counter_e c = counter_e(0); c < COUNTER_LAST; c = counter_e(c + 1)) {
637       if (counter::masterOnly(c) && t != 0)
638         continue;
639       allCounters[c].addSample((*it)->getCounter(c)->getValue());
640     }
641   }
642 
643   if (eventPrintingEnabled()) {
644     printPloticusFile();
645     fclose(eventsOut);
646   }
647 
648   fprintf(statsOut, "Aggregate for all threads\n");
649   printTimerStats(statsOut, &allStats[0], &totalStats[0]);
650   fprintf(statsOut, "\n");
651   printCounterStats(statsOut, &allCounters[0]);
652 
653   if (statsOut != stderr)
654     fclose(statsOut);
655 }
656 
657 /* *************  exported C functions ************** */
658 
659 // no name mangling for these functions, we want the c files to be able to get
660 // at these functions
661 extern "C" {
662 
663 void __kmp_reset_stats() {
664   kmp_stats_list::iterator it;
665   for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
666     timeStat *timers = (*it)->getTimers();
667     counter *counters = (*it)->getCounters();
668     explicitTimer *eTimers = (*it)->getExplicitTimers();
669 
670     for (int t = 0; t < TIMER_LAST; t++)
671       timers[t].reset();
672 
673     for (int c = 0; c < COUNTER_LAST; c++)
674       counters[c].reset();
675 
676     for (int t = 0; t < EXPLICIT_TIMER_LAST; t++)
677       eTimers[t].reset();
678 
679     // reset the event vector so all previous events are "erased"
680     (*it)->resetEventVector();
681   }
682 }
683 
684 // This function will reset all stats and stop all threads' explicit timers if
685 // they haven't been stopped already.
686 void __kmp_output_stats(const char *heading) {
687   __kmp_stats_global_output->outputStats(heading);
688   __kmp_reset_stats();
689 }
690 
691 void __kmp_accumulate_stats_at_exit(void) {
692   // Only do this once.
693   if (KMP_XCHG_FIXED32(&statsPrinted, 1) != 0)
694     return;
695 
696   __kmp_output_stats("Statistics on exit");
697 }
698 
699 void __kmp_stats_init(void) {
700   __kmp_init_tas_lock(&__kmp_stats_lock);
701   __kmp_stats_start_time = tsc_tick_count::now();
702   __kmp_stats_global_output = new kmp_stats_output_module();
703   __kmp_stats_list = new kmp_stats_list();
704 }
705 
706 void __kmp_stats_fini(void) {
707   __kmp_accumulate_stats_at_exit();
708   __kmp_stats_list->deallocate();
709   delete __kmp_stats_global_output;
710   delete __kmp_stats_list;
711 }
712 
713 } // extern "C"
714