1 /** @file kmp_stats.cpp 2 * Statistics gathering and processing. 3 */ 4 5 6 //===----------------------------------------------------------------------===// 7 // 8 // The LLVM Compiler Infrastructure 9 // 10 // This file is dual licensed under the MIT and the University of Illinois Open 11 // Source Licenses. See LICENSE.txt for details. 12 // 13 //===----------------------------------------------------------------------===// 14 15 16 #if KMP_STATS_ENABLED 17 18 #include "kmp.h" 19 #include "kmp_str.h" 20 #include "kmp_lock.h" 21 #include "kmp_stats.h" 22 23 #include <algorithm> 24 #include <sstream> 25 #include <iomanip> 26 #include <stdlib.h> // for atexit 27 28 #define STRINGIZE2(x) #x 29 #define STRINGIZE(x) STRINGIZE2(x) 30 31 #define expandName(name,flags,ignore) {STRINGIZE(name),flags}, 32 statInfo timeStat::timerInfo[] = { 33 KMP_FOREACH_TIMER(expandName,0) 34 {0,0} 35 }; 36 const statInfo counter::counterInfo[] = { 37 KMP_FOREACH_COUNTER(expandName,0) 38 {0,0} 39 }; 40 #undef expandName 41 42 #define expandName(ignore1,ignore2,ignore3) {0.0,0.0,0.0}, 43 kmp_stats_output_module::rgb_color kmp_stats_output_module::timerColorInfo[] = { 44 KMP_FOREACH_TIMER(expandName,0) 45 {0.0,0.0,0.0} 46 }; 47 #undef expandName 48 49 const kmp_stats_output_module::rgb_color kmp_stats_output_module::globalColorArray[] = { 50 {1.0, 0.0, 0.0}, // red 51 {1.0, 0.6, 0.0}, // orange 52 {1.0, 1.0, 0.0}, // yellow 53 {0.0, 1.0, 0.0}, // green 54 {0.0, 0.0, 1.0}, // blue 55 {0.6, 0.2, 0.8}, // purple 56 {1.0, 0.0, 1.0}, // magenta 57 {0.0, 0.4, 0.2}, // dark green 58 {1.0, 1.0, 0.6}, // light yellow 59 {0.6, 0.4, 0.6}, // dirty purple 60 {0.0, 1.0, 1.0}, // cyan 61 {1.0, 0.4, 0.8}, // pink 62 {0.5, 0.5, 0.5}, // grey 63 {0.8, 0.7, 0.5}, // brown 64 {0.6, 0.6, 1.0}, // light blue 65 {1.0, 0.7, 0.5}, // peach 66 {0.8, 0.5, 1.0}, // lavender 67 {0.6, 0.0, 0.0}, // dark red 68 {0.7, 0.6, 0.0}, // gold 69 {0.0, 0.0, 0.0} // black 70 }; 71 72 // Ensure that the atexit handler only runs once. 73 static uint32_t statsPrinted = 0; 74 75 // output interface 76 static kmp_stats_output_module __kmp_stats_global_output; 77 78 /* ****************************************************** */ 79 /* ************* statistic member functions ************* */ 80 81 void statistic::addSample(double sample) 82 { 83 double delta = sample - meanVal; 84 85 sampleCount = sampleCount + 1; 86 meanVal = meanVal + delta/sampleCount; 87 m2 = m2 + delta*(sample - meanVal); 88 89 minVal = std::min(minVal, sample); 90 maxVal = std::max(maxVal, sample); 91 } 92 93 statistic & statistic::operator+= (const statistic & other) 94 { 95 if (sampleCount == 0) 96 { 97 *this = other; 98 return *this; 99 } 100 101 uint64_t newSampleCount = sampleCount + other.sampleCount; 102 double dnsc = double(newSampleCount); 103 double dsc = double(sampleCount); 104 double dscBydnsc = dsc/dnsc; 105 double dosc = double(other.sampleCount); 106 double delta = other.meanVal - meanVal; 107 108 // Try to order these calculations to avoid overflows. 109 // If this were Fortran, then the compiler would not be able to re-order over brackets. 110 // In C++ it may be legal to do that (we certainly hope it doesn't, and CC+ Programming Language 2nd edition 111 // suggests it shouldn't, since it says that exploitation of associativity can only be made if the operation 112 // really is associative (which floating addition isn't...)). 113 meanVal = meanVal*dscBydnsc + other.meanVal*(1-dscBydnsc); 114 m2 = m2 + other.m2 + dscBydnsc*dosc*delta*delta; 115 minVal = std::min (minVal, other.minVal); 116 maxVal = std::max (maxVal, other.maxVal); 117 sampleCount = newSampleCount; 118 119 120 return *this; 121 } 122 123 void statistic::scale(double factor) 124 { 125 minVal = minVal*factor; 126 maxVal = maxVal*factor; 127 meanVal= meanVal*factor; 128 m2 = m2*factor*factor; 129 return; 130 } 131 132 std::string statistic::format(char unit, bool total) const 133 { 134 std::string result = formatSI(sampleCount,9,' '); 135 136 result = result + std::string(", ") + formatSI(minVal, 9, unit); 137 result = result + std::string(", ") + formatSI(meanVal, 9, unit); 138 result = result + std::string(", ") + formatSI(maxVal, 9, unit); 139 if (total) 140 result = result + std::string(", ") + formatSI(meanVal*sampleCount, 9, unit); 141 result = result + std::string(", ") + formatSI(getSD(), 9, unit); 142 143 return result; 144 } 145 146 /* ********************************************************** */ 147 /* ************* explicitTimer member functions ************* */ 148 149 void explicitTimer::start(timer_e timerEnumValue) { 150 startTime = tsc_tick_count::now(); 151 if(timeStat::logEvent(timerEnumValue)) { 152 __kmp_stats_thread_ptr->incrementNestValue(); 153 } 154 return; 155 } 156 157 void explicitTimer::stop(timer_e timerEnumValue) { 158 if (startTime.getValue() == 0) 159 return; 160 161 tsc_tick_count finishTime = tsc_tick_count::now(); 162 163 //stat->addSample ((tsc_tick_count::now() - startTime).ticks()); 164 stat->addSample ((finishTime - startTime).ticks()); 165 166 if(timeStat::logEvent(timerEnumValue)) { 167 __kmp_stats_thread_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue); 168 __kmp_stats_thread_ptr->decrementNestValue(); 169 } 170 171 /* We accept the risk that we drop a sample because it really did start at t==0. */ 172 startTime = 0; 173 return; 174 } 175 176 /* ******************************************************************* */ 177 /* ************* kmp_stats_event_vector member functions ************* */ 178 179 void kmp_stats_event_vector::deallocate() { 180 __kmp_free(events); 181 internal_size = 0; 182 allocated_size = 0; 183 events = NULL; 184 } 185 186 // This function is for qsort() which requires the compare function to return 187 // either a negative number if event1 < event2, a positive number if event1 > event2 188 // or zero if event1 == event2. 189 // This sorts by start time (lowest to highest). 190 int compare_two_events(const void* event1, const void* event2) { 191 kmp_stats_event* ev1 = (kmp_stats_event*)event1; 192 kmp_stats_event* ev2 = (kmp_stats_event*)event2; 193 194 if(ev1->getStart() < ev2->getStart()) return -1; 195 else if(ev1->getStart() > ev2->getStart()) return 1; 196 else return 0; 197 } 198 199 void kmp_stats_event_vector::sort() { 200 qsort(events, internal_size, sizeof(kmp_stats_event), compare_two_events); 201 } 202 203 /* *********************************************************** */ 204 /* ************* kmp_stats_list member functions ************* */ 205 206 // returns a pointer to newly created stats node 207 kmp_stats_list* kmp_stats_list::push_back(int gtid) { 208 kmp_stats_list* newnode = (kmp_stats_list*)__kmp_allocate(sizeof(kmp_stats_list)); 209 // placement new, only requires space and pointer and initializes (so __kmp_allocate instead of C++ new[] is used) 210 new (newnode) kmp_stats_list(); 211 newnode->setGtid(gtid); 212 newnode->prev = this->prev; 213 newnode->next = this; 214 newnode->prev->next = newnode; 215 newnode->next->prev = newnode; 216 return newnode; 217 } 218 void kmp_stats_list::deallocate() { 219 kmp_stats_list* ptr = this->next; 220 kmp_stats_list* delptr = this->next; 221 while(ptr != this) { 222 delptr = ptr; 223 ptr=ptr->next; 224 // placement new means we have to explicitly call destructor. 225 delptr->_event_vector.deallocate(); 226 delptr->~kmp_stats_list(); 227 __kmp_free(delptr); 228 } 229 } 230 kmp_stats_list::iterator kmp_stats_list::begin() { 231 kmp_stats_list::iterator it; 232 it.ptr = this->next; 233 return it; 234 } 235 kmp_stats_list::iterator kmp_stats_list::end() { 236 kmp_stats_list::iterator it; 237 it.ptr = this; 238 return it; 239 } 240 int kmp_stats_list::size() { 241 int retval; 242 kmp_stats_list::iterator it; 243 for(retval=0, it=begin(); it!=end(); it++, retval++) {} 244 return retval; 245 } 246 247 /* ********************************************************************* */ 248 /* ************* kmp_stats_list::iterator member functions ************* */ 249 250 kmp_stats_list::iterator::iterator() : ptr(NULL) {} 251 kmp_stats_list::iterator::~iterator() {} 252 kmp_stats_list::iterator kmp_stats_list::iterator::operator++() { 253 this->ptr = this->ptr->next; 254 return *this; 255 } 256 kmp_stats_list::iterator kmp_stats_list::iterator::operator++(int dummy) { 257 this->ptr = this->ptr->next; 258 return *this; 259 } 260 kmp_stats_list::iterator kmp_stats_list::iterator::operator--() { 261 this->ptr = this->ptr->prev; 262 return *this; 263 } 264 kmp_stats_list::iterator kmp_stats_list::iterator::operator--(int dummy) { 265 this->ptr = this->ptr->prev; 266 return *this; 267 } 268 bool kmp_stats_list::iterator::operator!=(const kmp_stats_list::iterator & rhs) { 269 return this->ptr!=rhs.ptr; 270 } 271 bool kmp_stats_list::iterator::operator==(const kmp_stats_list::iterator & rhs) { 272 return this->ptr==rhs.ptr; 273 } 274 kmp_stats_list* kmp_stats_list::iterator::operator*() const { 275 return this->ptr; 276 } 277 278 /* *************************************************************** */ 279 /* ************* kmp_stats_output_module functions ************** */ 280 281 const char* kmp_stats_output_module::outputFileName = NULL; 282 const char* kmp_stats_output_module::eventsFileName = NULL; 283 const char* kmp_stats_output_module::plotFileName = NULL; 284 int kmp_stats_output_module::printPerThreadFlag = 0; 285 int kmp_stats_output_module::printPerThreadEventsFlag = 0; 286 287 // init() is called very near the beginning of execution time in the constructor of __kmp_stats_global_output 288 void kmp_stats_output_module::init() 289 { 290 char * statsFileName = getenv("KMP_STATS_FILE"); 291 eventsFileName = getenv("KMP_STATS_EVENTS_FILE"); 292 plotFileName = getenv("KMP_STATS_PLOT_FILE"); 293 char * threadStats = getenv("KMP_STATS_THREADS"); 294 char * threadEvents = getenv("KMP_STATS_EVENTS"); 295 296 // set the stats output filenames based on environment variables and defaults 297 outputFileName = statsFileName; 298 eventsFileName = eventsFileName ? eventsFileName : "events.dat"; 299 plotFileName = plotFileName ? plotFileName : "events.plt"; 300 301 // set the flags based on environment variables matching: true, on, 1, .true. , .t. , yes 302 printPerThreadFlag = __kmp_str_match_true(threadStats); 303 printPerThreadEventsFlag = __kmp_str_match_true(threadEvents); 304 305 if(printPerThreadEventsFlag) { 306 // assigns a color to each timer for printing 307 setupEventColors(); 308 } else { 309 // will clear flag so that no event will be logged 310 timeStat::clearEventFlags(); 311 } 312 313 return; 314 } 315 316 void kmp_stats_output_module::setupEventColors() { 317 int i; 318 int globalColorIndex = 0; 319 int numGlobalColors = sizeof(globalColorArray) / sizeof(rgb_color); 320 for(i=0;i<TIMER_LAST;i++) { 321 if(timeStat::logEvent((timer_e)i)) { 322 timerColorInfo[i] = globalColorArray[globalColorIndex]; 323 globalColorIndex = (globalColorIndex+1)%numGlobalColors; 324 } 325 } 326 return; 327 } 328 329 void kmp_stats_output_module::printStats(FILE *statsOut, statistic const * theStats, bool areTimers) 330 { 331 if (areTimers) 332 { 333 // Check if we have useful timers, since we don't print zero value timers we need to avoid 334 // printing a header and then no data. 335 bool haveTimers = false; 336 for (int s = 0; s<TIMER_LAST; s++) 337 { 338 if (theStats[s].getCount() != 0) 339 { 340 haveTimers = true; 341 break; 342 } 343 } 344 if (!haveTimers) 345 return; 346 } 347 348 // Print 349 const char * title = areTimers ? "Timer, SampleCount," : "Counter, ThreadCount,"; 350 fprintf (statsOut, "%s Min, Mean, Max, Total, SD\n", title); 351 if (areTimers) { 352 for (int s = 0; s<TIMER_LAST; s++) { 353 statistic const * stat = &theStats[s]; 354 if (stat->getCount() != 0) { 355 char tag = timeStat::noUnits(timer_e(s)) ? ' ' : 'T'; 356 fprintf (statsOut, "%-25s, %s\n", timeStat::name(timer_e(s)), stat->format(tag, true).c_str()); 357 } 358 } 359 } else { // Counters 360 for (int s = 0; s<COUNTER_LAST; s++) { 361 statistic const * stat = &theStats[s]; 362 fprintf (statsOut, "%-25s, %s\n", counter::name(counter_e(s)), stat->format(' ', true).c_str()); 363 } 364 } 365 } 366 367 void kmp_stats_output_module::printCounters(FILE * statsOut, counter const * theCounters) 368 { 369 // We print all the counters even if they are zero. 370 // That makes it easier to slice them into a spreadsheet if you need to. 371 fprintf (statsOut, "\nCounter, Count\n"); 372 for (int c = 0; c<COUNTER_LAST; c++) { 373 counter const * stat = &theCounters[c]; 374 fprintf (statsOut, "%-25s, %s\n", counter::name(counter_e(c)), formatSI(stat->getValue(), 9, ' ').c_str()); 375 } 376 } 377 378 void kmp_stats_output_module::printEvents(FILE* eventsOut, kmp_stats_event_vector* theEvents, int gtid) { 379 // sort by start time before printing 380 theEvents->sort(); 381 for (int i = 0; i < theEvents->size(); i++) { 382 kmp_stats_event ev = theEvents->at(i); 383 rgb_color color = getEventColor(ev.getTimerName()); 384 fprintf(eventsOut, "%d %lu %lu %1.1f rgb(%1.1f,%1.1f,%1.1f) %s\n", 385 gtid, 386 ev.getStart(), 387 ev.getStop(), 388 1.2 - (ev.getNestLevel() * 0.2), 389 color.r, color.g, color.b, 390 timeStat::name(ev.getTimerName()) 391 ); 392 } 393 return; 394 } 395 396 void kmp_stats_output_module::windupExplicitTimers() 397 { 398 // Wind up any explicit timers. We assume that it's fair at this point to just walk all the explcit timers in all threads 399 // and say "it's over". 400 // If the timer wasn't running, this won't record anything anyway. 401 kmp_stats_list::iterator it; 402 for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) { 403 for (int timer=0; timer<EXPLICIT_TIMER_LAST; timer++) { 404 (*it)->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer); 405 } 406 } 407 } 408 409 void kmp_stats_output_module::printPloticusFile() { 410 int i; 411 int size = __kmp_stats_list.size(); 412 FILE* plotOut = fopen(plotFileName, "w+"); 413 414 fprintf(plotOut, "#proc page\n" 415 " pagesize: 15 10\n" 416 " scale: 1.0\n\n"); 417 418 fprintf(plotOut, "#proc getdata\n" 419 " file: %s\n\n", 420 eventsFileName); 421 422 fprintf(plotOut, "#proc areadef\n" 423 " title: OpenMP Sampling Timeline\n" 424 " titledetails: align=center size=16\n" 425 " rectangle: 1 1 13 9\n" 426 " xautorange: datafield=2,3\n" 427 " yautorange: -1 %d\n\n", 428 size); 429 430 fprintf(plotOut, "#proc xaxis\n" 431 " stubs: inc\n" 432 " stubdetails: size=12\n" 433 " label: Time (ticks)\n" 434 " labeldetails: size=14\n\n"); 435 436 fprintf(plotOut, "#proc yaxis\n" 437 " stubs: inc 1\n" 438 " stubrange: 0 %d\n" 439 " stubdetails: size=12\n" 440 " label: Thread #\n" 441 " labeldetails: size=14\n\n", 442 size-1); 443 444 fprintf(plotOut, "#proc bars\n" 445 " exactcolorfield: 5\n" 446 " axis: x\n" 447 " locfield: 1\n" 448 " segmentfields: 2 3\n" 449 " barwidthfield: 4\n\n"); 450 451 // create legend entries corresponding to the timer color 452 for(i=0;i<TIMER_LAST;i++) { 453 if(timeStat::logEvent((timer_e)i)) { 454 rgb_color c = getEventColor((timer_e)i); 455 fprintf(plotOut, "#proc legendentry\n" 456 " sampletype: color\n" 457 " label: %s\n" 458 " details: rgb(%1.1f,%1.1f,%1.1f)\n\n", 459 timeStat::name((timer_e)i), 460 c.r, c.g, c.b); 461 462 } 463 } 464 465 fprintf(plotOut, "#proc legend\n" 466 " format: down\n" 467 " location: max max\n\n"); 468 fclose(plotOut); 469 return; 470 } 471 472 void kmp_stats_output_module::outputStats(const char* heading) 473 { 474 statistic allStats[TIMER_LAST]; 475 statistic allCounters[COUNTER_LAST]; 476 477 // stop all the explicit timers for all threads 478 windupExplicitTimers(); 479 480 FILE * eventsOut; 481 FILE * statsOut = outputFileName ? fopen (outputFileName, "a+") : stderr; 482 483 if (eventPrintingEnabled()) { 484 eventsOut = fopen(eventsFileName, "w+"); 485 } 486 487 if (!statsOut) 488 statsOut = stderr; 489 490 fprintf(statsOut, "%s\n",heading); 491 // Accumulate across threads. 492 kmp_stats_list::iterator it; 493 for (it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) { 494 int t = (*it)->getGtid(); 495 // Output per thread stats if requested. 496 if (perThreadPrintingEnabled()) { 497 fprintf (statsOut, "Thread %d\n", t); 498 printStats(statsOut, (*it)->getTimers(), true); 499 printCounters(statsOut, (*it)->getCounters()); 500 fprintf(statsOut,"\n"); 501 } 502 // Output per thread events if requested. 503 if (eventPrintingEnabled()) { 504 kmp_stats_event_vector events = (*it)->getEventVector(); 505 printEvents(eventsOut, &events, t); 506 } 507 508 for (int s = 0; s<TIMER_LAST; s++) { 509 // See if we should ignore this timer when aggregating 510 if ((timeStat::masterOnly(timer_e(s)) && (t != 0)) || // Timer is only valid on the master and this thread is a worker 511 (timeStat::workerOnly(timer_e(s)) && (t == 0)) || // Timer is only valid on a worker and this thread is the master 512 timeStat::synthesized(timer_e(s)) // It's a synthesized stat, so there's no raw data for it. 513 ) 514 { 515 continue; 516 } 517 518 statistic * threadStat = (*it)->getTimer(timer_e(s)); 519 allStats[s] += *threadStat; 520 } 521 522 // Special handling for synthesized statistics. 523 // These just have to be coded specially here for now. 524 // At present we only have one: the total parallel work done in each thread. 525 // The variance here makes it easy to see load imbalance over the whole program (though, of course, 526 // it's possible to have a code with awful load balance in every parallel region but perfect load 527 // balance oever the whole program.) 528 allStats[TIMER_Total_work].addSample ((*it)->getTimer(TIMER_OMP_work)->getTotal()); 529 530 // Time waiting for work (synthesized) 531 if ((t != 0) || !timeStat::workerOnly(timer_e(TIMER_OMP_await_work))) 532 allStats[TIMER_Total_await_work].addSample ((*it)->getTimer(TIMER_OMP_await_work)->getTotal()); 533 534 // Time in explicit barriers. 535 allStats[TIMER_Total_barrier].addSample ((*it)->getTimer(TIMER_OMP_barrier)->getTotal()); 536 537 for (int c = 0; c<COUNTER_LAST; c++) { 538 if (counter::masterOnly(counter_e(c)) && t != 0) 539 continue; 540 allCounters[c].addSample ((*it)->getCounter(counter_e(c))->getValue()); 541 } 542 } 543 544 if (eventPrintingEnabled()) { 545 printPloticusFile(); 546 fclose(eventsOut); 547 } 548 549 fprintf (statsOut, "Aggregate for all threads\n"); 550 printStats (statsOut, &allStats[0], true); 551 fprintf (statsOut, "\n"); 552 printStats (statsOut, &allCounters[0], false); 553 554 if (statsOut != stderr) 555 fclose(statsOut); 556 557 } 558 559 /* ************************************************** */ 560 /* ************* exported C functions ************** */ 561 562 // no name mangling for these functions, we want the c files to be able to get at these functions 563 extern "C" { 564 565 void __kmp_reset_stats() 566 { 567 kmp_stats_list::iterator it; 568 for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) { 569 timeStat * timers = (*it)->getTimers(); 570 counter * counters = (*it)->getCounters(); 571 explicitTimer * eTimers = (*it)->getExplicitTimers(); 572 573 for (int t = 0; t<TIMER_LAST; t++) 574 timers[t].reset(); 575 576 for (int c = 0; c<COUNTER_LAST; c++) 577 counters[c].reset(); 578 579 for (int t=0; t<EXPLICIT_TIMER_LAST; t++) 580 eTimers[t].reset(); 581 582 // reset the event vector so all previous events are "erased" 583 (*it)->resetEventVector(); 584 585 // May need to restart the explicit timers in thread zero? 586 } 587 KMP_START_EXPLICIT_TIMER(OMP_serial); 588 KMP_START_EXPLICIT_TIMER(OMP_start_end); 589 } 590 591 // This function will reset all stats and stop all threads' explicit timers if they haven't been stopped already. 592 void __kmp_output_stats(const char * heading) 593 { 594 __kmp_stats_global_output.outputStats(heading); 595 __kmp_reset_stats(); 596 } 597 598 void __kmp_accumulate_stats_at_exit(void) 599 { 600 // Only do this once. 601 if (KMP_XCHG_FIXED32(&statsPrinted, 1) != 0) 602 return; 603 604 __kmp_output_stats("Statistics on exit"); 605 return; 606 } 607 608 void __kmp_stats_init(void) 609 { 610 return; 611 } 612 613 } // extern "C" 614 615 #endif // KMP_STATS_ENABLED 616