1 /** @file kmp_stats.cpp 2 * Statistics gathering and processing. 3 */ 4 5 6 //===----------------------------------------------------------------------===// 7 // 8 // The LLVM Compiler Infrastructure 9 // 10 // This file is dual licensed under the MIT and the University of Illinois Open 11 // Source Licenses. See LICENSE.txt for details. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "kmp.h" 16 #include "kmp_str.h" 17 #include "kmp_lock.h" 18 #include "kmp_stats.h" 19 20 #include <algorithm> 21 #include <sstream> 22 #include <iomanip> 23 #include <stdlib.h> // for atexit 24 25 #define STRINGIZE2(x) #x 26 #define STRINGIZE(x) STRINGIZE2(x) 27 28 #define expandName(name,flags,ignore) {STRINGIZE(name),flags}, 29 statInfo timeStat::timerInfo[] = { 30 KMP_FOREACH_TIMER(expandName,0) 31 {0,0} 32 }; 33 const statInfo counter::counterInfo[] = { 34 KMP_FOREACH_COUNTER(expandName,0) 35 {0,0} 36 }; 37 #undef expandName 38 39 #define expandName(ignore1,ignore2,ignore3) {0.0,0.0,0.0}, 40 kmp_stats_output_module::rgb_color kmp_stats_output_module::timerColorInfo[] = { 41 KMP_FOREACH_TIMER(expandName,0) 42 {0.0,0.0,0.0} 43 }; 44 #undef expandName 45 46 const kmp_stats_output_module::rgb_color kmp_stats_output_module::globalColorArray[] = { 47 {1.0, 0.0, 0.0}, // red 48 {1.0, 0.6, 0.0}, // orange 49 {1.0, 1.0, 0.0}, // yellow 50 {0.0, 1.0, 0.0}, // green 51 {0.0, 0.0, 1.0}, // blue 52 {0.6, 0.2, 0.8}, // purple 53 {1.0, 0.0, 1.0}, // magenta 54 {0.0, 0.4, 0.2}, // dark green 55 {1.0, 1.0, 0.6}, // light yellow 56 {0.6, 0.4, 0.6}, // dirty purple 57 {0.0, 1.0, 1.0}, // cyan 58 {1.0, 0.4, 0.8}, // pink 59 {0.5, 0.5, 0.5}, // grey 60 {0.8, 0.7, 0.5}, // brown 61 {0.6, 0.6, 1.0}, // light blue 62 {1.0, 0.7, 0.5}, // peach 63 {0.8, 0.5, 1.0}, // lavender 64 {0.6, 0.0, 0.0}, // dark red 65 {0.7, 0.6, 0.0}, // gold 66 {0.0, 0.0, 0.0} // black 67 }; 68 69 // Ensure that the atexit handler only runs once. 70 static uint32_t statsPrinted = 0; 71 72 // output interface 73 static kmp_stats_output_module __kmp_stats_global_output; 74 75 /* ****************************************************** */ 76 /* ************* statistic member functions ************* */ 77 78 void statistic::addSample(double sample) 79 { 80 double delta = sample - meanVal; 81 82 sampleCount = sampleCount + 1; 83 meanVal = meanVal + delta/sampleCount; 84 m2 = m2 + delta*(sample - meanVal); 85 86 minVal = std::min(minVal, sample); 87 maxVal = std::max(maxVal, sample); 88 } 89 90 statistic & statistic::operator+= (const statistic & other) 91 { 92 if (sampleCount == 0) 93 { 94 *this = other; 95 return *this; 96 } 97 98 uint64_t newSampleCount = sampleCount + other.sampleCount; 99 double dnsc = double(newSampleCount); 100 double dsc = double(sampleCount); 101 double dscBydnsc = dsc/dnsc; 102 double dosc = double(other.sampleCount); 103 double delta = other.meanVal - meanVal; 104 105 // Try to order these calculations to avoid overflows. 106 // If this were Fortran, then the compiler would not be able to re-order over brackets. 107 // In C++ it may be legal to do that (we certainly hope it doesn't, and CC+ Programming Language 2nd edition 108 // suggests it shouldn't, since it says that exploitation of associativity can only be made if the operation 109 // really is associative (which floating addition isn't...)). 110 meanVal = meanVal*dscBydnsc + other.meanVal*(1-dscBydnsc); 111 m2 = m2 + other.m2 + dscBydnsc*dosc*delta*delta; 112 minVal = std::min (minVal, other.minVal); 113 maxVal = std::max (maxVal, other.maxVal); 114 sampleCount = newSampleCount; 115 116 117 return *this; 118 } 119 120 void statistic::scale(double factor) 121 { 122 minVal = minVal*factor; 123 maxVal = maxVal*factor; 124 meanVal= meanVal*factor; 125 m2 = m2*factor*factor; 126 return; 127 } 128 129 std::string statistic::format(char unit, bool total) const 130 { 131 std::string result = formatSI(sampleCount,9,' '); 132 133 result = result + std::string(", ") + formatSI(minVal, 9, unit); 134 result = result + std::string(", ") + formatSI(meanVal, 9, unit); 135 result = result + std::string(", ") + formatSI(maxVal, 9, unit); 136 if (total) 137 result = result + std::string(", ") + formatSI(meanVal*sampleCount, 9, unit); 138 result = result + std::string(", ") + formatSI(getSD(), 9, unit); 139 140 return result; 141 } 142 143 /* ********************************************************** */ 144 /* ************* explicitTimer member functions ************* */ 145 146 void explicitTimer::start(timer_e timerEnumValue) { 147 startTime = tsc_tick_count::now(); 148 if(timeStat::logEvent(timerEnumValue)) { 149 __kmp_stats_thread_ptr->incrementNestValue(); 150 } 151 return; 152 } 153 154 void explicitTimer::stop(timer_e timerEnumValue) { 155 if (startTime.getValue() == 0) 156 return; 157 158 tsc_tick_count finishTime = tsc_tick_count::now(); 159 160 //stat->addSample ((tsc_tick_count::now() - startTime).ticks()); 161 stat->addSample ((finishTime - startTime).ticks()); 162 163 if(timeStat::logEvent(timerEnumValue)) { 164 __kmp_stats_thread_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue); 165 __kmp_stats_thread_ptr->decrementNestValue(); 166 } 167 168 /* We accept the risk that we drop a sample because it really did start at t==0. */ 169 startTime = 0; 170 return; 171 } 172 173 /* ******************************************************************* */ 174 /* ************* kmp_stats_event_vector member functions ************* */ 175 176 void kmp_stats_event_vector::deallocate() { 177 __kmp_free(events); 178 internal_size = 0; 179 allocated_size = 0; 180 events = NULL; 181 } 182 183 // This function is for qsort() which requires the compare function to return 184 // either a negative number if event1 < event2, a positive number if event1 > event2 185 // or zero if event1 == event2. 186 // This sorts by start time (lowest to highest). 187 int compare_two_events(const void* event1, const void* event2) { 188 kmp_stats_event* ev1 = (kmp_stats_event*)event1; 189 kmp_stats_event* ev2 = (kmp_stats_event*)event2; 190 191 if(ev1->getStart() < ev2->getStart()) return -1; 192 else if(ev1->getStart() > ev2->getStart()) return 1; 193 else return 0; 194 } 195 196 void kmp_stats_event_vector::sort() { 197 qsort(events, internal_size, sizeof(kmp_stats_event), compare_two_events); 198 } 199 200 /* *********************************************************** */ 201 /* ************* kmp_stats_list member functions ************* */ 202 203 // returns a pointer to newly created stats node 204 kmp_stats_list* kmp_stats_list::push_back(int gtid) { 205 kmp_stats_list* newnode = (kmp_stats_list*)__kmp_allocate(sizeof(kmp_stats_list)); 206 // placement new, only requires space and pointer and initializes (so __kmp_allocate instead of C++ new[] is used) 207 new (newnode) kmp_stats_list(); 208 newnode->setGtid(gtid); 209 newnode->prev = this->prev; 210 newnode->next = this; 211 newnode->prev->next = newnode; 212 newnode->next->prev = newnode; 213 return newnode; 214 } 215 void kmp_stats_list::deallocate() { 216 kmp_stats_list* ptr = this->next; 217 kmp_stats_list* delptr = this->next; 218 while(ptr != this) { 219 delptr = ptr; 220 ptr=ptr->next; 221 // placement new means we have to explicitly call destructor. 222 delptr->_event_vector.deallocate(); 223 delptr->~kmp_stats_list(); 224 __kmp_free(delptr); 225 } 226 } 227 kmp_stats_list::iterator kmp_stats_list::begin() { 228 kmp_stats_list::iterator it; 229 it.ptr = this->next; 230 return it; 231 } 232 kmp_stats_list::iterator kmp_stats_list::end() { 233 kmp_stats_list::iterator it; 234 it.ptr = this; 235 return it; 236 } 237 int kmp_stats_list::size() { 238 int retval; 239 kmp_stats_list::iterator it; 240 for(retval=0, it=begin(); it!=end(); it++, retval++) {} 241 return retval; 242 } 243 244 /* ********************************************************************* */ 245 /* ************* kmp_stats_list::iterator member functions ************* */ 246 247 kmp_stats_list::iterator::iterator() : ptr(NULL) {} 248 kmp_stats_list::iterator::~iterator() {} 249 kmp_stats_list::iterator kmp_stats_list::iterator::operator++() { 250 this->ptr = this->ptr->next; 251 return *this; 252 } 253 kmp_stats_list::iterator kmp_stats_list::iterator::operator++(int dummy) { 254 this->ptr = this->ptr->next; 255 return *this; 256 } 257 kmp_stats_list::iterator kmp_stats_list::iterator::operator--() { 258 this->ptr = this->ptr->prev; 259 return *this; 260 } 261 kmp_stats_list::iterator kmp_stats_list::iterator::operator--(int dummy) { 262 this->ptr = this->ptr->prev; 263 return *this; 264 } 265 bool kmp_stats_list::iterator::operator!=(const kmp_stats_list::iterator & rhs) { 266 return this->ptr!=rhs.ptr; 267 } 268 bool kmp_stats_list::iterator::operator==(const kmp_stats_list::iterator & rhs) { 269 return this->ptr==rhs.ptr; 270 } 271 kmp_stats_list* kmp_stats_list::iterator::operator*() const { 272 return this->ptr; 273 } 274 275 /* *************************************************************** */ 276 /* ************* kmp_stats_output_module functions ************** */ 277 278 const char* kmp_stats_output_module::outputFileName = NULL; 279 const char* kmp_stats_output_module::eventsFileName = NULL; 280 const char* kmp_stats_output_module::plotFileName = NULL; 281 int kmp_stats_output_module::printPerThreadFlag = 0; 282 int kmp_stats_output_module::printPerThreadEventsFlag = 0; 283 284 // init() is called very near the beginning of execution time in the constructor of __kmp_stats_global_output 285 void kmp_stats_output_module::init() 286 { 287 char * statsFileName = getenv("KMP_STATS_FILE"); 288 eventsFileName = getenv("KMP_STATS_EVENTS_FILE"); 289 plotFileName = getenv("KMP_STATS_PLOT_FILE"); 290 char * threadStats = getenv("KMP_STATS_THREADS"); 291 char * threadEvents = getenv("KMP_STATS_EVENTS"); 292 293 // set the stats output filenames based on environment variables and defaults 294 outputFileName = statsFileName; 295 eventsFileName = eventsFileName ? eventsFileName : "events.dat"; 296 plotFileName = plotFileName ? plotFileName : "events.plt"; 297 298 // set the flags based on environment variables matching: true, on, 1, .true. , .t. , yes 299 printPerThreadFlag = __kmp_str_match_true(threadStats); 300 printPerThreadEventsFlag = __kmp_str_match_true(threadEvents); 301 302 if(printPerThreadEventsFlag) { 303 // assigns a color to each timer for printing 304 setupEventColors(); 305 } else { 306 // will clear flag so that no event will be logged 307 timeStat::clearEventFlags(); 308 } 309 310 return; 311 } 312 313 void kmp_stats_output_module::setupEventColors() { 314 int i; 315 int globalColorIndex = 0; 316 int numGlobalColors = sizeof(globalColorArray) / sizeof(rgb_color); 317 for(i=0;i<TIMER_LAST;i++) { 318 if(timeStat::logEvent((timer_e)i)) { 319 timerColorInfo[i] = globalColorArray[globalColorIndex]; 320 globalColorIndex = (globalColorIndex+1)%numGlobalColors; 321 } 322 } 323 return; 324 } 325 326 void kmp_stats_output_module::printStats(FILE *statsOut, statistic const * theStats, bool areTimers) 327 { 328 if (areTimers) 329 { 330 // Check if we have useful timers, since we don't print zero value timers we need to avoid 331 // printing a header and then no data. 332 bool haveTimers = false; 333 for (int s = 0; s<TIMER_LAST; s++) 334 { 335 if (theStats[s].getCount() != 0) 336 { 337 haveTimers = true; 338 break; 339 } 340 } 341 if (!haveTimers) 342 return; 343 } 344 345 // Print 346 const char * title = areTimers ? "Timer, SampleCount," : "Counter, ThreadCount,"; 347 fprintf (statsOut, "%s Min, Mean, Max, Total, SD\n", title); 348 if (areTimers) { 349 for (int s = 0; s<TIMER_LAST; s++) { 350 statistic const * stat = &theStats[s]; 351 if (stat->getCount() != 0) { 352 char tag = timeStat::noUnits(timer_e(s)) ? ' ' : 'T'; 353 fprintf (statsOut, "%-25s, %s\n", timeStat::name(timer_e(s)), stat->format(tag, true).c_str()); 354 } 355 } 356 } else { // Counters 357 for (int s = 0; s<COUNTER_LAST; s++) { 358 statistic const * stat = &theStats[s]; 359 fprintf (statsOut, "%-25s, %s\n", counter::name(counter_e(s)), stat->format(' ', true).c_str()); 360 } 361 } 362 } 363 364 void kmp_stats_output_module::printCounters(FILE * statsOut, counter const * theCounters) 365 { 366 // We print all the counters even if they are zero. 367 // That makes it easier to slice them into a spreadsheet if you need to. 368 fprintf (statsOut, "\nCounter, Count\n"); 369 for (int c = 0; c<COUNTER_LAST; c++) { 370 counter const * stat = &theCounters[c]; 371 fprintf (statsOut, "%-25s, %s\n", counter::name(counter_e(c)), formatSI(stat->getValue(), 9, ' ').c_str()); 372 } 373 } 374 375 void kmp_stats_output_module::printEvents(FILE* eventsOut, kmp_stats_event_vector* theEvents, int gtid) { 376 // sort by start time before printing 377 theEvents->sort(); 378 for (int i = 0; i < theEvents->size(); i++) { 379 kmp_stats_event ev = theEvents->at(i); 380 rgb_color color = getEventColor(ev.getTimerName()); 381 fprintf(eventsOut, "%d %lu %lu %1.1f rgb(%1.1f,%1.1f,%1.1f) %s\n", 382 gtid, 383 ev.getStart(), 384 ev.getStop(), 385 1.2 - (ev.getNestLevel() * 0.2), 386 color.r, color.g, color.b, 387 timeStat::name(ev.getTimerName()) 388 ); 389 } 390 return; 391 } 392 393 void kmp_stats_output_module::windupExplicitTimers() 394 { 395 // Wind up any explicit timers. We assume that it's fair at this point to just walk all the explcit timers in all threads 396 // and say "it's over". 397 // If the timer wasn't running, this won't record anything anyway. 398 kmp_stats_list::iterator it; 399 for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) { 400 for (int timer=0; timer<EXPLICIT_TIMER_LAST; timer++) { 401 (*it)->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer); 402 } 403 } 404 } 405 406 void kmp_stats_output_module::printPloticusFile() { 407 int i; 408 int size = __kmp_stats_list.size(); 409 FILE* plotOut = fopen(plotFileName, "w+"); 410 411 fprintf(plotOut, "#proc page\n" 412 " pagesize: 15 10\n" 413 " scale: 1.0\n\n"); 414 415 fprintf(plotOut, "#proc getdata\n" 416 " file: %s\n\n", 417 eventsFileName); 418 419 fprintf(plotOut, "#proc areadef\n" 420 " title: OpenMP Sampling Timeline\n" 421 " titledetails: align=center size=16\n" 422 " rectangle: 1 1 13 9\n" 423 " xautorange: datafield=2,3\n" 424 " yautorange: -1 %d\n\n", 425 size); 426 427 fprintf(plotOut, "#proc xaxis\n" 428 " stubs: inc\n" 429 " stubdetails: size=12\n" 430 " label: Time (ticks)\n" 431 " labeldetails: size=14\n\n"); 432 433 fprintf(plotOut, "#proc yaxis\n" 434 " stubs: inc 1\n" 435 " stubrange: 0 %d\n" 436 " stubdetails: size=12\n" 437 " label: Thread #\n" 438 " labeldetails: size=14\n\n", 439 size-1); 440 441 fprintf(plotOut, "#proc bars\n" 442 " exactcolorfield: 5\n" 443 " axis: x\n" 444 " locfield: 1\n" 445 " segmentfields: 2 3\n" 446 " barwidthfield: 4\n\n"); 447 448 // create legend entries corresponding to the timer color 449 for(i=0;i<TIMER_LAST;i++) { 450 if(timeStat::logEvent((timer_e)i)) { 451 rgb_color c = getEventColor((timer_e)i); 452 fprintf(plotOut, "#proc legendentry\n" 453 " sampletype: color\n" 454 " label: %s\n" 455 " details: rgb(%1.1f,%1.1f,%1.1f)\n\n", 456 timeStat::name((timer_e)i), 457 c.r, c.g, c.b); 458 459 } 460 } 461 462 fprintf(plotOut, "#proc legend\n" 463 " format: down\n" 464 " location: max max\n\n"); 465 fclose(plotOut); 466 return; 467 } 468 469 void kmp_stats_output_module::outputStats(const char* heading) 470 { 471 statistic allStats[TIMER_LAST]; 472 statistic allCounters[COUNTER_LAST]; 473 474 // stop all the explicit timers for all threads 475 windupExplicitTimers(); 476 477 FILE * eventsOut; 478 FILE * statsOut = outputFileName ? fopen (outputFileName, "a+") : stderr; 479 480 if (eventPrintingEnabled()) { 481 eventsOut = fopen(eventsFileName, "w+"); 482 } 483 484 if (!statsOut) 485 statsOut = stderr; 486 487 fprintf(statsOut, "%s\n",heading); 488 // Accumulate across threads. 489 kmp_stats_list::iterator it; 490 for (it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) { 491 int t = (*it)->getGtid(); 492 // Output per thread stats if requested. 493 if (perThreadPrintingEnabled()) { 494 fprintf (statsOut, "Thread %d\n", t); 495 printStats(statsOut, (*it)->getTimers(), true); 496 printCounters(statsOut, (*it)->getCounters()); 497 fprintf(statsOut,"\n"); 498 } 499 // Output per thread events if requested. 500 if (eventPrintingEnabled()) { 501 kmp_stats_event_vector events = (*it)->getEventVector(); 502 printEvents(eventsOut, &events, t); 503 } 504 505 for (int s = 0; s<TIMER_LAST; s++) { 506 // See if we should ignore this timer when aggregating 507 if ((timeStat::masterOnly(timer_e(s)) && (t != 0)) || // Timer is only valid on the master and this thread is a worker 508 (timeStat::workerOnly(timer_e(s)) && (t == 0)) || // Timer is only valid on a worker and this thread is the master 509 timeStat::synthesized(timer_e(s)) // It's a synthesized stat, so there's no raw data for it. 510 ) 511 { 512 continue; 513 } 514 515 statistic * threadStat = (*it)->getTimer(timer_e(s)); 516 allStats[s] += *threadStat; 517 } 518 519 // Special handling for synthesized statistics. 520 // These just have to be coded specially here for now. 521 // At present we only have a few: 522 // The total parallel work done in each thread. 523 // The variance here makes it easy to see load imbalance over the whole program (though, of course, 524 // it's possible to have a code with awful load balance in every parallel region but perfect load 525 // balance oever the whole program.) 526 // The time spent in barriers in each thread. 527 allStats[TIMER_Total_work].addSample ((*it)->getTimer(TIMER_OMP_work)->getTotal()); 528 529 // Time in explicit barriers. 530 allStats[TIMER_Total_barrier].addSample ((*it)->getTimer(TIMER_OMP_barrier)->getTotal()); 531 532 for (int c = 0; c<COUNTER_LAST; c++) { 533 if (counter::masterOnly(counter_e(c)) && t != 0) 534 continue; 535 allCounters[c].addSample ((*it)->getCounter(counter_e(c))->getValue()); 536 } 537 } 538 539 if (eventPrintingEnabled()) { 540 printPloticusFile(); 541 fclose(eventsOut); 542 } 543 544 fprintf (statsOut, "Aggregate for all threads\n"); 545 printStats (statsOut, &allStats[0], true); 546 fprintf (statsOut, "\n"); 547 printStats (statsOut, &allCounters[0], false); 548 549 if (statsOut != stderr) 550 fclose(statsOut); 551 552 } 553 554 /* ************************************************** */ 555 /* ************* exported C functions ************** */ 556 557 // no name mangling for these functions, we want the c files to be able to get at these functions 558 extern "C" { 559 560 void __kmp_reset_stats() 561 { 562 kmp_stats_list::iterator it; 563 for(it = __kmp_stats_list.begin(); it != __kmp_stats_list.end(); it++) { 564 timeStat * timers = (*it)->getTimers(); 565 counter * counters = (*it)->getCounters(); 566 explicitTimer * eTimers = (*it)->getExplicitTimers(); 567 568 for (int t = 0; t<TIMER_LAST; t++) 569 timers[t].reset(); 570 571 for (int c = 0; c<COUNTER_LAST; c++) 572 counters[c].reset(); 573 574 for (int t=0; t<EXPLICIT_TIMER_LAST; t++) 575 eTimers[t].reset(); 576 577 // reset the event vector so all previous events are "erased" 578 (*it)->resetEventVector(); 579 580 // May need to restart the explicit timers in thread zero? 581 } 582 KMP_START_EXPLICIT_TIMER(OMP_serial); 583 KMP_START_EXPLICIT_TIMER(OMP_start_end); 584 } 585 586 // This function will reset all stats and stop all threads' explicit timers if they haven't been stopped already. 587 void __kmp_output_stats(const char * heading) 588 { 589 __kmp_stats_global_output.outputStats(heading); 590 __kmp_reset_stats(); 591 } 592 593 void __kmp_accumulate_stats_at_exit(void) 594 { 595 // Only do this once. 596 if (KMP_XCHG_FIXED32(&statsPrinted, 1) != 0) 597 return; 598 599 __kmp_output_stats("Statistics on exit"); 600 return; 601 } 602 603 void __kmp_stats_init(void) 604 { 605 return; 606 } 607 608 } // extern "C" 609 610