1 //===- Timing.cpp - Execution time measurement facilities -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Facilities to measure and provide statistics on execution time. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "mlir/Support/Timing.h" 14 #include "mlir/Support/ThreadLocalCache.h" 15 #include "llvm/ADT/MapVector.h" 16 #include "llvm/ADT/Statistic.h" 17 #include "llvm/ADT/StringMap.h" 18 #include "llvm/ADT/StringSet.h" 19 #include "llvm/Support/Allocator.h" 20 #include "llvm/Support/CommandLine.h" 21 #include "llvm/Support/Format.h" 22 #include "llvm/Support/FormatVariadic.h" 23 #include "llvm/Support/ManagedStatic.h" 24 #include "llvm/Support/RWMutex.h" 25 #include "llvm/Support/Threading.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 #include <atomic> 29 #include <chrono> 30 31 using namespace mlir; 32 using namespace detail; 33 using DisplayMode = DefaultTimingManager::DisplayMode; 34 35 constexpr llvm::StringLiteral kTimingDescription = 36 "... Execution time report ..."; 37 38 //===----------------------------------------------------------------------===// 39 // TimingManager 40 //===----------------------------------------------------------------------===// 41 42 namespace mlir { 43 namespace detail { 44 /// Private implementation details of the `TimingManager`. 45 class TimingManagerImpl { 46 public: 47 // Identifier allocator, map, and mutex for thread safety. 48 llvm::BumpPtrAllocator identifierAllocator; 49 llvm::StringSet<llvm::BumpPtrAllocator &> identifiers; 50 llvm::sys::SmartRWMutex<true> identifierMutex; 51 52 /// A thread local cache of identifiers to reduce lock contention. 53 ThreadLocalCache<llvm::StringMap<llvm::StringMapEntry<llvm::NoneType> *>> 54 localIdentifierCache; 55 56 TimingManagerImpl() : identifiers(identifierAllocator) {} 57 }; 58 } // namespace detail 59 } // namespace mlir 60 61 TimingManager::TimingManager() : impl(std::make_unique<TimingManagerImpl>()) {} 62 63 TimingManager::~TimingManager() = default; 64 65 /// Get the root timer of this timing manager. 66 Timer TimingManager::getRootTimer() { 67 auto rt = rootTimer(); 68 return rt ? Timer(*this, *rt) : Timer(); 69 } 70 71 /// Get the root timer of this timing manager wrapped in a `TimingScope`. 72 TimingScope TimingManager::getRootScope() { 73 return TimingScope(getRootTimer()); 74 } 75 76 //===----------------------------------------------------------------------===// 77 // Identifier uniquing 78 //===----------------------------------------------------------------------===// 79 80 /// Return an identifier for the specified string. 81 TimingIdentifier TimingIdentifier::get(StringRef str, TimingManager &tm) { 82 // Check for an existing instance in the local cache. 83 auto &impl = *tm.impl; 84 auto *&localEntry = (*impl.localIdentifierCache)[str]; 85 if (localEntry) 86 return TimingIdentifier(localEntry); 87 88 // Check for an existing identifier in read-only mode. 89 { 90 llvm::sys::SmartScopedReader<true> contextLock(impl.identifierMutex); 91 auto it = impl.identifiers.find(str); 92 if (it != impl.identifiers.end()) { 93 localEntry = &*it; 94 return TimingIdentifier(localEntry); 95 } 96 } 97 98 // Acquire a writer-lock so that we can safely create the new instance. 99 llvm::sys::SmartScopedWriter<true> contextLock(impl.identifierMutex); 100 auto it = impl.identifiers.insert(str).first; 101 localEntry = &*it; 102 return TimingIdentifier(localEntry); 103 } 104 105 //===----------------------------------------------------------------------===// 106 // Helpers for time record printing 107 //===----------------------------------------------------------------------===// 108 109 namespace { 110 111 /// Simple record class to record timing information. 112 struct TimeRecord { 113 TimeRecord(double wall = 0.0, double user = 0.0) : wall(wall), user(user) {} 114 115 TimeRecord &operator+=(const TimeRecord &other) { 116 wall += other.wall; 117 user += other.user; 118 return *this; 119 } 120 121 TimeRecord &operator-=(const TimeRecord &other) { 122 wall -= other.wall; 123 user -= other.user; 124 return *this; 125 } 126 127 /// Print the current time record to 'os', with a breakdown showing 128 /// contributions to the give 'total' time record. 129 void print(raw_ostream &os, const TimeRecord &total) { 130 if (total.user != total.wall) 131 os << llvm::format(" %8.4f (%5.1f%%)", user, 100.0 * user / total.user); 132 os << llvm::format(" %8.4f (%5.1f%%) ", wall, 100.0 * wall / total.wall); 133 } 134 135 double wall, user; 136 }; 137 138 } // namespace 139 140 /// Utility to print a single line entry in the timer output. 141 static void printTimeEntry(raw_ostream &os, unsigned indent, StringRef name, 142 TimeRecord time, TimeRecord total) { 143 time.print(os, total); 144 os.indent(indent) << name << "\n"; 145 } 146 147 /// Utility to print the timer heading information. 148 static void printTimeHeader(raw_ostream &os, TimeRecord total) { 149 // Figure out how many spaces to description name. 150 unsigned padding = (80 - kTimingDescription.size()) / 2; 151 os << "===" << std::string(73, '-') << "===\n"; 152 os.indent(padding) << kTimingDescription << '\n'; 153 os << "===" << std::string(73, '-') << "===\n"; 154 155 // Print the total time followed by the section headers. 156 os << llvm::format(" Total Execution Time: %.4f seconds\n\n", total.wall); 157 if (total.user != total.wall) 158 os << " ----User Time----"; 159 os << " ----Wall Time---- ----Name----\n"; 160 } 161 162 //===----------------------------------------------------------------------===// 163 // Timer Implementation for DefaultTimingManager 164 //===----------------------------------------------------------------------===// 165 166 namespace { 167 168 /// A timer used to sample execution time. 169 /// 170 /// Separately tracks wall time and user time to account for parallel threads of 171 /// execution. Timers are intended to be started and stopped multiple times. 172 /// Each start and stop will add to the timer's wall and user time. 173 class TimerImpl { 174 public: 175 using ChildrenMap = llvm::MapVector<const void *, std::unique_ptr<TimerImpl>>; 176 using AsyncChildrenMap = llvm::DenseMap<uint64_t, ChildrenMap>; 177 178 TimerImpl(std::string &&name) : threadId(llvm::get_threadid()), name(name) {} 179 180 /// Start the timer. 181 void start() { startTime = std::chrono::steady_clock::now(); } 182 183 /// Stop the timer. 184 void stop() { 185 auto newTime = std::chrono::steady_clock::now() - startTime; 186 wallTime += newTime; 187 userTime += newTime; 188 } 189 190 /// Create a child timer nested within this one. Multiple calls to this 191 /// function with the same unique identifier `id` will return the same child 192 /// timer. 193 /// 194 /// This function can be called from other threads, as long as this timer 195 /// outlives any uses of the child timer on the other thread. 196 TimerImpl *nest(const void *id, function_ref<std::string()> nameBuilder) { 197 auto tid = llvm::get_threadid(); 198 if (tid == threadId) 199 return nestTail(children[id], nameBuilder); 200 std::unique_lock<std::mutex> lock(asyncMutex); 201 return nestTail(asyncChildren[tid][id], nameBuilder); 202 } 203 204 /// Tail-called from `nest()`. 205 TimerImpl *nestTail(std::unique_ptr<TimerImpl> &child, 206 function_ref<std::string()> nameBuilder) { 207 if (!child) 208 child = std::make_unique<TimerImpl>(nameBuilder()); 209 return child.get(); 210 } 211 212 /// Finalize this timer and all its children. 213 /// 214 /// If this timer has async children, which happens if `nest()` was called 215 /// from another thread, this function merges the async childr timers into the 216 /// main list of child timers. 217 /// 218 /// Caution: Call this function only after all nested timers running on other 219 /// threads no longer need their timers! 220 void finalize() { 221 addAsyncUserTime(); 222 mergeAsyncChildren(); 223 } 224 225 /// Add the user time of all async children to this timer's user time. This is 226 /// necessary since the user time already contains all regular child timers, 227 /// but not the asynchronous ones (by the nesting nature of the timers). 228 std::chrono::nanoseconds addAsyncUserTime() { 229 auto added = std::chrono::nanoseconds(0); 230 for (auto &child : children) 231 added += child.second->addAsyncUserTime(); 232 for (auto &thread : asyncChildren) { 233 for (auto &child : thread.second) { 234 child.second->addAsyncUserTime(); 235 added += child.second->userTime; 236 } 237 } 238 userTime += added; 239 return added; 240 } 241 242 /// Ensure that this timer and recursively all its children have their async 243 /// children folded into the main map of children. 244 void mergeAsyncChildren() { 245 for (auto &child : children) 246 child.second->mergeAsyncChildren(); 247 mergeChildren(std::move(asyncChildren)); 248 assert(asyncChildren.empty()); 249 } 250 251 /// Merge multiple child timers into this timer. 252 /// 253 /// Children in `other` are added as children to this timer, or, if this timer 254 /// already contains a child with the corresponding unique identifier, are 255 /// merged into the existing child. 256 void mergeChildren(ChildrenMap &&other) { 257 if (children.empty()) { 258 children = std::move(other); 259 for (auto &child : children) 260 child.second->mergeAsyncChildren(); 261 } else { 262 for (auto &child : other) 263 mergeChild(child.first, std::move(child.second)); 264 other.clear(); 265 } 266 } 267 268 /// See above. 269 void mergeChildren(AsyncChildrenMap &&other) { 270 for (auto &thread : other) { 271 mergeChildren(std::move(thread.second)); 272 assert(thread.second.empty()); 273 } 274 other.clear(); 275 } 276 277 /// Merge a child timer into this timer for a given unique identifier. 278 /// 279 /// Moves all child and async child timers of `other` into this timer's child 280 /// for the given unique identifier. 281 void mergeChild(const void *id, std::unique_ptr<TimerImpl> &&other) { 282 auto &into = children[id]; 283 if (!into) { 284 into = std::move(other); 285 into->mergeAsyncChildren(); 286 } else { 287 into->wallTime = std::max(into->wallTime, other->wallTime); 288 into->userTime += other->userTime; 289 into->mergeChildren(std::move(other->children)); 290 into->mergeChildren(std::move(other->asyncChildren)); 291 other.reset(); 292 } 293 } 294 295 /// Dump a human-readable tree representation of the timer and its children. 296 /// This is useful for debugging the timing mechanisms and structure of the 297 /// timers. 298 void dump(raw_ostream &os, unsigned indent = 0, unsigned markThreadId = 0) { 299 auto time = getTimeRecord(); 300 os << std::string(indent * 2, ' ') << name << " [" << threadId << "]" 301 << llvm::format(" %7.4f / %7.4f", time.user, time.wall); 302 if (threadId != markThreadId && markThreadId != 0) 303 os << " (*)"; 304 os << "\n"; 305 for (auto &child : children) 306 child.second->dump(os, indent + 1, threadId); 307 for (auto &thread : asyncChildren) 308 for (auto &child : thread.second) 309 child.second->dump(os, indent + 1, threadId); 310 } 311 312 /// Returns the time for this timer in seconds. 313 TimeRecord getTimeRecord() { 314 return TimeRecord( 315 std::chrono::duration_cast<std::chrono::duration<double>>(wallTime) 316 .count(), 317 std::chrono::duration_cast<std::chrono::duration<double>>(userTime) 318 .count()); 319 } 320 321 /// Print the timing result in list mode. 322 void printAsList(raw_ostream &os, TimeRecord total) { 323 // Flatten the leaf timers in the tree and merge them by name. 324 llvm::StringMap<TimeRecord> mergedTimers; 325 std::function<void(TimerImpl *)> addTimer = [&](TimerImpl *timer) { 326 mergedTimers[timer->name] += timer->getTimeRecord(); 327 for (auto &children : timer->children) 328 addTimer(children.second.get()); 329 }; 330 addTimer(this); 331 332 // Sort the timing information by wall time. 333 std::vector<std::pair<StringRef, TimeRecord>> timerNameAndTime; 334 for (auto &it : mergedTimers) 335 timerNameAndTime.emplace_back(it.first(), it.second); 336 llvm::array_pod_sort(timerNameAndTime.begin(), timerNameAndTime.end(), 337 [](const std::pair<StringRef, TimeRecord> *lhs, 338 const std::pair<StringRef, TimeRecord> *rhs) { 339 return llvm::array_pod_sort_comparator<double>( 340 &rhs->second.wall, &lhs->second.wall); 341 }); 342 343 // Print the timing information sequentially. 344 for (auto &timeData : timerNameAndTime) 345 printTimeEntry(os, 0, timeData.first, timeData.second, total); 346 } 347 348 /// Print the timing result in tree mode. 349 void printAsTree(raw_ostream &os, TimeRecord total, unsigned indent = 0) { 350 unsigned childIndent = indent; 351 if (!hidden) { 352 printTimeEntry(os, indent, name, getTimeRecord(), total); 353 childIndent += 2; 354 } 355 for (auto &child : children) { 356 child.second->printAsTree(os, total, childIndent); 357 } 358 } 359 360 /// Print the current timing information. 361 void print(raw_ostream &os, DisplayMode displayMode) { 362 // Print the banner. 363 auto total = getTimeRecord(); 364 printTimeHeader(os, total); 365 366 // Defer to a specialized printer for each display mode. 367 switch (displayMode) { 368 case DisplayMode::List: 369 printAsList(os, total); 370 break; 371 case DisplayMode::Tree: 372 printAsTree(os, total); 373 break; 374 } 375 376 // Print the top-level time not accounted for by child timers, and the 377 // total. 378 auto rest = total; 379 for (auto &child : children) 380 rest -= child.second->getTimeRecord(); 381 printTimeEntry(os, 0, "Rest", rest, total); 382 printTimeEntry(os, 0, "Total", total, total); 383 os.flush(); 384 } 385 386 /// The last time instant at which the timer was started. 387 std::chrono::time_point<std::chrono::steady_clock> startTime; 388 389 /// Accumulated wall time. If multiple threads of execution are merged into 390 /// this timer, the wall time will hold the maximum wall time of each thread 391 /// of execution. 392 std::chrono::nanoseconds wallTime = std::chrono::nanoseconds(0); 393 394 /// Accumulated user time. If multiple threads of execution are merged into 395 /// this timer, each thread's user time is added here. 396 std::chrono::nanoseconds userTime = std::chrono::nanoseconds(0); 397 398 /// The thread on which this timer is running. 399 uint64_t threadId; 400 401 /// A descriptive name for this timer. 402 std::string name; 403 404 /// Whether to omit this timer from reports and directly show its children. 405 bool hidden = false; 406 407 /// Child timers on the same thread the timer itself. We keep at most one 408 /// timer per unique identifier. 409 ChildrenMap children; 410 411 /// Child timers on other threads. We keep at most one timer per unique 412 /// identifier. 413 AsyncChildrenMap asyncChildren; 414 415 /// Mutex for the async children. 416 std::mutex asyncMutex; 417 }; 418 419 } // namespace 420 421 //===----------------------------------------------------------------------===// 422 // DefaultTimingManager 423 //===----------------------------------------------------------------------===// 424 425 namespace mlir { 426 namespace detail { 427 428 /// Implementation details of the `DefaultTimingManager`. 429 class DefaultTimingManagerImpl { 430 public: 431 /// Whether we should do our work or not. 432 bool enabled = false; 433 434 /// The configured display mode. 435 DisplayMode displayMode = DisplayMode::Tree; 436 437 /// The stream where we should print our output. This will always be non-null. 438 raw_ostream *output = &llvm::errs(); 439 440 /// The root timer. 441 std::unique_ptr<TimerImpl> rootTimer; 442 }; 443 444 } // namespace detail 445 } // namespace mlir 446 447 DefaultTimingManager::DefaultTimingManager() 448 : impl(std::make_unique<DefaultTimingManagerImpl>()) { 449 clear(); // initializes the root timer 450 } 451 452 DefaultTimingManager::~DefaultTimingManager() { print(); } 453 454 /// Enable or disable execution time sampling. 455 void DefaultTimingManager::setEnabled(bool enabled) { impl->enabled = enabled; } 456 457 /// Return whether execution time sampling is enabled. 458 bool DefaultTimingManager::isEnabled() const { return impl->enabled; } 459 460 /// Change the display mode. 461 void DefaultTimingManager::setDisplayMode(DisplayMode displayMode) { 462 impl->displayMode = displayMode; 463 } 464 465 /// Return the current display mode; 466 DefaultTimingManager::DisplayMode DefaultTimingManager::getDisplayMode() const { 467 return impl->displayMode; 468 } 469 470 /// Change the stream where the output will be printed to. 471 void DefaultTimingManager::setOutput(raw_ostream &os) { impl->output = &os; } 472 473 /// Return the current output stream where the output will be printed to. 474 raw_ostream &DefaultTimingManager::getOutput() const { 475 assert(impl->output); 476 return *impl->output; 477 } 478 479 /// Print and clear the timing results. 480 void DefaultTimingManager::print() { 481 if (impl->enabled) { 482 impl->rootTimer->finalize(); 483 impl->rootTimer->print(*impl->output, impl->displayMode); 484 } 485 clear(); 486 } 487 488 /// Clear the timing results. 489 void DefaultTimingManager::clear() { 490 impl->rootTimer = std::make_unique<TimerImpl>("root"); 491 impl->rootTimer->hidden = true; 492 } 493 494 /// Debug print the timer data structures to an output stream. 495 void DefaultTimingManager::dumpTimers(raw_ostream &os) { 496 impl->rootTimer->dump(os); 497 } 498 499 /// Debug print the timers as a list. 500 void DefaultTimingManager::dumpAsList(raw_ostream &os) { 501 impl->rootTimer->finalize(); 502 impl->rootTimer->print(os, DisplayMode::List); 503 } 504 505 /// Debug print the timers as a tree. 506 void DefaultTimingManager::dumpAsTree(raw_ostream &os) { 507 impl->rootTimer->finalize(); 508 impl->rootTimer->print(os, DisplayMode::Tree); 509 } 510 511 Optional<void *> DefaultTimingManager::rootTimer() { 512 if (impl->enabled) 513 return impl->rootTimer.get(); 514 return llvm::None; 515 } 516 517 void DefaultTimingManager::startTimer(void *handle) { 518 static_cast<TimerImpl *>(handle)->start(); 519 } 520 521 void DefaultTimingManager::stopTimer(void *handle) { 522 static_cast<TimerImpl *>(handle)->stop(); 523 } 524 525 void *DefaultTimingManager::nestTimer(void *handle, const void *id, 526 function_ref<std::string()> nameBuilder) { 527 return static_cast<TimerImpl *>(handle)->nest(id, nameBuilder); 528 } 529 530 void DefaultTimingManager::hideTimer(void *handle) { 531 static_cast<TimerImpl *>(handle)->hidden = true; 532 } 533 534 //===----------------------------------------------------------------------===// 535 // DefaultTimingManager Command Line Options 536 //===----------------------------------------------------------------------===// 537 538 namespace { 539 struct DefaultTimingManagerOptions { 540 llvm::cl::opt<bool> timing{"mlir-timing", 541 llvm::cl::desc("Display execution times"), 542 llvm::cl::init(false)}; 543 llvm::cl::opt<DisplayMode> displayMode{ 544 "mlir-timing-display", llvm::cl::desc("Display method for timing data"), 545 llvm::cl::init(DisplayMode::Tree), 546 llvm::cl::values( 547 clEnumValN(DisplayMode::List, "list", 548 "display the results in a list sorted by total time"), 549 clEnumValN(DisplayMode::Tree, "tree", 550 "display the results ina with a nested tree view"))}; 551 }; 552 } // namespace 553 554 static llvm::ManagedStatic<DefaultTimingManagerOptions> options; 555 556 void mlir::registerDefaultTimingManagerCLOptions() { 557 // Make sure that the options struct has been constructed. 558 *options; 559 } 560 561 void mlir::applyDefaultTimingManagerCLOptions(DefaultTimingManager &tm) { 562 if (!options.isConstructed()) 563 return; 564 tm.setEnabled(options->timing); 565 tm.setDisplayMode(options->displayMode); 566 } 567