1 //===- PassTiming.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "PassDetail.h" 10 #include "mlir/Pass/PassManager.h" 11 #include "llvm/ADT/MapVector.h" 12 #include "llvm/ADT/STLExtras.h" 13 #include "llvm/ADT/SmallVector.h" 14 #include "llvm/ADT/Statistic.h" 15 #include "llvm/Support/Format.h" 16 #include "llvm/Support/FormatVariadic.h" 17 #include "llvm/Support/Threading.h" 18 #include <chrono> 19 20 using namespace mlir; 21 using namespace mlir::detail; 22 23 constexpr StringLiteral kPassTimingDescription = 24 "... Pass execution timing report ..."; 25 26 namespace { 27 /// Simple record class to record timing information. 28 struct TimeRecord { 29 TimeRecord(double wall = 0.0, double user = 0.0) : wall(wall), user(user) {} 30 31 TimeRecord &operator+=(const TimeRecord &other) { 32 wall += other.wall; 33 user += other.user; 34 return *this; 35 } 36 37 /// Print the current time record to 'os', with a breakdown showing 38 /// contributions to the give 'total' time record. 39 void print(raw_ostream &os, const TimeRecord &total) { 40 if (total.user != total.wall) 41 os << llvm::format(" %7.4f (%5.1f%%) ", user, 42 100.0 * user / total.user); 43 os << llvm::format(" %7.4f (%5.1f%%) ", wall, 100.0 * wall / total.wall); 44 } 45 46 double wall, user; 47 }; 48 49 /// An enumeration of the different types of timers. 50 enum class TimerKind { 51 /// This timer represents an ordered collection of pass timers, corresponding 52 /// to a pass pipeline. 53 Pipeline, 54 55 /// This timer represents a collection of pipeline timers. 56 PipelineCollection, 57 58 /// This timer represents an analysis or pass timer. 59 PassOrAnalysis 60 }; 61 62 struct Timer { 63 explicit Timer(std::string &&name, TimerKind kind) 64 : name(std::move(name)), kind(kind) {} 65 66 /// Start the timer. 67 void start() { startTime = std::chrono::system_clock::now(); } 68 69 /// Stop the timer. 70 void stop() { 71 auto newTime = std::chrono::system_clock::now() - startTime; 72 wallTime += newTime; 73 userTime += newTime; 74 } 75 76 /// Get or create a child timer with the provided name and id. 77 Timer *getChildTimer(const void *id, TimerKind kind, 78 std::function<std::string()> &&nameBuilder) { 79 auto &child = children[id]; 80 if (!child) 81 child = std::make_unique<Timer>(nameBuilder(), kind); 82 return child.get(); 83 } 84 85 /// Returns the total time for this timer in seconds. 86 TimeRecord getTotalTime() { 87 // If this is a pass or analysis timer, use the recorded time directly. 88 if (kind == TimerKind::PassOrAnalysis) { 89 return TimeRecord( 90 std::chrono::duration_cast<std::chrono::duration<double>>(wallTime) 91 .count(), 92 std::chrono::duration_cast<std::chrono::duration<double>>(userTime) 93 .count()); 94 } 95 96 // Otherwise, accumulate the timing from each of the children. 97 TimeRecord totalTime; 98 for (auto &child : children) 99 totalTime += child.second->getTotalTime(); 100 return totalTime; 101 } 102 103 /// A map of unique identifiers to child timers. 104 using ChildrenMap = llvm::MapVector<const void *, std::unique_ptr<Timer>>; 105 106 /// Merge the timing data from 'other' into this timer. 107 void merge(Timer &&other) { 108 if (wallTime < other.wallTime) 109 wallTime = other.wallTime; 110 userTime += other.userTime; 111 mergeChildren(std::move(other.children)); 112 } 113 114 /// Merge the timer children in 'otherChildren' with the children of this 115 /// timer. 116 void mergeChildren(ChildrenMap &&otherChildren) { 117 // Check for an empty children list. 118 if (children.empty()) { 119 children = std::move(otherChildren); 120 return; 121 } 122 123 // Pipeline merges are handled separately as the children are merged 124 // lexicographically. 125 if (kind == TimerKind::Pipeline) { 126 assert(children.size() == otherChildren.size() && 127 "pipeline merge requires the same number of children"); 128 for (auto it : llvm::zip(children, otherChildren)) 129 std::get<0>(it).second->merge(std::move(*std::get<1>(it).second)); 130 return; 131 } 132 133 // Otherwise, we merge children based upon their timer key. 134 for (auto &otherChild : otherChildren) 135 mergeChild(std::move(otherChild)); 136 } 137 138 /// Merge in the given child timer and id into this timer. 139 void mergeChild(ChildrenMap::value_type &&childIt) { 140 auto &child = children[childIt.first]; 141 if (!child) 142 child = std::move(childIt.second); 143 else 144 child->merge(std::move(*childIt.second)); 145 } 146 147 /// Raw timing information. 148 std::chrono::time_point<std::chrono::system_clock> startTime; 149 std::chrono::nanoseconds wallTime = std::chrono::nanoseconds(0); 150 std::chrono::nanoseconds userTime = std::chrono::nanoseconds(0); 151 152 /// A map of unique identifiers to child timers. 153 ChildrenMap children; 154 155 /// A descriptive name for this timer. 156 std::string name; 157 158 /// The type of timer this instance represents. 159 TimerKind kind; 160 }; 161 162 struct PassTiming : public PassInstrumentation { 163 PassTiming(std::unique_ptr<PassManager::PassTimingConfig> config) 164 : config(std::move(config)) {} 165 ~PassTiming() override { print(); } 166 167 /// Setup the instrumentation hooks. 168 void runBeforePipeline(const OperationName &name, 169 const PipelineParentInfo &parentInfo) override; 170 void runAfterPipeline(const OperationName &name, 171 const PipelineParentInfo &parentInfo) override; 172 void runBeforePass(Pass *pass, Operation *) override { startPassTimer(pass); } 173 void runAfterPass(Pass *pass, Operation *) override; 174 void runAfterPassFailed(Pass *pass, Operation *op) override { 175 runAfterPass(pass, op); 176 } 177 void runBeforeAnalysis(StringRef name, TypeID id, Operation *) override { 178 startAnalysisTimer(name, id); 179 } 180 void runAfterAnalysis(StringRef, TypeID, Operation *) override; 181 182 /// Print and clear the timing results. 183 void print(); 184 185 /// Start a new timer for the given pass. 186 void startPassTimer(Pass *pass); 187 188 /// Start a new timer for the given analysis. 189 void startAnalysisTimer(StringRef name, TypeID id); 190 191 /// Pop the last active timer for the current thread. 192 Timer *popLastActiveTimer() { 193 auto tid = llvm::get_threadid(); 194 auto &activeTimers = activeThreadTimers[tid]; 195 assert(!activeTimers.empty() && "expected active timer"); 196 return activeTimers.pop_back_val(); 197 } 198 199 /// Print the timing result in list mode. 200 void printResultsAsList(raw_ostream &os, Timer *root, TimeRecord totalTime); 201 202 /// Print the timing result in pipeline mode. 203 void printResultsAsPipeline(raw_ostream &os, Timer *root, 204 TimeRecord totalTime); 205 206 /// Returns a timer for the provided identifier and name. 207 Timer *getTimer(const void *id, TimerKind kind, 208 std::function<std::string()> &&nameBuilder) { 209 auto tid = llvm::get_threadid(); 210 211 // If there is no active timer then add to the root timer. 212 auto &activeTimers = activeThreadTimers[tid]; 213 Timer *parentTimer; 214 if (activeTimers.empty()) { 215 auto &rootTimer = rootTimers[tid]; 216 if (!rootTimer) 217 rootTimer = std::make_unique<Timer>("root", TimerKind::Pipeline); 218 parentTimer = rootTimer.get(); 219 } else { 220 // Otherwise, add this to the active timer. 221 parentTimer = activeTimers.back(); 222 } 223 224 auto timer = parentTimer->getChildTimer(id, kind, std::move(nameBuilder)); 225 activeTimers.push_back(timer); 226 return timer; 227 } 228 229 /// The root top level timers for each thread. 230 DenseMap<uint64_t, std::unique_ptr<Timer>> rootTimers; 231 232 /// A stack of the currently active pass timers per thread. 233 DenseMap<uint64_t, SmallVector<Timer *, 4>> activeThreadTimers; 234 235 /// The configuration object to use when printing the timing results. 236 std::unique_ptr<PassManager::PassTimingConfig> config; 237 238 /// A mapping of pipeline timers that need to be merged into the parent 239 /// collection. The timers are mapped to the parent info to merge into. 240 DenseMap<PipelineParentInfo, SmallVector<Timer::ChildrenMap::value_type, 4>> 241 pipelinesToMerge; 242 }; 243 } // end anonymous namespace 244 245 void PassTiming::runBeforePipeline(const OperationName &name, 246 const PipelineParentInfo &parentInfo) { 247 // We don't actually want to time the pipelines, they gather their total 248 // from their held passes. 249 getTimer(name.getAsOpaquePointer(), TimerKind::Pipeline, 250 [&] { return ("'" + name.getStringRef() + "' Pipeline").str(); }); 251 } 252 253 void PassTiming::runAfterPipeline(const OperationName &name, 254 const PipelineParentInfo &parentInfo) { 255 // Pop the timer for the pipeline. 256 auto tid = llvm::get_threadid(); 257 auto &activeTimers = activeThreadTimers[tid]; 258 assert(!activeTimers.empty() && "expected active timer"); 259 activeTimers.pop_back(); 260 261 // If the current thread is the same as the parent, there is nothing left to 262 // do. 263 if (tid == parentInfo.parentThreadID) 264 return; 265 266 // Otherwise, mark the pipeline timer for merging into the correct parent 267 // thread. 268 assert(activeTimers.empty() && "expected parent timer to be root"); 269 auto *parentTimer = rootTimers[tid].get(); 270 assert(parentTimer->children.size() == 1 && 271 parentTimer->children.count(name.getAsOpaquePointer()) && 272 "expected a single pipeline timer"); 273 pipelinesToMerge[parentInfo].push_back( 274 std::move(*parentTimer->children.begin())); 275 rootTimers.erase(tid); 276 } 277 278 /// Start a new timer for the given pass. 279 void PassTiming::startPassTimer(Pass *pass) { 280 auto kind = isa<OpToOpPassAdaptor>(pass) ? TimerKind::PipelineCollection 281 : TimerKind::PassOrAnalysis; 282 Timer *timer = getTimer(pass, kind, [pass]() -> std::string { 283 if (auto *adaptor = dyn_cast<OpToOpPassAdaptor>(pass)) 284 return adaptor->getAdaptorName(); 285 return std::string(pass->getName()); 286 }); 287 288 // We don't actually want to time the adaptor passes, they gather their total 289 // from their held passes. 290 if (!isa<OpToOpPassAdaptor>(pass)) 291 timer->start(); 292 } 293 294 /// Start a new timer for the given analysis. 295 void PassTiming::startAnalysisTimer(StringRef name, TypeID id) { 296 Timer *timer = getTimer(id.getAsOpaquePointer(), TimerKind::PassOrAnalysis, 297 [name] { return "(A) " + name.str(); }); 298 timer->start(); 299 } 300 301 /// Stop a pass timer. 302 void PassTiming::runAfterPass(Pass *pass, Operation *) { 303 Timer *timer = popLastActiveTimer(); 304 305 // If this is a pass adaptor, then we need to merge in the timing data for the 306 // pipelines running on other threads. 307 if (isa<OpToOpPassAdaptor>(pass)) { 308 auto toMerge = pipelinesToMerge.find({llvm::get_threadid(), pass}); 309 if (toMerge != pipelinesToMerge.end()) { 310 for (auto &it : toMerge->second) 311 timer->mergeChild(std::move(it)); 312 pipelinesToMerge.erase(toMerge); 313 } 314 return; 315 } 316 317 timer->stop(); 318 } 319 320 /// Stop a timer. 321 void PassTiming::runAfterAnalysis(StringRef, TypeID, Operation *) { 322 popLastActiveTimer()->stop(); 323 } 324 325 /// Utility to print the timer heading information. 326 static void printTimerHeader(raw_ostream &os, TimeRecord total) { 327 os << "===" << std::string(73, '-') << "===\n"; 328 // Figure out how many spaces to description name. 329 unsigned padding = (80 - kPassTimingDescription.size()) / 2; 330 os.indent(padding) << kPassTimingDescription << '\n'; 331 os << "===" << std::string(73, '-') << "===\n"; 332 333 // Print the total time followed by the section headers. 334 os << llvm::format(" Total Execution Time: %5.4f seconds\n\n", total.wall); 335 if (total.user != total.wall) 336 os << " ---User Time---"; 337 os << " ---Wall Time--- --- Name ---\n"; 338 } 339 340 /// Utility to print a single line entry in the timer output. 341 static void printTimeEntry(raw_ostream &os, unsigned indent, StringRef name, 342 TimeRecord time, TimeRecord totalTime) { 343 time.print(os, totalTime); 344 os.indent(indent) << name << "\n"; 345 } 346 347 /// Print out the current timing information. 348 void PassTiming::print() { 349 // Don't print anything if there is no timing data. 350 if (rootTimers.empty()) 351 return; 352 353 assert(rootTimers.size() == 1 && "expected one remaining root timer"); 354 355 auto printCallback = [&](raw_ostream &os) { 356 auto &rootTimer = rootTimers.begin()->second; 357 // Print the timer header. 358 TimeRecord totalTime = rootTimer->getTotalTime(); 359 printTimerHeader(os, totalTime); 360 // Defer to a specialized printer for each display mode. 361 switch (config->getDisplayMode()) { 362 case PassDisplayMode::List: 363 printResultsAsList(os, rootTimer.get(), totalTime); 364 break; 365 case PassDisplayMode::Pipeline: 366 printResultsAsPipeline(os, rootTimer.get(), totalTime); 367 break; 368 } 369 printTimeEntry(os, 0, "Total", totalTime, totalTime); 370 os.flush(); 371 372 // Reset root timers. 373 rootTimers.clear(); 374 activeThreadTimers.clear(); 375 }; 376 377 config->printTiming(printCallback); 378 } 379 380 // The default implementation for printTiming uses 381 // `llvm::CreateInfoOutputFile()` as stream, it can be overridden by clients 382 // to customize the output. 383 void PassManager::PassTimingConfig::printTiming(PrintCallbackFn printCallback) { 384 printCallback(*llvm::CreateInfoOutputFile()); 385 } 386 387 /// Print the timing result in list mode. 388 void PassTiming::printResultsAsList(raw_ostream &os, Timer *root, 389 TimeRecord totalTime) { 390 llvm::StringMap<TimeRecord> mergedTimings; 391 392 std::function<void(Timer *)> addTimer = [&](Timer *timer) { 393 // Only add timing information for passes and analyses. 394 if (timer->kind == TimerKind::PassOrAnalysis) 395 mergedTimings[timer->name] += timer->getTotalTime(); 396 for (auto &children : timer->children) 397 addTimer(children.second.get()); 398 }; 399 400 // Add each of the top level timers. 401 for (auto &topLevelTimer : root->children) 402 addTimer(topLevelTimer.second.get()); 403 404 // Sort the timing information by wall time. 405 std::vector<std::pair<StringRef, TimeRecord>> timerNameAndTime; 406 for (auto &it : mergedTimings) 407 timerNameAndTime.emplace_back(it.first(), it.second); 408 llvm::array_pod_sort(timerNameAndTime.begin(), timerNameAndTime.end(), 409 [](const std::pair<StringRef, TimeRecord> *lhs, 410 const std::pair<StringRef, TimeRecord> *rhs) { 411 return llvm::array_pod_sort_comparator<double>( 412 &rhs->second.wall, &lhs->second.wall); 413 }); 414 415 // Print the timing information sequentially. 416 for (auto &timeData : timerNameAndTime) 417 printTimeEntry(os, 0, timeData.first, timeData.second, totalTime); 418 } 419 420 /// Print the timing result in pipeline mode. 421 void PassTiming::printResultsAsPipeline(raw_ostream &os, Timer *root, 422 TimeRecord totalTime) { 423 std::function<void(unsigned, Timer *)> printTimer = [&](unsigned indent, 424 Timer *timer) { 425 // If this is a timer for a pipeline collection and the collection only has 426 // one pipeline child, then only print the child. 427 if (timer->kind == TimerKind::PipelineCollection && 428 timer->children.size() == 1) 429 return printTimer(indent, timer->children.begin()->second.get()); 430 431 printTimeEntry(os, indent, timer->name, timer->getTotalTime(), totalTime); 432 433 // If this timer is a pipeline, then print the children in-order. 434 if (timer->kind == TimerKind::Pipeline) { 435 for (auto &child : timer->children) 436 printTimer(indent + 2, child.second.get()); 437 return; 438 } 439 440 // Otherwise, sort the children by name to give a deterministic ordering 441 // when emitting the time. 442 SmallVector<Timer *, 4> children; 443 children.reserve(timer->children.size()); 444 for (auto &child : timer->children) 445 children.push_back(child.second.get()); 446 llvm::array_pod_sort(children.begin(), children.end(), 447 [](Timer *const *lhs, Timer *const *rhs) { 448 return (*lhs)->name.compare((*rhs)->name); 449 }); 450 for (auto &child : children) 451 printTimer(indent + 2, child); 452 }; 453 454 // Print each of the top level timers. 455 for (auto &topLevelTimer : root->children) 456 printTimer(0, topLevelTimer.second.get()); 457 } 458 459 // Out-of-line as key function. 460 PassManager::PassTimingConfig::~PassTimingConfig() {} 461 462 //===----------------------------------------------------------------------===// 463 // PassManager 464 //===----------------------------------------------------------------------===// 465 466 /// Add an instrumentation to time the execution of passes and the computation 467 /// of analyses. 468 void PassManager::enableTiming(std::unique_ptr<PassTimingConfig> config) { 469 // Check if pass timing is already enabled. 470 if (passTiming) 471 return; 472 if (!config) 473 config = std::make_unique<PassManager::PassTimingConfig>(); 474 addInstrumentation(std::make_unique<PassTiming>(std::move(config))); 475 passTiming = true; 476 } 477