1 //===- PassTiming.cpp -----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "PassDetail.h" 10 #include "mlir/Pass/PassManager.h" 11 #include "llvm/ADT/MapVector.h" 12 #include "llvm/ADT/STLExtras.h" 13 #include "llvm/ADT/SmallVector.h" 14 #include "llvm/ADT/Statistic.h" 15 #include "llvm/Support/Format.h" 16 #include "llvm/Support/FormatVariadic.h" 17 #include "llvm/Support/Threading.h" 18 #include <chrono> 19 20 using namespace mlir; 21 using namespace mlir::detail; 22 23 constexpr StringLiteral kPassTimingDescription = 24 "... Pass execution timing report ..."; 25 26 namespace { 27 /// Simple record class to record timing information. 28 struct TimeRecord { 29 TimeRecord(double wall = 0.0, double user = 0.0) : wall(wall), user(user) {} 30 31 TimeRecord &operator+=(const TimeRecord &other) { 32 wall += other.wall; 33 user += other.user; 34 return *this; 35 } 36 37 /// Print the current time record to 'os', with a breakdown showing 38 /// contributions to the give 'total' time record. 39 void print(raw_ostream &os, const TimeRecord &total) { 40 if (total.user != total.wall) 41 os << llvm::format(" %7.4f (%5.1f%%) ", user, 42 100.0 * user / total.user); 43 os << llvm::format(" %7.4f (%5.1f%%) ", wall, 100.0 * wall / total.wall); 44 } 45 46 double wall, user; 47 }; 48 49 /// An enumeration of the different types of timers. 50 enum class TimerKind { 51 /// This timer represents an ordered collection of pass timers, corresponding 52 /// to a pass pipeline. 53 Pipeline, 54 55 /// This timer represents a collection of pipeline timers. 56 PipelineCollection, 57 58 /// This timer represents an analysis or pass timer. 59 PassOrAnalysis 60 }; 61 62 struct Timer { 63 explicit Timer(std::string &&name, TimerKind kind) 64 : name(std::move(name)), kind(kind) {} 65 66 /// Start the timer. 67 void start() { startTime = std::chrono::system_clock::now(); } 68 69 /// Stop the timer. 70 void stop() { 71 auto newTime = std::chrono::system_clock::now() - startTime; 72 wallTime += newTime; 73 userTime += newTime; 74 } 75 76 /// Get or create a child timer with the provided name and id. 77 Timer *getChildTimer(const void *id, TimerKind kind, 78 std::function<std::string()> &&nameBuilder) { 79 auto &child = children[id]; 80 if (!child) 81 child = std::make_unique<Timer>(nameBuilder(), kind); 82 return child.get(); 83 } 84 85 /// Returns the total time for this timer in seconds. 86 TimeRecord getTotalTime() { 87 // If this is a pass or analysis timer, use the recorded time directly. 88 if (kind == TimerKind::PassOrAnalysis) { 89 return TimeRecord( 90 std::chrono::duration_cast<std::chrono::duration<double>>(wallTime) 91 .count(), 92 std::chrono::duration_cast<std::chrono::duration<double>>(userTime) 93 .count()); 94 } 95 96 // Otherwise, accumulate the timing from each of the children. 97 TimeRecord totalTime; 98 for (auto &child : children) 99 totalTime += child.second->getTotalTime(); 100 return totalTime; 101 } 102 103 /// A map of unique identifiers to child timers. 104 using ChildrenMap = llvm::MapVector<const void *, std::unique_ptr<Timer>>; 105 106 /// Merge the timing data from 'other' into this timer. 107 void merge(Timer &&other) { 108 if (wallTime < other.wallTime) 109 wallTime = other.wallTime; 110 userTime += other.userTime; 111 mergeChildren(std::move(other.children)); 112 } 113 114 /// Merge the timer children in 'otherChildren' with the children of this 115 /// timer. 116 void mergeChildren(ChildrenMap &&otherChildren) { 117 // Check for an empty children list. 118 if (children.empty()) { 119 children = std::move(otherChildren); 120 return; 121 } 122 123 // Pipeline merges are handled separately as the children are merged 124 // lexicographically. 125 if (kind == TimerKind::Pipeline) { 126 assert(children.size() == otherChildren.size() && 127 "pipeline merge requires the same number of children"); 128 for (auto it : llvm::zip(children, otherChildren)) 129 std::get<0>(it).second->merge(std::move(*std::get<1>(it).second)); 130 return; 131 } 132 133 // Otherwise, we merge children based upon their timer key. 134 for (auto &otherChild : otherChildren) 135 mergeChild(std::move(otherChild)); 136 } 137 138 /// Merge in the given child timer and id into this timer. 139 void mergeChild(ChildrenMap::value_type &&childIt) { 140 auto &child = children[childIt.first]; 141 if (!child) 142 child = std::move(childIt.second); 143 else 144 child->merge(std::move(*childIt.second)); 145 } 146 147 /// Raw timing information. 148 std::chrono::time_point<std::chrono::system_clock> startTime; 149 std::chrono::nanoseconds wallTime = std::chrono::nanoseconds(0); 150 std::chrono::nanoseconds userTime = std::chrono::nanoseconds(0); 151 152 /// A map of unique identifiers to child timers. 153 ChildrenMap children; 154 155 /// A descriptive name for this timer. 156 std::string name; 157 158 /// The type of timer this instance represents. 159 TimerKind kind; 160 }; 161 162 struct PassTiming : public PassInstrumentation { 163 PassTiming(PassDisplayMode displayMode) : displayMode(displayMode) {} 164 ~PassTiming() override { print(); } 165 166 /// Setup the instrumentation hooks. 167 void runBeforePipeline(const OperationName &name, 168 const PipelineParentInfo &parentInfo) override; 169 void runAfterPipeline(const OperationName &name, 170 const PipelineParentInfo &parentInfo) override; 171 void runBeforePass(Pass *pass, Operation *) override { startPassTimer(pass); } 172 void runAfterPass(Pass *pass, Operation *) override; 173 void runAfterPassFailed(Pass *pass, Operation *op) override { 174 runAfterPass(pass, op); 175 } 176 void runBeforeAnalysis(StringRef name, AnalysisID *id, Operation *) override { 177 startAnalysisTimer(name, id); 178 } 179 void runAfterAnalysis(StringRef, AnalysisID *, Operation *) override; 180 181 /// Print and clear the timing results. 182 void print(); 183 184 /// Start a new timer for the given pass. 185 void startPassTimer(Pass *pass); 186 187 /// Start a new timer for the given analysis. 188 void startAnalysisTimer(StringRef name, AnalysisID *id); 189 190 /// Pop the last active timer for the current thread. 191 Timer *popLastActiveTimer() { 192 auto tid = llvm::get_threadid(); 193 auto &activeTimers = activeThreadTimers[tid]; 194 assert(!activeTimers.empty() && "expected active timer"); 195 return activeTimers.pop_back_val(); 196 } 197 198 /// Print the timing result in list mode. 199 void printResultsAsList(raw_ostream &os, Timer *root, TimeRecord totalTime); 200 201 /// Print the timing result in pipeline mode. 202 void printResultsAsPipeline(raw_ostream &os, Timer *root, 203 TimeRecord totalTime); 204 205 /// Returns a timer for the provided identifier and name. 206 Timer *getTimer(const void *id, TimerKind kind, 207 std::function<std::string()> &&nameBuilder) { 208 auto tid = llvm::get_threadid(); 209 210 // If there is no active timer then add to the root timer. 211 auto &activeTimers = activeThreadTimers[tid]; 212 Timer *parentTimer; 213 if (activeTimers.empty()) { 214 auto &rootTimer = rootTimers[tid]; 215 if (!rootTimer) 216 rootTimer = std::make_unique<Timer>("root", TimerKind::Pipeline); 217 parentTimer = rootTimer.get(); 218 } else { 219 // Otherwise, add this to the active timer. 220 parentTimer = activeTimers.back(); 221 } 222 223 auto timer = parentTimer->getChildTimer(id, kind, std::move(nameBuilder)); 224 activeTimers.push_back(timer); 225 return timer; 226 } 227 228 /// The root top level timers for each thread. 229 DenseMap<uint64_t, std::unique_ptr<Timer>> rootTimers; 230 231 /// A stack of the currently active pass timers per thread. 232 DenseMap<uint64_t, SmallVector<Timer *, 4>> activeThreadTimers; 233 234 /// The display mode to use when printing the timing results. 235 PassDisplayMode displayMode; 236 237 /// A mapping of pipeline timers that need to be merged into the parent 238 /// collection. The timers are mapped to the parent info to merge into. 239 DenseMap<PipelineParentInfo, SmallVector<Timer::ChildrenMap::value_type, 4>> 240 pipelinesToMerge; 241 }; 242 } // end anonymous namespace 243 244 void PassTiming::runBeforePipeline(const OperationName &name, 245 const PipelineParentInfo &parentInfo) { 246 // We don't actually want to time the piplelines, they gather their total 247 // from their held passes. 248 getTimer(name.getAsOpaquePointer(), TimerKind::Pipeline, 249 [&] { return ("'" + name.getStringRef() + "' Pipeline").str(); }); 250 } 251 252 void PassTiming::runAfterPipeline(const OperationName &name, 253 const PipelineParentInfo &parentInfo) { 254 // Pop the timer for the pipeline. 255 auto tid = llvm::get_threadid(); 256 auto &activeTimers = activeThreadTimers[tid]; 257 assert(!activeTimers.empty() && "expected active timer"); 258 activeTimers.pop_back(); 259 260 // If the current thread is the same as the parent, there is nothing left to 261 // do. 262 if (tid == parentInfo.parentThreadID) 263 return; 264 265 // Otherwise, mark the pipeline timer for merging into the correct parent 266 // thread. 267 assert(activeTimers.empty() && "expected parent timer to be root"); 268 auto *parentTimer = rootTimers[tid].get(); 269 assert(parentTimer->children.size() == 1 && 270 parentTimer->children.count(name.getAsOpaquePointer()) && 271 "expected a single pipeline timer"); 272 pipelinesToMerge[parentInfo].push_back( 273 std::move(*parentTimer->children.begin())); 274 rootTimers.erase(tid); 275 } 276 277 /// Start a new timer for the given pass. 278 void PassTiming::startPassTimer(Pass *pass) { 279 auto kind = isAdaptorPass(pass) ? TimerKind::PipelineCollection 280 : TimerKind::PassOrAnalysis; 281 Timer *timer = getTimer(pass, kind, [pass]() -> std::string { 282 if (auto *adaptor = getAdaptorPassBase(pass)) 283 return adaptor->getName(); 284 return std::string(pass->getName()); 285 }); 286 287 // We don't actually want to time the adaptor passes, they gather their total 288 // from their held passes. 289 if (!isAdaptorPass(pass)) 290 timer->start(); 291 } 292 293 /// Start a new timer for the given analysis. 294 void PassTiming::startAnalysisTimer(StringRef name, AnalysisID *id) { 295 Timer *timer = getTimer(id, TimerKind::PassOrAnalysis, 296 [name] { return "(A) " + name.str(); }); 297 timer->start(); 298 } 299 300 /// Stop a pass timer. 301 void PassTiming::runAfterPass(Pass *pass, Operation *) { 302 Timer *timer = popLastActiveTimer(); 303 304 // If this is an OpToOpPassAdaptorParallel, then we need to merge in the 305 // timing data for the pipelines running on other threads. 306 if (isa<OpToOpPassAdaptorParallel>(pass)) { 307 auto toMerge = pipelinesToMerge.find({llvm::get_threadid(), pass}); 308 if (toMerge != pipelinesToMerge.end()) { 309 for (auto &it : toMerge->second) 310 timer->mergeChild(std::move(it)); 311 pipelinesToMerge.erase(toMerge); 312 } 313 return; 314 } 315 316 // Adaptor passes aren't timed directly, so we don't need to stop their 317 // timers. 318 if (!isAdaptorPass(pass)) 319 timer->stop(); 320 } 321 322 /// Stop a timer. 323 void PassTiming::runAfterAnalysis(StringRef, AnalysisID *, Operation *) { 324 popLastActiveTimer()->stop(); 325 } 326 327 /// Utility to print the timer heading information. 328 static void printTimerHeader(raw_ostream &os, TimeRecord total) { 329 os << "===" << std::string(73, '-') << "===\n"; 330 // Figure out how many spaces to description name. 331 unsigned padding = (80 - kPassTimingDescription.size()) / 2; 332 os.indent(padding) << kPassTimingDescription << '\n'; 333 os << "===" << std::string(73, '-') << "===\n"; 334 335 // Print the total time followed by the section headers. 336 os << llvm::format(" Total Execution Time: %5.4f seconds\n\n", total.wall); 337 if (total.user != total.wall) 338 os << " ---User Time---"; 339 os << " ---Wall Time--- --- Name ---\n"; 340 } 341 342 /// Utility to print a single line entry in the timer output. 343 static void printTimeEntry(raw_ostream &os, unsigned indent, StringRef name, 344 TimeRecord time, TimeRecord totalTime) { 345 time.print(os, totalTime); 346 os.indent(indent) << name << "\n"; 347 } 348 349 /// Print out the current timing information. 350 void PassTiming::print() { 351 // Don't print anything if there is no timing data. 352 if (rootTimers.empty()) 353 return; 354 355 assert(rootTimers.size() == 1 && "expected one remaining root timer"); 356 auto &rootTimer = rootTimers.begin()->second; 357 auto os = llvm::CreateInfoOutputFile(); 358 359 // Print the timer header. 360 TimeRecord totalTime = rootTimer->getTotalTime(); 361 printTimerHeader(*os, totalTime); 362 363 // Defer to a specialized printer for each display mode. 364 switch (displayMode) { 365 case PassDisplayMode::List: 366 printResultsAsList(*os, rootTimer.get(), totalTime); 367 break; 368 case PassDisplayMode::Pipeline: 369 printResultsAsPipeline(*os, rootTimer.get(), totalTime); 370 break; 371 } 372 printTimeEntry(*os, 0, "Total", totalTime, totalTime); 373 os->flush(); 374 375 // Reset root timers. 376 rootTimers.clear(); 377 activeThreadTimers.clear(); 378 } 379 380 /// Print the timing result in list mode. 381 void PassTiming::printResultsAsList(raw_ostream &os, Timer *root, 382 TimeRecord totalTime) { 383 llvm::StringMap<TimeRecord> mergedTimings; 384 385 std::function<void(Timer *)> addTimer = [&](Timer *timer) { 386 // Only add timing information for passes and analyses. 387 if (timer->kind == TimerKind::PassOrAnalysis) 388 mergedTimings[timer->name] += timer->getTotalTime(); 389 for (auto &children : timer->children) 390 addTimer(children.second.get()); 391 }; 392 393 // Add each of the top level timers. 394 for (auto &topLevelTimer : root->children) 395 addTimer(topLevelTimer.second.get()); 396 397 // Sort the timing information by wall time. 398 std::vector<std::pair<StringRef, TimeRecord>> timerNameAndTime; 399 for (auto &it : mergedTimings) 400 timerNameAndTime.emplace_back(it.first(), it.second); 401 llvm::array_pod_sort(timerNameAndTime.begin(), timerNameAndTime.end(), 402 [](const std::pair<StringRef, TimeRecord> *lhs, 403 const std::pair<StringRef, TimeRecord> *rhs) { 404 return llvm::array_pod_sort_comparator<double>( 405 &rhs->second.wall, &lhs->second.wall); 406 }); 407 408 // Print the timing information sequentially. 409 for (auto &timeData : timerNameAndTime) 410 printTimeEntry(os, 0, timeData.first, timeData.second, totalTime); 411 } 412 413 /// Print the timing result in pipeline mode. 414 void PassTiming::printResultsAsPipeline(raw_ostream &os, Timer *root, 415 TimeRecord totalTime) { 416 std::function<void(unsigned, Timer *)> printTimer = [&](unsigned indent, 417 Timer *timer) { 418 // If this is a timer for a pipeline collection and the collection only has 419 // one pipeline child, then only print the child. 420 if (timer->kind == TimerKind::PipelineCollection && 421 timer->children.size() == 1) 422 return printTimer(indent, timer->children.begin()->second.get()); 423 424 printTimeEntry(os, indent, timer->name, timer->getTotalTime(), totalTime); 425 426 // If this timer is a pipeline, then print the children in-order. 427 if (timer->kind == TimerKind::Pipeline) { 428 for (auto &child : timer->children) 429 printTimer(indent + 2, child.second.get()); 430 return; 431 } 432 433 // Otherwise, sort the children by name to give a deterministic ordering 434 // when emitting the time. 435 SmallVector<Timer *, 4> children; 436 children.reserve(timer->children.size()); 437 for (auto &child : timer->children) 438 children.push_back(child.second.get()); 439 llvm::array_pod_sort(children.begin(), children.end(), 440 [](Timer *const *lhs, Timer *const *rhs) { 441 return (*lhs)->name.compare((*rhs)->name); 442 }); 443 for (auto &child : children) 444 printTimer(indent + 2, child); 445 }; 446 447 // Print each of the top level timers. 448 for (auto &topLevelTimer : root->children) 449 printTimer(0, topLevelTimer.second.get()); 450 } 451 452 //===----------------------------------------------------------------------===// 453 // PassManager 454 //===----------------------------------------------------------------------===// 455 456 /// Add an instrumentation to time the execution of passes and the computation 457 /// of analyses. 458 void PassManager::enableTiming(PassDisplayMode displayMode) { 459 // Check if pass timing is already enabled. 460 if (passTiming) 461 return; 462 addInstrumentation(std::make_unique<PassTiming>(displayMode)); 463 passTiming = true; 464 } 465