1 //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements hierarchical time profiler. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Support/TimeProfiler.h" 14 #include "llvm/ADT/StringMap.h" 15 #include "llvm/Support/CommandLine.h" 16 #include "llvm/Support/JSON.h" 17 #include "llvm/Support/Path.h" 18 #include "llvm/Support/Threading.h" 19 #include <algorithm> 20 #include <cassert> 21 #include <chrono> 22 #include <mutex> 23 #include <string> 24 #include <vector> 25 26 using namespace std::chrono; 27 using namespace llvm; 28 29 namespace { 30 std::mutex Mu; 31 // List of all instances 32 std::vector<TimeTraceProfiler *> 33 ThreadTimeTraceProfilerInstances; // guarded by Mu 34 // Per Thread instance 35 LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr; 36 } // namespace 37 38 namespace llvm { 39 40 TimeTraceProfiler *getTimeTraceProfilerInstance() { 41 return TimeTraceProfilerInstance; 42 } 43 44 typedef duration<steady_clock::rep, steady_clock::period> DurationType; 45 typedef time_point<steady_clock> TimePointType; 46 typedef std::pair<size_t, DurationType> CountAndDurationType; 47 typedef std::pair<std::string, CountAndDurationType> 48 NameAndCountAndDurationType; 49 50 struct Entry { 51 const TimePointType Start; 52 TimePointType End; 53 const std::string Name; 54 const std::string Detail; 55 56 Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt) 57 : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), 58 Detail(std::move(Dt)) {} 59 60 // Calculate timings for FlameGraph. Cast time points to microsecond precision 61 // rather than casting duration. This avoid truncation issues causing inner 62 // scopes overruning outer scopes. 63 steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const { 64 return (time_point_cast<microseconds>(Start) - 65 time_point_cast<microseconds>(StartTime)) 66 .count(); 67 } 68 69 steady_clock::rep getFlameGraphDurUs() const { 70 return (time_point_cast<microseconds>(End) - 71 time_point_cast<microseconds>(Start)) 72 .count(); 73 } 74 }; 75 76 struct TimeTraceProfiler { 77 TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "") 78 : StartTime(steady_clock::now()), ProcName(ProcName), 79 Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {} 80 81 void begin(std::string Name, llvm::function_ref<std::string()> Detail) { 82 Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name), 83 Detail()); 84 } 85 86 void end() { 87 assert(!Stack.empty() && "Must call begin() first"); 88 auto &E = Stack.back(); 89 E.End = steady_clock::now(); 90 91 // Check that end times monotonically increase. 92 assert((Entries.empty() || 93 (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >= 94 Entries.back().getFlameGraphStartUs(StartTime) + 95 Entries.back().getFlameGraphDurUs())) && 96 "TimeProfiler scope ended earlier than previous scope"); 97 98 // Calculate duration at full precision for overall counts. 99 DurationType Duration = E.End - E.Start; 100 101 // Only include sections longer or equal to TimeTraceGranularity msec. 102 if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity) 103 Entries.emplace_back(E); 104 105 // Track total time taken by each "name", but only the topmost levels of 106 // them; e.g. if there's a template instantiation that instantiates other 107 // templates from within, we only want to add the topmost one. "topmost" 108 // happens to be the ones that don't have any currently open entries above 109 // itself. 110 if (std::find_if(++Stack.rbegin(), Stack.rend(), [&](const Entry &Val) { 111 return Val.Name == E.Name; 112 }) == Stack.rend()) { 113 auto &CountAndTotal = CountAndTotalPerName[E.Name]; 114 CountAndTotal.first++; 115 CountAndTotal.second += Duration; 116 } 117 118 Stack.pop_back(); 119 } 120 121 // Write events from this TimeTraceProfilerInstance and 122 // ThreadTimeTraceProfilerInstances. 123 void Write(raw_pwrite_stream &OS) { 124 // Acquire Mutex as reading ThreadTimeTraceProfilerInstances. 125 std::lock_guard<std::mutex> Lock(Mu); 126 assert(Stack.empty() && 127 "All profiler sections should be ended when calling Write"); 128 assert(std::all_of(ThreadTimeTraceProfilerInstances.begin(), 129 ThreadTimeTraceProfilerInstances.end(), 130 [](const auto &TTP) { return TTP->Stack.empty(); }) && 131 "All profiler sections should be ended when calling Write"); 132 133 json::OStream J(OS); 134 J.objectBegin(); 135 J.attributeBegin("traceEvents"); 136 J.arrayBegin(); 137 138 // Emit all events for the main flame graph. 139 auto writeEvent = [&](const auto &E, uint64_t Tid) { 140 auto StartUs = E.getFlameGraphStartUs(StartTime); 141 auto DurUs = E.getFlameGraphDurUs(); 142 143 J.object([&]{ 144 J.attribute("pid", 1); 145 J.attribute("tid", int64_t(Tid)); 146 J.attribute("ph", "X"); 147 J.attribute("ts", StartUs); 148 J.attribute("dur", DurUs); 149 J.attribute("name", E.Name); 150 if (!E.Detail.empty()) { 151 J.attributeObject("args", [&] { J.attribute("detail", E.Detail); }); 152 } 153 }); 154 }; 155 for (const auto &E : Entries) { 156 writeEvent(E, this->Tid); 157 } 158 for (const auto &TTP : ThreadTimeTraceProfilerInstances) { 159 for (const auto &E : TTP->Entries) { 160 writeEvent(E, TTP->Tid); 161 } 162 } 163 164 // Emit totals by section name as additional "thread" events, sorted from 165 // longest one. 166 // Find highest used thread id. 167 uint64_t MaxTid = this->Tid; 168 for (const auto &TTP : ThreadTimeTraceProfilerInstances) { 169 MaxTid = std::max(MaxTid, TTP->Tid); 170 } 171 172 // Combine all CountAndTotalPerName from threads into one. 173 StringMap<CountAndDurationType> AllCountAndTotalPerName; 174 auto combineStat = [&](const auto &Stat) { 175 StringRef Key = Stat.getKey(); 176 auto Value = Stat.getValue(); 177 auto &CountAndTotal = AllCountAndTotalPerName[Key]; 178 CountAndTotal.first += Value.first; 179 CountAndTotal.second += Value.second; 180 }; 181 for (const auto &Stat : CountAndTotalPerName) { 182 combineStat(Stat); 183 } 184 for (const auto &TTP : ThreadTimeTraceProfilerInstances) { 185 for (const auto &Stat : TTP->CountAndTotalPerName) { 186 combineStat(Stat); 187 } 188 } 189 190 std::vector<NameAndCountAndDurationType> SortedTotals; 191 SortedTotals.reserve(AllCountAndTotalPerName.size()); 192 for (const auto &Total : AllCountAndTotalPerName) 193 SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue()); 194 195 llvm::sort(SortedTotals.begin(), SortedTotals.end(), 196 [](const NameAndCountAndDurationType &A, 197 const NameAndCountAndDurationType &B) { 198 return A.second.second > B.second.second; 199 }); 200 201 // Report totals on separate threads of tracing file. 202 uint64_t TotalTid = MaxTid + 1; 203 for (const auto &Total : SortedTotals) { 204 auto DurUs = duration_cast<microseconds>(Total.second.second).count(); 205 auto Count = AllCountAndTotalPerName[Total.first].first; 206 207 J.object([&]{ 208 J.attribute("pid", 1); 209 J.attribute("tid", int64_t(TotalTid)); 210 J.attribute("ph", "X"); 211 J.attribute("ts", 0); 212 J.attribute("dur", DurUs); 213 J.attribute("name", "Total " + Total.first); 214 J.attributeObject("args", [&] { 215 J.attribute("count", int64_t(Count)); 216 J.attribute("avg ms", int64_t(DurUs / Count / 1000)); 217 }); 218 }); 219 220 ++TotalTid; 221 } 222 223 // Emit metadata event with process name. 224 J.object([&] { 225 J.attribute("cat", ""); 226 J.attribute("pid", 1); 227 J.attribute("tid", 0); 228 J.attribute("ts", 0); 229 J.attribute("ph", "M"); 230 J.attribute("name", "process_name"); 231 J.attributeObject("args", [&] { J.attribute("name", ProcName); }); 232 }); 233 234 J.arrayEnd(); 235 J.attributeEnd(); 236 J.objectEnd(); 237 } 238 239 SmallVector<Entry, 16> Stack; 240 SmallVector<Entry, 128> Entries; 241 StringMap<CountAndDurationType> CountAndTotalPerName; 242 const TimePointType StartTime; 243 const std::string ProcName; 244 const uint64_t Tid; 245 246 // Minimum time granularity (in microseconds) 247 const unsigned TimeTraceGranularity; 248 }; 249 250 void timeTraceProfilerInitialize(unsigned TimeTraceGranularity, 251 StringRef ProcName) { 252 assert(TimeTraceProfilerInstance == nullptr && 253 "Profiler should not be initialized"); 254 TimeTraceProfilerInstance = new TimeTraceProfiler( 255 TimeTraceGranularity, llvm::sys::path::filename(ProcName)); 256 } 257 258 // Removes all TimeTraceProfilerInstances. 259 // Called from main thread. 260 void timeTraceProfilerCleanup() { 261 delete TimeTraceProfilerInstance; 262 std::lock_guard<std::mutex> Lock(Mu); 263 for (auto TTP : ThreadTimeTraceProfilerInstances) 264 delete TTP; 265 ThreadTimeTraceProfilerInstances.clear(); 266 } 267 268 // Finish TimeTraceProfilerInstance on a worker thread. 269 // This doesn't remove the instance, just moves the pointer to global vector. 270 void timeTraceProfilerFinishThread() { 271 std::lock_guard<std::mutex> Lock(Mu); 272 ThreadTimeTraceProfilerInstances.push_back(TimeTraceProfilerInstance); 273 TimeTraceProfilerInstance = nullptr; 274 } 275 276 void timeTraceProfilerWrite(raw_pwrite_stream &OS) { 277 assert(TimeTraceProfilerInstance != nullptr && 278 "Profiler object can't be null"); 279 TimeTraceProfilerInstance->Write(OS); 280 } 281 282 void timeTraceProfilerBegin(StringRef Name, StringRef Detail) { 283 if (TimeTraceProfilerInstance != nullptr) 284 TimeTraceProfilerInstance->begin(std::string(Name), 285 [&]() { return std::string(Detail); }); 286 } 287 288 void timeTraceProfilerBegin(StringRef Name, 289 llvm::function_ref<std::string()> Detail) { 290 if (TimeTraceProfilerInstance != nullptr) 291 TimeTraceProfilerInstance->begin(std::string(Name), Detail); 292 } 293 294 void timeTraceProfilerEnd() { 295 if (TimeTraceProfilerInstance != nullptr) 296 TimeTraceProfilerInstance->end(); 297 } 298 299 } // namespace llvm 300