1 //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements hierarchical time profiler. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/Support/TimeProfiler.h" 14 #include "llvm/ADT/StringMap.h" 15 #include "llvm/Support/CommandLine.h" 16 #include "llvm/Support/JSON.h" 17 #include "llvm/Support/Path.h" 18 #include "llvm/Support/Threading.h" 19 #include <algorithm> 20 #include <cassert> 21 #include <chrono> 22 #include <mutex> 23 #include <string> 24 #include <vector> 25 26 using namespace std::chrono; 27 28 namespace { 29 std::mutex Mu; 30 std::vector<std::unique_ptr<llvm::TimeTraceProfiler>> 31 ThreadTimeTraceProfilerInstances; // guarded by Mu 32 } // namespace 33 34 namespace llvm { 35 36 LLVM_THREAD_LOCAL std::unique_ptr<TimeTraceProfiler> TimeTraceProfilerInstance; 37 38 typedef duration<steady_clock::rep, steady_clock::period> DurationType; 39 typedef time_point<steady_clock> TimePointType; 40 typedef std::pair<size_t, DurationType> CountAndDurationType; 41 typedef std::pair<std::string, CountAndDurationType> 42 NameAndCountAndDurationType; 43 44 struct Entry { 45 const TimePointType Start; 46 TimePointType End; 47 const std::string Name; 48 const std::string Detail; 49 50 Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt) 51 : Start(std::move(S)), End(std::move(E)), Name(std::move(N)), 52 Detail(std::move(Dt)) {} 53 54 // Calculate timings for FlameGraph. Cast time points to microsecond precision 55 // rather than casting duration. This avoid truncation issues causing inner 56 // scopes overruning outer scopes. 57 steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const { 58 return (time_point_cast<microseconds>(Start) - 59 time_point_cast<microseconds>(StartTime)) 60 .count(); 61 } 62 63 steady_clock::rep getFlameGraphDurUs() const { 64 return (time_point_cast<microseconds>(End) - 65 time_point_cast<microseconds>(Start)) 66 .count(); 67 } 68 }; 69 70 struct TimeTraceProfiler { 71 TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "") 72 : StartTime(steady_clock::now()), ProcName(ProcName), 73 Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {} 74 75 void begin(std::string Name, llvm::function_ref<std::string()> Detail) { 76 Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name), 77 Detail()); 78 } 79 80 void end() { 81 assert(!Stack.empty() && "Must call begin() first"); 82 auto &E = Stack.back(); 83 E.End = steady_clock::now(); 84 85 // Check that end times monotonically increase. 86 assert((Entries.empty() || 87 (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >= 88 Entries.back().getFlameGraphStartUs(StartTime) + 89 Entries.back().getFlameGraphDurUs())) && 90 "TimeProfiler scope ended earlier than previous scope"); 91 92 // Calculate duration at full precision for overall counts. 93 DurationType Duration = E.End - E.Start; 94 95 // Only include sections longer or equal to TimeTraceGranularity msec. 96 if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity) 97 Entries.emplace_back(E); 98 99 // Track total time taken by each "name", but only the topmost levels of 100 // them; e.g. if there's a template instantiation that instantiates other 101 // templates from within, we only want to add the topmost one. "topmost" 102 // happens to be the ones that don't have any currently open entries above 103 // itself. 104 if (std::find_if(++Stack.rbegin(), Stack.rend(), [&](const Entry &Val) { 105 return Val.Name == E.Name; 106 }) == Stack.rend()) { 107 auto &CountAndTotal = CountAndTotalPerName[E.Name]; 108 CountAndTotal.first++; 109 CountAndTotal.second += Duration; 110 } 111 112 Stack.pop_back(); 113 } 114 115 // Write events from this TimeTraceProfilerInstance and 116 // ThreadTimeTraceProfilerInstances. 117 void Write(raw_pwrite_stream &OS) { 118 // Acquire Mutex as reading ThreadTimeTraceProfilerInstances. 119 std::lock_guard<std::mutex> Lock(Mu); 120 assert(Stack.empty() && 121 "All profiler sections should be ended when calling Write"); 122 assert(std::all_of(ThreadTimeTraceProfilerInstances.begin(), 123 ThreadTimeTraceProfilerInstances.end(), 124 [](const auto &TTP) { return TTP->Stack.empty(); }) && 125 "All profiler sections should be ended when calling Write"); 126 127 json::OStream J(OS); 128 J.objectBegin(); 129 J.attributeBegin("traceEvents"); 130 J.arrayBegin(); 131 132 // Emit all events for the main flame graph. 133 auto writeEvent = [&](auto &E, uint64_t Tid) { 134 auto StartUs = E.getFlameGraphStartUs(StartTime); 135 auto DurUs = E.getFlameGraphDurUs(); 136 137 J.object([&]{ 138 J.attribute("pid", 1); 139 J.attribute("tid", int64_t(Tid)); 140 J.attribute("ph", "X"); 141 J.attribute("ts", StartUs); 142 J.attribute("dur", DurUs); 143 J.attribute("name", E.Name); 144 if (!E.Detail.empty()) { 145 J.attributeObject("args", [&] { J.attribute("detail", E.Detail); }); 146 } 147 }); 148 }; 149 for (const auto &E : Entries) { 150 writeEvent(E, this->Tid); 151 } 152 for (const auto &TTP : ThreadTimeTraceProfilerInstances) { 153 for (const auto &E : TTP->Entries) { 154 writeEvent(E, TTP->Tid); 155 } 156 } 157 158 // Emit totals by section name as additional "thread" events, sorted from 159 // longest one. 160 // Find highest used thread id. 161 uint64_t MaxTid = this->Tid; 162 for (const auto &TTP : ThreadTimeTraceProfilerInstances) { 163 MaxTid = std::max(MaxTid, TTP->Tid); 164 } 165 166 // Combine all CountAndTotalPerName from threads into one. 167 StringMap<CountAndDurationType> AllCountAndTotalPerName; 168 auto combineStat = [&](auto &Stat) { 169 std::string Key = Stat.getKey(); 170 auto Value = Stat.getValue(); 171 auto &CountAndTotal = AllCountAndTotalPerName[Key]; 172 CountAndTotal.first += Value.first; 173 CountAndTotal.second += Value.second; 174 }; 175 for (const auto &Stat : CountAndTotalPerName) { 176 combineStat(Stat); 177 } 178 for (const auto &TTP : ThreadTimeTraceProfilerInstances) { 179 for (const auto &Stat : TTP->CountAndTotalPerName) { 180 combineStat(Stat); 181 } 182 } 183 184 std::vector<NameAndCountAndDurationType> SortedTotals; 185 SortedTotals.reserve(AllCountAndTotalPerName.size()); 186 for (const auto &Total : AllCountAndTotalPerName) 187 SortedTotals.emplace_back(Total.getKey(), Total.getValue()); 188 189 llvm::sort(SortedTotals.begin(), SortedTotals.end(), 190 [](const NameAndCountAndDurationType &A, 191 const NameAndCountAndDurationType &B) { 192 return A.second.second > B.second.second; 193 }); 194 195 // Report totals on separate threads of tracing file. 196 uint64_t TotalTid = MaxTid + 1; 197 for (const auto &Total : SortedTotals) { 198 auto DurUs = duration_cast<microseconds>(Total.second.second).count(); 199 auto Count = AllCountAndTotalPerName[Total.first].first; 200 201 J.object([&]{ 202 J.attribute("pid", 1); 203 J.attribute("tid", int64_t(TotalTid)); 204 J.attribute("ph", "X"); 205 J.attribute("ts", 0); 206 J.attribute("dur", DurUs); 207 J.attribute("name", "Total " + Total.first); 208 J.attributeObject("args", [&] { 209 J.attribute("count", int64_t(Count)); 210 J.attribute("avg ms", int64_t(DurUs / Count / 1000)); 211 }); 212 }); 213 214 ++TotalTid; 215 } 216 217 // Emit metadata event with process name. 218 J.object([&] { 219 J.attribute("cat", ""); 220 J.attribute("pid", 1); 221 J.attribute("tid", 0); 222 J.attribute("ts", 0); 223 J.attribute("ph", "M"); 224 J.attribute("name", "process_name"); 225 J.attributeObject("args", [&] { J.attribute("name", ProcName); }); 226 }); 227 228 J.arrayEnd(); 229 J.attributeEnd(); 230 J.objectEnd(); 231 } 232 233 SmallVector<Entry, 16> Stack; 234 SmallVector<Entry, 128> Entries; 235 StringMap<CountAndDurationType> CountAndTotalPerName; 236 const TimePointType StartTime; 237 const std::string ProcName; 238 const uint64_t Tid; 239 240 // Minimum time granularity (in microseconds) 241 const unsigned TimeTraceGranularity; 242 }; 243 244 void timeTraceProfilerInitialize(unsigned TimeTraceGranularity, 245 StringRef ProcName) { 246 assert(TimeTraceProfilerInstance == nullptr && 247 "Profiler should not be initialized"); 248 TimeTraceProfilerInstance = std::make_unique<TimeTraceProfiler>( 249 TimeTraceGranularity, llvm::sys::path::filename(ProcName)); 250 } 251 252 // Removes all TimeTraceProfilerInstances. 253 void timeTraceProfilerCleanup() { 254 TimeTraceProfilerInstance.reset(); 255 std::lock_guard<std::mutex> Lock(Mu); 256 ThreadTimeTraceProfilerInstances.clear(); 257 } 258 259 // Finish TimeTraceProfilerInstance on a worker thread. 260 // This doesn't remove the instance, just moves the pointer to global vector. 261 void timeTraceProfilerFinishThread() { 262 std::lock_guard<std::mutex> Lock(Mu); 263 ThreadTimeTraceProfilerInstances.push_back( 264 std::move(TimeTraceProfilerInstance)); 265 } 266 267 void timeTraceProfilerWrite(raw_pwrite_stream &OS) { 268 assert(TimeTraceProfilerInstance != nullptr && 269 "Profiler object can't be null"); 270 TimeTraceProfilerInstance->Write(OS); 271 } 272 273 void timeTraceProfilerBegin(StringRef Name, StringRef Detail) { 274 if (TimeTraceProfilerInstance != nullptr) 275 TimeTraceProfilerInstance->begin(Name, [&]() { return Detail; }); 276 } 277 278 void timeTraceProfilerBegin(StringRef Name, 279 llvm::function_ref<std::string()> Detail) { 280 if (TimeTraceProfilerInstance != nullptr) 281 TimeTraceProfilerInstance->begin(Name, Detail); 282 } 283 284 void timeTraceProfilerEnd() { 285 if (TimeTraceProfilerInstance != nullptr) 286 TimeTraceProfilerInstance->end(); 287 } 288 289 } // namespace llvm 290