1 //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements hierarchical time profiler.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/Support/TimeProfiler.h"
14 #include "llvm/ADT/STLFunctionalExtras.h"
15 #include "llvm/ADT/StringMap.h"
16 #include "llvm/Support/JSON.h"
17 #include "llvm/Support/ManagedStatic.h"
18 #include "llvm/Support/Path.h"
19 #include "llvm/Support/Process.h"
20 #include "llvm/Support/Threading.h"
21 #include <algorithm>
22 #include <cassert>
23 #include <chrono>
24 #include <mutex>
25 #include <string>
26 #include <vector>
27
28 using namespace std::chrono;
29 using namespace llvm;
30
31 static std::mutex Mu;
32 // List of all instances
33 static ManagedStatic<std::vector<TimeTraceProfiler *>>
34 ThreadTimeTraceProfilerInstances; // GUARDED_BY(Mu)
35 // Per Thread instance
36 static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
37
getTimeTraceProfilerInstance()38 TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() {
39 return TimeTraceProfilerInstance;
40 }
41
42 typedef duration<steady_clock::rep, steady_clock::period> DurationType;
43 typedef time_point<steady_clock> TimePointType;
44 typedef std::pair<size_t, DurationType> CountAndDurationType;
45 typedef std::pair<std::string, CountAndDurationType>
46 NameAndCountAndDurationType;
47
48 namespace {
49 struct Entry {
50 const TimePointType Start;
51 TimePointType End;
52 const std::string Name;
53 const std::string Detail;
54
Entry__anona21739de0111::Entry55 Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt)
56 : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
57 Detail(std::move(Dt)) {}
58
59 // Calculate timings for FlameGraph. Cast time points to microsecond precision
60 // rather than casting duration. This avoid truncation issues causing inner
61 // scopes overruning outer scopes.
getFlameGraphStartUs__anona21739de0111::Entry62 steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const {
63 return (time_point_cast<microseconds>(Start) -
64 time_point_cast<microseconds>(StartTime))
65 .count();
66 }
67
getFlameGraphDurUs__anona21739de0111::Entry68 steady_clock::rep getFlameGraphDurUs() const {
69 return (time_point_cast<microseconds>(End) -
70 time_point_cast<microseconds>(Start))
71 .count();
72 }
73 };
74 } // namespace
75
76 struct llvm::TimeTraceProfiler {
TimeTraceProfilerllvm::TimeTraceProfiler77 TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
78 : BeginningOfTime(system_clock::now()), StartTime(steady_clock::now()),
79 ProcName(ProcName), Pid(sys::Process::getProcessId()),
80 Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {
81 llvm::get_thread_name(ThreadName);
82 }
83
beginllvm::TimeTraceProfiler84 void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
85 Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name),
86 Detail());
87 }
88
endllvm::TimeTraceProfiler89 void end() {
90 assert(!Stack.empty() && "Must call begin() first");
91 Entry &E = Stack.back();
92 E.End = steady_clock::now();
93
94 // Check that end times monotonically increase.
95 assert((Entries.empty() ||
96 (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >=
97 Entries.back().getFlameGraphStartUs(StartTime) +
98 Entries.back().getFlameGraphDurUs())) &&
99 "TimeProfiler scope ended earlier than previous scope");
100
101 // Calculate duration at full precision for overall counts.
102 DurationType Duration = E.End - E.Start;
103
104 // Only include sections longer or equal to TimeTraceGranularity msec.
105 if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
106 Entries.emplace_back(E);
107
108 // Track total time taken by each "name", but only the topmost levels of
109 // them; e.g. if there's a template instantiation that instantiates other
110 // templates from within, we only want to add the topmost one. "topmost"
111 // happens to be the ones that don't have any currently open entries above
112 // itself.
113 if (llvm::none_of(llvm::drop_begin(llvm::reverse(Stack)),
114 [&](const Entry &Val) { return Val.Name == E.Name; })) {
115 auto &CountAndTotal = CountAndTotalPerName[E.Name];
116 CountAndTotal.first++;
117 CountAndTotal.second += Duration;
118 }
119
120 Stack.pop_back();
121 }
122
123 // Write events from this TimeTraceProfilerInstance and
124 // ThreadTimeTraceProfilerInstances.
writellvm::TimeTraceProfiler125 void write(raw_pwrite_stream &OS) {
126 // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
127 std::lock_guard<std::mutex> Lock(Mu);
128 assert(Stack.empty() &&
129 "All profiler sections should be ended when calling write");
130 assert(llvm::all_of(*ThreadTimeTraceProfilerInstances,
131 [](const auto &TTP) { return TTP->Stack.empty(); }) &&
132 "All profiler sections should be ended when calling write");
133
134 json::OStream J(OS);
135 J.objectBegin();
136 J.attributeBegin("traceEvents");
137 J.arrayBegin();
138
139 // Emit all events for the main flame graph.
140 auto writeEvent = [&](const auto &E, uint64_t Tid) {
141 auto StartUs = E.getFlameGraphStartUs(StartTime);
142 auto DurUs = E.getFlameGraphDurUs();
143
144 J.object([&] {
145 J.attribute("pid", Pid);
146 J.attribute("tid", int64_t(Tid));
147 J.attribute("ph", "X");
148 J.attribute("ts", StartUs);
149 J.attribute("dur", DurUs);
150 J.attribute("name", E.Name);
151 if (!E.Detail.empty()) {
152 J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
153 }
154 });
155 };
156 for (const Entry &E : Entries)
157 writeEvent(E, this->Tid);
158 for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances)
159 for (const Entry &E : TTP->Entries)
160 writeEvent(E, TTP->Tid);
161
162 // Emit totals by section name as additional "thread" events, sorted from
163 // longest one.
164 // Find highest used thread id.
165 uint64_t MaxTid = this->Tid;
166 for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances)
167 MaxTid = std::max(MaxTid, TTP->Tid);
168
169 // Combine all CountAndTotalPerName from threads into one.
170 StringMap<CountAndDurationType> AllCountAndTotalPerName;
171 auto combineStat = [&](const auto &Stat) {
172 StringRef Key = Stat.getKey();
173 auto Value = Stat.getValue();
174 auto &CountAndTotal = AllCountAndTotalPerName[Key];
175 CountAndTotal.first += Value.first;
176 CountAndTotal.second += Value.second;
177 };
178 for (const auto &Stat : CountAndTotalPerName)
179 combineStat(Stat);
180 for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances)
181 for (const auto &Stat : TTP->CountAndTotalPerName)
182 combineStat(Stat);
183
184 std::vector<NameAndCountAndDurationType> SortedTotals;
185 SortedTotals.reserve(AllCountAndTotalPerName.size());
186 for (const auto &Total : AllCountAndTotalPerName)
187 SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue());
188
189 llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A,
190 const NameAndCountAndDurationType &B) {
191 return A.second.second > B.second.second;
192 });
193
194 // Report totals on separate threads of tracing file.
195 uint64_t TotalTid = MaxTid + 1;
196 for (const NameAndCountAndDurationType &Total : SortedTotals) {
197 auto DurUs = duration_cast<microseconds>(Total.second.second).count();
198 auto Count = AllCountAndTotalPerName[Total.first].first;
199
200 J.object([&] {
201 J.attribute("pid", Pid);
202 J.attribute("tid", int64_t(TotalTid));
203 J.attribute("ph", "X");
204 J.attribute("ts", 0);
205 J.attribute("dur", DurUs);
206 J.attribute("name", "Total " + Total.first);
207 J.attributeObject("args", [&] {
208 J.attribute("count", int64_t(Count));
209 J.attribute("avg ms", int64_t(DurUs / Count / 1000));
210 });
211 });
212
213 ++TotalTid;
214 }
215
216 auto writeMetadataEvent = [&](const char *Name, uint64_t Tid,
217 StringRef arg) {
218 J.object([&] {
219 J.attribute("cat", "");
220 J.attribute("pid", Pid);
221 J.attribute("tid", int64_t(Tid));
222 J.attribute("ts", 0);
223 J.attribute("ph", "M");
224 J.attribute("name", Name);
225 J.attributeObject("args", [&] { J.attribute("name", arg); });
226 });
227 };
228
229 writeMetadataEvent("process_name", Tid, ProcName);
230 writeMetadataEvent("thread_name", Tid, ThreadName);
231 for (const TimeTraceProfiler *TTP : *ThreadTimeTraceProfilerInstances)
232 writeMetadataEvent("thread_name", TTP->Tid, TTP->ThreadName);
233
234 J.arrayEnd();
235 J.attributeEnd();
236
237 // Emit the absolute time when this TimeProfiler started.
238 // This can be used to combine the profiling data from
239 // multiple processes and preserve actual time intervals.
240 J.attribute("beginningOfTime",
241 time_point_cast<microseconds>(BeginningOfTime)
242 .time_since_epoch()
243 .count());
244
245 J.objectEnd();
246 }
247
248 SmallVector<Entry, 16> Stack;
249 SmallVector<Entry, 128> Entries;
250 StringMap<CountAndDurationType> CountAndTotalPerName;
251 const time_point<system_clock> BeginningOfTime;
252 const TimePointType StartTime;
253 const std::string ProcName;
254 const sys::Process::Pid Pid;
255 SmallString<0> ThreadName;
256 const uint64_t Tid;
257
258 // Minimum time granularity (in microseconds)
259 const unsigned TimeTraceGranularity;
260 };
261
timeTraceProfilerInitialize(unsigned TimeTraceGranularity,StringRef ProcName)262 void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
263 StringRef ProcName) {
264 assert(TimeTraceProfilerInstance == nullptr &&
265 "Profiler should not be initialized");
266 TimeTraceProfilerInstance = new TimeTraceProfiler(
267 TimeTraceGranularity, llvm::sys::path::filename(ProcName));
268 }
269
270 // Removes all TimeTraceProfilerInstances.
271 // Called from main thread.
timeTraceProfilerCleanup()272 void llvm::timeTraceProfilerCleanup() {
273 delete TimeTraceProfilerInstance;
274 TimeTraceProfilerInstance = nullptr;
275 std::lock_guard<std::mutex> Lock(Mu);
276 for (auto *TTP : *ThreadTimeTraceProfilerInstances)
277 delete TTP;
278 ThreadTimeTraceProfilerInstances->clear();
279 }
280
281 // Finish TimeTraceProfilerInstance on a worker thread.
282 // This doesn't remove the instance, just moves the pointer to global vector.
timeTraceProfilerFinishThread()283 void llvm::timeTraceProfilerFinishThread() {
284 std::lock_guard<std::mutex> Lock(Mu);
285 ThreadTimeTraceProfilerInstances->push_back(TimeTraceProfilerInstance);
286 TimeTraceProfilerInstance = nullptr;
287 }
288
timeTraceProfilerWrite(raw_pwrite_stream & OS)289 void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) {
290 assert(TimeTraceProfilerInstance != nullptr &&
291 "Profiler object can't be null");
292 TimeTraceProfilerInstance->write(OS);
293 }
294
timeTraceProfilerWrite(StringRef PreferredFileName,StringRef FallbackFileName)295 Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName,
296 StringRef FallbackFileName) {
297 assert(TimeTraceProfilerInstance != nullptr &&
298 "Profiler object can't be null");
299
300 std::string Path = PreferredFileName.str();
301 if (Path.empty()) {
302 Path = FallbackFileName == "-" ? "out" : FallbackFileName.str();
303 Path += ".time-trace";
304 }
305
306 std::error_code EC;
307 raw_fd_ostream OS(Path, EC, sys::fs::OF_TextWithCRLF);
308 if (EC)
309 return createStringError(EC, "Could not open " + Path);
310
311 timeTraceProfilerWrite(OS);
312 return Error::success();
313 }
314
timeTraceProfilerBegin(StringRef Name,StringRef Detail)315 void llvm::timeTraceProfilerBegin(StringRef Name, StringRef Detail) {
316 if (TimeTraceProfilerInstance != nullptr)
317 TimeTraceProfilerInstance->begin(std::string(Name),
318 [&]() { return std::string(Detail); });
319 }
320
timeTraceProfilerBegin(StringRef Name,llvm::function_ref<std::string ()> Detail)321 void llvm::timeTraceProfilerBegin(StringRef Name,
322 llvm::function_ref<std::string()> Detail) {
323 if (TimeTraceProfilerInstance != nullptr)
324 TimeTraceProfilerInstance->begin(std::string(Name), Detail);
325 }
326
timeTraceProfilerEnd()327 void llvm::timeTraceProfilerEnd() {
328 if (TimeTraceProfilerInstance != nullptr)
329 TimeTraceProfilerInstance->end();
330 }
331