1 //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements hierarchical time profiler.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Support/TimeProfiler.h"
14 #include "llvm/ADT/StringMap.h"
15 #include "llvm/Support/CommandLine.h"
16 #include "llvm/Support/JSON.h"
17 #include "llvm/Support/Path.h"
18 #include "llvm/Support/Threading.h"
19 #include <algorithm>
20 #include <cassert>
21 #include <chrono>
22 #include <mutex>
23 #include <string>
24 #include <vector>
25 
26 using namespace std::chrono;
27 
28 namespace {
29 std::mutex Mu;
30 std::vector<std::unique_ptr<llvm::TimeTraceProfiler>>
31     ThreadTimeTraceProfilerInstances; // guarded by Mu
32 } // namespace
33 
34 namespace llvm {
35 
36 thread_local std::unique_ptr<TimeTraceProfiler> TimeTraceProfilerInstance =
37     nullptr;
38 
39 typedef duration<steady_clock::rep, steady_clock::period> DurationType;
40 typedef time_point<steady_clock> TimePointType;
41 typedef std::pair<size_t, DurationType> CountAndDurationType;
42 typedef std::pair<std::string, CountAndDurationType>
43     NameAndCountAndDurationType;
44 
45 struct Entry {
46   const TimePointType Start;
47   TimePointType End;
48   const std::string Name;
49   const std::string Detail;
50 
51   Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt)
52       : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
53         Detail(std::move(Dt)) {}
54 
55   // Calculate timings for FlameGraph. Cast time points to microsecond precision
56   // rather than casting duration. This avoid truncation issues causing inner
57   // scopes overruning outer scopes.
58   steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const {
59     return (time_point_cast<microseconds>(Start) -
60             time_point_cast<microseconds>(StartTime))
61         .count();
62   }
63 
64   steady_clock::rep getFlameGraphDurUs() const {
65     return (time_point_cast<microseconds>(End) -
66             time_point_cast<microseconds>(Start))
67         .count();
68   }
69 };
70 
71 struct TimeTraceProfiler {
72   TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
73       : StartTime(steady_clock::now()), ProcName(ProcName),
74         Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {}
75 
76   void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
77     Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name),
78                        Detail());
79   }
80 
81   void end() {
82     assert(!Stack.empty() && "Must call begin() first");
83     auto &E = Stack.back();
84     E.End = steady_clock::now();
85 
86     // Check that end times monotonically increase.
87     assert((Entries.empty() ||
88             (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >=
89              Entries.back().getFlameGraphStartUs(StartTime) +
90                  Entries.back().getFlameGraphDurUs())) &&
91            "TimeProfiler scope ended earlier than previous scope");
92 
93     // Calculate duration at full precision for overall counts.
94     DurationType Duration = E.End - E.Start;
95 
96     // Only include sections longer or equal to TimeTraceGranularity msec.
97     if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
98       Entries.emplace_back(E);
99 
100     // Track total time taken by each "name", but only the topmost levels of
101     // them; e.g. if there's a template instantiation that instantiates other
102     // templates from within, we only want to add the topmost one. "topmost"
103     // happens to be the ones that don't have any currently open entries above
104     // itself.
105     if (std::find_if(++Stack.rbegin(), Stack.rend(), [&](const Entry &Val) {
106           return Val.Name == E.Name;
107         }) == Stack.rend()) {
108       auto &CountAndTotal = CountAndTotalPerName[E.Name];
109       CountAndTotal.first++;
110       CountAndTotal.second += Duration;
111     }
112 
113     Stack.pop_back();
114   }
115 
116   // Write events from this TimeTraceProfilerInstance and
117   // ThreadTimeTraceProfilerInstances.
118   void Write(raw_pwrite_stream &OS) {
119     // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
120     std::lock_guard<std::mutex> Lock(Mu);
121     assert(Stack.empty() &&
122            "All profiler sections should be ended when calling Write");
123     assert(std::all_of(ThreadTimeTraceProfilerInstances.begin(),
124                        ThreadTimeTraceProfilerInstances.end(),
125                        [](const auto &TTP) { return TTP->Stack.empty(); }) &&
126            "All profiler sections should be ended when calling Write");
127 
128     json::OStream J(OS);
129     J.objectBegin();
130     J.attributeBegin("traceEvents");
131     J.arrayBegin();
132 
133     // Emit all events for the main flame graph.
134     auto writeEvent = [&](auto &E, uint64_t Tid) {
135       auto StartUs = E.getFlameGraphStartUs(StartTime);
136       auto DurUs = E.getFlameGraphDurUs();
137 
138       J.object([&]{
139         J.attribute("pid", 1);
140         J.attribute("tid", int64_t(Tid));
141         J.attribute("ph", "X");
142         J.attribute("ts", StartUs);
143         J.attribute("dur", DurUs);
144         J.attribute("name", E.Name);
145         if (!E.Detail.empty()) {
146           J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
147         }
148       });
149     };
150     for (const auto &E : Entries) {
151       writeEvent(E, this->Tid);
152     }
153     for (const auto &TTP : ThreadTimeTraceProfilerInstances) {
154       for (const auto &E : TTP->Entries) {
155         writeEvent(E, TTP->Tid);
156       }
157     }
158 
159     // Emit totals by section name as additional "thread" events, sorted from
160     // longest one.
161     // Find highest used thread id.
162     uint64_t MaxTid = this->Tid;
163     for (const auto &TTP : ThreadTimeTraceProfilerInstances) {
164       MaxTid = std::max(MaxTid, TTP->Tid);
165     }
166 
167     // Combine all CountAndTotalPerName from threads into one.
168     StringMap<CountAndDurationType> AllCountAndTotalPerName;
169     auto combineStat = [&](auto &Stat) {
170       std::string Key = Stat.getKey();
171       auto Value = Stat.getValue();
172       auto &CountAndTotal = AllCountAndTotalPerName[Key];
173       CountAndTotal.first += Value.first;
174       CountAndTotal.second += Value.second;
175     };
176     for (const auto &Stat : CountAndTotalPerName) {
177       combineStat(Stat);
178     }
179     for (const auto &TTP : ThreadTimeTraceProfilerInstances) {
180       for (const auto &Stat : TTP->CountAndTotalPerName) {
181         combineStat(Stat);
182       }
183     }
184 
185     std::vector<NameAndCountAndDurationType> SortedTotals;
186     SortedTotals.reserve(AllCountAndTotalPerName.size());
187     for (const auto &Total : AllCountAndTotalPerName)
188       SortedTotals.emplace_back(Total.getKey(), Total.getValue());
189 
190     llvm::sort(SortedTotals.begin(), SortedTotals.end(),
191                [](const NameAndCountAndDurationType &A,
192                   const NameAndCountAndDurationType &B) {
193                  return A.second.second > B.second.second;
194                });
195 
196     // Report totals on separate threads of tracing file.
197     uint64_t TotalTid = MaxTid + 1;
198     for (const auto &Total : SortedTotals) {
199       auto DurUs = duration_cast<microseconds>(Total.second.second).count();
200       auto Count = AllCountAndTotalPerName[Total.first].first;
201 
202       J.object([&]{
203         J.attribute("pid", 1);
204         J.attribute("tid", int64_t(TotalTid));
205         J.attribute("ph", "X");
206         J.attribute("ts", 0);
207         J.attribute("dur", DurUs);
208         J.attribute("name", "Total " + Total.first);
209         J.attributeObject("args", [&] {
210           J.attribute("count", int64_t(Count));
211           J.attribute("avg ms", int64_t(DurUs / Count / 1000));
212         });
213       });
214 
215       ++TotalTid;
216     }
217 
218     // Emit metadata event with process name.
219     J.object([&] {
220       J.attribute("cat", "");
221       J.attribute("pid", 1);
222       J.attribute("tid", 0);
223       J.attribute("ts", 0);
224       J.attribute("ph", "M");
225       J.attribute("name", "process_name");
226       J.attributeObject("args", [&] { J.attribute("name", ProcName); });
227     });
228 
229     J.arrayEnd();
230     J.attributeEnd();
231     J.objectEnd();
232   }
233 
234   SmallVector<Entry, 16> Stack;
235   SmallVector<Entry, 128> Entries;
236   StringMap<CountAndDurationType> CountAndTotalPerName;
237   const TimePointType StartTime;
238   const std::string ProcName;
239   const uint64_t Tid;
240 
241   // Minimum time granularity (in microseconds)
242   const unsigned TimeTraceGranularity;
243 };
244 
245 void timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
246                                  StringRef ProcName) {
247   assert(TimeTraceProfilerInstance == nullptr &&
248          "Profiler should not be initialized");
249   TimeTraceProfilerInstance = std::make_unique<TimeTraceProfiler>(
250       TimeTraceGranularity, llvm::sys::path::filename(ProcName));
251 }
252 
253 // Removes all TimeTraceProfilerInstances.
254 void timeTraceProfilerCleanup() {
255   TimeTraceProfilerInstance.reset();
256   std::lock_guard<std::mutex> Lock(Mu);
257   ThreadTimeTraceProfilerInstances.clear();
258 }
259 
260 // Finish TimeTraceProfilerInstance on a worker thread.
261 // This doesn't remove the instance, just moves the pointer to global vector.
262 void timeTraceProfilerFinishThread() {
263   std::lock_guard<std::mutex> Lock(Mu);
264   ThreadTimeTraceProfilerInstances.push_back(
265       std::move(TimeTraceProfilerInstance));
266 }
267 
268 void timeTraceProfilerWrite(raw_pwrite_stream &OS) {
269   assert(TimeTraceProfilerInstance != nullptr &&
270          "Profiler object can't be null");
271   TimeTraceProfilerInstance->Write(OS);
272 }
273 
274 void timeTraceProfilerBegin(StringRef Name, StringRef Detail) {
275   if (TimeTraceProfilerInstance != nullptr)
276     TimeTraceProfilerInstance->begin(Name, [&]() { return Detail; });
277 }
278 
279 void timeTraceProfilerBegin(StringRef Name,
280                             llvm::function_ref<std::string()> Detail) {
281   if (TimeTraceProfilerInstance != nullptr)
282     TimeTraceProfilerInstance->begin(Name, Detail);
283 }
284 
285 void timeTraceProfilerEnd() {
286   if (TimeTraceProfilerInstance != nullptr)
287     TimeTraceProfilerInstance->end();
288 }
289 
290 } // namespace llvm
291