1 //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements hierarchical time profiler.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Support/TimeProfiler.h"
14 #include "llvm/ADT/StringMap.h"
15 #include "llvm/Support/CommandLine.h"
16 #include "llvm/Support/JSON.h"
17 #include "llvm/Support/Path.h"
18 #include "llvm/Support/Threading.h"
19 #include <algorithm>
20 #include <cassert>
21 #include <chrono>
22 #include <mutex>
23 #include <string>
24 #include <vector>
25 
26 using namespace std::chrono;
27 using namespace llvm;
28 
29 namespace {
30 std::mutex Mu;
31 // List of all instances
32 std::vector<TimeTraceProfiler *>
33     ThreadTimeTraceProfilerInstances; // guarded by Mu
34 // Per Thread instance
35 LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
36 } // namespace
37 
38 namespace llvm {
39 
40 TimeTraceProfiler *getTimeTraceProfilerInstance() {
41   return TimeTraceProfilerInstance;
42 }
43 
44 typedef duration<steady_clock::rep, steady_clock::period> DurationType;
45 typedef time_point<steady_clock> TimePointType;
46 typedef std::pair<size_t, DurationType> CountAndDurationType;
47 typedef std::pair<std::string, CountAndDurationType>
48     NameAndCountAndDurationType;
49 
50 struct Entry {
51   const TimePointType Start;
52   TimePointType End;
53   const std::string Name;
54   const std::string Detail;
55 
56   Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt)
57       : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
58         Detail(std::move(Dt)) {}
59 
60   // Calculate timings for FlameGraph. Cast time points to microsecond precision
61   // rather than casting duration. This avoid truncation issues causing inner
62   // scopes overruning outer scopes.
63   steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const {
64     return (time_point_cast<microseconds>(Start) -
65             time_point_cast<microseconds>(StartTime))
66         .count();
67   }
68 
69   steady_clock::rep getFlameGraphDurUs() const {
70     return (time_point_cast<microseconds>(End) -
71             time_point_cast<microseconds>(Start))
72         .count();
73   }
74 };
75 
76 struct TimeTraceProfiler {
77   TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
78       : StartTime(steady_clock::now()), ProcName(ProcName),
79         Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {}
80 
81   void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
82     Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name),
83                        Detail());
84   }
85 
86   void end() {
87     assert(!Stack.empty() && "Must call begin() first");
88     auto &E = Stack.back();
89     E.End = steady_clock::now();
90 
91     // Check that end times monotonically increase.
92     assert((Entries.empty() ||
93             (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >=
94              Entries.back().getFlameGraphStartUs(StartTime) +
95                  Entries.back().getFlameGraphDurUs())) &&
96            "TimeProfiler scope ended earlier than previous scope");
97 
98     // Calculate duration at full precision for overall counts.
99     DurationType Duration = E.End - E.Start;
100 
101     // Only include sections longer or equal to TimeTraceGranularity msec.
102     if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
103       Entries.emplace_back(E);
104 
105     // Track total time taken by each "name", but only the topmost levels of
106     // them; e.g. if there's a template instantiation that instantiates other
107     // templates from within, we only want to add the topmost one. "topmost"
108     // happens to be the ones that don't have any currently open entries above
109     // itself.
110     if (std::find_if(++Stack.rbegin(), Stack.rend(), [&](const Entry &Val) {
111           return Val.Name == E.Name;
112         }) == Stack.rend()) {
113       auto &CountAndTotal = CountAndTotalPerName[E.Name];
114       CountAndTotal.first++;
115       CountAndTotal.second += Duration;
116     }
117 
118     Stack.pop_back();
119   }
120 
121   // Write events from this TimeTraceProfilerInstance and
122   // ThreadTimeTraceProfilerInstances.
123   void Write(raw_pwrite_stream &OS) {
124     // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
125     std::lock_guard<std::mutex> Lock(Mu);
126     assert(Stack.empty() &&
127            "All profiler sections should be ended when calling Write");
128     assert(std::all_of(ThreadTimeTraceProfilerInstances.begin(),
129                        ThreadTimeTraceProfilerInstances.end(),
130                        [](const auto &TTP) { return TTP->Stack.empty(); }) &&
131            "All profiler sections should be ended when calling Write");
132 
133     json::OStream J(OS);
134     J.objectBegin();
135     J.attributeBegin("traceEvents");
136     J.arrayBegin();
137 
138     // Emit all events for the main flame graph.
139     auto writeEvent = [&](const auto &E, uint64_t Tid) {
140       auto StartUs = E.getFlameGraphStartUs(StartTime);
141       auto DurUs = E.getFlameGraphDurUs();
142 
143       J.object([&]{
144         J.attribute("pid", 1);
145         J.attribute("tid", int64_t(Tid));
146         J.attribute("ph", "X");
147         J.attribute("ts", StartUs);
148         J.attribute("dur", DurUs);
149         J.attribute("name", E.Name);
150         if (!E.Detail.empty()) {
151           J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
152         }
153       });
154     };
155     for (const auto &E : Entries) {
156       writeEvent(E, this->Tid);
157     }
158     for (const auto &TTP : ThreadTimeTraceProfilerInstances) {
159       for (const auto &E : TTP->Entries) {
160         writeEvent(E, TTP->Tid);
161       }
162     }
163 
164     // Emit totals by section name as additional "thread" events, sorted from
165     // longest one.
166     // Find highest used thread id.
167     uint64_t MaxTid = this->Tid;
168     for (const auto &TTP : ThreadTimeTraceProfilerInstances) {
169       MaxTid = std::max(MaxTid, TTP->Tid);
170     }
171 
172     // Combine all CountAndTotalPerName from threads into one.
173     StringMap<CountAndDurationType> AllCountAndTotalPerName;
174     auto combineStat = [&](const auto &Stat) {
175       StringRef Key = Stat.getKey();
176       auto Value = Stat.getValue();
177       auto &CountAndTotal = AllCountAndTotalPerName[Key];
178       CountAndTotal.first += Value.first;
179       CountAndTotal.second += Value.second;
180     };
181     for (const auto &Stat : CountAndTotalPerName) {
182       combineStat(Stat);
183     }
184     for (const auto &TTP : ThreadTimeTraceProfilerInstances) {
185       for (const auto &Stat : TTP->CountAndTotalPerName) {
186         combineStat(Stat);
187       }
188     }
189 
190     std::vector<NameAndCountAndDurationType> SortedTotals;
191     SortedTotals.reserve(AllCountAndTotalPerName.size());
192     for (const auto &Total : AllCountAndTotalPerName)
193       SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue());
194 
195     llvm::sort(SortedTotals.begin(), SortedTotals.end(),
196                [](const NameAndCountAndDurationType &A,
197                   const NameAndCountAndDurationType &B) {
198                  return A.second.second > B.second.second;
199                });
200 
201     // Report totals on separate threads of tracing file.
202     uint64_t TotalTid = MaxTid + 1;
203     for (const auto &Total : SortedTotals) {
204       auto DurUs = duration_cast<microseconds>(Total.second.second).count();
205       auto Count = AllCountAndTotalPerName[Total.first].first;
206 
207       J.object([&]{
208         J.attribute("pid", 1);
209         J.attribute("tid", int64_t(TotalTid));
210         J.attribute("ph", "X");
211         J.attribute("ts", 0);
212         J.attribute("dur", DurUs);
213         J.attribute("name", "Total " + Total.first);
214         J.attributeObject("args", [&] {
215           J.attribute("count", int64_t(Count));
216           J.attribute("avg ms", int64_t(DurUs / Count / 1000));
217         });
218       });
219 
220       ++TotalTid;
221     }
222 
223     // Emit metadata event with process name.
224     J.object([&] {
225       J.attribute("cat", "");
226       J.attribute("pid", 1);
227       J.attribute("tid", 0);
228       J.attribute("ts", 0);
229       J.attribute("ph", "M");
230       J.attribute("name", "process_name");
231       J.attributeObject("args", [&] { J.attribute("name", ProcName); });
232     });
233 
234     J.arrayEnd();
235     J.attributeEnd();
236     J.objectEnd();
237   }
238 
239   SmallVector<Entry, 16> Stack;
240   SmallVector<Entry, 128> Entries;
241   StringMap<CountAndDurationType> CountAndTotalPerName;
242   const TimePointType StartTime;
243   const std::string ProcName;
244   const uint64_t Tid;
245 
246   // Minimum time granularity (in microseconds)
247   const unsigned TimeTraceGranularity;
248 };
249 
250 void timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
251                                  StringRef ProcName) {
252   assert(TimeTraceProfilerInstance == nullptr &&
253          "Profiler should not be initialized");
254   TimeTraceProfilerInstance = new TimeTraceProfiler(
255       TimeTraceGranularity, llvm::sys::path::filename(ProcName));
256 }
257 
258 // Removes all TimeTraceProfilerInstances.
259 // Called from main thread.
260 void timeTraceProfilerCleanup() {
261   delete TimeTraceProfilerInstance;
262   std::lock_guard<std::mutex> Lock(Mu);
263   for (auto TTP : ThreadTimeTraceProfilerInstances)
264     delete TTP;
265   ThreadTimeTraceProfilerInstances.clear();
266 }
267 
268 // Finish TimeTraceProfilerInstance on a worker thread.
269 // This doesn't remove the instance, just moves the pointer to global vector.
270 void timeTraceProfilerFinishThread() {
271   std::lock_guard<std::mutex> Lock(Mu);
272   ThreadTimeTraceProfilerInstances.push_back(TimeTraceProfilerInstance);
273   TimeTraceProfilerInstance = nullptr;
274 }
275 
276 void timeTraceProfilerWrite(raw_pwrite_stream &OS) {
277   assert(TimeTraceProfilerInstance != nullptr &&
278          "Profiler object can't be null");
279   TimeTraceProfilerInstance->Write(OS);
280 }
281 
282 void timeTraceProfilerBegin(StringRef Name, StringRef Detail) {
283   if (TimeTraceProfilerInstance != nullptr)
284     TimeTraceProfilerInstance->begin(std::string(Name),
285                                      [&]() { return std::string(Detail); });
286 }
287 
288 void timeTraceProfilerBegin(StringRef Name,
289                             llvm::function_ref<std::string()> Detail) {
290   if (TimeTraceProfilerInstance != nullptr)
291     TimeTraceProfilerInstance->begin(std::string(Name), Detail);
292 }
293 
294 void timeTraceProfilerEnd() {
295   if (TimeTraceProfilerInstance != nullptr)
296     TimeTraceProfilerInstance->end();
297 }
298 
299 } // namespace llvm
300