1 //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements hierarchical time profiler.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Support/TimeProfiler.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/StringMap.h"
16 #include "llvm/Support/CommandLine.h"
17 #include "llvm/Support/JSON.h"
18 #include "llvm/Support/Path.h"
19 #include "llvm/Support/Threading.h"
20 #include <algorithm>
21 #include <cassert>
22 #include <chrono>
23 #include <mutex>
24 #include <string>
25 #include <vector>
26 
27 using namespace std::chrono;
28 using namespace llvm;
29 
30 static std::mutex Mu;
31 // List of all instances
32 static std::vector<TimeTraceProfiler *>
33     ThreadTimeTraceProfilerInstances; // GUARDED_BY(Mu)
34 // Per Thread instance
35 static LLVM_THREAD_LOCAL TimeTraceProfiler *TimeTraceProfilerInstance = nullptr;
36 
37 TimeTraceProfiler *llvm::getTimeTraceProfilerInstance() {
38   return TimeTraceProfilerInstance;
39 }
40 
41 typedef duration<steady_clock::rep, steady_clock::period> DurationType;
42 typedef time_point<steady_clock> TimePointType;
43 typedef std::pair<size_t, DurationType> CountAndDurationType;
44 typedef std::pair<std::string, CountAndDurationType>
45     NameAndCountAndDurationType;
46 
47 namespace {
48 struct Entry {
49   const TimePointType Start;
50   TimePointType End;
51   const std::string Name;
52   const std::string Detail;
53 
54   Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt)
55       : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
56         Detail(std::move(Dt)) {}
57 
58   // Calculate timings for FlameGraph. Cast time points to microsecond precision
59   // rather than casting duration. This avoid truncation issues causing inner
60   // scopes overruning outer scopes.
61   steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const {
62     return (time_point_cast<microseconds>(Start) -
63             time_point_cast<microseconds>(StartTime))
64         .count();
65   }
66 
67   steady_clock::rep getFlameGraphDurUs() const {
68     return (time_point_cast<microseconds>(End) -
69             time_point_cast<microseconds>(Start))
70         .count();
71   }
72 };
73 } // namespace
74 
75 struct llvm::TimeTraceProfiler {
76   TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
77       : StartTime(steady_clock::now()), ProcName(ProcName),
78         Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {}
79 
80   void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
81     Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name),
82                        Detail());
83   }
84 
85   void end() {
86     assert(!Stack.empty() && "Must call begin() first");
87     Entry &E = Stack.back();
88     E.End = steady_clock::now();
89 
90     // Check that end times monotonically increase.
91     assert((Entries.empty() ||
92             (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >=
93              Entries.back().getFlameGraphStartUs(StartTime) +
94                  Entries.back().getFlameGraphDurUs())) &&
95            "TimeProfiler scope ended earlier than previous scope");
96 
97     // Calculate duration at full precision for overall counts.
98     DurationType Duration = E.End - E.Start;
99 
100     // Only include sections longer or equal to TimeTraceGranularity msec.
101     if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
102       Entries.emplace_back(E);
103 
104     // Track total time taken by each "name", but only the topmost levels of
105     // them; e.g. if there's a template instantiation that instantiates other
106     // templates from within, we only want to add the topmost one. "topmost"
107     // happens to be the ones that don't have any currently open entries above
108     // itself.
109     if (std::find_if(++Stack.rbegin(), Stack.rend(), [&](const Entry &Val) {
110           return Val.Name == E.Name;
111         }) == Stack.rend()) {
112       auto &CountAndTotal = CountAndTotalPerName[E.Name];
113       CountAndTotal.first++;
114       CountAndTotal.second += Duration;
115     }
116 
117     Stack.pop_back();
118   }
119 
120   // Write events from this TimeTraceProfilerInstance and
121   // ThreadTimeTraceProfilerInstances.
122   void write(raw_pwrite_stream &OS) {
123     // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
124     std::lock_guard<std::mutex> Lock(Mu);
125     assert(Stack.empty() &&
126            "All profiler sections should be ended when calling write");
127     assert(llvm::all_of(ThreadTimeTraceProfilerInstances,
128                         [](const auto &TTP) { return TTP->Stack.empty(); }) &&
129            "All profiler sections should be ended when calling write");
130 
131     json::OStream J(OS);
132     J.objectBegin();
133     J.attributeBegin("traceEvents");
134     J.arrayBegin();
135 
136     // Emit all events for the main flame graph.
137     auto writeEvent = [&](const auto &E, uint64_t Tid) {
138       auto StartUs = E.getFlameGraphStartUs(StartTime);
139       auto DurUs = E.getFlameGraphDurUs();
140 
141       J.object([&]{
142         J.attribute("pid", 1);
143         J.attribute("tid", int64_t(Tid));
144         J.attribute("ph", "X");
145         J.attribute("ts", StartUs);
146         J.attribute("dur", DurUs);
147         J.attribute("name", E.Name);
148         if (!E.Detail.empty()) {
149           J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
150         }
151       });
152     };
153     for (const Entry &E : Entries)
154       writeEvent(E, this->Tid);
155     for (const TimeTraceProfiler *TTP : ThreadTimeTraceProfilerInstances)
156       for (const Entry &E : TTP->Entries)
157         writeEvent(E, TTP->Tid);
158 
159     // Emit totals by section name as additional "thread" events, sorted from
160     // longest one.
161     // Find highest used thread id.
162     uint64_t MaxTid = this->Tid;
163     for (const TimeTraceProfiler *TTP : ThreadTimeTraceProfilerInstances)
164       MaxTid = std::max(MaxTid, TTP->Tid);
165 
166     // Combine all CountAndTotalPerName from threads into one.
167     StringMap<CountAndDurationType> AllCountAndTotalPerName;
168     auto combineStat = [&](const auto &Stat) {
169       StringRef Key = Stat.getKey();
170       auto Value = Stat.getValue();
171       auto &CountAndTotal = AllCountAndTotalPerName[Key];
172       CountAndTotal.first += Value.first;
173       CountAndTotal.second += Value.second;
174     };
175     for (const auto &Stat : CountAndTotalPerName)
176       combineStat(Stat);
177     for (const TimeTraceProfiler *TTP : ThreadTimeTraceProfilerInstances)
178       for (const auto &Stat : TTP->CountAndTotalPerName)
179         combineStat(Stat);
180 
181     std::vector<NameAndCountAndDurationType> SortedTotals;
182     SortedTotals.reserve(AllCountAndTotalPerName.size());
183     for (const auto &Total : AllCountAndTotalPerName)
184       SortedTotals.emplace_back(std::string(Total.getKey()), Total.getValue());
185 
186     llvm::sort(SortedTotals, [](const NameAndCountAndDurationType &A,
187                                 const NameAndCountAndDurationType &B) {
188       return A.second.second > B.second.second;
189     });
190 
191     // Report totals on separate threads of tracing file.
192     uint64_t TotalTid = MaxTid + 1;
193     for (const NameAndCountAndDurationType &Total : SortedTotals) {
194       auto DurUs = duration_cast<microseconds>(Total.second.second).count();
195       auto Count = AllCountAndTotalPerName[Total.first].first;
196 
197       J.object([&]{
198         J.attribute("pid", 1);
199         J.attribute("tid", int64_t(TotalTid));
200         J.attribute("ph", "X");
201         J.attribute("ts", 0);
202         J.attribute("dur", DurUs);
203         J.attribute("name", "Total " + Total.first);
204         J.attributeObject("args", [&] {
205           J.attribute("count", int64_t(Count));
206           J.attribute("avg ms", int64_t(DurUs / Count / 1000));
207         });
208       });
209 
210       ++TotalTid;
211     }
212 
213     // Emit metadata event with process name.
214     J.object([&] {
215       J.attribute("cat", "");
216       J.attribute("pid", 1);
217       J.attribute("tid", 0);
218       J.attribute("ts", 0);
219       J.attribute("ph", "M");
220       J.attribute("name", "process_name");
221       J.attributeObject("args", [&] { J.attribute("name", ProcName); });
222     });
223 
224     J.arrayEnd();
225     J.attributeEnd();
226     J.objectEnd();
227   }
228 
229   SmallVector<Entry, 16> Stack;
230   SmallVector<Entry, 128> Entries;
231   StringMap<CountAndDurationType> CountAndTotalPerName;
232   const TimePointType StartTime;
233   const std::string ProcName;
234   const uint64_t Tid;
235 
236   // Minimum time granularity (in microseconds)
237   const unsigned TimeTraceGranularity;
238 };
239 
240 void llvm::timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
241                                        StringRef ProcName) {
242   assert(TimeTraceProfilerInstance == nullptr &&
243          "Profiler should not be initialized");
244   TimeTraceProfilerInstance = new TimeTraceProfiler(
245       TimeTraceGranularity, llvm::sys::path::filename(ProcName));
246 }
247 
248 // Removes all TimeTraceProfilerInstances.
249 // Called from main thread.
250 void llvm::timeTraceProfilerCleanup() {
251   delete TimeTraceProfilerInstance;
252   std::lock_guard<std::mutex> Lock(Mu);
253   for (auto TTP : ThreadTimeTraceProfilerInstances)
254     delete TTP;
255   ThreadTimeTraceProfilerInstances.clear();
256 }
257 
258 // Finish TimeTraceProfilerInstance on a worker thread.
259 // This doesn't remove the instance, just moves the pointer to global vector.
260 void llvm::timeTraceProfilerFinishThread() {
261   std::lock_guard<std::mutex> Lock(Mu);
262   ThreadTimeTraceProfilerInstances.push_back(TimeTraceProfilerInstance);
263   TimeTraceProfilerInstance = nullptr;
264 }
265 
266 void llvm::timeTraceProfilerWrite(raw_pwrite_stream &OS) {
267   assert(TimeTraceProfilerInstance != nullptr &&
268          "Profiler object can't be null");
269   TimeTraceProfilerInstance->write(OS);
270 }
271 
272 Error llvm::timeTraceProfilerWrite(StringRef PreferredFileName,
273                                    StringRef FallbackFileName) {
274   assert(TimeTraceProfilerInstance != nullptr &&
275          "Profiler object can't be null");
276 
277   std::string Path = PreferredFileName.str();
278   if (Path.empty()) {
279     Path = FallbackFileName == "-" ? "out" : FallbackFileName.str();
280     Path += ".time-trace";
281   }
282 
283   std::error_code EC;
284   raw_fd_ostream OS(Path, EC, sys::fs::OF_Text);
285   if (EC)
286     return createStringError(EC, "Could not open " + Path);
287 
288   timeTraceProfilerWrite(OS);
289   return Error::success();
290 }
291 
292 void llvm::timeTraceProfilerBegin(StringRef Name, StringRef Detail) {
293   if (TimeTraceProfilerInstance != nullptr)
294     TimeTraceProfilerInstance->begin(std::string(Name),
295                                      [&]() { return std::string(Detail); });
296 }
297 
298 void llvm::timeTraceProfilerBegin(StringRef Name,
299                                   llvm::function_ref<std::string()> Detail) {
300   if (TimeTraceProfilerInstance != nullptr)
301     TimeTraceProfilerInstance->begin(std::string(Name), Detail);
302 }
303 
304 void llvm::timeTraceProfilerEnd() {
305   if (TimeTraceProfilerInstance != nullptr)
306     TimeTraceProfilerInstance->end();
307 }
308