1 //===-- TimeProfiler.cpp - Hierarchical Time Profiler ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements hierarchical time profiler.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Support/TimeProfiler.h"
14 #include "llvm/ADT/StringMap.h"
15 #include "llvm/Support/CommandLine.h"
16 #include "llvm/Support/JSON.h"
17 #include "llvm/Support/Path.h"
18 #include "llvm/Support/Threading.h"
19 #include <algorithm>
20 #include <cassert>
21 #include <chrono>
22 #include <mutex>
23 #include <string>
24 #include <vector>
25 
26 using namespace std::chrono;
27 
28 namespace {
29 std::mutex Mu;
30 std::vector<std::unique_ptr<llvm::TimeTraceProfiler>>
31     ThreadTimeTraceProfilerInstances; // guarded by Mu
32 } // namespace
33 
34 namespace llvm {
35 
36 LLVM_THREAD_LOCAL std::unique_ptr<TimeTraceProfiler> TimeTraceProfilerInstance;
37 
38 typedef duration<steady_clock::rep, steady_clock::period> DurationType;
39 typedef time_point<steady_clock> TimePointType;
40 typedef std::pair<size_t, DurationType> CountAndDurationType;
41 typedef std::pair<std::string, CountAndDurationType>
42     NameAndCountAndDurationType;
43 
44 struct Entry {
45   const TimePointType Start;
46   TimePointType End;
47   const std::string Name;
48   const std::string Detail;
49 
50   Entry(TimePointType &&S, TimePointType &&E, std::string &&N, std::string &&Dt)
51       : Start(std::move(S)), End(std::move(E)), Name(std::move(N)),
52         Detail(std::move(Dt)) {}
53 
54   // Calculate timings for FlameGraph. Cast time points to microsecond precision
55   // rather than casting duration. This avoid truncation issues causing inner
56   // scopes overruning outer scopes.
57   steady_clock::rep getFlameGraphStartUs(TimePointType StartTime) const {
58     return (time_point_cast<microseconds>(Start) -
59             time_point_cast<microseconds>(StartTime))
60         .count();
61   }
62 
63   steady_clock::rep getFlameGraphDurUs() const {
64     return (time_point_cast<microseconds>(End) -
65             time_point_cast<microseconds>(Start))
66         .count();
67   }
68 };
69 
70 struct TimeTraceProfiler {
71   TimeTraceProfiler(unsigned TimeTraceGranularity = 0, StringRef ProcName = "")
72       : StartTime(steady_clock::now()), ProcName(ProcName),
73         Tid(llvm::get_threadid()), TimeTraceGranularity(TimeTraceGranularity) {}
74 
75   void begin(std::string Name, llvm::function_ref<std::string()> Detail) {
76     Stack.emplace_back(steady_clock::now(), TimePointType(), std::move(Name),
77                        Detail());
78   }
79 
80   void end() {
81     assert(!Stack.empty() && "Must call begin() first");
82     auto &E = Stack.back();
83     E.End = steady_clock::now();
84 
85     // Check that end times monotonically increase.
86     assert((Entries.empty() ||
87             (E.getFlameGraphStartUs(StartTime) + E.getFlameGraphDurUs() >=
88              Entries.back().getFlameGraphStartUs(StartTime) +
89                  Entries.back().getFlameGraphDurUs())) &&
90            "TimeProfiler scope ended earlier than previous scope");
91 
92     // Calculate duration at full precision for overall counts.
93     DurationType Duration = E.End - E.Start;
94 
95     // Only include sections longer or equal to TimeTraceGranularity msec.
96     if (duration_cast<microseconds>(Duration).count() >= TimeTraceGranularity)
97       Entries.emplace_back(E);
98 
99     // Track total time taken by each "name", but only the topmost levels of
100     // them; e.g. if there's a template instantiation that instantiates other
101     // templates from within, we only want to add the topmost one. "topmost"
102     // happens to be the ones that don't have any currently open entries above
103     // itself.
104     if (std::find_if(++Stack.rbegin(), Stack.rend(), [&](const Entry &Val) {
105           return Val.Name == E.Name;
106         }) == Stack.rend()) {
107       auto &CountAndTotal = CountAndTotalPerName[E.Name];
108       CountAndTotal.first++;
109       CountAndTotal.second += Duration;
110     }
111 
112     Stack.pop_back();
113   }
114 
115   // Write events from this TimeTraceProfilerInstance and
116   // ThreadTimeTraceProfilerInstances.
117   void Write(raw_pwrite_stream &OS) {
118     // Acquire Mutex as reading ThreadTimeTraceProfilerInstances.
119     std::lock_guard<std::mutex> Lock(Mu);
120     assert(Stack.empty() &&
121            "All profiler sections should be ended when calling Write");
122     assert(std::all_of(ThreadTimeTraceProfilerInstances.begin(),
123                        ThreadTimeTraceProfilerInstances.end(),
124                        [](const auto &TTP) { return TTP->Stack.empty(); }) &&
125            "All profiler sections should be ended when calling Write");
126 
127     json::OStream J(OS);
128     J.objectBegin();
129     J.attributeBegin("traceEvents");
130     J.arrayBegin();
131 
132     // Emit all events for the main flame graph.
133     auto writeEvent = [&](auto &E, uint64_t Tid) {
134       auto StartUs = E.getFlameGraphStartUs(StartTime);
135       auto DurUs = E.getFlameGraphDurUs();
136 
137       J.object([&]{
138         J.attribute("pid", 1);
139         J.attribute("tid", int64_t(Tid));
140         J.attribute("ph", "X");
141         J.attribute("ts", StartUs);
142         J.attribute("dur", DurUs);
143         J.attribute("name", E.Name);
144         if (!E.Detail.empty()) {
145           J.attributeObject("args", [&] { J.attribute("detail", E.Detail); });
146         }
147       });
148     };
149     for (const auto &E : Entries) {
150       writeEvent(E, this->Tid);
151     }
152     for (const auto &TTP : ThreadTimeTraceProfilerInstances) {
153       for (const auto &E : TTP->Entries) {
154         writeEvent(E, TTP->Tid);
155       }
156     }
157 
158     // Emit totals by section name as additional "thread" events, sorted from
159     // longest one.
160     // Find highest used thread id.
161     uint64_t MaxTid = this->Tid;
162     for (const auto &TTP : ThreadTimeTraceProfilerInstances) {
163       MaxTid = std::max(MaxTid, TTP->Tid);
164     }
165 
166     // Combine all CountAndTotalPerName from threads into one.
167     StringMap<CountAndDurationType> AllCountAndTotalPerName;
168     auto combineStat = [&](auto &Stat) {
169       std::string Key = Stat.getKey();
170       auto Value = Stat.getValue();
171       auto &CountAndTotal = AllCountAndTotalPerName[Key];
172       CountAndTotal.first += Value.first;
173       CountAndTotal.second += Value.second;
174     };
175     for (const auto &Stat : CountAndTotalPerName) {
176       combineStat(Stat);
177     }
178     for (const auto &TTP : ThreadTimeTraceProfilerInstances) {
179       for (const auto &Stat : TTP->CountAndTotalPerName) {
180         combineStat(Stat);
181       }
182     }
183 
184     std::vector<NameAndCountAndDurationType> SortedTotals;
185     SortedTotals.reserve(AllCountAndTotalPerName.size());
186     for (const auto &Total : AllCountAndTotalPerName)
187       SortedTotals.emplace_back(Total.getKey(), Total.getValue());
188 
189     llvm::sort(SortedTotals.begin(), SortedTotals.end(),
190                [](const NameAndCountAndDurationType &A,
191                   const NameAndCountAndDurationType &B) {
192                  return A.second.second > B.second.second;
193                });
194 
195     // Report totals on separate threads of tracing file.
196     uint64_t TotalTid = MaxTid + 1;
197     for (const auto &Total : SortedTotals) {
198       auto DurUs = duration_cast<microseconds>(Total.second.second).count();
199       auto Count = AllCountAndTotalPerName[Total.first].first;
200 
201       J.object([&]{
202         J.attribute("pid", 1);
203         J.attribute("tid", int64_t(TotalTid));
204         J.attribute("ph", "X");
205         J.attribute("ts", 0);
206         J.attribute("dur", DurUs);
207         J.attribute("name", "Total " + Total.first);
208         J.attributeObject("args", [&] {
209           J.attribute("count", int64_t(Count));
210           J.attribute("avg ms", int64_t(DurUs / Count / 1000));
211         });
212       });
213 
214       ++TotalTid;
215     }
216 
217     // Emit metadata event with process name.
218     J.object([&] {
219       J.attribute("cat", "");
220       J.attribute("pid", 1);
221       J.attribute("tid", 0);
222       J.attribute("ts", 0);
223       J.attribute("ph", "M");
224       J.attribute("name", "process_name");
225       J.attributeObject("args", [&] { J.attribute("name", ProcName); });
226     });
227 
228     J.arrayEnd();
229     J.attributeEnd();
230     J.objectEnd();
231   }
232 
233   SmallVector<Entry, 16> Stack;
234   SmallVector<Entry, 128> Entries;
235   StringMap<CountAndDurationType> CountAndTotalPerName;
236   const TimePointType StartTime;
237   const std::string ProcName;
238   const uint64_t Tid;
239 
240   // Minimum time granularity (in microseconds)
241   const unsigned TimeTraceGranularity;
242 };
243 
244 void timeTraceProfilerInitialize(unsigned TimeTraceGranularity,
245                                  StringRef ProcName) {
246   assert(TimeTraceProfilerInstance == nullptr &&
247          "Profiler should not be initialized");
248   TimeTraceProfilerInstance = std::make_unique<TimeTraceProfiler>(
249       TimeTraceGranularity, llvm::sys::path::filename(ProcName));
250 }
251 
252 // Removes all TimeTraceProfilerInstances.
253 void timeTraceProfilerCleanup() {
254   TimeTraceProfilerInstance.reset();
255   std::lock_guard<std::mutex> Lock(Mu);
256   ThreadTimeTraceProfilerInstances.clear();
257 }
258 
259 // Finish TimeTraceProfilerInstance on a worker thread.
260 // This doesn't remove the instance, just moves the pointer to global vector.
261 void timeTraceProfilerFinishThread() {
262   std::lock_guard<std::mutex> Lock(Mu);
263   ThreadTimeTraceProfilerInstances.push_back(
264       std::move(TimeTraceProfilerInstance));
265 }
266 
267 void timeTraceProfilerWrite(raw_pwrite_stream &OS) {
268   assert(TimeTraceProfilerInstance != nullptr &&
269          "Profiler object can't be null");
270   TimeTraceProfilerInstance->Write(OS);
271 }
272 
273 void timeTraceProfilerBegin(StringRef Name, StringRef Detail) {
274   if (TimeTraceProfilerInstance != nullptr)
275     TimeTraceProfilerInstance->begin(Name, [&]() { return Detail; });
276 }
277 
278 void timeTraceProfilerBegin(StringRef Name,
279                             llvm::function_ref<std::string()> Detail) {
280   if (TimeTraceProfilerInstance != nullptr)
281     TimeTraceProfilerInstance->begin(Name, Detail);
282 }
283 
284 void timeTraceProfilerEnd() {
285   if (TimeTraceProfilerInstance != nullptr)
286     TimeTraceProfilerInstance->end();
287 }
288 
289 } // namespace llvm
290