//===-- PerfContextSwitchDecoder.cpp --------------------------------------===//
2 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
3 // See https://llvm.org/LICENSE.txt for license information.
4 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5 //
6 //===----------------------------------------------------------------------===//
7
8 #include "PerfContextSwitchDecoder.h"
9
10 using namespace lldb;
11 using namespace lldb_private;
12 using namespace lldb_private::trace_intel_pt;
13 using namespace llvm;
14
/// Copied from <linux/perf_event.h> to avoid depending on perf_event.h on
/// non-linux platforms.
/// \{
// Flag set in perf_event_header.misc when a context switch record represents
// a switch *out* of a thread (see PerfContextSwitchRecord::IsOut below).
#define PERF_RECORD_MISC_SWITCH_OUT (1 << 13)

// Values of perf_event_header.type (the perf_event_type enum in the kernel
// header). The ones below are the types this decoder inspects.
#define PERF_RECORD_LOST 2
#define PERF_RECORD_THROTTLE 5
#define PERF_RECORD_UNTHROTTLE 6
#define PERF_RECORD_LOST_SAMPLES 13
#define PERF_RECORD_SWITCH_CPU_WIDE 15
#define PERF_RECORD_MAX 19
26
27 struct perf_event_header {
28 uint32_t type;
29 uint16_t misc;
30 uint16_t size;
31
32 /// \return
33 /// An \a llvm::Error if the record looks obviously wrong, or \a
34 /// llvm::Error::success() otherwise.
SanityCheckperf_event_header35 Error SanityCheck() const {
36 // The following checks are based on visual inspection of the records and
37 // enums in
38 // https://elixir.bootlin.com/linux/v4.8/source/include/uapi/linux/perf_event.h
39 // See PERF_RECORD_MAX, PERF_RECORD_SWITCH and the data similar records
40 // hold.
41
42 // A record of too many uint64_t's or more should mean that the data is
43 // wrong
44 const uint64_t max_valid_size_bytes = 8000;
45 if (size == 0 || size > max_valid_size_bytes)
46 return createStringError(
47 inconvertibleErrorCode(),
48 formatv("A record of {0} bytes was found.", size));
49
50 // We add some numbers to PERF_RECORD_MAX because some systems might have
51 // custom records. In any case, we are looking only for abnormal data.
52 if (type >= PERF_RECORD_MAX + 100)
53 return createStringError(
54 inconvertibleErrorCode(),
55 formatv("Invalid record type {0} was found.", type));
56 return Error::success();
57 }
58
IsContextSwitchRecordperf_event_header59 bool IsContextSwitchRecord() const {
60 return type == PERF_RECORD_SWITCH_CPU_WIDE;
61 }
62
IsErrorRecordperf_event_header63 bool IsErrorRecord() const {
64 return type == PERF_RECORD_LOST || type == PERF_RECORD_THROTTLE ||
65 type == PERF_RECORD_UNTHROTTLE || type == PERF_RECORD_LOST_SAMPLES;
66 }
67 };
68 /// \}
69
70 /// Record found in the perf_event context switch traces. It might contain
71 /// additional fields in memory, but header.size should have the actual size
72 /// of the record.
73 struct PerfContextSwitchRecord {
74 struct perf_event_header header;
75 uint32_t next_prev_pid;
76 uint32_t next_prev_tid;
77 uint32_t pid, tid;
78 uint64_t time_in_nanos;
79
IsOutPerfContextSwitchRecord80 bool IsOut() const { return header.misc & PERF_RECORD_MISC_SWITCH_OUT; }
81 };
82
83 /// Record produced after parsing the raw context switch trace produce by
84 /// perf_event. A major difference between this struct and
85 /// PerfContextSwitchRecord is that this one uses tsc instead of nanos.
86 struct ContextSwitchRecord {
87 uint64_t tsc;
88 /// Whether the switch is in or out
89 bool is_out;
90 /// pid = 0 and tid = 0 indicate the swapper or idle process, which normally
91 /// runs after a context switch out of a normal user thread.
92 lldb::pid_t pid;
93 lldb::tid_t tid;
94
IsOutContextSwitchRecord95 bool IsOut() const { return is_out; }
96
IsInContextSwitchRecord97 bool IsIn() const { return !is_out; }
98 };
99
GetLowestKnownTSC() const100 uint64_t ThreadContinuousExecution::GetLowestKnownTSC() const {
101 switch (variant) {
102 case Variant::Complete:
103 return tscs.complete.start;
104 case Variant::OnlyStart:
105 return tscs.only_start.start;
106 case Variant::OnlyEnd:
107 return tscs.only_end.end;
108 case Variant::HintedEnd:
109 return tscs.hinted_end.start;
110 case Variant::HintedStart:
111 return tscs.hinted_start.end;
112 }
113 }
114
GetStartTSC() const115 uint64_t ThreadContinuousExecution::GetStartTSC() const {
116 switch (variant) {
117 case Variant::Complete:
118 return tscs.complete.start;
119 case Variant::OnlyStart:
120 return tscs.only_start.start;
121 case Variant::OnlyEnd:
122 return 0;
123 case Variant::HintedEnd:
124 return tscs.hinted_end.start;
125 case Variant::HintedStart:
126 return tscs.hinted_start.hinted_start;
127 }
128 }
129
GetEndTSC() const130 uint64_t ThreadContinuousExecution::GetEndTSC() const {
131 switch (variant) {
132 case Variant::Complete:
133 return tscs.complete.end;
134 case Variant::OnlyStart:
135 return std::numeric_limits<uint64_t>::max();
136 case Variant::OnlyEnd:
137 return tscs.only_end.end;
138 case Variant::HintedEnd:
139 return tscs.hinted_end.hinted_end;
140 case Variant::HintedStart:
141 return tscs.hinted_start.end;
142 }
143 }
144
CreateCompleteExecution(lldb::cpu_id_t cpu_id,lldb::tid_t tid,lldb::pid_t pid,uint64_t start,uint64_t end)145 ThreadContinuousExecution ThreadContinuousExecution::CreateCompleteExecution(
146 lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start,
147 uint64_t end) {
148 ThreadContinuousExecution o(cpu_id, tid, pid);
149 o.variant = Variant::Complete;
150 o.tscs.complete.start = start;
151 o.tscs.complete.end = end;
152 return o;
153 }
154
CreateHintedStartExecution(lldb::cpu_id_t cpu_id,lldb::tid_t tid,lldb::pid_t pid,uint64_t hinted_start,uint64_t end)155 ThreadContinuousExecution ThreadContinuousExecution::CreateHintedStartExecution(
156 lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid,
157 uint64_t hinted_start, uint64_t end) {
158 ThreadContinuousExecution o(cpu_id, tid, pid);
159 o.variant = Variant::HintedStart;
160 o.tscs.hinted_start.hinted_start = hinted_start;
161 o.tscs.hinted_start.end = end;
162 return o;
163 }
164
CreateHintedEndExecution(lldb::cpu_id_t cpu_id,lldb::tid_t tid,lldb::pid_t pid,uint64_t start,uint64_t hinted_end)165 ThreadContinuousExecution ThreadContinuousExecution::CreateHintedEndExecution(
166 lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start,
167 uint64_t hinted_end) {
168 ThreadContinuousExecution o(cpu_id, tid, pid);
169 o.variant = Variant::HintedEnd;
170 o.tscs.hinted_end.start = start;
171 o.tscs.hinted_end.hinted_end = hinted_end;
172 return o;
173 }
174
CreateOnlyEndExecution(lldb::cpu_id_t cpu_id,lldb::tid_t tid,lldb::pid_t pid,uint64_t end)175 ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyEndExecution(
176 lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t end) {
177 ThreadContinuousExecution o(cpu_id, tid, pid);
178 o.variant = Variant::OnlyEnd;
179 o.tscs.only_end.end = end;
180 return o;
181 }
182
CreateOnlyStartExecution(lldb::cpu_id_t cpu_id,lldb::tid_t tid,lldb::pid_t pid,uint64_t start)183 ThreadContinuousExecution ThreadContinuousExecution::CreateOnlyStartExecution(
184 lldb::cpu_id_t cpu_id, lldb::tid_t tid, lldb::pid_t pid, uint64_t start) {
185 ThreadContinuousExecution o(cpu_id, tid, pid);
186 o.variant = Variant::OnlyStart;
187 o.tscs.only_start.start = start;
188 return o;
189 }
190
RecoverExecutionsFromConsecutiveRecords(cpu_id_t cpu_id,const LinuxPerfZeroTscConversion & tsc_conversion,const ContextSwitchRecord & current_record,const Optional<ContextSwitchRecord> & prev_record,std::function<void (const ThreadContinuousExecution & execution)> on_new_execution)191 static Error RecoverExecutionsFromConsecutiveRecords(
192 cpu_id_t cpu_id, const LinuxPerfZeroTscConversion &tsc_conversion,
193 const ContextSwitchRecord ¤t_record,
194 const Optional<ContextSwitchRecord> &prev_record,
195 std::function<void(const ThreadContinuousExecution &execution)>
196 on_new_execution) {
197 if (!prev_record) {
198 if (current_record.IsOut()) {
199 on_new_execution(ThreadContinuousExecution::CreateOnlyEndExecution(
200 cpu_id, current_record.tid, current_record.pid, current_record.tsc));
201 }
202 // The 'in' case will be handled later when we try to look for its end
203 return Error::success();
204 }
205
206 const ContextSwitchRecord &prev = *prev_record;
207 if (prev.tsc >= current_record.tsc)
208 return createStringError(
209 inconvertibleErrorCode(),
210 formatv("A context switch record doesn't happen after the previous "
211 "record. Previous TSC= {0}, current TSC = {1}.",
212 prev.tsc, current_record.tsc));
213
214 if (current_record.IsIn() && prev.IsIn()) {
215 // We found two consecutive ins, which means that we didn't capture
216 // the end of the previous execution.
217 on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution(
218 cpu_id, prev.tid, prev.pid, prev.tsc, current_record.tsc - 1));
219 } else if (current_record.IsOut() && prev.IsOut()) {
220 // We found two consecutive outs, that means that we didn't capture
221 // the beginning of the current execution.
222 on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution(
223 cpu_id, current_record.tid, current_record.pid, prev.tsc + 1,
224 current_record.tsc));
225 } else if (current_record.IsOut() && prev.IsIn()) {
226 if (current_record.pid == prev.pid && current_record.tid == prev.tid) {
227 /// A complete execution
228 on_new_execution(ThreadContinuousExecution::CreateCompleteExecution(
229 cpu_id, current_record.tid, current_record.pid, prev.tsc,
230 current_record.tsc));
231 } else {
232 // An out after the in of a different thread. The first one doesn't
233 // have an end, and the second one doesn't have a start.
234 on_new_execution(ThreadContinuousExecution::CreateHintedEndExecution(
235 cpu_id, prev.tid, prev.pid, prev.tsc, current_record.tsc - 1));
236 on_new_execution(ThreadContinuousExecution::CreateHintedStartExecution(
237 cpu_id, current_record.tid, current_record.pid, prev.tsc + 1,
238 current_record.tsc));
239 }
240 }
241 return Error::success();
242 }
243
244 Expected<std::vector<ThreadContinuousExecution>>
DecodePerfContextSwitchTrace(ArrayRef<uint8_t> data,cpu_id_t cpu_id,const LinuxPerfZeroTscConversion & tsc_conversion)245 lldb_private::trace_intel_pt::DecodePerfContextSwitchTrace(
246 ArrayRef<uint8_t> data, cpu_id_t cpu_id,
247 const LinuxPerfZeroTscConversion &tsc_conversion) {
248
249 std::vector<ThreadContinuousExecution> executions;
250
251 // This offset is used to create the error message in case of failures.
252 size_t offset = 0;
253
254 auto do_decode = [&]() -> Error {
255 Optional<ContextSwitchRecord> prev_record;
256 while (offset < data.size()) {
257 const perf_event_header &perf_record =
258 *reinterpret_cast<const perf_event_header *>(data.data() + offset);
259 if (Error err = perf_record.SanityCheck())
260 return err;
261
262 if (perf_record.IsContextSwitchRecord()) {
263 const PerfContextSwitchRecord &context_switch_record =
264 *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() +
265 offset);
266 ContextSwitchRecord record{
267 tsc_conversion.ToTSC(context_switch_record.time_in_nanos),
268 context_switch_record.IsOut(),
269 static_cast<lldb::pid_t>(context_switch_record.pid),
270 static_cast<lldb::tid_t>(context_switch_record.tid)};
271
272 if (Error err = RecoverExecutionsFromConsecutiveRecords(
273 cpu_id, tsc_conversion, record, prev_record,
274 [&](const ThreadContinuousExecution &execution) {
275 executions.push_back(execution);
276 }))
277 return err;
278
279 prev_record = record;
280 }
281 offset += perf_record.size;
282 }
283
284 // We might have an incomplete last record
285 if (prev_record && prev_record->IsIn())
286 executions.push_back(ThreadContinuousExecution::CreateOnlyStartExecution(
287 cpu_id, prev_record->tid, prev_record->pid, prev_record->tsc));
288 return Error::success();
289 };
290
291 if (Error err = do_decode())
292 return createStringError(inconvertibleErrorCode(),
293 formatv("Malformed perf context switch trace for "
294 "cpu {0} at offset {1}. {2}",
295 cpu_id, offset, toString(std::move(err))));
296
297 return executions;
298 }
299
300 Expected<std::vector<uint8_t>>
FilterProcessesFromContextSwitchTrace(llvm::ArrayRef<uint8_t> data,const std::set<lldb::pid_t> & pids)301 lldb_private::trace_intel_pt::FilterProcessesFromContextSwitchTrace(
302 llvm::ArrayRef<uint8_t> data, const std::set<lldb::pid_t> &pids) {
303 size_t offset = 0;
304 std::vector<uint8_t> out_data;
305
306 while (offset < data.size()) {
307 const perf_event_header &perf_record =
308 *reinterpret_cast<const perf_event_header *>(data.data() + offset);
309 if (Error err = perf_record.SanityCheck())
310 return std::move(err);
311 bool should_copy = false;
312 if (perf_record.IsContextSwitchRecord()) {
313 const PerfContextSwitchRecord &context_switch_record =
314 *reinterpret_cast<const PerfContextSwitchRecord *>(data.data() +
315 offset);
316 if (pids.count(context_switch_record.pid))
317 should_copy = true;
318 } else if (perf_record.IsErrorRecord()) {
319 should_copy = true;
320 }
321
322 if (should_copy) {
323 for (size_t i = 0; i < perf_record.size; i++) {
324 out_data.push_back(data[offset + i]);
325 }
326 }
327
328 offset += perf_record.size;
329 }
330 return out_data;
331 }
332