xref: /llvm-project-15.0.7/llvm/lib/XRay/Trace.cpp (revision ed2f9a60)
1 //===- Trace.cpp - XRay Trace Loading implementation. ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // XRay log reader implementation.
11 //
12 //===----------------------------------------------------------------------===//
13 #include "llvm/XRay/Trace.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/Support/DataExtractor.h"
16 #include "llvm/Support/Error.h"
17 #include "llvm/Support/FileSystem.h"
18 #include "llvm/XRay/YAMLXRayRecord.h"
19 
20 using namespace llvm;
21 using namespace llvm::xray;
22 using llvm::yaml::Input;
23 
24 namespace {
25 using XRayRecordStorage =
26     std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type;
27 
// Size in bytes of the "body" of an FDR-mode MetadataRecord. The leading byte
// that identifies which kind of metadata record it is has already been
// excluded from this count.
constexpr int kFDRMetadataBodySize = 15;
32 
33 // Populates the FileHeader reference by reading the first 32 bytes of the file.
34 Error readBinaryFormatHeader(DataExtractor &HeaderExtractor,
35                              uint32_t &OffsetPtr, XRayFileHeader &FileHeader) {
36   // FIXME: Maybe deduce whether the data is little or big-endian using some
37   // magic bytes in the beginning of the file?
38 
39   // First 32 bytes of the file will always be the header. We assume a certain
40   // format here:
41   //
42   //   (2)   uint16 : version
43   //   (2)   uint16 : type
44   //   (4)   uint32 : bitfield
45   //   (8)   uint64 : cycle frequency
46   //   (16)  -      : padding
47 
48   auto PreReadOffset = OffsetPtr;
49   FileHeader.Version = HeaderExtractor.getU16(&OffsetPtr);
50   if (OffsetPtr == PreReadOffset)
51     return createStringError(
52         std::make_error_code(std::errc::invalid_argument),
53         "Failed reading version from file header at offset %d.", OffsetPtr);
54 
55   PreReadOffset = OffsetPtr;
56   FileHeader.Type = HeaderExtractor.getU16(&OffsetPtr);
57   if (OffsetPtr == PreReadOffset)
58     return createStringError(
59         std::make_error_code(std::errc::invalid_argument),
60         "Failed reading file type from file header at offset %d.", OffsetPtr);
61 
62   PreReadOffset = OffsetPtr;
63   uint32_t Bitfield = HeaderExtractor.getU32(&OffsetPtr);
64   if (OffsetPtr == PreReadOffset)
65     return createStringError(
66         std::make_error_code(std::errc::invalid_argument),
67         "Failed reading flag bits from file header at offset %d.", OffsetPtr);
68 
69   FileHeader.ConstantTSC = Bitfield & 1uL;
70   FileHeader.NonstopTSC = Bitfield & 1uL << 1;
71   PreReadOffset = OffsetPtr;
72   FileHeader.CycleFrequency = HeaderExtractor.getU64(&OffsetPtr);
73   if (OffsetPtr == PreReadOffset)
74     return createStringError(
75         std::make_error_code(std::errc::invalid_argument),
76         "Failed reading cycle frequency from file header at offset %d.",
77         OffsetPtr);
78 
79   std::memcpy(&FileHeader.FreeFormData,
80               HeaderExtractor.getData().bytes_begin() + OffsetPtr, 16);
81 
82   // Manually advance the offset pointer 16 bytes, after getting a raw memcpy
83   // from the underlying data.
84   OffsetPtr += 16;
85   if (FileHeader.Version != 1 && FileHeader.Version != 2 &&
86       FileHeader.Version != 3)
87     return createStringError(std::make_error_code(std::errc::invalid_argument),
88                              "Unsupported XRay file version: %d at offset %d",
89                              FileHeader.Version, OffsetPtr);
90   return Error::success();
91 }
92 
93 Error loadNaiveFormatLog(StringRef Data, XRayFileHeader &FileHeader,
94                          std::vector<XRayRecord> &Records) {
95   if (Data.size() < 32)
96     return make_error<StringError>(
97         "Not enough bytes for an XRay log.",
98         std::make_error_code(std::errc::invalid_argument));
99 
100   if (Data.size() - 32 == 0 || Data.size() % 32 != 0)
101     return make_error<StringError>(
102         "Invalid-sized XRay data.",
103         std::make_error_code(std::errc::invalid_argument));
104 
105   DataExtractor Reader(Data, true, 8);
106   uint32_t OffsetPtr = 0;
107   if (auto E = readBinaryFormatHeader(Reader, OffsetPtr, FileHeader))
108     return E;
109 
110   // Each record after the header will be 32 bytes, in the following format:
111   //
112   //   (2)   uint16 : record type
113   //   (1)   uint8  : cpu id
114   //   (1)   uint8  : type
115   //   (4)   sint32 : function id
116   //   (8)   uint64 : tsc
117   //   (4)   uint32 : thread id
118   //   (4)   uint32 : process id
119   //   (8)   -      : padding
120   while (Reader.isValidOffset(OffsetPtr)) {
121     if (!Reader.isValidOffsetForDataOfSize(OffsetPtr, 32))
122       return createStringError(
123           std::make_error_code(std::errc::executable_format_error),
124           "Not enough bytes to read a full record at offset %d.", OffsetPtr);
125     auto PreReadOffset = OffsetPtr;
126     auto RecordType = Reader.getU16(&OffsetPtr);
127     if (OffsetPtr == PreReadOffset)
128       return createStringError(
129           std::make_error_code(std::errc::executable_format_error),
130           "Failed reading record type at offset %d.", OffsetPtr);
131 
132     switch (RecordType) {
133     case 0: { // Normal records.
134       Records.emplace_back();
135       auto &Record = Records.back();
136       Record.RecordType = RecordType;
137 
138       PreReadOffset = OffsetPtr;
139       Record.CPU = Reader.getU8(&OffsetPtr);
140       if (OffsetPtr == PreReadOffset)
141         return createStringError(
142             std::make_error_code(std::errc::executable_format_error),
143             "Failed reading CPU field at offset %d.", OffsetPtr);
144 
145       PreReadOffset = OffsetPtr;
146       auto Type = Reader.getU8(&OffsetPtr);
147       if (OffsetPtr == PreReadOffset)
148         return createStringError(
149             std::make_error_code(std::errc::executable_format_error),
150             "Failed reading record type field at offset %d.", OffsetPtr);
151 
152       switch (Type) {
153       case 0:
154         Record.Type = RecordTypes::ENTER;
155         break;
156       case 1:
157         Record.Type = RecordTypes::EXIT;
158         break;
159       case 2:
160         Record.Type = RecordTypes::TAIL_EXIT;
161         break;
162       case 3:
163         Record.Type = RecordTypes::ENTER_ARG;
164         break;
165       default:
166         return createStringError(
167             std::make_error_code(std::errc::executable_format_error),
168             "Unknown record type '%d' at offset %d.", Type, OffsetPtr);
169       }
170 
171       PreReadOffset = OffsetPtr;
172       Record.FuncId = Reader.getSigned(&OffsetPtr, sizeof(int32_t));
173       if (OffsetPtr == PreReadOffset)
174         return createStringError(
175             std::make_error_code(std::errc::executable_format_error),
176             "Failed reading function id field at offset %d.", OffsetPtr);
177 
178       PreReadOffset = OffsetPtr;
179       Record.TSC = Reader.getU64(&OffsetPtr);
180       if (OffsetPtr == PreReadOffset)
181         return createStringError(
182             std::make_error_code(std::errc::executable_format_error),
183             "Failed reading TSC field at offset %d.", OffsetPtr);
184 
185       PreReadOffset = OffsetPtr;
186       Record.TId = Reader.getU32(&OffsetPtr);
187       if (OffsetPtr == PreReadOffset)
188         return createStringError(
189             std::make_error_code(std::errc::executable_format_error),
190             "Failed reading thread id field at offset %d.", OffsetPtr);
191 
192       PreReadOffset = OffsetPtr;
193       Record.PId = Reader.getU32(&OffsetPtr);
194       if (OffsetPtr == PreReadOffset)
195         return createStringError(
196             std::make_error_code(std::errc::executable_format_error),
197             "Failed reading process id at offset %d.", OffsetPtr);
198 
199       break;
200     }
201     case 1: { // Arg payload record.
202       auto &Record = Records.back();
203 
204       // We skip the next two bytes of the record, because we don't need the
205       // type and the CPU record for arg payloads.
206       OffsetPtr += 2;
207       PreReadOffset = OffsetPtr;
208       int32_t FuncId = Reader.getSigned(&OffsetPtr, sizeof(int32_t));
209       if (OffsetPtr == PreReadOffset)
210         return createStringError(
211             std::make_error_code(std::errc::executable_format_error),
212             "Failed reading function id field at offset %d.", OffsetPtr);
213 
214       PreReadOffset = OffsetPtr;
215       auto TId = Reader.getU32(&OffsetPtr);
216       if (OffsetPtr == PreReadOffset)
217         return createStringError(
218             std::make_error_code(std::errc::executable_format_error),
219             "Failed reading thread id field at offset %d.", OffsetPtr);
220 
221       PreReadOffset = OffsetPtr;
222       auto PId = Reader.getU32(&OffsetPtr);
223       if (OffsetPtr == PreReadOffset)
224         return createStringError(
225             std::make_error_code(std::errc::executable_format_error),
226             "Failed reading process id field at offset %d.", OffsetPtr);
227 
228       // Make a check for versions above 3 for the Pid field
229       if (Record.FuncId != FuncId || Record.TId != TId ||
230           (FileHeader.Version >= 3 ? Record.PId != PId : false))
231         return createStringError(
232             std::make_error_code(std::errc::executable_format_error),
233             "Corrupted log, found arg payload following non-matching "
234             "function+thread record. Record for function %d != %d at offset "
235             "%d",
236             Record.FuncId, FuncId, OffsetPtr);
237 
238       PreReadOffset = OffsetPtr;
239       auto Arg = Reader.getU64(&OffsetPtr);
240       if (OffsetPtr == PreReadOffset)
241         return createStringError(
242             std::make_error_code(std::errc::executable_format_error),
243             "Failed reading argument payload at offset %d.", OffsetPtr);
244 
245       Record.CallArgs.push_back(Arg);
246       break;
247     }
248     default:
249       return createStringError(
250           std::make_error_code(std::errc::executable_format_error),
251           "Unknown record type '%d' at offset %d.", RecordType, OffsetPtr);
252     }
253     // Advance the offset pointer enough bytes to align to 32-byte records for
254     // basic mode logs.
255     OffsetPtr += 8;
256   }
257   return Error::success();
258 }
259 
/// Running state kept while walking a Flight Data Recorder mode log.
///
/// Metadata records are sparse compared to the packed function records, so the
/// reader remembers the most recent CPU id, thread id, process id and TSC
/// here. That lets it denormalize those values onto every function record and
/// turn delta-encoded TSC values into absolute ones.
struct FDRState {
  /// CPU id taken from the latest NewCPUId record.
  uint16_t CPUId;
  /// Thread id taken from the latest NewBuffer record.
  uint16_t ThreadId;
  /// Process id taken from the latest Pid record.
  int32_t ProcessId;
  /// Absolute TSC that the next function record's delta is added to.
  uint64_t BaseTSC;

  /// Explicit encoding of some of the FDR reader's state transitions: each
  /// value names what the reader expects the next record in the stream to be.
  enum class Token {
    NEW_BUFFER_RECORD_OR_EOF,
    WALLCLOCK_RECORD,
    NEW_CPU_ID_RECORD,
    FUNCTION_SEQUENCE,
    SCAN_TO_END_OF_THREAD_BUF,
    CUSTOM_EVENT_DATA,
    CALL_ARGUMENT,
    BUFFER_EXTENTS,
    PID_RECORD,
  };
  /// Expectation for the next record.
  Token Expects;

  // A thread's buffer may end in trailing garbage that has to be scanned over,
  // so progress through the current buffer is tracked with these two fields.
  uint64_t CurrentBufferSize;
  uint64_t CurrentBufferConsumed;
};
291 
292 const char *fdrStateToTwine(const FDRState::Token &state) {
293   switch (state) {
294   case FDRState::Token::NEW_BUFFER_RECORD_OR_EOF:
295     return "NEW_BUFFER_RECORD_OR_EOF";
296   case FDRState::Token::WALLCLOCK_RECORD:
297     return "WALLCLOCK_RECORD";
298   case FDRState::Token::NEW_CPU_ID_RECORD:
299     return "NEW_CPU_ID_RECORD";
300   case FDRState::Token::FUNCTION_SEQUENCE:
301     return "FUNCTION_SEQUENCE";
302   case FDRState::Token::SCAN_TO_END_OF_THREAD_BUF:
303     return "SCAN_TO_END_OF_THREAD_BUF";
304   case FDRState::Token::CUSTOM_EVENT_DATA:
305     return "CUSTOM_EVENT_DATA";
306   case FDRState::Token::CALL_ARGUMENT:
307     return "CALL_ARGUMENT";
308   case FDRState::Token::BUFFER_EXTENTS:
309     return "BUFFER_EXTENTS";
310   case FDRState::Token::PID_RECORD:
311     return "PID_RECORD";
312   }
313   return "UNKNOWN";
314 }
315 
316 /// State transition when a NewBufferRecord is encountered.
317 Error processFDRNewBufferRecord(FDRState &State, DataExtractor &RecordExtractor,
318                                 uint32_t &OffsetPtr) {
319   if (State.Expects != FDRState::Token::NEW_BUFFER_RECORD_OR_EOF)
320     return createStringError(
321         std::make_error_code(std::errc::executable_format_error),
322         "Malformed log: Read New Buffer record kind out of sequence; expected: "
323         "%s at offset %d.",
324         fdrStateToTwine(State.Expects), OffsetPtr);
325 
326   auto PreReadOffset = OffsetPtr;
327   State.ThreadId = RecordExtractor.getU16(&OffsetPtr);
328   if (OffsetPtr == PreReadOffset)
329     return createStringError(
330         std::make_error_code(std::errc::executable_format_error),
331         "Failed reading the thread id at offset %d.", OffsetPtr);
332   State.Expects = FDRState::Token::WALLCLOCK_RECORD;
333 
334   // Advance the offset pointer by enough bytes representing the remaining
335   // padding in a metadata record.
336   OffsetPtr += kFDRMetadataBodySize - 2;
337   assert(OffsetPtr - PreReadOffset == kFDRMetadataBodySize);
338   return Error::success();
339 }
340 
341 /// State transition when an EndOfBufferRecord is encountered.
342 Error processFDREndOfBufferRecord(FDRState &State, uint32_t &OffsetPtr) {
343   if (State.Expects == FDRState::Token::NEW_BUFFER_RECORD_OR_EOF)
344     return createStringError(
345         std::make_error_code(std::errc::executable_format_error),
346         "Malformed log: Received EOB message without current buffer; expected: "
347         "%s at offset %d.",
348         fdrStateToTwine(State.Expects), OffsetPtr);
349 
350   State.Expects = FDRState::Token::SCAN_TO_END_OF_THREAD_BUF;
351 
352   // Advance the offset pointer by enough bytes representing the remaining
353   // padding in a metadata record.
354   OffsetPtr += kFDRMetadataBodySize;
355   return Error::success();
356 }
357 
358 /// State transition when a NewCPUIdRecord is encountered.
359 Error processFDRNewCPUIdRecord(FDRState &State, DataExtractor &RecordExtractor,
360                                uint32_t &OffsetPtr) {
361   if (State.Expects != FDRState::Token::FUNCTION_SEQUENCE &&
362       State.Expects != FDRState::Token::NEW_CPU_ID_RECORD)
363     return make_error<StringError>(
364         Twine("Malformed log. Read NewCPUId record kind out of sequence; "
365               "expected: ") +
366             fdrStateToTwine(State.Expects),
367         std::make_error_code(std::errc::executable_format_error));
368   auto BeginOffset = OffsetPtr;
369   auto PreReadOffset = OffsetPtr;
370   State.CPUId = RecordExtractor.getU16(&OffsetPtr);
371   if (OffsetPtr == PreReadOffset)
372     return createStringError(
373         std::make_error_code(std::errc::executable_format_error),
374         "Failed reading the CPU field at offset %d.", OffsetPtr);
375 
376   PreReadOffset = OffsetPtr;
377   State.BaseTSC = RecordExtractor.getU64(&OffsetPtr);
378   if (OffsetPtr == PreReadOffset)
379     return createStringError(
380         std::make_error_code(std::errc::executable_format_error),
381         "Failed reading the base TSC field at offset %d.", OffsetPtr);
382 
383   State.Expects = FDRState::Token::FUNCTION_SEQUENCE;
384 
385   // Advance the offset pointer by a few bytes, to account for the padding in
386   // CPU ID metadata records that we've already advanced through.
387   OffsetPtr += kFDRMetadataBodySize - (OffsetPtr - BeginOffset);
388   assert(OffsetPtr - BeginOffset == kFDRMetadataBodySize);
389   return Error::success();
390 }
391 
392 /// State transition when a TSCWrapRecord (overflow detection) is encountered.
393 Error processFDRTSCWrapRecord(FDRState &State, DataExtractor &RecordExtractor,
394                               uint32_t &OffsetPtr) {
395   if (State.Expects != FDRState::Token::FUNCTION_SEQUENCE)
396     return make_error<StringError>(
397         Twine("Malformed log. Read TSCWrap record kind out of sequence; "
398               "expecting: ") +
399             fdrStateToTwine(State.Expects),
400         std::make_error_code(std::errc::executable_format_error));
401   auto PreReadOffset = OffsetPtr;
402   State.BaseTSC = RecordExtractor.getU64(&OffsetPtr);
403   if (OffsetPtr == PreReadOffset)
404     return createStringError(
405         std::make_error_code(std::errc::executable_format_error),
406         "Failed reading the base TSC field at offset %d.", OffsetPtr);
407 
408   // Advance the offset pointer by a few more bytes, accounting for the padding
409   // in the metadata record after reading the base TSC.
410   OffsetPtr += kFDRMetadataBodySize - 8;
411   assert(OffsetPtr - PreReadOffset == kFDRMetadataBodySize);
412   return Error::success();
413 }
414 
415 /// State transition when a WallTimeMarkerRecord is encountered.
416 Error processFDRWallTimeRecord(FDRState &State, DataExtractor &RecordExtractor,
417                                uint32_t &OffsetPtr) {
418   if (State.Expects != FDRState::Token::WALLCLOCK_RECORD)
419     return make_error<StringError>(
420         Twine("Malformed log. Read Wallclock record kind out of sequence; "
421               "expecting: ") +
422             fdrStateToTwine(State.Expects),
423         std::make_error_code(std::errc::executable_format_error));
424 
425   // Read in the data from the walltime record.
426   auto PreReadOffset = OffsetPtr;
427   auto WallTime = RecordExtractor.getU64(&OffsetPtr);
428   if (OffsetPtr == PreReadOffset)
429     return createStringError(
430         std::make_error_code(std::errc::executable_format_error),
431         "Failed reading the walltime record at offset %d.", OffsetPtr);
432 
433   // TODO: Someday, reconcile the TSC ticks to wall clock time for presentation
434   // purposes. For now, we're ignoring these records.
435   (void)WallTime;
436   State.Expects = FDRState::Token::NEW_CPU_ID_RECORD;
437 
438   // Advance the offset pointer by a few more bytes, accounting for the padding
439   // in the metadata record after reading in the walltime data.
440   OffsetPtr += kFDRMetadataBodySize - 8;
441   assert(OffsetPtr - PreReadOffset == kFDRMetadataBodySize);
442   return Error::success();
443 }
444 
445 /// State transition when a PidRecord is encountered.
446 Error processFDRPidRecord(FDRState &State, DataExtractor &RecordExtractor,
447                           uint32_t &OffsetPtr) {
448   if (State.Expects != FDRState::Token::PID_RECORD)
449     return make_error<StringError>(
450         Twine("Malformed log. Read Pid record kind out of sequence; "
451               "expected: ") +
452             fdrStateToTwine(State.Expects),
453         std::make_error_code(std::errc::executable_format_error));
454   auto PreReadOffset = OffsetPtr;
455   State.ProcessId = RecordExtractor.getU32(&OffsetPtr);
456   if (OffsetPtr == PreReadOffset)
457     return createStringError(
458         std::make_error_code(std::errc::executable_format_error),
459         "Failed reading the process ID at offset %d.", OffsetPtr);
460   State.Expects = FDRState::Token::NEW_CPU_ID_RECORD;
461 
462   // Advance the offset pointer by a few more bytes, accounting for the padding
463   // in the metadata record after reading in the PID.
464   OffsetPtr += kFDRMetadataBodySize - 4;
465   assert(OffsetPtr - PreReadOffset == kFDRMetadataBodySize);
466   return Error::success();
467 }
468 
469 /// State transition when a CustomEventMarker is encountered.
470 Error processCustomEventMarker(FDRState &State, DataExtractor &RecordExtractor,
471                                uint32_t &OffsetPtr) {
472   // We can encounter a CustomEventMarker anywhere in the log, so we can handle
473   // it regardless of the expectation. However, we do set the expectation to
474   // read a set number of fixed bytes, as described in the metadata.
475   auto BeginOffset = OffsetPtr;
476   auto PreReadOffset = OffsetPtr;
477   uint32_t DataSize = RecordExtractor.getU32(&OffsetPtr);
478   if (OffsetPtr == PreReadOffset)
479     return createStringError(
480         std::make_error_code(std::errc::executable_format_error),
481         "Failed reading a custom event marker at offset %d.", OffsetPtr);
482 
483   PreReadOffset = OffsetPtr;
484   uint64_t TSC = RecordExtractor.getU64(&OffsetPtr);
485   if (OffsetPtr == PreReadOffset)
486     return createStringError(
487         std::make_error_code(std::errc::executable_format_error),
488         "Failed reading the TSC at offset %d.", OffsetPtr);
489 
490   // FIXME: Actually represent the record through the API. For now we only
491   // skip through the data.
492   (void)TSC;
493   // Advance the offset ptr by the size of the data associated with the custom
494   // event, as well as the padding associated with the remainder of the metadata
495   // record.
496   OffsetPtr += (kFDRMetadataBodySize - (OffsetPtr - BeginOffset)) + DataSize;
497   if (!RecordExtractor.isValidOffset(OffsetPtr))
498     return createStringError(
499         std::make_error_code(std::errc::executable_format_error),
500         "Reading custom event data moves past addressable trace data (starting "
501         "at offset %d, advancing to offset %d).",
502         BeginOffset, OffsetPtr);
503   return Error::success();
504 }
505 
506 /// State transition when an BufferExtents record is encountered.
507 Error processBufferExtents(FDRState &State, DataExtractor &RecordExtractor,
508                            uint32_t &OffsetPtr) {
509   if (State.Expects != FDRState::Token::BUFFER_EXTENTS)
510     return make_error<StringError>(
511         Twine("Malformed log. Buffer Extents unexpected; expected: ") +
512             fdrStateToTwine(State.Expects),
513         std::make_error_code(std::errc::executable_format_error));
514 
515   auto PreReadOffset = OffsetPtr;
516   State.CurrentBufferSize = RecordExtractor.getU64(&OffsetPtr);
517   if (OffsetPtr == PreReadOffset)
518     return createStringError(
519         std::make_error_code(std::errc::executable_format_error),
520         "Failed to read current buffer size at offset %d.", OffsetPtr);
521 
522   State.Expects = FDRState::Token::NEW_BUFFER_RECORD_OR_EOF;
523 
524   // Advance the offset pointer by enough bytes accounting for the padding in a
525   // metadata record, after we read in the buffer extents.
526   OffsetPtr += kFDRMetadataBodySize - 8;
527   return Error::success();
528 }
529 
530 /// State transition when a CallArgumentRecord is encountered.
531 Error processFDRCallArgumentRecord(FDRState &State,
532                                    DataExtractor &RecordExtractor,
533                                    std::vector<XRayRecord> &Records,
534                                    uint32_t &OffsetPtr) {
535   auto &Enter = Records.back();
536   if (Enter.Type != RecordTypes::ENTER && Enter.Type != RecordTypes::ENTER_ARG)
537     return make_error<StringError>(
538         "CallArgument needs to be right after a function entry",
539         std::make_error_code(std::errc::executable_format_error));
540 
541   auto PreReadOffset = OffsetPtr;
542   auto Arg = RecordExtractor.getU64(&OffsetPtr);
543   if (OffsetPtr == PreReadOffset)
544     return createStringError(
545         std::make_error_code(std::errc::executable_format_error),
546         "Failed to read argument record at offset %d.", OffsetPtr);
547 
548   Enter.Type = RecordTypes::ENTER_ARG;
549   Enter.CallArgs.emplace_back(Arg);
550 
551   // Advance the offset pointer by enough bytes accounting for the padding in a
552   // metadata record, after reading the payload.
553   OffsetPtr += kFDRMetadataBodySize - 8;
554   return Error::success();
555 }
556 
557 /// Advances the state machine for reading the FDR record type by reading one
558 /// Metadata Record and updating the State appropriately based on the kind of
559 /// record encountered. The RecordKind is encoded in the first byte of the
560 /// Record, which the caller should pass in because they have already read it
561 /// to determine that this is a metadata record as opposed to a function record.
562 ///
563 /// Beginning with Version 2 of the FDR log, we do not depend on the size of the
564 /// buffer, but rather use the extents to determine how far to read in the log
565 /// for this particular buffer.
566 ///
567 /// In Version 3, FDR log now includes a pid metadata record after
568 /// WallTimeMarker
569 Error processFDRMetadataRecord(FDRState &State, DataExtractor &RecordExtractor,
570                                uint32_t &OffsetPtr,
571                                std::vector<XRayRecord> &Records,
572                                uint16_t Version, uint8_t FirstByte) {
573   // The remaining 7 bits of the first byte are the RecordKind enum for each
574   // Metadata Record.
575   switch (FirstByte >> 1) {
576   case 0: // NewBuffer
577     if (auto E = processFDRNewBufferRecord(State, RecordExtractor, OffsetPtr))
578       return E;
579     break;
580   case 1: // EndOfBuffer
581     if (Version >= 2)
582       return make_error<StringError>(
583           "Since Version 2 of FDR logging, we no longer support EOB records.",
584           std::make_error_code(std::errc::executable_format_error));
585     if (auto E = processFDREndOfBufferRecord(State, OffsetPtr))
586       return E;
587     break;
588   case 2: // NewCPUId
589     if (auto E = processFDRNewCPUIdRecord(State, RecordExtractor, OffsetPtr))
590       return E;
591     break;
592   case 3: // TSCWrap
593     if (auto E = processFDRTSCWrapRecord(State, RecordExtractor, OffsetPtr))
594       return E;
595     break;
596   case 4: // WallTimeMarker
597     if (auto E = processFDRWallTimeRecord(State, RecordExtractor, OffsetPtr))
598       return E;
599     // In Version 3 and and above, a PidRecord is expected after WallTimeRecord
600     if (Version >= 3)
601       State.Expects = FDRState::Token::PID_RECORD;
602     break;
603   case 5: // CustomEventMarker
604     if (auto E = processCustomEventMarker(State, RecordExtractor, OffsetPtr))
605       return E;
606     break;
607   case 6: // CallArgument
608     if (auto E = processFDRCallArgumentRecord(State, RecordExtractor, Records,
609                                               OffsetPtr))
610       return E;
611     break;
612   case 7: // BufferExtents
613     if (auto E = processBufferExtents(State, RecordExtractor, OffsetPtr))
614       return E;
615     break;
616   case 9: // Pid
617     if (auto E = processFDRPidRecord(State, RecordExtractor, OffsetPtr))
618       return E;
619     break;
620   default:
621     return createStringError(
622         std::make_error_code(std::errc::executable_format_error),
623         "Illegal metadata record type: '%d' at offset %d.", FirstByte >> 1,
624         OffsetPtr);
625   }
626   return Error::success();
627 }
628 
629 /// Reads a function record from an FDR format log, appending a new XRayRecord
630 /// to the vector being populated and updating the State with a new value
631 /// reference value to interpret TSC deltas.
632 ///
633 /// The XRayRecord constructed includes information from the function record
634 /// processed here as well as Thread ID and CPU ID formerly extracted into
635 /// State.
636 Error processFDRFunctionRecord(FDRState &State, DataExtractor &RecordExtractor,
637                                uint32_t &OffsetPtr, uint8_t FirstByte,
638                                std::vector<XRayRecord> &Records) {
639   switch (State.Expects) {
640   case FDRState::Token::NEW_BUFFER_RECORD_OR_EOF:
641     return make_error<StringError>(
642         "Malformed log. Received Function Record before new buffer setup.",
643         std::make_error_code(std::errc::executable_format_error));
644   case FDRState::Token::WALLCLOCK_RECORD:
645     return make_error<StringError>(
646         "Malformed log. Received Function Record when expecting wallclock.",
647         std::make_error_code(std::errc::executable_format_error));
648   case FDRState::Token::PID_RECORD:
649     return make_error<StringError>(
650         "Malformed log. Received Function Record when expecting pid.",
651         std::make_error_code(std::errc::executable_format_error));
652   case FDRState::Token::NEW_CPU_ID_RECORD:
653     return make_error<StringError>(
654         "Malformed log. Received Function Record before first CPU record.",
655         std::make_error_code(std::errc::executable_format_error));
656   default:
657     Records.emplace_back();
658     auto &Record = Records.back();
659     Record.RecordType = 0; // Record is type NORMAL.
660     // Strip off record type bit and use the next three bits.
661     auto T = (FirstByte >> 1) & 0x07;
662     switch (T) {
663     case static_cast<decltype(T)>(RecordTypes::ENTER):
664       Record.Type = RecordTypes::ENTER;
665       break;
666     case static_cast<decltype(T)>(RecordTypes::EXIT):
667       Record.Type = RecordTypes::EXIT;
668       break;
669     case static_cast<decltype(T)>(RecordTypes::TAIL_EXIT):
670       Record.Type = RecordTypes::TAIL_EXIT;
671       break;
672     case static_cast<decltype(T)>(RecordTypes::ENTER_ARG):
673       Record.Type = RecordTypes::ENTER_ARG;
674       State.Expects = FDRState::Token::CALL_ARGUMENT;
675       break;
676     default:
677       return createStringError(
678           std::make_error_code(std::errc::executable_format_error),
679           "Illegal function record type '%d' at offset %d.", T, OffsetPtr);
680     }
681     Record.CPU = State.CPUId;
682     Record.TId = State.ThreadId;
683     Record.PId = State.ProcessId;
684 
685     // Back up one byte to re-read the first byte, which is important for
686     // computing the function id for a record.
687     --OffsetPtr;
688 
689     // Despite function Id being a signed int on XRayRecord,
690     // when it is written to an FDR format, the top bits are truncated,
691     // so it is effectively an unsigned value. When we shift off the
692     // top four bits, we want the shift to be logical, so we read as
693     // uint32_t.
694     auto PreReadOffset = OffsetPtr;
695     uint32_t FuncIdBitField = RecordExtractor.getU32(&OffsetPtr);
696     if (OffsetPtr == PreReadOffset)
697       return createStringError(
698           std::make_error_code(std::errc::executable_format_error),
699           "Failed reading truncated function id field at offset %d.",
700           OffsetPtr);
701 
702     Record.FuncId = FuncIdBitField >> 4;
703 
704     // FunctionRecords have a 32 bit delta from the previous absolute TSC
705     // or TSC delta. If this would overflow, we should read a TSCWrap record
706     // with an absolute TSC reading.
707     PreReadOffset = OffsetPtr;
708     uint64_t NewTSC = State.BaseTSC + RecordExtractor.getU32(&OffsetPtr);
709     if (OffsetPtr == PreReadOffset)
710       return createStringError(
711           std::make_error_code(std::errc::executable_format_error),
712           "Failed reading TSC delta at offset %d.", OffsetPtr);
713 
714     State.BaseTSC = NewTSC;
715     Record.TSC = NewTSC;
716   }
717   return Error::success();
718 }
719 
720 /// Reads a log in FDR mode for version 1 of this binary format. FDR mode is
721 /// defined as part of the compiler-rt project in xray_fdr_logging.h, and such
722 /// a log consists of the familiar 32 bit XRayHeader, followed by sequences of
723 /// of interspersed 16 byte Metadata Records and 8 byte Function Records.
724 ///
725 /// The following is an attempt to document the grammar of the format, which is
726 /// parsed by this function for little-endian machines. Since the format makes
727 /// use of BitFields, when we support big-endian architectures, we will need to
728 /// adjust not only the endianness parameter to llvm's RecordExtractor, but also
729 /// the bit twiddling logic, which is consistent with the little-endian
730 /// convention that BitFields within a struct will first be packed into the
731 /// least significant bits the address they belong to.
732 ///
733 /// We expect a format complying with the grammar in the following pseudo-EBNF
734 /// in Version 1 of the FDR log.
735 ///
736 /// FDRLog: XRayFileHeader ThreadBuffer*
737 /// XRayFileHeader: 32 bytes to identify the log as FDR with machine metadata.
738 ///     Includes BufferSize
739 /// ThreadBuffer: NewBuffer WallClockTime NewCPUId FunctionSequence EOB
740 /// BufSize: 8 byte unsigned integer indicating how large the buffer is.
741 /// NewBuffer: 16 byte metadata record with Thread Id.
742 /// WallClockTime: 16 byte metadata record with human readable time.
743 /// Pid: 16 byte metadata record with Pid
744 /// NewCPUId: 16 byte metadata record with CPUId and a 64 bit TSC reading.
745 /// EOB: 16 byte record in a thread buffer plus mem garbage to fill BufSize.
746 /// FunctionSequence: NewCPUId | TSCWrap | FunctionRecord
747 /// TSCWrap: 16 byte metadata record with a full 64 bit TSC reading.
748 /// FunctionRecord: 8 byte record with FunctionId, entry/exit, and TSC delta.
749 ///
750 /// In Version 2, we make the following changes:
751 ///
752 /// ThreadBuffer: BufferExtents NewBuffer WallClockTime NewCPUId
753 ///               FunctionSequence
754 /// BufferExtents: 16 byte metdata record describing how many usable bytes are
755 ///                in the buffer. This is measured from the start of the buffer
756 ///                and must always be at least 48 (bytes).
757 ///
758 /// In Version 3, we make the following changes:
759 ///
760 /// ThreadBuffer: BufferExtents NewBuffer WallClockTime Pid NewCPUId
761 ///               FunctionSequence
762 /// EOB: *deprecated*
763 Error loadFDRLog(StringRef Data, XRayFileHeader &FileHeader,
764                  std::vector<XRayRecord> &Records) {
765   if (Data.size() < 32)
766     return make_error<StringError>(
767         "Not enough bytes for an XRay log.",
768         std::make_error_code(std::errc::invalid_argument));
769 
770   DataExtractor Reader(Data, true, 8);
771   uint32_t OffsetPtr = 0;
772 
773   if (auto E = readBinaryFormatHeader(Reader, OffsetPtr, FileHeader))
774     return E;
775 
776   uint64_t BufferSize = 0;
777   {
778     StringRef ExtraDataRef(FileHeader.FreeFormData, 16);
779     DataExtractor ExtraDataExtractor(ExtraDataRef, true, 8);
780     uint32_t ExtraDataOffset = 0;
781     BufferSize = ExtraDataExtractor.getU64(&ExtraDataOffset);
782   }
783 
784   FDRState::Token InitialExpectation;
785   switch (FileHeader.Version) {
786   case 1:
787     InitialExpectation = FDRState::Token::NEW_BUFFER_RECORD_OR_EOF;
788     break;
789   case 2:
790   case 3:
791     InitialExpectation = FDRState::Token::BUFFER_EXTENTS;
792     break;
793   default:
794     return make_error<StringError>(
795         Twine("Unsupported version '") + Twine(FileHeader.Version) + "'",
796         std::make_error_code(std::errc::executable_format_error));
797   }
798   FDRState State{0, 0, 0, 0, InitialExpectation, BufferSize, 0};
799 
800   // RecordSize will tell the loop how far to seek ahead based on the record
801   // type that we have just read.
802   while (Reader.isValidOffset(OffsetPtr)) {
803     auto BeginOffset = OffsetPtr;
804     if (State.Expects == FDRState::Token::SCAN_TO_END_OF_THREAD_BUF) {
805       OffsetPtr += State.CurrentBufferSize - State.CurrentBufferConsumed;
806       State.CurrentBufferConsumed = 0;
807       State.Expects = FDRState::Token::NEW_BUFFER_RECORD_OR_EOF;
808       continue;
809     }
810     auto PreReadOffset = OffsetPtr;
811     uint8_t BitField = Reader.getU8(&OffsetPtr);
812     if (OffsetPtr == PreReadOffset)
813       return createStringError(
814           std::make_error_code(std::errc::executable_format_error),
815           "Failed reading first byte of record at offset %d.", OffsetPtr);
816     bool isMetadataRecord = BitField & 0x01uL;
817     bool isBufferExtents =
818         (BitField >> 1) == 7; // BufferExtents record kind == 7
819     if (isMetadataRecord) {
820       if (auto E = processFDRMetadataRecord(State, Reader, OffsetPtr, Records,
821                                             FileHeader.Version, BitField))
822         return E;
823     } else { // Process Function Record
824       if (auto E = processFDRFunctionRecord(State, Reader, OffsetPtr, BitField,
825                                             Records))
826         return E;
827     }
828 
829     // The BufferExtents record is technically not part of the buffer, so we
830     // don't count the size of that record against the buffer's actual size.
831     if (!isBufferExtents)
832       State.CurrentBufferConsumed += OffsetPtr - BeginOffset;
833 
834     assert(State.CurrentBufferConsumed <= State.CurrentBufferSize);
835 
836     if ((FileHeader.Version == 2 || FileHeader.Version == 3) &&
837         State.CurrentBufferSize == State.CurrentBufferConsumed) {
838       // In Version 2 of the log, we don't need to scan to the end of the thread
839       // buffer if we've already consumed all the bytes we need to.
840       State.Expects = FDRState::Token::BUFFER_EXTENTS;
841       State.CurrentBufferSize = BufferSize;
842       State.CurrentBufferConsumed = 0;
843     }
844   }
845 
846   // Having iterated over everything we've been given, we've either consumed
847   // everything and ended up in the end state, or were told to skip the rest.
848   bool Finished = State.Expects == FDRState::Token::SCAN_TO_END_OF_THREAD_BUF &&
849                   State.CurrentBufferSize == State.CurrentBufferConsumed;
850   if ((State.Expects != FDRState::Token::NEW_BUFFER_RECORD_OR_EOF &&
851        State.Expects != FDRState::Token::BUFFER_EXTENTS) &&
852       !Finished)
853     return make_error<StringError>(
854         Twine("Encountered EOF with unexpected state expectation ") +
855             fdrStateToTwine(State.Expects) +
856             ". Remaining expected bytes in thread buffer total " +
857             Twine(State.CurrentBufferSize - State.CurrentBufferConsumed),
858         std::make_error_code(std::errc::executable_format_error));
859 
860   return Error::success();
861 }
862 
863 Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader,
864                   std::vector<XRayRecord> &Records) {
865   YAMLXRayTrace Trace;
866   Input In(Data);
867   In >> Trace;
868   if (In.error())
869     return make_error<StringError>("Failed loading YAML Data.", In.error());
870 
871   FileHeader.Version = Trace.Header.Version;
872   FileHeader.Type = Trace.Header.Type;
873   FileHeader.ConstantTSC = Trace.Header.ConstantTSC;
874   FileHeader.NonstopTSC = Trace.Header.NonstopTSC;
875   FileHeader.CycleFrequency = Trace.Header.CycleFrequency;
876 
877   if (FileHeader.Version != 1)
878     return make_error<StringError>(
879         Twine("Unsupported XRay file version: ") + Twine(FileHeader.Version),
880         std::make_error_code(std::errc::invalid_argument));
881 
882   Records.clear();
883   std::transform(Trace.Records.begin(), Trace.Records.end(),
884                  std::back_inserter(Records), [&](const YAMLXRayRecord &R) {
885                    return XRayRecord{R.RecordType, R.CPU, R.Type, R.FuncId,
886                                      R.TSC,        R.TId, R.PId,  R.CallArgs};
887                  });
888   return Error::success();
889 }
890 } // namespace
891 
892 Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) {
893   int Fd;
894   if (auto EC = sys::fs::openFileForRead(Filename, Fd)) {
895     return make_error<StringError>(
896         Twine("Cannot read log from '") + Filename + "'", EC);
897   }
898 
899   uint64_t FileSize;
900   if (auto EC = sys::fs::file_size(Filename, FileSize)) {
901     return make_error<StringError>(
902         Twine("Cannot read log from '") + Filename + "'", EC);
903   }
904   if (FileSize < 4) {
905     return make_error<StringError>(
906         Twine("File '") + Filename + "' too small for XRay.",
907         std::make_error_code(std::errc::executable_format_error));
908   }
909 
910   // Map the opened file into memory and use a StringRef to access it later.
911   std::error_code EC;
912   sys::fs::mapped_file_region MappedFile(
913       Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC);
914   if (EC) {
915     return make_error<StringError>(
916         Twine("Cannot read log from '") + Filename + "'", EC);
917   }
918   auto Data = StringRef(MappedFile.data(), MappedFile.size());
919 
920   // Attempt to detect the file type using file magic. We have a slight bias
921   // towards the binary format, and we do this by making sure that the first 4
922   // bytes of the binary file is some combination of the following byte
923   // patterns: (observe the code loading them assumes they're little endian)
924   //
925   //   0x01 0x00 0x00 0x00 - version 1, "naive" format
926   //   0x01 0x00 0x01 0x00 - version 1, "flight data recorder" format
927   //   0x02 0x00 0x01 0x00 - version 2, "flight data recorder" format
928   //
929   // YAML files don't typically have those first four bytes as valid text so we
930   // try loading assuming YAML if we don't find these bytes.
931   //
932   // Only if we can't load either the binary or the YAML format will we yield an
933   // error.
934   StringRef Magic(MappedFile.data(), 4);
935   DataExtractor HeaderExtractor(Magic, true, 8);
936   uint32_t OffsetPtr = 0;
937   uint16_t Version = HeaderExtractor.getU16(&OffsetPtr);
938   uint16_t Type = HeaderExtractor.getU16(&OffsetPtr);
939 
940   enum BinaryFormatType { NAIVE_FORMAT = 0, FLIGHT_DATA_RECORDER_FORMAT = 1 };
941 
942   Trace T;
943   switch (Type) {
944   case NAIVE_FORMAT:
945     if (Version == 1 || Version == 2 || Version == 3) {
946       if (auto E = loadNaiveFormatLog(Data, T.FileHeader, T.Records))
947         return std::move(E);
948     } else {
949       return make_error<StringError>(
950           Twine("Unsupported version for Basic/Naive Mode logging: ") +
951               Twine(Version),
952           std::make_error_code(std::errc::executable_format_error));
953     }
954     break;
955   case FLIGHT_DATA_RECORDER_FORMAT:
956     if (Version == 1 || Version == 2 || Version == 3) {
957       if (auto E = loadFDRLog(Data, T.FileHeader, T.Records))
958         return std::move(E);
959     } else {
960       return make_error<StringError>(
961           Twine("Unsupported version for FDR Mode logging: ") + Twine(Version),
962           std::make_error_code(std::errc::executable_format_error));
963     }
964     break;
965   default:
966     if (auto E = loadYAMLLog(Data, T.FileHeader, T.Records))
967       return std::move(E);
968   }
969 
970   if (Sort)
971     std::stable_sort(T.Records.begin(), T.Records.end(),
972                      [&](const XRayRecord &L, const XRayRecord &R) {
973                        return L.TSC < R.TSC;
974                      });
975 
976   return std::move(T);
977 }
978