1 //===- Trace.cpp - XRay Trace Loading implementation. ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // XRay log reader implementation. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "llvm/XRay/Trace.h" 14 #include "llvm/ADT/STLExtras.h" 15 #include "llvm/Support/DataExtractor.h" 16 #include "llvm/Support/Error.h" 17 #include "llvm/Support/FileSystem.h" 18 #include "llvm/XRay/YAMLXRayRecord.h" 19 20 using namespace llvm; 21 using namespace llvm::xray; 22 using llvm::yaml::Input; 23 24 using XRayRecordStorage = 25 std::aligned_storage<sizeof(XRayRecord), alignof(XRayRecord)>::type; 26 27 // Populates the FileHeader reference by reading the first 32 bytes of the file. 28 Error readBinaryFormatHeader(StringRef Data, XRayFileHeader &FileHeader) { 29 // FIXME: Maybe deduce whether the data is little or big-endian using some 30 // magic bytes in the beginning of the file? 31 32 // First 32 bytes of the file will always be the header. We assume a certain 33 // format here: 34 // 35 // (2) uint16 : version 36 // (2) uint16 : type 37 // (4) uint32 : bitfield 38 // (8) uint64 : cycle frequency 39 // (16) - : padding 40 41 DataExtractor HeaderExtractor(Data, true, 8); 42 uint32_t OffsetPtr = 0; 43 FileHeader.Version = HeaderExtractor.getU16(&OffsetPtr); 44 FileHeader.Type = HeaderExtractor.getU16(&OffsetPtr); 45 uint32_t Bitfield = HeaderExtractor.getU32(&OffsetPtr); 46 FileHeader.ConstantTSC = Bitfield & 1uL; 47 FileHeader.NonstopTSC = Bitfield & 1uL << 1; 48 FileHeader.CycleFrequency = HeaderExtractor.getU64(&OffsetPtr); 49 std::memcpy(&FileHeader.FreeFormData, Data.bytes_begin() + OffsetPtr, 16); 50 if (FileHeader.Version != 1) 51 return make_error<StringError>( 52 Twine("Unsupported XRay file version: ") + Twine(FileHeader.Version), 53 std::make_error_code(std::errc::invalid_argument)); 54 return Error::success(); 55 } 56 57 Error loadNaiveFormatLog(StringRef Data, XRayFileHeader &FileHeader, 58 std::vector<XRayRecord> &Records) { 59 // Check that there is at least a header 60 if (Data.size() < 32) 61 return make_error<StringError>( 62 "Not enough bytes for an XRay log.", 63 std::make_error_code(std::errc::invalid_argument)); 64 65 if (Data.size() - 32 == 0 || Data.size() % 32 != 0) 66 return make_error<StringError>( 67 "Invalid-sized XRay data.", 68 std::make_error_code(std::errc::invalid_argument)); 69 70 if (auto E = readBinaryFormatHeader(Data, FileHeader)) 71 return E; 72 73 // Each record after the header will be 32 bytes, in the following format: 74 // 75 // (2) uint16 : record type 76 // (1) uint8 : cpu id 77 // (1) uint8 : type 78 // (4) sint32 : function id 79 // (8) uint64 : tsc 80 // (4) uint32 : thread id 81 // (12) - : padding 82 for (auto S = Data.drop_front(32); !S.empty(); S = S.drop_front(32)) { 83 DataExtractor RecordExtractor(S, true, 8); 84 uint32_t OffsetPtr = 0; 85 Records.emplace_back(); 86 auto &Record = Records.back(); 87 Record.RecordType = RecordExtractor.getU16(&OffsetPtr); 88 Record.CPU = RecordExtractor.getU8(&OffsetPtr); 89 auto Type = RecordExtractor.getU8(&OffsetPtr); 90 switch (Type) { 91 case 0: 92 Record.Type = RecordTypes::ENTER; 93 break; 94 case 1: 95 Record.Type = RecordTypes::EXIT; 96 break; 97 default: 98 return make_error<StringError>( 99 Twine("Unknown record type '") + Twine(int{Type}) + "'", 100 std::make_error_code(std::errc::executable_format_error)); 101 } 102 Record.FuncId = RecordExtractor.getSigned(&OffsetPtr, sizeof(int32_t)); 103 Record.TSC = RecordExtractor.getU64(&OffsetPtr); 104 Record.TId = RecordExtractor.getU32(&OffsetPtr); 105 } 106 return Error::success(); 107 } 108 109 /// When reading from a Flight Data Recorder mode log, metadata records are 110 /// sparse compared to packed function records, so we must maintain state as we 111 /// read through the sequence of entries. This allows the reader to denormalize 112 /// the CPUId and Thread Id onto each Function Record and transform delta 113 /// encoded TSC values into absolute encodings on each record. 114 struct FDRState { 115 uint16_t CPUId; 116 uint16_t ThreadId; 117 uint64_t BaseTSC; 118 /// Encode some of the state transitions for the FDR log reader as explicit 119 /// checks. These are expectations for the next Record in the stream. 120 enum class Token { 121 NEW_BUFFER_RECORD_OR_EOF, 122 WALLCLOCK_RECORD, 123 NEW_CPU_ID_RECORD, 124 FUNCTION_SEQUENCE, 125 SCAN_TO_END_OF_THREAD_BUF, 126 }; 127 Token Expects; 128 // Each threads buffer may have trailing garbage to scan over, so we track our 129 // progress. 130 uint64_t CurrentBufferSize; 131 uint64_t CurrentBufferConsumed; 132 }; 133 134 Twine fdrStateToTwine(const FDRState::Token &state) { 135 switch (state) { 136 case FDRState::Token::NEW_BUFFER_RECORD_OR_EOF: 137 return "NEW_BUFFER_RECORD_OR_EOF"; 138 case FDRState::Token::WALLCLOCK_RECORD: 139 return "WALLCLOCK_RECORD"; 140 case FDRState::Token::NEW_CPU_ID_RECORD: 141 return "NEW_CPU_ID_RECORD"; 142 case FDRState::Token::FUNCTION_SEQUENCE: 143 return "FUNCTION_SEQUENCE"; 144 case FDRState::Token::SCAN_TO_END_OF_THREAD_BUF: 145 return "SCAN_TO_END_OF_THREAD_BUF"; 146 } 147 return "UNKNOWN"; 148 } 149 150 /// State transition when a NewBufferRecord is encountered. 151 Error processFDRNewBufferRecord(FDRState &State, uint8_t RecordFirstByte, 152 DataExtractor &RecordExtractor) { 153 154 if (State.Expects != FDRState::Token::NEW_BUFFER_RECORD_OR_EOF) 155 return make_error<StringError>( 156 "Malformed log. Read New Buffer record kind out of sequence", 157 std::make_error_code(std::errc::executable_format_error)); 158 uint32_t OffsetPtr = 1; // 1 byte into record. 159 State.ThreadId = RecordExtractor.getU16(&OffsetPtr); 160 State.Expects = FDRState::Token::WALLCLOCK_RECORD; 161 return Error::success(); 162 } 163 164 /// State transition when an EndOfBufferRecord is encountered. 165 Error processFDREndOfBufferRecord(FDRState &State, uint8_t RecordFirstByte, 166 DataExtractor &RecordExtractor) { 167 if (State.Expects == FDRState::Token::NEW_BUFFER_RECORD_OR_EOF) 168 return make_error<StringError>( 169 "Malformed log. Received EOB message without current buffer.", 170 std::make_error_code(std::errc::executable_format_error)); 171 State.Expects = FDRState::Token::SCAN_TO_END_OF_THREAD_BUF; 172 return Error::success(); 173 } 174 175 /// State transition when a NewCPUIdRecord is encountered. 176 Error processFDRNewCPUIdRecord(FDRState &State, uint8_t RecordFirstByte, 177 DataExtractor &RecordExtractor) { 178 if (State.Expects != FDRState::Token::FUNCTION_SEQUENCE && 179 State.Expects != FDRState::Token::NEW_CPU_ID_RECORD) 180 return make_error<StringError>( 181 "Malformed log. Read NewCPUId record kind out of sequence", 182 std::make_error_code(std::errc::executable_format_error)); 183 uint32_t OffsetPtr = 1; // Read starting after the first byte. 184 State.CPUId = RecordExtractor.getU16(&OffsetPtr); 185 State.BaseTSC = RecordExtractor.getU64(&OffsetPtr); 186 State.Expects = FDRState::Token::FUNCTION_SEQUENCE; 187 return Error::success(); 188 } 189 190 /// State transition when a TSCWrapRecord (overflow detection) is encountered. 191 Error processFDRTSCWrapRecord(FDRState &State, uint8_t RecordFirstByte, 192 DataExtractor &RecordExtractor) { 193 if (State.Expects != FDRState::Token::FUNCTION_SEQUENCE) 194 return make_error<StringError>( 195 "Malformed log. Read TSCWrap record kind out of sequence", 196 std::make_error_code(std::errc::executable_format_error)); 197 uint32_t OffsetPtr = 1; // Read starting after the first byte. 198 State.BaseTSC = RecordExtractor.getU64(&OffsetPtr); 199 return Error::success(); 200 } 201 202 /// State transition when a WallTimeMarkerRecord is encountered. 203 Error processFDRWallTimeRecord(FDRState &State, uint8_t RecordFirstByte, 204 DataExtractor &RecordExtractor) { 205 if (State.Expects != FDRState::Token::WALLCLOCK_RECORD) 206 return make_error<StringError>( 207 "Malformed log. Read Wallclock record kind out of sequence", 208 std::make_error_code(std::errc::executable_format_error)); 209 // We don't encode the wall time into any of the records. 210 // XRayRecords are concerned with the TSC instead. 211 State.Expects = FDRState::Token::NEW_CPU_ID_RECORD; 212 return Error::success(); 213 } 214 215 /// Advances the state machine for reading the FDR record type by reading one 216 /// Metadata Record and updating the State appropriately based on the kind of 217 /// record encountered. The RecordKind is encoded in the first byte of the 218 /// Record, which the caller should pass in because they have already read it 219 /// to determine that this is a metadata record as opposed to a function record. 220 Error processFDRMetadataRecord(FDRState &State, uint8_t RecordFirstByte, 221 DataExtractor &RecordExtractor) { 222 // The remaining 7 bits are the RecordKind enum. 223 uint8_t RecordKind = RecordFirstByte >> 1; 224 switch (RecordKind) { 225 case 0: // NewBuffer 226 if (auto E = 227 processFDRNewBufferRecord(State, RecordFirstByte, RecordExtractor)) 228 return E; 229 break; 230 case 1: // EndOfBuffer 231 if (auto E = processFDREndOfBufferRecord(State, RecordFirstByte, 232 RecordExtractor)) 233 return E; 234 break; 235 case 2: // NewCPUId 236 if (auto E = 237 processFDRNewCPUIdRecord(State, RecordFirstByte, RecordExtractor)) 238 return E; 239 break; 240 case 3: // TSCWrap 241 if (auto E = 242 processFDRTSCWrapRecord(State, RecordFirstByte, RecordExtractor)) 243 return E; 244 break; 245 case 4: // WallTimeMarker 246 if (auto E = 247 processFDRWallTimeRecord(State, RecordFirstByte, RecordExtractor)) 248 return E; 249 break; 250 default: 251 // Widen the record type to uint16_t to prevent conversion to char. 252 return make_error<StringError>( 253 Twine("Illegal metadata record type: ") 254 .concat(Twine(static_cast<unsigned>(RecordKind))), 255 std::make_error_code(std::errc::executable_format_error)); 256 } 257 return Error::success(); 258 } 259 260 /// Reads a function record from an FDR format log, appending a new XRayRecord 261 /// to the vector being populated and updating the State with a new value 262 /// reference value to interpret TSC deltas. 263 /// 264 /// The XRayRecord constructed includes information from the function record 265 /// processed here as well as Thread ID and CPU ID formerly extracted into 266 /// State. 267 Error processFDRFunctionRecord(FDRState &State, uint8_t RecordFirstByte, 268 DataExtractor &RecordExtractor, 269 std::vector<XRayRecord> &Records) { 270 switch (State.Expects) { 271 case FDRState::Token::NEW_BUFFER_RECORD_OR_EOF: 272 return make_error<StringError>( 273 "Malformed log. Received Function Record before new buffer setup.", 274 std::make_error_code(std::errc::executable_format_error)); 275 case FDRState::Token::WALLCLOCK_RECORD: 276 return make_error<StringError>( 277 "Malformed log. Received Function Record when expecting wallclock.", 278 std::make_error_code(std::errc::executable_format_error)); 279 case FDRState::Token::NEW_CPU_ID_RECORD: 280 return make_error<StringError>( 281 "Malformed log. Received Function Record before first CPU record.", 282 std::make_error_code(std::errc::executable_format_error)); 283 default: 284 Records.emplace_back(); 285 auto &Record = Records.back(); 286 Record.RecordType = 0; // Record is type NORMAL. 287 // Strip off record type bit and use the next three bits. 288 uint8_t RecordType = (RecordFirstByte >> 1) & 0x07; 289 switch (RecordType) { 290 case static_cast<uint8_t>(RecordTypes::ENTER): 291 Record.Type = RecordTypes::ENTER; 292 break; 293 case static_cast<uint8_t>(RecordTypes::EXIT): 294 case 2: // TAIL_EXIT is not yet defined in RecordTypes. 295 Record.Type = RecordTypes::EXIT; 296 break; 297 default: 298 // When initializing the error, convert to uint16_t so that the record 299 // type isn't interpreted as a char. 300 return make_error<StringError>( 301 Twine("Illegal function record type: ") 302 .concat(Twine(static_cast<unsigned>(RecordType))), 303 std::make_error_code(std::errc::executable_format_error)); 304 } 305 Record.CPU = State.CPUId; 306 Record.TId = State.ThreadId; 307 // Back up to read first 32 bits, including the 8 we pulled RecordType 308 // and RecordKind out of. The remaining 28 are FunctionId. 309 uint32_t OffsetPtr = 0; 310 // Despite function Id being a signed int on XRayRecord, 311 // when it is written to an FDR format, the top bits are truncated, 312 // so it is effectively an unsigned value. When we shift off the 313 // top four bits, we want the shift to be logical, so we read as 314 // uint32_t. 315 uint32_t FuncIdBitField = RecordExtractor.getU32(&OffsetPtr); 316 Record.FuncId = FuncIdBitField >> 4; 317 // FunctionRecords have a 32 bit delta from the previous absolute TSC 318 // or TSC delta. If this would overflow, we should read a TSCWrap record 319 // with an absolute TSC reading. 320 uint64_t new_tsc = State.BaseTSC + RecordExtractor.getU32(&OffsetPtr); 321 State.BaseTSC = new_tsc; 322 Record.TSC = new_tsc; 323 } 324 return Error::success(); 325 } 326 327 /// Reads a log in FDR mode for version 1 of this binary format. FDR mode is 328 /// defined as part of the compiler-rt project in xray_fdr_logging.h, and such 329 /// a log consists of the familiar 32 bit XRayHeader, followed by sequences of 330 /// of interspersed 16 byte Metadata Records and 8 byte Function Records. 331 /// 332 /// The following is an attempt to document the grammar of the format, which is 333 /// parsed by this function for little-endian machines. Since the format makes 334 /// use of BitFields, when we support big-Endian architectures, we will need to 335 /// adjust not only the endianness parameter to llvm's RecordExtractor, but also 336 /// the bit twiddling logic, which is consistent with the little-endian 337 /// convention that BitFields within a struct will first be packed into the 338 /// least significant bits the address they belong to. 339 /// 340 /// We expect a format complying with the grammar in the following pseudo-EBNF. 341 /// 342 /// FDRLog: XRayFileHeader ThreadBuffer* 343 /// XRayFileHeader: 32 bits to identify the log as FDR with machine metadata. 344 /// ThreadBuffer: BufSize NewBuffer WallClockTime NewCPUId FunctionSequence EOB 345 /// BufSize: 8 byte unsigned integer indicating how large the buffer is. 346 /// NewBuffer: 16 byte metadata record with Thread Id. 347 /// WallClockTime: 16 byte metadata record with human readable time. 348 /// NewCPUId: 16 byte metadata record with CPUId and a 64 bit TSC reading. 349 /// EOB: 16 byte record in a thread buffer plus mem garbage to fill BufSize. 350 /// FunctionSequence: NewCPUId | TSCWrap | FunctionRecord 351 /// TSCWrap: 16 byte metadata record with a full 64 bit TSC reading. 352 /// FunctionRecord: 8 byte record with FunctionId, entry/exit, and TSC delta. 353 Error loadFDRLog(StringRef Data, XRayFileHeader &FileHeader, 354 std::vector<XRayRecord> &Records) { 355 if (Data.size() < 32) 356 return make_error<StringError>( 357 "Not enough bytes for an XRay log.", 358 std::make_error_code(std::errc::invalid_argument)); 359 360 // For an FDR log, there are records sized 16 and 8 bytes. 361 // There actually may be no records if no non-trivial functions are 362 // instrumented. 363 if (Data.size() % 8 != 0) 364 return make_error<StringError>( 365 "Invalid-sized XRay data.", 366 std::make_error_code(std::errc::invalid_argument)); 367 368 if (auto E = readBinaryFormatHeader(Data, FileHeader)) 369 return E; 370 371 uint64_t BufferSize = 0; 372 { 373 StringRef ExtraDataRef(FileHeader.FreeFormData, 16); 374 DataExtractor ExtraDataExtractor(ExtraDataRef, true, 8); 375 uint32_t ExtraDataOffset = 0; 376 BufferSize = ExtraDataExtractor.getU64(&ExtraDataOffset); 377 } 378 FDRState State{0, 0, 0, FDRState::Token::NEW_BUFFER_RECORD_OR_EOF, 379 BufferSize, 0}; 380 // RecordSize will tell the loop how far to seek ahead based on the record 381 // type that we have just read. 382 size_t RecordSize = 0; 383 for (auto S = Data.drop_front(32); !S.empty(); S = S.drop_front(RecordSize)) { 384 DataExtractor RecordExtractor(S, true, 8); 385 uint32_t OffsetPtr = 0; 386 if (State.Expects == FDRState::Token::SCAN_TO_END_OF_THREAD_BUF) { 387 RecordSize = State.CurrentBufferSize - State.CurrentBufferConsumed; 388 if (S.size() < State.CurrentBufferSize - State.CurrentBufferConsumed) { 389 return make_error<StringError>( 390 Twine("Incomplete thread buffer. Expected ") + 391 Twine(State.CurrentBufferSize - State.CurrentBufferConsumed) + 392 " remaining bytes but found " + Twine(S.size()), 393 make_error_code(std::errc::invalid_argument)); 394 } 395 State.CurrentBufferConsumed = 0; 396 State.Expects = FDRState::Token::NEW_BUFFER_RECORD_OR_EOF; 397 continue; 398 } 399 uint8_t BitField = RecordExtractor.getU8(&OffsetPtr); 400 bool isMetadataRecord = BitField & 0x01uL; 401 if (isMetadataRecord) { 402 RecordSize = 16; 403 if (auto E = processFDRMetadataRecord(State, BitField, RecordExtractor)) 404 return E; 405 State.CurrentBufferConsumed += RecordSize; 406 } else { // Process Function Record 407 RecordSize = 8; 408 if (auto E = processFDRFunctionRecord(State, BitField, RecordExtractor, 409 Records)) 410 return E; 411 State.CurrentBufferConsumed += RecordSize; 412 } 413 } 414 // There are two conditions 415 if (State.Expects != FDRState::Token::NEW_BUFFER_RECORD_OR_EOF && 416 !(State.Expects == FDRState::Token::SCAN_TO_END_OF_THREAD_BUF && 417 State.CurrentBufferSize == State.CurrentBufferConsumed)) 418 return make_error<StringError>( 419 Twine("Encountered EOF with unexpected state expectation ") + 420 fdrStateToTwine(State.Expects) + 421 ". Remaining expected bytes in thread buffer total " + 422 Twine(State.CurrentBufferSize - State.CurrentBufferConsumed), 423 std::make_error_code(std::errc::executable_format_error)); 424 425 return Error::success(); 426 } 427 428 Error loadYAMLLog(StringRef Data, XRayFileHeader &FileHeader, 429 std::vector<XRayRecord> &Records) { 430 // Load the documents from the MappedFile. 431 YAMLXRayTrace Trace; 432 Input In(Data); 433 In >> Trace; 434 if (In.error()) 435 return make_error<StringError>("Failed loading YAML Data.", In.error()); 436 437 FileHeader.Version = Trace.Header.Version; 438 FileHeader.Type = Trace.Header.Type; 439 FileHeader.ConstantTSC = Trace.Header.ConstantTSC; 440 FileHeader.NonstopTSC = Trace.Header.NonstopTSC; 441 FileHeader.CycleFrequency = Trace.Header.CycleFrequency; 442 443 if (FileHeader.Version != 1) 444 return make_error<StringError>( 445 Twine("Unsupported XRay file version: ") + Twine(FileHeader.Version), 446 std::make_error_code(std::errc::invalid_argument)); 447 448 Records.clear(); 449 std::transform(Trace.Records.begin(), Trace.Records.end(), 450 std::back_inserter(Records), [&](const YAMLXRayRecord &R) { 451 return XRayRecord{R.RecordType, R.CPU, R.Type, 452 R.FuncId, R.TSC, R.TId}; 453 }); 454 return Error::success(); 455 } 456 457 Expected<Trace> llvm::xray::loadTraceFile(StringRef Filename, bool Sort) { 458 int Fd; 459 if (auto EC = sys::fs::openFileForRead(Filename, Fd)) { 460 return make_error<StringError>( 461 Twine("Cannot read log from '") + Filename + "'", EC); 462 } 463 464 // Attempt to get the filesize. 465 uint64_t FileSize; 466 if (auto EC = sys::fs::file_size(Filename, FileSize)) { 467 return make_error<StringError>( 468 Twine("Cannot read log from '") + Filename + "'", EC); 469 } 470 if (FileSize < 4) { 471 return make_error<StringError>( 472 Twine("File '") + Filename + "' too small for XRay.", 473 std::make_error_code(std::errc::executable_format_error)); 474 } 475 476 // Attempt to mmap the file. 477 std::error_code EC; 478 sys::fs::mapped_file_region MappedFile( 479 Fd, sys::fs::mapped_file_region::mapmode::readonly, FileSize, 0, EC); 480 if (EC) { 481 return make_error<StringError>( 482 Twine("Cannot read log from '") + Filename + "'", EC); 483 } 484 485 // Attempt to detect the file type using file magic. We have a slight bias 486 // towards the binary format, and we do this by making sure that the first 4 487 // bytes of the binary file is some combination of the following byte 488 // patterns: 489 // 490 // 0x0001 0x0000 - version 1, "naive" format 491 // 0x0001 0x0001 - version 1, "flight data recorder" format 492 // 493 // YAML files dont' typically have those first four bytes as valid text so we 494 // try loading assuming YAML if we don't find these bytes. 495 // 496 // Only if we can't load either the binary or the YAML format will we yield an 497 // error. 498 StringRef Magic(MappedFile.data(), 4); 499 DataExtractor HeaderExtractor(Magic, true, 8); 500 uint32_t OffsetPtr = 0; 501 uint16_t Version = HeaderExtractor.getU16(&OffsetPtr); 502 uint16_t Type = HeaderExtractor.getU16(&OffsetPtr); 503 504 enum BinaryFormatType { NAIVE_FORMAT = 0, FLIGHT_DATA_RECORDER_FORMAT = 1 }; 505 506 Trace T; 507 if (Version == 1 && Type == NAIVE_FORMAT) { 508 if (auto E = 509 loadNaiveFormatLog(StringRef(MappedFile.data(), MappedFile.size()), 510 T.FileHeader, T.Records)) 511 return std::move(E); 512 } else if (Version == 1 && Type == FLIGHT_DATA_RECORDER_FORMAT) { 513 if (auto E = loadFDRLog(StringRef(MappedFile.data(), MappedFile.size()), 514 T.FileHeader, T.Records)) 515 return std::move(E); 516 } else { 517 if (auto E = loadYAMLLog(StringRef(MappedFile.data(), MappedFile.size()), 518 T.FileHeader, T.Records)) 519 return std::move(E); 520 } 521 522 if (Sort) 523 std::sort(T.Records.begin(), T.Records.end(), 524 [&](const XRayRecord &L, const XRayRecord &R) { 525 return L.TSC < R.TSC; 526 }); 527 528 return std::move(T); 529 } 530