1 //===-- DataExtractor.cpp ---------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "lldb/Utility/DataExtractor.h" 10 11 #include "lldb/lldb-defines.h" 12 #include "lldb/lldb-enumerations.h" 13 #include "lldb/lldb-forward.h" 14 #include "lldb/lldb-types.h" 15 16 #include "lldb/Utility/DataBuffer.h" 17 #include "lldb/Utility/DataBufferHeap.h" 18 #include "lldb/Utility/Endian.h" 19 #include "lldb/Utility/LLDBAssert.h" 20 #include "lldb/Utility/Log.h" 21 #include "lldb/Utility/Stream.h" 22 #include "lldb/Utility/StreamString.h" 23 #include "lldb/Utility/UUID.h" 24 25 #include "llvm/ADT/ArrayRef.h" 26 #include "llvm/ADT/SmallVector.h" 27 #include "llvm/Support/MD5.h" 28 #include "llvm/Support/MathExtras.h" 29 30 #include <algorithm> 31 #include <array> 32 #include <cassert> 33 #include <cstdint> 34 #include <string> 35 36 #include <ctype.h> 37 #include <inttypes.h> 38 #include <string.h> 39 40 using namespace lldb; 41 using namespace lldb_private; 42 43 static inline uint16_t ReadInt16(const unsigned char *ptr, offset_t offset) { 44 uint16_t value; 45 memcpy(&value, ptr + offset, 2); 46 return value; 47 } 48 49 static inline uint32_t ReadInt32(const unsigned char *ptr, 50 offset_t offset = 0) { 51 uint32_t value; 52 memcpy(&value, ptr + offset, 4); 53 return value; 54 } 55 56 static inline uint64_t ReadInt64(const unsigned char *ptr, 57 offset_t offset = 0) { 58 uint64_t value; 59 memcpy(&value, ptr + offset, 8); 60 return value; 61 } 62 63 static inline uint16_t ReadInt16(const void *ptr) { 64 uint16_t value; 65 memcpy(&value, ptr, 2); 66 return value; 67 } 68 69 static inline uint16_t ReadSwapInt16(const unsigned char *ptr, 70 offset_t offset) { 71 uint16_t value; 72 memcpy(&value, ptr + offset, 2); 73 return llvm::ByteSwap_16(value); 74 } 75 76 static inline uint32_t ReadSwapInt32(const unsigned char *ptr, 77 offset_t offset) { 78 uint32_t value; 79 memcpy(&value, ptr + offset, 4); 80 return llvm::ByteSwap_32(value); 81 } 82 83 static inline uint64_t ReadSwapInt64(const unsigned char *ptr, 84 offset_t offset) { 85 uint64_t value; 86 memcpy(&value, ptr + offset, 8); 87 return llvm::ByteSwap_64(value); 88 } 89 90 static inline uint16_t ReadSwapInt16(const void *ptr) { 91 uint16_t value; 92 memcpy(&value, ptr, 2); 93 return llvm::ByteSwap_16(value); 94 } 95 96 static inline uint32_t ReadSwapInt32(const void *ptr) { 97 uint32_t value; 98 memcpy(&value, ptr, 4); 99 return llvm::ByteSwap_32(value); 100 } 101 102 static inline uint64_t ReadSwapInt64(const void *ptr) { 103 uint64_t value; 104 memcpy(&value, ptr, 8); 105 return llvm::ByteSwap_64(value); 106 } 107 108 static inline uint64_t ReadMaxInt64(const uint8_t *data, size_t byte_size, 109 ByteOrder byte_order) { 110 uint64_t res = 0; 111 if (byte_order == eByteOrderBig) 112 for (size_t i = 0; i < byte_size; ++i) 113 res = (res << 8) | data[i]; 114 else { 115 assert(byte_order == eByteOrderLittle); 116 for (size_t i = 0; i < byte_size; ++i) 117 res = (res << 8) | data[byte_size - 1 - i]; 118 } 119 return res; 120 } 121 122 DataExtractor::DataExtractor() 123 : m_start(nullptr), m_end(nullptr), 124 m_byte_order(endian::InlHostByteOrder()), m_addr_size(sizeof(void *)), 125 m_data_sp(), m_target_byte_size(1) {} 126 127 //---------------------------------------------------------------------- 128 // This constructor allows us to use data that is owned by someone else. The 129 // data must stay around as long as this object is valid. 130 //---------------------------------------------------------------------- 131 DataExtractor::DataExtractor(const void *data, offset_t length, 132 ByteOrder endian, uint32_t addr_size, 133 uint32_t target_byte_size /*=1*/) 134 : m_start(const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(data))), 135 m_end(const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(data)) + 136 length), 137 m_byte_order(endian), m_addr_size(addr_size), m_data_sp(), 138 m_target_byte_size(target_byte_size) { 139 #ifdef LLDB_CONFIGURATION_DEBUG 140 assert(addr_size == 4 || addr_size == 8); 141 #endif 142 } 143 144 //---------------------------------------------------------------------- 145 // Make a shared pointer reference to the shared data in "data_sp" and set the 146 // endian swapping setting to "swap", and the address size to "addr_size". The 147 // shared data reference will ensure the data lives as long as any 148 // DataExtractor objects exist that have a reference to this data. 149 //---------------------------------------------------------------------- 150 DataExtractor::DataExtractor(const DataBufferSP &data_sp, ByteOrder endian, 151 uint32_t addr_size, 152 uint32_t target_byte_size /*=1*/) 153 : m_start(nullptr), m_end(nullptr), m_byte_order(endian), 154 m_addr_size(addr_size), m_data_sp(), 155 m_target_byte_size(target_byte_size) { 156 #ifdef LLDB_CONFIGURATION_DEBUG 157 assert(addr_size == 4 || addr_size == 8); 158 #endif 159 SetData(data_sp); 160 } 161 162 //---------------------------------------------------------------------- 163 // Initialize this object with a subset of the data bytes in "data". If "data" 164 // contains shared data, then a reference to this shared data will added and 165 // the shared data will stay around as long as any object contains a reference 166 // to that data. The endian swap and address size settings are copied from 167 // "data". 168 //---------------------------------------------------------------------- 169 DataExtractor::DataExtractor(const DataExtractor &data, offset_t offset, 170 offset_t length, uint32_t target_byte_size /*=1*/) 171 : m_start(nullptr), m_end(nullptr), m_byte_order(data.m_byte_order), 172 m_addr_size(data.m_addr_size), m_data_sp(), 173 m_target_byte_size(target_byte_size) { 174 #ifdef LLDB_CONFIGURATION_DEBUG 175 assert(m_addr_size == 4 || m_addr_size == 8); 176 #endif 177 if (data.ValidOffset(offset)) { 178 offset_t bytes_available = data.GetByteSize() - offset; 179 if (length > bytes_available) 180 length = bytes_available; 181 SetData(data, offset, length); 182 } 183 } 184 185 DataExtractor::DataExtractor(const DataExtractor &rhs) 186 : m_start(rhs.m_start), m_end(rhs.m_end), m_byte_order(rhs.m_byte_order), 187 m_addr_size(rhs.m_addr_size), m_data_sp(rhs.m_data_sp), 188 m_target_byte_size(rhs.m_target_byte_size) { 189 #ifdef LLDB_CONFIGURATION_DEBUG 190 assert(m_addr_size == 4 || m_addr_size == 8); 191 #endif 192 } 193 194 //---------------------------------------------------------------------- 195 // Assignment operator 196 //---------------------------------------------------------------------- 197 const DataExtractor &DataExtractor::operator=(const DataExtractor &rhs) { 198 if (this != &rhs) { 199 m_start = rhs.m_start; 200 m_end = rhs.m_end; 201 m_byte_order = rhs.m_byte_order; 202 m_addr_size = rhs.m_addr_size; 203 m_data_sp = rhs.m_data_sp; 204 } 205 return *this; 206 } 207 208 DataExtractor::~DataExtractor() = default; 209 210 //------------------------------------------------------------------ 211 // Clears the object contents back to a default invalid state, and release any 212 // references to shared data that this object may contain. 213 //------------------------------------------------------------------ 214 void DataExtractor::Clear() { 215 m_start = nullptr; 216 m_end = nullptr; 217 m_byte_order = endian::InlHostByteOrder(); 218 m_addr_size = sizeof(void *); 219 m_data_sp.reset(); 220 } 221 222 //------------------------------------------------------------------ 223 // If this object contains shared data, this function returns the offset into 224 // that shared data. Else zero is returned. 225 //------------------------------------------------------------------ 226 size_t DataExtractor::GetSharedDataOffset() const { 227 if (m_start != nullptr) { 228 const DataBuffer *data = m_data_sp.get(); 229 if (data != nullptr) { 230 const uint8_t *data_bytes = data->GetBytes(); 231 if (data_bytes != nullptr) { 232 assert(m_start >= data_bytes); 233 return m_start - data_bytes; 234 } 235 } 236 } 237 return 0; 238 } 239 240 //---------------------------------------------------------------------- 241 // Set the data with which this object will extract from to data starting at 242 // BYTES and set the length of the data to LENGTH bytes long. The data is 243 // externally owned must be around at least as long as this object points to 244 // the data. No copy of the data is made, this object just refers to this data 245 // and can extract from it. If this object refers to any shared data upon 246 // entry, the reference to that data will be released. Is SWAP is set to true, 247 // any data extracted will be endian swapped. 248 //---------------------------------------------------------------------- 249 lldb::offset_t DataExtractor::SetData(const void *bytes, offset_t length, 250 ByteOrder endian) { 251 m_byte_order = endian; 252 m_data_sp.reset(); 253 if (bytes == nullptr || length == 0) { 254 m_start = nullptr; 255 m_end = nullptr; 256 } else { 257 m_start = const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(bytes)); 258 m_end = m_start + length; 259 } 260 return GetByteSize(); 261 } 262 263 //---------------------------------------------------------------------- 264 // Assign the data for this object to be a subrange in "data" starting 265 // "data_offset" bytes into "data" and ending "data_length" bytes later. If 266 // "data_offset" is not a valid offset into "data", then this object will 267 // contain no bytes. If "data_offset" is within "data" yet "data_length" is too 268 // large, the length will be capped at the number of bytes remaining in "data". 269 // If "data" contains a shared pointer to other data, then a ref counted 270 // pointer to that data will be made in this object. If "data" doesn't contain 271 // a shared pointer to data, then the bytes referred to in "data" will need to 272 // exist at least as long as this object refers to those bytes. The address 273 // size and endian swap settings are copied from the current values in "data". 274 //---------------------------------------------------------------------- 275 lldb::offset_t DataExtractor::SetData(const DataExtractor &data, 276 offset_t data_offset, 277 offset_t data_length) { 278 m_addr_size = data.m_addr_size; 279 #ifdef LLDB_CONFIGURATION_DEBUG 280 assert(m_addr_size == 4 || m_addr_size == 8); 281 #endif 282 // If "data" contains shared pointer to data, then we can use that 283 if (data.m_data_sp) { 284 m_byte_order = data.m_byte_order; 285 return SetData(data.m_data_sp, data.GetSharedDataOffset() + data_offset, 286 data_length); 287 } 288 289 // We have a DataExtractor object that just has a pointer to bytes 290 if (data.ValidOffset(data_offset)) { 291 if (data_length > data.GetByteSize() - data_offset) 292 data_length = data.GetByteSize() - data_offset; 293 return SetData(data.GetDataStart() + data_offset, data_length, 294 data.GetByteOrder()); 295 } 296 return 0; 297 } 298 299 //---------------------------------------------------------------------- 300 // Assign the data for this object to be a subrange of the shared data in 301 // "data_sp" starting "data_offset" bytes into "data_sp" and ending 302 // "data_length" bytes later. If "data_offset" is not a valid offset into 303 // "data_sp", then this object will contain no bytes. If "data_offset" is 304 // within "data_sp" yet "data_length" is too large, the length will be capped 305 // at the number of bytes remaining in "data_sp". A ref counted pointer to the 306 // data in "data_sp" will be made in this object IF the number of bytes this 307 // object refers to in greater than zero (if at least one byte was available 308 // starting at "data_offset") to ensure the data stays around as long as it is 309 // needed. The address size and endian swap settings will remain unchanged from 310 // their current settings. 311 //---------------------------------------------------------------------- 312 lldb::offset_t DataExtractor::SetData(const DataBufferSP &data_sp, 313 offset_t data_offset, 314 offset_t data_length) { 315 m_start = m_end = nullptr; 316 317 if (data_length > 0) { 318 m_data_sp = data_sp; 319 if (data_sp) { 320 const size_t data_size = data_sp->GetByteSize(); 321 if (data_offset < data_size) { 322 m_start = data_sp->GetBytes() + data_offset; 323 const size_t bytes_left = data_size - data_offset; 324 // Cap the length of we asked for too many 325 if (data_length <= bytes_left) 326 m_end = m_start + data_length; // We got all the bytes we wanted 327 else 328 m_end = m_start + bytes_left; // Not all the bytes requested were 329 // available in the shared data 330 } 331 } 332 } 333 334 size_t new_size = GetByteSize(); 335 336 // Don't hold a shared pointer to the data buffer if we don't share any valid 337 // bytes in the shared buffer. 338 if (new_size == 0) 339 m_data_sp.reset(); 340 341 return new_size; 342 } 343 344 //---------------------------------------------------------------------- 345 // Extract a single unsigned char from the binary data and update the offset 346 // pointed to by "offset_ptr". 347 // 348 // RETURNS the byte that was extracted, or zero on failure. 349 //---------------------------------------------------------------------- 350 uint8_t DataExtractor::GetU8(offset_t *offset_ptr) const { 351 const uint8_t *data = (const uint8_t *)GetData(offset_ptr, 1); 352 if (data) 353 return *data; 354 return 0; 355 } 356 357 //---------------------------------------------------------------------- 358 // Extract "count" unsigned chars from the binary data and update the offset 359 // pointed to by "offset_ptr". The extracted data is copied into "dst". 360 // 361 // RETURNS the non-nullptr buffer pointer upon successful extraction of 362 // all the requested bytes, or nullptr when the data is not available in the 363 // buffer due to being out of bounds, or insufficient data. 364 //---------------------------------------------------------------------- 365 void *DataExtractor::GetU8(offset_t *offset_ptr, void *dst, 366 uint32_t count) const { 367 const uint8_t *data = (const uint8_t *)GetData(offset_ptr, count); 368 if (data) { 369 // Copy the data into the buffer 370 memcpy(dst, data, count); 371 // Return a non-nullptr pointer to the converted data as an indicator of 372 // success 373 return dst; 374 } 375 return nullptr; 376 } 377 378 //---------------------------------------------------------------------- 379 // Extract a single uint16_t from the data and update the offset pointed to by 380 // "offset_ptr". 381 // 382 // RETURNS the uint16_t that was extracted, or zero on failure. 383 //---------------------------------------------------------------------- 384 uint16_t DataExtractor::GetU16(offset_t *offset_ptr) const { 385 uint16_t val = 0; 386 const uint8_t *data = (const uint8_t *)GetData(offset_ptr, sizeof(val)); 387 if (data) { 388 if (m_byte_order != endian::InlHostByteOrder()) 389 val = ReadSwapInt16(data); 390 else 391 val = ReadInt16(data); 392 } 393 return val; 394 } 395 396 uint16_t DataExtractor::GetU16_unchecked(offset_t *offset_ptr) const { 397 uint16_t val; 398 if (m_byte_order == endian::InlHostByteOrder()) 399 val = ReadInt16(m_start, *offset_ptr); 400 else 401 val = ReadSwapInt16(m_start, *offset_ptr); 402 *offset_ptr += sizeof(val); 403 return val; 404 } 405 406 uint32_t DataExtractor::GetU32_unchecked(offset_t *offset_ptr) const { 407 uint32_t val; 408 if (m_byte_order == endian::InlHostByteOrder()) 409 val = ReadInt32(m_start, *offset_ptr); 410 else 411 val = ReadSwapInt32(m_start, *offset_ptr); 412 *offset_ptr += sizeof(val); 413 return val; 414 } 415 416 uint64_t DataExtractor::GetU64_unchecked(offset_t *offset_ptr) const { 417 uint64_t val; 418 if (m_byte_order == endian::InlHostByteOrder()) 419 val = ReadInt64(m_start, *offset_ptr); 420 else 421 val = ReadSwapInt64(m_start, *offset_ptr); 422 *offset_ptr += sizeof(val); 423 return val; 424 } 425 426 //---------------------------------------------------------------------- 427 // Extract "count" uint16_t values from the binary data and update the offset 428 // pointed to by "offset_ptr". The extracted data is copied into "dst". 429 // 430 // RETURNS the non-nullptr buffer pointer upon successful extraction of 431 // all the requested bytes, or nullptr when the data is not available in the 432 // buffer due to being out of bounds, or insufficient data. 433 //---------------------------------------------------------------------- 434 void *DataExtractor::GetU16(offset_t *offset_ptr, void *void_dst, 435 uint32_t count) const { 436 const size_t src_size = sizeof(uint16_t) * count; 437 const uint16_t *src = (const uint16_t *)GetData(offset_ptr, src_size); 438 if (src) { 439 if (m_byte_order != endian::InlHostByteOrder()) { 440 uint16_t *dst_pos = (uint16_t *)void_dst; 441 uint16_t *dst_end = dst_pos + count; 442 const uint16_t *src_pos = src; 443 while (dst_pos < dst_end) { 444 *dst_pos = ReadSwapInt16(src_pos); 445 ++dst_pos; 446 ++src_pos; 447 } 448 } else { 449 memcpy(void_dst, src, src_size); 450 } 451 // Return a non-nullptr pointer to the converted data as an indicator of 452 // success 453 return void_dst; 454 } 455 return nullptr; 456 } 457 458 //---------------------------------------------------------------------- 459 // Extract a single uint32_t from the data and update the offset pointed to by 460 // "offset_ptr". 461 // 462 // RETURNS the uint32_t that was extracted, or zero on failure. 463 //---------------------------------------------------------------------- 464 uint32_t DataExtractor::GetU32(offset_t *offset_ptr) const { 465 uint32_t val = 0; 466 const uint8_t *data = (const uint8_t *)GetData(offset_ptr, sizeof(val)); 467 if (data) { 468 if (m_byte_order != endian::InlHostByteOrder()) { 469 val = ReadSwapInt32(data); 470 } else { 471 memcpy(&val, data, 4); 472 } 473 } 474 return val; 475 } 476 477 //---------------------------------------------------------------------- 478 // Extract "count" uint32_t values from the binary data and update the offset 479 // pointed to by "offset_ptr". The extracted data is copied into "dst". 480 // 481 // RETURNS the non-nullptr buffer pointer upon successful extraction of 482 // all the requested bytes, or nullptr when the data is not available in the 483 // buffer due to being out of bounds, or insufficient data. 484 //---------------------------------------------------------------------- 485 void *DataExtractor::GetU32(offset_t *offset_ptr, void *void_dst, 486 uint32_t count) const { 487 const size_t src_size = sizeof(uint32_t) * count; 488 const uint32_t *src = (const uint32_t *)GetData(offset_ptr, src_size); 489 if (src) { 490 if (m_byte_order != endian::InlHostByteOrder()) { 491 uint32_t *dst_pos = (uint32_t *)void_dst; 492 uint32_t *dst_end = dst_pos + count; 493 const uint32_t *src_pos = src; 494 while (dst_pos < dst_end) { 495 *dst_pos = ReadSwapInt32(src_pos); 496 ++dst_pos; 497 ++src_pos; 498 } 499 } else { 500 memcpy(void_dst, src, src_size); 501 } 502 // Return a non-nullptr pointer to the converted data as an indicator of 503 // success 504 return void_dst; 505 } 506 return nullptr; 507 } 508 509 //---------------------------------------------------------------------- 510 // Extract a single uint64_t from the data and update the offset pointed to by 511 // "offset_ptr". 512 // 513 // RETURNS the uint64_t that was extracted, or zero on failure. 514 //---------------------------------------------------------------------- 515 uint64_t DataExtractor::GetU64(offset_t *offset_ptr) const { 516 uint64_t val = 0; 517 const uint8_t *data = (const uint8_t *)GetData(offset_ptr, sizeof(val)); 518 if (data) { 519 if (m_byte_order != endian::InlHostByteOrder()) { 520 val = ReadSwapInt64(data); 521 } else { 522 memcpy(&val, data, 8); 523 } 524 } 525 return val; 526 } 527 528 //---------------------------------------------------------------------- 529 // GetU64 530 // 531 // Get multiple consecutive 64 bit values. Return true if the entire read 532 // succeeds and increment the offset pointed to by offset_ptr, else return 533 // false and leave the offset pointed to by offset_ptr unchanged. 534 //---------------------------------------------------------------------- 535 void *DataExtractor::GetU64(offset_t *offset_ptr, void *void_dst, 536 uint32_t count) const { 537 const size_t src_size = sizeof(uint64_t) * count; 538 const uint64_t *src = (const uint64_t *)GetData(offset_ptr, src_size); 539 if (src) { 540 if (m_byte_order != endian::InlHostByteOrder()) { 541 uint64_t *dst_pos = (uint64_t *)void_dst; 542 uint64_t *dst_end = dst_pos + count; 543 const uint64_t *src_pos = src; 544 while (dst_pos < dst_end) { 545 *dst_pos = ReadSwapInt64(src_pos); 546 ++dst_pos; 547 ++src_pos; 548 } 549 } else { 550 memcpy(void_dst, src, src_size); 551 } 552 // Return a non-nullptr pointer to the converted data as an indicator of 553 // success 554 return void_dst; 555 } 556 return nullptr; 557 } 558 559 uint32_t DataExtractor::GetMaxU32(offset_t *offset_ptr, 560 size_t byte_size) const { 561 lldbassert(byte_size > 0 && byte_size <= 4 && "GetMaxU32 invalid byte_size!"); 562 return GetMaxU64(offset_ptr, byte_size); 563 } 564 565 uint64_t DataExtractor::GetMaxU64(offset_t *offset_ptr, 566 size_t byte_size) const { 567 lldbassert(byte_size > 0 && byte_size <= 8 && "GetMaxU64 invalid byte_size!"); 568 switch (byte_size) { 569 case 1: 570 return GetU8(offset_ptr); 571 case 2: 572 return GetU16(offset_ptr); 573 case 4: 574 return GetU32(offset_ptr); 575 case 8: 576 return GetU64(offset_ptr); 577 default: { 578 // General case. 579 const uint8_t *data = 580 static_cast<const uint8_t *>(GetData(offset_ptr, byte_size)); 581 if (data == nullptr) 582 return 0; 583 return ReadMaxInt64(data, byte_size, m_byte_order); 584 } 585 } 586 return 0; 587 } 588 589 uint64_t DataExtractor::GetMaxU64_unchecked(offset_t *offset_ptr, 590 size_t byte_size) const { 591 switch (byte_size) { 592 case 1: 593 return GetU8_unchecked(offset_ptr); 594 case 2: 595 return GetU16_unchecked(offset_ptr); 596 case 4: 597 return GetU32_unchecked(offset_ptr); 598 case 8: 599 return GetU64_unchecked(offset_ptr); 600 default: { 601 uint64_t res = ReadMaxInt64(&m_start[*offset_ptr], byte_size, m_byte_order); 602 *offset_ptr += byte_size; 603 return res; 604 } 605 } 606 return 0; 607 } 608 609 int64_t DataExtractor::GetMaxS64(offset_t *offset_ptr, size_t byte_size) const { 610 uint64_t u64 = GetMaxU64(offset_ptr, byte_size); 611 return llvm::SignExtend64(u64, 8 * byte_size); 612 } 613 614 uint64_t DataExtractor::GetMaxU64Bitfield(offset_t *offset_ptr, size_t size, 615 uint32_t bitfield_bit_size, 616 uint32_t bitfield_bit_offset) const { 617 uint64_t uval64 = GetMaxU64(offset_ptr, size); 618 if (bitfield_bit_size > 0) { 619 int32_t lsbcount = bitfield_bit_offset; 620 if (m_byte_order == eByteOrderBig) 621 lsbcount = size * 8 - bitfield_bit_offset - bitfield_bit_size; 622 if (lsbcount > 0) 623 uval64 >>= lsbcount; 624 uint64_t bitfield_mask = ((1ul << bitfield_bit_size) - 1); 625 if (!bitfield_mask && bitfield_bit_offset == 0 && bitfield_bit_size == 64) 626 return uval64; 627 uval64 &= bitfield_mask; 628 } 629 return uval64; 630 } 631 632 int64_t DataExtractor::GetMaxS64Bitfield(offset_t *offset_ptr, size_t size, 633 uint32_t bitfield_bit_size, 634 uint32_t bitfield_bit_offset) const { 635 int64_t sval64 = GetMaxS64(offset_ptr, size); 636 if (bitfield_bit_size > 0) { 637 int32_t lsbcount = bitfield_bit_offset; 638 if (m_byte_order == eByteOrderBig) 639 lsbcount = size * 8 - bitfield_bit_offset - bitfield_bit_size; 640 if (lsbcount > 0) 641 sval64 >>= lsbcount; 642 uint64_t bitfield_mask = (((uint64_t)1) << bitfield_bit_size) - 1; 643 sval64 &= bitfield_mask; 644 // sign extend if needed 645 if (sval64 & (((uint64_t)1) << (bitfield_bit_size - 1))) 646 sval64 |= ~bitfield_mask; 647 } 648 return sval64; 649 } 650 651 float DataExtractor::GetFloat(offset_t *offset_ptr) const { 652 typedef float float_type; 653 float_type val = 0.0; 654 const size_t src_size = sizeof(float_type); 655 const float_type *src = (const float_type *)GetData(offset_ptr, src_size); 656 if (src) { 657 if (m_byte_order != endian::InlHostByteOrder()) { 658 const uint8_t *src_data = (const uint8_t *)src; 659 uint8_t *dst_data = (uint8_t *)&val; 660 for (size_t i = 0; i < sizeof(float_type); ++i) 661 dst_data[sizeof(float_type) - 1 - i] = src_data[i]; 662 } else { 663 val = *src; 664 } 665 } 666 return val; 667 } 668 669 double DataExtractor::GetDouble(offset_t *offset_ptr) const { 670 typedef double float_type; 671 float_type val = 0.0; 672 const size_t src_size = sizeof(float_type); 673 const float_type *src = (const float_type *)GetData(offset_ptr, src_size); 674 if (src) { 675 if (m_byte_order != endian::InlHostByteOrder()) { 676 const uint8_t *src_data = (const uint8_t *)src; 677 uint8_t *dst_data = (uint8_t *)&val; 678 for (size_t i = 0; i < sizeof(float_type); ++i) 679 dst_data[sizeof(float_type) - 1 - i] = src_data[i]; 680 } else { 681 val = *src; 682 } 683 } 684 return val; 685 } 686 687 long double DataExtractor::GetLongDouble(offset_t *offset_ptr) const { 688 long double val = 0.0; 689 #if defined(__i386__) || defined(__amd64__) || defined(__x86_64__) || \ 690 defined(_M_IX86) || defined(_M_IA64) || defined(_M_X64) 691 *offset_ptr += CopyByteOrderedData(*offset_ptr, 10, &val, sizeof(val), 692 endian::InlHostByteOrder()); 693 #else 694 *offset_ptr += CopyByteOrderedData(*offset_ptr, sizeof(val), &val, 695 sizeof(val), endian::InlHostByteOrder()); 696 #endif 697 return val; 698 } 699 700 //------------------------------------------------------------------ 701 // Extract a single address from the data and update the offset pointed to by 702 // "offset_ptr". The size of the extracted address comes from the 703 // "this->m_addr_size" member variable and should be set correctly prior to 704 // extracting any address values. 705 // 706 // RETURNS the address that was extracted, or zero on failure. 707 //------------------------------------------------------------------ 708 uint64_t DataExtractor::GetAddress(offset_t *offset_ptr) const { 709 #ifdef LLDB_CONFIGURATION_DEBUG 710 assert(m_addr_size == 4 || m_addr_size == 8); 711 #endif 712 return GetMaxU64(offset_ptr, m_addr_size); 713 } 714 715 uint64_t DataExtractor::GetAddress_unchecked(offset_t *offset_ptr) const { 716 #ifdef LLDB_CONFIGURATION_DEBUG 717 assert(m_addr_size == 4 || m_addr_size == 8); 718 #endif 719 return GetMaxU64_unchecked(offset_ptr, m_addr_size); 720 } 721 722 //------------------------------------------------------------------ 723 // Extract a single pointer from the data and update the offset pointed to by 724 // "offset_ptr". The size of the extracted pointer comes from the 725 // "this->m_addr_size" member variable and should be set correctly prior to 726 // extracting any pointer values. 727 // 728 // RETURNS the pointer that was extracted, or zero on failure. 729 //------------------------------------------------------------------ 730 uint64_t DataExtractor::GetPointer(offset_t *offset_ptr) const { 731 #ifdef LLDB_CONFIGURATION_DEBUG 732 assert(m_addr_size == 4 || m_addr_size == 8); 733 #endif 734 return GetMaxU64(offset_ptr, m_addr_size); 735 } 736 737 size_t DataExtractor::ExtractBytes(offset_t offset, offset_t length, 738 ByteOrder dst_byte_order, void *dst) const { 739 const uint8_t *src = PeekData(offset, length); 740 if (src) { 741 if (dst_byte_order != GetByteOrder()) { 742 // Validate that only a word- or register-sized dst is byte swapped 743 assert(length == 1 || length == 2 || length == 4 || length == 8 || 744 length == 10 || length == 16 || length == 32); 745 746 for (uint32_t i = 0; i < length; ++i) 747 ((uint8_t *)dst)[i] = src[length - i - 1]; 748 } else 749 ::memcpy(dst, src, length); 750 return length; 751 } 752 return 0; 753 } 754 755 // Extract data as it exists in target memory 756 lldb::offset_t DataExtractor::CopyData(offset_t offset, offset_t length, 757 void *dst) const { 758 const uint8_t *src = PeekData(offset, length); 759 if (src) { 760 ::memcpy(dst, src, length); 761 return length; 762 } 763 return 0; 764 } 765 766 // Extract data and swap if needed when doing the copy 767 lldb::offset_t 768 DataExtractor::CopyByteOrderedData(offset_t src_offset, offset_t src_len, 769 void *dst_void_ptr, offset_t dst_len, 770 ByteOrder dst_byte_order) const { 771 // Validate the source info 772 if (!ValidOffsetForDataOfSize(src_offset, src_len)) 773 assert(ValidOffsetForDataOfSize(src_offset, src_len)); 774 assert(src_len > 0); 775 assert(m_byte_order == eByteOrderBig || m_byte_order == eByteOrderLittle); 776 777 // Validate the destination info 778 assert(dst_void_ptr != nullptr); 779 assert(dst_len > 0); 780 assert(dst_byte_order == eByteOrderBig || dst_byte_order == eByteOrderLittle); 781 782 // Validate that only a word- or register-sized dst is byte swapped 783 assert(dst_byte_order == m_byte_order || dst_len == 1 || dst_len == 2 || 784 dst_len == 4 || dst_len == 8 || dst_len == 10 || dst_len == 16 || 785 dst_len == 32); 786 787 // Must have valid byte orders set in this object and for destination 788 if (!(dst_byte_order == eByteOrderBig || 789 dst_byte_order == eByteOrderLittle) || 790 !(m_byte_order == eByteOrderBig || m_byte_order == eByteOrderLittle)) 791 return 0; 792 793 uint8_t *dst = (uint8_t *)dst_void_ptr; 794 const uint8_t *src = (const uint8_t *)PeekData(src_offset, src_len); 795 if (src) { 796 if (dst_len >= src_len) { 797 // We are copying the entire value from src into dst. Calculate how many, 798 // if any, zeroes we need for the most significant bytes if "dst_len" is 799 // greater than "src_len"... 800 const size_t num_zeroes = dst_len - src_len; 801 if (dst_byte_order == eByteOrderBig) { 802 // Big endian, so we lead with zeroes... 803 if (num_zeroes > 0) 804 ::memset(dst, 0, num_zeroes); 805 // Then either copy or swap the rest 806 if (m_byte_order == eByteOrderBig) { 807 ::memcpy(dst + num_zeroes, src, src_len); 808 } else { 809 for (uint32_t i = 0; i < src_len; ++i) 810 dst[i + num_zeroes] = src[src_len - 1 - i]; 811 } 812 } else { 813 // Little endian destination, so we lead the value bytes 814 if (m_byte_order == eByteOrderBig) { 815 for (uint32_t i = 0; i < src_len; ++i) 816 dst[i] = src[src_len - 1 - i]; 817 } else { 818 ::memcpy(dst, src, src_len); 819 } 820 // And zero the rest... 821 if (num_zeroes > 0) 822 ::memset(dst + src_len, 0, num_zeroes); 823 } 824 return src_len; 825 } else { 826 // We are only copying some of the value from src into dst.. 827 828 if (dst_byte_order == eByteOrderBig) { 829 // Big endian dst 830 if (m_byte_order == eByteOrderBig) { 831 // Big endian dst, with big endian src 832 ::memcpy(dst, src + (src_len - dst_len), dst_len); 833 } else { 834 // Big endian dst, with little endian src 835 for (uint32_t i = 0; i < dst_len; ++i) 836 dst[i] = src[dst_len - 1 - i]; 837 } 838 } else { 839 // Little endian dst 840 if (m_byte_order == eByteOrderBig) { 841 // Little endian dst, with big endian src 842 for (uint32_t i = 0; i < dst_len; ++i) 843 dst[i] = src[src_len - 1 - i]; 844 } else { 845 // Little endian dst, with big endian src 846 ::memcpy(dst, src, dst_len); 847 } 848 } 849 return dst_len; 850 } 851 } 852 return 0; 853 } 854 855 //---------------------------------------------------------------------- 856 // Extracts a variable length NULL terminated C string from the data at the 857 // offset pointed to by "offset_ptr". The "offset_ptr" will be updated with 858 // the offset of the byte that follows the NULL terminator byte. 859 // 860 // If the offset pointed to by "offset_ptr" is out of bounds, or if "length" is 861 // non-zero and there aren't enough available bytes, nullptr will be returned 862 // and "offset_ptr" will not be updated. 863 //---------------------------------------------------------------------- 864 const char *DataExtractor::GetCStr(offset_t *offset_ptr) const { 865 const char *cstr = (const char *)PeekData(*offset_ptr, 1); 866 if (cstr) { 867 const char *cstr_end = cstr; 868 const char *end = (const char *)m_end; 869 while (cstr_end < end && *cstr_end) 870 ++cstr_end; 871 872 // Now we are either at the end of the data or we point to the 873 // NULL C string terminator with cstr_end... 874 if (*cstr_end == '\0') { 875 // Advance the offset with one extra byte for the NULL terminator 876 *offset_ptr += (cstr_end - cstr + 1); 877 return cstr; 878 } 879 880 // We reached the end of the data without finding a NULL C string 881 // terminator. Fall through and return nullptr otherwise anyone that would 882 // have used the result as a C string can wander into unknown memory... 883 } 884 return nullptr; 885 } 886 887 //---------------------------------------------------------------------- 888 // Extracts a NULL terminated C string from the fixed length field of length 889 // "len" at the offset pointed to by "offset_ptr". The "offset_ptr" will be 890 // updated with the offset of the byte that follows the fixed length field. 891 // 892 // If the offset pointed to by "offset_ptr" is out of bounds, or if the offset 893 // plus the length of the field is out of bounds, or if the field does not 894 // contain a NULL terminator byte, nullptr will be returned and "offset_ptr" 895 // will not be updated. 896 //---------------------------------------------------------------------- 897 const char *DataExtractor::GetCStr(offset_t *offset_ptr, offset_t len) const { 898 const char *cstr = (const char *)PeekData(*offset_ptr, len); 899 if (cstr != nullptr) { 900 if (memchr(cstr, '\0', len) == nullptr) { 901 return nullptr; 902 } 903 *offset_ptr += len; 904 return cstr; 905 } 906 return nullptr; 907 } 908 909 //------------------------------------------------------------------ 910 // Peeks at a string in the contained data. No verification is done to make 911 // sure the entire string lies within the bounds of this object's data, only 912 // "offset" is verified to be a valid offset. 913 // 914 // Returns a valid C string pointer if "offset" is a valid offset in this 915 // object's data, else nullptr is returned. 916 //------------------------------------------------------------------ 917 const char *DataExtractor::PeekCStr(offset_t offset) const { 918 return (const char *)PeekData(offset, 1); 919 } 920 921 //---------------------------------------------------------------------- 922 // Extracts an unsigned LEB128 number from this object's data starting at the 923 // offset pointed to by "offset_ptr". The offset pointed to by "offset_ptr" 924 // will be updated with the offset of the byte following the last extracted 925 // byte. 926 // 927 // Returned the extracted integer value. 928 //---------------------------------------------------------------------- 929 uint64_t DataExtractor::GetULEB128(offset_t *offset_ptr) const { 930 const uint8_t *src = (const uint8_t *)PeekData(*offset_ptr, 1); 931 if (src == nullptr) 932 return 0; 933 934 const uint8_t *end = m_end; 935 936 if (src < end) { 937 uint64_t result = *src++; 938 if (result >= 0x80) { 939 result &= 0x7f; 940 int shift = 7; 941 while (src < end) { 942 uint8_t byte = *src++; 943 result |= (uint64_t)(byte & 0x7f) << shift; 944 if ((byte & 0x80) == 0) 945 break; 946 shift += 7; 947 } 948 } 949 *offset_ptr = src - m_start; 950 return result; 951 } 952 953 return 0; 954 } 955 956 //---------------------------------------------------------------------- 957 // Extracts an signed LEB128 number from this object's data starting at the 958 // offset pointed to by "offset_ptr". The offset pointed to by "offset_ptr" 959 // will be updated with the offset of the byte following the last extracted 960 // byte. 961 // 962 // Returned the extracted integer value. 963 //---------------------------------------------------------------------- 964 int64_t DataExtractor::GetSLEB128(offset_t *offset_ptr) const { 965 const uint8_t *src = (const uint8_t *)PeekData(*offset_ptr, 1); 966 if (src == nullptr) 967 return 0; 968 969 const uint8_t *end = m_end; 970 971 if (src < end) { 972 int64_t result = 0; 973 int shift = 0; 974 int size = sizeof(int64_t) * 8; 975 976 uint8_t byte = 0; 977 int bytecount = 0; 978 979 while (src < end) { 980 bytecount++; 981 byte = *src++; 982 result |= (int64_t)(byte & 0x7f) << shift; 983 shift += 7; 984 if ((byte & 0x80) == 0) 985 break; 986 } 987 988 // Sign bit of byte is 2nd high order bit (0x40) 989 if (shift < size && (byte & 0x40)) 990 result |= -(1 << shift); 991 992 *offset_ptr += bytecount; 993 return result; 994 } 995 return 0; 996 } 997 998 //---------------------------------------------------------------------- 999 // Skips a ULEB128 number (signed or unsigned) from this object's data starting 1000 // at the offset pointed to by "offset_ptr". The offset pointed to by 1001 // "offset_ptr" will be updated with the offset of the byte following the last 1002 // extracted byte. 1003 // 1004 // Returns the number of bytes consumed during the extraction. 1005 //---------------------------------------------------------------------- 1006 uint32_t DataExtractor::Skip_LEB128(offset_t *offset_ptr) const { 1007 uint32_t bytes_consumed = 0; 1008 const uint8_t *src = (const uint8_t *)PeekData(*offset_ptr, 1); 1009 if (src == nullptr) 1010 return 0; 1011 1012 const uint8_t *end = m_end; 1013 1014 if (src < end) { 1015 const uint8_t *src_pos = src; 1016 while ((src_pos < end) && (*src_pos++ & 0x80)) 1017 ++bytes_consumed; 1018 *offset_ptr += src_pos - src; 1019 } 1020 return bytes_consumed; 1021 } 1022 1023 //---------------------------------------------------------------------- 1024 // Dumps bytes from this object's data to the stream "s" starting 1025 // "start_offset" bytes into this data, and ending with the byte before 1026 // "end_offset". "base_addr" will be added to the offset into the dumped data 1027 // when showing the offset into the data in the output information. 1028 // "num_per_line" objects of type "type" will be dumped with the option to 1029 // override the format for each object with "type_format". "type_format" is a 1030 // printf style formatting string. If "type_format" is nullptr, then an 1031 // appropriate format string will be used for the supplied "type". If the 1032 // stream "s" is nullptr, then the output will be send to Log(). 1033 //---------------------------------------------------------------------- 1034 lldb::offset_t DataExtractor::PutToLog(Log *log, offset_t start_offset, 1035 offset_t length, uint64_t base_addr, 1036 uint32_t num_per_line, 1037 DataExtractor::Type type, 1038 const char *format) const { 1039 if (log == nullptr) 1040 return start_offset; 1041 1042 offset_t offset; 1043 offset_t end_offset; 1044 uint32_t count; 1045 StreamString sstr; 1046 for (offset = start_offset, end_offset = offset + length, count = 0; 1047 ValidOffset(offset) && offset < end_offset; ++count) { 1048 if ((count % num_per_line) == 0) { 1049 // Print out any previous string 1050 if (sstr.GetSize() > 0) { 1051 log->PutString(sstr.GetString()); 1052 sstr.Clear(); 1053 } 1054 // Reset string offset and fill the current line string with address: 1055 if (base_addr != LLDB_INVALID_ADDRESS) 1056 sstr.Printf("0x%8.8" PRIx64 ":", 1057 (uint64_t)(base_addr + (offset - start_offset))); 1058 } 1059 1060 switch (type) { 1061 case TypeUInt8: 1062 sstr.Printf(format ? format : " %2.2x", GetU8(&offset)); 1063 break; 1064 case TypeChar: { 1065 char ch = GetU8(&offset); 1066 sstr.Printf(format ? format : " %c", isprint(ch) ? ch : ' '); 1067 } break; 1068 case TypeUInt16: 1069 sstr.Printf(format ? format : " %4.4x", GetU16(&offset)); 1070 break; 1071 case TypeUInt32: 1072 sstr.Printf(format ? format : " %8.8x", GetU32(&offset)); 1073 break; 1074 case TypeUInt64: 1075 sstr.Printf(format ? format : " %16.16" PRIx64, GetU64(&offset)); 1076 break; 1077 case TypePointer: 1078 sstr.Printf(format ? format : " 0x%" PRIx64, GetAddress(&offset)); 1079 break; 1080 case TypeULEB128: 1081 sstr.Printf(format ? format : " 0x%" PRIx64, GetULEB128(&offset)); 1082 break; 1083 case TypeSLEB128: 1084 sstr.Printf(format ? format : " %" PRId64, GetSLEB128(&offset)); 1085 break; 1086 } 1087 } 1088 1089 if (!sstr.Empty()) 1090 log->PutString(sstr.GetString()); 1091 1092 return offset; // Return the offset at which we ended up 1093 } 1094 1095 size_t DataExtractor::Copy(DataExtractor &dest_data) const { 1096 if (m_data_sp) { 1097 // we can pass along the SP to the data 1098 dest_data.SetData(m_data_sp); 1099 } else { 1100 const uint8_t *base_ptr = m_start; 1101 size_t data_size = GetByteSize(); 1102 dest_data.SetData(DataBufferSP(new DataBufferHeap(base_ptr, data_size))); 1103 } 1104 return GetByteSize(); 1105 } 1106 1107 bool DataExtractor::Append(DataExtractor &rhs) { 1108 if (rhs.GetByteOrder() != GetByteOrder()) 1109 return false; 1110 1111 if (rhs.GetByteSize() == 0) 1112 return true; 1113 1114 if (GetByteSize() == 0) 1115 return (rhs.Copy(*this) > 0); 1116 1117 size_t bytes = GetByteSize() + rhs.GetByteSize(); 1118 1119 DataBufferHeap *buffer_heap_ptr = nullptr; 1120 DataBufferSP buffer_sp(buffer_heap_ptr = new DataBufferHeap(bytes, 0)); 1121 1122 if (!buffer_sp || buffer_heap_ptr == nullptr) 1123 return false; 1124 1125 uint8_t *bytes_ptr = buffer_heap_ptr->GetBytes(); 1126 1127 memcpy(bytes_ptr, GetDataStart(), GetByteSize()); 1128 memcpy(bytes_ptr + GetByteSize(), rhs.GetDataStart(), rhs.GetByteSize()); 1129 1130 SetData(buffer_sp); 1131 1132 return true; 1133 } 1134 1135 bool DataExtractor::Append(void *buf, offset_t length) { 1136 if (buf == nullptr) 1137 return false; 1138 1139 if (length == 0) 1140 return true; 1141 1142 size_t bytes = GetByteSize() + length; 1143 1144 DataBufferHeap *buffer_heap_ptr = nullptr; 1145 DataBufferSP buffer_sp(buffer_heap_ptr = new DataBufferHeap(bytes, 0)); 1146 1147 if (!buffer_sp || buffer_heap_ptr == nullptr) 1148 return false; 1149 1150 uint8_t *bytes_ptr = buffer_heap_ptr->GetBytes(); 1151 1152 if (GetByteSize() > 0) 1153 memcpy(bytes_ptr, GetDataStart(), GetByteSize()); 1154 1155 memcpy(bytes_ptr + GetByteSize(), buf, length); 1156 1157 SetData(buffer_sp); 1158 1159 return true; 1160 } 1161 1162 void DataExtractor::Checksum(llvm::SmallVectorImpl<uint8_t> &dest, 1163 uint64_t max_data) { 1164 if (max_data == 0) 1165 max_data = GetByteSize(); 1166 else 1167 max_data = std::min(max_data, GetByteSize()); 1168 1169 llvm::MD5 md5; 1170 1171 const llvm::ArrayRef<uint8_t> data(GetDataStart(), max_data); 1172 md5.update(data); 1173 1174 llvm::MD5::MD5Result result; 1175 md5.final(result); 1176 1177 dest.clear(); 1178 dest.append(result.Bytes.begin(), result.Bytes.end()); 1179 } 1180