1 //===-- DataExtractor.cpp ---------------------------------------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 // C Includes
11 // C++ Includes
12 #include <cassert>
13 #include <cmath>
14 #include <cstddef>
15 #include <string>
16 
17 // Project includes
18 #include "lldb/Utility/DataBuffer.h"
19 #include "lldb/Utility/DataBufferHeap.h"
20 #include "lldb/Utility/DataExtractor.h"
21 #include "lldb/Utility/Endian.h"
22 #include "lldb/Utility/Log.h"
23 #include "lldb/Utility/Stream.h"
24 #include "lldb/Utility/StreamString.h"
25 #include "lldb/Utility/UUID.h"
26 
27 // Other libraries and framework includes
28 #include "llvm/ADT/ArrayRef.h"
29 #include "llvm/ADT/SmallVector.h"
30 #include "llvm/Support/MD5.h"
31 #include "llvm/Support/MathExtras.h"
32 
33 using namespace lldb;
34 using namespace lldb_private;
35 
36 static inline uint16_t ReadInt16(const unsigned char *ptr, offset_t offset) {
37   uint16_t value;
38   memcpy(&value, ptr + offset, 2);
39   return value;
40 }
41 
42 static inline uint32_t ReadInt32(const unsigned char *ptr,
43                                  offset_t offset = 0) {
44   uint32_t value;
45   memcpy(&value, ptr + offset, 4);
46   return value;
47 }
48 
49 static inline uint64_t ReadInt64(const unsigned char *ptr,
50                                  offset_t offset = 0) {
51   uint64_t value;
52   memcpy(&value, ptr + offset, 8);
53   return value;
54 }
55 
// Read a 16 bit integer, in host byte order, from the two bytes that
// "ptr" points at. memcpy is used so "ptr" need not be aligned.
static inline uint16_t ReadInt16(const void *ptr) {
  uint16_t result;
  memcpy(&result, ptr, sizeof(result));
  return result;
}
61 
62 static inline uint16_t ReadSwapInt16(const unsigned char *ptr,
63                                      offset_t offset) {
64   uint16_t value;
65   memcpy(&value, ptr + offset, 2);
66   return llvm::ByteSwap_16(value);
67 }
68 
69 static inline uint32_t ReadSwapInt32(const unsigned char *ptr,
70                                      offset_t offset) {
71   uint32_t value;
72   memcpy(&value, ptr + offset, 4);
73   return llvm::ByteSwap_32(value);
74 }
75 
76 static inline uint64_t ReadSwapInt64(const unsigned char *ptr,
77                                      offset_t offset) {
78   uint64_t value;
79   memcpy(&value, ptr + offset, 8);
80   return llvm::ByteSwap_64(value);
81 }
82 
83 static inline uint16_t ReadSwapInt16(const void *ptr) {
84   uint16_t value;
85   memcpy(&value, ptr, 2);
86   return llvm::ByteSwap_16(value);
87 }
88 
89 static inline uint32_t ReadSwapInt32(const void *ptr) {
90   uint32_t value;
91   memcpy(&value, ptr, 4);
92   return llvm::ByteSwap_32(value);
93 }
94 
95 static inline uint64_t ReadSwapInt64(const void *ptr) {
96   uint64_t value;
97   memcpy(&value, ptr, 8);
98   return llvm::ByteSwap_64(value);
99 }
100 
//----------------------------------------------------------------------
// Default constructor. The object refers to no data (null start and end
// pointers, empty shared data pointer), uses the host byte order, an
// address size of sizeof(void *) and a target byte size of 1.
//----------------------------------------------------------------------
DataExtractor::DataExtractor()
    : m_start(nullptr), m_end(nullptr),
      m_byte_order(endian::InlHostByteOrder()), m_addr_size(sizeof(void *)),
      m_data_sp(), m_target_byte_size(1) {}
105 
//----------------------------------------------------------------------
// This constructor allows us to use data that is owned by someone else.
// The data must stay around as long as this object is valid.
//
// "data" and "length" describe the externally owned byte range; no
// shared data reference is taken (m_data_sp is left empty). "endian",
// "addr_size" and "target_byte_size" are stored as given.
//----------------------------------------------------------------------
DataExtractor::DataExtractor(const void *data, offset_t length,
                             ByteOrder endian, uint32_t addr_size,
                             uint32_t target_byte_size /*=1*/)
    : m_start(const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(data))),
      m_end(const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(data)) +
            length),
      m_byte_order(endian), m_addr_size(addr_size), m_data_sp(),
      m_target_byte_size(target_byte_size) {
#ifdef LLDB_CONFIGURATION_DEBUG
  // Debug configurations only accept 4 or 8 byte addresses.
  assert(addr_size == 4 || addr_size == 8);
#endif
}
122 
//----------------------------------------------------------------------
// Make a shared pointer reference to the shared data in "data_sp" and
// set the byte order to "endian", the address size to "addr_size" and
// the target byte size to "target_byte_size". The shared data reference
// will ensure the data lives as long as any DataExtractor objects exist
// that have a reference to this data.
//
// The members start out empty and the actual range is installed by the
// SetData(data_sp) call in the constructor body.
//----------------------------------------------------------------------
DataExtractor::DataExtractor(const DataBufferSP &data_sp, ByteOrder endian,
                             uint32_t addr_size,
                             uint32_t target_byte_size /*=1*/)
    : m_start(nullptr), m_end(nullptr), m_byte_order(endian),
      m_addr_size(addr_size), m_data_sp(),
      m_target_byte_size(target_byte_size) {
#ifdef LLDB_CONFIGURATION_DEBUG
  // Debug configurations only accept 4 or 8 byte addresses.
  assert(addr_size == 4 || addr_size == 8);
#endif
  SetData(data_sp);
}
141 
142 //----------------------------------------------------------------------
143 // Initialize this object with a subset of the data bytes in "data".
144 // If "data" contains shared data, then a reference to this shared
145 // data will added and the shared data will stay around as long
146 // as any object contains a reference to that data. The endian
147 // swap and address size settings are copied from "data".
148 //----------------------------------------------------------------------
149 DataExtractor::DataExtractor(const DataExtractor &data, offset_t offset,
150                              offset_t length, uint32_t target_byte_size /*=1*/)
151     : m_start(nullptr), m_end(nullptr), m_byte_order(data.m_byte_order),
152       m_addr_size(data.m_addr_size), m_data_sp(),
153       m_target_byte_size(target_byte_size) {
154 #ifdef LLDB_CONFIGURATION_DEBUG
155   assert(m_addr_size == 4 || m_addr_size == 8);
156 #endif
157   if (data.ValidOffset(offset)) {
158     offset_t bytes_available = data.GetByteSize() - offset;
159     if (length > bytes_available)
160       length = bytes_available;
161     SetData(data, offset, length);
162   }
163 }
164 
//----------------------------------------------------------------------
// Copy constructor. Every member of "rhs" is copied, including the
// shared data pointer, so the new object refers to the same bytes.
//----------------------------------------------------------------------
DataExtractor::DataExtractor(const DataExtractor &rhs)
    : m_start(rhs.m_start), m_end(rhs.m_end), m_byte_order(rhs.m_byte_order),
      m_addr_size(rhs.m_addr_size), m_data_sp(rhs.m_data_sp),
      m_target_byte_size(rhs.m_target_byte_size) {
#ifdef LLDB_CONFIGURATION_DEBUG
  // Debug configurations only accept 4 or 8 byte addresses.
  assert(m_addr_size == 4 || m_addr_size == 8);
#endif
}
173 
174 //----------------------------------------------------------------------
175 // Assignment operator
176 //----------------------------------------------------------------------
177 const DataExtractor &DataExtractor::operator=(const DataExtractor &rhs) {
178   if (this != &rhs) {
179     m_start = rhs.m_start;
180     m_end = rhs.m_end;
181     m_byte_order = rhs.m_byte_order;
182     m_addr_size = rhs.m_addr_size;
183     m_data_sp = rhs.m_data_sp;
184   }
185   return *this;
186 }
187 
// Destructor. Defaulted: no explicit cleanup is performed here beyond
// destroying the members.
DataExtractor::~DataExtractor() = default;
189 
190 //------------------------------------------------------------------
191 // Clears the object contents back to a default invalid state, and
192 // release any references to shared data that this object may
193 // contain.
194 //------------------------------------------------------------------
195 void DataExtractor::Clear() {
196   m_start = nullptr;
197   m_end = nullptr;
198   m_byte_order = endian::InlHostByteOrder();
199   m_addr_size = sizeof(void *);
200   m_data_sp.reset();
201 }
202 
203 //------------------------------------------------------------------
204 // If this object contains shared data, this function returns the
205 // offset into that shared data. Else zero is returned.
206 //------------------------------------------------------------------
207 size_t DataExtractor::GetSharedDataOffset() const {
208   if (m_start != nullptr) {
209     const DataBuffer *data = m_data_sp.get();
210     if (data != nullptr) {
211       const uint8_t *data_bytes = data->GetBytes();
212       if (data_bytes != nullptr) {
213         assert(m_start >= data_bytes);
214         return m_start - data_bytes;
215       }
216     }
217   }
218   return 0;
219 }
220 
221 //----------------------------------------------------------------------
222 // Set the data with which this object will extract from to data
223 // starting at BYTES and set the length of the data to LENGTH bytes
224 // long. The data is externally owned must be around at least as
225 // long as this object points to the data. No copy of the data is
226 // made, this object just refers to this data and can extract from
227 // it. If this object refers to any shared data upon entry, the
228 // reference to that data will be released. Is SWAP is set to true,
229 // any data extracted will be endian swapped.
230 //----------------------------------------------------------------------
231 lldb::offset_t DataExtractor::SetData(const void *bytes, offset_t length,
232                                       ByteOrder endian) {
233   m_byte_order = endian;
234   m_data_sp.reset();
235   if (bytes == nullptr || length == 0) {
236     m_start = nullptr;
237     m_end = nullptr;
238   } else {
239     m_start = const_cast<uint8_t *>(reinterpret_cast<const uint8_t *>(bytes));
240     m_end = m_start + length;
241   }
242   return GetByteSize();
243 }
244 
245 //----------------------------------------------------------------------
246 // Assign the data for this object to be a subrange in "data"
247 // starting "data_offset" bytes into "data" and ending "data_length"
248 // bytes later. If "data_offset" is not a valid offset into "data",
249 // then this object will contain no bytes. If "data_offset" is
250 // within "data" yet "data_length" is too large, the length will be
251 // capped at the number of bytes remaining in "data". If "data"
252 // contains a shared pointer to other data, then a ref counted
253 // pointer to that data will be made in this object. If "data"
254 // doesn't contain a shared pointer to data, then the bytes referred
255 // to in "data" will need to exist at least as long as this object
256 // refers to those bytes. The address size and endian swap settings
257 // are copied from the current values in "data".
258 //----------------------------------------------------------------------
259 lldb::offset_t DataExtractor::SetData(const DataExtractor &data,
260                                       offset_t data_offset,
261                                       offset_t data_length) {
262   m_addr_size = data.m_addr_size;
263 #ifdef LLDB_CONFIGURATION_DEBUG
264   assert(m_addr_size == 4 || m_addr_size == 8);
265 #endif
266   // If "data" contains shared pointer to data, then we can use that
267   if (data.m_data_sp) {
268     m_byte_order = data.m_byte_order;
269     return SetData(data.m_data_sp, data.GetSharedDataOffset() + data_offset,
270                    data_length);
271   }
272 
273   // We have a DataExtractor object that just has a pointer to bytes
274   if (data.ValidOffset(data_offset)) {
275     if (data_length > data.GetByteSize() - data_offset)
276       data_length = data.GetByteSize() - data_offset;
277     return SetData(data.GetDataStart() + data_offset, data_length,
278                    data.GetByteOrder());
279   }
280   return 0;
281 }
282 
283 //----------------------------------------------------------------------
284 // Assign the data for this object to be a subrange of the shared
285 // data in "data_sp" starting "data_offset" bytes into "data_sp"
286 // and ending "data_length" bytes later. If "data_offset" is not
287 // a valid offset into "data_sp", then this object will contain no
288 // bytes. If "data_offset" is within "data_sp" yet "data_length" is
289 // too large, the length will be capped at the number of bytes
290 // remaining in "data_sp". A ref counted pointer to the data in
291 // "data_sp" will be made in this object IF the number of bytes this
292 // object refers to in greater than zero (if at least one byte was
293 // available starting at "data_offset") to ensure the data stays
294 // around as long as it is needed. The address size and endian swap
295 // settings will remain unchanged from their current settings.
296 //----------------------------------------------------------------------
297 lldb::offset_t DataExtractor::SetData(const DataBufferSP &data_sp,
298                                       offset_t data_offset,
299                                       offset_t data_length) {
300   m_start = m_end = nullptr;
301 
302   if (data_length > 0) {
303     m_data_sp = data_sp;
304     if (data_sp) {
305       const size_t data_size = data_sp->GetByteSize();
306       if (data_offset < data_size) {
307         m_start = data_sp->GetBytes() + data_offset;
308         const size_t bytes_left = data_size - data_offset;
309         // Cap the length of we asked for too many
310         if (data_length <= bytes_left)
311           m_end = m_start + data_length; // We got all the bytes we wanted
312         else
313           m_end = m_start + bytes_left; // Not all the bytes requested were
314                                         // available in the shared data
315       }
316     }
317   }
318 
319   size_t new_size = GetByteSize();
320 
321   // Don't hold a shared pointer to the data buffer if we don't share
322   // any valid bytes in the shared buffer.
323   if (new_size == 0)
324     m_data_sp.reset();
325 
326   return new_size;
327 }
328 
329 //----------------------------------------------------------------------
330 // Extract a single unsigned char from the binary data and update
331 // the offset pointed to by "offset_ptr".
332 //
333 // RETURNS the byte that was extracted, or zero on failure.
334 //----------------------------------------------------------------------
335 uint8_t DataExtractor::GetU8(offset_t *offset_ptr) const {
336   const uint8_t *data = (const uint8_t *)GetData(offset_ptr, 1);
337   if (data)
338     return *data;
339   return 0;
340 }
341 
342 //----------------------------------------------------------------------
343 // Extract "count" unsigned chars from the binary data and update the
344 // offset pointed to by "offset_ptr". The extracted data is copied into
345 // "dst".
346 //
347 // RETURNS the non-nullptr buffer pointer upon successful extraction of
348 // all the requested bytes, or nullptr when the data is not available in
349 // the buffer due to being out of bounds, or insufficient data.
350 //----------------------------------------------------------------------
351 void *DataExtractor::GetU8(offset_t *offset_ptr, void *dst,
352                            uint32_t count) const {
353   const uint8_t *data = (const uint8_t *)GetData(offset_ptr, count);
354   if (data) {
355     // Copy the data into the buffer
356     memcpy(dst, data, count);
357     // Return a non-nullptr pointer to the converted data as an indicator of
358     // success
359     return dst;
360   }
361   return nullptr;
362 }
363 
364 //----------------------------------------------------------------------
365 // Extract a single uint16_t from the data and update the offset
366 // pointed to by "offset_ptr".
367 //
368 // RETURNS the uint16_t that was extracted, or zero on failure.
369 //----------------------------------------------------------------------
370 uint16_t DataExtractor::GetU16(offset_t *offset_ptr) const {
371   uint16_t val = 0;
372   const uint8_t *data = (const uint8_t *)GetData(offset_ptr, sizeof(val));
373   if (data) {
374     if (m_byte_order != endian::InlHostByteOrder())
375       val = ReadSwapInt16(data);
376     else
377       val = ReadInt16(data);
378   }
379   return val;
380 }
381 
382 uint16_t DataExtractor::GetU16_unchecked(offset_t *offset_ptr) const {
383   uint16_t val;
384   if (m_byte_order == endian::InlHostByteOrder())
385     val = ReadInt16(m_start, *offset_ptr);
386   else
387     val = ReadSwapInt16(m_start, *offset_ptr);
388   *offset_ptr += sizeof(val);
389   return val;
390 }
391 
392 uint32_t DataExtractor::GetU32_unchecked(offset_t *offset_ptr) const {
393   uint32_t val;
394   if (m_byte_order == endian::InlHostByteOrder())
395     val = ReadInt32(m_start, *offset_ptr);
396   else
397     val = ReadSwapInt32(m_start, *offset_ptr);
398   *offset_ptr += sizeof(val);
399   return val;
400 }
401 
402 uint64_t DataExtractor::GetU64_unchecked(offset_t *offset_ptr) const {
403   uint64_t val;
404   if (m_byte_order == endian::InlHostByteOrder())
405     val = ReadInt64(m_start, *offset_ptr);
406   else
407     val = ReadSwapInt64(m_start, *offset_ptr);
408   *offset_ptr += sizeof(val);
409   return val;
410 }
411 
412 //----------------------------------------------------------------------
413 // Extract "count" uint16_t values from the binary data and update
414 // the offset pointed to by "offset_ptr". The extracted data is
415 // copied into "dst".
416 //
417 // RETURNS the non-nullptr buffer pointer upon successful extraction of
418 // all the requested bytes, or nullptr when the data is not available
419 // in the buffer due to being out of bounds, or insufficient data.
420 //----------------------------------------------------------------------
421 void *DataExtractor::GetU16(offset_t *offset_ptr, void *void_dst,
422                             uint32_t count) const {
423   const size_t src_size = sizeof(uint16_t) * count;
424   const uint16_t *src = (const uint16_t *)GetData(offset_ptr, src_size);
425   if (src) {
426     if (m_byte_order != endian::InlHostByteOrder()) {
427       uint16_t *dst_pos = (uint16_t *)void_dst;
428       uint16_t *dst_end = dst_pos + count;
429       const uint16_t *src_pos = src;
430       while (dst_pos < dst_end) {
431         *dst_pos = ReadSwapInt16(src_pos);
432         ++dst_pos;
433         ++src_pos;
434       }
435     } else {
436       memcpy(void_dst, src, src_size);
437     }
438     // Return a non-nullptr pointer to the converted data as an indicator of
439     // success
440     return void_dst;
441   }
442   return nullptr;
443 }
444 
445 //----------------------------------------------------------------------
446 // Extract a single uint32_t from the data and update the offset
447 // pointed to by "offset_ptr".
448 //
449 // RETURNS the uint32_t that was extracted, or zero on failure.
450 //----------------------------------------------------------------------
451 uint32_t DataExtractor::GetU32(offset_t *offset_ptr) const {
452   uint32_t val = 0;
453   const uint8_t *data = (const uint8_t *)GetData(offset_ptr, sizeof(val));
454   if (data) {
455     if (m_byte_order != endian::InlHostByteOrder()) {
456       val = ReadSwapInt32(data);
457     } else {
458       memcpy(&val, data, 4);
459     }
460   }
461   return val;
462 }
463 
464 //----------------------------------------------------------------------
465 // Extract "count" uint32_t values from the binary data and update
466 // the offset pointed to by "offset_ptr". The extracted data is
467 // copied into "dst".
468 //
469 // RETURNS the non-nullptr buffer pointer upon successful extraction of
470 // all the requested bytes, or nullptr when the data is not available
471 // in the buffer due to being out of bounds, or insufficient data.
472 //----------------------------------------------------------------------
473 void *DataExtractor::GetU32(offset_t *offset_ptr, void *void_dst,
474                             uint32_t count) const {
475   const size_t src_size = sizeof(uint32_t) * count;
476   const uint32_t *src = (const uint32_t *)GetData(offset_ptr, src_size);
477   if (src) {
478     if (m_byte_order != endian::InlHostByteOrder()) {
479       uint32_t *dst_pos = (uint32_t *)void_dst;
480       uint32_t *dst_end = dst_pos + count;
481       const uint32_t *src_pos = src;
482       while (dst_pos < dst_end) {
483         *dst_pos = ReadSwapInt32(src_pos);
484         ++dst_pos;
485         ++src_pos;
486       }
487     } else {
488       memcpy(void_dst, src, src_size);
489     }
490     // Return a non-nullptr pointer to the converted data as an indicator of
491     // success
492     return void_dst;
493   }
494   return nullptr;
495 }
496 
497 //----------------------------------------------------------------------
498 // Extract a single uint64_t from the data and update the offset
499 // pointed to by "offset_ptr".
500 //
501 // RETURNS the uint64_t that was extracted, or zero on failure.
502 //----------------------------------------------------------------------
503 uint64_t DataExtractor::GetU64(offset_t *offset_ptr) const {
504   uint64_t val = 0;
505   const uint8_t *data = (const uint8_t *)GetData(offset_ptr, sizeof(val));
506   if (data) {
507     if (m_byte_order != endian::InlHostByteOrder()) {
508       val = ReadSwapInt64(data);
509     } else {
510       memcpy(&val, data, 8);
511     }
512   }
513   return val;
514 }
515 
516 //----------------------------------------------------------------------
517 // GetU64
518 //
519 // Get multiple consecutive 64 bit values. Return true if the entire
520 // read succeeds and increment the offset pointed to by offset_ptr, else
521 // return false and leave the offset pointed to by offset_ptr unchanged.
522 //----------------------------------------------------------------------
523 void *DataExtractor::GetU64(offset_t *offset_ptr, void *void_dst,
524                             uint32_t count) const {
525   const size_t src_size = sizeof(uint64_t) * count;
526   const uint64_t *src = (const uint64_t *)GetData(offset_ptr, src_size);
527   if (src) {
528     if (m_byte_order != endian::InlHostByteOrder()) {
529       uint64_t *dst_pos = (uint64_t *)void_dst;
530       uint64_t *dst_end = dst_pos + count;
531       const uint64_t *src_pos = src;
532       while (dst_pos < dst_end) {
533         *dst_pos = ReadSwapInt64(src_pos);
534         ++dst_pos;
535         ++src_pos;
536       }
537     } else {
538       memcpy(void_dst, src, src_size);
539     }
540     // Return a non-nullptr pointer to the converted data as an indicator of
541     // success
542     return void_dst;
543   }
544   return nullptr;
545 }
546 
547 //----------------------------------------------------------------------
548 // Extract a single integer value from the data and update the offset
549 // pointed to by "offset_ptr". The size of the extracted integer
550 // is specified by the "byte_size" argument. "byte_size" should have
551 // a value between 1 and 4 since the return value is only 32 bits
552 // wide. Any "byte_size" values less than 1 or greater than 4 will
553 // result in nothing being extracted, and zero being returned.
554 //
555 // RETURNS the integer value that was extracted, or zero on failure.
556 //----------------------------------------------------------------------
557 uint32_t DataExtractor::GetMaxU32(offset_t *offset_ptr,
558                                   size_t byte_size) const {
559   switch (byte_size) {
560   case 1:
561     return GetU8(offset_ptr);
562     break;
563   case 2:
564     return GetU16(offset_ptr);
565     break;
566   case 4:
567     return GetU32(offset_ptr);
568     break;
569   default:
570     assert(false && "GetMaxU32 unhandled case!");
571     break;
572   }
573   return 0;
574 }
575 
576 //----------------------------------------------------------------------
577 // Extract a single integer value from the data and update the offset
578 // pointed to by "offset_ptr". The size of the extracted integer
579 // is specified by the "byte_size" argument. "byte_size" should have
580 // a value >= 1 and <= 8 since the return value is only 64 bits
581 // wide. Any "byte_size" values less than 1 or greater than 8 will
582 // result in nothing being extracted, and zero being returned.
583 //
584 // RETURNS the integer value that was extracted, or zero on failure.
585 //----------------------------------------------------------------------
586 uint64_t DataExtractor::GetMaxU64(offset_t *offset_ptr, size_t size) const {
587   switch (size) {
588   case 1:
589     return GetU8(offset_ptr);
590     break;
591   case 2:
592     return GetU16(offset_ptr);
593     break;
594   case 4:
595     return GetU32(offset_ptr);
596     break;
597   case 8:
598     return GetU64(offset_ptr);
599     break;
600   default:
601     assert(false && "GetMax64 unhandled case!");
602     break;
603   }
604   return 0;
605 }
606 
607 uint64_t DataExtractor::GetMaxU64_unchecked(offset_t *offset_ptr,
608                                             size_t size) const {
609   switch (size) {
610   case 1:
611     return GetU8_unchecked(offset_ptr);
612     break;
613   case 2:
614     return GetU16_unchecked(offset_ptr);
615     break;
616   case 4:
617     return GetU32_unchecked(offset_ptr);
618     break;
619   case 8:
620     return GetU64_unchecked(offset_ptr);
621     break;
622   default:
623     assert(false && "GetMax64 unhandled case!");
624     break;
625   }
626   return 0;
627 }
628 
629 int64_t DataExtractor::GetMaxS64(offset_t *offset_ptr, size_t size) const {
630   switch (size) {
631   case 1:
632     return (int8_t)GetU8(offset_ptr);
633     break;
634   case 2:
635     return (int16_t)GetU16(offset_ptr);
636     break;
637   case 4:
638     return (int32_t)GetU32(offset_ptr);
639     break;
640   case 8:
641     return (int64_t)GetU64(offset_ptr);
642     break;
643   default:
644     assert(false && "GetMax64 unhandled case!");
645     break;
646   }
647   return 0;
648 }
649 
650 uint64_t DataExtractor::GetMaxU64Bitfield(offset_t *offset_ptr, size_t size,
651                                           uint32_t bitfield_bit_size,
652                                           uint32_t bitfield_bit_offset) const {
653   uint64_t uval64 = GetMaxU64(offset_ptr, size);
654   if (bitfield_bit_size > 0) {
655     int32_t lsbcount = bitfield_bit_offset;
656     if (m_byte_order == eByteOrderBig)
657       lsbcount = size * 8 - bitfield_bit_offset - bitfield_bit_size;
658     if (lsbcount > 0)
659       uval64 >>= lsbcount;
660     uint64_t bitfield_mask = ((1ul << bitfield_bit_size) - 1);
661     if (!bitfield_mask && bitfield_bit_offset == 0 && bitfield_bit_size == 64)
662       return uval64;
663     uval64 &= bitfield_mask;
664   }
665   return uval64;
666 }
667 
668 int64_t DataExtractor::GetMaxS64Bitfield(offset_t *offset_ptr, size_t size,
669                                          uint32_t bitfield_bit_size,
670                                          uint32_t bitfield_bit_offset) const {
671   int64_t sval64 = GetMaxS64(offset_ptr, size);
672   if (bitfield_bit_size > 0) {
673     int32_t lsbcount = bitfield_bit_offset;
674     if (m_byte_order == eByteOrderBig)
675       lsbcount = size * 8 - bitfield_bit_offset - bitfield_bit_size;
676     if (lsbcount > 0)
677       sval64 >>= lsbcount;
678     uint64_t bitfield_mask = (((uint64_t)1) << bitfield_bit_size) - 1;
679     sval64 &= bitfield_mask;
680     // sign extend if needed
681     if (sval64 & (((uint64_t)1) << (bitfield_bit_size - 1)))
682       sval64 |= ~bitfield_mask;
683   }
684   return sval64;
685 }
686 
687 float DataExtractor::GetFloat(offset_t *offset_ptr) const {
688   typedef float float_type;
689   float_type val = 0.0;
690   const size_t src_size = sizeof(float_type);
691   const float_type *src = (const float_type *)GetData(offset_ptr, src_size);
692   if (src) {
693     if (m_byte_order != endian::InlHostByteOrder()) {
694       const uint8_t *src_data = (const uint8_t *)src;
695       uint8_t *dst_data = (uint8_t *)&val;
696       for (size_t i = 0; i < sizeof(float_type); ++i)
697         dst_data[sizeof(float_type) - 1 - i] = src_data[i];
698     } else {
699       val = *src;
700     }
701   }
702   return val;
703 }
704 
705 double DataExtractor::GetDouble(offset_t *offset_ptr) const {
706   typedef double float_type;
707   float_type val = 0.0;
708   const size_t src_size = sizeof(float_type);
709   const float_type *src = (const float_type *)GetData(offset_ptr, src_size);
710   if (src) {
711     if (m_byte_order != endian::InlHostByteOrder()) {
712       const uint8_t *src_data = (const uint8_t *)src;
713       uint8_t *dst_data = (uint8_t *)&val;
714       for (size_t i = 0; i < sizeof(float_type); ++i)
715         dst_data[sizeof(float_type) - 1 - i] = src_data[i];
716     } else {
717       val = *src;
718     }
719   }
720   return val;
721 }
722 
723 long double DataExtractor::GetLongDouble(offset_t *offset_ptr) const {
724   long double val = 0.0;
725 #if defined(__i386__) || defined(__amd64__) || defined(__x86_64__) ||          \
726     defined(_M_IX86) || defined(_M_IA64) || defined(_M_X64)
727   *offset_ptr += CopyByteOrderedData(*offset_ptr, 10, &val, sizeof(val),
728                                      endian::InlHostByteOrder());
729 #else
730   *offset_ptr += CopyByteOrderedData(*offset_ptr, sizeof(val), &val,
731                                      sizeof(val), endian::InlHostByteOrder());
732 #endif
733   return val;
734 }
735 
736 //------------------------------------------------------------------
737 // Extract a single address from the data and update the offset
738 // pointed to by "offset_ptr". The size of the extracted address
739 // comes from the "this->m_addr_size" member variable and should be
740 // set correctly prior to extracting any address values.
741 //
742 // RETURNS the address that was extracted, or zero on failure.
743 //------------------------------------------------------------------
744 uint64_t DataExtractor::GetAddress(offset_t *offset_ptr) const {
745 #ifdef LLDB_CONFIGURATION_DEBUG
746   assert(m_addr_size == 4 || m_addr_size == 8);
747 #endif
748   return GetMaxU64(offset_ptr, m_addr_size);
749 }
750 
751 uint64_t DataExtractor::GetAddress_unchecked(offset_t *offset_ptr) const {
752 #ifdef LLDB_CONFIGURATION_DEBUG
753   assert(m_addr_size == 4 || m_addr_size == 8);
754 #endif
755   return GetMaxU64_unchecked(offset_ptr, m_addr_size);
756 }
757 
758 //------------------------------------------------------------------
759 // Extract a single pointer from the data and update the offset
760 // pointed to by "offset_ptr". The size of the extracted pointer
761 // comes from the "this->m_addr_size" member variable and should be
762 // set correctly prior to extracting any pointer values.
763 //
764 // RETURNS the pointer that was extracted, or zero on failure.
765 //------------------------------------------------------------------
766 uint64_t DataExtractor::GetPointer(offset_t *offset_ptr) const {
767 #ifdef LLDB_CONFIGURATION_DEBUG
768   assert(m_addr_size == 4 || m_addr_size == 8);
769 #endif
770   return GetMaxU64(offset_ptr, m_addr_size);
771 }
772 
773 size_t DataExtractor::ExtractBytes(offset_t offset, offset_t length,
774                                    ByteOrder dst_byte_order, void *dst) const {
775   const uint8_t *src = PeekData(offset, length);
776   if (src) {
777     if (dst_byte_order != GetByteOrder()) {
778       // Validate that only a word- or register-sized dst is byte swapped
779       assert(length == 1 || length == 2 || length == 4 || length == 8 ||
780              length == 10 || length == 16 || length == 32);
781 
782       for (uint32_t i = 0; i < length; ++i)
783         ((uint8_t *)dst)[i] = src[length - i - 1];
784     } else
785       ::memcpy(dst, src, length);
786     return length;
787   }
788   return 0;
789 }
790 
791 // Extract data as it exists in target memory
792 lldb::offset_t DataExtractor::CopyData(offset_t offset, offset_t length,
793                                        void *dst) const {
794   const uint8_t *src = PeekData(offset, length);
795   if (src) {
796     ::memcpy(dst, src, length);
797     return length;
798   }
799   return 0;
800 }
801 
802 // Extract data and swap if needed when doing the copy
803 lldb::offset_t
804 DataExtractor::CopyByteOrderedData(offset_t src_offset, offset_t src_len,
805                                    void *dst_void_ptr, offset_t dst_len,
806                                    ByteOrder dst_byte_order) const {
807   // Validate the source info
808   if (!ValidOffsetForDataOfSize(src_offset, src_len))
809     assert(ValidOffsetForDataOfSize(src_offset, src_len));
810   assert(src_len > 0);
811   assert(m_byte_order == eByteOrderBig || m_byte_order == eByteOrderLittle);
812 
813   // Validate the destination info
814   assert(dst_void_ptr != nullptr);
815   assert(dst_len > 0);
816   assert(dst_byte_order == eByteOrderBig || dst_byte_order == eByteOrderLittle);
817 
818   // Validate that only a word- or register-sized dst is byte swapped
819   assert(dst_byte_order == m_byte_order || dst_len == 1 || dst_len == 2 ||
820          dst_len == 4 || dst_len == 8 || dst_len == 10 || dst_len == 16 ||
821          dst_len == 32);
822 
823   // Must have valid byte orders set in this object and for destination
824   if (!(dst_byte_order == eByteOrderBig ||
825         dst_byte_order == eByteOrderLittle) ||
826       !(m_byte_order == eByteOrderBig || m_byte_order == eByteOrderLittle))
827     return 0;
828 
829   uint8_t *dst = (uint8_t *)dst_void_ptr;
830   const uint8_t *src = (const uint8_t *)PeekData(src_offset, src_len);
831   if (src) {
832     if (dst_len >= src_len) {
833       // We are copying the entire value from src into dst.
834       // Calculate how many, if any, zeroes we need for the most
835       // significant bytes if "dst_len" is greater than "src_len"...
836       const size_t num_zeroes = dst_len - src_len;
837       if (dst_byte_order == eByteOrderBig) {
838         // Big endian, so we lead with zeroes...
839         if (num_zeroes > 0)
840           ::memset(dst, 0, num_zeroes);
841         // Then either copy or swap the rest
842         if (m_byte_order == eByteOrderBig) {
843           ::memcpy(dst + num_zeroes, src, src_len);
844         } else {
845           for (uint32_t i = 0; i < src_len; ++i)
846             dst[i + num_zeroes] = src[src_len - 1 - i];
847         }
848       } else {
849         // Little endian destination, so we lead the value bytes
850         if (m_byte_order == eByteOrderBig) {
851           for (uint32_t i = 0; i < src_len; ++i)
852             dst[i] = src[src_len - 1 - i];
853         } else {
854           ::memcpy(dst, src, src_len);
855         }
856         // And zero the rest...
857         if (num_zeroes > 0)
858           ::memset(dst + src_len, 0, num_zeroes);
859       }
860       return src_len;
861     } else {
862       // We are only copying some of the value from src into dst..
863 
864       if (dst_byte_order == eByteOrderBig) {
865         // Big endian dst
866         if (m_byte_order == eByteOrderBig) {
867           // Big endian dst, with big endian src
868           ::memcpy(dst, src + (src_len - dst_len), dst_len);
869         } else {
870           // Big endian dst, with little endian src
871           for (uint32_t i = 0; i < dst_len; ++i)
872             dst[i] = src[dst_len - 1 - i];
873         }
874       } else {
875         // Little endian dst
876         if (m_byte_order == eByteOrderBig) {
877           // Little endian dst, with big endian src
878           for (uint32_t i = 0; i < dst_len; ++i)
879             dst[i] = src[src_len - 1 - i];
880         } else {
881           // Little endian dst, with big endian src
882           ::memcpy(dst, src, dst_len);
883         }
884       }
885       return dst_len;
886     }
887   }
888   return 0;
889 }
890 
891 //----------------------------------------------------------------------
892 // Extracts a variable length NULL terminated C string from
893 // the data at the offset pointed to by "offset_ptr".  The
894 // "offset_ptr" will be updated with the offset of the byte that
895 // follows the NULL terminator byte.
896 //
// If the offset pointed to by "offset_ptr" is out of bounds, or if
// no NULL terminator byte is found before the end of the data,
// nullptr will be returned and "offset_ptr" will not be
// updated.
901 //----------------------------------------------------------------------
902 const char *DataExtractor::GetCStr(offset_t *offset_ptr) const {
903   const char *cstr = (const char *)PeekData(*offset_ptr, 1);
904   if (cstr) {
905     const char *cstr_end = cstr;
906     const char *end = (const char *)m_end;
907     while (cstr_end < end && *cstr_end)
908       ++cstr_end;
909 
910     // Now we are either at the end of the data or we point to the
911     // NULL C string terminator with cstr_end...
912     if (*cstr_end == '\0') {
913       // Advance the offset with one extra byte for the NULL terminator
914       *offset_ptr += (cstr_end - cstr + 1);
915       return cstr;
916     }
917 
918     // We reached the end of the data without finding a NULL C string
919     // terminator. Fall through and return nullptr otherwise anyone that
920     // would have used the result as a C string can wander into
921     // unknown memory...
922   }
923   return nullptr;
924 }
925 
926 //----------------------------------------------------------------------
927 // Extracts a NULL terminated C string from the fixed length field of
928 // length "len" at the offset pointed to by "offset_ptr".
929 // The "offset_ptr" will be updated with the offset of the byte that
930 // follows the fixed length field.
931 //
932 // If the offset pointed to by "offset_ptr" is out of bounds, or if
933 // the offset plus the length of the field is out of bounds, or if the
934 // field does not contain a NULL terminator byte, nullptr will be returned
935 // and "offset_ptr" will not be updated.
936 //----------------------------------------------------------------------
937 const char *DataExtractor::GetCStr(offset_t *offset_ptr, offset_t len) const {
938   const char *cstr = (const char *)PeekData(*offset_ptr, len);
939   if (cstr != nullptr) {
940     if (memchr(cstr, '\0', len) == nullptr) {
941       return nullptr;
942     }
943     *offset_ptr += len;
944     return cstr;
945   }
946   return nullptr;
947 }
948 
949 //------------------------------------------------------------------
950 // Peeks at a string in the contained data. No verification is done
951 // to make sure the entire string lies within the bounds of this
952 // object's data, only "offset" is verified to be a valid offset.
953 //
954 // Returns a valid C string pointer if "offset" is a valid offset in
955 // this object's data, else nullptr is returned.
956 //------------------------------------------------------------------
957 const char *DataExtractor::PeekCStr(offset_t offset) const {
958   return (const char *)PeekData(offset, 1);
959 }
960 
961 //----------------------------------------------------------------------
962 // Extracts an unsigned LEB128 number from this object's data
963 // starting at the offset pointed to by "offset_ptr". The offset
964 // pointed to by "offset_ptr" will be updated with the offset of the
965 // byte following the last extracted byte.
966 //
// Returns the extracted integer value.
968 //----------------------------------------------------------------------
969 uint64_t DataExtractor::GetULEB128(offset_t *offset_ptr) const {
970   const uint8_t *src = (const uint8_t *)PeekData(*offset_ptr, 1);
971   if (src == nullptr)
972     return 0;
973 
974   const uint8_t *end = m_end;
975 
976   if (src < end) {
977     uint64_t result = *src++;
978     if (result >= 0x80) {
979       result &= 0x7f;
980       int shift = 7;
981       while (src < end) {
982         uint8_t byte = *src++;
983         result |= (uint64_t)(byte & 0x7f) << shift;
984         if ((byte & 0x80) == 0)
985           break;
986         shift += 7;
987       }
988     }
989     *offset_ptr = src - m_start;
990     return result;
991   }
992 
993   return 0;
994 }
995 
996 //----------------------------------------------------------------------
// Extracts a signed LEB128 number from this object's data
// starting at the offset pointed to by "offset_ptr". The offset
// pointed to by "offset_ptr" will be updated with the offset of the
// byte following the last extracted byte.
//
// Returns the extracted integer value.
1003 //----------------------------------------------------------------------
1004 int64_t DataExtractor::GetSLEB128(offset_t *offset_ptr) const {
1005   const uint8_t *src = (const uint8_t *)PeekData(*offset_ptr, 1);
1006   if (src == nullptr)
1007     return 0;
1008 
1009   const uint8_t *end = m_end;
1010 
1011   if (src < end) {
1012     int64_t result = 0;
1013     int shift = 0;
1014     int size = sizeof(int64_t) * 8;
1015 
1016     uint8_t byte = 0;
1017     int bytecount = 0;
1018 
1019     while (src < end) {
1020       bytecount++;
1021       byte = *src++;
1022       result |= (int64_t)(byte & 0x7f) << shift;
1023       shift += 7;
1024       if ((byte & 0x80) == 0)
1025         break;
1026     }
1027 
1028     // Sign bit of byte is 2nd high order bit (0x40)
1029     if (shift < size && (byte & 0x40))
1030       result |= -(1 << shift);
1031 
1032     *offset_ptr += bytecount;
1033     return result;
1034   }
1035   return 0;
1036 }
1037 
1038 //----------------------------------------------------------------------
// Skips a LEB128 number (signed or unsigned) from this object's
// data starting at the offset pointed to by "offset_ptr". The
1041 // offset pointed to by "offset_ptr" will be updated with the offset
1042 // of the byte following the last extracted byte.
1043 //
1044 // Returns the number of bytes consumed during the extraction.
1045 //----------------------------------------------------------------------
1046 uint32_t DataExtractor::Skip_LEB128(offset_t *offset_ptr) const {
1047   uint32_t bytes_consumed = 0;
1048   const uint8_t *src = (const uint8_t *)PeekData(*offset_ptr, 1);
1049   if (src == nullptr)
1050     return 0;
1051 
1052   const uint8_t *end = m_end;
1053 
1054   if (src < end) {
1055     const uint8_t *src_pos = src;
1056     while ((src_pos < end) && (*src_pos++ & 0x80))
1057       ++bytes_consumed;
1058     *offset_ptr += src_pos - src;
1059   }
1060   return bytes_consumed;
1061 }
1062 
1063 //----------------------------------------------------------------------
// Dumps up to "length" bytes from this object's data to the log
// "log", starting "start_offset" bytes into this data. Unless
// "base_addr" is LLDB_INVALID_ADDRESS, "base_addr" will be added to
// the offset into the dumped data when showing the offset into the
// data at the start of each output line. "num_per_line" objects of
// type "type" will be dumped per line, with the option to override
// the format for each object with "format". "format" is a printf
// style formatting string. If "format" is nullptr, then an
// appropriate format string will be used for the supplied "type". If
// "log" is nullptr, nothing is dumped and "start_offset" is returned.
1074 //----------------------------------------------------------------------
1075 lldb::offset_t DataExtractor::PutToLog(Log *log, offset_t start_offset,
1076                                        offset_t length, uint64_t base_addr,
1077                                        uint32_t num_per_line,
1078                                        DataExtractor::Type type,
1079                                        const char *format) const {
1080   if (log == nullptr)
1081     return start_offset;
1082 
1083   offset_t offset;
1084   offset_t end_offset;
1085   uint32_t count;
1086   StreamString sstr;
1087   for (offset = start_offset, end_offset = offset + length, count = 0;
1088        ValidOffset(offset) && offset < end_offset; ++count) {
1089     if ((count % num_per_line) == 0) {
1090       // Print out any previous string
1091       if (sstr.GetSize() > 0) {
1092         log->PutString(sstr.GetString());
1093         sstr.Clear();
1094       }
1095       // Reset string offset and fill the current line string with address:
1096       if (base_addr != LLDB_INVALID_ADDRESS)
1097         sstr.Printf("0x%8.8" PRIx64 ":",
1098                     (uint64_t)(base_addr + (offset - start_offset)));
1099     }
1100 
1101     switch (type) {
1102     case TypeUInt8:
1103       sstr.Printf(format ? format : " %2.2x", GetU8(&offset));
1104       break;
1105     case TypeChar: {
1106       char ch = GetU8(&offset);
1107       sstr.Printf(format ? format : " %c", isprint(ch) ? ch : ' ');
1108     } break;
1109     case TypeUInt16:
1110       sstr.Printf(format ? format : " %4.4x", GetU16(&offset));
1111       break;
1112     case TypeUInt32:
1113       sstr.Printf(format ? format : " %8.8x", GetU32(&offset));
1114       break;
1115     case TypeUInt64:
1116       sstr.Printf(format ? format : " %16.16" PRIx64, GetU64(&offset));
1117       break;
1118     case TypePointer:
1119       sstr.Printf(format ? format : " 0x%" PRIx64, GetAddress(&offset));
1120       break;
1121     case TypeULEB128:
1122       sstr.Printf(format ? format : " 0x%" PRIx64, GetULEB128(&offset));
1123       break;
1124     case TypeSLEB128:
1125       sstr.Printf(format ? format : " %" PRId64, GetSLEB128(&offset));
1126       break;
1127     }
1128   }
1129 
1130   if (!sstr.Empty())
1131     log->PutString(sstr.GetString());
1132 
1133   return offset; // Return the offset at which we ended up
1134 }
1135 
1136 //----------------------------------------------------------------------
1137 // DumpUUID
1138 //
1139 // Dump out a UUID starting at 'offset' bytes into the buffer
1140 //----------------------------------------------------------------------
1141 void DataExtractor::DumpUUID(Stream *s, offset_t offset) const {
1142   if (s) {
1143     const uint8_t *uuid_data = PeekData(offset, 16);
1144     if (uuid_data) {
1145       lldb_private::UUID uuid(uuid_data, 16);
1146       uuid.Dump(s);
1147     } else {
1148       s->Printf("<not enough data for UUID at offset 0x%8.8" PRIx64 ">",
1149                 offset);
1150     }
1151   }
1152 }
1153 
1154 size_t DataExtractor::Copy(DataExtractor &dest_data) const {
1155   if (m_data_sp) {
1156     // we can pass along the SP to the data
1157     dest_data.SetData(m_data_sp);
1158   } else {
1159     const uint8_t *base_ptr = m_start;
1160     size_t data_size = GetByteSize();
1161     dest_data.SetData(DataBufferSP(new DataBufferHeap(base_ptr, data_size)));
1162   }
1163   return GetByteSize();
1164 }
1165 
1166 bool DataExtractor::Append(DataExtractor &rhs) {
1167   if (rhs.GetByteOrder() != GetByteOrder())
1168     return false;
1169 
1170   if (rhs.GetByteSize() == 0)
1171     return true;
1172 
1173   if (GetByteSize() == 0)
1174     return (rhs.Copy(*this) > 0);
1175 
1176   size_t bytes = GetByteSize() + rhs.GetByteSize();
1177 
1178   DataBufferHeap *buffer_heap_ptr = nullptr;
1179   DataBufferSP buffer_sp(buffer_heap_ptr = new DataBufferHeap(bytes, 0));
1180 
1181   if (!buffer_sp || buffer_heap_ptr == nullptr)
1182     return false;
1183 
1184   uint8_t *bytes_ptr = buffer_heap_ptr->GetBytes();
1185 
1186   memcpy(bytes_ptr, GetDataStart(), GetByteSize());
1187   memcpy(bytes_ptr + GetByteSize(), rhs.GetDataStart(), rhs.GetByteSize());
1188 
1189   SetData(buffer_sp);
1190 
1191   return true;
1192 }
1193 
1194 bool DataExtractor::Append(void *buf, offset_t length) {
1195   if (buf == nullptr)
1196     return false;
1197 
1198   if (length == 0)
1199     return true;
1200 
1201   size_t bytes = GetByteSize() + length;
1202 
1203   DataBufferHeap *buffer_heap_ptr = nullptr;
1204   DataBufferSP buffer_sp(buffer_heap_ptr = new DataBufferHeap(bytes, 0));
1205 
1206   if (!buffer_sp || buffer_heap_ptr == nullptr)
1207     return false;
1208 
1209   uint8_t *bytes_ptr = buffer_heap_ptr->GetBytes();
1210 
1211   if (GetByteSize() > 0)
1212     memcpy(bytes_ptr, GetDataStart(), GetByteSize());
1213 
1214   memcpy(bytes_ptr + GetByteSize(), buf, length);
1215 
1216   SetData(buffer_sp);
1217 
1218   return true;
1219 }
1220 
1221 void DataExtractor::Checksum(llvm::SmallVectorImpl<uint8_t> &dest,
1222                              uint64_t max_data) {
1223   if (max_data == 0)
1224     max_data = GetByteSize();
1225   else
1226     max_data = std::min(max_data, GetByteSize());
1227 
1228   llvm::MD5 md5;
1229 
1230   const llvm::ArrayRef<uint8_t> data(GetDataStart(), max_data);
1231   md5.update(data);
1232 
1233   llvm::MD5::MD5Result result;
1234   md5.final(result);
1235 
1236   dest.clear();
1237   dest.append(result.Bytes.begin(), result.Bytes.end());
1238 }
1239