188a1d9fcSBenjamin Kramer //===-- DataExtractor.cpp -------------------------------------------------===//
288a1d9fcSBenjamin Kramer //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
688a1d9fcSBenjamin Kramer //
788a1d9fcSBenjamin Kramer //===----------------------------------------------------------------------===//
888a1d9fcSBenjamin Kramer 
988a1d9fcSBenjamin Kramer #include "llvm/Support/DataExtractor.h"
10b1f29cecSPavel Labath #include "llvm/Support/Errc.h"
1188a1d9fcSBenjamin Kramer #include "llvm/Support/ErrorHandling.h"
1288a1d9fcSBenjamin Kramer #include "llvm/Support/Host.h"
138242f35dSDavid Blaikie #include "llvm/Support/LEB128.h"
14b1f29cecSPavel Labath #include "llvm/Support/SwapByteOrder.h"
15b1f29cecSPavel Labath 
1688a1d9fcSBenjamin Kramer using namespace llvm;
1788a1d9fcSBenjamin Kramer 
185754a61eSPavel Labath static void unexpectedEndReached(Error *E, uint64_t Offset) {
19b1f29cecSPavel Labath   if (E)
20b1f29cecSPavel Labath     *E = createStringError(errc::illegal_byte_sequence,
215754a61eSPavel Labath                            "unexpected end of data at offset 0x%" PRIx64,
225754a61eSPavel Labath                            Offset);
23b1f29cecSPavel Labath }
24b1f29cecSPavel Labath 
25b1f29cecSPavel Labath static bool isError(Error *E) { return E && *E; }
26b1f29cecSPavel Labath 
2788a1d9fcSBenjamin Kramer template <typename T>
28f5f35c5cSIgor Kudrin static T getU(uint64_t *offset_ptr, const DataExtractor *de,
29b1f29cecSPavel Labath               bool isLittleEndian, const char *Data, llvm::Error *Err) {
30b1f29cecSPavel Labath   ErrorAsOutParameter ErrAsOut(Err);
3188a1d9fcSBenjamin Kramer   T val = 0;
32b1f29cecSPavel Labath   if (isError(Err))
33b1f29cecSPavel Labath     return val;
34b1f29cecSPavel Labath 
35f5f35c5cSIgor Kudrin   uint64_t offset = *offset_ptr;
36b1f29cecSPavel Labath   if (!de->isValidOffsetForDataOfSize(offset, sizeof(T))) {
375754a61eSPavel Labath     unexpectedEndReached(Err, offset);
38b1f29cecSPavel Labath     return val;
39b1f29cecSPavel Labath   }
4088a1d9fcSBenjamin Kramer   std::memcpy(&val, &Data[offset], sizeof(val));
4141cb64f4SRafael Espindola   if (sys::IsLittleEndianHost != isLittleEndian)
429aea8432SArtyom Skrobov     sys::swapByteOrder(val);
4388a1d9fcSBenjamin Kramer 
4488a1d9fcSBenjamin Kramer   // Advance the offset
4588a1d9fcSBenjamin Kramer   *offset_ptr += sizeof(val);
4688a1d9fcSBenjamin Kramer   return val;
4788a1d9fcSBenjamin Kramer }
4888a1d9fcSBenjamin Kramer 
4988a1d9fcSBenjamin Kramer template <typename T>
50f5f35c5cSIgor Kudrin static T *getUs(uint64_t *offset_ptr, T *dst, uint32_t count,
51b1f29cecSPavel Labath                 const DataExtractor *de, bool isLittleEndian, const char *Data,
52b1f29cecSPavel Labath                 llvm::Error *Err) {
53b1f29cecSPavel Labath   ErrorAsOutParameter ErrAsOut(Err);
54b1f29cecSPavel Labath   if (isError(Err))
55b1f29cecSPavel Labath     return nullptr;
56b1f29cecSPavel Labath 
57f5f35c5cSIgor Kudrin   uint64_t offset = *offset_ptr;
5888a1d9fcSBenjamin Kramer 
59b1f29cecSPavel Labath   if (!de->isValidOffsetForDataOfSize(offset, sizeof(*dst) * count)) {
605754a61eSPavel Labath     unexpectedEndReached(Err, offset);
61b1f29cecSPavel Labath     return nullptr;
62b1f29cecSPavel Labath   }
6388a1d9fcSBenjamin Kramer   for (T *value_ptr = dst, *end = dst + count; value_ptr != end;
6488a1d9fcSBenjamin Kramer        ++value_ptr, offset += sizeof(*dst))
65b1f29cecSPavel Labath     *value_ptr = getU<T>(offset_ptr, de, isLittleEndian, Data, Err);
6688a1d9fcSBenjamin Kramer   // Advance the offset
6788a1d9fcSBenjamin Kramer   *offset_ptr = offset;
6888a1d9fcSBenjamin Kramer   // Return a non-NULL pointer to the converted data as an indicator of
6988a1d9fcSBenjamin Kramer   // success
7088a1d9fcSBenjamin Kramer   return dst;
7188a1d9fcSBenjamin Kramer }
7288a1d9fcSBenjamin Kramer 
73b1f29cecSPavel Labath uint8_t DataExtractor::getU8(uint64_t *offset_ptr, llvm::Error *Err) const {
74b1f29cecSPavel Labath   return getU<uint8_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err);
7588a1d9fcSBenjamin Kramer }
7688a1d9fcSBenjamin Kramer 
7788a1d9fcSBenjamin Kramer uint8_t *
78f5f35c5cSIgor Kudrin DataExtractor::getU8(uint64_t *offset_ptr, uint8_t *dst, uint32_t count) const {
7988a1d9fcSBenjamin Kramer   return getUs<uint8_t>(offset_ptr, dst, count, this, IsLittleEndian,
80b1f29cecSPavel Labath                         Data.data(), nullptr);
8188a1d9fcSBenjamin Kramer }
8288a1d9fcSBenjamin Kramer 
83b1f29cecSPavel Labath uint8_t *DataExtractor::getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const {
84b1f29cecSPavel Labath   return getUs<uint8_t>(&C.Offset, Dst, Count, this, IsLittleEndian,
85b1f29cecSPavel Labath                         Data.data(), &C.Err);
86b1f29cecSPavel Labath }
87b1f29cecSPavel Labath 
88b1f29cecSPavel Labath uint16_t DataExtractor::getU16(uint64_t *offset_ptr, llvm::Error *Err) const {
89b1f29cecSPavel Labath   return getU<uint16_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err);
9088a1d9fcSBenjamin Kramer }
9188a1d9fcSBenjamin Kramer 
92f5f35c5cSIgor Kudrin uint16_t *DataExtractor::getU16(uint64_t *offset_ptr, uint16_t *dst,
9388a1d9fcSBenjamin Kramer                                 uint32_t count) const {
9488a1d9fcSBenjamin Kramer   return getUs<uint16_t>(offset_ptr, dst, count, this, IsLittleEndian,
95b1f29cecSPavel Labath                          Data.data(), nullptr);
9688a1d9fcSBenjamin Kramer }
9788a1d9fcSBenjamin Kramer 
98f5f35c5cSIgor Kudrin uint32_t DataExtractor::getU24(uint64_t *offset_ptr) const {
99258927e3SWolfgang Pieb   uint24_t ExtractedVal =
100b1f29cecSPavel Labath       getU<uint24_t>(offset_ptr, this, IsLittleEndian, Data.data(), nullptr);
101258927e3SWolfgang Pieb   // The 3 bytes are in the correct byte order for the host.
102258927e3SWolfgang Pieb   return ExtractedVal.getAsUint32(sys::IsLittleEndianHost);
103258927e3SWolfgang Pieb }
104258927e3SWolfgang Pieb 
105b1f29cecSPavel Labath uint32_t DataExtractor::getU32(uint64_t *offset_ptr, llvm::Error *Err) const {
106b1f29cecSPavel Labath   return getU<uint32_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err);
10788a1d9fcSBenjamin Kramer }
10888a1d9fcSBenjamin Kramer 
109f5f35c5cSIgor Kudrin uint32_t *DataExtractor::getU32(uint64_t *offset_ptr, uint32_t *dst,
11088a1d9fcSBenjamin Kramer                                 uint32_t count) const {
11188a1d9fcSBenjamin Kramer   return getUs<uint32_t>(offset_ptr, dst, count, this, IsLittleEndian,
112b1f29cecSPavel Labath                          Data.data(), nullptr);
11388a1d9fcSBenjamin Kramer }
11488a1d9fcSBenjamin Kramer 
115b1f29cecSPavel Labath uint64_t DataExtractor::getU64(uint64_t *offset_ptr, llvm::Error *Err) const {
116b1f29cecSPavel Labath   return getU<uint64_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err);
11788a1d9fcSBenjamin Kramer }
11888a1d9fcSBenjamin Kramer 
119f5f35c5cSIgor Kudrin uint64_t *DataExtractor::getU64(uint64_t *offset_ptr, uint64_t *dst,
12088a1d9fcSBenjamin Kramer                                 uint32_t count) const {
12188a1d9fcSBenjamin Kramer   return getUs<uint64_t>(offset_ptr, dst, count, this, IsLittleEndian,
122b1f29cecSPavel Labath                          Data.data(), nullptr);
12388a1d9fcSBenjamin Kramer }
12488a1d9fcSBenjamin Kramer 
125b1f29cecSPavel Labath uint64_t DataExtractor::getUnsigned(uint64_t *offset_ptr, uint32_t byte_size,
126b1f29cecSPavel Labath                                     llvm::Error *Err) const {
12788a1d9fcSBenjamin Kramer   switch (byte_size) {
12888a1d9fcSBenjamin Kramer   case 1:
129b1f29cecSPavel Labath     return getU8(offset_ptr, Err);
13088a1d9fcSBenjamin Kramer   case 2:
131b1f29cecSPavel Labath     return getU16(offset_ptr, Err);
13288a1d9fcSBenjamin Kramer   case 4:
133b1f29cecSPavel Labath     return getU32(offset_ptr, Err);
13488a1d9fcSBenjamin Kramer   case 8:
135b1f29cecSPavel Labath     return getU64(offset_ptr, Err);
13688a1d9fcSBenjamin Kramer   }
13788a1d9fcSBenjamin Kramer   llvm_unreachable("getUnsigned unhandled case!");
13888a1d9fcSBenjamin Kramer }
13988a1d9fcSBenjamin Kramer 
14088a1d9fcSBenjamin Kramer int64_t
141f5f35c5cSIgor Kudrin DataExtractor::getSigned(uint64_t *offset_ptr, uint32_t byte_size) const {
14288a1d9fcSBenjamin Kramer   switch (byte_size) {
14388a1d9fcSBenjamin Kramer   case 1:
14488a1d9fcSBenjamin Kramer     return (int8_t)getU8(offset_ptr);
14588a1d9fcSBenjamin Kramer   case 2:
14688a1d9fcSBenjamin Kramer     return (int16_t)getU16(offset_ptr);
14788a1d9fcSBenjamin Kramer   case 4:
14888a1d9fcSBenjamin Kramer     return (int32_t)getU32(offset_ptr);
14988a1d9fcSBenjamin Kramer   case 8:
15088a1d9fcSBenjamin Kramer     return (int64_t)getU64(offset_ptr);
15188a1d9fcSBenjamin Kramer   }
15288a1d9fcSBenjamin Kramer   llvm_unreachable("getSigned unhandled case!");
15388a1d9fcSBenjamin Kramer }
15488a1d9fcSBenjamin Kramer 
155*a16fffa3SPavel Labath StringRef DataExtractor::getCStrRef(uint64_t *OffsetPtr, Error *Err) const {
156*a16fffa3SPavel Labath   ErrorAsOutParameter ErrAsOut(Err);
157*a16fffa3SPavel Labath   if (isError(Err))
158*a16fffa3SPavel Labath     return StringRef();
15988a1d9fcSBenjamin Kramer 
160*a16fffa3SPavel Labath   uint64_t Start = *OffsetPtr;
161ba1c9156SPaul Robinson   StringRef::size_type Pos = Data.find('\0', Start);
162ba1c9156SPaul Robinson   if (Pos != StringRef::npos) {
163*a16fffa3SPavel Labath     *OffsetPtr = Pos + 1;
164ba1c9156SPaul Robinson     return StringRef(Data.data() + Start, Pos - Start);
165ba1c9156SPaul Robinson   }
166*a16fffa3SPavel Labath   unexpectedEndReached(Err, Start);
167ba1c9156SPaul Robinson   return StringRef();
168ba1c9156SPaul Robinson }
169ba1c9156SPaul Robinson 
170df8dda67SGreg Clayton StringRef DataExtractor::getFixedLengthString(uint64_t *OffsetPtr,
171df8dda67SGreg Clayton                                               uint64_t Length,
172df8dda67SGreg Clayton                                               StringRef TrimChars) const {
173df8dda67SGreg Clayton   StringRef Bytes(getBytes(OffsetPtr, Length));
174df8dda67SGreg Clayton   return Bytes.trim(TrimChars);
175df8dda67SGreg Clayton }
176df8dda67SGreg Clayton 
177df8dda67SGreg Clayton StringRef DataExtractor::getBytes(uint64_t *OffsetPtr, uint64_t Length) const {
178df8dda67SGreg Clayton   if (!isValidOffsetForDataOfSize(*OffsetPtr, Length))
179df8dda67SGreg Clayton     return StringRef();
180df8dda67SGreg Clayton   StringRef Result = Data.substr(*OffsetPtr, Length);
181df8dda67SGreg Clayton   *OffsetPtr += Length;
182df8dda67SGreg Clayton   return Result;
183df8dda67SGreg Clayton }
184df8dda67SGreg Clayton 
185b1f29cecSPavel Labath uint64_t DataExtractor::getULEB128(uint64_t *offset_ptr,
186b1f29cecSPavel Labath                                    llvm::Error *Err) const {
1878242f35dSDavid Blaikie   assert(*offset_ptr <= Data.size());
188b1f29cecSPavel Labath   ErrorAsOutParameter ErrAsOut(Err);
189b1f29cecSPavel Labath   if (isError(Err))
190b1f29cecSPavel Labath     return 0;
1918242f35dSDavid Blaikie 
1928242f35dSDavid Blaikie   const char *error;
1938242f35dSDavid Blaikie   unsigned bytes_read;
1948242f35dSDavid Blaikie   uint64_t result = decodeULEB128(
1958242f35dSDavid Blaikie       reinterpret_cast<const uint8_t *>(Data.data() + *offset_ptr), &bytes_read,
1968242f35dSDavid Blaikie       reinterpret_cast<const uint8_t *>(Data.data() + Data.size()), &error);
197b1f29cecSPavel Labath   if (error) {
198b1f29cecSPavel Labath     if (Err)
199b1f29cecSPavel Labath       *Err = createStringError(errc::illegal_byte_sequence, error);
20088a1d9fcSBenjamin Kramer     return 0;
201b1f29cecSPavel Labath   }
2028242f35dSDavid Blaikie   *offset_ptr += bytes_read;
20388a1d9fcSBenjamin Kramer   return result;
20488a1d9fcSBenjamin Kramer }
20588a1d9fcSBenjamin Kramer 
206f5f35c5cSIgor Kudrin int64_t DataExtractor::getSLEB128(uint64_t *offset_ptr) const {
207f895e1bdSDavid Blaikie   assert(*offset_ptr <= Data.size());
208f895e1bdSDavid Blaikie 
209f895e1bdSDavid Blaikie   const char *error;
210f895e1bdSDavid Blaikie   unsigned bytes_read;
211f895e1bdSDavid Blaikie   int64_t result = decodeSLEB128(
212f895e1bdSDavid Blaikie       reinterpret_cast<const uint8_t *>(Data.data() + *offset_ptr), &bytes_read,
213f895e1bdSDavid Blaikie       reinterpret_cast<const uint8_t *>(Data.data() + Data.size()), &error);
214f895e1bdSDavid Blaikie   if (error)
21588a1d9fcSBenjamin Kramer     return 0;
216f895e1bdSDavid Blaikie   *offset_ptr += bytes_read;
21788a1d9fcSBenjamin Kramer   return result;
21888a1d9fcSBenjamin Kramer }
219b1f29cecSPavel Labath 
220b1f29cecSPavel Labath void DataExtractor::skip(Cursor &C, uint64_t Length) const {
221b1f29cecSPavel Labath   ErrorAsOutParameter ErrAsOut(&C.Err);
222b1f29cecSPavel Labath   if (isError(&C.Err))
223b1f29cecSPavel Labath     return;
224b1f29cecSPavel Labath 
225b1f29cecSPavel Labath   if (isValidOffsetForDataOfSize(C.Offset, Length))
226b1f29cecSPavel Labath     C.Offset += Length;
227b1f29cecSPavel Labath   else
2285754a61eSPavel Labath     unexpectedEndReached(&C.Err, C.Offset);
229b1f29cecSPavel Labath }
230