188a1d9fcSBenjamin Kramer //===-- DataExtractor.cpp -------------------------------------------------===//
288a1d9fcSBenjamin Kramer //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
688a1d9fcSBenjamin Kramer //
788a1d9fcSBenjamin Kramer //===----------------------------------------------------------------------===//
888a1d9fcSBenjamin Kramer 
988a1d9fcSBenjamin Kramer #include "llvm/Support/DataExtractor.h"
10b1f29cecSPavel Labath #include "llvm/Support/Errc.h"
1188a1d9fcSBenjamin Kramer #include "llvm/Support/ErrorHandling.h"
1288a1d9fcSBenjamin Kramer #include "llvm/Support/Host.h"
138242f35dSDavid Blaikie #include "llvm/Support/LEB128.h"
14b1f29cecSPavel Labath #include "llvm/Support/SwapByteOrder.h"
15b1f29cecSPavel Labath 
1688a1d9fcSBenjamin Kramer using namespace llvm;
1788a1d9fcSBenjamin Kramer 
18b1f29cecSPavel Labath static void unexpectedEndReached(Error *E) {
19b1f29cecSPavel Labath   if (E)
20b1f29cecSPavel Labath     *E = createStringError(errc::illegal_byte_sequence,
21b1f29cecSPavel Labath                            "unexpected end of data");
22b1f29cecSPavel Labath }
23b1f29cecSPavel Labath 
24b1f29cecSPavel Labath static bool isError(Error *E) { return E && *E; }
25b1f29cecSPavel Labath 
2688a1d9fcSBenjamin Kramer template <typename T>
27f5f35c5cSIgor Kudrin static T getU(uint64_t *offset_ptr, const DataExtractor *de,
28b1f29cecSPavel Labath               bool isLittleEndian, const char *Data, llvm::Error *Err) {
29b1f29cecSPavel Labath   ErrorAsOutParameter ErrAsOut(Err);
3088a1d9fcSBenjamin Kramer   T val = 0;
31b1f29cecSPavel Labath   if (isError(Err))
32b1f29cecSPavel Labath     return val;
33b1f29cecSPavel Labath 
34f5f35c5cSIgor Kudrin   uint64_t offset = *offset_ptr;
35b1f29cecSPavel Labath   if (!de->isValidOffsetForDataOfSize(offset, sizeof(T))) {
36b1f29cecSPavel Labath     unexpectedEndReached(Err);
37b1f29cecSPavel Labath     return val;
38b1f29cecSPavel Labath   }
3988a1d9fcSBenjamin Kramer   std::memcpy(&val, &Data[offset], sizeof(val));
4041cb64f4SRafael Espindola   if (sys::IsLittleEndianHost != isLittleEndian)
419aea8432SArtyom Skrobov     sys::swapByteOrder(val);
4288a1d9fcSBenjamin Kramer 
4388a1d9fcSBenjamin Kramer   // Advance the offset
4488a1d9fcSBenjamin Kramer   *offset_ptr += sizeof(val);
4588a1d9fcSBenjamin Kramer   return val;
4688a1d9fcSBenjamin Kramer }
4788a1d9fcSBenjamin Kramer 
4888a1d9fcSBenjamin Kramer template <typename T>
49f5f35c5cSIgor Kudrin static T *getUs(uint64_t *offset_ptr, T *dst, uint32_t count,
50b1f29cecSPavel Labath                 const DataExtractor *de, bool isLittleEndian, const char *Data,
51b1f29cecSPavel Labath                 llvm::Error *Err) {
52b1f29cecSPavel Labath   ErrorAsOutParameter ErrAsOut(Err);
53b1f29cecSPavel Labath   if (isError(Err))
54b1f29cecSPavel Labath     return nullptr;
55b1f29cecSPavel Labath 
56f5f35c5cSIgor Kudrin   uint64_t offset = *offset_ptr;
5788a1d9fcSBenjamin Kramer 
58b1f29cecSPavel Labath   if (!de->isValidOffsetForDataOfSize(offset, sizeof(*dst) * count)) {
59b1f29cecSPavel Labath     unexpectedEndReached(Err);
60b1f29cecSPavel Labath     return nullptr;
61b1f29cecSPavel Labath   }
6288a1d9fcSBenjamin Kramer   for (T *value_ptr = dst, *end = dst + count; value_ptr != end;
6388a1d9fcSBenjamin Kramer        ++value_ptr, offset += sizeof(*dst))
64b1f29cecSPavel Labath     *value_ptr = getU<T>(offset_ptr, de, isLittleEndian, Data, Err);
6588a1d9fcSBenjamin Kramer   // Advance the offset
6688a1d9fcSBenjamin Kramer   *offset_ptr = offset;
6788a1d9fcSBenjamin Kramer   // Return a non-NULL pointer to the converted data as an indicator of
6888a1d9fcSBenjamin Kramer   // success
6988a1d9fcSBenjamin Kramer   return dst;
7088a1d9fcSBenjamin Kramer }
7188a1d9fcSBenjamin Kramer 
72b1f29cecSPavel Labath uint8_t DataExtractor::getU8(uint64_t *offset_ptr, llvm::Error *Err) const {
73b1f29cecSPavel Labath   return getU<uint8_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err);
7488a1d9fcSBenjamin Kramer }
7588a1d9fcSBenjamin Kramer 
7688a1d9fcSBenjamin Kramer uint8_t *
77f5f35c5cSIgor Kudrin DataExtractor::getU8(uint64_t *offset_ptr, uint8_t *dst, uint32_t count) const {
7888a1d9fcSBenjamin Kramer   return getUs<uint8_t>(offset_ptr, dst, count, this, IsLittleEndian,
79b1f29cecSPavel Labath                         Data.data(), nullptr);
8088a1d9fcSBenjamin Kramer }
8188a1d9fcSBenjamin Kramer 
82b1f29cecSPavel Labath uint8_t *DataExtractor::getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const {
83b1f29cecSPavel Labath   return getUs<uint8_t>(&C.Offset, Dst, Count, this, IsLittleEndian,
84b1f29cecSPavel Labath                         Data.data(), &C.Err);
85b1f29cecSPavel Labath }
86b1f29cecSPavel Labath 
87b1f29cecSPavel Labath uint16_t DataExtractor::getU16(uint64_t *offset_ptr, llvm::Error *Err) const {
88b1f29cecSPavel Labath   return getU<uint16_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err);
8988a1d9fcSBenjamin Kramer }
9088a1d9fcSBenjamin Kramer 
91f5f35c5cSIgor Kudrin uint16_t *DataExtractor::getU16(uint64_t *offset_ptr, uint16_t *dst,
9288a1d9fcSBenjamin Kramer                                 uint32_t count) const {
9388a1d9fcSBenjamin Kramer   return getUs<uint16_t>(offset_ptr, dst, count, this, IsLittleEndian,
94b1f29cecSPavel Labath                          Data.data(), nullptr);
9588a1d9fcSBenjamin Kramer }
9688a1d9fcSBenjamin Kramer 
97f5f35c5cSIgor Kudrin uint32_t DataExtractor::getU24(uint64_t *offset_ptr) const {
98258927e3SWolfgang Pieb   uint24_t ExtractedVal =
99b1f29cecSPavel Labath       getU<uint24_t>(offset_ptr, this, IsLittleEndian, Data.data(), nullptr);
100258927e3SWolfgang Pieb   // The 3 bytes are in the correct byte order for the host.
101258927e3SWolfgang Pieb   return ExtractedVal.getAsUint32(sys::IsLittleEndianHost);
102258927e3SWolfgang Pieb }
103258927e3SWolfgang Pieb 
104b1f29cecSPavel Labath uint32_t DataExtractor::getU32(uint64_t *offset_ptr, llvm::Error *Err) const {
105b1f29cecSPavel Labath   return getU<uint32_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err);
10688a1d9fcSBenjamin Kramer }
10788a1d9fcSBenjamin Kramer 
108f5f35c5cSIgor Kudrin uint32_t *DataExtractor::getU32(uint64_t *offset_ptr, uint32_t *dst,
10988a1d9fcSBenjamin Kramer                                 uint32_t count) const {
11088a1d9fcSBenjamin Kramer   return getUs<uint32_t>(offset_ptr, dst, count, this, IsLittleEndian,
111b1f29cecSPavel Labath                          Data.data(), nullptr);
11288a1d9fcSBenjamin Kramer }
11388a1d9fcSBenjamin Kramer 
114b1f29cecSPavel Labath uint64_t DataExtractor::getU64(uint64_t *offset_ptr, llvm::Error *Err) const {
115b1f29cecSPavel Labath   return getU<uint64_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err);
11688a1d9fcSBenjamin Kramer }
11788a1d9fcSBenjamin Kramer 
118f5f35c5cSIgor Kudrin uint64_t *DataExtractor::getU64(uint64_t *offset_ptr, uint64_t *dst,
11988a1d9fcSBenjamin Kramer                                 uint32_t count) const {
12088a1d9fcSBenjamin Kramer   return getUs<uint64_t>(offset_ptr, dst, count, this, IsLittleEndian,
121b1f29cecSPavel Labath                          Data.data(), nullptr);
12288a1d9fcSBenjamin Kramer }
12388a1d9fcSBenjamin Kramer 
124b1f29cecSPavel Labath uint64_t DataExtractor::getUnsigned(uint64_t *offset_ptr, uint32_t byte_size,
125b1f29cecSPavel Labath                                     llvm::Error *Err) const {
12688a1d9fcSBenjamin Kramer   switch (byte_size) {
12788a1d9fcSBenjamin Kramer   case 1:
128b1f29cecSPavel Labath     return getU8(offset_ptr, Err);
12988a1d9fcSBenjamin Kramer   case 2:
130b1f29cecSPavel Labath     return getU16(offset_ptr, Err);
13188a1d9fcSBenjamin Kramer   case 4:
132b1f29cecSPavel Labath     return getU32(offset_ptr, Err);
13388a1d9fcSBenjamin Kramer   case 8:
134b1f29cecSPavel Labath     return getU64(offset_ptr, Err);
13588a1d9fcSBenjamin Kramer   }
13688a1d9fcSBenjamin Kramer   llvm_unreachable("getUnsigned unhandled case!");
13788a1d9fcSBenjamin Kramer }
13888a1d9fcSBenjamin Kramer 
13988a1d9fcSBenjamin Kramer int64_t
140f5f35c5cSIgor Kudrin DataExtractor::getSigned(uint64_t *offset_ptr, uint32_t byte_size) const {
14188a1d9fcSBenjamin Kramer   switch (byte_size) {
14288a1d9fcSBenjamin Kramer   case 1:
14388a1d9fcSBenjamin Kramer     return (int8_t)getU8(offset_ptr);
14488a1d9fcSBenjamin Kramer   case 2:
14588a1d9fcSBenjamin Kramer     return (int16_t)getU16(offset_ptr);
14688a1d9fcSBenjamin Kramer   case 4:
14788a1d9fcSBenjamin Kramer     return (int32_t)getU32(offset_ptr);
14888a1d9fcSBenjamin Kramer   case 8:
14988a1d9fcSBenjamin Kramer     return (int64_t)getU64(offset_ptr);
15088a1d9fcSBenjamin Kramer   }
15188a1d9fcSBenjamin Kramer   llvm_unreachable("getSigned unhandled case!");
15288a1d9fcSBenjamin Kramer }
15388a1d9fcSBenjamin Kramer 
154f5f35c5cSIgor Kudrin const char *DataExtractor::getCStr(uint64_t *offset_ptr) const {
155f5f35c5cSIgor Kudrin   uint64_t offset = *offset_ptr;
15688a1d9fcSBenjamin Kramer   StringRef::size_type pos = Data.find('\0', offset);
15788a1d9fcSBenjamin Kramer   if (pos != StringRef::npos) {
15888a1d9fcSBenjamin Kramer     *offset_ptr = pos + 1;
15988a1d9fcSBenjamin Kramer     return Data.data() + offset;
16088a1d9fcSBenjamin Kramer   }
161c10719f5SCraig Topper   return nullptr;
16288a1d9fcSBenjamin Kramer }
16388a1d9fcSBenjamin Kramer 
164f5f35c5cSIgor Kudrin StringRef DataExtractor::getCStrRef(uint64_t *offset_ptr) const {
165f5f35c5cSIgor Kudrin   uint64_t Start = *offset_ptr;
166ba1c9156SPaul Robinson   StringRef::size_type Pos = Data.find('\0', Start);
167ba1c9156SPaul Robinson   if (Pos != StringRef::npos) {
168f5f35c5cSIgor Kudrin     *offset_ptr = Pos + 1;
169ba1c9156SPaul Robinson     return StringRef(Data.data() + Start, Pos - Start);
170ba1c9156SPaul Robinson   }
171ba1c9156SPaul Robinson   return StringRef();
172ba1c9156SPaul Robinson }
173ba1c9156SPaul Robinson 
174*df8dda67SGreg Clayton StringRef DataExtractor::getFixedLengthString(uint64_t *OffsetPtr,
175*df8dda67SGreg Clayton                                               uint64_t Length,
176*df8dda67SGreg Clayton                                               StringRef TrimChars) const {
177*df8dda67SGreg Clayton   StringRef Bytes(getBytes(OffsetPtr, Length));
178*df8dda67SGreg Clayton   return Bytes.trim(TrimChars);
179*df8dda67SGreg Clayton }
180*df8dda67SGreg Clayton 
181*df8dda67SGreg Clayton StringRef DataExtractor::getBytes(uint64_t *OffsetPtr, uint64_t Length) const {
182*df8dda67SGreg Clayton   if (!isValidOffsetForDataOfSize(*OffsetPtr, Length))
183*df8dda67SGreg Clayton     return StringRef();
184*df8dda67SGreg Clayton   StringRef Result = Data.substr(*OffsetPtr, Length);
185*df8dda67SGreg Clayton   *OffsetPtr += Length;
186*df8dda67SGreg Clayton   return Result;
187*df8dda67SGreg Clayton }
188*df8dda67SGreg Clayton 
189b1f29cecSPavel Labath uint64_t DataExtractor::getULEB128(uint64_t *offset_ptr,
190b1f29cecSPavel Labath                                    llvm::Error *Err) const {
1918242f35dSDavid Blaikie   assert(*offset_ptr <= Data.size());
192b1f29cecSPavel Labath   ErrorAsOutParameter ErrAsOut(Err);
193b1f29cecSPavel Labath   if (isError(Err))
194b1f29cecSPavel Labath     return 0;
1958242f35dSDavid Blaikie 
1968242f35dSDavid Blaikie   const char *error;
1978242f35dSDavid Blaikie   unsigned bytes_read;
1988242f35dSDavid Blaikie   uint64_t result = decodeULEB128(
1998242f35dSDavid Blaikie       reinterpret_cast<const uint8_t *>(Data.data() + *offset_ptr), &bytes_read,
2008242f35dSDavid Blaikie       reinterpret_cast<const uint8_t *>(Data.data() + Data.size()), &error);
201b1f29cecSPavel Labath   if (error) {
202b1f29cecSPavel Labath     if (Err)
203b1f29cecSPavel Labath       *Err = createStringError(errc::illegal_byte_sequence, error);
20488a1d9fcSBenjamin Kramer     return 0;
205b1f29cecSPavel Labath   }
2068242f35dSDavid Blaikie   *offset_ptr += bytes_read;
20788a1d9fcSBenjamin Kramer   return result;
20888a1d9fcSBenjamin Kramer }
20988a1d9fcSBenjamin Kramer 
210f5f35c5cSIgor Kudrin int64_t DataExtractor::getSLEB128(uint64_t *offset_ptr) const {
211f895e1bdSDavid Blaikie   assert(*offset_ptr <= Data.size());
212f895e1bdSDavid Blaikie 
213f895e1bdSDavid Blaikie   const char *error;
214f895e1bdSDavid Blaikie   unsigned bytes_read;
215f895e1bdSDavid Blaikie   int64_t result = decodeSLEB128(
216f895e1bdSDavid Blaikie       reinterpret_cast<const uint8_t *>(Data.data() + *offset_ptr), &bytes_read,
217f895e1bdSDavid Blaikie       reinterpret_cast<const uint8_t *>(Data.data() + Data.size()), &error);
218f895e1bdSDavid Blaikie   if (error)
21988a1d9fcSBenjamin Kramer     return 0;
220f895e1bdSDavid Blaikie   *offset_ptr += bytes_read;
22188a1d9fcSBenjamin Kramer   return result;
22288a1d9fcSBenjamin Kramer }
223b1f29cecSPavel Labath 
224b1f29cecSPavel Labath void DataExtractor::skip(Cursor &C, uint64_t Length) const {
225b1f29cecSPavel Labath   ErrorAsOutParameter ErrAsOut(&C.Err);
226b1f29cecSPavel Labath   if (isError(&C.Err))
227b1f29cecSPavel Labath     return;
228b1f29cecSPavel Labath 
229b1f29cecSPavel Labath   if (isValidOffsetForDataOfSize(C.Offset, Length))
230b1f29cecSPavel Labath     C.Offset += Length;
231b1f29cecSPavel Labath   else
232b1f29cecSPavel Labath     unexpectedEndReached(&C.Err);
233b1f29cecSPavel Labath }
234