188a1d9fcSBenjamin Kramer //===-- DataExtractor.cpp -------------------------------------------------===//
288a1d9fcSBenjamin Kramer //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
688a1d9fcSBenjamin Kramer //
788a1d9fcSBenjamin Kramer //===----------------------------------------------------------------------===//
888a1d9fcSBenjamin Kramer
988a1d9fcSBenjamin Kramer #include "llvm/Support/DataExtractor.h"
10b1f29cecSPavel Labath #include "llvm/Support/Errc.h"
1188a1d9fcSBenjamin Kramer #include "llvm/Support/ErrorHandling.h"
128242f35dSDavid Blaikie #include "llvm/Support/LEB128.h"
13b1f29cecSPavel Labath #include "llvm/Support/SwapByteOrder.h"
14b1f29cecSPavel Labath
1588a1d9fcSBenjamin Kramer using namespace llvm;
1688a1d9fcSBenjamin Kramer
prepareRead(uint64_t Offset,uint64_t Size,Error * E) const17*04aea769SPavel Labath bool DataExtractor::prepareRead(uint64_t Offset, uint64_t Size,
18*04aea769SPavel Labath Error *E) const {
19*04aea769SPavel Labath if (isValidOffsetForDataOfSize(Offset, Size))
20*04aea769SPavel Labath return true;
21*04aea769SPavel Labath if (E) {
22*04aea769SPavel Labath if (Offset <= Data.size())
23*04aea769SPavel Labath *E = createStringError(
24*04aea769SPavel Labath errc::illegal_byte_sequence,
25*04aea769SPavel Labath "unexpected end of data at offset 0x%zx while reading [0x%" PRIx64
26*04aea769SPavel Labath ", 0x%" PRIx64 ")",
27*04aea769SPavel Labath Data.size(), Offset, Offset + Size);
28*04aea769SPavel Labath else
29*04aea769SPavel Labath *E = createStringError(errc::invalid_argument,
30*04aea769SPavel Labath "offset 0x%" PRIx64
31*04aea769SPavel Labath " is beyond the end of data at 0x%zx",
32*04aea769SPavel Labath Offset, Data.size());
33*04aea769SPavel Labath }
34*04aea769SPavel Labath return false;
35b1f29cecSPavel Labath }
36b1f29cecSPavel Labath
isError(Error * E)37b1f29cecSPavel Labath static bool isError(Error *E) { return E && *E; }
38b1f29cecSPavel Labath
3988a1d9fcSBenjamin Kramer template <typename T>
getU(uint64_t * offset_ptr,Error * Err) const40*04aea769SPavel Labath T DataExtractor::getU(uint64_t *offset_ptr, Error *Err) const {
41b1f29cecSPavel Labath ErrorAsOutParameter ErrAsOut(Err);
4288a1d9fcSBenjamin Kramer T val = 0;
43b1f29cecSPavel Labath if (isError(Err))
44b1f29cecSPavel Labath return val;
45b1f29cecSPavel Labath
46f5f35c5cSIgor Kudrin uint64_t offset = *offset_ptr;
47*04aea769SPavel Labath if (!prepareRead(offset, sizeof(T), Err))
48b1f29cecSPavel Labath return val;
49*04aea769SPavel Labath std::memcpy(&val, &Data.data()[offset], sizeof(val));
50*04aea769SPavel Labath if (sys::IsLittleEndianHost != IsLittleEndian)
519aea8432SArtyom Skrobov sys::swapByteOrder(val);
5288a1d9fcSBenjamin Kramer
5388a1d9fcSBenjamin Kramer // Advance the offset
5488a1d9fcSBenjamin Kramer *offset_ptr += sizeof(val);
5588a1d9fcSBenjamin Kramer return val;
5688a1d9fcSBenjamin Kramer }
5788a1d9fcSBenjamin Kramer
5888a1d9fcSBenjamin Kramer template <typename T>
getUs(uint64_t * offset_ptr,T * dst,uint32_t count,Error * Err) const59*04aea769SPavel Labath T *DataExtractor::getUs(uint64_t *offset_ptr, T *dst, uint32_t count,
60*04aea769SPavel Labath Error *Err) const {
61b1f29cecSPavel Labath ErrorAsOutParameter ErrAsOut(Err);
62b1f29cecSPavel Labath if (isError(Err))
63b1f29cecSPavel Labath return nullptr;
64b1f29cecSPavel Labath
65f5f35c5cSIgor Kudrin uint64_t offset = *offset_ptr;
6688a1d9fcSBenjamin Kramer
67*04aea769SPavel Labath if (!prepareRead(offset, sizeof(*dst) * count, Err))
68b1f29cecSPavel Labath return nullptr;
6988a1d9fcSBenjamin Kramer for (T *value_ptr = dst, *end = dst + count; value_ptr != end;
7088a1d9fcSBenjamin Kramer ++value_ptr, offset += sizeof(*dst))
71*04aea769SPavel Labath *value_ptr = getU<T>(offset_ptr, Err);
7288a1d9fcSBenjamin Kramer // Advance the offset
7388a1d9fcSBenjamin Kramer *offset_ptr = offset;
7488a1d9fcSBenjamin Kramer // Return a non-NULL pointer to the converted data as an indicator of
7588a1d9fcSBenjamin Kramer // success
7688a1d9fcSBenjamin Kramer return dst;
7788a1d9fcSBenjamin Kramer }
7888a1d9fcSBenjamin Kramer
getU8(uint64_t * offset_ptr,llvm::Error * Err) const79b1f29cecSPavel Labath uint8_t DataExtractor::getU8(uint64_t *offset_ptr, llvm::Error *Err) const {
80*04aea769SPavel Labath return getU<uint8_t>(offset_ptr, Err);
8188a1d9fcSBenjamin Kramer }
8288a1d9fcSBenjamin Kramer
getU8(uint64_t * offset_ptr,uint8_t * dst,uint32_t count) const83*04aea769SPavel Labath uint8_t *DataExtractor::getU8(uint64_t *offset_ptr, uint8_t *dst,
84*04aea769SPavel Labath uint32_t count) const {
85*04aea769SPavel Labath return getUs<uint8_t>(offset_ptr, dst, count, nullptr);
8688a1d9fcSBenjamin Kramer }
8788a1d9fcSBenjamin Kramer
getU8(Cursor & C,uint8_t * Dst,uint32_t Count) const88b1f29cecSPavel Labath uint8_t *DataExtractor::getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const {
89*04aea769SPavel Labath return getUs<uint8_t>(&C.Offset, Dst, Count, &C.Err);
90b1f29cecSPavel Labath }
91b1f29cecSPavel Labath
getU16(uint64_t * offset_ptr,llvm::Error * Err) const92b1f29cecSPavel Labath uint16_t DataExtractor::getU16(uint64_t *offset_ptr, llvm::Error *Err) const {
93*04aea769SPavel Labath return getU<uint16_t>(offset_ptr, Err);
9488a1d9fcSBenjamin Kramer }
9588a1d9fcSBenjamin Kramer
getU16(uint64_t * offset_ptr,uint16_t * dst,uint32_t count) const96f5f35c5cSIgor Kudrin uint16_t *DataExtractor::getU16(uint64_t *offset_ptr, uint16_t *dst,
9788a1d9fcSBenjamin Kramer uint32_t count) const {
98*04aea769SPavel Labath return getUs<uint16_t>(offset_ptr, dst, count, nullptr);
9988a1d9fcSBenjamin Kramer }
10088a1d9fcSBenjamin Kramer
getU24(uint64_t * OffsetPtr,Error * Err) const1019154a639SPavel Labath uint32_t DataExtractor::getU24(uint64_t *OffsetPtr, Error *Err) const {
102*04aea769SPavel Labath uint24_t ExtractedVal = getU<uint24_t>(OffsetPtr, Err);
103258927e3SWolfgang Pieb // The 3 bytes are in the correct byte order for the host.
104258927e3SWolfgang Pieb return ExtractedVal.getAsUint32(sys::IsLittleEndianHost);
105258927e3SWolfgang Pieb }
106258927e3SWolfgang Pieb
getU32(uint64_t * offset_ptr,llvm::Error * Err) const107b1f29cecSPavel Labath uint32_t DataExtractor::getU32(uint64_t *offset_ptr, llvm::Error *Err) const {
108*04aea769SPavel Labath return getU<uint32_t>(offset_ptr, Err);
10988a1d9fcSBenjamin Kramer }
11088a1d9fcSBenjamin Kramer
getU32(uint64_t * offset_ptr,uint32_t * dst,uint32_t count) const111f5f35c5cSIgor Kudrin uint32_t *DataExtractor::getU32(uint64_t *offset_ptr, uint32_t *dst,
11288a1d9fcSBenjamin Kramer uint32_t count) const {
113*04aea769SPavel Labath return getUs<uint32_t>(offset_ptr, dst, count, nullptr);
11488a1d9fcSBenjamin Kramer }
11588a1d9fcSBenjamin Kramer
getU64(uint64_t * offset_ptr,llvm::Error * Err) const116b1f29cecSPavel Labath uint64_t DataExtractor::getU64(uint64_t *offset_ptr, llvm::Error *Err) const {
117*04aea769SPavel Labath return getU<uint64_t>(offset_ptr, Err);
11888a1d9fcSBenjamin Kramer }
11988a1d9fcSBenjamin Kramer
getU64(uint64_t * offset_ptr,uint64_t * dst,uint32_t count) const120f5f35c5cSIgor Kudrin uint64_t *DataExtractor::getU64(uint64_t *offset_ptr, uint64_t *dst,
12188a1d9fcSBenjamin Kramer uint32_t count) const {
122*04aea769SPavel Labath return getUs<uint64_t>(offset_ptr, dst, count, nullptr);
12388a1d9fcSBenjamin Kramer }
12488a1d9fcSBenjamin Kramer
getUnsigned(uint64_t * offset_ptr,uint32_t byte_size,llvm::Error * Err) const125b1f29cecSPavel Labath uint64_t DataExtractor::getUnsigned(uint64_t *offset_ptr, uint32_t byte_size,
126b1f29cecSPavel Labath llvm::Error *Err) const {
12788a1d9fcSBenjamin Kramer switch (byte_size) {
12888a1d9fcSBenjamin Kramer case 1:
129b1f29cecSPavel Labath return getU8(offset_ptr, Err);
13088a1d9fcSBenjamin Kramer case 2:
131b1f29cecSPavel Labath return getU16(offset_ptr, Err);
13288a1d9fcSBenjamin Kramer case 4:
133b1f29cecSPavel Labath return getU32(offset_ptr, Err);
13488a1d9fcSBenjamin Kramer case 8:
135b1f29cecSPavel Labath return getU64(offset_ptr, Err);
13688a1d9fcSBenjamin Kramer }
13788a1d9fcSBenjamin Kramer llvm_unreachable("getUnsigned unhandled case!");
13888a1d9fcSBenjamin Kramer }
13988a1d9fcSBenjamin Kramer
14088a1d9fcSBenjamin Kramer int64_t
getSigned(uint64_t * offset_ptr,uint32_t byte_size) const141f5f35c5cSIgor Kudrin DataExtractor::getSigned(uint64_t *offset_ptr, uint32_t byte_size) const {
14288a1d9fcSBenjamin Kramer switch (byte_size) {
14388a1d9fcSBenjamin Kramer case 1:
14488a1d9fcSBenjamin Kramer return (int8_t)getU8(offset_ptr);
14588a1d9fcSBenjamin Kramer case 2:
14688a1d9fcSBenjamin Kramer return (int16_t)getU16(offset_ptr);
14788a1d9fcSBenjamin Kramer case 4:
14888a1d9fcSBenjamin Kramer return (int32_t)getU32(offset_ptr);
14988a1d9fcSBenjamin Kramer case 8:
15088a1d9fcSBenjamin Kramer return (int64_t)getU64(offset_ptr);
15188a1d9fcSBenjamin Kramer }
15288a1d9fcSBenjamin Kramer llvm_unreachable("getSigned unhandled case!");
15388a1d9fcSBenjamin Kramer }
15488a1d9fcSBenjamin Kramer
getCStrRef(uint64_t * OffsetPtr,Error * Err) const155a16fffa3SPavel Labath StringRef DataExtractor::getCStrRef(uint64_t *OffsetPtr, Error *Err) const {
156a16fffa3SPavel Labath ErrorAsOutParameter ErrAsOut(Err);
157a16fffa3SPavel Labath if (isError(Err))
158a16fffa3SPavel Labath return StringRef();
15988a1d9fcSBenjamin Kramer
160a16fffa3SPavel Labath uint64_t Start = *OffsetPtr;
161ba1c9156SPaul Robinson StringRef::size_type Pos = Data.find('\0', Start);
162ba1c9156SPaul Robinson if (Pos != StringRef::npos) {
163a16fffa3SPavel Labath *OffsetPtr = Pos + 1;
164ba1c9156SPaul Robinson return StringRef(Data.data() + Start, Pos - Start);
165ba1c9156SPaul Robinson }
166*04aea769SPavel Labath if (Err)
167*04aea769SPavel Labath *Err = createStringError(errc::illegal_byte_sequence,
168*04aea769SPavel Labath "no null terminated string at offset 0x%" PRIx64,
169*04aea769SPavel Labath Start);
170ba1c9156SPaul Robinson return StringRef();
171ba1c9156SPaul Robinson }
172ba1c9156SPaul Robinson
getFixedLengthString(uint64_t * OffsetPtr,uint64_t Length,StringRef TrimChars) const173df8dda67SGreg Clayton StringRef DataExtractor::getFixedLengthString(uint64_t *OffsetPtr,
174df8dda67SGreg Clayton uint64_t Length,
175df8dda67SGreg Clayton StringRef TrimChars) const {
176df8dda67SGreg Clayton StringRef Bytes(getBytes(OffsetPtr, Length));
177df8dda67SGreg Clayton return Bytes.trim(TrimChars);
178df8dda67SGreg Clayton }
179df8dda67SGreg Clayton
getBytes(uint64_t * OffsetPtr,uint64_t Length,Error * Err) const1809154a639SPavel Labath StringRef DataExtractor::getBytes(uint64_t *OffsetPtr, uint64_t Length,
1819154a639SPavel Labath Error *Err) const {
1829154a639SPavel Labath ErrorAsOutParameter ErrAsOut(Err);
1839154a639SPavel Labath if (isError(Err))
184df8dda67SGreg Clayton return StringRef();
1859154a639SPavel Labath
186*04aea769SPavel Labath if (!prepareRead(*OffsetPtr, Length, Err))
1879154a639SPavel Labath return StringRef();
1889154a639SPavel Labath
189df8dda67SGreg Clayton StringRef Result = Data.substr(*OffsetPtr, Length);
190df8dda67SGreg Clayton *OffsetPtr += Length;
191df8dda67SGreg Clayton return Result;
192df8dda67SGreg Clayton }
193df8dda67SGreg Clayton
1949154a639SPavel Labath template <typename T>
getLEB128(StringRef Data,uint64_t * OffsetPtr,Error * Err,T (& Decoder)(const uint8_t * p,unsigned * n,const uint8_t * end,const char ** error))1959154a639SPavel Labath static T getLEB128(StringRef Data, uint64_t *OffsetPtr, Error *Err,
1969154a639SPavel Labath T (&Decoder)(const uint8_t *p, unsigned *n,
1979154a639SPavel Labath const uint8_t *end, const char **error)) {
1989154a639SPavel Labath ArrayRef<uint8_t> Bytes = arrayRefFromStringRef(Data);
1999154a639SPavel Labath assert(*OffsetPtr <= Bytes.size());
200b1f29cecSPavel Labath ErrorAsOutParameter ErrAsOut(Err);
201b1f29cecSPavel Labath if (isError(Err))
2029154a639SPavel Labath return T();
2038242f35dSDavid Blaikie
2048242f35dSDavid Blaikie const char *error;
2058242f35dSDavid Blaikie unsigned bytes_read;
2069154a639SPavel Labath T result =
2079154a639SPavel Labath Decoder(Bytes.data() + *OffsetPtr, &bytes_read, Bytes.end(), &error);
208b1f29cecSPavel Labath if (error) {
209b1f29cecSPavel Labath if (Err)
2108d9070e0SJames Henderson *Err = createStringError(errc::illegal_byte_sequence,
2118d9070e0SJames Henderson "unable to decode LEB128 at offset 0x%8.8" PRIx64
2128d9070e0SJames Henderson ": %s",
2138d9070e0SJames Henderson *OffsetPtr, error);
2149154a639SPavel Labath return T();
215b1f29cecSPavel Labath }
2169154a639SPavel Labath *OffsetPtr += bytes_read;
21788a1d9fcSBenjamin Kramer return result;
21888a1d9fcSBenjamin Kramer }
21988a1d9fcSBenjamin Kramer
getULEB128(uint64_t * offset_ptr,Error * Err) const2209154a639SPavel Labath uint64_t DataExtractor::getULEB128(uint64_t *offset_ptr, Error *Err) const {
2219154a639SPavel Labath return getLEB128(Data, offset_ptr, Err, decodeULEB128);
2229154a639SPavel Labath }
223f895e1bdSDavid Blaikie
getSLEB128(uint64_t * offset_ptr,Error * Err) const2249154a639SPavel Labath int64_t DataExtractor::getSLEB128(uint64_t *offset_ptr, Error *Err) const {
2259154a639SPavel Labath return getLEB128(Data, offset_ptr, Err, decodeSLEB128);
22688a1d9fcSBenjamin Kramer }
227b1f29cecSPavel Labath
skip(Cursor & C,uint64_t Length) const228b1f29cecSPavel Labath void DataExtractor::skip(Cursor &C, uint64_t Length) const {
229b1f29cecSPavel Labath ErrorAsOutParameter ErrAsOut(&C.Err);
230b1f29cecSPavel Labath if (isError(&C.Err))
231b1f29cecSPavel Labath return;
232b1f29cecSPavel Labath
233*04aea769SPavel Labath if (prepareRead(C.Offset, Length, &C.Err))
234b1f29cecSPavel Labath C.Offset += Length;
235b1f29cecSPavel Labath }
236