188a1d9fcSBenjamin Kramer //===-- DataExtractor.cpp -------------------------------------------------===// 288a1d9fcSBenjamin Kramer // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 688a1d9fcSBenjamin Kramer // 788a1d9fcSBenjamin Kramer //===----------------------------------------------------------------------===// 888a1d9fcSBenjamin Kramer 988a1d9fcSBenjamin Kramer #include "llvm/Support/DataExtractor.h" 10b1f29cecSPavel Labath #include "llvm/Support/Errc.h" 1188a1d9fcSBenjamin Kramer #include "llvm/Support/ErrorHandling.h" 1288a1d9fcSBenjamin Kramer #include "llvm/Support/Host.h" 138242f35dSDavid Blaikie #include "llvm/Support/LEB128.h" 14b1f29cecSPavel Labath #include "llvm/Support/SwapByteOrder.h" 15b1f29cecSPavel Labath 1688a1d9fcSBenjamin Kramer using namespace llvm; 1788a1d9fcSBenjamin Kramer 18b1f29cecSPavel Labath static void unexpectedEndReached(Error *E) { 19b1f29cecSPavel Labath if (E) 20b1f29cecSPavel Labath *E = createStringError(errc::illegal_byte_sequence, 21b1f29cecSPavel Labath "unexpected end of data"); 22b1f29cecSPavel Labath } 23b1f29cecSPavel Labath 24b1f29cecSPavel Labath static bool isError(Error *E) { return E && *E; } 25b1f29cecSPavel Labath 2688a1d9fcSBenjamin Kramer template <typename T> 27f5f35c5cSIgor Kudrin static T getU(uint64_t *offset_ptr, const DataExtractor *de, 28b1f29cecSPavel Labath bool isLittleEndian, const char *Data, llvm::Error *Err) { 29b1f29cecSPavel Labath ErrorAsOutParameter ErrAsOut(Err); 3088a1d9fcSBenjamin Kramer T val = 0; 31b1f29cecSPavel Labath if (isError(Err)) 32b1f29cecSPavel Labath return val; 33b1f29cecSPavel Labath 34f5f35c5cSIgor Kudrin uint64_t offset = *offset_ptr; 35b1f29cecSPavel Labath if (!de->isValidOffsetForDataOfSize(offset, sizeof(T))) { 36b1f29cecSPavel Labath unexpectedEndReached(Err); 37b1f29cecSPavel Labath return val; 38b1f29cecSPavel Labath } 3988a1d9fcSBenjamin Kramer std::memcpy(&val, &Data[offset], sizeof(val)); 4041cb64f4SRafael Espindola if (sys::IsLittleEndianHost != isLittleEndian) 419aea8432SArtyom Skrobov sys::swapByteOrder(val); 4288a1d9fcSBenjamin Kramer 4388a1d9fcSBenjamin Kramer // Advance the offset 4488a1d9fcSBenjamin Kramer *offset_ptr += sizeof(val); 4588a1d9fcSBenjamin Kramer return val; 4688a1d9fcSBenjamin Kramer } 4788a1d9fcSBenjamin Kramer 4888a1d9fcSBenjamin Kramer template <typename T> 49f5f35c5cSIgor Kudrin static T *getUs(uint64_t *offset_ptr, T *dst, uint32_t count, 50b1f29cecSPavel Labath const DataExtractor *de, bool isLittleEndian, const char *Data, 51b1f29cecSPavel Labath llvm::Error *Err) { 52b1f29cecSPavel Labath ErrorAsOutParameter ErrAsOut(Err); 53b1f29cecSPavel Labath if (isError(Err)) 54b1f29cecSPavel Labath return nullptr; 55b1f29cecSPavel Labath 56f5f35c5cSIgor Kudrin uint64_t offset = *offset_ptr; 5788a1d9fcSBenjamin Kramer 58b1f29cecSPavel Labath if (!de->isValidOffsetForDataOfSize(offset, sizeof(*dst) * count)) { 59b1f29cecSPavel Labath unexpectedEndReached(Err); 60b1f29cecSPavel Labath return nullptr; 61b1f29cecSPavel Labath } 6288a1d9fcSBenjamin Kramer for (T *value_ptr = dst, *end = dst + count; value_ptr != end; 6388a1d9fcSBenjamin Kramer ++value_ptr, offset += sizeof(*dst)) 64b1f29cecSPavel Labath *value_ptr = getU<T>(offset_ptr, de, isLittleEndian, Data, Err); 6588a1d9fcSBenjamin Kramer // Advance the offset 6688a1d9fcSBenjamin Kramer *offset_ptr = offset; 6788a1d9fcSBenjamin Kramer // Return a non-NULL pointer to the converted data as an indicator of 6888a1d9fcSBenjamin Kramer // success 6988a1d9fcSBenjamin Kramer return dst; 7088a1d9fcSBenjamin Kramer } 7188a1d9fcSBenjamin Kramer 72b1f29cecSPavel Labath uint8_t DataExtractor::getU8(uint64_t *offset_ptr, llvm::Error *Err) const { 73b1f29cecSPavel Labath return getU<uint8_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err); 7488a1d9fcSBenjamin Kramer } 7588a1d9fcSBenjamin Kramer 7688a1d9fcSBenjamin Kramer uint8_t * 77f5f35c5cSIgor Kudrin DataExtractor::getU8(uint64_t *offset_ptr, uint8_t *dst, uint32_t count) const { 7888a1d9fcSBenjamin Kramer return getUs<uint8_t>(offset_ptr, dst, count, this, IsLittleEndian, 79b1f29cecSPavel Labath Data.data(), nullptr); 8088a1d9fcSBenjamin Kramer } 8188a1d9fcSBenjamin Kramer 82b1f29cecSPavel Labath uint8_t *DataExtractor::getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const { 83b1f29cecSPavel Labath return getUs<uint8_t>(&C.Offset, Dst, Count, this, IsLittleEndian, 84b1f29cecSPavel Labath Data.data(), &C.Err); 85b1f29cecSPavel Labath } 86b1f29cecSPavel Labath 87b1f29cecSPavel Labath uint16_t DataExtractor::getU16(uint64_t *offset_ptr, llvm::Error *Err) const { 88b1f29cecSPavel Labath return getU<uint16_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err); 8988a1d9fcSBenjamin Kramer } 9088a1d9fcSBenjamin Kramer 91f5f35c5cSIgor Kudrin uint16_t *DataExtractor::getU16(uint64_t *offset_ptr, uint16_t *dst, 9288a1d9fcSBenjamin Kramer uint32_t count) const { 9388a1d9fcSBenjamin Kramer return getUs<uint16_t>(offset_ptr, dst, count, this, IsLittleEndian, 94b1f29cecSPavel Labath Data.data(), nullptr); 9588a1d9fcSBenjamin Kramer } 9688a1d9fcSBenjamin Kramer 97f5f35c5cSIgor Kudrin uint32_t DataExtractor::getU24(uint64_t *offset_ptr) const { 98258927e3SWolfgang Pieb uint24_t ExtractedVal = 99b1f29cecSPavel Labath getU<uint24_t>(offset_ptr, this, IsLittleEndian, Data.data(), nullptr); 100258927e3SWolfgang Pieb // The 3 bytes are in the correct byte order for the host. 101258927e3SWolfgang Pieb return ExtractedVal.getAsUint32(sys::IsLittleEndianHost); 102258927e3SWolfgang Pieb } 103258927e3SWolfgang Pieb 104b1f29cecSPavel Labath uint32_t DataExtractor::getU32(uint64_t *offset_ptr, llvm::Error *Err) const { 105b1f29cecSPavel Labath return getU<uint32_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err); 10688a1d9fcSBenjamin Kramer } 10788a1d9fcSBenjamin Kramer 108f5f35c5cSIgor Kudrin uint32_t *DataExtractor::getU32(uint64_t *offset_ptr, uint32_t *dst, 10988a1d9fcSBenjamin Kramer uint32_t count) const { 11088a1d9fcSBenjamin Kramer return getUs<uint32_t>(offset_ptr, dst, count, this, IsLittleEndian, 111b1f29cecSPavel Labath Data.data(), nullptr); 11288a1d9fcSBenjamin Kramer } 11388a1d9fcSBenjamin Kramer 114b1f29cecSPavel Labath uint64_t DataExtractor::getU64(uint64_t *offset_ptr, llvm::Error *Err) const { 115b1f29cecSPavel Labath return getU<uint64_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err); 11688a1d9fcSBenjamin Kramer } 11788a1d9fcSBenjamin Kramer 118f5f35c5cSIgor Kudrin uint64_t *DataExtractor::getU64(uint64_t *offset_ptr, uint64_t *dst, 11988a1d9fcSBenjamin Kramer uint32_t count) const { 12088a1d9fcSBenjamin Kramer return getUs<uint64_t>(offset_ptr, dst, count, this, IsLittleEndian, 121b1f29cecSPavel Labath Data.data(), nullptr); 12288a1d9fcSBenjamin Kramer } 12388a1d9fcSBenjamin Kramer 124b1f29cecSPavel Labath uint64_t DataExtractor::getUnsigned(uint64_t *offset_ptr, uint32_t byte_size, 125b1f29cecSPavel Labath llvm::Error *Err) const { 12688a1d9fcSBenjamin Kramer switch (byte_size) { 12788a1d9fcSBenjamin Kramer case 1: 128b1f29cecSPavel Labath return getU8(offset_ptr, Err); 12988a1d9fcSBenjamin Kramer case 2: 130b1f29cecSPavel Labath return getU16(offset_ptr, Err); 13188a1d9fcSBenjamin Kramer case 4: 132b1f29cecSPavel Labath return getU32(offset_ptr, Err); 13388a1d9fcSBenjamin Kramer case 8: 134b1f29cecSPavel Labath return getU64(offset_ptr, Err); 13588a1d9fcSBenjamin Kramer } 13688a1d9fcSBenjamin Kramer llvm_unreachable("getUnsigned unhandled case!"); 13788a1d9fcSBenjamin Kramer } 13888a1d9fcSBenjamin Kramer 13988a1d9fcSBenjamin Kramer int64_t 140f5f35c5cSIgor Kudrin DataExtractor::getSigned(uint64_t *offset_ptr, uint32_t byte_size) const { 14188a1d9fcSBenjamin Kramer switch (byte_size) { 14288a1d9fcSBenjamin Kramer case 1: 14388a1d9fcSBenjamin Kramer return (int8_t)getU8(offset_ptr); 14488a1d9fcSBenjamin Kramer case 2: 14588a1d9fcSBenjamin Kramer return (int16_t)getU16(offset_ptr); 14688a1d9fcSBenjamin Kramer case 4: 14788a1d9fcSBenjamin Kramer return (int32_t)getU32(offset_ptr); 14888a1d9fcSBenjamin Kramer case 8: 14988a1d9fcSBenjamin Kramer return (int64_t)getU64(offset_ptr); 15088a1d9fcSBenjamin Kramer } 15188a1d9fcSBenjamin Kramer llvm_unreachable("getSigned unhandled case!"); 15288a1d9fcSBenjamin Kramer } 15388a1d9fcSBenjamin Kramer 154f5f35c5cSIgor Kudrin const char *DataExtractor::getCStr(uint64_t *offset_ptr) const { 155f5f35c5cSIgor Kudrin uint64_t offset = *offset_ptr; 15688a1d9fcSBenjamin Kramer StringRef::size_type pos = Data.find('\0', offset); 15788a1d9fcSBenjamin Kramer if (pos != StringRef::npos) { 15888a1d9fcSBenjamin Kramer *offset_ptr = pos + 1; 15988a1d9fcSBenjamin Kramer return Data.data() + offset; 16088a1d9fcSBenjamin Kramer } 161c10719f5SCraig Topper return nullptr; 16288a1d9fcSBenjamin Kramer } 16388a1d9fcSBenjamin Kramer 164f5f35c5cSIgor Kudrin StringRef DataExtractor::getCStrRef(uint64_t *offset_ptr) const { 165f5f35c5cSIgor Kudrin uint64_t Start = *offset_ptr; 166ba1c9156SPaul Robinson StringRef::size_type Pos = Data.find('\0', Start); 167ba1c9156SPaul Robinson if (Pos != StringRef::npos) { 168f5f35c5cSIgor Kudrin *offset_ptr = Pos + 1; 169ba1c9156SPaul Robinson return StringRef(Data.data() + Start, Pos - Start); 170ba1c9156SPaul Robinson } 171ba1c9156SPaul Robinson return StringRef(); 172ba1c9156SPaul Robinson } 173ba1c9156SPaul Robinson 174*df8dda67SGreg Clayton StringRef DataExtractor::getFixedLengthString(uint64_t *OffsetPtr, 175*df8dda67SGreg Clayton uint64_t Length, 176*df8dda67SGreg Clayton StringRef TrimChars) const { 177*df8dda67SGreg Clayton StringRef Bytes(getBytes(OffsetPtr, Length)); 178*df8dda67SGreg Clayton return Bytes.trim(TrimChars); 179*df8dda67SGreg Clayton } 180*df8dda67SGreg Clayton 181*df8dda67SGreg Clayton StringRef DataExtractor::getBytes(uint64_t *OffsetPtr, uint64_t Length) const { 182*df8dda67SGreg Clayton if (!isValidOffsetForDataOfSize(*OffsetPtr, Length)) 183*df8dda67SGreg Clayton return StringRef(); 184*df8dda67SGreg Clayton StringRef Result = Data.substr(*OffsetPtr, Length); 185*df8dda67SGreg Clayton *OffsetPtr += Length; 186*df8dda67SGreg Clayton return Result; 187*df8dda67SGreg Clayton } 188*df8dda67SGreg Clayton 189b1f29cecSPavel Labath uint64_t DataExtractor::getULEB128(uint64_t *offset_ptr, 190b1f29cecSPavel Labath llvm::Error *Err) const { 1918242f35dSDavid Blaikie assert(*offset_ptr <= Data.size()); 192b1f29cecSPavel Labath ErrorAsOutParameter ErrAsOut(Err); 193b1f29cecSPavel Labath if (isError(Err)) 194b1f29cecSPavel Labath return 0; 1958242f35dSDavid Blaikie 1968242f35dSDavid Blaikie const char *error; 1978242f35dSDavid Blaikie unsigned bytes_read; 1988242f35dSDavid Blaikie uint64_t result = decodeULEB128( 1998242f35dSDavid Blaikie reinterpret_cast<const uint8_t *>(Data.data() + *offset_ptr), &bytes_read, 2008242f35dSDavid Blaikie reinterpret_cast<const uint8_t *>(Data.data() + Data.size()), &error); 201b1f29cecSPavel Labath if (error) { 202b1f29cecSPavel Labath if (Err) 203b1f29cecSPavel Labath *Err = createStringError(errc::illegal_byte_sequence, error); 20488a1d9fcSBenjamin Kramer return 0; 205b1f29cecSPavel Labath } 2068242f35dSDavid Blaikie *offset_ptr += bytes_read; 20788a1d9fcSBenjamin Kramer return result; 20888a1d9fcSBenjamin Kramer } 20988a1d9fcSBenjamin Kramer 210f5f35c5cSIgor Kudrin int64_t DataExtractor::getSLEB128(uint64_t *offset_ptr) const { 211f895e1bdSDavid Blaikie assert(*offset_ptr <= Data.size()); 212f895e1bdSDavid Blaikie 213f895e1bdSDavid Blaikie const char *error; 214f895e1bdSDavid Blaikie unsigned bytes_read; 215f895e1bdSDavid Blaikie int64_t result = decodeSLEB128( 216f895e1bdSDavid Blaikie reinterpret_cast<const uint8_t *>(Data.data() + *offset_ptr), &bytes_read, 217f895e1bdSDavid Blaikie reinterpret_cast<const uint8_t *>(Data.data() + Data.size()), &error); 218f895e1bdSDavid Blaikie if (error) 21988a1d9fcSBenjamin Kramer return 0; 220f895e1bdSDavid Blaikie *offset_ptr += bytes_read; 22188a1d9fcSBenjamin Kramer return result; 22288a1d9fcSBenjamin Kramer } 223b1f29cecSPavel Labath 224b1f29cecSPavel Labath void DataExtractor::skip(Cursor &C, uint64_t Length) const { 225b1f29cecSPavel Labath ErrorAsOutParameter ErrAsOut(&C.Err); 226b1f29cecSPavel Labath if (isError(&C.Err)) 227b1f29cecSPavel Labath return; 228b1f29cecSPavel Labath 229b1f29cecSPavel Labath if (isValidOffsetForDataOfSize(C.Offset, Length)) 230b1f29cecSPavel Labath C.Offset += Length; 231b1f29cecSPavel Labath else 232b1f29cecSPavel Labath unexpectedEndReached(&C.Err); 233b1f29cecSPavel Labath } 234