188a1d9fcSBenjamin Kramer //===-- DataExtractor.cpp -------------------------------------------------===// 288a1d9fcSBenjamin Kramer // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 688a1d9fcSBenjamin Kramer // 788a1d9fcSBenjamin Kramer //===----------------------------------------------------------------------===// 888a1d9fcSBenjamin Kramer 988a1d9fcSBenjamin Kramer #include "llvm/Support/DataExtractor.h" 10b1f29cecSPavel Labath #include "llvm/Support/Errc.h" 1188a1d9fcSBenjamin Kramer #include "llvm/Support/ErrorHandling.h" 1288a1d9fcSBenjamin Kramer #include "llvm/Support/Host.h" 138242f35dSDavid Blaikie #include "llvm/Support/LEB128.h" 14b1f29cecSPavel Labath #include "llvm/Support/SwapByteOrder.h" 15b1f29cecSPavel Labath 1688a1d9fcSBenjamin Kramer using namespace llvm; 1788a1d9fcSBenjamin Kramer 185754a61eSPavel Labath static void unexpectedEndReached(Error *E, uint64_t Offset) { 19b1f29cecSPavel Labath if (E) 20b1f29cecSPavel Labath *E = createStringError(errc::illegal_byte_sequence, 215754a61eSPavel Labath "unexpected end of data at offset 0x%" PRIx64, 225754a61eSPavel Labath Offset); 23b1f29cecSPavel Labath } 24b1f29cecSPavel Labath 25b1f29cecSPavel Labath static bool isError(Error *E) { return E && *E; } 26b1f29cecSPavel Labath 2788a1d9fcSBenjamin Kramer template <typename T> 28f5f35c5cSIgor Kudrin static T getU(uint64_t *offset_ptr, const DataExtractor *de, 29b1f29cecSPavel Labath bool isLittleEndian, const char *Data, llvm::Error *Err) { 30b1f29cecSPavel Labath ErrorAsOutParameter ErrAsOut(Err); 3188a1d9fcSBenjamin Kramer T val = 0; 32b1f29cecSPavel Labath if (isError(Err)) 33b1f29cecSPavel Labath return val; 34b1f29cecSPavel Labath 35f5f35c5cSIgor Kudrin uint64_t offset = *offset_ptr; 36b1f29cecSPavel Labath if (!de->isValidOffsetForDataOfSize(offset, sizeof(T))) { 375754a61eSPavel Labath unexpectedEndReached(Err, offset); 38b1f29cecSPavel Labath return val; 39b1f29cecSPavel Labath } 4088a1d9fcSBenjamin Kramer std::memcpy(&val, &Data[offset], sizeof(val)); 4141cb64f4SRafael Espindola if (sys::IsLittleEndianHost != isLittleEndian) 429aea8432SArtyom Skrobov sys::swapByteOrder(val); 4388a1d9fcSBenjamin Kramer 4488a1d9fcSBenjamin Kramer // Advance the offset 4588a1d9fcSBenjamin Kramer *offset_ptr += sizeof(val); 4688a1d9fcSBenjamin Kramer return val; 4788a1d9fcSBenjamin Kramer } 4888a1d9fcSBenjamin Kramer 4988a1d9fcSBenjamin Kramer template <typename T> 50f5f35c5cSIgor Kudrin static T *getUs(uint64_t *offset_ptr, T *dst, uint32_t count, 51b1f29cecSPavel Labath const DataExtractor *de, bool isLittleEndian, const char *Data, 52b1f29cecSPavel Labath llvm::Error *Err) { 53b1f29cecSPavel Labath ErrorAsOutParameter ErrAsOut(Err); 54b1f29cecSPavel Labath if (isError(Err)) 55b1f29cecSPavel Labath return nullptr; 56b1f29cecSPavel Labath 57f5f35c5cSIgor Kudrin uint64_t offset = *offset_ptr; 5888a1d9fcSBenjamin Kramer 59b1f29cecSPavel Labath if (!de->isValidOffsetForDataOfSize(offset, sizeof(*dst) * count)) { 605754a61eSPavel Labath unexpectedEndReached(Err, offset); 61b1f29cecSPavel Labath return nullptr; 62b1f29cecSPavel Labath } 6388a1d9fcSBenjamin Kramer for (T *value_ptr = dst, *end = dst + count; value_ptr != end; 6488a1d9fcSBenjamin Kramer ++value_ptr, offset += sizeof(*dst)) 65b1f29cecSPavel Labath *value_ptr = getU<T>(offset_ptr, de, isLittleEndian, Data, Err); 6688a1d9fcSBenjamin Kramer // Advance the offset 6788a1d9fcSBenjamin Kramer *offset_ptr = offset; 6888a1d9fcSBenjamin Kramer // Return a non-NULL pointer to the converted data as an indicator of 6988a1d9fcSBenjamin Kramer // success 7088a1d9fcSBenjamin Kramer return dst; 7188a1d9fcSBenjamin Kramer } 7288a1d9fcSBenjamin Kramer 73b1f29cecSPavel Labath uint8_t DataExtractor::getU8(uint64_t *offset_ptr, llvm::Error *Err) const { 74b1f29cecSPavel Labath return getU<uint8_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err); 7588a1d9fcSBenjamin Kramer } 7688a1d9fcSBenjamin Kramer 7788a1d9fcSBenjamin Kramer uint8_t * 78f5f35c5cSIgor Kudrin DataExtractor::getU8(uint64_t *offset_ptr, uint8_t *dst, uint32_t count) const { 7988a1d9fcSBenjamin Kramer return getUs<uint8_t>(offset_ptr, dst, count, this, IsLittleEndian, 80b1f29cecSPavel Labath Data.data(), nullptr); 8188a1d9fcSBenjamin Kramer } 8288a1d9fcSBenjamin Kramer 83b1f29cecSPavel Labath uint8_t *DataExtractor::getU8(Cursor &C, uint8_t *Dst, uint32_t Count) const { 84b1f29cecSPavel Labath return getUs<uint8_t>(&C.Offset, Dst, Count, this, IsLittleEndian, 85b1f29cecSPavel Labath Data.data(), &C.Err); 86b1f29cecSPavel Labath } 87b1f29cecSPavel Labath 88b1f29cecSPavel Labath uint16_t DataExtractor::getU16(uint64_t *offset_ptr, llvm::Error *Err) const { 89b1f29cecSPavel Labath return getU<uint16_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err); 9088a1d9fcSBenjamin Kramer } 9188a1d9fcSBenjamin Kramer 92f5f35c5cSIgor Kudrin uint16_t *DataExtractor::getU16(uint64_t *offset_ptr, uint16_t *dst, 9388a1d9fcSBenjamin Kramer uint32_t count) const { 9488a1d9fcSBenjamin Kramer return getUs<uint16_t>(offset_ptr, dst, count, this, IsLittleEndian, 95b1f29cecSPavel Labath Data.data(), nullptr); 9688a1d9fcSBenjamin Kramer } 9788a1d9fcSBenjamin Kramer 98f5f35c5cSIgor Kudrin uint32_t DataExtractor::getU24(uint64_t *offset_ptr) const { 99258927e3SWolfgang Pieb uint24_t ExtractedVal = 100b1f29cecSPavel Labath getU<uint24_t>(offset_ptr, this, IsLittleEndian, Data.data(), nullptr); 101258927e3SWolfgang Pieb // The 3 bytes are in the correct byte order for the host. 102258927e3SWolfgang Pieb return ExtractedVal.getAsUint32(sys::IsLittleEndianHost); 103258927e3SWolfgang Pieb } 104258927e3SWolfgang Pieb 105b1f29cecSPavel Labath uint32_t DataExtractor::getU32(uint64_t *offset_ptr, llvm::Error *Err) const { 106b1f29cecSPavel Labath return getU<uint32_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err); 10788a1d9fcSBenjamin Kramer } 10888a1d9fcSBenjamin Kramer 109f5f35c5cSIgor Kudrin uint32_t *DataExtractor::getU32(uint64_t *offset_ptr, uint32_t *dst, 11088a1d9fcSBenjamin Kramer uint32_t count) const { 11188a1d9fcSBenjamin Kramer return getUs<uint32_t>(offset_ptr, dst, count, this, IsLittleEndian, 112b1f29cecSPavel Labath Data.data(), nullptr); 11388a1d9fcSBenjamin Kramer } 11488a1d9fcSBenjamin Kramer 115b1f29cecSPavel Labath uint64_t DataExtractor::getU64(uint64_t *offset_ptr, llvm::Error *Err) const { 116b1f29cecSPavel Labath return getU<uint64_t>(offset_ptr, this, IsLittleEndian, Data.data(), Err); 11788a1d9fcSBenjamin Kramer } 11888a1d9fcSBenjamin Kramer 119f5f35c5cSIgor Kudrin uint64_t *DataExtractor::getU64(uint64_t *offset_ptr, uint64_t *dst, 12088a1d9fcSBenjamin Kramer uint32_t count) const { 12188a1d9fcSBenjamin Kramer return getUs<uint64_t>(offset_ptr, dst, count, this, IsLittleEndian, 122b1f29cecSPavel Labath Data.data(), nullptr); 12388a1d9fcSBenjamin Kramer } 12488a1d9fcSBenjamin Kramer 125b1f29cecSPavel Labath uint64_t DataExtractor::getUnsigned(uint64_t *offset_ptr, uint32_t byte_size, 126b1f29cecSPavel Labath llvm::Error *Err) const { 12788a1d9fcSBenjamin Kramer switch (byte_size) { 12888a1d9fcSBenjamin Kramer case 1: 129b1f29cecSPavel Labath return getU8(offset_ptr, Err); 13088a1d9fcSBenjamin Kramer case 2: 131b1f29cecSPavel Labath return getU16(offset_ptr, Err); 13288a1d9fcSBenjamin Kramer case 4: 133b1f29cecSPavel Labath return getU32(offset_ptr, Err); 13488a1d9fcSBenjamin Kramer case 8: 135b1f29cecSPavel Labath return getU64(offset_ptr, Err); 13688a1d9fcSBenjamin Kramer } 13788a1d9fcSBenjamin Kramer llvm_unreachable("getUnsigned unhandled case!"); 13888a1d9fcSBenjamin Kramer } 13988a1d9fcSBenjamin Kramer 14088a1d9fcSBenjamin Kramer int64_t 141f5f35c5cSIgor Kudrin DataExtractor::getSigned(uint64_t *offset_ptr, uint32_t byte_size) const { 14288a1d9fcSBenjamin Kramer switch (byte_size) { 14388a1d9fcSBenjamin Kramer case 1: 14488a1d9fcSBenjamin Kramer return (int8_t)getU8(offset_ptr); 14588a1d9fcSBenjamin Kramer case 2: 14688a1d9fcSBenjamin Kramer return (int16_t)getU16(offset_ptr); 14788a1d9fcSBenjamin Kramer case 4: 14888a1d9fcSBenjamin Kramer return (int32_t)getU32(offset_ptr); 14988a1d9fcSBenjamin Kramer case 8: 15088a1d9fcSBenjamin Kramer return (int64_t)getU64(offset_ptr); 15188a1d9fcSBenjamin Kramer } 15288a1d9fcSBenjamin Kramer llvm_unreachable("getSigned unhandled case!"); 15388a1d9fcSBenjamin Kramer } 15488a1d9fcSBenjamin Kramer 155*a16fffa3SPavel Labath StringRef DataExtractor::getCStrRef(uint64_t *OffsetPtr, Error *Err) const { 156*a16fffa3SPavel Labath ErrorAsOutParameter ErrAsOut(Err); 157*a16fffa3SPavel Labath if (isError(Err)) 158*a16fffa3SPavel Labath return StringRef(); 15988a1d9fcSBenjamin Kramer 160*a16fffa3SPavel Labath uint64_t Start = *OffsetPtr; 161ba1c9156SPaul Robinson StringRef::size_type Pos = Data.find('\0', Start); 162ba1c9156SPaul Robinson if (Pos != StringRef::npos) { 163*a16fffa3SPavel Labath *OffsetPtr = Pos + 1; 164ba1c9156SPaul Robinson return StringRef(Data.data() + Start, Pos - Start); 165ba1c9156SPaul Robinson } 166*a16fffa3SPavel Labath unexpectedEndReached(Err, Start); 167ba1c9156SPaul Robinson return StringRef(); 168ba1c9156SPaul Robinson } 169ba1c9156SPaul Robinson 170df8dda67SGreg Clayton StringRef DataExtractor::getFixedLengthString(uint64_t *OffsetPtr, 171df8dda67SGreg Clayton uint64_t Length, 172df8dda67SGreg Clayton StringRef TrimChars) const { 173df8dda67SGreg Clayton StringRef Bytes(getBytes(OffsetPtr, Length)); 174df8dda67SGreg Clayton return Bytes.trim(TrimChars); 175df8dda67SGreg Clayton } 176df8dda67SGreg Clayton 177df8dda67SGreg Clayton StringRef DataExtractor::getBytes(uint64_t *OffsetPtr, uint64_t Length) const { 178df8dda67SGreg Clayton if (!isValidOffsetForDataOfSize(*OffsetPtr, Length)) 179df8dda67SGreg Clayton return StringRef(); 180df8dda67SGreg Clayton StringRef Result = Data.substr(*OffsetPtr, Length); 181df8dda67SGreg Clayton *OffsetPtr += Length; 182df8dda67SGreg Clayton return Result; 183df8dda67SGreg Clayton } 184df8dda67SGreg Clayton 185b1f29cecSPavel Labath uint64_t DataExtractor::getULEB128(uint64_t *offset_ptr, 186b1f29cecSPavel Labath llvm::Error *Err) const { 1878242f35dSDavid Blaikie assert(*offset_ptr <= Data.size()); 188b1f29cecSPavel Labath ErrorAsOutParameter ErrAsOut(Err); 189b1f29cecSPavel Labath if (isError(Err)) 190b1f29cecSPavel Labath return 0; 1918242f35dSDavid Blaikie 1928242f35dSDavid Blaikie const char *error; 1938242f35dSDavid Blaikie unsigned bytes_read; 1948242f35dSDavid Blaikie uint64_t result = decodeULEB128( 1958242f35dSDavid Blaikie reinterpret_cast<const uint8_t *>(Data.data() + *offset_ptr), &bytes_read, 1968242f35dSDavid Blaikie reinterpret_cast<const uint8_t *>(Data.data() + Data.size()), &error); 197b1f29cecSPavel Labath if (error) { 198b1f29cecSPavel Labath if (Err) 199b1f29cecSPavel Labath *Err = createStringError(errc::illegal_byte_sequence, error); 20088a1d9fcSBenjamin Kramer return 0; 201b1f29cecSPavel Labath } 2028242f35dSDavid Blaikie *offset_ptr += bytes_read; 20388a1d9fcSBenjamin Kramer return result; 20488a1d9fcSBenjamin Kramer } 20588a1d9fcSBenjamin Kramer 206f5f35c5cSIgor Kudrin int64_t DataExtractor::getSLEB128(uint64_t *offset_ptr) const { 207f895e1bdSDavid Blaikie assert(*offset_ptr <= Data.size()); 208f895e1bdSDavid Blaikie 209f895e1bdSDavid Blaikie const char *error; 210f895e1bdSDavid Blaikie unsigned bytes_read; 211f895e1bdSDavid Blaikie int64_t result = decodeSLEB128( 212f895e1bdSDavid Blaikie reinterpret_cast<const uint8_t *>(Data.data() + *offset_ptr), &bytes_read, 213f895e1bdSDavid Blaikie reinterpret_cast<const uint8_t *>(Data.data() + Data.size()), &error); 214f895e1bdSDavid Blaikie if (error) 21588a1d9fcSBenjamin Kramer return 0; 216f895e1bdSDavid Blaikie *offset_ptr += bytes_read; 21788a1d9fcSBenjamin Kramer return result; 21888a1d9fcSBenjamin Kramer } 219b1f29cecSPavel Labath 220b1f29cecSPavel Labath void DataExtractor::skip(Cursor &C, uint64_t Length) const { 221b1f29cecSPavel Labath ErrorAsOutParameter ErrAsOut(&C.Err); 222b1f29cecSPavel Labath if (isError(&C.Err)) 223b1f29cecSPavel Labath return; 224b1f29cecSPavel Labath 225b1f29cecSPavel Labath if (isValidOffsetForDataOfSize(C.Offset, Length)) 226b1f29cecSPavel Labath C.Offset += Length; 227b1f29cecSPavel Labath else 2285754a61eSPavel Labath unexpectedEndReached(&C.Err, C.Offset); 229b1f29cecSPavel Labath } 230