1 //===- GsymCreator.cpp ----------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 //===----------------------------------------------------------------------===// 7 8 #include "llvm/DebugInfo/GSYM/GsymCreator.h" 9 #include "llvm/DebugInfo/GSYM/FileWriter.h" 10 #include "llvm/DebugInfo/GSYM/Header.h" 11 #include "llvm/DebugInfo/GSYM/LineTable.h" 12 #include "llvm/MC/StringTableBuilder.h" 13 #include "llvm/Support/raw_ostream.h" 14 15 #include <algorithm> 16 #include <cassert> 17 #include <functional> 18 #include <vector> 19 20 using namespace llvm; 21 using namespace gsym; 22 23 GsymCreator::GsymCreator() : StrTab(StringTableBuilder::ELF) { 24 insertFile(StringRef()); 25 } 26 27 uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) { 28 llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style); 29 llvm::StringRef filename = llvm::sys::path::filename(Path, Style); 30 // We must insert the strings first, then call the FileEntry constructor. 31 // If we inline the insertString() function call into the constructor, the 32 // call order is undefined due to parameter lists not having any ordering 33 // requirements. 34 const uint32_t Dir = insertString(directory); 35 const uint32_t Base = insertString(filename); 36 FileEntry FE(Dir, Base); 37 38 std::lock_guard<std::mutex> Guard(Mutex); 39 const auto NextIndex = Files.size(); 40 // Find FE in hash map and insert if not present. 41 auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex)); 42 if (R.second) 43 Files.emplace_back(FE); 44 return R.first->second; 45 } 46 47 llvm::Error GsymCreator::save(StringRef Path, 48 llvm::support::endianness ByteOrder) const { 49 std::error_code EC; 50 raw_fd_ostream OutStrm(Path, EC); 51 if (EC) 52 return llvm::errorCodeToError(EC); 53 FileWriter O(OutStrm, ByteOrder); 54 return encode(O); 55 } 56 57 llvm::Error GsymCreator::encode(FileWriter &O) const { 58 std::lock_guard<std::mutex> Guard(Mutex); 59 if (Funcs.empty()) 60 return createStringError(std::errc::invalid_argument, 61 "no functions to encode"); 62 if (!Finalized) 63 return createStringError(std::errc::invalid_argument, 64 "GsymCreator wasn't finalized prior to encoding"); 65 66 if (Funcs.size() > UINT32_MAX) 67 return createStringError(std::errc::invalid_argument, 68 "too many FunctionInfos"); 69 70 const uint64_t MinAddr = 71 BaseAddress ? *BaseAddress : Funcs.front().startAddress(); 72 const uint64_t MaxAddr = Funcs.back().startAddress(); 73 const uint64_t AddrDelta = MaxAddr - MinAddr; 74 Header Hdr; 75 Hdr.Magic = GSYM_MAGIC; 76 Hdr.Version = GSYM_VERSION; 77 Hdr.AddrOffSize = 0; 78 Hdr.UUIDSize = static_cast<uint8_t>(UUID.size()); 79 Hdr.BaseAddress = MinAddr; 80 Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size()); 81 Hdr.StrtabOffset = 0; // We will fix this up later. 82 Hdr.StrtabSize = 0; // We will fix this up later. 83 memset(Hdr.UUID, 0, sizeof(Hdr.UUID)); 84 if (UUID.size() > sizeof(Hdr.UUID)) 85 return createStringError(std::errc::invalid_argument, 86 "invalid UUID size %u", (uint32_t)UUID.size()); 87 // Set the address offset size correctly in the GSYM header. 88 if (AddrDelta <= UINT8_MAX) 89 Hdr.AddrOffSize = 1; 90 else if (AddrDelta <= UINT16_MAX) 91 Hdr.AddrOffSize = 2; 92 else if (AddrDelta <= UINT32_MAX) 93 Hdr.AddrOffSize = 4; 94 else 95 Hdr.AddrOffSize = 8; 96 // Copy the UUID value if we have one. 97 if (UUID.size() > 0) 98 memcpy(Hdr.UUID, UUID.data(), UUID.size()); 99 // Write out the header. 100 llvm::Error Err = Hdr.encode(O); 101 if (Err) 102 return Err; 103 104 // Write out the address offsets. 105 O.alignTo(Hdr.AddrOffSize); 106 for (const auto &FuncInfo : Funcs) { 107 uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress; 108 switch (Hdr.AddrOffSize) { 109 case 1: 110 O.writeU8(static_cast<uint8_t>(AddrOffset)); 111 break; 112 case 2: 113 O.writeU16(static_cast<uint16_t>(AddrOffset)); 114 break; 115 case 4: 116 O.writeU32(static_cast<uint32_t>(AddrOffset)); 117 break; 118 case 8: 119 O.writeU64(AddrOffset); 120 break; 121 } 122 } 123 124 // Write out all zeros for the AddrInfoOffsets. 125 O.alignTo(4); 126 const off_t AddrInfoOffsetsOffset = O.tell(); 127 for (size_t i = 0, n = Funcs.size(); i < n; ++i) 128 O.writeU32(0); 129 130 // Write out the file table 131 O.alignTo(4); 132 assert(!Files.empty()); 133 assert(Files[0].Dir == 0); 134 assert(Files[0].Base == 0); 135 size_t NumFiles = Files.size(); 136 if (NumFiles > UINT32_MAX) 137 return createStringError(std::errc::invalid_argument, "too many files"); 138 O.writeU32(static_cast<uint32_t>(NumFiles)); 139 for (auto File : Files) { 140 O.writeU32(File.Dir); 141 O.writeU32(File.Base); 142 } 143 144 // Write out the sting table. 145 const off_t StrtabOffset = O.tell(); 146 StrTab.write(O.get_stream()); 147 const off_t StrtabSize = O.tell() - StrtabOffset; 148 std::vector<uint32_t> AddrInfoOffsets; 149 150 // Write out the address infos for each function info. 151 for (const auto &FuncInfo : Funcs) { 152 if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O)) 153 AddrInfoOffsets.push_back(OffsetOrErr.get()); 154 else 155 return OffsetOrErr.takeError(); 156 } 157 // Fixup the string table offset and size in the header 158 O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset)); 159 O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize)); 160 161 // Fixup all address info offsets 162 uint64_t Offset = 0; 163 for (auto AddrInfoOffset : AddrInfoOffsets) { 164 O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset); 165 Offset += 4; 166 } 167 return ErrorSuccess(); 168 } 169 170 // Similar to std::remove_if, but the predicate is binary and it is passed both 171 // the previous and the current element. 172 template <class ForwardIt, class BinaryPredicate> 173 static ForwardIt removeIfBinary(ForwardIt FirstIt, ForwardIt LastIt, 174 BinaryPredicate Pred) { 175 if (FirstIt != LastIt) { 176 auto PrevIt = FirstIt++; 177 FirstIt = std::find_if(FirstIt, LastIt, [&](const auto &Curr) { 178 return Pred(*PrevIt++, Curr); 179 }); 180 if (FirstIt != LastIt) 181 for (ForwardIt CurrIt = FirstIt; ++CurrIt != LastIt;) 182 if (!Pred(*PrevIt, *CurrIt)) { 183 PrevIt = FirstIt; 184 *FirstIt++ = std::move(*CurrIt); 185 } 186 } 187 return FirstIt; 188 } 189 190 llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) { 191 std::lock_guard<std::mutex> Guard(Mutex); 192 if (Finalized) 193 return createStringError(std::errc::invalid_argument, "already finalized"); 194 Finalized = true; 195 196 // Sort function infos so we can emit sorted functions. 197 llvm::sort(Funcs); 198 199 // Don't let the string table indexes change by finalizing in order. 200 StrTab.finalizeInOrder(); 201 202 // Remove duplicates function infos that have both entries from debug info 203 // (DWARF or Breakpad) and entries from the SymbolTable. 204 // 205 // Also handle overlapping function. Usually there shouldn't be any, but they 206 // can and do happen in some rare cases. 207 // 208 // (a) (b) (c) 209 // ^ ^ ^ ^ 210 // |X |Y |X ^ |X 211 // | | | |Y | ^ 212 // | | | v v |Y 213 // v v v v 214 // 215 // In (a) and (b), Y is ignored and X will be reported for the full range. 216 // In (c), both functions will be included in the result and lookups for an 217 // address in the intersection will return Y because of binary search. 218 // 219 // Note that in case of (b), we cannot include Y in the result because then 220 // we wouldn't find any function for range (end of Y, end of X) 221 // with binary search 222 auto NumBefore = Funcs.size(); 223 Funcs.erase( 224 removeIfBinary(Funcs.begin(), Funcs.end(), 225 [&](const auto &Prev, const auto &Curr) { 226 if (Prev.Range.intersects(Curr.Range)) { 227 // Overlapping address ranges. 228 if (Prev.Range == Curr.Range) { 229 // Same address range. Check if one is from debug 230 // info and the other is from a symbol table. If 231 // so, then keep the one with debug info. Our 232 // sorting guarantees that entries with matching 233 // address ranges that have debug info are last in 234 // the sort. 235 if (Prev == Curr) { 236 // FunctionInfo entries match exactly (range, 237 // lines, inlines) 238 239 // We used to output a warning here, but this was 240 // so frequent on some binaries, in particular 241 // when those were built with GCC, that it slowed 242 // down processing extremely. 243 return true; 244 } else { 245 if (!Prev.hasRichInfo() && Curr.hasRichInfo()) { 246 // Same address range, one with no debug info 247 // (symbol) and the next with debug info. Keep 248 // the latter. 249 return true; 250 } else { 251 OS << "warning: same address range contains " 252 "different debug " 253 << "info. Removing:\n" 254 << Prev << "\nIn favor of this one:\n" 255 << Curr << "\n"; 256 return true; 257 } 258 } 259 } else { 260 // print warnings about overlaps 261 OS << "warning: function ranges overlap:\n" 262 << Prev << "\n" 263 << Curr << "\n"; 264 } 265 } else if (Prev.Range.size() == 0 && 266 Curr.Range.contains(Prev.Range.Start)) { 267 OS << "warning: removing symbol:\n" 268 << Prev << "\nKeeping:\n" 269 << Curr << "\n"; 270 return true; 271 } 272 273 return false; 274 }), 275 Funcs.end()); 276 277 // If our last function info entry doesn't have a size and if we have valid 278 // text ranges, we should set the size of the last entry since any search for 279 // a high address might match our last entry. By fixing up this size, we can 280 // help ensure we don't cause lookups to always return the last symbol that 281 // has no size when doing lookups. 282 if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) { 283 if (auto Range = 284 ValidTextRanges->getRangeThatContains(Funcs.back().Range.Start)) { 285 Funcs.back().Range.End = Range->End; 286 } 287 } 288 OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with " 289 << Funcs.size() << " total\n"; 290 return Error::success(); 291 } 292 293 uint32_t GsymCreator::insertString(StringRef S, bool Copy) { 294 if (S.empty()) 295 return 0; 296 297 // The hash can be calculated outside the lock. 298 CachedHashStringRef CHStr(S); 299 std::lock_guard<std::mutex> Guard(Mutex); 300 if (Copy) { 301 // We need to provide backing storage for the string if requested 302 // since StringTableBuilder stores references to strings. Any string 303 // that comes from a section in an object file doesn't need to be 304 // copied, but any string created by code will need to be copied. 305 // This allows GsymCreator to be really fast when parsing DWARF and 306 // other object files as most strings don't need to be copied. 307 if (!StrTab.contains(CHStr)) 308 CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(), 309 CHStr.hash()}; 310 } 311 return StrTab.add(CHStr); 312 } 313 314 void GsymCreator::addFunctionInfo(FunctionInfo &&FI) { 315 std::lock_guard<std::mutex> Guard(Mutex); 316 Ranges.insert(FI.Range); 317 Funcs.emplace_back(std::move(FI)); 318 } 319 320 void GsymCreator::forEachFunctionInfo( 321 std::function<bool(FunctionInfo &)> const &Callback) { 322 std::lock_guard<std::mutex> Guard(Mutex); 323 for (auto &FI : Funcs) { 324 if (!Callback(FI)) 325 break; 326 } 327 } 328 329 void GsymCreator::forEachFunctionInfo( 330 std::function<bool(const FunctionInfo &)> const &Callback) const { 331 std::lock_guard<std::mutex> Guard(Mutex); 332 for (const auto &FI : Funcs) { 333 if (!Callback(FI)) 334 break; 335 } 336 } 337 338 size_t GsymCreator::getNumFunctionInfos() const { 339 std::lock_guard<std::mutex> Guard(Mutex); 340 return Funcs.size(); 341 } 342 343 bool GsymCreator::IsValidTextAddress(uint64_t Addr) const { 344 if (ValidTextRanges) 345 return ValidTextRanges->contains(Addr); 346 return true; // No valid text ranges has been set, so accept all ranges. 347 } 348 349 bool GsymCreator::hasFunctionInfoForAddress(uint64_t Addr) const { 350 std::lock_guard<std::mutex> Guard(Mutex); 351 return Ranges.contains(Addr); 352 } 353