1 //===- bolt/Core/DebugData.cpp - Debugging information handling -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements functions and classes for handling debug info.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/DebugData.h"
14 #include "bolt/Core/BinaryBasicBlock.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "bolt/Utils/Utils.h"
17 #include "llvm/MC/MCObjectStreamer.h"
18 #include "llvm/MC/MCSymbol.h"
19 #include "llvm/Support/CommandLine.h"
20 #include "llvm/Support/EndianStream.h"
21 #include "llvm/Support/LEB128.h"
22 #include <algorithm>
23 #include <cassert>
24 #include <cstdint>
25 #include <limits>
26 #include <unordered_map>
27 
28 #define DEBUG_TYPE "bolt-debug-info"
29 
// External command-line option declarations. Verbosity is only declared here
// (extern); its definition is not part of this file.
namespace opts {
extern llvm::cl::opt<unsigned> Verbosity;
} // namespace opts
33 
34 namespace llvm {
35 namespace bolt {
36 
// Out-of-class definition of the static NULL_ROW constant, brace-initialized
// with two zeros.
// NOTE(review): presumably the sentinel meaning "no line table row" - confirm
// against the declaration in DebugData.h.
const DebugLineTableRowRef DebugLineTableRowRef::NULL_ROW{0, 0};
38 
39 namespace {
40 
41 LLVM_ATTRIBUTE_UNUSED
42 static void printLE64(const std::string &S) {
43   for (uint32_t I = 0, Size = S.size(); I < Size; ++I) {
44     errs() << Twine::utohexstr(S[I]);
45     errs() << Twine::utohexstr((int8_t)S[I]);
46   }
47   errs() << "\n";
48 }
49 
50 // Writes address ranges to Writer as pairs of 64-bit (address, size).
51 // If RelativeRange is true, assumes the address range to be written must be of
52 // the form (begin address, range size), otherwise (begin address, end address).
53 // Terminates the list by writing a pair of two zeroes.
54 // Returns the number of written bytes.
55 uint64_t writeAddressRanges(raw_svector_ostream &Stream,
56                             const DebugAddressRangesVector &AddressRanges,
57                             const bool WriteRelativeRanges = false) {
58   for (const DebugAddressRange &Range : AddressRanges) {
59     support::endian::write(Stream, Range.LowPC, support::little);
60     support::endian::write(
61         Stream, WriteRelativeRanges ? Range.HighPC - Range.LowPC : Range.HighPC,
62         support::little);
63   }
64   // Finish with 0 entries.
65   support::endian::write(Stream, 0ULL, support::little);
66   support::endian::write(Stream, 0ULL, support::little);
67   return AddressRanges.size() * 16 + 16;
68 }
69 
70 } // namespace
71 
72 DebugRangesSectionWriter::DebugRangesSectionWriter() {
73   RangesBuffer = std::make_unique<DebugBufferVector>();
74   RangesStream = std::make_unique<raw_svector_ostream>(*RangesBuffer);
75 
76   // Add an empty range as the first entry;
77   SectionOffset +=
78       writeAddressRanges(*RangesStream.get(), DebugAddressRangesVector{});
79 }
80 
81 uint64_t DebugRangesSectionWriter::addRanges(
82     DebugAddressRangesVector &&Ranges,
83     std::map<DebugAddressRangesVector, uint64_t> &CachedRanges) {
84   if (Ranges.empty())
85     return getEmptyRangesOffset();
86 
87   const auto RI = CachedRanges.find(Ranges);
88   if (RI != CachedRanges.end())
89     return RI->second;
90 
91   const uint64_t EntryOffset = addRanges(Ranges);
92   CachedRanges.emplace(std::move(Ranges), EntryOffset);
93 
94   return EntryOffset;
95 }
96 
97 uint64_t
98 DebugRangesSectionWriter::addRanges(const DebugAddressRangesVector &Ranges) {
99   if (Ranges.empty())
100     return getEmptyRangesOffset();
101 
102   // Reading the SectionOffset and updating it should be atomic to guarantee
103   // unique and correct offsets in patches.
104   std::lock_guard<std::mutex> Lock(WriterMutex);
105   const uint32_t EntryOffset = SectionOffset;
106   SectionOffset += writeAddressRanges(*RangesStream.get(), Ranges);
107 
108   return EntryOffset;
109 }
110 
111 uint64_t DebugRangesSectionWriter::getSectionOffset() {
112   std::lock_guard<std::mutex> Lock(WriterMutex);
113   return SectionOffset;
114 }
115 
116 void DebugARangesSectionWriter::addCURanges(uint64_t CUOffset,
117                                             DebugAddressRangesVector &&Ranges) {
118   std::lock_guard<std::mutex> Lock(CUAddressRangesMutex);
119   CUAddressRanges.emplace(CUOffset, std::move(Ranges));
120 }
121 
122 void DebugARangesSectionWriter::writeARangesSection(
123     raw_svector_ostream &RangesStream,
124     const std::unordered_map<uint32_t, uint32_t> CUMap) const {
125   // For reference on the format of the .debug_aranges section, see the DWARF4
126   // specification, section 6.1.4 Lookup by Address
127   // http://www.dwarfstd.org/doc/DWARF4.pdf
128   for (const auto &CUOffsetAddressRangesPair : CUAddressRanges) {
129     const uint64_t Offset = CUOffsetAddressRangesPair.first;
130     const DebugAddressRangesVector &AddressRanges =
131         CUOffsetAddressRangesPair.second;
132 
133     // Emit header.
134 
135     // Size of this set: 8 (size of the header) + 4 (padding after header)
136     // + 2*sizeof(uint64_t) bytes for each of the ranges, plus an extra
137     // pair of uint64_t's for the terminating, zero-length range.
138     // Does not include size field itself.
139     uint32_t Size = 8 + 4 + 2 * sizeof(uint64_t) * (AddressRanges.size() + 1);
140 
141     // Header field #1: set size.
142     support::endian::write(RangesStream, Size, support::little);
143 
144     // Header field #2: version number, 2 as per the specification.
145     support::endian::write(RangesStream, static_cast<uint16_t>(2),
146                            support::little);
147 
148     assert(CUMap.count(Offset) && "Original CU offset is not found in CU Map");
149     // Header field #3: debug info offset of the correspondent compile unit.
150     support::endian::write(RangesStream,
151                            static_cast<uint32_t>(CUMap.find(Offset)->second),
152                            support::little);
153 
154     // Header field #4: address size.
155     // 8 since we only write ELF64 binaries for now.
156     RangesStream << char(8);
157 
158     // Header field #5: segment size of target architecture.
159     RangesStream << char(0);
160 
161     // Padding before address table - 4 bytes in the 64-bit-pointer case.
162     support::endian::write(RangesStream, static_cast<uint32_t>(0),
163                            support::little);
164 
165     writeAddressRanges(RangesStream, AddressRanges, true);
166   }
167 }
168 
169 DebugAddrWriter::DebugAddrWriter(BinaryContext *Bc) { BC = Bc; }
170 
171 void DebugAddrWriter::AddressForDWOCU::dump() {
172   std::vector<IndexAddressPair> SortedMap(indexToAddressBegin(),
173                                           indexToAdddessEnd());
174   // Sorting address in increasing order of indices.
175   std::sort(SortedMap.begin(), SortedMap.end(),
176             [](const IndexAddressPair &A, const IndexAddressPair &B) {
177               return A.first < B.first;
178             });
179   for (auto &Pair : SortedMap)
180     dbgs() << Twine::utohexstr(Pair.second) << "\t" << Pair.first << "\n";
181 }
182 uint32_t DebugAddrWriter::getIndexFromAddress(uint64_t Address,
183                                               uint64_t DWOId) {
184   std::lock_guard<std::mutex> Lock(WriterMutex);
185   if (!AddressMaps.count(DWOId))
186     AddressMaps[DWOId] = AddressForDWOCU();
187 
188   AddressForDWOCU &Map = AddressMaps[DWOId];
189   auto Entry = Map.find(Address);
190   if (Entry == Map.end()) {
191     auto Index = Map.getNextIndex();
192     Entry = Map.insert(Address, Index).first;
193   }
194   return Entry->second;
195 }
196 
197 // Case1) Address is not in map insert in to AddresToIndex and IndexToAddres
198 // Case2) Address is in the map but Index is higher or equal. Need to update
199 // IndexToAddrss. Case3) Address is in the map but Index is lower. Need to
200 // update AddressToIndex and IndexToAddress
201 void DebugAddrWriter::addIndexAddress(uint64_t Address, uint32_t Index,
202                                       uint64_t DWOId) {
203   std::lock_guard<std::mutex> Lock(WriterMutex);
204   AddressForDWOCU &Map = AddressMaps[DWOId];
205   auto Entry = Map.find(Address);
206   if (Entry != Map.end()) {
207     if (Entry->second > Index)
208       Map.updateAddressToIndex(Address, Index);
209     Map.updateIndexToAddrss(Address, Index);
210   } else {
211     Map.insert(Address, Index);
212   }
213 }
214 
215 AddressSectionBuffer DebugAddrWriter::finalize() {
216   // Need to layout all sections within .debug_addr
217   // Within each section sort Address by index.
218   AddressSectionBuffer Buffer;
219   raw_svector_ostream AddressStream(Buffer);
220   for (std::unique_ptr<DWARFUnit> &CU : BC->DwCtx->compile_units()) {
221     Optional<uint64_t> DWOId = CU->getDWOId();
222     // Handling the case wehre debug information is a mix of Debug fission and
223     // monolitic.
224     if (!DWOId)
225       continue;
226     auto AM = AddressMaps.find(*DWOId);
227     // Adding to map even if it did not contribute to .debug_addr.
228     // The Skeleton CU will still have DW_AT_GNU_addr_base.
229     DWOIdToOffsetMap[*DWOId] = Buffer.size();
230     // If does not exist this CUs DWO section didn't contribute to .debug_addr.
231     if (AM == AddressMaps.end())
232       continue;
233     std::vector<IndexAddressPair> SortedMap(AM->second.indexToAddressBegin(),
234                                             AM->second.indexToAdddessEnd());
235     // Sorting address in increasing order of indices.
236     std::sort(SortedMap.begin(), SortedMap.end(),
237               [](const IndexAddressPair &A, const IndexAddressPair &B) {
238                 return A.first < B.first;
239               });
240 
241     uint8_t AddrSize = CU->getAddressByteSize();
242     uint32_t Counter = 0;
243     auto WriteAddress = [&](uint64_t Address) -> void {
244       ++Counter;
245       switch (AddrSize) {
246       default:
247         assert(false && "Address Size is invalid.");
248         break;
249       case 4:
250         support::endian::write(AddressStream, static_cast<uint32_t>(Address),
251                                support::little);
252         break;
253       case 8:
254         support::endian::write(AddressStream, Address, support::little);
255         break;
256       }
257     };
258 
259     for (const IndexAddressPair &Val : SortedMap) {
260       while (Val.first > Counter)
261         WriteAddress(0);
262       WriteAddress(Val.second);
263     }
264   }
265 
266   return Buffer;
267 }
268 
269 uint64_t DebugAddrWriter::getOffset(uint64_t DWOId) {
270   auto Iter = DWOIdToOffsetMap.find(DWOId);
271   assert(Iter != DWOIdToOffsetMap.end() &&
272          "Offset in to.debug_addr was not found for DWO ID.");
273   return Iter->second;
274 }
275 
276 DebugLocWriter::DebugLocWriter(BinaryContext *BC) {
277   LocBuffer = std::make_unique<DebugBufferVector>();
278   LocStream = std::make_unique<raw_svector_ostream>(*LocBuffer);
279 }
280 
281 void DebugLocWriter::addList(uint64_t AttrOffset,
282                              DebugLocationsVector &&LocList) {
283   if (LocList.empty()) {
284     EmptyAttrLists.push_back(AttrOffset);
285     return;
286   }
287   // Since there is a separate DebugLocWriter for each thread,
288   // we don't need a lock to read the SectionOffset and update it.
289   const uint32_t EntryOffset = SectionOffset;
290 
291   for (const DebugLocationEntry &Entry : LocList) {
292     support::endian::write(*LocStream, static_cast<uint64_t>(Entry.LowPC),
293                            support::little);
294     support::endian::write(*LocStream, static_cast<uint64_t>(Entry.HighPC),
295                            support::little);
296     support::endian::write(*LocStream, static_cast<uint16_t>(Entry.Expr.size()),
297                            support::little);
298     *LocStream << StringRef(reinterpret_cast<const char *>(Entry.Expr.data()),
299                             Entry.Expr.size());
300     SectionOffset += 2 * 8 + 2 + Entry.Expr.size();
301   }
302   LocStream->write_zeros(16);
303   SectionOffset += 16;
304   LocListDebugInfoPatches.push_back({AttrOffset, EntryOffset});
305 }
306 
307 void DebugLoclistWriter::addList(uint64_t AttrOffset,
308                                  DebugLocationsVector &&LocList) {
309   Patches.push_back({AttrOffset, std::move(LocList)});
310 }
311 
312 std::unique_ptr<DebugBufferVector> DebugLocWriter::getBuffer() {
313   return std::move(LocBuffer);
314 }
315 
316 // DWARF 4: 2.6.2
317 void DebugLocWriter::finalize(uint64_t SectionOffset,
318                               SimpleBinaryPatcher &DebugInfoPatcher) {
319   for (const auto LocListDebugInfoPatchType : LocListDebugInfoPatches) {
320     uint64_t Offset = SectionOffset + LocListDebugInfoPatchType.LocListOffset;
321     DebugInfoPatcher.addLE32Patch(LocListDebugInfoPatchType.DebugInfoAttrOffset,
322                                   Offset);
323   }
324 
325   for (uint64_t DebugInfoAttrOffset : EmptyAttrLists)
326     DebugInfoPatcher.addLE32Patch(DebugInfoAttrOffset,
327                                   DebugLocWriter::EmptyListOffset);
328 }
329 
330 void DebugLoclistWriter::finalize(uint64_t SectionOffset,
331                                   SimpleBinaryPatcher &DebugInfoPatcher) {
332   for (LocPatch &Patch : Patches) {
333     if (Patch.LocList.empty()) {
334       DebugInfoPatcher.addLE32Patch(Patch.AttrOffset,
335                                     DebugLocWriter::EmptyListOffset);
336       continue;
337     }
338     const uint32_t EntryOffset = LocBuffer->size();
339     for (const DebugLocationEntry &Entry : Patch.LocList) {
340       support::endian::write(*LocStream,
341                              static_cast<uint8_t>(dwarf::DW_LLE_startx_length),
342                              support::little);
343       uint32_t Index = AddrWriter->getIndexFromAddress(Entry.LowPC, DWOId);
344       encodeULEB128(Index, *LocStream);
345 
346       // TODO: Support DWARF5
347       support::endian::write(*LocStream,
348                              static_cast<uint32_t>(Entry.HighPC - Entry.LowPC),
349                              support::little);
350       support::endian::write(*LocStream,
351                              static_cast<uint16_t>(Entry.Expr.size()),
352                              support::little);
353       *LocStream << StringRef(reinterpret_cast<const char *>(Entry.Expr.data()),
354                               Entry.Expr.size());
355     }
356     support::endian::write(*LocStream,
357                            static_cast<uint8_t>(dwarf::DW_LLE_end_of_list),
358                            support::little);
359     DebugInfoPatcher.addLE32Patch(Patch.AttrOffset, EntryOffset);
360     clearList(Patch.LocList);
361   }
362   clearList(Patches);
363 }
364 
// Out-of-class definition of the static AddrWriter pointer, initially null.
// DebugLoclistWriter::finalize dereferences it, so it has to be set before
// finalize runs.
DebugAddrWriter *DebugLoclistWriter::AddrWriter = nullptr;
366 
367 void DebugInfoBinaryPatcher::addUnitBaseOffsetLabel(uint64_t Offset) {
368   Offset -= DWPUnitOffset;
369   std::lock_guard<std::mutex> Lock(WriterMutex);
370   DebugPatches.emplace_back(new DWARFUnitOffsetBaseLabel(Offset));
371 }
372 
373 void DebugInfoBinaryPatcher::addDestinationReferenceLabel(uint64_t Offset) {
374   Offset -= DWPUnitOffset;
375   std::lock_guard<std::mutex> Lock(WriterMutex);
376   auto RetVal = DestinationLabels.insert(Offset);
377   if (!RetVal.second)
378     return;
379 
380   DebugPatches.emplace_back(new DestinationReferenceLabel(Offset));
381 }
382 
383 void DebugInfoBinaryPatcher::addReferenceToPatch(uint64_t Offset,
384                                                  uint32_t DestinationOffset,
385                                                  uint32_t OldValueSize,
386                                                  dwarf::Form Form) {
387   Offset -= DWPUnitOffset;
388   DestinationOffset -= DWPUnitOffset;
389   std::lock_guard<std::mutex> Lock(WriterMutex);
390   DebugPatches.emplace_back(
391       new DebugPatchReference(Offset, OldValueSize, DestinationOffset, Form));
392 }
393 
394 void DebugInfoBinaryPatcher::addUDataPatch(uint64_t Offset, uint64_t NewValue,
395                                            uint32_t OldValueSize) {
396   Offset -= DWPUnitOffset;
397   std::lock_guard<std::mutex> Lock(WriterMutex);
398   DebugPatches.emplace_back(
399       new DebugPatchVariableSize(Offset, OldValueSize, NewValue));
400 }
401 
402 void DebugInfoBinaryPatcher::addLE64Patch(uint64_t Offset, uint64_t NewValue) {
403   Offset -= DWPUnitOffset;
404   std::lock_guard<std::mutex> Lock(WriterMutex);
405   DebugPatches.emplace_back(new DebugPatch64(Offset, NewValue));
406 }
407 
408 void DebugInfoBinaryPatcher::addLE32Patch(uint64_t Offset, uint32_t NewValue,
409                                           uint32_t OldValueSize) {
410   Offset -= DWPUnitOffset;
411   std::lock_guard<std::mutex> Lock(WriterMutex);
412   if (OldValueSize == 4)
413     DebugPatches.emplace_back(new DebugPatch32(Offset, NewValue));
414   else
415     DebugPatches.emplace_back(new DebugPatch64to32(Offset, NewValue));
416 }
417 
418 void SimpleBinaryPatcher::addBinaryPatch(uint64_t Offset,
419                                          std::string &&NewValue,
420                                          uint32_t OldValueSize) {
421   Patches.emplace_back(Offset, std::move(NewValue));
422 }
423 
424 void SimpleBinaryPatcher::addBytePatch(uint64_t Offset, uint8_t Value) {
425   auto Str = std::string(1, Value);
426   Patches.emplace_back(Offset, std::move(Str));
427 }
428 
// Returns the ByteSize least significant bytes of NewValue in little-endian
// order. Bytes beyond the eighth are zero because NewValue has been shifted
// out completely by then.
static std::string encodeLE(size_t ByteSize, uint64_t NewValue) {
  std::string Bytes;
  Bytes.reserve(ByteSize);
  for (size_t Remaining = ByteSize; Remaining != 0; --Remaining) {
    Bytes.push_back(static_cast<char>(NewValue & 0xff));
    NewValue >>= 8;
  }
  return Bytes;
}
437 
438 void SimpleBinaryPatcher::addLEPatch(uint64_t Offset, uint64_t NewValue,
439                                      size_t ByteSize) {
440   Patches.emplace_back(Offset, encodeLE(ByteSize, NewValue));
441 }
442 
443 void SimpleBinaryPatcher::addUDataPatch(uint64_t Offset, uint64_t Value,
444                                         uint32_t OldValueSize) {
445   std::string Buff;
446   raw_string_ostream OS(Buff);
447   encodeULEB128(Value, OS, OldValueSize);
448 
449   Patches.emplace_back(Offset, std::move(Buff));
450 }
451 
452 void SimpleBinaryPatcher::addLE64Patch(uint64_t Offset, uint64_t NewValue) {
453   addLEPatch(Offset, NewValue, 8);
454 }
455 
456 void SimpleBinaryPatcher::addLE32Patch(uint64_t Offset, uint32_t NewValue,
457                                        uint32_t OldValueSize) {
458   addLEPatch(Offset, NewValue, 4);
459 }
460 
461 std::string SimpleBinaryPatcher::patchBinary(StringRef BinaryContents) {
462   std::string BinaryContentsStr = std::string(BinaryContents);
463   for (const auto &Patch : Patches) {
464     uint32_t Offset = Patch.first;
465     const std::string &ByteSequence = Patch.second;
466     assert(Offset + ByteSequence.size() <= BinaryContents.size() &&
467            "Applied patch runs over binary size.");
468     for (uint64_t I = 0, Size = ByteSequence.size(); I < Size; ++I) {
469       BinaryContentsStr[Offset + I] = ByteSequence[I];
470     }
471   }
472   return BinaryContentsStr;
473 }
474 
475 std::unordered_map<uint32_t, uint32_t>
476 DebugInfoBinaryPatcher::computeNewOffsets() {
477   std::unordered_map<uint32_t, uint32_t> CUMap;
478   std::sort(DebugPatches.begin(), DebugPatches.end(),
479             [](const UniquePatchPtrType &V1, const UniquePatchPtrType &V2) {
480               return V1.get()->Offset < V2.get()->Offset;
481             });
482 
483   // Calculating changes in .debug_info size from Patches to build a map of old
484   // to updated reference destination offsets.
485   for (UniquePatchPtrType &PatchBase : DebugPatches) {
486     Patch *P = PatchBase.get();
487     switch (P->Kind) {
488     default:
489       continue;
490     case DebugPatchKind::PatchValue64to32: {
491       ChangeInSize -= 4;
492       break;
493     }
494     case DebugPatchKind::PatchValueVariable: {
495       DebugPatchVariableSize *DPV =
496           reinterpret_cast<DebugPatchVariableSize *>(P);
497       std::string Temp;
498       raw_string_ostream OS(Temp);
499       encodeULEB128(DPV->Value, OS);
500       ChangeInSize += Temp.size() - DPV->OldValueSize;
501       break;
502     }
503     case DebugPatchKind::DestinationReferenceLabel: {
504       DestinationReferenceLabel *DRL =
505           reinterpret_cast<DestinationReferenceLabel *>(P);
506       OldToNewOffset[DRL->Offset] = DRL->Offset + ChangeInSize;
507       break;
508     }
509     case DebugPatchKind::ReferencePatchValue: {
510       // This doesn't look to be a common case, so will always encode as 4 bytes
511       // to reduce algorithmic complexity.
512       DebugPatchReference *RDP = reinterpret_cast<DebugPatchReference *>(P);
513       if (RDP->PatchInfo.IndirectRelative) {
514         ChangeInSize += 4 - RDP->PatchInfo.OldValueSize;
515         assert(RDP->PatchInfo.OldValueSize <= 4 &&
516                "Variable encoding reference greater than 4 bytes.");
517       }
518       break;
519     }
520     case DebugPatchKind::DWARFUnitOffsetBaseLabel: {
521       DWARFUnitOffsetBaseLabel *BaseLabel =
522           reinterpret_cast<DWARFUnitOffsetBaseLabel *>(P);
523       uint32_t CUOffset = BaseLabel->Offset;
524       uint32_t CUOffsetUpdate = CUOffset + ChangeInSize;
525       CUMap[CUOffset] = CUOffsetUpdate;
526     }
527     }
528   }
529   return CUMap;
530 }
531 
532 std::string DebugInfoBinaryPatcher::patchBinary(StringRef BinaryContents) {
533   std::string NewBinaryContents;
534   NewBinaryContents.reserve(BinaryContents.size() + ChangeInSize);
535   uint32_t StartOffset = 0;
536   uint32_t DwarfUnitBaseOffset = 0;
537   uint32_t OldValueSize = 0;
538   uint32_t Offset = 0;
539   std::string ByteSequence;
540   std::vector<std::pair<uint32_t, uint32_t>> LengthPatches;
541   // Wasting one entry to avoid checks for first.
542   LengthPatches.push_back({0, 0});
543 
544   // Applying all the patches replacing current entry.
545   // This might change the size of .debug_info section.
546   for (const UniquePatchPtrType &PatchBase : DebugPatches) {
547     Patch *P = PatchBase.get();
548     switch (P->Kind) {
549     default:
550       continue;
551     case DebugPatchKind::ReferencePatchValue: {
552       DebugPatchReference *RDP = reinterpret_cast<DebugPatchReference *>(P);
553       uint32_t DestinationOffset = RDP->DestinationOffset;
554       assert(OldToNewOffset.count(DestinationOffset) &&
555              "Destination Offset for reference not updated.");
556       uint32_t UpdatedOffset = OldToNewOffset[DestinationOffset];
557       Offset = RDP->Offset;
558       OldValueSize = RDP->PatchInfo.OldValueSize;
559       if (RDP->PatchInfo.DirectRelative) {
560         UpdatedOffset -= DwarfUnitBaseOffset;
561         ByteSequence = encodeLE(OldValueSize, UpdatedOffset);
562         // In theory reference for DW_FORM_ref{1,2,4,8} can be right on the edge
563         // and overflow if later debug information grows.
564         if (ByteSequence.size() > OldValueSize)
565           errs() << "BOLT-ERROR: Relative reference of size "
566                  << Twine::utohexstr(OldValueSize)
567                  << " overflows with the new encoding.\n";
568       } else if (RDP->PatchInfo.DirectAbsolute) {
569         ByteSequence = encodeLE(OldValueSize, UpdatedOffset);
570       } else if (RDP->PatchInfo.IndirectRelative) {
571         UpdatedOffset -= DwarfUnitBaseOffset;
572         ByteSequence.clear();
573         raw_string_ostream OS(ByteSequence);
574         encodeULEB128(UpdatedOffset, OS, 4);
575       } else {
576         llvm_unreachable("Invalid Reference form.");
577       }
578       break;
579     }
580     case DebugPatchKind::PatchValue32: {
581       DebugPatch32 *P32 = reinterpret_cast<DebugPatch32 *>(P);
582       Offset = P32->Offset;
583       OldValueSize = 4;
584       ByteSequence = encodeLE(4, P32->Value);
585       break;
586     }
587     case DebugPatchKind::PatchValue64to32: {
588       DebugPatch64to32 *P64to32 = reinterpret_cast<DebugPatch64to32 *>(P);
589       Offset = P64to32->Offset;
590       OldValueSize = 8;
591       ByteSequence = encodeLE(4, P64to32->Value);
592       break;
593     }
594     case DebugPatchKind::PatchValueVariable: {
595       DebugPatchVariableSize *PV =
596           reinterpret_cast<DebugPatchVariableSize *>(P);
597       Offset = PV->Offset;
598       OldValueSize = PV->OldValueSize;
599       ByteSequence.clear();
600       raw_string_ostream OS(ByteSequence);
601       encodeULEB128(PV->Value, OS);
602       break;
603     }
604     case DebugPatchKind::PatchValue64: {
605       DebugPatch64 *P64 = reinterpret_cast<DebugPatch64 *>(P);
606       Offset = P64->Offset;
607       OldValueSize = 8;
608       ByteSequence = encodeLE(8, P64->Value);
609       break;
610     }
611     case DebugPatchKind::DWARFUnitOffsetBaseLabel: {
612       DWARFUnitOffsetBaseLabel *BaseLabel =
613           reinterpret_cast<DWARFUnitOffsetBaseLabel *>(P);
614       Offset = BaseLabel->Offset;
615       OldValueSize = 0;
616       ByteSequence.clear();
617       auto &Patch = LengthPatches.back();
618       // Length to copy between last patch entry and next compile unit.
619       uint32_t RemainingLength = Offset - StartOffset;
620       uint32_t NewCUOffset = NewBinaryContents.size() + RemainingLength;
621       DwarfUnitBaseOffset = NewCUOffset;
622       // Length of previous CU = This CU Offset - sizeof(length) - last CU
623       // Offset.
624       Patch.second = NewCUOffset - 4 - Patch.first;
625       LengthPatches.push_back({NewCUOffset, 0});
626       break;
627     }
628     }
629 
630     assert(Offset + ByteSequence.size() <= BinaryContents.size() &&
631            "Applied patch runs over binary size.");
632     uint32_t Length = Offset - StartOffset;
633     NewBinaryContents.append(BinaryContents.substr(StartOffset, Length).data(),
634                              Length);
635     NewBinaryContents.append(ByteSequence.data(), ByteSequence.size());
636     StartOffset = Offset + OldValueSize;
637   }
638   uint32_t Length = BinaryContents.size() - StartOffset;
639   NewBinaryContents.append(BinaryContents.substr(StartOffset, Length).data(),
640                            Length);
641   DebugPatches.clear();
642 
643   // Patching lengths of CUs
644   auto &Patch = LengthPatches.back();
645   Patch.second = NewBinaryContents.size() - 4 - Patch.first;
646   for (uint32_t J = 1, Size = LengthPatches.size(); J < Size; ++J) {
647     const auto &Patch = LengthPatches[J];
648     ByteSequence = encodeLE(4, Patch.second);
649     Offset = Patch.first;
650     for (uint64_t I = 0, Size = ByteSequence.size(); I < Size; ++I)
651       NewBinaryContents[Offset + I] = ByteSequence[I];
652   }
653 
654   return NewBinaryContents;
655 }
656 
657 void DebugStrWriter::create() {
658   StrBuffer = std::make_unique<DebugStrBufferVector>();
659   StrStream = std::make_unique<raw_svector_ostream>(*StrBuffer);
660 }
661 
662 void DebugStrWriter::initialize() {
663   auto StrSection = BC->DwCtx->getDWARFObj().getStrSection();
664   (*StrStream) << StrSection;
665 }
666 
667 uint32_t DebugStrWriter::addString(StringRef Str) {
668   std::lock_guard<std::mutex> Lock(WriterMutex);
669   if (StrBuffer->empty())
670     initialize();
671   auto Offset = StrBuffer->size();
672   (*StrStream) << Str;
673   StrStream->write_zeros(1);
674   return Offset;
675 }
676 
677 void DebugAbbrevWriter::addUnitAbbreviations(DWARFUnit &Unit) {
678   const DWARFAbbreviationDeclarationSet *Abbrevs = Unit.getAbbreviations();
679   if (!Abbrevs)
680     return;
681 
682   // Multiple units may share the same abbreviations. Only add abbreviations
683   // for the first unit and reuse them.
684   const uint64_t AbbrevOffset = Unit.getAbbreviationsOffset();
685   if (UnitsAbbrevData.find(AbbrevOffset) != UnitsAbbrevData.end())
686     return;
687 
688   AbbrevData &UnitData = UnitsAbbrevData[AbbrevOffset];
689   UnitData.Buffer = std::make_unique<DebugBufferVector>();
690   UnitData.Stream = std::make_unique<raw_svector_ostream>(*UnitData.Buffer);
691 
692   const PatchesTy &UnitPatches = Patches[&Unit];
693 
694   raw_svector_ostream &OS = *UnitData.Stream.get();
695 
696   // Take a fast path if there are no patches to apply. Simply copy the original
697   // contents.
698   if (UnitPatches.empty()) {
699     StringRef AbbrevSectionContents =
700         Unit.isDWOUnit() ? Unit.getContext().getDWARFObj().getAbbrevDWOSection()
701                          : Unit.getContext().getDWARFObj().getAbbrevSection();
702     StringRef AbbrevContents;
703 
704     const DWARFUnitIndex &CUIndex = Unit.getContext().getCUIndex();
705     if (!CUIndex.getRows().empty()) {
706       // Handle DWP section contribution.
707       const DWARFUnitIndex::Entry *DWOEntry =
708           CUIndex.getFromHash(*Unit.getDWOId());
709       if (!DWOEntry)
710         return;
711 
712       const DWARFUnitIndex::Entry::SectionContribution *DWOContrubution =
713           DWOEntry->getContribution(DWARFSectionKind::DW_SECT_ABBREV);
714       AbbrevContents = AbbrevSectionContents.substr(DWOContrubution->Offset,
715                                                     DWOContrubution->Length);
716     } else if (!Unit.isDWOUnit()) {
717       const uint64_t StartOffset = Unit.getAbbreviationsOffset();
718 
719       // We know where the unit's abbreviation set starts, but not where it ends
720       // as such data is not readily available. Hence, we have to build a sorted
721       // list of start addresses and find the next starting address to determine
722       // the set boundaries.
723       //
724       // FIXME: if we had a full access to DWARFDebugAbbrev::AbbrDeclSets
725       // we wouldn't have to build our own sorted list for the quick lookup.
726       if (AbbrevSetOffsets.empty()) {
727         for_each(
728             *Unit.getContext().getDebugAbbrev(),
729             [&](const std::pair<uint64_t, DWARFAbbreviationDeclarationSet> &P) {
730               AbbrevSetOffsets.push_back(P.first);
731             });
732         sort(AbbrevSetOffsets);
733       }
734       auto It = upper_bound(AbbrevSetOffsets, StartOffset);
735       const uint64_t EndOffset =
736           It == AbbrevSetOffsets.end() ? AbbrevSectionContents.size() : *It;
737       AbbrevContents = AbbrevSectionContents.slice(StartOffset, EndOffset);
738     } else {
739       // For DWO unit outside of DWP, we expect the entire section to hold
740       // abbreviations for this unit only.
741       AbbrevContents = AbbrevSectionContents;
742     }
743 
744     OS.reserveExtraSpace(AbbrevContents.size());
745     OS << AbbrevContents;
746 
747     return;
748   }
749 
750   for (auto I = Abbrevs->begin(), E = Abbrevs->end(); I != E; ++I) {
751     const DWARFAbbreviationDeclaration &Abbrev = *I;
752     auto Patch = UnitPatches.find(&Abbrev);
753 
754     encodeULEB128(Abbrev.getCode(), OS);
755     encodeULEB128(Abbrev.getTag(), OS);
756     encodeULEB128(Abbrev.hasChildren(), OS);
757     for (const DWARFAbbreviationDeclaration::AttributeSpec &AttrSpec :
758          Abbrev.attributes()) {
759       if (Patch != UnitPatches.end()) {
760         bool Patched = false;
761         // Patches added later take a precedence over earlier ones.
762         for (auto I = Patch->second.rbegin(), E = Patch->second.rend(); I != E;
763              ++I) {
764           if (I->OldAttr != AttrSpec.Attr)
765             continue;
766 
767           encodeULEB128(I->NewAttr, OS);
768           encodeULEB128(I->NewAttrForm, OS);
769           Patched = true;
770           break;
771         }
772         if (Patched)
773           continue;
774       }
775 
776       encodeULEB128(AttrSpec.Attr, OS);
777       encodeULEB128(AttrSpec.Form, OS);
778       if (AttrSpec.isImplicitConst())
779         encodeSLEB128(AttrSpec.getImplicitConstValue(), OS);
780     }
781 
782     encodeULEB128(0, OS);
783     encodeULEB128(0, OS);
784   }
785   encodeULEB128(0, OS);
786 }
787 
788 std::unique_ptr<DebugBufferVector> DebugAbbrevWriter::finalize() {
789   if (DWOId) {
790     // We expect abbrev_offset to always be zero for DWO units as there
791     // should be one CU per DWO, and TUs should share the same abbreviation
792     // set with the CU.
793     // For DWP AbbreviationsOffset is an Abbrev contribution in the DWP file, so
794     // can be none zero. Thus we are skipping the check for DWP.
795     bool IsDWP = !Context.getCUIndex().getRows().empty();
796     if (!IsDWP) {
797       for (const std::unique_ptr<DWARFUnit> &Unit : Context.dwo_units()) {
798         if (Unit->getAbbreviationsOffset() != 0) {
799           errs() << "BOLT-ERROR: detected DWO unit with non-zero abbr_offset. "
800                     "Unable to update debug info.\n";
801           exit(1);
802         }
803       }
804     }
805 
806     // Issue abbreviations for the DWO CU only.
807     addUnitAbbreviations(*Context.getDWOCompileUnitForHash(*DWOId));
808   } else {
809     // Add abbreviations from compile and type non-DWO units.
810     for (const std::unique_ptr<DWARFUnit> &Unit : Context.normal_units())
811       addUnitAbbreviations(*Unit);
812   }
813 
814   DebugBufferVector ReturnBuffer;
815 
816   // Pre-calculate the total size of abbrev section.
817   uint64_t Size = 0;
818   for (const auto &KV : UnitsAbbrevData) {
819     const AbbrevData &UnitData = KV.second;
820     Size += UnitData.Buffer->size();
821   }
822   ReturnBuffer.reserve(Size);
823 
824   uint64_t Pos = 0;
825   for (auto &KV : UnitsAbbrevData) {
826     AbbrevData &UnitData = KV.second;
827     ReturnBuffer.append(*UnitData.Buffer);
828     UnitData.Offset = Pos;
829     Pos += UnitData.Buffer->size();
830 
831     UnitData.Buffer.reset();
832     UnitData.Stream.reset();
833   }
834 
835   return std::make_unique<DebugBufferVector>(ReturnBuffer);
836 }
837 
838 static void emitDwarfSetLineAddrAbs(MCStreamer &OS,
839                                     MCDwarfLineTableParams Params,
840                                     int64_t LineDelta, uint64_t Address,
841                                     int PointerSize) {
842   // emit the sequence to set the address
843   OS.emitIntValue(dwarf::DW_LNS_extended_op, 1);
844   OS.emitULEB128IntValue(PointerSize + 1);
845   OS.emitIntValue(dwarf::DW_LNE_set_address, 1);
846   OS.emitIntValue(Address, PointerSize);
847 
848   // emit the sequence for the LineDelta (from 1) and a zero address delta.
849   MCDwarfLineAddr::Emit(&OS, Params, LineDelta, 0);
850 }
851 
852 static inline void emitBinaryDwarfLineTable(
853     MCStreamer *MCOS, MCDwarfLineTableParams Params,
854     const DWARFDebugLine::LineTable *Table,
855     const std::vector<DwarfLineTable::RowSequence> &InputSequences) {
856   if (InputSequences.empty())
857     return;
858 
859   constexpr uint64_t InvalidAddress = UINT64_MAX;
860   unsigned FileNum = 1;
861   unsigned LastLine = 1;
862   unsigned Column = 0;
863   unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
864   unsigned Isa = 0;
865   unsigned Discriminator = 0;
866   uint64_t LastAddress = InvalidAddress;
867   uint64_t PrevEndOfSequence = InvalidAddress;
868   const MCAsmInfo *AsmInfo = MCOS->getContext().getAsmInfo();
869 
870   auto emitEndOfSequence = [&](uint64_t Address) {
871     MCDwarfLineAddr::Emit(MCOS, Params, INT64_MAX, Address - LastAddress);
872     FileNum = 1;
873     LastLine = 1;
874     Column = 0;
875     Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
876     Isa = 0;
877     Discriminator = 0;
878     LastAddress = InvalidAddress;
879   };
880 
881   for (const DwarfLineTable::RowSequence &Sequence : InputSequences) {
882     const uint64_t SequenceStart =
883         Table->Rows[Sequence.FirstIndex].Address.Address;
884 
885     // Check if we need to mark the end of the sequence.
886     if (PrevEndOfSequence != InvalidAddress && LastAddress != InvalidAddress &&
887         PrevEndOfSequence != SequenceStart) {
888       emitEndOfSequence(PrevEndOfSequence);
889     }
890 
891     for (uint32_t RowIndex = Sequence.FirstIndex;
892          RowIndex <= Sequence.LastIndex; ++RowIndex) {
893       const DWARFDebugLine::Row &Row = Table->Rows[RowIndex];
894       int64_t LineDelta = static_cast<int64_t>(Row.Line) - LastLine;
895       const uint64_t Address = Row.Address.Address;
896 
897       if (FileNum != Row.File) {
898         FileNum = Row.File;
899         MCOS->emitInt8(dwarf::DW_LNS_set_file);
900         MCOS->emitULEB128IntValue(FileNum);
901       }
902       if (Column != Row.Column) {
903         Column = Row.Column;
904         MCOS->emitInt8(dwarf::DW_LNS_set_column);
905         MCOS->emitULEB128IntValue(Column);
906       }
907       if (Discriminator != Row.Discriminator &&
908           MCOS->getContext().getDwarfVersion() >= 4) {
909         Discriminator = Row.Discriminator;
910         unsigned Size = getULEB128Size(Discriminator);
911         MCOS->emitInt8(dwarf::DW_LNS_extended_op);
912         MCOS->emitULEB128IntValue(Size + 1);
913         MCOS->emitInt8(dwarf::DW_LNE_set_discriminator);
914         MCOS->emitULEB128IntValue(Discriminator);
915       }
916       if (Isa != Row.Isa) {
917         Isa = Row.Isa;
918         MCOS->emitInt8(dwarf::DW_LNS_set_isa);
919         MCOS->emitULEB128IntValue(Isa);
920       }
921       if (Row.IsStmt != Flags) {
922         Flags = Row.IsStmt;
923         MCOS->emitInt8(dwarf::DW_LNS_negate_stmt);
924       }
925       if (Row.BasicBlock)
926         MCOS->emitInt8(dwarf::DW_LNS_set_basic_block);
927       if (Row.PrologueEnd)
928         MCOS->emitInt8(dwarf::DW_LNS_set_prologue_end);
929       if (Row.EpilogueBegin)
930         MCOS->emitInt8(dwarf::DW_LNS_set_epilogue_begin);
931 
932       // The end of the sequence is not normal in the middle of the input
933       // sequence, but could happen, e.g. for assembly code.
934       if (Row.EndSequence) {
935         emitEndOfSequence(Address);
936       } else {
937         if (LastAddress == InvalidAddress)
938           emitDwarfSetLineAddrAbs(*MCOS, Params, LineDelta, Address,
939                                   AsmInfo->getCodePointerSize());
940         else
941           MCDwarfLineAddr::Emit(MCOS, Params, LineDelta, Address - LastAddress);
942 
943         LastAddress = Address;
944         LastLine = Row.Line;
945       }
946 
947       Discriminator = 0;
948     }
949     PrevEndOfSequence = Sequence.EndAddress;
950   }
951 
952   // Finish with the end of the sequence.
953   if (LastAddress != InvalidAddress)
954     emitEndOfSequence(PrevEndOfSequence);
955 }
956 
957 // This function is similar to the one from MCDwarfLineTable, except it handles
958 // end-of-sequence entries differently by utilizing line entries with
959 // DWARF2_FLAG_END_SEQUENCE flag.
960 static inline void emitDwarfLineTable(
961     MCStreamer *MCOS, MCSection *Section,
962     const MCLineSection::MCDwarfLineEntryCollection &LineEntries) {
963   unsigned FileNum = 1;
964   unsigned LastLine = 1;
965   unsigned Column = 0;
966   unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
967   unsigned Isa = 0;
968   unsigned Discriminator = 0;
969   MCSymbol *LastLabel = nullptr;
970   const MCAsmInfo *AsmInfo = MCOS->getContext().getAsmInfo();
971 
972   // Loop through each MCDwarfLineEntry and encode the dwarf line number table.
973   for (const MCDwarfLineEntry &LineEntry : LineEntries) {
974     if (LineEntry.getFlags() & DWARF2_FLAG_END_SEQUENCE) {
975       MCOS->emitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, LineEntry.getLabel(),
976                                      AsmInfo->getCodePointerSize());
977       FileNum = 1;
978       LastLine = 1;
979       Column = 0;
980       Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
981       Isa = 0;
982       Discriminator = 0;
983       LastLabel = nullptr;
984       continue;
985     }
986 
987     int64_t LineDelta = static_cast<int64_t>(LineEntry.getLine()) - LastLine;
988 
989     if (FileNum != LineEntry.getFileNum()) {
990       FileNum = LineEntry.getFileNum();
991       MCOS->emitInt8(dwarf::DW_LNS_set_file);
992       MCOS->emitULEB128IntValue(FileNum);
993     }
994     if (Column != LineEntry.getColumn()) {
995       Column = LineEntry.getColumn();
996       MCOS->emitInt8(dwarf::DW_LNS_set_column);
997       MCOS->emitULEB128IntValue(Column);
998     }
999     if (Discriminator != LineEntry.getDiscriminator() &&
1000         MCOS->getContext().getDwarfVersion() >= 4) {
1001       Discriminator = LineEntry.getDiscriminator();
1002       unsigned Size = getULEB128Size(Discriminator);
1003       MCOS->emitInt8(dwarf::DW_LNS_extended_op);
1004       MCOS->emitULEB128IntValue(Size + 1);
1005       MCOS->emitInt8(dwarf::DW_LNE_set_discriminator);
1006       MCOS->emitULEB128IntValue(Discriminator);
1007     }
1008     if (Isa != LineEntry.getIsa()) {
1009       Isa = LineEntry.getIsa();
1010       MCOS->emitInt8(dwarf::DW_LNS_set_isa);
1011       MCOS->emitULEB128IntValue(Isa);
1012     }
1013     if ((LineEntry.getFlags() ^ Flags) & DWARF2_FLAG_IS_STMT) {
1014       Flags = LineEntry.getFlags();
1015       MCOS->emitInt8(dwarf::DW_LNS_negate_stmt);
1016     }
1017     if (LineEntry.getFlags() & DWARF2_FLAG_BASIC_BLOCK)
1018       MCOS->emitInt8(dwarf::DW_LNS_set_basic_block);
1019     if (LineEntry.getFlags() & DWARF2_FLAG_PROLOGUE_END)
1020       MCOS->emitInt8(dwarf::DW_LNS_set_prologue_end);
1021     if (LineEntry.getFlags() & DWARF2_FLAG_EPILOGUE_BEGIN)
1022       MCOS->emitInt8(dwarf::DW_LNS_set_epilogue_begin);
1023 
1024     MCSymbol *Label = LineEntry.getLabel();
1025 
1026     // At this point we want to emit/create the sequence to encode the delta
1027     // in line numbers and the increment of the address from the previous
1028     // Label and the current Label.
1029     MCOS->emitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label,
1030                                    AsmInfo->getCodePointerSize());
1031     Discriminator = 0;
1032     LastLine = LineEntry.getLine();
1033     LastLabel = Label;
1034   }
1035 
1036   assert(LastLabel == nullptr && "end of sequence expected");
1037 }
1038 
1039 void DwarfLineTable::emitCU(MCStreamer *MCOS, MCDwarfLineTableParams Params,
1040                             Optional<MCDwarfLineStr> &LineStr,
1041                             BinaryContext &BC) const {
1042   if (!RawData.empty()) {
1043     assert(MCLineSections.getMCLineEntries().empty() &&
1044            InputSequences.empty() &&
1045            "cannot combine raw data with new line entries");
1046     MCOS->emitLabel(getLabel());
1047     MCOS->emitBytes(RawData);
1048 
1049     // Emit fake relocation for RuntimeDyld to always allocate the section.
1050     //
1051     // FIXME: remove this once RuntimeDyld stops skipping allocatable sections
1052     //        without relocations.
1053     MCOS->emitRelocDirective(
1054         *MCConstantExpr::create(0, *BC.Ctx), "BFD_RELOC_NONE",
1055         MCSymbolRefExpr::create(getLabel(), *BC.Ctx), SMLoc(), *BC.STI);
1056 
1057     return;
1058   }
1059 
1060   MCSymbol *LineEndSym = Header.Emit(MCOS, Params, LineStr).second;
1061 
1062   // Put out the line tables.
1063   for (const auto &LineSec : MCLineSections.getMCLineEntries())
1064     emitDwarfLineTable(MCOS, LineSec.first, LineSec.second);
1065 
1066   // Emit line tables for the original code.
1067   emitBinaryDwarfLineTable(MCOS, Params, InputTable, InputSequences);
1068 
1069   // This is the end of the section, so set the value of the symbol at the end
1070   // of this section (that was used in a previous expression).
1071   MCOS->emitLabel(LineEndSym);
1072 }
1073 
1074 void DwarfLineTable::emit(BinaryContext &BC, MCStreamer &Streamer) {
1075   MCAssembler &Assembler =
1076       static_cast<MCObjectStreamer *>(&Streamer)->getAssembler();
1077 
1078   MCDwarfLineTableParams Params = Assembler.getDWARFLinetableParams();
1079 
1080   auto &LineTables = BC.getDwarfLineTables();
1081 
1082   // Bail out early so we don't switch to the debug_line section needlessly and
1083   // in doing so create an unnecessary (if empty) section.
1084   if (LineTables.empty())
1085     return;
1086 
1087   // In a v5 non-split line table, put the strings in a separate section.
1088   Optional<MCDwarfLineStr> LineStr(None);
1089   if (BC.Ctx->getDwarfVersion() >= 5)
1090     LineStr = MCDwarfLineStr(*BC.Ctx);
1091 
1092   // Switch to the section where the table will be emitted into.
1093   Streamer.SwitchSection(BC.MOFI->getDwarfLineSection());
1094 
1095   // Handle the rest of the Compile Units.
1096   for (auto &CUIDTablePair : LineTables) {
1097     CUIDTablePair.second.emitCU(&Streamer, Params, LineStr, BC);
1098   }
1099 }
1100 
1101 } // namespace bolt
1102 } // namespace llvm
1103