1 //===- bolt/Core/DebugData.cpp - Debugging information handling -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements functions and classes for handling debug info.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Core/DebugData.h"
14 #include "bolt/Core/BinaryContext.h"
15 #include "bolt/Utils/Utils.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCObjectStreamer.h"
18 #include "llvm/Support/CommandLine.h"
19 #include "llvm/Support/EndianStream.h"
20 #include "llvm/Support/LEB128.h"
21 #include <algorithm>
22 #include <cassert>
23 #include <cstdint>
24 #include <limits>
25 #include <unordered_map>
26 
27 #define DEBUG_TYPE "bolt-debug-info"
28 
29 namespace opts {
30 extern llvm::cl::opt<unsigned> Verbosity;
31 } // namespace opts
32 
33 namespace llvm {
34 class MCSymbol;
35 
36 namespace bolt {
37 
// Definition of the static NULL_ROW member; both of its fields are zero.
// NOTE(review): presumably used as a "no row" sentinel - confirm against the
// declaration in DebugData.h.
const DebugLineTableRowRef DebugLineTableRowRef::NULL_ROW{0, 0};
39 
40 namespace {
41 
42 LLVM_ATTRIBUTE_UNUSED
43 static void printLE64(const std::string &S) {
44   for (uint32_t I = 0, Size = S.size(); I < Size; ++I) {
45     errs() << Twine::utohexstr(S[I]);
46     errs() << Twine::utohexstr((int8_t)S[I]);
47   }
48   errs() << "\n";
49 }
50 
51 // Writes address ranges to Writer as pairs of 64-bit (address, size).
52 // If RelativeRange is true, assumes the address range to be written must be of
53 // the form (begin address, range size), otherwise (begin address, end address).
54 // Terminates the list by writing a pair of two zeroes.
55 // Returns the number of written bytes.
56 uint64_t writeAddressRanges(raw_svector_ostream &Stream,
57                             const DebugAddressRangesVector &AddressRanges,
58                             const bool WriteRelativeRanges = false) {
59   for (const DebugAddressRange &Range : AddressRanges) {
60     support::endian::write(Stream, Range.LowPC, support::little);
61     support::endian::write(
62         Stream, WriteRelativeRanges ? Range.HighPC - Range.LowPC : Range.HighPC,
63         support::little);
64   }
65   // Finish with 0 entries.
66   support::endian::write(Stream, 0ULL, support::little);
67   support::endian::write(Stream, 0ULL, support::little);
68   return AddressRanges.size() * 16 + 16;
69 }
70 
71 } // namespace
72 
73 DebugRangesSectionWriter::DebugRangesSectionWriter() {
74   RangesBuffer = std::make_unique<DebugBufferVector>();
75   RangesStream = std::make_unique<raw_svector_ostream>(*RangesBuffer);
76 
77   // Add an empty range as the first entry;
78   SectionOffset +=
79       writeAddressRanges(*RangesStream.get(), DebugAddressRangesVector{});
80 }
81 
82 uint64_t DebugRangesSectionWriter::addRanges(
83     DebugAddressRangesVector &&Ranges,
84     std::map<DebugAddressRangesVector, uint64_t> &CachedRanges) {
85   if (Ranges.empty())
86     return getEmptyRangesOffset();
87 
88   const auto RI = CachedRanges.find(Ranges);
89   if (RI != CachedRanges.end())
90     return RI->second;
91 
92   const uint64_t EntryOffset = addRanges(Ranges);
93   CachedRanges.emplace(std::move(Ranges), EntryOffset);
94 
95   return EntryOffset;
96 }
97 
98 uint64_t
99 DebugRangesSectionWriter::addRanges(const DebugAddressRangesVector &Ranges) {
100   if (Ranges.empty())
101     return getEmptyRangesOffset();
102 
103   // Reading the SectionOffset and updating it should be atomic to guarantee
104   // unique and correct offsets in patches.
105   std::lock_guard<std::mutex> Lock(WriterMutex);
106   const uint32_t EntryOffset = SectionOffset;
107   SectionOffset += writeAddressRanges(*RangesStream.get(), Ranges);
108 
109   return EntryOffset;
110 }
111 
112 uint64_t DebugRangesSectionWriter::getSectionOffset() {
113   std::lock_guard<std::mutex> Lock(WriterMutex);
114   return SectionOffset;
115 }
116 
117 void DebugARangesSectionWriter::addCURanges(uint64_t CUOffset,
118                                             DebugAddressRangesVector &&Ranges) {
119   std::lock_guard<std::mutex> Lock(CUAddressRangesMutex);
120   CUAddressRanges.emplace(CUOffset, std::move(Ranges));
121 }
122 
123 void DebugARangesSectionWriter::writeARangesSection(
124     raw_svector_ostream &RangesStream,
125     const std::unordered_map<uint32_t, uint32_t> CUMap) const {
126   // For reference on the format of the .debug_aranges section, see the DWARF4
127   // specification, section 6.1.4 Lookup by Address
128   // http://www.dwarfstd.org/doc/DWARF4.pdf
129   for (const auto &CUOffsetAddressRangesPair : CUAddressRanges) {
130     const uint64_t Offset = CUOffsetAddressRangesPair.first;
131     const DebugAddressRangesVector &AddressRanges =
132         CUOffsetAddressRangesPair.second;
133 
134     // Emit header.
135 
136     // Size of this set: 8 (size of the header) + 4 (padding after header)
137     // + 2*sizeof(uint64_t) bytes for each of the ranges, plus an extra
138     // pair of uint64_t's for the terminating, zero-length range.
139     // Does not include size field itself.
140     uint32_t Size = 8 + 4 + 2 * sizeof(uint64_t) * (AddressRanges.size() + 1);
141 
142     // Header field #1: set size.
143     support::endian::write(RangesStream, Size, support::little);
144 
145     // Header field #2: version number, 2 as per the specification.
146     support::endian::write(RangesStream, static_cast<uint16_t>(2),
147                            support::little);
148 
149     assert(CUMap.count(Offset) && "Original CU offset is not found in CU Map");
150     // Header field #3: debug info offset of the correspondent compile unit.
151     support::endian::write(RangesStream,
152                            static_cast<uint32_t>(CUMap.find(Offset)->second),
153                            support::little);
154 
155     // Header field #4: address size.
156     // 8 since we only write ELF64 binaries for now.
157     RangesStream << char(8);
158 
159     // Header field #5: segment size of target architecture.
160     RangesStream << char(0);
161 
162     // Padding before address table - 4 bytes in the 64-bit-pointer case.
163     support::endian::write(RangesStream, static_cast<uint32_t>(0),
164                            support::little);
165 
166     writeAddressRanges(RangesStream, AddressRanges, true);
167   }
168 }
169 
170 DebugAddrWriter::DebugAddrWriter(BinaryContext *Bc) { BC = Bc; }
171 
172 void DebugAddrWriter::AddressForDWOCU::dump() {
173   std::vector<IndexAddressPair> SortedMap(indexToAddressBegin(),
174                                           indexToAdddessEnd());
175   // Sorting address in increasing order of indices.
176   std::sort(SortedMap.begin(), SortedMap.end(),
177             [](const IndexAddressPair &A, const IndexAddressPair &B) {
178               return A.first < B.first;
179             });
180   for (auto &Pair : SortedMap)
181     dbgs() << Twine::utohexstr(Pair.second) << "\t" << Pair.first << "\n";
182 }
183 uint32_t DebugAddrWriter::getIndexFromAddress(uint64_t Address,
184                                               uint64_t DWOId) {
185   std::lock_guard<std::mutex> Lock(WriterMutex);
186   if (!AddressMaps.count(DWOId))
187     AddressMaps[DWOId] = AddressForDWOCU();
188 
189   AddressForDWOCU &Map = AddressMaps[DWOId];
190   auto Entry = Map.find(Address);
191   if (Entry == Map.end()) {
192     auto Index = Map.getNextIndex();
193     Entry = Map.insert(Address, Index).first;
194   }
195   return Entry->second;
196 }
197 
198 // Case1) Address is not in map insert in to AddresToIndex and IndexToAddres
199 // Case2) Address is in the map but Index is higher or equal. Need to update
200 // IndexToAddrss. Case3) Address is in the map but Index is lower. Need to
201 // update AddressToIndex and IndexToAddress
202 void DebugAddrWriter::addIndexAddress(uint64_t Address, uint32_t Index,
203                                       uint64_t DWOId) {
204   std::lock_guard<std::mutex> Lock(WriterMutex);
205   AddressForDWOCU &Map = AddressMaps[DWOId];
206   auto Entry = Map.find(Address);
207   if (Entry != Map.end()) {
208     if (Entry->second > Index)
209       Map.updateAddressToIndex(Address, Index);
210     Map.updateIndexToAddrss(Address, Index);
211   } else {
212     Map.insert(Address, Index);
213   }
214 }
215 
216 AddressSectionBuffer DebugAddrWriter::finalize() {
217   // Need to layout all sections within .debug_addr
218   // Within each section sort Address by index.
219   AddressSectionBuffer Buffer;
220   raw_svector_ostream AddressStream(Buffer);
221   for (std::unique_ptr<DWARFUnit> &CU : BC->DwCtx->compile_units()) {
222     Optional<uint64_t> DWOId = CU->getDWOId();
223     // Handling the case wehre debug information is a mix of Debug fission and
224     // monolitic.
225     if (!DWOId)
226       continue;
227     auto AM = AddressMaps.find(*DWOId);
228     // Adding to map even if it did not contribute to .debug_addr.
229     // The Skeleton CU will still have DW_AT_GNU_addr_base.
230     DWOIdToOffsetMap[*DWOId] = Buffer.size();
231     // If does not exist this CUs DWO section didn't contribute to .debug_addr.
232     if (AM == AddressMaps.end())
233       continue;
234     std::vector<IndexAddressPair> SortedMap(AM->second.indexToAddressBegin(),
235                                             AM->second.indexToAdddessEnd());
236     // Sorting address in increasing order of indices.
237     std::sort(SortedMap.begin(), SortedMap.end(),
238               [](const IndexAddressPair &A, const IndexAddressPair &B) {
239                 return A.first < B.first;
240               });
241 
242     uint8_t AddrSize = CU->getAddressByteSize();
243     uint32_t Counter = 0;
244     auto WriteAddress = [&](uint64_t Address) -> void {
245       ++Counter;
246       switch (AddrSize) {
247       default:
248         assert(false && "Address Size is invalid.");
249         break;
250       case 4:
251         support::endian::write(AddressStream, static_cast<uint32_t>(Address),
252                                support::little);
253         break;
254       case 8:
255         support::endian::write(AddressStream, Address, support::little);
256         break;
257       }
258     };
259 
260     for (const IndexAddressPair &Val : SortedMap) {
261       while (Val.first > Counter)
262         WriteAddress(0);
263       WriteAddress(Val.second);
264     }
265   }
266 
267   return Buffer;
268 }
269 
270 uint64_t DebugAddrWriter::getOffset(uint64_t DWOId) {
271   auto Iter = DWOIdToOffsetMap.find(DWOId);
272   assert(Iter != DWOIdToOffsetMap.end() &&
273          "Offset in to.debug_addr was not found for DWO ID.");
274   return Iter->second;
275 }
276 
277 DebugLocWriter::DebugLocWriter(BinaryContext *BC) {
278   LocBuffer = std::make_unique<DebugBufferVector>();
279   LocStream = std::make_unique<raw_svector_ostream>(*LocBuffer);
280 }
281 
282 void DebugLocWriter::addList(uint64_t AttrOffset,
283                              DebugLocationsVector &&LocList) {
284   if (LocList.empty()) {
285     EmptyAttrLists.push_back(AttrOffset);
286     return;
287   }
288   // Since there is a separate DebugLocWriter for each thread,
289   // we don't need a lock to read the SectionOffset and update it.
290   const uint32_t EntryOffset = SectionOffset;
291 
292   for (const DebugLocationEntry &Entry : LocList) {
293     support::endian::write(*LocStream, static_cast<uint64_t>(Entry.LowPC),
294                            support::little);
295     support::endian::write(*LocStream, static_cast<uint64_t>(Entry.HighPC),
296                            support::little);
297     support::endian::write(*LocStream, static_cast<uint16_t>(Entry.Expr.size()),
298                            support::little);
299     *LocStream << StringRef(reinterpret_cast<const char *>(Entry.Expr.data()),
300                             Entry.Expr.size());
301     SectionOffset += 2 * 8 + 2 + Entry.Expr.size();
302   }
303   LocStream->write_zeros(16);
304   SectionOffset += 16;
305   LocListDebugInfoPatches.push_back({AttrOffset, EntryOffset});
306 }
307 
308 void DebugLoclistWriter::addList(uint64_t AttrOffset,
309                                  DebugLocationsVector &&LocList) {
310   Patches.push_back({AttrOffset, std::move(LocList)});
311 }
312 
313 std::unique_ptr<DebugBufferVector> DebugLocWriter::getBuffer() {
314   return std::move(LocBuffer);
315 }
316 
317 // DWARF 4: 2.6.2
318 void DebugLocWriter::finalize(uint64_t SectionOffset,
319                               SimpleBinaryPatcher &DebugInfoPatcher) {
320   for (const auto LocListDebugInfoPatchType : LocListDebugInfoPatches) {
321     uint64_t Offset = SectionOffset + LocListDebugInfoPatchType.LocListOffset;
322     DebugInfoPatcher.addLE32Patch(LocListDebugInfoPatchType.DebugInfoAttrOffset,
323                                   Offset);
324   }
325 
326   for (uint64_t DebugInfoAttrOffset : EmptyAttrLists)
327     DebugInfoPatcher.addLE32Patch(DebugInfoAttrOffset,
328                                   DebugLocWriter::EmptyListOffset);
329 }
330 
331 void DebugLoclistWriter::finalize(uint64_t SectionOffset,
332                                   SimpleBinaryPatcher &DebugInfoPatcher) {
333   for (LocPatch &Patch : Patches) {
334     if (Patch.LocList.empty()) {
335       DebugInfoPatcher.addLE32Patch(Patch.AttrOffset,
336                                     DebugLocWriter::EmptyListOffset);
337       continue;
338     }
339     const uint32_t EntryOffset = LocBuffer->size();
340     for (const DebugLocationEntry &Entry : Patch.LocList) {
341       support::endian::write(*LocStream,
342                              static_cast<uint8_t>(dwarf::DW_LLE_startx_length),
343                              support::little);
344       uint32_t Index = AddrWriter->getIndexFromAddress(Entry.LowPC, DWOId);
345       encodeULEB128(Index, *LocStream);
346 
347       // TODO: Support DWARF5
348       support::endian::write(*LocStream,
349                              static_cast<uint32_t>(Entry.HighPC - Entry.LowPC),
350                              support::little);
351       support::endian::write(*LocStream,
352                              static_cast<uint16_t>(Entry.Expr.size()),
353                              support::little);
354       *LocStream << StringRef(reinterpret_cast<const char *>(Entry.Expr.data()),
355                               Entry.Expr.size());
356     }
357     support::endian::write(*LocStream,
358                            static_cast<uint8_t>(dwarf::DW_LLE_end_of_list),
359                            support::little);
360     DebugInfoPatcher.addLE32Patch(Patch.AttrOffset, EntryOffset);
361     clearList(Patch.LocList);
362   }
363   clearList(Patches);
364 }
365 
// Definition of the static AddrWriter pointer, initially null.
// DebugLoclistWriter::finalize() dereferences it to obtain address indices.
// NOTE(review): presumably assigned elsewhere before finalize() runs -
// confirm at the call sites.
DebugAddrWriter *DebugLoclistWriter::AddrWriter = nullptr;
367 
368 void DebugInfoBinaryPatcher::addUnitBaseOffsetLabel(uint64_t Offset) {
369   Offset -= DWPUnitOffset;
370   std::lock_guard<std::mutex> Lock(WriterMutex);
371   DebugPatches.emplace_back(new DWARFUnitOffsetBaseLabel(Offset));
372 }
373 
374 void DebugInfoBinaryPatcher::addDestinationReferenceLabel(uint64_t Offset) {
375   Offset -= DWPUnitOffset;
376   std::lock_guard<std::mutex> Lock(WriterMutex);
377   auto RetVal = DestinationLabels.insert(Offset);
378   if (!RetVal.second)
379     return;
380 
381   DebugPatches.emplace_back(new DestinationReferenceLabel(Offset));
382 }
383 
384 void DebugInfoBinaryPatcher::addReferenceToPatch(uint64_t Offset,
385                                                  uint32_t DestinationOffset,
386                                                  uint32_t OldValueSize,
387                                                  dwarf::Form Form) {
388   Offset -= DWPUnitOffset;
389   DestinationOffset -= DWPUnitOffset;
390   std::lock_guard<std::mutex> Lock(WriterMutex);
391   DebugPatches.emplace_back(
392       new DebugPatchReference(Offset, OldValueSize, DestinationOffset, Form));
393 }
394 
395 void DebugInfoBinaryPatcher::addUDataPatch(uint64_t Offset, uint64_t NewValue,
396                                            uint32_t OldValueSize) {
397   Offset -= DWPUnitOffset;
398   std::lock_guard<std::mutex> Lock(WriterMutex);
399   DebugPatches.emplace_back(
400       new DebugPatchVariableSize(Offset, OldValueSize, NewValue));
401 }
402 
403 void DebugInfoBinaryPatcher::addLE64Patch(uint64_t Offset, uint64_t NewValue) {
404   Offset -= DWPUnitOffset;
405   std::lock_guard<std::mutex> Lock(WriterMutex);
406   DebugPatches.emplace_back(new DebugPatch64(Offset, NewValue));
407 }
408 
409 void DebugInfoBinaryPatcher::addLE32Patch(uint64_t Offset, uint32_t NewValue,
410                                           uint32_t OldValueSize) {
411   Offset -= DWPUnitOffset;
412   std::lock_guard<std::mutex> Lock(WriterMutex);
413   if (OldValueSize == 4)
414     DebugPatches.emplace_back(new DebugPatch32(Offset, NewValue));
415   else
416     DebugPatches.emplace_back(new DebugPatch64to32(Offset, NewValue));
417 }
418 
419 void SimpleBinaryPatcher::addBinaryPatch(uint64_t Offset,
420                                          std::string &&NewValue,
421                                          uint32_t OldValueSize) {
422   Patches.emplace_back(Offset, std::move(NewValue));
423 }
424 
425 void SimpleBinaryPatcher::addBytePatch(uint64_t Offset, uint8_t Value) {
426   auto Str = std::string(1, Value);
427   Patches.emplace_back(Offset, std::move(Str));
428 }
429 
// Returns the ByteSize least significant bytes of NewValue, least significant
// byte first. Bytes beyond the eighth are zero because the value has been
// shifted out entirely by then.
static std::string encodeLE(size_t ByteSize, uint64_t NewValue) {
  std::string Encoded;
  Encoded.reserve(ByteSize);
  while (Encoded.size() < ByteSize) {
    Encoded.push_back(static_cast<char>(NewValue & 0xff));
    NewValue >>= 8;
  }
  return Encoded;
}
438 
439 void SimpleBinaryPatcher::addLEPatch(uint64_t Offset, uint64_t NewValue,
440                                      size_t ByteSize) {
441   Patches.emplace_back(Offset, encodeLE(ByteSize, NewValue));
442 }
443 
444 void SimpleBinaryPatcher::addUDataPatch(uint64_t Offset, uint64_t Value,
445                                         uint32_t OldValueSize) {
446   std::string Buff;
447   raw_string_ostream OS(Buff);
448   encodeULEB128(Value, OS, OldValueSize);
449 
450   Patches.emplace_back(Offset, std::move(Buff));
451 }
452 
453 void SimpleBinaryPatcher::addLE64Patch(uint64_t Offset, uint64_t NewValue) {
454   addLEPatch(Offset, NewValue, 8);
455 }
456 
457 void SimpleBinaryPatcher::addLE32Patch(uint64_t Offset, uint32_t NewValue,
458                                        uint32_t OldValueSize) {
459   addLEPatch(Offset, NewValue, 4);
460 }
461 
462 std::string SimpleBinaryPatcher::patchBinary(StringRef BinaryContents) {
463   std::string BinaryContentsStr = std::string(BinaryContents);
464   for (const auto &Patch : Patches) {
465     uint32_t Offset = Patch.first;
466     const std::string &ByteSequence = Patch.second;
467     assert(Offset + ByteSequence.size() <= BinaryContents.size() &&
468            "Applied patch runs over binary size.");
469     for (uint64_t I = 0, Size = ByteSequence.size(); I < Size; ++I) {
470       BinaryContentsStr[Offset + I] = ByteSequence[I];
471     }
472   }
473   return BinaryContentsStr;
474 }
475 
476 std::unordered_map<uint32_t, uint32_t>
477 DebugInfoBinaryPatcher::computeNewOffsets() {
478   std::unordered_map<uint32_t, uint32_t> CUMap;
479   std::sort(DebugPatches.begin(), DebugPatches.end(),
480             [](const UniquePatchPtrType &V1, const UniquePatchPtrType &V2) {
481               return V1.get()->Offset < V2.get()->Offset;
482             });
483 
484   // Calculating changes in .debug_info size from Patches to build a map of old
485   // to updated reference destination offsets.
486   for (UniquePatchPtrType &PatchBase : DebugPatches) {
487     Patch *P = PatchBase.get();
488     switch (P->Kind) {
489     default:
490       continue;
491     case DebugPatchKind::PatchValue64to32: {
492       ChangeInSize -= 4;
493       break;
494     }
495     case DebugPatchKind::PatchValueVariable: {
496       DebugPatchVariableSize *DPV =
497           reinterpret_cast<DebugPatchVariableSize *>(P);
498       std::string Temp;
499       raw_string_ostream OS(Temp);
500       encodeULEB128(DPV->Value, OS);
501       ChangeInSize += Temp.size() - DPV->OldValueSize;
502       break;
503     }
504     case DebugPatchKind::DestinationReferenceLabel: {
505       DestinationReferenceLabel *DRL =
506           reinterpret_cast<DestinationReferenceLabel *>(P);
507       OldToNewOffset[DRL->Offset] = DRL->Offset + ChangeInSize;
508       break;
509     }
510     case DebugPatchKind::ReferencePatchValue: {
511       // This doesn't look to be a common case, so will always encode as 4 bytes
512       // to reduce algorithmic complexity.
513       DebugPatchReference *RDP = reinterpret_cast<DebugPatchReference *>(P);
514       if (RDP->PatchInfo.IndirectRelative) {
515         ChangeInSize += 4 - RDP->PatchInfo.OldValueSize;
516         assert(RDP->PatchInfo.OldValueSize <= 4 &&
517                "Variable encoding reference greater than 4 bytes.");
518       }
519       break;
520     }
521     case DebugPatchKind::DWARFUnitOffsetBaseLabel: {
522       DWARFUnitOffsetBaseLabel *BaseLabel =
523           reinterpret_cast<DWARFUnitOffsetBaseLabel *>(P);
524       uint32_t CUOffset = BaseLabel->Offset;
525       uint32_t CUOffsetUpdate = CUOffset + ChangeInSize;
526       CUMap[CUOffset] = CUOffsetUpdate;
527     }
528     }
529   }
530   return CUMap;
531 }
532 
// Builds and returns the patched section contents from BinaryContents by
// walking DebugPatches in order: the untouched bytes before each patch are
// copied, then the patch's replacement bytes are appended and the replaced
// original bytes are skipped. Unlike SimpleBinaryPatcher::patchBinary the
// result may differ in size from the input. After all patches are applied the
// 4-byte length field of every unit seen via a DWARFUnitOffsetBaseLabel is
// rewritten, and DebugPatches is cleared.
//
// Reads OldToNewOffset and ChangeInSize, both of which are filled in by
// computeNewOffsets(); that function also sorts DebugPatches by offset.
// NOTE(review): presumably computeNewOffsets() must have been called first -
// confirm at the call sites.
std::string DebugInfoBinaryPatcher::patchBinary(StringRef BinaryContents) {
  std::string NewBinaryContents;
  NewBinaryContents.reserve(BinaryContents.size() + ChangeInSize);
  // Offset in the original contents of the first byte not yet consumed.
  uint32_t StartOffset = 0;
  // Offset in the new contents of the most recently seen unit base label.
  uint32_t DwarfUnitBaseOffset = 0;
  // State of the patch currently being applied: how many original bytes it
  // replaces, where it sits in the original contents, and its new bytes.
  uint32_t OldValueSize = 0;
  uint32_t Offset = 0;
  std::string ByteSequence;
  // One entry per unit: (unit offset in the new contents, new length value).
  std::vector<std::pair<uint32_t, uint32_t>> LengthPatches;
  // Wasting one entry to avoid checks for first.
  LengthPatches.push_back({0, 0});

  // Applying all the patches replacing current entry.
  // This might change the size of .debug_info section.
  for (const UniquePatchPtrType &PatchBase : DebugPatches) {
    Patch *P = PatchBase.get();
    switch (P->Kind) {
    default:
      // Kinds without a case below emit nothing and skip the copy step at the
      // bottom of the loop.
      continue;
    case DebugPatchKind::ReferencePatchValue: {
      DebugPatchReference *RDP = reinterpret_cast<DebugPatchReference *>(P);
      uint32_t DestinationOffset = RDP->DestinationOffset;
      assert(OldToNewOffset.count(DestinationOffset) &&
             "Destination Offset for reference not updated.");
      uint32_t UpdatedOffset = OldToNewOffset[DestinationOffset];
      Offset = RDP->Offset;
      OldValueSize = RDP->PatchInfo.OldValueSize;
      if (RDP->PatchInfo.DirectRelative) {
        // Unit-relative reference: rebase against the current unit's new
        // offset and keep the original fixed width.
        UpdatedOffset -= DwarfUnitBaseOffset;
        ByteSequence = encodeLE(OldValueSize, UpdatedOffset);
        // In theory reference for DW_FORM_ref{1,2,4,8} can be right on the edge
        // and overflow if later debug information grows.
        // NOTE(review): encodeLE() returns exactly OldValueSize bytes, so the
        // comparison below cannot be true; a value that no longer fits is
        // truncated without this diagnostic being printed.
        if (ByteSequence.size() > OldValueSize)
          errs() << "BOLT-ERROR: Relative reference of size "
                 << Twine::utohexstr(OldValueSize)
                 << " overflows with the new encoding.\n";
      } else if (RDP->PatchInfo.DirectAbsolute) {
        // Absolute reference: the updated offset is written as is.
        ByteSequence = encodeLE(OldValueSize, UpdatedOffset);
      } else if (RDP->PatchInfo.IndirectRelative) {
        // Unit-relative ULEB128 reference, padded to 4 bytes to match the
        // size accounting done in computeNewOffsets().
        UpdatedOffset -= DwarfUnitBaseOffset;
        ByteSequence.clear();
        raw_string_ostream OS(ByteSequence);
        encodeULEB128(UpdatedOffset, OS, 4);
      } else {
        llvm_unreachable("Invalid Reference form.");
      }
      break;
    }
    case DebugPatchKind::PatchValue32: {
      // 4 bytes replaced by 4 bytes.
      DebugPatch32 *P32 = reinterpret_cast<DebugPatch32 *>(P);
      Offset = P32->Offset;
      OldValueSize = 4;
      ByteSequence = encodeLE(4, P32->Value);
      break;
    }
    case DebugPatchKind::PatchValue64to32: {
      // 8 bytes replaced by 4 bytes.
      DebugPatch64to32 *P64to32 = reinterpret_cast<DebugPatch64to32 *>(P);
      Offset = P64to32->Offset;
      OldValueSize = 8;
      ByteSequence = encodeLE(4, P64to32->Value);
      break;
    }
    case DebugPatchKind::PatchValueVariable: {
      // OldValueSize bytes replaced by the unpadded ULEB128 encoding.
      DebugPatchVariableSize *PV =
          reinterpret_cast<DebugPatchVariableSize *>(P);
      Offset = PV->Offset;
      OldValueSize = PV->OldValueSize;
      ByteSequence.clear();
      raw_string_ostream OS(ByteSequence);
      encodeULEB128(PV->Value, OS);
      break;
    }
    case DebugPatchKind::PatchValue64: {
      // 8 bytes replaced by 8 bytes.
      DebugPatch64 *P64 = reinterpret_cast<DebugPatch64 *>(P);
      Offset = P64->Offset;
      OldValueSize = 8;
      ByteSequence = encodeLE(8, P64->Value);
      break;
    }
    case DebugPatchKind::DWARFUnitOffsetBaseLabel: {
      // A unit boundary: nothing is replaced or emitted, but the previous
      // unit's length is now known and a new length entry is opened.
      DWARFUnitOffsetBaseLabel *BaseLabel =
          reinterpret_cast<DWARFUnitOffsetBaseLabel *>(P);
      Offset = BaseLabel->Offset;
      OldValueSize = 0;
      ByteSequence.clear();
      auto &Patch = LengthPatches.back();
      // Length to copy between last patch entry and next compile unit.
      uint32_t RemainingLength = Offset - StartOffset;
      uint32_t NewCUOffset = NewBinaryContents.size() + RemainingLength;
      DwarfUnitBaseOffset = NewCUOffset;
      // Length of previous CU = This CU Offset - sizeof(length) - last CU
      // Offset.
      Patch.second = NewCUOffset - 4 - Patch.first;
      LengthPatches.push_back({NewCUOffset, 0});
      break;
    }
    }

    assert(Offset + ByteSequence.size() <= BinaryContents.size() &&
           "Applied patch runs over binary size.");
    // Copy the untouched bytes in front of the patch, append the replacement,
    // and resume reading after the bytes that were replaced.
    uint32_t Length = Offset - StartOffset;
    NewBinaryContents.append(BinaryContents.substr(StartOffset, Length).data(),
                             Length);
    NewBinaryContents.append(ByteSequence.data(), ByteSequence.size());
    StartOffset = Offset + OldValueSize;
  }
  // Copy whatever follows the last patch.
  uint32_t Length = BinaryContents.size() - StartOffset;
  NewBinaryContents.append(BinaryContents.substr(StartOffset, Length).data(),
                           Length);
  DebugPatches.clear();

  // Patching lengths of CUs
  // The last unit extends to the end of the new contents.
  auto &Patch = LengthPatches.back();
  Patch.second = NewBinaryContents.size() - 4 - Patch.first;
  // Entry 0 is the placeholder pushed at the top, so start at 1.
  for (uint32_t J = 1, Size = LengthPatches.size(); J < Size; ++J) {
    const auto &Patch = LengthPatches[J];
    ByteSequence = encodeLE(4, Patch.second);
    Offset = Patch.first;
    for (uint64_t I = 0, Size = ByteSequence.size(); I < Size; ++I)
      NewBinaryContents[Offset + I] = ByteSequence[I];
  }

  return NewBinaryContents;
}
657 
658 void DebugStrWriter::create() {
659   StrBuffer = std::make_unique<DebugStrBufferVector>();
660   StrStream = std::make_unique<raw_svector_ostream>(*StrBuffer);
661 }
662 
663 void DebugStrWriter::initialize() {
664   auto StrSection = BC->DwCtx->getDWARFObj().getStrSection();
665   (*StrStream) << StrSection;
666 }
667 
668 uint32_t DebugStrWriter::addString(StringRef Str) {
669   std::lock_guard<std::mutex> Lock(WriterMutex);
670   if (StrBuffer->empty())
671     initialize();
672   auto Offset = StrBuffer->size();
673   (*StrStream) << Str;
674   StrStream->write_zeros(1);
675   return Offset;
676 }
677 
// Produces the abbreviation-set bytes for Unit and stores them in
// UnitsAbbrevData, keyed by the unit's abbreviations offset. Does nothing if
// the unit has no abbreviations or if a set with the same abbreviations
// offset has already been added.
//
// When no patches are registered for the unit the original bytes are copied
// verbatim; otherwise every abbreviation declaration is re-encoded with the
// registered attribute patches applied.
void DebugAbbrevWriter::addUnitAbbreviations(DWARFUnit &Unit) {
  const DWARFAbbreviationDeclarationSet *Abbrevs = Unit.getAbbreviations();
  if (!Abbrevs)
    return;

  // Multiple units may share the same abbreviations. Only add abbreviations
  // for the first unit and reuse them.
  const uint64_t AbbrevOffset = Unit.getAbbreviationsOffset();
  if (UnitsAbbrevData.find(AbbrevOffset) != UnitsAbbrevData.end())
    return;

  // operator[] creates the entry for this abbreviations offset.
  AbbrevData &UnitData = UnitsAbbrevData[AbbrevOffset];
  UnitData.Buffer = std::make_unique<DebugBufferVector>();
  UnitData.Stream = std::make_unique<raw_svector_ostream>(*UnitData.Buffer);

  // NOTE(review): operator[] also inserts an empty patch list for units that
  // have none.
  const PatchesTy &UnitPatches = Patches[&Unit];

  raw_svector_ostream &OS = *UnitData.Stream.get();

  // Take a fast path if there are no patches to apply. Simply copy the original
  // contents.
  if (UnitPatches.empty()) {
    StringRef AbbrevSectionContents =
        Unit.isDWOUnit() ? Unit.getContext().getDWARFObj().getAbbrevDWOSection()
                         : Unit.getContext().getDWARFObj().getAbbrevSection();
    StringRef AbbrevContents;

    // A non-empty CU index is treated as "this context is a DWP file".
    const DWARFUnitIndex &CUIndex = Unit.getContext().getCUIndex();
    if (!CUIndex.getRows().empty()) {
      // Handle DWP section contribution.
      // NOTE(review): getDWOId() is dereferenced without checking that it
      // holds a value - presumably always set for units in a DWP; confirm.
      const DWARFUnitIndex::Entry *DWOEntry =
          CUIndex.getFromHash(*Unit.getDWOId());
      // NOTE(review): returning here leaves the empty UnitsAbbrevData entry
      // created above in place.
      if (!DWOEntry)
        return;

      // NOTE(review): the contribution pointer is used without a null check.
      const DWARFUnitIndex::Entry::SectionContribution *DWOContrubution =
          DWOEntry->getContribution(DWARFSectionKind::DW_SECT_ABBREV);
      AbbrevContents = AbbrevSectionContents.substr(DWOContrubution->Offset,
                                                    DWOContrubution->Length);
    } else if (!Unit.isDWOUnit()) {
      const uint64_t StartOffset = Unit.getAbbreviationsOffset();

      // We know where the unit's abbreviation set starts, but not where it ends
      // as such data is not readily available. Hence, we have to build a sorted
      // list of start addresses and find the next starting address to determine
      // the set boundaries.
      //
      // FIXME: if we had a full access to DWARFDebugAbbrev::AbbrDeclSets
      // we wouldn't have to build our own sorted list for the quick lookup.
      if (AbbrevSetOffsets.empty()) {
        for_each(
            *Unit.getContext().getDebugAbbrev(),
            [&](const std::pair<uint64_t, DWARFAbbreviationDeclarationSet> &P) {
              AbbrevSetOffsets.push_back(P.first);
            });
        sort(AbbrevSetOffsets);
      }
      // The set ends where the next one starts, or at the end of the section
      // if this is the last set.
      auto It = upper_bound(AbbrevSetOffsets, StartOffset);
      const uint64_t EndOffset =
          It == AbbrevSetOffsets.end() ? AbbrevSectionContents.size() : *It;
      AbbrevContents = AbbrevSectionContents.slice(StartOffset, EndOffset);
    } else {
      // For DWO unit outside of DWP, we expect the entire section to hold
      // abbreviations for this unit only.
      AbbrevContents = AbbrevSectionContents;
    }

    OS.reserveExtraSpace(AbbrevContents.size());
    OS << AbbrevContents;

    return;
  }

  // Slow path: re-encode every declaration of the set.
  for (auto I = Abbrevs->begin(), E = Abbrevs->end(); I != E; ++I) {
    const DWARFAbbreviationDeclaration &Abbrev = *I;
    auto Patch = UnitPatches.find(&Abbrev);

    // Declaration header: abbreviation code, tag, has-children flag.
    encodeULEB128(Abbrev.getCode(), OS);
    encodeULEB128(Abbrev.getTag(), OS);
    encodeULEB128(Abbrev.hasChildren(), OS);
    for (const DWARFAbbreviationDeclaration::AttributeSpec &AttrSpec :
         Abbrev.attributes()) {
      if (Patch != UnitPatches.end()) {
        bool Patched = false;
        // Patches added later take a precedence over earlier ones.
        for (auto I = Patch->second.rbegin(), E = Patch->second.rend(); I != E;
             ++I) {
          if (I->OldAttr != AttrSpec.Attr)
            continue;

          // Emit the replacement attribute and form instead of the original.
          // NOTE(review): no implicit-const value is written on this path.
          encodeULEB128(I->NewAttr, OS);
          encodeULEB128(I->NewAttrForm, OS);
          Patched = true;
          break;
        }
        if (Patched)
          continue;
      }

      // Unpatched attribute: copy attribute, form and, where present, the
      // implicit constant value.
      encodeULEB128(AttrSpec.Attr, OS);
      encodeULEB128(AttrSpec.Form, OS);
      if (AttrSpec.isImplicitConst())
        encodeSLEB128(AttrSpec.getImplicitConstValue(), OS);
    }

    // A (0, 0) attribute/form pair ends the declaration.
    encodeULEB128(0, OS);
    encodeULEB128(0, OS);
  }
  // A zero abbreviation code ends the set.
  encodeULEB128(0, OS);
}
788 
789 std::unique_ptr<DebugBufferVector> DebugAbbrevWriter::finalize() {
790   if (DWOId) {
791     // We expect abbrev_offset to always be zero for DWO units as there
792     // should be one CU per DWO, and TUs should share the same abbreviation
793     // set with the CU.
794     // For DWP AbbreviationsOffset is an Abbrev contribution in the DWP file, so
795     // can be none zero. Thus we are skipping the check for DWP.
796     bool IsDWP = !Context.getCUIndex().getRows().empty();
797     if (!IsDWP) {
798       for (const std::unique_ptr<DWARFUnit> &Unit : Context.dwo_units()) {
799         if (Unit->getAbbreviationsOffset() != 0) {
800           errs() << "BOLT-ERROR: detected DWO unit with non-zero abbr_offset. "
801                     "Unable to update debug info.\n";
802           exit(1);
803         }
804       }
805     }
806 
807     // Issue abbreviations for the DWO CU only.
808     addUnitAbbreviations(*Context.getDWOCompileUnitForHash(*DWOId));
809   } else {
810     // Add abbreviations from compile and type non-DWO units.
811     for (const std::unique_ptr<DWARFUnit> &Unit : Context.normal_units())
812       addUnitAbbreviations(*Unit);
813   }
814 
815   DebugBufferVector ReturnBuffer;
816 
817   // Pre-calculate the total size of abbrev section.
818   uint64_t Size = 0;
819   for (const auto &KV : UnitsAbbrevData) {
820     const AbbrevData &UnitData = KV.second;
821     Size += UnitData.Buffer->size();
822   }
823   ReturnBuffer.reserve(Size);
824 
825   uint64_t Pos = 0;
826   for (auto &KV : UnitsAbbrevData) {
827     AbbrevData &UnitData = KV.second;
828     ReturnBuffer.append(*UnitData.Buffer);
829     UnitData.Offset = Pos;
830     Pos += UnitData.Buffer->size();
831 
832     UnitData.Buffer.reset();
833     UnitData.Stream.reset();
834   }
835 
836   return std::make_unique<DebugBufferVector>(ReturnBuffer);
837 }
838 
839 static void emitDwarfSetLineAddrAbs(MCStreamer &OS,
840                                     MCDwarfLineTableParams Params,
841                                     int64_t LineDelta, uint64_t Address,
842                                     int PointerSize) {
843   // emit the sequence to set the address
844   OS.emitIntValue(dwarf::DW_LNS_extended_op, 1);
845   OS.emitULEB128IntValue(PointerSize + 1);
846   OS.emitIntValue(dwarf::DW_LNE_set_address, 1);
847   OS.emitIntValue(Address, PointerSize);
848 
849   // emit the sequence for the LineDelta (from 1) and a zero address delta.
850   MCDwarfLineAddr::Emit(&OS, Params, LineDelta, 0);
851 }
852 
853 static inline void emitBinaryDwarfLineTable(
854     MCStreamer *MCOS, MCDwarfLineTableParams Params,
855     const DWARFDebugLine::LineTable *Table,
856     const std::vector<DwarfLineTable::RowSequence> &InputSequences) {
857   if (InputSequences.empty())
858     return;
859 
860   constexpr uint64_t InvalidAddress = UINT64_MAX;
861   unsigned FileNum = 1;
862   unsigned LastLine = 1;
863   unsigned Column = 0;
864   unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
865   unsigned Isa = 0;
866   unsigned Discriminator = 0;
867   uint64_t LastAddress = InvalidAddress;
868   uint64_t PrevEndOfSequence = InvalidAddress;
869   const MCAsmInfo *AsmInfo = MCOS->getContext().getAsmInfo();
870 
871   auto emitEndOfSequence = [&](uint64_t Address) {
872     MCDwarfLineAddr::Emit(MCOS, Params, INT64_MAX, Address - LastAddress);
873     FileNum = 1;
874     LastLine = 1;
875     Column = 0;
876     Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
877     Isa = 0;
878     Discriminator = 0;
879     LastAddress = InvalidAddress;
880   };
881 
882   for (const DwarfLineTable::RowSequence &Sequence : InputSequences) {
883     const uint64_t SequenceStart =
884         Table->Rows[Sequence.FirstIndex].Address.Address;
885 
886     // Check if we need to mark the end of the sequence.
887     if (PrevEndOfSequence != InvalidAddress && LastAddress != InvalidAddress &&
888         PrevEndOfSequence != SequenceStart) {
889       emitEndOfSequence(PrevEndOfSequence);
890     }
891 
892     for (uint32_t RowIndex = Sequence.FirstIndex;
893          RowIndex <= Sequence.LastIndex; ++RowIndex) {
894       const DWARFDebugLine::Row &Row = Table->Rows[RowIndex];
895       int64_t LineDelta = static_cast<int64_t>(Row.Line) - LastLine;
896       const uint64_t Address = Row.Address.Address;
897 
898       if (FileNum != Row.File) {
899         FileNum = Row.File;
900         MCOS->emitInt8(dwarf::DW_LNS_set_file);
901         MCOS->emitULEB128IntValue(FileNum);
902       }
903       if (Column != Row.Column) {
904         Column = Row.Column;
905         MCOS->emitInt8(dwarf::DW_LNS_set_column);
906         MCOS->emitULEB128IntValue(Column);
907       }
908       if (Discriminator != Row.Discriminator &&
909           MCOS->getContext().getDwarfVersion() >= 4) {
910         Discriminator = Row.Discriminator;
911         unsigned Size = getULEB128Size(Discriminator);
912         MCOS->emitInt8(dwarf::DW_LNS_extended_op);
913         MCOS->emitULEB128IntValue(Size + 1);
914         MCOS->emitInt8(dwarf::DW_LNE_set_discriminator);
915         MCOS->emitULEB128IntValue(Discriminator);
916       }
917       if (Isa != Row.Isa) {
918         Isa = Row.Isa;
919         MCOS->emitInt8(dwarf::DW_LNS_set_isa);
920         MCOS->emitULEB128IntValue(Isa);
921       }
922       if (Row.IsStmt != Flags) {
923         Flags = Row.IsStmt;
924         MCOS->emitInt8(dwarf::DW_LNS_negate_stmt);
925       }
926       if (Row.BasicBlock)
927         MCOS->emitInt8(dwarf::DW_LNS_set_basic_block);
928       if (Row.PrologueEnd)
929         MCOS->emitInt8(dwarf::DW_LNS_set_prologue_end);
930       if (Row.EpilogueBegin)
931         MCOS->emitInt8(dwarf::DW_LNS_set_epilogue_begin);
932 
933       // The end of the sequence is not normal in the middle of the input
934       // sequence, but could happen, e.g. for assembly code.
935       if (Row.EndSequence) {
936         emitEndOfSequence(Address);
937       } else {
938         if (LastAddress == InvalidAddress)
939           emitDwarfSetLineAddrAbs(*MCOS, Params, LineDelta, Address,
940                                   AsmInfo->getCodePointerSize());
941         else
942           MCDwarfLineAddr::Emit(MCOS, Params, LineDelta, Address - LastAddress);
943 
944         LastAddress = Address;
945         LastLine = Row.Line;
946       }
947 
948       Discriminator = 0;
949     }
950     PrevEndOfSequence = Sequence.EndAddress;
951   }
952 
953   // Finish with the end of the sequence.
954   if (LastAddress != InvalidAddress)
955     emitEndOfSequence(PrevEndOfSequence);
956 }
957 
958 // This function is similar to the one from MCDwarfLineTable, except it handles
959 // end-of-sequence entries differently by utilizing line entries with
960 // DWARF2_FLAG_END_SEQUENCE flag.
961 static inline void emitDwarfLineTable(
962     MCStreamer *MCOS, MCSection *Section,
963     const MCLineSection::MCDwarfLineEntryCollection &LineEntries) {
964   unsigned FileNum = 1;
965   unsigned LastLine = 1;
966   unsigned Column = 0;
967   unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
968   unsigned Isa = 0;
969   unsigned Discriminator = 0;
970   MCSymbol *LastLabel = nullptr;
971   const MCAsmInfo *AsmInfo = MCOS->getContext().getAsmInfo();
972 
973   // Loop through each MCDwarfLineEntry and encode the dwarf line number table.
974   for (const MCDwarfLineEntry &LineEntry : LineEntries) {
975     if (LineEntry.getFlags() & DWARF2_FLAG_END_SEQUENCE) {
976       MCOS->emitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, LineEntry.getLabel(),
977                                      AsmInfo->getCodePointerSize());
978       FileNum = 1;
979       LastLine = 1;
980       Column = 0;
981       Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
982       Isa = 0;
983       Discriminator = 0;
984       LastLabel = nullptr;
985       continue;
986     }
987 
988     int64_t LineDelta = static_cast<int64_t>(LineEntry.getLine()) - LastLine;
989 
990     if (FileNum != LineEntry.getFileNum()) {
991       FileNum = LineEntry.getFileNum();
992       MCOS->emitInt8(dwarf::DW_LNS_set_file);
993       MCOS->emitULEB128IntValue(FileNum);
994     }
995     if (Column != LineEntry.getColumn()) {
996       Column = LineEntry.getColumn();
997       MCOS->emitInt8(dwarf::DW_LNS_set_column);
998       MCOS->emitULEB128IntValue(Column);
999     }
1000     if (Discriminator != LineEntry.getDiscriminator() &&
1001         MCOS->getContext().getDwarfVersion() >= 4) {
1002       Discriminator = LineEntry.getDiscriminator();
1003       unsigned Size = getULEB128Size(Discriminator);
1004       MCOS->emitInt8(dwarf::DW_LNS_extended_op);
1005       MCOS->emitULEB128IntValue(Size + 1);
1006       MCOS->emitInt8(dwarf::DW_LNE_set_discriminator);
1007       MCOS->emitULEB128IntValue(Discriminator);
1008     }
1009     if (Isa != LineEntry.getIsa()) {
1010       Isa = LineEntry.getIsa();
1011       MCOS->emitInt8(dwarf::DW_LNS_set_isa);
1012       MCOS->emitULEB128IntValue(Isa);
1013     }
1014     if ((LineEntry.getFlags() ^ Flags) & DWARF2_FLAG_IS_STMT) {
1015       Flags = LineEntry.getFlags();
1016       MCOS->emitInt8(dwarf::DW_LNS_negate_stmt);
1017     }
1018     if (LineEntry.getFlags() & DWARF2_FLAG_BASIC_BLOCK)
1019       MCOS->emitInt8(dwarf::DW_LNS_set_basic_block);
1020     if (LineEntry.getFlags() & DWARF2_FLAG_PROLOGUE_END)
1021       MCOS->emitInt8(dwarf::DW_LNS_set_prologue_end);
1022     if (LineEntry.getFlags() & DWARF2_FLAG_EPILOGUE_BEGIN)
1023       MCOS->emitInt8(dwarf::DW_LNS_set_epilogue_begin);
1024 
1025     MCSymbol *Label = LineEntry.getLabel();
1026 
1027     // At this point we want to emit/create the sequence to encode the delta
1028     // in line numbers and the increment of the address from the previous
1029     // Label and the current Label.
1030     MCOS->emitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label,
1031                                    AsmInfo->getCodePointerSize());
1032     Discriminator = 0;
1033     LastLine = LineEntry.getLine();
1034     LastLabel = Label;
1035   }
1036 
1037   assert(LastLabel == nullptr && "end of sequence expected");
1038 }
1039 
1040 void DwarfLineTable::emitCU(MCStreamer *MCOS, MCDwarfLineTableParams Params,
1041                             Optional<MCDwarfLineStr> &LineStr,
1042                             BinaryContext &BC) const {
1043   if (!RawData.empty()) {
1044     assert(MCLineSections.getMCLineEntries().empty() &&
1045            InputSequences.empty() &&
1046            "cannot combine raw data with new line entries");
1047     MCOS->emitLabel(getLabel());
1048     MCOS->emitBytes(RawData);
1049 
1050     // Emit fake relocation for RuntimeDyld to always allocate the section.
1051     //
1052     // FIXME: remove this once RuntimeDyld stops skipping allocatable sections
1053     //        without relocations.
1054     MCOS->emitRelocDirective(
1055         *MCConstantExpr::create(0, *BC.Ctx), "BFD_RELOC_NONE",
1056         MCSymbolRefExpr::create(getLabel(), *BC.Ctx), SMLoc(), *BC.STI);
1057 
1058     return;
1059   }
1060 
1061   MCSymbol *LineEndSym = Header.Emit(MCOS, Params, LineStr).second;
1062 
1063   // Put out the line tables.
1064   for (const auto &LineSec : MCLineSections.getMCLineEntries())
1065     emitDwarfLineTable(MCOS, LineSec.first, LineSec.second);
1066 
1067   // Emit line tables for the original code.
1068   emitBinaryDwarfLineTable(MCOS, Params, InputTable, InputSequences);
1069 
1070   // This is the end of the section, so set the value of the symbol at the end
1071   // of this section (that was used in a previous expression).
1072   MCOS->emitLabel(LineEndSym);
1073 }
1074 
1075 void DwarfLineTable::emit(BinaryContext &BC, MCStreamer &Streamer) {
1076   MCAssembler &Assembler =
1077       static_cast<MCObjectStreamer *>(&Streamer)->getAssembler();
1078 
1079   MCDwarfLineTableParams Params = Assembler.getDWARFLinetableParams();
1080 
1081   auto &LineTables = BC.getDwarfLineTables();
1082 
1083   // Bail out early so we don't switch to the debug_line section needlessly and
1084   // in doing so create an unnecessary (if empty) section.
1085   if (LineTables.empty())
1086     return;
1087 
1088   // In a v5 non-split line table, put the strings in a separate section.
1089   Optional<MCDwarfLineStr> LineStr(None);
1090   if (BC.Ctx->getDwarfVersion() >= 5)
1091     LineStr = MCDwarfLineStr(*BC.Ctx);
1092 
1093   // Switch to the section where the table will be emitted into.
1094   Streamer.SwitchSection(BC.MOFI->getDwarfLineSection());
1095 
1096   // Handle the rest of the Compile Units.
1097   for (auto &CUIDTablePair : LineTables) {
1098     CUIDTablePair.second.emitCU(&Streamer, Params, LineStr, BC);
1099   }
1100 }
1101 
1102 } // namespace bolt
1103 } // namespace llvm
1104