1 //===- bolt/Core/Exceptions.cpp - Helpers for C++ exceptions --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements functions for handling C++ exception meta data.
10 //
11 // Some of the code is taken from examples/ExceptionDemo
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "bolt/Core/Exceptions.h"
16 #include "bolt/Core/BinaryFunction.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/BinaryFormat/Dwarf.h"
20 #include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
21 #include "llvm/Support/Casting.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Debug.h"
24 #include "llvm/Support/Errc.h"
25 #include "llvm/Support/LEB128.h"
26 #include "llvm/Support/MathExtras.h"
27 #include "llvm/Support/raw_ostream.h"
28 #include <map>
29
30 #undef DEBUG_TYPE
31 #define DEBUG_TYPE "bolt-exceptions"
32
33 using namespace llvm::dwarf;
34
35 namespace opts {
36
37 extern llvm::cl::OptionCategory BoltCategory;
38
39 extern llvm::cl::opt<unsigned> Verbosity;
40
41 static llvm::cl::opt<bool>
42 PrintExceptions("print-exceptions",
43 llvm::cl::desc("print exception handling data"),
44 llvm::cl::Hidden, llvm::cl::cat(BoltCategory));
45
46 } // namespace opts
47
48 namespace llvm {
49 namespace bolt {
50
// Read and dump the .gcc_except_table section entry.
52 //
53 // .gcc_except_table section contains a set of Language-Specific Data Areas -
54 // a fancy name for exception handling tables. There's one LSDA entry per
55 // function. However, we can't actually tell which function LSDA refers to
56 // unless we parse .eh_frame entry that refers to the LSDA.
57 // Then inside LSDA most addresses are encoded relative to the function start,
58 // so we need the function context in order to get to real addresses.
59 //
60 // The best visual representation of the tables comprising LSDA and
61 // relationships between them is illustrated at:
62 // https://github.com/itanium-cxx-abi/cxx-abi/blob/master/exceptions.pdf
63 // Keep in mind that GCC implementation deviates slightly from that document.
64 //
65 // To summarize, there are 4 tables in LSDA: call site table, actions table,
66 // types table, and types index table (for indirection). The main table contains
67 // call site entries. Each call site includes a PC range that can throw an
68 // exception, a handler (landing pad), and a reference to an entry in the action
69 // table. The handler and/or action could be 0. The action entry is a head
70 // of a list of actions associated with a call site. The action table contains
71 // all such lists (it could be optimized to share list tails). Each action could
72 // be either to catch an exception of a given type, to perform a cleanup, or to
73 // propagate the exception after filtering it out (e.g. to make sure function
74 // exception specification is not violated). Catch action contains a reference
75 // to an entry in the type table, and filter action refers to an entry in the
76 // type index table to encode a set of types to filter.
77 //
78 // Call site table follows LSDA header. Action table immediately follows the
79 // call site table.
80 //
81 // Both types table and type index table start at the same location, but they
82 // grow in opposite directions (types go up, indices go down). The beginning of
83 // these tables is encoded in LSDA header. Sizes for both of the tables are not
84 // included anywhere.
85 //
86 // We have to parse all of the tables to determine their sizes. Then we have
87 // to parse the call site table and associate discovered information with
88 // actual call instructions and landing pad blocks.
89 //
90 // For the purpose of rewriting exception handling tables, we can reuse action,
91 // and type index tables in their original binary format.
92 //
93 // Type table could be encoded using position-independent references, and thus
94 // may require relocation.
95 //
96 // Ideally we should be able to re-write LSDA in-place, without the need to
97 // allocate a new space for it. Sadly there's no guarantee that the new call
98 // site table will be the same size as GCC uses uleb encodings for PC offsets.
99 //
100 // Note: some functions have LSDA entries with 0 call site entries.
parseLSDA(ArrayRef<uint8_t> LSDASectionData,uint64_t LSDASectionAddress)101 void BinaryFunction::parseLSDA(ArrayRef<uint8_t> LSDASectionData,
102 uint64_t LSDASectionAddress) {
103 assert(CurrentState == State::Disassembled && "unexpected function state");
104
105 if (!getLSDAAddress())
106 return;
107
108 DWARFDataExtractor Data(
109 StringRef(reinterpret_cast<const char *>(LSDASectionData.data()),
110 LSDASectionData.size()),
111 BC.DwCtx->getDWARFObj().isLittleEndian(), 8);
112 uint64_t Offset = getLSDAAddress() - LSDASectionAddress;
113 assert(Data.isValidOffset(Offset) && "wrong LSDA address");
114
115 uint8_t LPStartEncoding = Data.getU8(&Offset);
116 uint64_t LPStart = 0;
117 // Convert to offset if LPStartEncoding is typed absptr DW_EH_PE_absptr
118 if (Optional<uint64_t> MaybeLPStart = Data.getEncodedPointer(
119 &Offset, LPStartEncoding, Offset + LSDASectionAddress))
120 LPStart = (LPStartEncoding && 0xFF == 0) ? *MaybeLPStart
121 : *MaybeLPStart - Address;
122
123 const uint8_t TTypeEncoding = Data.getU8(&Offset);
124 size_t TTypeEncodingSize = 0;
125 uintptr_t TTypeEnd = 0;
126 if (TTypeEncoding != DW_EH_PE_omit) {
127 TTypeEnd = Data.getULEB128(&Offset);
128 TTypeEncodingSize = BC.getDWARFEncodingSize(TTypeEncoding);
129 }
130
131 if (opts::PrintExceptions) {
132 outs() << "[LSDA at 0x" << Twine::utohexstr(getLSDAAddress())
133 << " for function " << *this << "]:\n";
134 outs() << "LPStart Encoding = 0x" << Twine::utohexstr(LPStartEncoding)
135 << '\n';
136 outs() << "LPStart = 0x" << Twine::utohexstr(LPStart) << '\n';
137 outs() << "TType Encoding = 0x" << Twine::utohexstr(TTypeEncoding) << '\n';
138 outs() << "TType End = " << TTypeEnd << '\n';
139 }
140
141 // Table to store list of indices in type table. Entries are uleb128 values.
142 const uint64_t TypeIndexTableStart = Offset + TTypeEnd;
143
144 // Offset past the last decoded index.
145 uint64_t MaxTypeIndexTableOffset = 0;
146
147 // Max positive index used in type table.
148 unsigned MaxTypeIndex = 0;
149
150 // The actual type info table starts at the same location, but grows in
151 // opposite direction. TTypeEncoding is used to encode stored values.
152 const uint64_t TypeTableStart = Offset + TTypeEnd;
153
154 uint8_t CallSiteEncoding = Data.getU8(&Offset);
155 uint32_t CallSiteTableLength = Data.getULEB128(&Offset);
156 uint64_t CallSiteTableStart = Offset;
157 uint64_t CallSiteTableEnd = CallSiteTableStart + CallSiteTableLength;
158 uint64_t CallSitePtr = CallSiteTableStart;
159 uint64_t ActionTableStart = CallSiteTableEnd;
160
161 if (opts::PrintExceptions) {
162 outs() << "CallSite Encoding = " << (unsigned)CallSiteEncoding << '\n';
163 outs() << "CallSite table length = " << CallSiteTableLength << '\n';
164 outs() << '\n';
165 }
166
167 this->HasEHRanges = CallSitePtr < CallSiteTableEnd;
168 const uint64_t RangeBase = getAddress();
169 while (CallSitePtr < CallSiteTableEnd) {
170 uint64_t Start = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
171 CallSitePtr + LSDASectionAddress);
172 uint64_t Length = *Data.getEncodedPointer(&CallSitePtr, CallSiteEncoding,
173 CallSitePtr + LSDASectionAddress);
174 uint64_t LandingPad = *Data.getEncodedPointer(
175 &CallSitePtr, CallSiteEncoding, CallSitePtr + LSDASectionAddress);
176 uint64_t ActionEntry = Data.getULEB128(&CallSitePtr);
177
178 uint64_t LPOffset = LPStart + LandingPad;
179 uint64_t LPAddress = Address + LPOffset;
180
181 // Verify if landing pad code is located outside current function
182 // Support landing pad to builtin_unreachable
183 if (LPAddress < Address || LPAddress > Address + getSize()) {
184 BinaryFunction *Fragment =
185 BC.getBinaryFunctionContainingAddress(LPAddress);
186 assert(Fragment != nullptr &&
187 "BOLT-ERROR: cannot find landing pad fragment");
188 BC.addInterproceduralReference(this, Fragment->getAddress());
189 BC.processInterproceduralReferences();
190 auto isFragmentOf = [](BinaryFunction *Fragment,
191 BinaryFunction *Parent) -> bool {
192 return (Fragment->isFragment() && Fragment->isParentFragment(Parent));
193 };
194 assert((isFragmentOf(this, Fragment) || isFragmentOf(Fragment, this)) &&
195 "BOLT-ERROR: cannot have landing pads in different "
196 "functions");
197 setHasIndirectTargetToSplitFragment(true);
198 BC.addFragmentsToSkip(this);
199 return;
200 }
201
202 if (opts::PrintExceptions) {
203 outs() << "Call Site: [0x" << Twine::utohexstr(RangeBase + Start)
204 << ", 0x" << Twine::utohexstr(RangeBase + Start + Length)
205 << "); landing pad: 0x" << Twine::utohexstr(LPOffset)
206 << "; action entry: 0x" << Twine::utohexstr(ActionEntry) << "\n";
207 outs() << " current offset is " << (CallSitePtr - CallSiteTableStart)
208 << '\n';
209 }
210
211 // Create a handler entry if necessary.
212 MCSymbol *LPSymbol = nullptr;
213 if (LPOffset) {
214 if (!getInstructionAtOffset(LPOffset)) {
215 if (opts::Verbosity >= 1)
216 errs() << "BOLT-WARNING: landing pad " << Twine::utohexstr(LPOffset)
217 << " not pointing to an instruction in function " << *this
218 << " - ignoring.\n";
219 } else {
220 auto Label = Labels.find(LPOffset);
221 if (Label != Labels.end()) {
222 LPSymbol = Label->second;
223 } else {
224 LPSymbol = BC.Ctx->createNamedTempSymbol("LP");
225 Labels[LPOffset] = LPSymbol;
226 }
227 }
228 }
229
230 // Mark all call instructions in the range.
231 auto II = Instructions.find(Start);
232 auto IE = Instructions.end();
233 assert(II != IE && "exception range not pointing to an instruction");
234 do {
235 MCInst &Instruction = II->second;
236 if (BC.MIB->isCall(Instruction) &&
237 !BC.MIB->getConditionalTailCall(Instruction)) {
238 assert(!BC.MIB->isInvoke(Instruction) &&
239 "overlapping exception ranges detected");
240 // Add extra operands to a call instruction making it an invoke from
241 // now on.
242 BC.MIB->addEHInfo(Instruction,
243 MCPlus::MCLandingPad(LPSymbol, ActionEntry));
244 }
245 ++II;
246 } while (II != IE && II->first < Start + Length);
247
248 if (ActionEntry != 0) {
249 auto printType = [&](int Index, raw_ostream &OS) {
250 assert(Index > 0 && "only positive indices are valid");
251 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
252 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
253 uint64_t TypeAddress =
254 *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
255 if ((TTypeEncoding & DW_EH_PE_pcrel) && TypeAddress == TTEntryAddress)
256 TypeAddress = 0;
257 if (TypeAddress == 0) {
258 OS << "<all>";
259 return;
260 }
261 if (TTypeEncoding & DW_EH_PE_indirect) {
262 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
263 assert(PointerOrErr && "failed to decode indirect address");
264 TypeAddress = *PointerOrErr;
265 }
266 if (BinaryData *TypeSymBD = BC.getBinaryDataAtAddress(TypeAddress))
267 OS << TypeSymBD->getName();
268 else
269 OS << "0x" << Twine::utohexstr(TypeAddress);
270 };
271 if (opts::PrintExceptions)
272 outs() << " actions: ";
273 uint64_t ActionPtr = ActionTableStart + ActionEntry - 1;
274 int64_t ActionType;
275 int64_t ActionNext;
276 const char *Sep = "";
277 do {
278 ActionType = Data.getSLEB128(&ActionPtr);
279 const uint32_t Self = ActionPtr;
280 ActionNext = Data.getSLEB128(&ActionPtr);
281 if (opts::PrintExceptions)
282 outs() << Sep << "(" << ActionType << ", " << ActionNext << ") ";
283 if (ActionType == 0) {
284 if (opts::PrintExceptions)
285 outs() << "cleanup";
286 } else if (ActionType > 0) {
287 // It's an index into a type table.
288 MaxTypeIndex =
289 std::max(MaxTypeIndex, static_cast<unsigned>(ActionType));
290 if (opts::PrintExceptions) {
291 outs() << "catch type ";
292 printType(ActionType, outs());
293 }
294 } else { // ActionType < 0
295 if (opts::PrintExceptions)
296 outs() << "filter exception types ";
297 const char *TSep = "";
298 // ActionType is a negative *byte* offset into *uleb128-encoded* table
299 // of indices with base 1.
300 // E.g. -1 means offset 0, -2 is offset 1, etc. The indices are
301 // encoded using uleb128 thus we cannot directly dereference them.
302 uint64_t TypeIndexTablePtr = TypeIndexTableStart - ActionType - 1;
303 while (uint64_t Index = Data.getULEB128(&TypeIndexTablePtr)) {
304 MaxTypeIndex = std::max(MaxTypeIndex, static_cast<unsigned>(Index));
305 if (opts::PrintExceptions) {
306 outs() << TSep;
307 printType(Index, outs());
308 TSep = ", ";
309 }
310 }
311 MaxTypeIndexTableOffset = std::max(
312 MaxTypeIndexTableOffset, TypeIndexTablePtr - TypeIndexTableStart);
313 }
314
315 Sep = "; ";
316
317 ActionPtr = Self + ActionNext;
318 } while (ActionNext);
319 if (opts::PrintExceptions)
320 outs() << '\n';
321 }
322 }
323 if (opts::PrintExceptions)
324 outs() << '\n';
325
326 assert(TypeIndexTableStart + MaxTypeIndexTableOffset <=
327 Data.getData().size() &&
328 "LSDA entry has crossed section boundary");
329
330 if (TTypeEnd) {
331 LSDAActionTable = LSDASectionData.slice(
332 ActionTableStart, TypeIndexTableStart -
333 MaxTypeIndex * TTypeEncodingSize -
334 ActionTableStart);
335 for (unsigned Index = 1; Index <= MaxTypeIndex; ++Index) {
336 uint64_t TTEntry = TypeTableStart - Index * TTypeEncodingSize;
337 const uint64_t TTEntryAddress = TTEntry + LSDASectionAddress;
338 uint64_t TypeAddress =
339 *Data.getEncodedPointer(&TTEntry, TTypeEncoding, TTEntryAddress);
340 if ((TTypeEncoding & DW_EH_PE_pcrel) && (TypeAddress == TTEntryAddress))
341 TypeAddress = 0;
342 if (TTypeEncoding & DW_EH_PE_indirect) {
343 LSDATypeAddressTable.emplace_back(TypeAddress);
344 if (TypeAddress) {
345 ErrorOr<uint64_t> PointerOrErr = BC.getPointerAtAddress(TypeAddress);
346 assert(PointerOrErr && "failed to decode indirect address");
347 TypeAddress = *PointerOrErr;
348 }
349 }
350 LSDATypeTable.emplace_back(TypeAddress);
351 }
352 LSDATypeIndexTable =
353 LSDASectionData.slice(TypeIndexTableStart, MaxTypeIndexTableOffset);
354 }
355 }
356
updateEHRanges()357 void BinaryFunction::updateEHRanges() {
358 if (getSize() == 0)
359 return;
360
361 assert(CurrentState == State::CFG_Finalized && "unexpected state");
362
363 // Build call sites table.
364 struct EHInfo {
365 const MCSymbol *LP; // landing pad
366 uint64_t Action;
367 };
368
369 // If previous call can throw, this is its exception handler.
370 EHInfo PreviousEH = {nullptr, 0};
371
372 // Marker for the beginning of exceptions range.
373 const MCSymbol *StartRange = nullptr;
374
375 // Indicates whether the start range is located in a cold part.
376 bool IsStartInCold = false;
377
378 // Have we crossed hot/cold border for split functions?
379 bool SeenCold = false;
380
381 // Sites to update - either regular or cold.
382 CallSitesType *Sites = &CallSites;
383
384 for (BinaryBasicBlock *BB : getLayout().blocks()) {
385
386 if (BB->isCold() && !SeenCold) {
387 SeenCold = true;
388
389 // Close the range (if any) and change the target call sites.
390 if (StartRange) {
391 Sites->emplace_back(CallSite{StartRange, getFunctionEndLabel(),
392 PreviousEH.LP, PreviousEH.Action});
393 }
394 Sites = &ColdCallSites;
395
396 // Reset the range.
397 StartRange = nullptr;
398 PreviousEH = {nullptr, 0};
399 }
400
401 for (auto II = BB->begin(); II != BB->end(); ++II) {
402 if (!BC.MIB->isCall(*II))
403 continue;
404
405 // Instruction can throw an exception that should be handled.
406 const bool Throws = BC.MIB->isInvoke(*II);
407
408 // Ignore the call if it's a continuation of a no-throw gap.
409 if (!Throws && !StartRange)
410 continue;
411
412 // Extract exception handling information from the instruction.
413 const MCSymbol *LP = nullptr;
414 uint64_t Action = 0;
415 if (const Optional<MCPlus::MCLandingPad> EHInfo = BC.MIB->getEHInfo(*II))
416 std::tie(LP, Action) = *EHInfo;
417
418 // No action if the exception handler has not changed.
419 if (Throws && StartRange && PreviousEH.LP == LP &&
420 PreviousEH.Action == Action)
421 continue;
422
423 // Same symbol is used for the beginning and the end of the range.
424 const MCSymbol *EHSymbol;
425 MCInst EHLabel;
426 {
427 std::unique_lock<std::shared_timed_mutex> Lock(BC.CtxMutex);
428 EHSymbol = BC.Ctx->createNamedTempSymbol("EH");
429 BC.MIB->createEHLabel(EHLabel, EHSymbol, BC.Ctx.get());
430 }
431
432 II = std::next(BB->insertPseudoInstr(II, EHLabel));
433
434 // At this point we could be in one of the following states:
435 //
436 // I. Exception handler has changed and we need to close previous range
437 // and start a new one.
438 //
439 // II. Start a new exception range after the gap.
440 //
441 // III. Close current exception range and start a new gap.
442 const MCSymbol *EndRange;
443 if (StartRange) {
444 // I, III:
445 EndRange = EHSymbol;
446 } else {
447 // II:
448 StartRange = EHSymbol;
449 IsStartInCold = SeenCold;
450 EndRange = nullptr;
451 }
452
453 // Close the previous range.
454 if (EndRange) {
455 Sites->emplace_back(
456 CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
457 }
458
459 if (Throws) {
460 // I, II:
461 StartRange = EHSymbol;
462 IsStartInCold = SeenCold;
463 PreviousEH = EHInfo{LP, Action};
464 } else {
465 StartRange = nullptr;
466 }
467 }
468 }
469
470 // Check if we need to close the range.
471 if (StartRange) {
472 assert((!isSplit() || Sites == &ColdCallSites) && "sites mismatch");
473 const MCSymbol *EndRange =
474 IsStartInCold ? getFunctionColdEndLabel() : getFunctionEndLabel();
475 Sites->emplace_back(
476 CallSite{StartRange, EndRange, PreviousEH.LP, PreviousEH.Action});
477 }
478 }
479
// Mask selecting the two most significant bits of a CFI opcode byte.
constexpr uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
481
CFIReaderWriter(const DWARFDebugFrame & EHFrame)482 CFIReaderWriter::CFIReaderWriter(const DWARFDebugFrame &EHFrame) {
483 // Prepare FDEs for fast lookup
484 for (const dwarf::FrameEntry &Entry : EHFrame.entries()) {
485 const auto *CurFDE = dyn_cast<dwarf::FDE>(&Entry);
486 // Skip CIEs.
487 if (!CurFDE)
488 continue;
489 // There could me multiple FDEs with the same initial address, and perhaps
490 // different sizes (address ranges). Use the first entry with non-zero size.
491 auto FDEI = FDEs.lower_bound(CurFDE->getInitialLocation());
492 if (FDEI != FDEs.end() && FDEI->first == CurFDE->getInitialLocation()) {
493 if (CurFDE->getAddressRange()) {
494 if (FDEI->second->getAddressRange() == 0) {
495 FDEI->second = CurFDE;
496 } else if (opts::Verbosity > 0) {
497 errs() << "BOLT-WARNING: different FDEs for function at 0x"
498 << Twine::utohexstr(FDEI->first)
499 << " detected; sizes: " << FDEI->second->getAddressRange()
500 << " and " << CurFDE->getAddressRange() << '\n';
501 }
502 }
503 } else {
504 FDEs.emplace_hint(FDEI, CurFDE->getInitialLocation(), CurFDE);
505 }
506 }
507 }
508
fillCFIInfoFor(BinaryFunction & Function) const509 bool CFIReaderWriter::fillCFIInfoFor(BinaryFunction &Function) const {
510 uint64_t Address = Function.getAddress();
511 auto I = FDEs.find(Address);
512 // Ignore zero-length FDE ranges.
513 if (I == FDEs.end() || !I->second->getAddressRange())
514 return true;
515
516 const FDE &CurFDE = *I->second;
517 Optional<uint64_t> LSDA = CurFDE.getLSDAAddress();
518 Function.setLSDAAddress(LSDA ? *LSDA : 0);
519
520 uint64_t Offset = Function.getFirstInstructionOffset();
521 uint64_t CodeAlignment = CurFDE.getLinkedCIE()->getCodeAlignmentFactor();
522 uint64_t DataAlignment = CurFDE.getLinkedCIE()->getDataAlignmentFactor();
523 if (CurFDE.getLinkedCIE()->getPersonalityAddress()) {
524 Function.setPersonalityFunction(
525 *CurFDE.getLinkedCIE()->getPersonalityAddress());
526 Function.setPersonalityEncoding(
527 *CurFDE.getLinkedCIE()->getPersonalityEncoding());
528 }
529
530 auto decodeFrameInstruction = [&Function, &Offset, Address, CodeAlignment,
531 DataAlignment](
532 const CFIProgram::Instruction &Instr) {
533 uint8_t Opcode = Instr.Opcode;
534 if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK)
535 Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK;
536 switch (Instr.Opcode) {
537 case DW_CFA_nop:
538 break;
539 case DW_CFA_advance_loc4:
540 case DW_CFA_advance_loc2:
541 case DW_CFA_advance_loc1:
542 case DW_CFA_advance_loc:
543 // Advance our current address
544 Offset += CodeAlignment * int64_t(Instr.Ops[0]);
545 break;
546 case DW_CFA_offset_extended_sf:
547 Function.addCFIInstruction(
548 Offset,
549 MCCFIInstruction::createOffset(
550 nullptr, Instr.Ops[0], DataAlignment * int64_t(Instr.Ops[1])));
551 break;
552 case DW_CFA_offset_extended:
553 case DW_CFA_offset:
554 Function.addCFIInstruction(
555 Offset, MCCFIInstruction::createOffset(nullptr, Instr.Ops[0],
556 DataAlignment * Instr.Ops[1]));
557 break;
558 case DW_CFA_restore_extended:
559 case DW_CFA_restore:
560 Function.addCFIInstruction(
561 Offset, MCCFIInstruction::createRestore(nullptr, Instr.Ops[0]));
562 break;
563 case DW_CFA_set_loc:
564 assert(Instr.Ops[0] >= Address && "set_loc out of function bounds");
565 assert(Instr.Ops[0] <= Address + Function.getSize() &&
566 "set_loc out of function bounds");
567 Offset = Instr.Ops[0] - Address;
568 break;
569
570 case DW_CFA_undefined:
571 Function.addCFIInstruction(
572 Offset, MCCFIInstruction::createUndefined(nullptr, Instr.Ops[0]));
573 break;
574 case DW_CFA_same_value:
575 Function.addCFIInstruction(
576 Offset, MCCFIInstruction::createSameValue(nullptr, Instr.Ops[0]));
577 break;
578 case DW_CFA_register:
579 Function.addCFIInstruction(
580 Offset, MCCFIInstruction::createRegister(nullptr, Instr.Ops[0],
581 Instr.Ops[1]));
582 break;
583 case DW_CFA_remember_state:
584 Function.addCFIInstruction(
585 Offset, MCCFIInstruction::createRememberState(nullptr));
586 break;
587 case DW_CFA_restore_state:
588 Function.addCFIInstruction(Offset,
589 MCCFIInstruction::createRestoreState(nullptr));
590 break;
591 case DW_CFA_def_cfa:
592 Function.addCFIInstruction(
593 Offset,
594 MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0], Instr.Ops[1]));
595 break;
596 case DW_CFA_def_cfa_sf:
597 Function.addCFIInstruction(
598 Offset,
599 MCCFIInstruction::cfiDefCfa(nullptr, Instr.Ops[0],
600 DataAlignment * int64_t(Instr.Ops[1])));
601 break;
602 case DW_CFA_def_cfa_register:
603 Function.addCFIInstruction(Offset, MCCFIInstruction::createDefCfaRegister(
604 nullptr, Instr.Ops[0]));
605 break;
606 case DW_CFA_def_cfa_offset:
607 Function.addCFIInstruction(
608 Offset, MCCFIInstruction::cfiDefCfaOffset(nullptr, Instr.Ops[0]));
609 break;
610 case DW_CFA_def_cfa_offset_sf:
611 Function.addCFIInstruction(
612 Offset, MCCFIInstruction::cfiDefCfaOffset(
613 nullptr, DataAlignment * int64_t(Instr.Ops[0])));
614 break;
615 case DW_CFA_GNU_args_size:
616 Function.addCFIInstruction(
617 Offset, MCCFIInstruction::createGnuArgsSize(nullptr, Instr.Ops[0]));
618 Function.setUsesGnuArgsSize();
619 break;
620 case DW_CFA_val_offset_sf:
621 case DW_CFA_val_offset:
622 if (opts::Verbosity >= 1) {
623 errs() << "BOLT-WARNING: DWARF val_offset() unimplemented\n";
624 }
625 return false;
626 case DW_CFA_def_cfa_expression:
627 case DW_CFA_val_expression:
628 case DW_CFA_expression: {
629 StringRef ExprBytes = Instr.Expression->getData();
630 std::string Str;
631 raw_string_ostream OS(Str);
632 // Manually encode this instruction using CFI escape
633 OS << Opcode;
634 if (Opcode != DW_CFA_def_cfa_expression)
635 encodeULEB128(Instr.Ops[0], OS);
636 encodeULEB128(ExprBytes.size(), OS);
637 OS << ExprBytes;
638 Function.addCFIInstruction(
639 Offset, MCCFIInstruction::createEscape(nullptr, OS.str()));
640 break;
641 }
642 case DW_CFA_MIPS_advance_loc8:
643 if (opts::Verbosity >= 1)
644 errs() << "BOLT-WARNING: DW_CFA_MIPS_advance_loc unimplemented\n";
645 return false;
646 case DW_CFA_GNU_window_save:
647 case DW_CFA_lo_user:
648 case DW_CFA_hi_user:
649 if (opts::Verbosity >= 1) {
650 errs() << "BOLT-WARNING: DW_CFA_GNU_* and DW_CFA_*_user "
651 "unimplemented\n";
652 }
653 return false;
654 default:
655 if (opts::Verbosity >= 1) {
656 errs() << "BOLT-WARNING: Unrecognized CFI instruction: " << Instr.Opcode
657 << '\n';
658 }
659 return false;
660 }
661
662 return true;
663 };
664
665 for (const CFIProgram::Instruction &Instr : CurFDE.getLinkedCIE()->cfis())
666 if (!decodeFrameInstruction(Instr))
667 return false;
668
669 for (const CFIProgram::Instruction &Instr : CurFDE.cfis())
670 if (!decodeFrameInstruction(Instr))
671 return false;
672
673 return true;
674 }
675
generateEHFrameHeader(const DWARFDebugFrame & OldEHFrame,const DWARFDebugFrame & NewEHFrame,uint64_t EHFrameHeaderAddress,std::vector<uint64_t> & FailedAddresses) const676 std::vector<char> CFIReaderWriter::generateEHFrameHeader(
677 const DWARFDebugFrame &OldEHFrame, const DWARFDebugFrame &NewEHFrame,
678 uint64_t EHFrameHeaderAddress,
679 std::vector<uint64_t> &FailedAddresses) const {
680 // Common PC -> FDE map to be written into .eh_frame_hdr.
681 std::map<uint64_t, uint64_t> PCToFDE;
682
683 // Presort array for binary search.
684 llvm::sort(FailedAddresses);
685
686 // Initialize PCToFDE using NewEHFrame.
687 for (dwarf::FrameEntry &Entry : NewEHFrame.entries()) {
688 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
689 if (FDE == nullptr)
690 continue;
691 const uint64_t FuncAddress = FDE->getInitialLocation();
692 const uint64_t FDEAddress =
693 NewEHFrame.getEHFrameAddress() + FDE->getOffset();
694
695 // Ignore unused FDEs.
696 if (FuncAddress == 0)
697 continue;
698
699 // Add the address to the map unless we failed to write it.
700 if (!std::binary_search(FailedAddresses.begin(), FailedAddresses.end(),
701 FuncAddress)) {
702 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: FDE for function at 0x"
703 << Twine::utohexstr(FuncAddress) << " is at 0x"
704 << Twine::utohexstr(FDEAddress) << '\n');
705 PCToFDE[FuncAddress] = FDEAddress;
706 }
707 };
708
709 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: new .eh_frame contains "
710 << llvm::size(NewEHFrame.entries()) << " entries\n");
711
712 // Add entries from the original .eh_frame corresponding to the functions
713 // that we did not update.
714 for (const dwarf::FrameEntry &Entry : OldEHFrame) {
715 const dwarf::FDE *FDE = dyn_cast<dwarf::FDE>(&Entry);
716 if (FDE == nullptr)
717 continue;
718 const uint64_t FuncAddress = FDE->getInitialLocation();
719 const uint64_t FDEAddress =
720 OldEHFrame.getEHFrameAddress() + FDE->getOffset();
721
722 // Add the address if we failed to write it.
723 if (PCToFDE.count(FuncAddress) == 0) {
724 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old FDE for function at 0x"
725 << Twine::utohexstr(FuncAddress) << " is at 0x"
726 << Twine::utohexstr(FDEAddress) << '\n');
727 PCToFDE[FuncAddress] = FDEAddress;
728 }
729 };
730
731 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: old .eh_frame contains "
732 << llvm::size(OldEHFrame.entries()) << " entries\n");
733
734 // Generate a new .eh_frame_hdr based on the new map.
735
736 // Header plus table of entries of size 8 bytes.
737 std::vector<char> EHFrameHeader(12 + PCToFDE.size() * 8);
738
739 // Version is 1.
740 EHFrameHeader[0] = 1;
741 // Encoding of the eh_frame pointer.
742 EHFrameHeader[1] = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
743 // Encoding of the count field to follow.
744 EHFrameHeader[2] = DW_EH_PE_udata4;
745 // Encoding of the table entries - 4-byte offset from the start of the header.
746 EHFrameHeader[3] = DW_EH_PE_datarel | DW_EH_PE_sdata4;
747
748 // Address of eh_frame. Use the new one.
749 support::ulittle32_t::ref(EHFrameHeader.data() + 4) =
750 NewEHFrame.getEHFrameAddress() - (EHFrameHeaderAddress + 4);
751
752 // Number of entries in the table (FDE count).
753 support::ulittle32_t::ref(EHFrameHeader.data() + 8) = PCToFDE.size();
754
755 // Write the table at offset 12.
756 char *Ptr = EHFrameHeader.data();
757 uint32_t Offset = 12;
758 for (const auto &PCI : PCToFDE) {
759 int64_t InitialPCOffset = PCI.first - EHFrameHeaderAddress;
760 assert(isInt<32>(InitialPCOffset) && "PC offset out of bounds");
761 support::ulittle32_t::ref(Ptr + Offset) = InitialPCOffset;
762 Offset += 4;
763 int64_t FDEOffset = PCI.second - EHFrameHeaderAddress;
764 assert(isInt<32>(FDEOffset) && "FDE offset out of bounds");
765 support::ulittle32_t::ref(Ptr + Offset) = FDEOffset;
766 Offset += 4;
767 }
768
769 return EHFrameHeader;
770 }
771
parseCIE(uint64_t StartOffset)772 Error EHFrameParser::parseCIE(uint64_t StartOffset) {
773 uint8_t Version = Data.getU8(&Offset);
774 const char *Augmentation = Data.getCStr(&Offset);
775 StringRef AugmentationString(Augmentation ? Augmentation : "");
776 uint8_t AddressSize =
777 Version < 4 ? Data.getAddressSize() : Data.getU8(&Offset);
778 Data.setAddressSize(AddressSize);
779 // Skip segment descriptor size
780 if (Version >= 4)
781 Offset += 1;
782 // Skip code alignment factor
783 Data.getULEB128(&Offset);
784 // Skip data alignment
785 Data.getSLEB128(&Offset);
786 // Skip return address register
787 if (Version == 1)
788 Offset += 1;
789 else
790 Data.getULEB128(&Offset);
791
792 uint32_t FDEPointerEncoding = DW_EH_PE_absptr;
793 uint32_t LSDAPointerEncoding = DW_EH_PE_omit;
794 // Walk the augmentation string to get all the augmentation data.
795 for (unsigned i = 0, e = AugmentationString.size(); i != e; ++i) {
796 switch (AugmentationString[i]) {
797 default:
798 return createStringError(
799 errc::invalid_argument,
800 "unknown augmentation character in entry at 0x%" PRIx64, StartOffset);
801 case 'L':
802 LSDAPointerEncoding = Data.getU8(&Offset);
803 break;
804 case 'P': {
805 uint32_t PersonalityEncoding = Data.getU8(&Offset);
806 Optional<uint64_t> Personality =
807 Data.getEncodedPointer(&Offset, PersonalityEncoding,
808 EHFrameAddress ? EHFrameAddress + Offset : 0);
809 // Patch personality address
810 if (Personality)
811 PatcherCallback(*Personality, Offset, PersonalityEncoding);
812 break;
813 }
814 case 'R':
815 FDEPointerEncoding = Data.getU8(&Offset);
816 break;
817 case 'z':
818 if (i)
819 return createStringError(
820 errc::invalid_argument,
821 "'z' must be the first character at 0x%" PRIx64, StartOffset);
822 // Skip augmentation length
823 Data.getULEB128(&Offset);
824 break;
825 case 'S':
826 case 'B':
827 break;
828 }
829 }
830 Entries.emplace_back(std::make_unique<CIEInfo>(
831 FDEPointerEncoding, LSDAPointerEncoding, AugmentationString));
832 CIEs[StartOffset] = &*Entries.back();
833 return Error::success();
834 }
835
parseFDE(uint64_t CIEPointer,uint64_t StartStructureOffset)836 Error EHFrameParser::parseFDE(uint64_t CIEPointer,
837 uint64_t StartStructureOffset) {
838 Optional<uint64_t> LSDAAddress;
839 CIEInfo *Cie = CIEs[StartStructureOffset - CIEPointer];
840
841 // The address size is encoded in the CIE we reference.
842 if (!Cie)
843 return createStringError(errc::invalid_argument,
844 "parsing FDE data at 0x%" PRIx64
845 " failed due to missing CIE",
846 StartStructureOffset);
847 // Patch initial location
848 if (auto Val = Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding,
849 EHFrameAddress + Offset)) {
850 PatcherCallback(*Val, Offset, Cie->FDEPtrEncoding);
851 }
852 // Skip address range
853 Data.getEncodedPointer(&Offset, Cie->FDEPtrEncoding, 0);
854
855 // Process augmentation data for this FDE.
856 StringRef AugmentationString = Cie->AugmentationString;
857 if (!AugmentationString.empty() && Cie->LSDAPtrEncoding != DW_EH_PE_omit) {
858 // Skip augmentation length
859 Data.getULEB128(&Offset);
860 LSDAAddress =
861 Data.getEncodedPointer(&Offset, Cie->LSDAPtrEncoding,
862 EHFrameAddress ? Offset + EHFrameAddress : 0);
863 // Patch LSDA address
864 PatcherCallback(*LSDAAddress, Offset, Cie->LSDAPtrEncoding);
865 }
866 return Error::success();
867 }
868
parse()869 Error EHFrameParser::parse() {
870 while (Data.isValidOffset(Offset)) {
871 const uint64_t StartOffset = Offset;
872
873 uint64_t Length;
874 DwarfFormat Format;
875 std::tie(Length, Format) = Data.getInitialLength(&Offset);
876
877 // If the Length is 0, then this CIE is a terminator
878 if (Length == 0)
879 break;
880
881 const uint64_t StartStructureOffset = Offset;
882 const uint64_t EndStructureOffset = Offset + Length;
883
884 Error Err = Error::success();
885 const uint64_t Id = Data.getRelocatedValue(4, &Offset,
886 /*SectionIndex=*/nullptr, &Err);
887 if (Err)
888 return Err;
889
890 if (!Id) {
891 if (Error Err = parseCIE(StartOffset))
892 return Err;
893 } else {
894 if (Error Err = parseFDE(Id, StartStructureOffset))
895 return Err;
896 }
897 Offset = EndStructureOffset;
898 }
899
900 return Error::success();
901 }
902
parse(DWARFDataExtractor Data,uint64_t EHFrameAddress,PatcherCallbackTy PatcherCallback)903 Error EHFrameParser::parse(DWARFDataExtractor Data, uint64_t EHFrameAddress,
904 PatcherCallbackTy PatcherCallback) {
905 EHFrameParser Parser(Data, EHFrameAddress, PatcherCallback);
906 return Parser.parse();
907 }
908
909 } // namespace bolt
910 } // namespace llvm
911