1 //=--------- MachOLinkGraphBuilder.cpp - MachO LinkGraph builder ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// Generic MachO LinkGraph building code.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "MachOLinkGraphBuilder.h"
14 
15 #define DEBUG_TYPE "jitlink"
16 
// Name given to the graph section that holds common symbols. The pointer
// itself is const so the name cannot be reseated after initialization.
static const char *const CommonSectionName = "__common";
18 
19 namespace llvm {
20 namespace jitlink {
21 
22 MachOLinkGraphBuilder::~MachOLinkGraphBuilder() {}
23 
24 Expected<std::unique_ptr<LinkGraph>> MachOLinkGraphBuilder::buildGraph() {
25 
26   // Sanity check: we only operate on relocatable objects.
27   if (!Obj.isRelocatableObject())
28     return make_error<JITLinkError>("Object is not a relocatable MachO");
29 
30   if (auto Err = createNormalizedSections())
31     return std::move(Err);
32 
33   if (auto Err = createNormalizedSymbols())
34     return std::move(Err);
35 
36   if (auto Err = graphifyRegularSymbols())
37     return std::move(Err);
38 
39   if (auto Err = graphifySectionsWithCustomParsers())
40     return std::move(Err);
41 
42   if (auto Err = addRelocations())
43     return std::move(Err);
44 
45   return std::move(G);
46 }
47 
48 MachOLinkGraphBuilder::MachOLinkGraphBuilder(const object::MachOObjectFile &Obj)
49     : Obj(Obj),
50       G(std::make_unique<LinkGraph>(std::string(Obj.getFileName()),
51                                     getPointerSize(Obj), getEndianness(Obj))) {}
52 
53 void MachOLinkGraphBuilder::addCustomSectionParser(
54     StringRef SectionName, SectionParserFunction Parser) {
55   assert(!CustomSectionParserFunctions.count(SectionName) &&
56          "Custom parser for this section already exists");
57   CustomSectionParserFunctions[SectionName] = std::move(Parser);
58 }
59 
60 Linkage MachOLinkGraphBuilder::getLinkage(uint16_t Desc) {
61   if ((Desc & MachO::N_WEAK_DEF) || (Desc & MachO::N_WEAK_REF))
62     return Linkage::Weak;
63   return Linkage::Strong;
64 }
65 
66 Scope MachOLinkGraphBuilder::getScope(StringRef Name, uint8_t Type) {
67   if (Type & MachO::N_EXT) {
68     if ((Type & MachO::N_PEXT) || Name.startswith("l"))
69       return Scope::Hidden;
70     else
71       return Scope::Default;
72   }
73   return Scope::Local;
74 }
75 
76 bool MachOLinkGraphBuilder::isAltEntry(const NormalizedSymbol &NSym) {
77   return NSym.Desc & MachO::N_ALT_ENTRY;
78 }
79 
80 bool MachOLinkGraphBuilder::isDebugSection(const NormalizedSection &NSec) {
81   return (NSec.Flags & MachO::S_ATTR_DEBUG &&
82           strcmp(NSec.SegName, "__DWARF") == 0);
83 }
84 
85 unsigned
86 MachOLinkGraphBuilder::getPointerSize(const object::MachOObjectFile &Obj) {
87   return Obj.is64Bit() ? 8 : 4;
88 }
89 
90 support::endianness
91 MachOLinkGraphBuilder::getEndianness(const object::MachOObjectFile &Obj) {
92   return Obj.isLittleEndian() ? support::little : support::big;
93 }
94 
95 Section &MachOLinkGraphBuilder::getCommonSection() {
96   if (!CommonSection) {
97     auto Prot = static_cast<sys::Memory::ProtectionFlags>(
98         sys::Memory::MF_READ | sys::Memory::MF_WRITE);
99     CommonSection = &G->createSection(CommonSectionName, Prot);
100   }
101   return *CommonSection;
102 }
103 
104 Error MachOLinkGraphBuilder::createNormalizedSections() {
105   // Build normalized sections. Verifies that section data is in-range (for
106   // sections with content) and that address ranges are non-overlapping.
107 
108   LLVM_DEBUG(dbgs() << "Creating normalized sections...\n");
109 
110   for (auto &SecRef : Obj.sections()) {
111     NormalizedSection NSec;
112     uint32_t DataOffset = 0;
113 
114     auto SecIndex = Obj.getSectionIndex(SecRef.getRawDataRefImpl());
115 
116     auto Name = SecRef.getName();
117     if (!Name)
118       return Name.takeError();
119 
120     if (Obj.is64Bit()) {
121       const MachO::section_64 &Sec64 =
122           Obj.getSection64(SecRef.getRawDataRefImpl());
123 
124       memcpy(&NSec.SectName, &Sec64.sectname, 16);
125       NSec.SectName[16] = '\0';
126       memcpy(&NSec.SegName, Sec64.segname, 16);
127       NSec.SegName[16] = '\0';
128 
129       NSec.Address = Sec64.addr;
130       NSec.Size = Sec64.size;
131       NSec.Alignment = 1ULL << Sec64.align;
132       NSec.Flags = Sec64.flags;
133       DataOffset = Sec64.offset;
134     } else {
135       const MachO::section &Sec32 = Obj.getSection(SecRef.getRawDataRefImpl());
136 
137       memcpy(&NSec.SectName, &Sec32.sectname, 16);
138       NSec.SectName[16] = '\0';
139       memcpy(&NSec.SegName, Sec32.segname, 16);
140       NSec.SegName[16] = '\0';
141 
142       NSec.Address = Sec32.addr;
143       NSec.Size = Sec32.size;
144       NSec.Alignment = 1ULL << Sec32.align;
145       NSec.Flags = Sec32.flags;
146       DataOffset = Sec32.offset;
147     }
148 
149     LLVM_DEBUG({
150       dbgs() << "  " << *Name << ": " << formatv("{0:x16}", NSec.Address)
151              << " -- " << formatv("{0:x16}", NSec.Address + NSec.Size)
152              << ", align: " << NSec.Alignment << ", index: " << SecIndex
153              << "\n";
154     });
155 
156     // Get the section data if any.
157     {
158       unsigned SectionType = NSec.Flags & MachO::SECTION_TYPE;
159       if (SectionType != MachO::S_ZEROFILL &&
160           SectionType != MachO::S_GB_ZEROFILL) {
161 
162         if (DataOffset + NSec.Size > Obj.getData().size())
163           return make_error<JITLinkError>(
164               "Section data extends past end of file");
165 
166         NSec.Data = Obj.getData().data() + DataOffset;
167       }
168     }
169 
170     // Get prot flags.
171     // FIXME: Make sure this test is correct (it's probably missing cases
172     // as-is).
173     sys::Memory::ProtectionFlags Prot;
174     if (NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS)
175       Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
176                                                        sys::Memory::MF_EXEC);
177     else
178       Prot = static_cast<sys::Memory::ProtectionFlags>(sys::Memory::MF_READ |
179                                                        sys::Memory::MF_WRITE);
180 
181     if (!isDebugSection(NSec))
182       NSec.GraphSection = &G->createSection(*Name, Prot);
183     else
184       LLVM_DEBUG({
185         dbgs() << "    " << *Name
186                << " is a debug section: No graph section will be created.\n";
187       });
188 
189     IndexToSection.insert(std::make_pair(SecIndex, std::move(NSec)));
190   }
191 
192   std::vector<NormalizedSection *> Sections;
193   Sections.reserve(IndexToSection.size());
194   for (auto &KV : IndexToSection)
195     Sections.push_back(&KV.second);
196 
197   // If we didn't end up creating any sections then bail out. The code below
198   // assumes that we have at least one section.
199   if (Sections.empty())
200     return Error::success();
201 
202   llvm::sort(Sections,
203              [](const NormalizedSection *LHS, const NormalizedSection *RHS) {
204                assert(LHS && RHS && "Null section?");
205                if (LHS->Address != RHS->Address)
206                  return LHS->Address < RHS->Address;
207                return LHS->Size < RHS->Size;
208              });
209 
210   for (unsigned I = 0, E = Sections.size() - 1; I != E; ++I) {
211     auto &Cur = *Sections[I];
212     auto &Next = *Sections[I + 1];
213     if (Next.Address < Cur.Address + Cur.Size)
214       return make_error<JITLinkError>(
215           "Address range for section " +
216           formatv("\"{0}/{1}\" [ {2:x16} -- {3:x16} ] ", Cur.SegName,
217                   Cur.SectName, Cur.Address, Cur.Address + Cur.Size) +
218           "overlaps section \"" + Next.SegName + "/" + Next.SectName + "\"" +
219           formatv("\"{0}/{1}\" [ {2:x16} -- {3:x16} ] ", Next.SegName,
220                   Next.SectName, Next.Address, Next.Address + Next.Size));
221   }
222 
223   return Error::success();
224 }
225 
226 Error MachOLinkGraphBuilder::createNormalizedSymbols() {
227   LLVM_DEBUG(dbgs() << "Creating normalized symbols...\n");
228 
229   for (auto &SymRef : Obj.symbols()) {
230 
231     unsigned SymbolIndex = Obj.getSymbolIndex(SymRef.getRawDataRefImpl());
232     uint64_t Value;
233     uint32_t NStrX;
234     uint8_t Type;
235     uint8_t Sect;
236     uint16_t Desc;
237 
238     if (Obj.is64Bit()) {
239       const MachO::nlist_64 &NL64 =
240           Obj.getSymbol64TableEntry(SymRef.getRawDataRefImpl());
241       Value = NL64.n_value;
242       NStrX = NL64.n_strx;
243       Type = NL64.n_type;
244       Sect = NL64.n_sect;
245       Desc = NL64.n_desc;
246     } else {
247       const MachO::nlist &NL32 =
248           Obj.getSymbolTableEntry(SymRef.getRawDataRefImpl());
249       Value = NL32.n_value;
250       NStrX = NL32.n_strx;
251       Type = NL32.n_type;
252       Sect = NL32.n_sect;
253       Desc = NL32.n_desc;
254     }
255 
256     // Skip stabs.
257     // FIXME: Are there other symbols we should be skipping?
258     if (Type & MachO::N_STAB)
259       continue;
260 
261     Optional<StringRef> Name;
262     if (NStrX) {
263       if (auto NameOrErr = SymRef.getName())
264         Name = *NameOrErr;
265       else
266         return NameOrErr.takeError();
267     }
268 
269     LLVM_DEBUG({
270       dbgs() << "  ";
271       if (!Name)
272         dbgs() << "<anonymous symbol>";
273       else
274         dbgs() << *Name;
275       dbgs() << ": value = " << formatv("{0:x16}", Value)
276              << ", type = " << formatv("{0:x2}", Type)
277              << ", desc = " << formatv("{0:x4}", Desc) << ", sect = ";
278       if (Sect)
279         dbgs() << static_cast<unsigned>(Sect - 1);
280       else
281         dbgs() << "none";
282       dbgs() << "\n";
283     });
284 
285     // If this symbol has a section, sanity check that the addresses line up.
286     if (Sect != 0) {
287       auto NSec = findSectionByIndex(Sect - 1);
288       if (!NSec)
289         return NSec.takeError();
290 
291       if (Value < NSec->Address || Value > NSec->Address + NSec->Size)
292         return make_error<JITLinkError>("Symbol address does not fall within "
293                                         "section");
294 
295       if (!NSec->GraphSection) {
296         LLVM_DEBUG({
297           dbgs() << "  Skipping: Symbol is in section " << NSec->SegName << "/"
298                  << NSec->SectName
299                  << " which has no associated graph section.\n";
300         });
301         continue;
302       }
303     }
304 
305     IndexToSymbol[SymbolIndex] =
306         &createNormalizedSymbol(*Name, Value, Type, Sect, Desc,
307                                 getLinkage(Desc), getScope(*Name, Type));
308   }
309 
310   return Error::success();
311 }
312 
313 void MachOLinkGraphBuilder::addSectionStartSymAndBlock(
314     Section &GraphSec, uint64_t Address, const char *Data, uint64_t Size,
315     uint32_t Alignment, bool IsLive) {
316   Block &B =
317       Data ? G->createContentBlock(GraphSec, StringRef(Data, Size), Address,
318                                    Alignment, 0)
319            : G->createZeroFillBlock(GraphSec, Size, Address, Alignment, 0);
320   auto &Sym = G->addAnonymousSymbol(B, 0, Size, false, IsLive);
321   assert(!AddrToCanonicalSymbol.count(Sym.getAddress()) &&
322          "Anonymous block start symbol clashes with existing symbol address");
323   AddrToCanonicalSymbol[Sym.getAddress()] = &Sym;
324 }
325 
326 Error MachOLinkGraphBuilder::graphifyRegularSymbols() {
327 
328   LLVM_DEBUG(dbgs() << "Creating graph symbols...\n");
329 
330   /// We only have 256 section indexes: Use a vector rather than a map.
331   std::vector<std::vector<NormalizedSymbol *>> SecIndexToSymbols;
332   SecIndexToSymbols.resize(256);
333 
334   // Create commons, externs, and absolutes, and partition all other symbols by
335   // section.
336   for (auto &KV : IndexToSymbol) {
337     auto &NSym = *KV.second;
338 
339     switch (NSym.Type & MachO::N_TYPE) {
340     case MachO::N_UNDF:
341       if (NSym.Value) {
342         if (!NSym.Name)
343           return make_error<JITLinkError>("Anonymous common symbol at index " +
344                                           Twine(KV.first));
345         NSym.GraphSymbol = &G->addCommonSymbol(
346             *NSym.Name, NSym.S, getCommonSection(), 0, NSym.Value,
347             1ull << MachO::GET_COMM_ALIGN(NSym.Desc),
348             NSym.Desc & MachO::N_NO_DEAD_STRIP);
349       } else {
350         if (!NSym.Name)
351           return make_error<JITLinkError>("Anonymous external symbol at "
352                                           "index " +
353                                           Twine(KV.first));
354         NSym.GraphSymbol = &G->addExternalSymbol(
355             *NSym.Name, 0,
356             NSym.Desc & MachO::N_WEAK_REF ? Linkage::Weak : Linkage::Strong);
357       }
358       break;
359     case MachO::N_ABS:
360       if (!NSym.Name)
361         return make_error<JITLinkError>("Anonymous absolute symbol at index " +
362                                         Twine(KV.first));
363       NSym.GraphSymbol = &G->addAbsoluteSymbol(
364           *NSym.Name, NSym.Value, 0, Linkage::Strong, Scope::Default,
365           NSym.Desc & MachO::N_NO_DEAD_STRIP);
366       break;
367     case MachO::N_SECT:
368       SecIndexToSymbols[NSym.Sect - 1].push_back(&NSym);
369       break;
370     case MachO::N_PBUD:
371       return make_error<JITLinkError>(
372           "Unupported N_PBUD symbol " +
373           (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) +
374           " at index " + Twine(KV.first));
375     case MachO::N_INDR:
376       return make_error<JITLinkError>(
377           "Unupported N_INDR symbol " +
378           (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) +
379           " at index " + Twine(KV.first));
380     default:
381       return make_error<JITLinkError>(
382           "Unrecognized symbol type " + Twine(NSym.Type & MachO::N_TYPE) +
383           " for symbol " +
384           (NSym.Name ? ("\"" + *NSym.Name + "\"") : Twine("<anon>")) +
385           " at index " + Twine(KV.first));
386     }
387   }
388 
389   // Loop over sections performing regular graphification for those that
390   // don't have custom parsers.
391   for (auto &KV : IndexToSection) {
392     auto SecIndex = KV.first;
393     auto &NSec = KV.second;
394 
395     if (!NSec.GraphSection) {
396       LLVM_DEBUG({
397         dbgs() << "  " << NSec.SegName << "/" << NSec.SectName
398                << " has no graph section. Skipping.\n";
399       });
400       continue;
401     }
402 
403     // Skip sections with custom parsers.
404     if (CustomSectionParserFunctions.count(NSec.GraphSection->getName())) {
405       LLVM_DEBUG({
406         dbgs() << "  Skipping section " << NSec.GraphSection->getName()
407                << " as it has a custom parser.\n";
408       });
409       continue;
410     } else
411       LLVM_DEBUG({
412         dbgs() << "  Processing section " << NSec.GraphSection->getName()
413                << "...\n";
414       });
415 
416     bool SectionIsNoDeadStrip = NSec.Flags & MachO::S_ATTR_NO_DEAD_STRIP;
417     bool SectionIsText = NSec.Flags & MachO::S_ATTR_PURE_INSTRUCTIONS;
418 
419     auto &SecNSymStack = SecIndexToSymbols[SecIndex];
420 
421     // If this section is non-empty but there are no symbols covering it then
422     // create one block and anonymous symbol to cover the entire section.
423     if (SecNSymStack.empty()) {
424       if (NSec.Size > 0) {
425         LLVM_DEBUG({
426           dbgs() << "    Section non-empty, but contains no symbols. "
427                     "Creating anonymous block to cover "
428                  << formatv("{0:x16}", NSec.Address) << " -- "
429                  << formatv("{0:x16}", NSec.Address + NSec.Size) << "\n";
430         });
431         addSectionStartSymAndBlock(*NSec.GraphSection, NSec.Address, NSec.Data,
432                                    NSec.Size, NSec.Alignment,
433                                    SectionIsNoDeadStrip);
434       } else
435         LLVM_DEBUG({
436           dbgs() << "    Section empty and contains no symbols. Skipping.\n";
437         });
438       continue;
439     }
440 
441     // Sort the symbol stack in by address, alt-entry status, scope, and name.
442     // We sort in reverse order so that symbols will be visited in the right
443     // order when we pop off the stack below.
444     llvm::sort(SecNSymStack, [](const NormalizedSymbol *LHS,
445                                 const NormalizedSymbol *RHS) {
446       if (LHS->Value != RHS->Value)
447         return LHS->Value > RHS->Value;
448       if (isAltEntry(*LHS) != isAltEntry(*RHS))
449         return isAltEntry(*RHS);
450       if (LHS->S != RHS->S)
451         return static_cast<uint8_t>(LHS->S) < static_cast<uint8_t>(RHS->S);
452       return LHS->Name < RHS->Name;
453     });
454 
455     // The first symbol in a section can not be an alt-entry symbol.
456     if (!SecNSymStack.empty() && isAltEntry(*SecNSymStack.back()))
457       return make_error<JITLinkError>(
458           "First symbol in " + NSec.GraphSection->getName() + " is alt-entry");
459 
460     // If the section is non-empty but there is no symbol covering the start
461     // address then add an anonymous one.
462     if (SecNSymStack.back()->Value != NSec.Address) {
463       auto AnonBlockSize = SecNSymStack.back()->Value - NSec.Address;
464       LLVM_DEBUG({
465         dbgs() << "    Section start not covered by symbol. "
466                << "Creating anonymous block to cover [ "
467                << formatv("{0:x16}", NSec.Address) << " -- "
468                << formatv("{0:x16}", NSec.Address + AnonBlockSize) << " ]\n";
469       });
470       addSectionStartSymAndBlock(*NSec.GraphSection, NSec.Address, NSec.Data,
471                                  AnonBlockSize, NSec.Alignment,
472                                  SectionIsNoDeadStrip);
473     }
474 
475     // Visit section symbols in order by popping off the reverse-sorted stack,
476     // building blocks for each alt-entry chain and creating symbols as we go.
477     while (!SecNSymStack.empty()) {
478       SmallVector<NormalizedSymbol *, 8> BlockSyms;
479 
480       BlockSyms.push_back(SecNSymStack.back());
481       SecNSymStack.pop_back();
482       while (!SecNSymStack.empty() &&
483              (isAltEntry(*SecNSymStack.back()) ||
484               SecNSymStack.back()->Value == BlockSyms.back()->Value)) {
485         BlockSyms.push_back(SecNSymStack.back());
486         SecNSymStack.pop_back();
487       }
488 
489       // BlockNSyms now contains the block symbols in reverse canonical order.
490       JITTargetAddress BlockStart = BlockSyms.front()->Value;
491       JITTargetAddress BlockEnd = SecNSymStack.empty()
492                                       ? NSec.Address + NSec.Size
493                                       : SecNSymStack.back()->Value;
494       JITTargetAddress BlockOffset = BlockStart - NSec.Address;
495       JITTargetAddress BlockSize = BlockEnd - BlockStart;
496 
497       LLVM_DEBUG({
498         dbgs() << "    Creating block for " << formatv("{0:x16}", BlockStart)
499                << " -- " << formatv("{0:x16}", BlockEnd) << ": "
500                << NSec.GraphSection->getName() << " + "
501                << formatv("{0:x16}", BlockOffset) << " with "
502                << BlockSyms.size() << " symbol(s)...\n";
503       });
504 
505       Block &B =
506           NSec.Data
507               ? G->createContentBlock(
508                     *NSec.GraphSection,
509                     StringRef(NSec.Data + BlockOffset, BlockSize), BlockStart,
510                     NSec.Alignment, BlockStart % NSec.Alignment)
511               : G->createZeroFillBlock(*NSec.GraphSection, BlockSize,
512                                        BlockStart, NSec.Alignment,
513                                        BlockStart % NSec.Alignment);
514 
515       Optional<JITTargetAddress> LastCanonicalAddr;
516       JITTargetAddress SymEnd = BlockEnd;
517       while (!BlockSyms.empty()) {
518         auto &NSym = *BlockSyms.back();
519         BlockSyms.pop_back();
520 
521         bool SymLive =
522             (NSym.Desc & MachO::N_NO_DEAD_STRIP) || SectionIsNoDeadStrip;
523 
524         LLVM_DEBUG({
525           dbgs() << "      " << formatv("{0:x16}", NSym.Value) << " -- "
526                  << formatv("{0:x16}", SymEnd) << ": ";
527           if (!NSym.Name)
528             dbgs() << "<anonymous symbol>";
529           else
530             dbgs() << NSym.Name;
531           if (SymLive)
532             dbgs() << " [no-dead-strip]";
533           if (LastCanonicalAddr == NSym.Value)
534             dbgs() << " [non-canonical]";
535           dbgs() << "\n";
536         });
537 
538         auto &Sym =
539             NSym.Name
540                 ? G->addDefinedSymbol(B, NSym.Value - BlockStart, *NSym.Name,
541                                       SymEnd - NSym.Value, NSym.L, NSym.S,
542                                       SectionIsText, SymLive)
543                 : G->addAnonymousSymbol(B, NSym.Value - BlockStart,
544                                         SymEnd - NSym.Value, SectionIsText,
545                                         SymLive);
546         NSym.GraphSymbol = &Sym;
547         if (LastCanonicalAddr != Sym.getAddress()) {
548           if (LastCanonicalAddr)
549             SymEnd = *LastCanonicalAddr;
550           LastCanonicalAddr = Sym.getAddress();
551           setCanonicalSymbol(Sym);
552         }
553       }
554     }
555   }
556 
557   return Error::success();
558 }
559 
560 Error MachOLinkGraphBuilder::graphifySectionsWithCustomParsers() {
561   // Graphify special sections.
562   for (auto &KV : IndexToSection) {
563     auto &NSec = KV.second;
564 
565     // Skip non-graph sections.
566     if (!NSec.GraphSection)
567       continue;
568 
569     auto HI = CustomSectionParserFunctions.find(NSec.GraphSection->getName());
570     if (HI != CustomSectionParserFunctions.end()) {
571       auto &Parse = HI->second;
572       if (auto Err = Parse(NSec))
573         return Err;
574     }
575   }
576 
577   return Error::success();
578 }
579 
580 } // end namespace jitlink
581 } // end namespace llvm
582