1 //===- llvm/MC/MCMachObjectWriter.h - Mach Object Writer --------*- C++ -*-===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #ifndef LLVM_MC_MCMACHOBJECTWRITER_H
11 #define LLVM_MC_MCMACHOBJECTWRITER_H
12 
13 #include "llvm/ADT/DenseMap.h"
14 #include "llvm/ADT/StringRef.h"
15 #include "llvm/BinaryFormat/MachO.h"
16 #include "llvm/MC/MCExpr.h"
17 #include "llvm/MC/MCObjectWriter.h"
18 #include "llvm/MC/MCSection.h"
19 #include "llvm/MC/StringTableBuilder.h"
20 #include <cstdint>
21 #include <memory>
22 #include <string>
23 #include <vector>
24 
25 namespace llvm {
26 
27 class MachObjectWriter;
28 
29 class MCMachObjectTargetWriter : public MCObjectTargetWriter {
30   const unsigned Is64Bit : 1;
31   const uint32_t CPUType;
32   const uint32_t CPUSubtype;
33   unsigned LocalDifference_RIT;
34 
35 protected:
36   MCMachObjectTargetWriter(bool Is64Bit_, uint32_t CPUType_,
37                            uint32_t CPUSubtype_);
38 
setLocalDifferenceRelocationType(unsigned Type)39   void setLocalDifferenceRelocationType(unsigned Type) {
40     LocalDifference_RIT = Type;
41   }
42 
43 public:
44   virtual ~MCMachObjectTargetWriter();
45 
getFormat()46   virtual Triple::ObjectFormatType getFormat() const { return Triple::MachO; }
classof(const MCObjectTargetWriter * W)47   static bool classof(const MCObjectTargetWriter *W) {
48     return W->getFormat() == Triple::MachO;
49   }
50 
51   /// \name Lifetime Management
52   /// @{
53 
reset()54   virtual void reset() {}
55 
56   /// @}
57 
58   /// \name Accessors
59   /// @{
60 
is64Bit()61   bool is64Bit() const { return Is64Bit; }
getCPUType()62   uint32_t getCPUType() const { return CPUType; }
getCPUSubtype()63   uint32_t getCPUSubtype() const { return CPUSubtype; }
getLocalDifferenceRelocationType()64   unsigned getLocalDifferenceRelocationType() const {
65     return LocalDifference_RIT;
66   }
67 
68   /// @}
69 
70   /// \name API
71   /// @{
72 
73   virtual void recordRelocation(MachObjectWriter *Writer, MCAssembler &Asm,
74                                 const MCAsmLayout &Layout,
75                                 const MCFragment *Fragment,
76                                 const MCFixup &Fixup, MCValue Target,
77                                 uint64_t &FixedValue) = 0;
78 
79   /// @}
80 };
81 
82 class MachObjectWriter : public MCObjectWriter {
83   /// Helper struct for containing some precomputed information on symbols.
84   struct MachSymbolData {
85     const MCSymbol *Symbol;
86     uint64_t StringIndex;
87     uint8_t SectionIndex;
88 
89     // Support lexicographic sorting.
90     bool operator<(const MachSymbolData &RHS) const;
91   };
92 
93   /// The target specific Mach-O writer instance.
94   std::unique_ptr<MCMachObjectTargetWriter> TargetObjectWriter;
95 
96   /// \name Relocation Data
97   /// @{
98 
99   struct RelAndSymbol {
100     const MCSymbol *Sym;
101     MachO::any_relocation_info MRE;
RelAndSymbolRelAndSymbol102     RelAndSymbol(const MCSymbol *Sym, const MachO::any_relocation_info &MRE)
103         : Sym(Sym), MRE(MRE) {}
104   };
105 
106   DenseMap<const MCSection *, std::vector<RelAndSymbol>> Relocations;
107   DenseMap<const MCSection *, unsigned> IndirectSymBase;
108 
109   SectionAddrMap SectionAddress;
110 
111   /// @}
112   /// \name Symbol Table Data
113   /// @{
114 
115   StringTableBuilder StringTable{StringTableBuilder::MachO};
116   std::vector<MachSymbolData> LocalSymbolData;
117   std::vector<MachSymbolData> ExternalSymbolData;
118   std::vector<MachSymbolData> UndefinedSymbolData;
119 
120   /// @}
121 
122   MachSymbolData *findSymbolData(const MCSymbol &Sym);
123 
124   void writeWithPadding(StringRef Str, uint64_t Size);
125 
126 public:
MachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,raw_pwrite_stream & OS,bool IsLittleEndian)127   MachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,
128                    raw_pwrite_stream &OS, bool IsLittleEndian)
129       : TargetObjectWriter(std::move(MOTW)),
130         W(OS, IsLittleEndian ? support::little : support::big) {}
131 
132   support::endian::Writer W;
133 
134   const MCSymbol &findAliasedSymbol(const MCSymbol &Sym) const;
135 
136   /// \name Lifetime management Methods
137   /// @{
138 
139   void reset() override;
140 
141   /// @}
142 
143   /// \name Utility Methods
144   /// @{
145 
146   bool isFixupKindPCRel(const MCAssembler &Asm, unsigned Kind);
147 
getSectionAddressMap()148   SectionAddrMap &getSectionAddressMap() { return SectionAddress; }
149 
getSectionAddress(const MCSection * Sec)150   uint64_t getSectionAddress(const MCSection *Sec) const {
151     return SectionAddress.lookup(Sec);
152   }
153   uint64_t getSymbolAddress(const MCSymbol &S, const MCAsmLayout &Layout) const;
154 
155   uint64_t getFragmentAddress(const MCFragment *Fragment,
156                               const MCAsmLayout &Layout) const;
157 
158   uint64_t getPaddingSize(const MCSection *SD, const MCAsmLayout &Layout) const;
159 
160   bool doesSymbolRequireExternRelocation(const MCSymbol &S);
161 
162   /// @}
163 
164   /// \name Target Writer Proxy Accessors
165   /// @{
166 
is64Bit()167   bool is64Bit() const { return TargetObjectWriter->is64Bit(); }
isX86_64()168   bool isX86_64() const {
169     uint32_t CPUType = TargetObjectWriter->getCPUType();
170     return CPUType == MachO::CPU_TYPE_X86_64;
171   }
172 
173   /// @}
174 
175   void writeHeader(MachO::HeaderFileType Type, unsigned NumLoadCommands,
176                    unsigned LoadCommandsSize, bool SubsectionsViaSymbols);
177 
178   /// Write a segment load command.
179   ///
180   /// \param NumSections The number of sections in this segment.
181   /// \param SectionDataSize The total size of the sections.
182   void writeSegmentLoadCommand(StringRef Name, unsigned NumSections,
183                                uint64_t VMAddr, uint64_t VMSize,
184                                uint64_t SectionDataStartOffset,
185                                uint64_t SectionDataSize, uint32_t MaxProt,
186                                uint32_t InitProt);
187 
188   void writeSection(const MCAsmLayout &Layout, const MCSection &Sec,
189                     uint64_t VMAddr, uint64_t FileOffset, unsigned Flags,
190                     uint64_t RelocationsStart, unsigned NumRelocations);
191 
192   void writeSymtabLoadCommand(uint32_t SymbolOffset, uint32_t NumSymbols,
193                               uint32_t StringTableOffset,
194                               uint32_t StringTableSize);
195 
196   void writeDysymtabLoadCommand(
197       uint32_t FirstLocalSymbol, uint32_t NumLocalSymbols,
198       uint32_t FirstExternalSymbol, uint32_t NumExternalSymbols,
199       uint32_t FirstUndefinedSymbol, uint32_t NumUndefinedSymbols,
200       uint32_t IndirectSymbolOffset, uint32_t NumIndirectSymbols);
201 
202   void writeNlist(MachSymbolData &MSD, const MCAsmLayout &Layout);
203 
204   void writeLinkeditLoadCommand(uint32_t Type, uint32_t DataOffset,
205                                 uint32_t DataSize);
206 
207   void writeLinkerOptionsLoadCommand(const std::vector<std::string> &Options);
208 
209   // FIXME: We really need to improve the relocation validation. Basically, we
210   // want to implement a separate computation which evaluates the relocation
211   // entry as the linker would, and verifies that the resultant fixup value is
212   // exactly what the encoder wanted. This will catch several classes of
213   // problems:
214   //
215   //  - Relocation entry bugs, the two algorithms are unlikely to have the same
216   //    exact bug.
217   //
218   //  - Relaxation issues, where we forget to relax something.
219   //
220   //  - Input errors, where something cannot be correctly encoded. 'as' allows
221   //    these through in many cases.
222 
223   // Add a relocation to be output in the object file. At the time this is
224   // called, the symbol indexes are not know, so if the relocation refers
225   // to a symbol it should be passed as \p RelSymbol so that it can be updated
226   // afterwards. If the relocation doesn't refer to a symbol, nullptr should be
227   // used.
addRelocation(const MCSymbol * RelSymbol,const MCSection * Sec,MachO::any_relocation_info & MRE)228   void addRelocation(const MCSymbol *RelSymbol, const MCSection *Sec,
229                      MachO::any_relocation_info &MRE) {
230     RelAndSymbol P(RelSymbol, MRE);
231     Relocations[Sec].push_back(P);
232   }
233 
234   void recordScatteredRelocation(const MCAssembler &Asm,
235                                  const MCAsmLayout &Layout,
236                                  const MCFragment *Fragment,
237                                  const MCFixup &Fixup, MCValue Target,
238                                  unsigned Log2Size, uint64_t &FixedValue);
239 
240   void recordTLVPRelocation(const MCAssembler &Asm, const MCAsmLayout &Layout,
241                             const MCFragment *Fragment, const MCFixup &Fixup,
242                             MCValue Target, uint64_t &FixedValue);
243 
244   void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
245                         const MCFragment *Fragment, const MCFixup &Fixup,
246                         MCValue Target, uint64_t &FixedValue) override;
247 
248   void bindIndirectSymbols(MCAssembler &Asm);
249 
250   /// Compute the symbol table data.
251   void computeSymbolTable(MCAssembler &Asm,
252                           std::vector<MachSymbolData> &LocalSymbolData,
253                           std::vector<MachSymbolData> &ExternalSymbolData,
254                           std::vector<MachSymbolData> &UndefinedSymbolData);
255 
256   void computeSectionAddresses(const MCAssembler &Asm,
257                                const MCAsmLayout &Layout);
258 
259   void executePostLayoutBinding(MCAssembler &Asm,
260                                 const MCAsmLayout &Layout) override;
261 
262   bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
263                                               const MCSymbol &A,
264                                               const MCSymbol &B,
265                                               bool InSet) const override;
266 
267   bool isSymbolRefDifferenceFullyResolvedImpl(const MCAssembler &Asm,
268                                               const MCSymbol &SymA,
269                                               const MCFragment &FB, bool InSet,
270                                               bool IsPCRel) const override;
271 
272   uint64_t writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
273 };
274 
/// Construct a new Mach-O writer instance.
///
/// This routine takes ownership of the target writer subclass.
///
/// \param MOTW - The target specific Mach-O writer subclass.
/// \param OS - The stream to write to.
/// \param IsLittleEndian - Byte-order flag. Presumably forwarded to the
/// MachObjectWriter constructor, where true selects little-endian output and
/// false selects big-endian; the definition is not in this header, so confirm
/// against the implementation.
/// \returns The constructed object writer.
std::unique_ptr<MCObjectWriter>
createMachObjectWriter(std::unique_ptr<MCMachObjectTargetWriter> MOTW,
                       raw_pwrite_stream &OS, bool IsLittleEndian);
285 
286 } // end namespace llvm
287 
288 #endif // LLVM_MC_MCMACHOBJECTWRITER_H
289