1 //===- LinkerScript.cpp ---------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the parser/evaluator of the linker script.
11 // It does not construct an AST but consume linker script directives directly.
12 // Results are written to Driver or Config object.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "LinkerScript.h"
17 #include "Config.h"
18 #include "Driver.h"
19 #include "InputSection.h"
20 #include "OutputSections.h"
21 #include "ScriptParser.h"
22 #include "SymbolTable.h"
23 #include "llvm/ADT/StringSwitch.h"
24 #include "llvm/Support/ELF.h"
25 #include "llvm/Support/FileSystem.h"
26 #include "llvm/Support/MemoryBuffer.h"
27 #include "llvm/Support/Path.h"
28 #include "llvm/Support/StringSaver.h"
29 
30 using namespace llvm;
31 using namespace llvm::ELF;
32 using namespace llvm::object;
33 using namespace lld;
34 using namespace lld::elf;
35 
36 LinkerScript *elf::Script;
37 
38 static uint64_t getInteger(StringRef S) {
39   uint64_t V;
40   if (S.getAsInteger(0, V)) {
41     error("malformed number: " + S);
42     return 0;
43   }
44   return V;
45 }
46 
47 static int precedence(StringRef Op) {
48   return StringSwitch<int>(Op)
49       .Case("*", 4)
50       .Case("/", 3)
51       .Case("+", 2)
52       .Case("-", 2)
53       .Case("&", 1)
54       .Default(-1);
55 }
56 
57 static StringRef next(ArrayRef<StringRef> &Tokens) {
58   if (Tokens.empty()) {
59     error("no next token");
60     return "";
61   }
62   StringRef Tok = Tokens.front();
63   Tokens = Tokens.slice(1);
64   return Tok;
65 }
66 
67 static uint64_t parseExpr(uint64_t Lhs, int MinPrec,
68                           ArrayRef<StringRef> &Tokens, uint64_t Dot);
69 
70 // This is a part of the operator-precedence parser to evaluate
71 // arithmetic expressions in SECTIONS command. This function evaluates an
72 // integer literal, a parenthesized expression or the special variable ".".
73 static uint64_t parsePrimary(ArrayRef<StringRef> &Tokens, uint64_t Dot) {
74   StringRef Tok = next(Tokens);
75   if (Tok == ".")
76     return Dot;
77   if (Tok == "(") {
78     uint64_t V = parsePrimary(Tokens, Dot);
79     V = parseExpr(V, 0, Tokens, Dot);
80     if (Tokens.empty()) {
81       error(") expected");
82     } else {
83       Tok = next(Tokens);
84       if (Tok != ")")
85         error(") expected, but got " + Tok);
86     }
87     return V;
88   }
89   return getInteger(Tok);
90 }
91 
92 static uint64_t apply(StringRef Op, uint64_t L, uint64_t R) {
93   if (Op == "+")
94     return L + R;
95   if (Op == "-")
96     return L - R;
97   if (Op == "*")
98     return L * R;
99   if (Op == "/") {
100     if (R == 0) {
101       error("division by zero");
102       return 0;
103     }
104     return L / R;
105   }
106   if (Op == "&")
107     return L & R;
108   llvm_unreachable("invalid operator");
109   return 0;
110 }
111 
112 // This is an operator-precedence parser to evaluate
113 // arithmetic expressions in SECTIONS command.
114 static uint64_t parseExpr(uint64_t Lhs, int MinPrec,
115                           ArrayRef<StringRef> &Tokens, uint64_t Dot) {
116   while (!Tokens.empty()) {
117     // Read an operator and an expression.
118     StringRef Op1 = Tokens.front();
119     if (precedence(Op1) < MinPrec)
120       return Lhs;
121     next(Tokens);
122     uint64_t Rhs = parsePrimary(Tokens, Dot);
123 
124     // Evaluate the remaining part of the expression first if the
125     // next operator has greater precedence than the previous one.
126     // For example, if we have read "+" and "3", and if the next
127     // operator is "*", then we'll evaluate 3 * ... part first.
128     while (!Tokens.empty()) {
129       StringRef Op2 = Tokens.front();
130       if (precedence(Op2) <= precedence(Op1))
131         break;
132       Rhs = parseExpr(Rhs, precedence(Op2), Tokens, Dot);
133     }
134 
135     Lhs = apply(Op1, Lhs, Rhs);
136   }
137   return Lhs;
138 }
139 
140 // Evaluates the expression given by list of tokens.
141 static uint64_t evaluate(ArrayRef<StringRef> Tokens, uint64_t Dot) {
142   uint64_t V = parsePrimary(Tokens, Dot);
143   V = parseExpr(V, 0, Tokens, Dot);
144   if (!Tokens.empty())
145     error("stray token: " + Tokens[0]);
146   return V;
147 }
148 
149 template <class ELFT>
150 SectionRule *LinkerScript::find(InputSectionBase<ELFT> *S) {
151   for (SectionRule &R : Sections)
152     if (R.match(S))
153       return &R;
154   return nullptr;
155 }
156 
157 template <class ELFT>
158 StringRef LinkerScript::getOutputSection(InputSectionBase<ELFT> *S) {
159   SectionRule *R = find(S);
160   return R ? R->Dest : "";
161 }
162 
163 template <class ELFT>
164 bool LinkerScript::isDiscarded(InputSectionBase<ELFT> *S) {
165   return getOutputSection(S) == "/DISCARD/";
166 }
167 
168 template <class ELFT> bool LinkerScript::shouldKeep(InputSectionBase<ELFT> *S) {
169   SectionRule *R = find(S);
170   return R && R->Keep;
171 }
172 
173 template <class ELFT>
174 static OutputSectionBase<ELFT> *
175 findSection(std::vector<OutputSectionBase<ELFT> *> &V, StringRef Name) {
176   for (OutputSectionBase<ELFT> *Sec : V)
177     if (Sec->getName() == Name)
178       return Sec;
179   return nullptr;
180 }
181 
182 template <class ELFT>
183 void LinkerScript::assignAddresses(
184     std::vector<OutputSectionBase<ELFT> *> &Sections) {
185   typedef typename ELFT::uint uintX_t;
186 
187   // Orphan sections are sections present in the input files which
188   // are not explicitly placed into the output file by the linker script.
189   // We place orphan sections at end of file.
190   // Other linkers places them using some heuristics as described in
191   // https://sourceware.org/binutils/docs/ld/Orphan-Sections.html#Orphan-Sections.
192   for (OutputSectionBase<ELFT> *Sec : Sections) {
193     StringRef Name = Sec->getName();
194     auto I = std::find(SectionOrder.begin(), SectionOrder.end(), Name);
195     if (I == SectionOrder.end())
196       Commands.push_back({SectionKind, {}, Name});
197   }
198 
199   // Assign addresses as instructed by linker script SECTIONS sub-commands.
200   uintX_t ThreadBssOffset = 0;
201   uintX_t VA =
202       Out<ELFT>::ElfHeader->getSize() + Out<ELFT>::ProgramHeaders->getSize();
203 
204   for (SectionsCommand &Cmd : Commands) {
205     if (Cmd.Kind == ExprKind) {
206       VA = evaluate(Cmd.Expr, VA);
207       continue;
208     }
209 
210     OutputSectionBase<ELFT> *Sec = findSection(Sections, Cmd.SectionName);
211     if (!Sec)
212       continue;
213 
214     if ((Sec->getFlags() & SHF_TLS) && Sec->getType() == SHT_NOBITS) {
215       uintX_t TVA = VA + ThreadBssOffset;
216       TVA = alignTo(TVA, Sec->getAlign());
217       Sec->setVA(TVA);
218       ThreadBssOffset = TVA - VA + Sec->getSize();
219       continue;
220     }
221 
222     if (Sec->getFlags() & SHF_ALLOC) {
223       VA = alignTo(VA, Sec->getAlign());
224       Sec->setVA(VA);
225       VA += Sec->getSize();
226       continue;
227     }
228   }
229 }
230 
231 ArrayRef<uint8_t> LinkerScript::getFiller(StringRef Name) {
232   auto I = Filler.find(Name);
233   if (I == Filler.end())
234     return {};
235   return I->second;
236 }
237 
238 // A compartor to sort output sections. Returns -1 or 1 if both
239 // A and B are mentioned in linker scripts. Otherwise, returns 0
240 // to use the default rule which is implemented in Writer.cpp.
241 int LinkerScript::compareSections(StringRef A, StringRef B) {
242   auto E = SectionOrder.end();
243   auto I = std::find(SectionOrder.begin(), E, A);
244   auto J = std::find(SectionOrder.begin(), E, B);
245   if (I == E || J == E)
246     return 0;
247   return I < J ? -1 : 1;
248 }
249 
250 // Returns true if S matches T. S can contain glob meta-characters.
251 // The asterisk ('*') matches zero or more characacters, and the question
252 // mark ('?') matches one character.
253 static bool matchStr(StringRef S, StringRef T) {
254   for (;;) {
255     if (S.empty())
256       return T.empty();
257     if (S[0] == '*') {
258       S = S.substr(1);
259       if (S.empty())
260         // Fast path. If a pattern is '*', it matches anything.
261         return true;
262       for (size_t I = 0, E = T.size(); I < E; ++I)
263         if (matchStr(S, T.substr(I)))
264           return true;
265       return false;
266     }
267     if (T.empty() || (S[0] != T[0] && S[0] != '?'))
268       return false;
269     S = S.substr(1);
270     T = T.substr(1);
271   }
272 }
273 
274 template <class ELFT> bool SectionRule::match(InputSectionBase<ELFT> *S) {
275   return matchStr(SectionPattern, S->getSectionName());
276 }
277 
278 class elf::ScriptParser final : public elf::ScriptParserBase {
279   typedef void (ScriptParser::*Handler)();
280 
281 public:
282   ScriptParser(BumpPtrAllocator *A, StringRef S, bool B)
283       : ScriptParserBase(S), Saver(*A), IsUnderSysroot(B) {}
284 
285   void run() override;
286 
287 private:
288   void addFile(StringRef Path);
289 
290   void readAsNeeded();
291   void readEntry();
292   void readExtern();
293   void readGroup();
294   void readInclude();
295   void readNothing() {}
296   void readOutput();
297   void readOutputArch();
298   void readOutputFormat();
299   void readSearchDir();
300   void readSections();
301 
302   void readLocationCounterValue();
303   void readOutputSectionDescription();
304   void readSectionPatterns(StringRef OutSec, bool Keep);
305 
306   StringSaver Saver;
307   const static StringMap<Handler> Cmd;
308   bool IsUnderSysroot;
309 };
310 
311 const StringMap<elf::ScriptParser::Handler> elf::ScriptParser::Cmd = {
312     {"ENTRY", &ScriptParser::readEntry},
313     {"EXTERN", &ScriptParser::readExtern},
314     {"GROUP", &ScriptParser::readGroup},
315     {"INCLUDE", &ScriptParser::readInclude},
316     {"INPUT", &ScriptParser::readGroup},
317     {"OUTPUT", &ScriptParser::readOutput},
318     {"OUTPUT_ARCH", &ScriptParser::readOutputArch},
319     {"OUTPUT_FORMAT", &ScriptParser::readOutputFormat},
320     {"SEARCH_DIR", &ScriptParser::readSearchDir},
321     {"SECTIONS", &ScriptParser::readSections},
322     {";", &ScriptParser::readNothing}};
323 
324 void ScriptParser::run() {
325   while (!atEOF()) {
326     StringRef Tok = next();
327     if (Handler Fn = Cmd.lookup(Tok))
328       (this->*Fn)();
329     else
330       setError("unknown directive: " + Tok);
331   }
332 }
333 
334 void ScriptParser::addFile(StringRef S) {
335   if (IsUnderSysroot && S.startswith("/")) {
336     SmallString<128> Path;
337     (Config->Sysroot + S).toStringRef(Path);
338     if (sys::fs::exists(Path)) {
339       Driver->addFile(Saver.save(Path.str()));
340       return;
341     }
342   }
343 
344   if (sys::path::is_absolute(S)) {
345     Driver->addFile(S);
346   } else if (S.startswith("=")) {
347     if (Config->Sysroot.empty())
348       Driver->addFile(S.substr(1));
349     else
350       Driver->addFile(Saver.save(Config->Sysroot + "/" + S.substr(1)));
351   } else if (S.startswith("-l")) {
352     Driver->addLibrary(S.substr(2));
353   } else if (sys::fs::exists(S)) {
354     Driver->addFile(S);
355   } else {
356     std::string Path = findFromSearchPaths(S);
357     if (Path.empty())
358       setError("unable to find " + S);
359     else
360       Driver->addFile(Saver.save(Path));
361   }
362 }
363 
364 void ScriptParser::readAsNeeded() {
365   expect("(");
366   bool Orig = Config->AsNeeded;
367   Config->AsNeeded = true;
368   while (!Error) {
369     StringRef Tok = next();
370     if (Tok == ")")
371       break;
372     addFile(Tok);
373   }
374   Config->AsNeeded = Orig;
375 }
376 
377 void ScriptParser::readEntry() {
378   // -e <symbol> takes predecence over ENTRY(<symbol>).
379   expect("(");
380   StringRef Tok = next();
381   if (Config->Entry.empty())
382     Config->Entry = Tok;
383   expect(")");
384 }
385 
386 void ScriptParser::readExtern() {
387   expect("(");
388   while (!Error) {
389     StringRef Tok = next();
390     if (Tok == ")")
391       return;
392     Config->Undefined.push_back(Tok);
393   }
394 }
395 
396 void ScriptParser::readGroup() {
397   expect("(");
398   while (!Error) {
399     StringRef Tok = next();
400     if (Tok == ")")
401       return;
402     if (Tok == "AS_NEEDED") {
403       readAsNeeded();
404       continue;
405     }
406     addFile(Tok);
407   }
408 }
409 
410 void ScriptParser::readInclude() {
411   StringRef Tok = next();
412   auto MBOrErr = MemoryBuffer::getFile(Tok);
413   if (!MBOrErr) {
414     setError("cannot open " + Tok);
415     return;
416   }
417   std::unique_ptr<MemoryBuffer> &MB = *MBOrErr;
418   StringRef S = Saver.save(MB->getMemBufferRef().getBuffer());
419   std::vector<StringRef> V = tokenize(S);
420   Tokens.insert(Tokens.begin() + Pos, V.begin(), V.end());
421 }
422 
423 void ScriptParser::readOutput() {
424   // -o <file> takes predecence over OUTPUT(<file>).
425   expect("(");
426   StringRef Tok = next();
427   if (Config->OutputFile.empty())
428     Config->OutputFile = Tok;
429   expect(")");
430 }
431 
432 void ScriptParser::readOutputArch() {
433   // Error checking only for now.
434   expect("(");
435   next();
436   expect(")");
437 }
438 
439 void ScriptParser::readOutputFormat() {
440   // Error checking only for now.
441   expect("(");
442   next();
443   StringRef Tok = next();
444   if (Tok == ")")
445    return;
446   if (Tok != ",") {
447     setError("unexpected token: " + Tok);
448     return;
449   }
450   next();
451   expect(",");
452   next();
453   expect(")");
454 }
455 
456 void ScriptParser::readSearchDir() {
457   expect("(");
458   Config->SearchPaths.push_back(next());
459   expect(")");
460 }
461 
462 void ScriptParser::readSections() {
463   Script->DoLayout = true;
464   expect("{");
465   while (!Error && !skip("}")) {
466     StringRef Tok = peek();
467     if (Tok == ".")
468       readLocationCounterValue();
469     else
470       readOutputSectionDescription();
471   }
472 }
473 
474 void ScriptParser::readSectionPatterns(StringRef OutSec, bool Keep) {
475   expect("(");
476   while (!Error && !skip(")"))
477     Script->Sections.emplace_back(OutSec, next(), Keep);
478 }
479 
480 void ScriptParser::readLocationCounterValue() {
481   expect(".");
482   expect("=");
483   Script->Commands.push_back({ExprKind, {}, ""});
484   SectionsCommand &Cmd = Script->Commands.back();
485   while (!Error) {
486     StringRef Tok = next();
487     if (Tok == ";")
488       break;
489     Cmd.Expr.push_back(Tok);
490   }
491   if (Cmd.Expr.empty())
492     error("error in location counter expression");
493 }
494 
495 void ScriptParser::readOutputSectionDescription() {
496   StringRef OutSec = next();
497   Script->SectionOrder.push_back(OutSec);
498   Script->Commands.push_back({SectionKind, {}, OutSec});
499   expect(":");
500   expect("{");
501   while (!Error && !skip("}")) {
502     StringRef Tok = next();
503     if (Tok == "*") {
504       readSectionPatterns(OutSec, false);
505     } else if (Tok == "KEEP") {
506       expect("(");
507       next(); // Skip *
508       readSectionPatterns(OutSec, true);
509       expect(")");
510     } else {
511       setError("unknown command " + Tok);
512     }
513   }
514   StringRef Tok = peek();
515   if (Tok.startswith("=")) {
516     if (!Tok.startswith("=0x")) {
517       setError("filler should be a hexadecimal value");
518       return;
519     }
520     Tok = Tok.substr(3);
521     Script->Filler[OutSec] = parseHex(Tok);
522     next();
523   }
524 }
525 
526 static bool isUnderSysroot(StringRef Path) {
527   if (Config->Sysroot == "")
528     return false;
529   for (; !Path.empty(); Path = sys::path::parent_path(Path))
530     if (sys::fs::equivalent(Config->Sysroot, Path))
531       return true;
532   return false;
533 }
534 
535 // Entry point. The other functions or classes are private to this file.
536 void LinkerScript::read(MemoryBufferRef MB) {
537   StringRef Path = MB.getBufferIdentifier();
538   ScriptParser(&Alloc, MB.getBuffer(), isUnderSysroot(Path)).run();
539 }
540 
541 template StringRef LinkerScript::getOutputSection(InputSectionBase<ELF32LE> *);
542 template StringRef LinkerScript::getOutputSection(InputSectionBase<ELF32BE> *);
543 template StringRef LinkerScript::getOutputSection(InputSectionBase<ELF64LE> *);
544 template StringRef LinkerScript::getOutputSection(InputSectionBase<ELF64BE> *);
545 
546 template bool LinkerScript::isDiscarded(InputSectionBase<ELF32LE> *);
547 template bool LinkerScript::isDiscarded(InputSectionBase<ELF32BE> *);
548 template bool LinkerScript::isDiscarded(InputSectionBase<ELF64LE> *);
549 template bool LinkerScript::isDiscarded(InputSectionBase<ELF64BE> *);
550 
551 template bool LinkerScript::shouldKeep(InputSectionBase<ELF32LE> *);
552 template bool LinkerScript::shouldKeep(InputSectionBase<ELF32BE> *);
553 template bool LinkerScript::shouldKeep(InputSectionBase<ELF64LE> *);
554 template bool LinkerScript::shouldKeep(InputSectionBase<ELF64BE> *);
555 
556 template void
557 LinkerScript::assignAddresses(std::vector<OutputSectionBase<ELF32LE> *> &);
558 template void
559 LinkerScript::assignAddresses(std::vector<OutputSectionBase<ELF32BE> *> &);
560 template void
561 LinkerScript::assignAddresses(std::vector<OutputSectionBase<ELF64LE> *> &);
562 template void
563 LinkerScript::assignAddresses(std::vector<OutputSectionBase<ELF64BE> *> &);
564