1 //===- ScriptParser.cpp ---------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// This file contains the base parser class for linker scripts and dynamic
// lists.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ScriptParser.h"
16 #include "Error.h"
17 #include "llvm/ADT/Twine.h"
18 
19 using namespace llvm;
20 using namespace lld;
21 using namespace lld::elf;
22 
23 // Returns the line that the character S[Pos] is in.
24 static StringRef getLine(StringRef S, size_t Pos) {
25   size_t Begin = S.rfind('\n', Pos);
26   size_t End = S.find('\n', Pos);
27   Begin = (Begin == StringRef::npos) ? 0 : Begin + 1;
28   if (End == StringRef::npos)
29     End = S.size();
30   // rtrim for DOS-style newlines.
31   return S.substr(Begin, End - Begin).rtrim();
32 }
33 
34 void ScriptParserBase::printErrorPos() {
35   StringRef Tok = Tokens[Pos == 0 ? 0 : Pos - 1];
36   StringRef Line = getLine(Input, Tok.data() - Input.data());
37   size_t Col = Tok.data() - Line.data();
38   error(Line);
39   error(std::string(Col, ' ') + "^");
40 }
41 
42 // We don't want to record cascading errors. Keep only the first one.
43 void ScriptParserBase::setError(const Twine &Msg) {
44   if (Error)
45     return;
46   error("line " + Twine(getPos()) + ": " + Msg);
47   printErrorPos();
48   Error = true;
49 }
50 
51 // Split S into linker script tokens.
52 std::vector<StringRef> ScriptParserBase::tokenize(StringRef S) {
53   std::vector<StringRef> Ret;
54   for (;;) {
55     S = skipSpace(S);
56     if (S.empty())
57       return Ret;
58 
59     // Quoted token
60     if (S.startswith("\"")) {
61       size_t E = S.find("\"", 1);
62       if (E == StringRef::npos) {
63         error("unclosed quote");
64         return {};
65       }
66       Ret.push_back(S.substr(1, E - 1));
67       S = S.substr(E + 1);
68       continue;
69     }
70 
71     // Unquoted token
72     size_t Pos = S.find_first_not_of(
73         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
74         "0123456789_.$/\\~=+[]*?-:");
75     // A character that cannot start a word (which is usually a
76     // punctuation) forms a single character token.
77     if (Pos == 0)
78       Pos = 1;
79     Ret.push_back(S.substr(0, Pos));
80     S = S.substr(Pos);
81   }
82 }
83 
84 // Skip leading whitespace characters or /**/-style comments.
85 StringRef ScriptParserBase::skipSpace(StringRef S) {
86   for (;;) {
87     if (S.startswith("/*")) {
88       size_t E = S.find("*/", 2);
89       if (E == StringRef::npos) {
90         error("unclosed comment in a linker script");
91         return "";
92       }
93       S = S.substr(E + 2);
94       continue;
95     }
96     size_t Size = S.size();
97     S = S.ltrim();
98     if (S.size() == Size)
99       return S;
100   }
101 }
102 
103 // An erroneous token is handled as if it were the last token before EOF.
104 bool ScriptParserBase::atEOF() { return Error || Tokens.size() == Pos; }
105 
106 StringRef ScriptParserBase::next() {
107   if (Error)
108     return "";
109   if (atEOF()) {
110     setError("unexpected EOF");
111     return "";
112   }
113   return Tokens[Pos++];
114 }
115 
116 StringRef ScriptParserBase::peek() {
117   StringRef Tok = next();
118   if (Error)
119     return "";
120   --Pos;
121   return Tok;
122 }
123 
124 bool ScriptParserBase::skip(StringRef Tok) {
125   if (Error)
126     return false;
127   if (atEOF()) {
128     setError("unexpected EOF");
129     return false;
130   }
131   if (Tokens[Pos] != Tok)
132     return false;
133   ++Pos;
134   return true;
135 }
136 
137 void ScriptParserBase::expect(StringRef Expect) {
138   if (Error)
139     return;
140   StringRef Tok = next();
141   if (Tok != Expect)
142     setError(Expect + " expected, but got " + Tok);
143 }
144 
145 // Returns the current line number.
146 size_t ScriptParserBase::getPos() {
147   if (Pos == 0)
148     return 1;
149   const char *Begin = Input.data();
150   const char *Tok = Tokens[Pos - 1].data();
151   return StringRef(Begin, Tok - Begin).count('\n') + 1;
152 }
153 
154 std::vector<uint8_t> ScriptParserBase::parseHex(StringRef S) {
155   std::vector<uint8_t> Hex;
156   while (!S.empty()) {
157     StringRef B = S.substr(0, 2);
158     S = S.substr(2);
159     uint8_t H;
160     if (B.getAsInteger(16, H)) {
161       setError("not a hexadecimal value: " + B);
162       return {};
163     }
164     Hex.push_back(H);
165   }
166   return Hex;
167 }
168