1 //===- ScriptParser.cpp ---------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the base parser class for linker script and dynamic
11 // list.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ScriptParser.h"
16 #include "Error.h"
17 #include "llvm/ADT/Twine.h"
18 
19 using namespace llvm;
20 using namespace lld;
21 using namespace lld::elf;
22 
23 // Returns the line that the character S[Pos] is in.
24 static StringRef getLine(StringRef S, size_t Pos) {
25   size_t Begin = S.rfind('\n', Pos);
26   size_t End = S.find('\n', Pos);
27   Begin = (Begin == StringRef::npos) ? 0 : Begin + 1;
28   if (End == StringRef::npos)
29     End = S.size();
30   // rtrim for DOS-style newlines.
31   return S.substr(Begin, End - Begin).rtrim();
32 }
33 
34 void ScriptParserBase::printErrorPos() {
35   StringRef Tok = Tokens[Pos == 0 ? 0 : Pos - 1];
36   StringRef Line = getLine(Input, Tok.data() - Input.data());
37   size_t Col = Tok.data() - Line.data();
38   error(Line);
39   error(std::string(Col, ' ') + "^");
40 }
41 
42 // We don't want to record cascading errors. Keep only the first one.
43 void ScriptParserBase::setError(const Twine &Msg) {
44   if (Error)
45     return;
46   if (Input.empty()) {
47     error(Msg);
48   } else {
49     error("line " + Twine(getPos()) + ": " + Msg);
50     printErrorPos();
51   }
52   Error = true;
53 }
54 
55 // Split S into linker script tokens.
56 std::vector<StringRef> ScriptParserBase::tokenize(StringRef S) {
57   std::vector<StringRef> Ret;
58   for (;;) {
59     S = skipSpace(S);
60     if (S.empty())
61       return Ret;
62 
63     // Quoted token
64     if (S.startswith("\"")) {
65       size_t E = S.find("\"", 1);
66       if (E == StringRef::npos) {
67         error("unclosed quote");
68         return {};
69       }
70       Ret.push_back(S.substr(1, E - 1));
71       S = S.substr(E + 1);
72       continue;
73     }
74 
75     // Unquoted token
76     size_t Pos = S.find_first_not_of(
77         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
78         "0123456789_.$/\\~=+[]*?-:!<>");
79     // A character that cannot start a word (which is usually a
80     // punctuation) forms a single character token.
81     if (Pos == 0)
82       Pos = 1;
83     Ret.push_back(S.substr(0, Pos));
84     S = S.substr(Pos);
85   }
86 }
87 
88 // Skip leading whitespace characters or /**/-style comments.
89 StringRef ScriptParserBase::skipSpace(StringRef S) {
90   for (;;) {
91     if (S.startswith("/*")) {
92       size_t E = S.find("*/", 2);
93       if (E == StringRef::npos) {
94         error("unclosed comment in a linker script");
95         return "";
96       }
97       S = S.substr(E + 2);
98       continue;
99     }
100     size_t Size = S.size();
101     S = S.ltrim();
102     if (S.size() == Size)
103       return S;
104   }
105 }
106 
107 // An erroneous token is handled as if it were the last token before EOF.
108 bool ScriptParserBase::atEOF() { return Error || Tokens.size() == Pos; }
109 
110 StringRef ScriptParserBase::next() {
111   if (Error)
112     return "";
113   if (atEOF()) {
114     setError("unexpected EOF");
115     return "";
116   }
117   return Tokens[Pos++];
118 }
119 
120 StringRef ScriptParserBase::peek() {
121   StringRef Tok = next();
122   if (Error)
123     return "";
124   --Pos;
125   return Tok;
126 }
127 
128 bool ScriptParserBase::skip(StringRef Tok) {
129   if (Error)
130     return false;
131   if (atEOF()) {
132     setError("unexpected EOF");
133     return false;
134   }
135   if (Tokens[Pos] != Tok)
136     return false;
137   ++Pos;
138   return true;
139 }
140 
141 void ScriptParserBase::expect(StringRef Expect) {
142   if (Error)
143     return;
144   StringRef Tok = next();
145   if (Tok != Expect)
146     setError(Expect + " expected, but got " + Tok);
147 }
148 
149 // Returns the current line number.
150 size_t ScriptParserBase::getPos() {
151   if (Pos == 0)
152     return 1;
153   const char *Begin = Input.data();
154   const char *Tok = Tokens[Pos - 1].data();
155   return StringRef(Begin, Tok - Begin).count('\n') + 1;
156 }
157 
158 std::vector<uint8_t> ScriptParserBase::parseHex(StringRef S) {
159   std::vector<uint8_t> Hex;
160   while (!S.empty()) {
161     StringRef B = S.substr(0, 2);
162     S = S.substr(2);
163     uint8_t H;
164     if (B.getAsInteger(16, H)) {
165       setError("not a hexadecimal value: " + B);
166       return {};
167     }
168     Hex.push_back(H);
169   }
170   return Hex;
171 }
172