1 //===- ScriptParser.cpp ---------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
// This file contains the base parser class for linker scripts and dynamic
// lists.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ScriptParser.h"
16 #include "Error.h"
17 #include "llvm/ADT/Twine.h"
18 
19 using namespace llvm;
20 using namespace lld;
21 using namespace lld::elf;
22 
23 // Returns the line that the character S[Pos] is in.
24 static StringRef getLine(StringRef S, size_t Pos) {
25   size_t Begin = S.rfind('\n', Pos);
26   size_t End = S.find('\n', Pos);
27   Begin = (Begin == StringRef::npos) ? 0 : Begin + 1;
28   if (End == StringRef::npos)
29     End = S.size();
30   // rtrim for DOS-style newlines.
31   return S.substr(Begin, End - Begin).rtrim();
32 }
33 
34 void ScriptParserBase::printErrorPos() {
35   StringRef Tok = Tokens[Pos == 0 ? 0 : Pos - 1];
36   StringRef Line = getLine(Input, Tok.data() - Input.data());
37   size_t Col = Tok.data() - Line.data();
38   error(Line);
39   error(std::string(Col, ' ') + "^");
40 }
41 
42 // We don't want to record cascading errors. Keep only the first one.
43 void ScriptParserBase::setError(const Twine &Msg) {
44   if (Error)
45     return;
46   if (Input.empty() || Tokens.empty()) {
47     error(Msg);
48   } else {
49     error("line " + Twine(getPos()) + ": " + Msg);
50     printErrorPos();
51   }
52   Error = true;
53 }
54 
55 // Split S into linker script tokens.
56 std::vector<StringRef> ScriptParserBase::tokenize(StringRef S) {
57   std::vector<StringRef> Ret;
58   for (;;) {
59     S = skipSpace(S);
60     if (S.empty())
61       return Ret;
62 
63     // Quoted token.
64     if (S.startswith("\"")) {
65       size_t E = S.find("\"", 1);
66       if (E == StringRef::npos) {
67         error("unclosed quote");
68         return {};
69       }
70       Ret.push_back(S.substr(1, E - 1));
71       S = S.substr(E + 1);
72       continue;
73     }
74 
75     // Unquoted token. This is more relaxed than tokens in C-like language,
76     // so that you can write "file-name.cpp" as one bare token, for example.
77     size_t Pos = S.find_first_not_of(
78         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
79         "0123456789_.$/\\~=+[]*?-:!<>^");
80 
81     // A character that cannot start a word (which is usually a
82     // punctuation) forms a single character token.
83     if (Pos == 0)
84       Pos = 1;
85     Ret.push_back(S.substr(0, Pos));
86     S = S.substr(Pos);
87   }
88 }
89 
90 // Skip leading whitespace characters or comments.
91 StringRef ScriptParserBase::skipSpace(StringRef S) {
92   for (;;) {
93     if (S.startswith("/*")) {
94       size_t E = S.find("*/", 2);
95       if (E == StringRef::npos) {
96         error("unclosed comment in a linker script");
97         return "";
98       }
99       S = S.substr(E + 2);
100       continue;
101     }
102     if (S.startswith("#")) {
103       size_t E = S.find('\n', 1);
104       if (E == StringRef::npos)
105         E = S.size() - 1;
106       S = S.substr(E + 1);
107       continue;
108     }
109     size_t Size = S.size();
110     S = S.ltrim();
111     if (S.size() == Size)
112       return S;
113   }
114 }
115 
116 // An erroneous token is handled as if it were the last token before EOF.
117 bool ScriptParserBase::atEOF() { return Error || Tokens.size() == Pos; }
118 
119 StringRef ScriptParserBase::next() {
120   if (Error)
121     return "";
122   if (atEOF()) {
123     setError("unexpected EOF");
124     return "";
125   }
126   return Tokens[Pos++];
127 }
128 
129 StringRef ScriptParserBase::peek() {
130   StringRef Tok = next();
131   if (Error)
132     return "";
133   --Pos;
134   return Tok;
135 }
136 
137 bool ScriptParserBase::skip(StringRef Tok) {
138   if (Error)
139     return false;
140   if (atEOF()) {
141     setError("unexpected EOF");
142     return false;
143   }
144   if (Tokens[Pos] != Tok)
145     return false;
146   ++Pos;
147   return true;
148 }
149 
150 void ScriptParserBase::expect(StringRef Expect) {
151   if (Error)
152     return;
153   StringRef Tok = next();
154   if (Tok != Expect)
155     setError(Expect + " expected, but got " + Tok);
156 }
157 
158 // Returns the current line number.
159 size_t ScriptParserBase::getPos() {
160   if (Pos == 0)
161     return 1;
162   const char *Begin = Input.data();
163   const char *Tok = Tokens[Pos - 1].data();
164   return StringRef(Begin, Tok - Begin).count('\n') + 1;
165 }
166