1 //===- ScriptParser.cpp ---------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the base parser class for linker script and dynamic
11 // list.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ScriptParser.h"
16 #include "Error.h"
17 #include "llvm/ADT/Twine.h"
18 
19 using namespace llvm;
20 using namespace lld;
21 using namespace lld::elf;
22 
23 // Returns the line that the character S[Pos] is in.
24 static StringRef getLine(StringRef S, size_t Pos) {
25   size_t Begin = S.rfind('\n', Pos);
26   size_t End = S.find('\n', Pos);
27   Begin = (Begin == StringRef::npos) ? 0 : Begin + 1;
28   if (End == StringRef::npos)
29     End = S.size();
30   // rtrim for DOS-style newlines.
31   return S.substr(Begin, End - Begin).rtrim();
32 }
33 
34 void ScriptParserBase::printErrorPos() {
35   StringRef Tok = Tokens[Pos == 0 ? 0 : Pos - 1];
36   StringRef Line = getLine(Input, Tok.data() - Input.data());
37   size_t Col = Tok.data() - Line.data();
38   error(Line);
39   error(std::string(Col, ' ') + "^");
40 }
41 
42 // We don't want to record cascading errors. Keep only the first one.
43 void ScriptParserBase::setError(const Twine &Msg) {
44   if (Error)
45     return;
46   if (Input.empty() || Tokens.empty()) {
47     error(Msg);
48   } else {
49     error("line " + Twine(getPos()) + ": " + Msg);
50     printErrorPos();
51   }
52   Error = true;
53 }
54 
55 // Split S into linker script tokens.
56 std::vector<StringRef> ScriptParserBase::tokenize(StringRef S) {
57   std::vector<StringRef> Ret;
58   for (;;) {
59     S = skipSpace(S);
60     if (S.empty())
61       return Ret;
62 
63     // Quoted token. Note that double-quote characters are parts of a token
64     // because, in a glob match context, only unquoted tokens are interpreted
65     // as glob patterns. Double-quoted tokens are literal patterns in that
66     // context.
67     if (S.startswith("\"")) {
68       size_t E = S.find("\"", 1);
69       if (E == StringRef::npos) {
70         error("unclosed quote");
71         return {};
72       }
73       Ret.push_back(S.take_front(E + 1));
74       S = S.substr(E + 1);
75       continue;
76     }
77 
78     // Unquoted token. This is more relaxed than tokens in C-like language,
79     // so that you can write "file-name.cpp" as one bare token, for example.
80     size_t Pos = S.find_first_not_of(
81         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
82         "0123456789_.$/\\~=+[]*?-:!<>^");
83 
84     // A character that cannot start a word (which is usually a
85     // punctuation) forms a single character token.
86     if (Pos == 0)
87       Pos = 1;
88     Ret.push_back(S.substr(0, Pos));
89     S = S.substr(Pos);
90   }
91 }
92 
93 // Skip leading whitespace characters or comments.
94 StringRef ScriptParserBase::skipSpace(StringRef S) {
95   for (;;) {
96     if (S.startswith("/*")) {
97       size_t E = S.find("*/", 2);
98       if (E == StringRef::npos) {
99         error("unclosed comment in a linker script");
100         return "";
101       }
102       S = S.substr(E + 2);
103       continue;
104     }
105     if (S.startswith("#")) {
106       size_t E = S.find('\n', 1);
107       if (E == StringRef::npos)
108         E = S.size() - 1;
109       S = S.substr(E + 1);
110       continue;
111     }
112     size_t Size = S.size();
113     S = S.ltrim();
114     if (S.size() == Size)
115       return S;
116   }
117 }
118 
119 // An erroneous token is handled as if it were the last token before EOF.
120 bool ScriptParserBase::atEOF() { return Error || Tokens.size() == Pos; }
121 
122 StringRef ScriptParserBase::next() {
123   if (Error)
124     return "";
125   if (atEOF()) {
126     setError("unexpected EOF");
127     return "";
128   }
129   return Tokens[Pos++];
130 }
131 
132 StringRef ScriptParserBase::peek() {
133   StringRef Tok = next();
134   if (Error)
135     return "";
136   --Pos;
137   return Tok;
138 }
139 
140 bool ScriptParserBase::skip(StringRef Tok) {
141   if (Error)
142     return false;
143   if (atEOF()) {
144     setError("unexpected EOF");
145     return false;
146   }
147   if (Tokens[Pos] != Tok)
148     return false;
149   ++Pos;
150   return true;
151 }
152 
153 void ScriptParserBase::expect(StringRef Expect) {
154   if (Error)
155     return;
156   StringRef Tok = next();
157   if (Tok != Expect)
158     setError(Expect + " expected, but got " + Tok);
159 }
160 
161 // Returns the current line number.
162 size_t ScriptParserBase::getPos() {
163   if (Pos == 0)
164     return 1;
165   const char *Begin = Input.data();
166   const char *Tok = Tokens[Pos - 1].data();
167   return StringRef(Begin, Tok - Begin).count('\n') + 1;
168 }
169