1 //===- ScriptParser.cpp ---------------------------------------------------===//
2 //
3 //                             The LLVM Linker
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains the base parser class for linker script and dynamic
11 // list.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "ScriptParser.h"
16 #include "Error.h"
17 
18 using namespace llvm;
19 using namespace lld;
20 using namespace lld::elf;
21 
22 // Returns the line that the character S[Pos] is in.
23 static StringRef getLine(StringRef S, size_t Pos) {
24   size_t Begin = S.rfind('\n', Pos);
25   size_t End = S.find('\n', Pos);
26   Begin = (Begin == StringRef::npos) ? 0 : Begin + 1;
27   if (End == StringRef::npos)
28     End = S.size();
29   // rtrim for DOS-style newlines.
30   return S.substr(Begin, End - Begin).rtrim();
31 }
32 
33 void ScriptParserBase::printErrorPos() {
34   StringRef Tok = Tokens[Pos == 0 ? 0 : Pos - 1];
35   StringRef Line = getLine(Input, Tok.data() - Input.data());
36   size_t Col = Tok.data() - Line.data();
37   error(Line);
38   error(std::string(Col, ' ') + "^");
39 }
40 
41 // We don't want to record cascading errors. Keep only the first one.
42 void ScriptParserBase::setError(const Twine &Msg) {
43   if (Error)
44     return;
45   error("line " + Twine(getPos()) + ": " + Msg);
46   printErrorPos();
47   Error = true;
48 }
49 
50 // Split S into linker script tokens.
51 std::vector<StringRef> ScriptParserBase::tokenize(StringRef S) {
52   std::vector<StringRef> Ret;
53   for (;;) {
54     S = skipSpace(S);
55     if (S.empty())
56       return Ret;
57 
58     // Quoted token
59     if (S.startswith("\"")) {
60       size_t E = S.find("\"", 1);
61       if (E == StringRef::npos) {
62         error("unclosed quote");
63         return {};
64       }
65       Ret.push_back(S.substr(1, E - 1));
66       S = S.substr(E + 1);
67       continue;
68     }
69 
70     // Unquoted token
71     size_t Pos = S.find_first_not_of(
72         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
73         "0123456789_.$/\\~=+[]*?-:");
74     // A character that cannot start a word (which is usually a
75     // punctuation) forms a single character token.
76     if (Pos == 0)
77       Pos = 1;
78     Ret.push_back(S.substr(0, Pos));
79     S = S.substr(Pos);
80   }
81 }
82 
83 // Skip leading whitespace characters or /**/-style comments.
84 StringRef ScriptParserBase::skipSpace(StringRef S) {
85   for (;;) {
86     if (S.startswith("/*")) {
87       size_t E = S.find("*/", 2);
88       if (E == StringRef::npos) {
89         error("unclosed comment in a linker script");
90         return "";
91       }
92       S = S.substr(E + 2);
93       continue;
94     }
95     size_t Size = S.size();
96     S = S.ltrim();
97     if (S.size() == Size)
98       return S;
99   }
100 }
101 
102 // An erroneous token is handled as if it were the last token before EOF.
103 bool ScriptParserBase::atEOF() { return Error || Tokens.size() == Pos; }
104 
105 StringRef ScriptParserBase::next() {
106   if (Error)
107     return "";
108   if (atEOF()) {
109     setError("unexpected EOF");
110     return "";
111   }
112   return Tokens[Pos++];
113 }
114 
115 StringRef ScriptParserBase::peek() {
116   StringRef Tok = next();
117   if (Error)
118     return "";
119   --Pos;
120   return Tok;
121 }
122 
123 bool ScriptParserBase::skip(StringRef Tok) {
124   if (Error)
125     return false;
126   if (atEOF()) {
127     setError("unexpected EOF");
128     return false;
129   }
130   if (Tokens[Pos] != Tok)
131     return false;
132   ++Pos;
133   return true;
134 }
135 
136 void ScriptParserBase::expect(StringRef Expect) {
137   if (Error)
138     return;
139   StringRef Tok = next();
140   if (Tok != Expect)
141     setError(Expect + " expected, but got " + Tok);
142 }
143 
144 // Returns the current line number.
145 size_t ScriptParserBase::getPos() {
146   if (Pos == 0)
147     return 1;
148   const char *Begin = Input.data();
149   const char *Tok = Tokens[Pos - 1].data();
150   return StringRef(Begin, Tok - Begin).count('\n') + 1;
151 }
152 
153 std::vector<uint8_t> ScriptParserBase::parseHex(StringRef S) {
154   std::vector<uint8_t> Hex;
155   while (!S.empty()) {
156     StringRef B = S.substr(0, 2);
157     S = S.substr(2);
158     uint8_t H;
159     if (B.getAsInteger(16, H)) {
160       setError("not a hexadecimal value: " + B);
161       return {};
162     }
163     Hex.push_back(H);
164   }
165   return Hex;
166 }
167