1 //===- ScriptParser.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the base parser class for linker script and dynamic 11 // list. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ScriptParser.h" 16 #include "Error.h" 17 #include "llvm/ADT/Twine.h" 18 19 using namespace llvm; 20 using namespace lld; 21 using namespace lld::elf; 22 23 // Returns a whole line containing the current token. 24 StringRef ScriptParserBase::getLine() { 25 StringRef S = getCurrentMB().getBuffer(); 26 StringRef Tok = Tokens[Pos - 1]; 27 28 size_t Pos = S.rfind('\n', Tok.data() - S.data()); 29 if (Pos != StringRef::npos) 30 S = S.substr(Pos + 1); 31 return S.substr(0, S.find_first_of("\r\n")); 32 } 33 34 // Returns 1-based line number of the current token. 35 size_t ScriptParserBase::getLineNumber() { 36 StringRef S = getCurrentMB().getBuffer(); 37 StringRef Tok = Tokens[Pos - 1]; 38 return S.substr(0, Tok.data() - S.data()).count('\n') + 1; 39 } 40 41 // Returns 0-based column number of the current token. 42 size_t ScriptParserBase::getColumnNumber() { 43 StringRef Tok = Tokens[Pos - 1]; 44 return Tok.data() - getLine().data(); 45 } 46 47 std::string ScriptParserBase::getCurrentLocation() { 48 std::string Filename = getCurrentMB().getBufferIdentifier(); 49 if (!Pos) 50 return Filename; 51 return (Filename + ":" + Twine(getLineNumber())).str(); 52 } 53 54 ScriptParserBase::ScriptParserBase(MemoryBufferRef MB) { tokenize(MB); } 55 56 // We don't want to record cascading errors. Keep only the first one. 57 void ScriptParserBase::setError(const Twine &Msg) { 58 if (Error) 59 return; 60 Error = true; 61 62 if (!Pos) { 63 error(getCurrentLocation() + ": " + Msg); 64 return; 65 } 66 67 std::string S = getCurrentLocation() + ": "; 68 error(S + Msg); 69 error(S + getLine()); 70 error(S + std::string(getColumnNumber(), ' ') + "^"); 71 } 72 73 // Split S into linker script tokens. 74 void ScriptParserBase::tokenize(MemoryBufferRef MB) { 75 std::vector<StringRef> Vec; 76 MBs.push_back(MB); 77 StringRef S = MB.getBuffer(); 78 StringRef Begin = S; 79 80 for (;;) { 81 S = skipSpace(S); 82 if (S.empty()) 83 break; 84 85 // Quoted token. Note that double-quote characters are parts of a token 86 // because, in a glob match context, only unquoted tokens are interpreted 87 // as glob patterns. Double-quoted tokens are literal patterns in that 88 // context. 89 if (S.startswith("\"")) { 90 size_t E = S.find("\"", 1); 91 if (E == StringRef::npos) { 92 StringRef Filename = MB.getBufferIdentifier(); 93 size_t Lineno = Begin.substr(0, S.data() - Begin.data()).count('\n'); 94 error(Filename + ":" + Twine(Lineno + 1) + ": unclosed quote"); 95 return; 96 } 97 98 Vec.push_back(S.take_front(E + 1)); 99 S = S.substr(E + 1); 100 continue; 101 } 102 103 // Unquoted token. This is more relaxed than tokens in C-like language, 104 // so that you can write "file-name.cpp" as one bare token, for example. 105 size_t Pos = S.find_first_not_of( 106 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" 107 "0123456789_.$/\\~=+[]*?-:!<>^"); 108 109 // A character that cannot start a word (which is usually a 110 // punctuation) forms a single character token. 111 if (Pos == 0) 112 Pos = 1; 113 Vec.push_back(S.substr(0, Pos)); 114 S = S.substr(Pos); 115 } 116 117 Tokens.insert(Tokens.begin() + Pos, Vec.begin(), Vec.end()); 118 } 119 120 // Skip leading whitespace characters or comments. 121 StringRef ScriptParserBase::skipSpace(StringRef S) { 122 for (;;) { 123 if (S.startswith("/*")) { 124 size_t E = S.find("*/", 2); 125 if (E == StringRef::npos) { 126 error("unclosed comment in a linker script"); 127 return ""; 128 } 129 S = S.substr(E + 2); 130 continue; 131 } 132 if (S.startswith("#")) { 133 size_t E = S.find('\n', 1); 134 if (E == StringRef::npos) 135 E = S.size() - 1; 136 S = S.substr(E + 1); 137 continue; 138 } 139 size_t Size = S.size(); 140 S = S.ltrim(); 141 if (S.size() == Size) 142 return S; 143 } 144 } 145 146 // An erroneous token is handled as if it were the last token before EOF. 147 bool ScriptParserBase::atEOF() { return Error || Tokens.size() == Pos; } 148 149 StringRef ScriptParserBase::next() { 150 if (Error) 151 return ""; 152 if (atEOF()) { 153 setError("unexpected EOF"); 154 return ""; 155 } 156 return Tokens[Pos++]; 157 } 158 159 StringRef ScriptParserBase::peek() { 160 StringRef Tok = next(); 161 if (Error) 162 return ""; 163 --Pos; 164 return Tok; 165 } 166 167 bool ScriptParserBase::consume(StringRef Tok) { 168 if (peek() == Tok) { 169 skip(); 170 return true; 171 } 172 return false; 173 } 174 175 void ScriptParserBase::skip() { (void)next(); } 176 177 void ScriptParserBase::expect(StringRef Expect) { 178 if (Error) 179 return; 180 StringRef Tok = next(); 181 if (Tok != Expect) 182 setError(Expect + " expected, but got " + Tok); 183 } 184 185 // Returns true if S encloses T. 186 static bool encloses(StringRef S, StringRef T) { 187 return S.bytes_begin() <= T.bytes_begin() && T.bytes_end() <= S.bytes_end(); 188 } 189 190 MemoryBufferRef ScriptParserBase::getCurrentMB() { 191 // Find input buffer containing the current token. 192 assert(!MBs.empty()); 193 if (!Pos) 194 return MBs[0]; 195 196 for (MemoryBufferRef MB : MBs) 197 if (encloses(MB.getBuffer(), Tokens[Pos - 1])) 198 return MB; 199 llvm_unreachable("getCurrentMB: failed to find a token"); 200 } 201