1 //===- ScriptParser.cpp ---------------------------------------------------===// 2 // 3 // The LLVM Linker 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the base parser class for linker script and dynamic 11 // list. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "ScriptParser.h" 16 #include "Error.h" 17 #include "llvm/ADT/Twine.h" 18 19 using namespace llvm; 20 using namespace lld; 21 using namespace lld::elf; 22 23 // Returns the line that the token Tok is in. 24 static StringRef getLine(StringRef Data, StringRef Tok) { 25 size_t Pos = Tok.data() - Data.data(); 26 size_t Begin = Data.rfind('\n', Pos); 27 size_t End = Data.find('\n', Pos); 28 Begin = (Begin == StringRef::npos) ? 0 : Begin + 1; 29 if (End == StringRef::npos) 30 End = Data.size(); 31 // rtrim for DOS-style newlines. 32 return Data.substr(Begin, End - Begin).rtrim(); 33 } 34 35 static std::pair<size_t, size_t> getPos(StringRef Data, StringRef Tok) { 36 StringRef Line = getLine(Data, Tok); 37 size_t LineNo = 38 StringRef(Data.data(), Tok.data() - Data.data()).count('\n') + 1; 39 return {LineNo, Tok.data() - Line.data()}; 40 } 41 42 ScriptParserBase::ScriptParserBase(MemoryBufferRef MB) { tokenize(MB); } 43 44 // We don't want to record cascading errors. Keep only the first one. 45 void ScriptParserBase::setError(const Twine &Msg) { 46 if (Error) 47 return; 48 49 std::pair<size_t, size_t> ErrPos; 50 MemoryBufferRef MB = currentBuffer(); 51 std::string Location = MB.getBufferIdentifier(); 52 if (Pos) { 53 ErrPos = getPos(MB.getBuffer(), Tokens[Pos - 1]); 54 Location += ":"; 55 Location += std::to_string(ErrPos.first); 56 } 57 error(Location + ": " + Msg); 58 if (Pos) { 59 error(Location + ": " + getLine(MB.getBuffer(), Tokens[Pos - 1])); 60 error(Location + ": " + std::string(ErrPos.second, ' ') + "^"); 61 } 62 63 Error = true; 64 } 65 66 // Split S into linker script tokens. 67 void ScriptParserBase::tokenize(MemoryBufferRef MB) { 68 std::vector<StringRef> Ret; 69 MBs.push_back(MB); 70 StringRef S = MB.getBuffer(); 71 StringRef Begin = S; 72 for (;;) { 73 S = skipSpace(S); 74 if (S.empty()) 75 break; 76 77 // Quoted token. Note that double-quote characters are parts of a token 78 // because, in a glob match context, only unquoted tokens are interpreted 79 // as glob patterns. Double-quoted tokens are literal patterns in that 80 // context. 81 if (S.startswith("\"")) { 82 size_t E = S.find("\"", 1); 83 if (E == StringRef::npos) { 84 auto ErrPos = getPos(Begin, S); 85 error(MB.getBufferIdentifier() + ":" + Twine(ErrPos.first) + 86 ": unclosed quote"); 87 return; 88 } 89 Ret.push_back(S.take_front(E + 1)); 90 S = S.substr(E + 1); 91 continue; 92 } 93 94 // Unquoted token. This is more relaxed than tokens in C-like language, 95 // so that you can write "file-name.cpp" as one bare token, for example. 96 size_t Pos = S.find_first_not_of( 97 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" 98 "0123456789_.$/\\~=+[]*?-:!<>^"); 99 100 // A character that cannot start a word (which is usually a 101 // punctuation) forms a single character token. 102 if (Pos == 0) 103 Pos = 1; 104 Ret.push_back(S.substr(0, Pos)); 105 S = S.substr(Pos); 106 } 107 Tokens.insert(Tokens.begin() + Pos, Ret.begin(), Ret.end()); 108 } 109 110 // Skip leading whitespace characters or comments. 111 StringRef ScriptParserBase::skipSpace(StringRef S) { 112 for (;;) { 113 if (S.startswith("/*")) { 114 size_t E = S.find("*/", 2); 115 if (E == StringRef::npos) { 116 error("unclosed comment in a linker script"); 117 return ""; 118 } 119 S = S.substr(E + 2); 120 continue; 121 } 122 if (S.startswith("#")) { 123 size_t E = S.find('\n', 1); 124 if (E == StringRef::npos) 125 E = S.size() - 1; 126 S = S.substr(E + 1); 127 continue; 128 } 129 size_t Size = S.size(); 130 S = S.ltrim(); 131 if (S.size() == Size) 132 return S; 133 } 134 } 135 136 // An erroneous token is handled as if it were the last token before EOF. 137 bool ScriptParserBase::atEOF() { return Error || Tokens.size() == Pos; } 138 139 StringRef ScriptParserBase::next() { 140 if (Error) 141 return ""; 142 if (atEOF()) { 143 setError("unexpected EOF"); 144 return ""; 145 } 146 return Tokens[Pos++]; 147 } 148 149 StringRef ScriptParserBase::peek() { 150 StringRef Tok = next(); 151 if (Error) 152 return ""; 153 --Pos; 154 return Tok; 155 } 156 157 bool ScriptParserBase::consume(StringRef Tok) { 158 if (peek() == Tok) { 159 skip(); 160 return true; 161 } 162 return false; 163 } 164 165 void ScriptParserBase::skip() { (void)next(); } 166 167 void ScriptParserBase::expect(StringRef Expect) { 168 if (Error) 169 return; 170 StringRef Tok = next(); 171 if (Tok != Expect) 172 setError(Expect + " expected, but got " + Tok); 173 } 174 175 // Returns true if string 'Bigger' contains string 'Shorter'. 176 static bool containsString(StringRef Bigger, StringRef Shorter) { 177 const char *BiggerEnd = Bigger.data() + Bigger.size(); 178 const char *ShorterEnd = Shorter.data() + Shorter.size(); 179 180 return Bigger.data() <= Shorter.data() && BiggerEnd >= ShorterEnd; 181 } 182 183 MemoryBufferRef ScriptParserBase::currentBuffer() { 184 // Find input buffer containing the current token. 185 assert(!MBs.empty()); 186 if (Pos) 187 for (MemoryBufferRef MB : MBs) 188 if (containsString(MB.getBuffer(), Tokens[Pos - 1])) 189 return MB; 190 191 return MBs.front(); 192 } 193