1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Windows-specific. 11 // A parser for the module-definition file (.def file). 12 // 13 // The format of module-definition files are described in this document: 14 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "llvm/Object/COFFModuleDefinition.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/ADT/StringSwitch.h" 21 #include "llvm/Object/COFF.h" 22 #include "llvm/Object/COFFImportFile.h" 23 #include "llvm/Object/Error.h" 24 #include "llvm/Support/Error.h" 25 #include "llvm/Support/Path.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 using namespace llvm::COFF; 29 using namespace llvm; 30 31 namespace llvm { 32 namespace object { 33 34 enum Kind { 35 Unknown, 36 Eof, 37 Identifier, 38 Comma, 39 Equal, 40 KwBase, 41 KwConstant, 42 KwData, 43 KwExports, 44 KwHeapsize, 45 KwLibrary, 46 KwName, 47 KwNoname, 48 KwPrivate, 49 KwStacksize, 50 KwVersion, 51 }; 52 53 struct Token { 54 explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {} 55 Kind K; 56 StringRef Value; 57 }; 58 59 static bool isDecorated(StringRef Sym, bool MingwDef) { 60 // In def files, the symbols can either be listed decorated or undecorated. 61 // 62 // - For cdecl symbols, only the undecorated form is allowed. 63 // - For fastcall and vectorcall symbols, both fully decorated or 64 // undecorated forms can be present. 65 // - For stdcall symbols in non-MinGW environments, the decorated form is 66 // fully decorated with leading underscore and trailing stack argument 67 // size - like "_Func@0". 68 // - In MinGW def files, a decorated stdcall symbol does not include the 69 // leading underscore though, like "Func@0". 70 71 // This function controls whether a leading underscore should be added to 72 // the given symbol name or not. For MinGW, treat a stdcall symbol name such 73 // as "Func@0" as undecorated, i.e. a leading underscore must be added. 74 // For non-MinGW, look for '@' in the whole string and consider "_Func@0" 75 // as decorated, i.e. don't add any more leading underscores. 76 // We can't check for a leading underscore here, since function names 77 // themselves can start with an underscore, while a second one still needs 78 // to be added. 79 return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") || 80 (!MingwDef && Sym.contains('@')); 81 } 82 83 static Error createError(const Twine &Err) { 84 return make_error<StringError>(StringRef(Err.str()), 85 object_error::parse_failed); 86 } 87 88 class Lexer { 89 public: 90 Lexer(StringRef S) : Buf(S) {} 91 92 Token lex() { 93 Buf = Buf.trim(); 94 if (Buf.empty()) 95 return Token(Eof); 96 97 switch (Buf[0]) { 98 case '\0': 99 return Token(Eof); 100 case ';': { 101 size_t End = Buf.find('\n'); 102 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); 103 return lex(); 104 } 105 case '=': 106 Buf = Buf.drop_front(); 107 // GNU dlltool accepts both = and ==. 108 if (Buf.startswith("=")) 109 Buf = Buf.drop_front(); 110 return Token(Equal, "="); 111 case ',': 112 Buf = Buf.drop_front(); 113 return Token(Comma, ","); 114 case '"': { 115 StringRef S; 116 std::tie(S, Buf) = Buf.substr(1).split('"'); 117 return Token(Identifier, S); 118 } 119 default: { 120 size_t End = Buf.find_first_of("=,\r\n \t\v"); 121 StringRef Word = Buf.substr(0, End); 122 Kind K = llvm::StringSwitch<Kind>(Word) 123 .Case("BASE", KwBase) 124 .Case("CONSTANT", KwConstant) 125 .Case("DATA", KwData) 126 .Case("EXPORTS", KwExports) 127 .Case("HEAPSIZE", KwHeapsize) 128 .Case("LIBRARY", KwLibrary) 129 .Case("NAME", KwName) 130 .Case("NONAME", KwNoname) 131 .Case("PRIVATE", KwPrivate) 132 .Case("STACKSIZE", KwStacksize) 133 .Case("VERSION", KwVersion) 134 .Default(Identifier); 135 Buf = (End == Buf.npos) ? "" : Buf.drop_front(End); 136 return Token(K, Word); 137 } 138 } 139 } 140 141 private: 142 StringRef Buf; 143 }; 144 145 class Parser { 146 public: 147 explicit Parser(StringRef S, MachineTypes M, bool B) 148 : Lex(S), Machine(M), MingwDef(B) {} 149 150 Expected<COFFModuleDefinition> parse() { 151 do { 152 if (Error Err = parseOne()) 153 return std::move(Err); 154 } while (Tok.K != Eof); 155 return Info; 156 } 157 158 private: 159 void read() { 160 if (Stack.empty()) { 161 Tok = Lex.lex(); 162 return; 163 } 164 Tok = Stack.back(); 165 Stack.pop_back(); 166 } 167 168 Error readAsInt(uint64_t *I) { 169 read(); 170 if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I)) 171 return createError("integer expected"); 172 return Error::success(); 173 } 174 175 Error expect(Kind Expected, StringRef Msg) { 176 read(); 177 if (Tok.K != Expected) 178 return createError(Msg); 179 return Error::success(); 180 } 181 182 void unget() { Stack.push_back(Tok); } 183 184 Error parseOne() { 185 read(); 186 switch (Tok.K) { 187 case Eof: 188 return Error::success(); 189 case KwExports: 190 for (;;) { 191 read(); 192 if (Tok.K != Identifier) { 193 unget(); 194 return Error::success(); 195 } 196 if (Error Err = parseExport()) 197 return Err; 198 } 199 case KwHeapsize: 200 return parseNumbers(&Info.HeapReserve, &Info.HeapCommit); 201 case KwStacksize: 202 return parseNumbers(&Info.StackReserve, &Info.StackCommit); 203 case KwLibrary: 204 case KwName: { 205 bool IsDll = Tok.K == KwLibrary; // Check before parseName. 206 std::string Name; 207 if (Error Err = parseName(&Name, &Info.ImageBase)) 208 return Err; 209 210 Info.ImportName = Name; 211 212 // Set the output file, but don't override /out if it was already passed. 213 if (Info.OutputFile.empty()) { 214 Info.OutputFile = Name; 215 // Append the appropriate file extension if not already present. 216 if (!sys::path::has_extension(Name)) 217 Info.OutputFile += IsDll ? ".dll" : ".exe"; 218 } 219 220 return Error::success(); 221 } 222 case KwVersion: 223 return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion); 224 default: 225 return createError("unknown directive: " + Tok.Value); 226 } 227 } 228 229 Error parseExport() { 230 COFFShortExport E; 231 E.Name = Tok.Value; 232 read(); 233 if (Tok.K == Equal) { 234 read(); 235 if (Tok.K != Identifier) 236 return createError("identifier expected, but got " + Tok.Value); 237 E.ExtName = E.Name; 238 E.Name = Tok.Value; 239 } else { 240 unget(); 241 } 242 243 if (Machine == IMAGE_FILE_MACHINE_I386) { 244 if (!isDecorated(E.Name, MingwDef)) 245 E.Name = (std::string("_").append(E.Name)); 246 if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef)) 247 E.ExtName = (std::string("_").append(E.ExtName)); 248 } 249 250 for (;;) { 251 read(); 252 if (Tok.K == Identifier && Tok.Value[0] == '@') { 253 if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) { 254 // Not an ordinal modifier at all, but the next export (fastcall 255 // decorated) - complete the current one. 256 unget(); 257 Info.Exports.push_back(E); 258 return Error::success(); 259 } 260 read(); 261 if (Tok.K == KwNoname) { 262 E.Noname = true; 263 } else { 264 unget(); 265 } 266 continue; 267 } 268 if (Tok.K == KwData) { 269 E.Data = true; 270 continue; 271 } 272 if (Tok.K == KwConstant) { 273 E.Constant = true; 274 continue; 275 } 276 if (Tok.K == KwPrivate) { 277 E.Private = true; 278 continue; 279 } 280 unget(); 281 Info.Exports.push_back(E); 282 return Error::success(); 283 } 284 } 285 286 // HEAPSIZE/STACKSIZE reserve[,commit] 287 Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) { 288 if (Error Err = readAsInt(Reserve)) 289 return Err; 290 read(); 291 if (Tok.K != Comma) { 292 unget(); 293 Commit = nullptr; 294 return Error::success(); 295 } 296 if (Error Err = readAsInt(Commit)) 297 return Err; 298 return Error::success(); 299 } 300 301 // NAME outputPath [BASE=address] 302 Error parseName(std::string *Out, uint64_t *Baseaddr) { 303 read(); 304 if (Tok.K == Identifier) { 305 *Out = Tok.Value; 306 } else { 307 *Out = ""; 308 unget(); 309 return Error::success(); 310 } 311 read(); 312 if (Tok.K == KwBase) { 313 if (Error Err = expect(Equal, "'=' expected")) 314 return Err; 315 if (Error Err = readAsInt(Baseaddr)) 316 return Err; 317 } else { 318 unget(); 319 *Baseaddr = 0; 320 } 321 return Error::success(); 322 } 323 324 // VERSION major[.minor] 325 Error parseVersion(uint32_t *Major, uint32_t *Minor) { 326 read(); 327 if (Tok.K != Identifier) 328 return createError("identifier expected, but got " + Tok.Value); 329 StringRef V1, V2; 330 std::tie(V1, V2) = Tok.Value.split('.'); 331 if (V1.getAsInteger(10, *Major)) 332 return createError("integer expected, but got " + Tok.Value); 333 if (V2.empty()) 334 *Minor = 0; 335 else if (V2.getAsInteger(10, *Minor)) 336 return createError("integer expected, but got " + Tok.Value); 337 return Error::success(); 338 } 339 340 Lexer Lex; 341 Token Tok; 342 std::vector<Token> Stack; 343 MachineTypes Machine; 344 COFFModuleDefinition Info; 345 bool MingwDef; 346 }; 347 348 Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB, 349 MachineTypes Machine, 350 bool MingwDef) { 351 return Parser(MB.getBuffer(), Machine, MingwDef).parse(); 352 } 353 354 } // namespace object 355 } // namespace llvm 356