1 //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Windows-specific.
11 // A parser for the module-definition file (.def file).
12 //
// The format of module-definition files is described in this document:
14 // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
15 //
16 //===----------------------------------------------------------------------===//
17 
18 #include "llvm/Object/COFFModuleDefinition.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/StringSwitch.h"
21 #include "llvm/Object/COFF.h"
22 #include "llvm/Object/COFFImportFile.h"
23 #include "llvm/Object/Error.h"
24 #include "llvm/Support/Error.h"
25 #include "llvm/Support/Path.h"
26 #include "llvm/Support/raw_ostream.h"
27 
28 using namespace llvm::COFF;
29 using namespace llvm;
30 
31 namespace llvm {
32 namespace object {
33 
// Categories of tokens produced by Lexer::lex(). The Kw* enumerators stand for
// the upper-case keywords the lexer recognizes; every other bare word or
// quoted string is reported as Identifier.
enum Kind {
  Unknown,     // Kind of a default-constructed Token.
  Eof,         // End of the input buffer.
  Identifier,  // A non-keyword word or a double-quoted string.
  Comma,       // ','
  Equal,       // '=' (the lexer also reports "==" as Equal).
  KwBase,      // "BASE"
  KwConstant,  // "CONSTANT"
  KwData,      // "DATA"
  KwExports,   // "EXPORTS"
  KwHeapsize,  // "HEAPSIZE"
  KwLibrary,   // "LIBRARY"
  KwName,      // "NAME"
  KwNoname,    // "NONAME"
  KwPrivate,   // "PRIVATE"
  KwStacksize, // "STACKSIZE"
  KwVersion,   // "VERSION"
};
52 
53 struct Token {
54   explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
55   Kind K;
56   StringRef Value;
57 };
58 
59 static bool isDecorated(StringRef Sym, bool MingwDef) {
60   // In def files, the symbols can either be listed decorated or undecorated.
61   //
62   // - For cdecl symbols, only the undecorated form is allowed.
63   // - For fastcall and vectorcall symbols, both fully decorated or
64   //   undecorated forms can be present.
65   // - For stdcall symbols in non-MinGW environments, the decorated form is
66   //   fully decorated with leading underscore and trailing stack argument
67   //   size - like "_Func@0".
68   // - In MinGW def files, a decorated stdcall symbol does not include the
69   //   leading underscore though, like "Func@0".
70 
71   // This function controls whether a leading underscore should be added to
72   // the given symbol name or not. For MinGW, treat a stdcall symbol name such
73   // as "Func@0" as undecorated, i.e. a leading underscore must be added.
74   // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
75   // as decorated, i.e. don't add any more leading underscores.
76   // We can't check for a leading underscore here, since function names
77   // themselves can start with an underscore, while a second one still needs
78   // to be added.
79   return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
80          (!MingwDef && Sym.contains('@'));
81 }
82 
83 static Error createError(const Twine &Err) {
84   return make_error<StringError>(StringRef(Err.str()),
85                                  object_error::parse_failed);
86 }
87 
88 class Lexer {
89 public:
90   Lexer(StringRef S) : Buf(S) {}
91 
92   Token lex() {
93     Buf = Buf.trim();
94     if (Buf.empty())
95       return Token(Eof);
96 
97     switch (Buf[0]) {
98     case '\0':
99       return Token(Eof);
100     case ';': {
101       size_t End = Buf.find('\n');
102       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
103       return lex();
104     }
105     case '=':
106       Buf = Buf.drop_front();
107       // GNU dlltool accepts both = and ==.
108       if (Buf.startswith("="))
109         Buf = Buf.drop_front();
110       return Token(Equal, "=");
111     case ',':
112       Buf = Buf.drop_front();
113       return Token(Comma, ",");
114     case '"': {
115       StringRef S;
116       std::tie(S, Buf) = Buf.substr(1).split('"');
117       return Token(Identifier, S);
118     }
119     default: {
120       size_t End = Buf.find_first_of("=,\r\n \t\v");
121       StringRef Word = Buf.substr(0, End);
122       Kind K = llvm::StringSwitch<Kind>(Word)
123                    .Case("BASE", KwBase)
124                    .Case("CONSTANT", KwConstant)
125                    .Case("DATA", KwData)
126                    .Case("EXPORTS", KwExports)
127                    .Case("HEAPSIZE", KwHeapsize)
128                    .Case("LIBRARY", KwLibrary)
129                    .Case("NAME", KwName)
130                    .Case("NONAME", KwNoname)
131                    .Case("PRIVATE", KwPrivate)
132                    .Case("STACKSIZE", KwStacksize)
133                    .Case("VERSION", KwVersion)
134                    .Default(Identifier);
135       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
136       return Token(K, Word);
137     }
138     }
139   }
140 
141 private:
142   StringRef Buf;
143 };
144 
145 class Parser {
146 public:
147   explicit Parser(StringRef S, MachineTypes M, bool B)
148       : Lex(S), Machine(M), MingwDef(B) {}
149 
150   Expected<COFFModuleDefinition> parse() {
151     do {
152       if (Error Err = parseOne())
153         return std::move(Err);
154     } while (Tok.K != Eof);
155     return Info;
156   }
157 
158 private:
159   void read() {
160     if (Stack.empty()) {
161       Tok = Lex.lex();
162       return;
163     }
164     Tok = Stack.back();
165     Stack.pop_back();
166   }
167 
168   Error readAsInt(uint64_t *I) {
169     read();
170     if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
171       return createError("integer expected");
172     return Error::success();
173   }
174 
175   Error expect(Kind Expected, StringRef Msg) {
176     read();
177     if (Tok.K != Expected)
178       return createError(Msg);
179     return Error::success();
180   }
181 
182   void unget() { Stack.push_back(Tok); }
183 
184   Error parseOne() {
185     read();
186     switch (Tok.K) {
187     case Eof:
188       return Error::success();
189     case KwExports:
190       for (;;) {
191         read();
192         if (Tok.K != Identifier) {
193           unget();
194           return Error::success();
195         }
196         if (Error Err = parseExport())
197           return Err;
198       }
199     case KwHeapsize:
200       return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
201     case KwStacksize:
202       return parseNumbers(&Info.StackReserve, &Info.StackCommit);
203     case KwLibrary:
204     case KwName: {
205       bool IsDll = Tok.K == KwLibrary; // Check before parseName.
206       std::string Name;
207       if (Error Err = parseName(&Name, &Info.ImageBase))
208         return Err;
209 
210       Info.ImportName = Name;
211 
212       // Set the output file, but don't override /out if it was already passed.
213       if (Info.OutputFile.empty()) {
214         Info.OutputFile = Name;
215         // Append the appropriate file extension if not already present.
216         if (!sys::path::has_extension(Name))
217           Info.OutputFile += IsDll ? ".dll" : ".exe";
218       }
219 
220       return Error::success();
221     }
222     case KwVersion:
223       return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
224     default:
225       return createError("unknown directive: " + Tok.Value);
226     }
227   }
228 
229   Error parseExport() {
230     COFFShortExport E;
231     E.Name = Tok.Value;
232     read();
233     if (Tok.K == Equal) {
234       read();
235       if (Tok.K != Identifier)
236         return createError("identifier expected, but got " + Tok.Value);
237       E.ExtName = E.Name;
238       E.Name = Tok.Value;
239     } else {
240       unget();
241     }
242 
243     if (Machine == IMAGE_FILE_MACHINE_I386) {
244       if (!isDecorated(E.Name, MingwDef))
245         E.Name = (std::string("_").append(E.Name));
246       if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
247         E.ExtName = (std::string("_").append(E.ExtName));
248     }
249 
250     for (;;) {
251       read();
252       if (Tok.K == Identifier && Tok.Value[0] == '@') {
253         if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
254           // Not an ordinal modifier at all, but the next export (fastcall
255           // decorated) - complete the current one.
256           unget();
257           Info.Exports.push_back(E);
258           return Error::success();
259         }
260         read();
261         if (Tok.K == KwNoname) {
262           E.Noname = true;
263         } else {
264           unget();
265         }
266         continue;
267       }
268       if (Tok.K == KwData) {
269         E.Data = true;
270         continue;
271       }
272       if (Tok.K == KwConstant) {
273         E.Constant = true;
274         continue;
275       }
276       if (Tok.K == KwPrivate) {
277         E.Private = true;
278         continue;
279       }
280       unget();
281       Info.Exports.push_back(E);
282       return Error::success();
283     }
284   }
285 
286   // HEAPSIZE/STACKSIZE reserve[,commit]
287   Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
288     if (Error Err = readAsInt(Reserve))
289       return Err;
290     read();
291     if (Tok.K != Comma) {
292       unget();
293       Commit = nullptr;
294       return Error::success();
295     }
296     if (Error Err = readAsInt(Commit))
297       return Err;
298     return Error::success();
299   }
300 
301   // NAME outputPath [BASE=address]
302   Error parseName(std::string *Out, uint64_t *Baseaddr) {
303     read();
304     if (Tok.K == Identifier) {
305       *Out = Tok.Value;
306     } else {
307       *Out = "";
308       unget();
309       return Error::success();
310     }
311     read();
312     if (Tok.K == KwBase) {
313       if (Error Err = expect(Equal, "'=' expected"))
314         return Err;
315       if (Error Err = readAsInt(Baseaddr))
316         return Err;
317     } else {
318       unget();
319       *Baseaddr = 0;
320     }
321     return Error::success();
322   }
323 
324   // VERSION major[.minor]
325   Error parseVersion(uint32_t *Major, uint32_t *Minor) {
326     read();
327     if (Tok.K != Identifier)
328       return createError("identifier expected, but got " + Tok.Value);
329     StringRef V1, V2;
330     std::tie(V1, V2) = Tok.Value.split('.');
331     if (V1.getAsInteger(10, *Major))
332       return createError("integer expected, but got " + Tok.Value);
333     if (V2.empty())
334       *Minor = 0;
335     else if (V2.getAsInteger(10, *Minor))
336       return createError("integer expected, but got " + Tok.Value);
337     return Error::success();
338   }
339 
340   Lexer Lex;
341   Token Tok;
342   std::vector<Token> Stack;
343   MachineTypes Machine;
344   COFFModuleDefinition Info;
345   bool MingwDef;
346 };
347 
348 Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
349                                                          MachineTypes Machine,
350                                                          bool MingwDef) {
351   return Parser(MB.getBuffer(), Machine, MingwDef).parse();
352 }
353 
354 } // namespace object
355 } // namespace llvm
356