1146eb7a6SReid Kleckner //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2146eb7a6SReid Kleckner //
3146eb7a6SReid Kleckner //                     The LLVM Compiler Infrastructure
4146eb7a6SReid Kleckner //
5146eb7a6SReid Kleckner // This file is distributed under the University of Illinois Open Source
6146eb7a6SReid Kleckner // License. See LICENSE.TXT for details.
7146eb7a6SReid Kleckner //
8146eb7a6SReid Kleckner //===----------------------------------------------------------------------===//
9146eb7a6SReid Kleckner //
10146eb7a6SReid Kleckner // Windows-specific.
11146eb7a6SReid Kleckner // A parser for the module-definition file (.def file).
12146eb7a6SReid Kleckner //
// The format of module-definition files is described in this document:
14146eb7a6SReid Kleckner // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
15146eb7a6SReid Kleckner //
16146eb7a6SReid Kleckner //===----------------------------------------------------------------------===//
17146eb7a6SReid Kleckner 
18146eb7a6SReid Kleckner #include "llvm/Object/COFFModuleDefinition.h"
19146eb7a6SReid Kleckner #include "llvm/ADT/StringRef.h"
20146eb7a6SReid Kleckner #include "llvm/ADT/StringSwitch.h"
21146eb7a6SReid Kleckner #include "llvm/Object/COFF.h"
22146eb7a6SReid Kleckner #include "llvm/Object/COFFImportFile.h"
23146eb7a6SReid Kleckner #include "llvm/Object/Error.h"
24146eb7a6SReid Kleckner #include "llvm/Support/Error.h"
25e234901aSSaleem Abdulrasool #include "llvm/Support/Path.h"
26146eb7a6SReid Kleckner #include "llvm/Support/raw_ostream.h"
27146eb7a6SReid Kleckner 
28146eb7a6SReid Kleckner using namespace llvm::COFF;
29146eb7a6SReid Kleckner using namespace llvm;
30146eb7a6SReid Kleckner 
31146eb7a6SReid Kleckner namespace llvm {
32146eb7a6SReid Kleckner namespace object {
33146eb7a6SReid Kleckner 
// Token kinds produced by the Lexer. Enumerator order (and therefore the
// implicit numeric values) is kept stable.
enum Kind {
  // Structural tokens.
  Unknown,    // Default kind of a default-constructed Token.
  Eof,        // End of input.
  Identifier, // Bare word or quoted string that is not a keyword.
  Comma,      // ","
  Equal,      // "="
  EqualEqual, // "=="

  // Keywords, matched case-sensitively in upper case.
  KwBase,      // BASE
  KwConstant,  // CONSTANT
  KwData,      // DATA
  KwExports,   // EXPORTS
  KwHeapsize,  // HEAPSIZE
  KwLibrary,   // LIBRARY
  KwName,      // NAME
  KwNoname,    // NONAME
  KwPrivate,   // PRIVATE
  KwStacksize, // STACKSIZE
  KwVersion,   // VERSION
};
53146eb7a6SReid Kleckner 
54146eb7a6SReid Kleckner struct Token {
55146eb7a6SReid Kleckner   explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
56146eb7a6SReid Kleckner   Kind K;
57146eb7a6SReid Kleckner   StringRef Value;
58146eb7a6SReid Kleckner };
59146eb7a6SReid Kleckner 
601079ef8dSMartell Malone static bool isDecorated(StringRef Sym, bool MingwDef) {
61843cbbddSMartin Storsjo   // In def files, the symbols can either be listed decorated or undecorated.
62843cbbddSMartin Storsjo   //
63843cbbddSMartin Storsjo   // - For cdecl symbols, only the undecorated form is allowed.
64843cbbddSMartin Storsjo   // - For fastcall and vectorcall symbols, both fully decorated or
65843cbbddSMartin Storsjo   //   undecorated forms can be present.
66843cbbddSMartin Storsjo   // - For stdcall symbols in non-MinGW environments, the decorated form is
67843cbbddSMartin Storsjo   //   fully decorated with leading underscore and trailing stack argument
68843cbbddSMartin Storsjo   //   size - like "_Func@0".
69843cbbddSMartin Storsjo   // - In MinGW def files, a decorated stdcall symbol does not include the
70843cbbddSMartin Storsjo   //   leading underscore though, like "Func@0".
71843cbbddSMartin Storsjo 
72843cbbddSMartin Storsjo   // This function controls whether a leading underscore should be added to
73843cbbddSMartin Storsjo   // the given symbol name or not. For MinGW, treat a stdcall symbol name such
74843cbbddSMartin Storsjo   // as "Func@0" as undecorated, i.e. a leading underscore must be added.
75843cbbddSMartin Storsjo   // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
76843cbbddSMartin Storsjo   // as decorated, i.e. don't add any more leading underscores.
77843cbbddSMartin Storsjo   // We can't check for a leading underscore here, since function names
78843cbbddSMartin Storsjo   // themselves can start with an underscore, while a second one still needs
79843cbbddSMartin Storsjo   // to be added.
80843cbbddSMartin Storsjo   return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
81843cbbddSMartin Storsjo          (!MingwDef && Sym.contains('@'));
82146eb7a6SReid Kleckner }
83146eb7a6SReid Kleckner 
84146eb7a6SReid Kleckner static Error createError(const Twine &Err) {
85146eb7a6SReid Kleckner   return make_error<StringError>(StringRef(Err.str()),
86146eb7a6SReid Kleckner                                  object_error::parse_failed);
87146eb7a6SReid Kleckner }
88146eb7a6SReid Kleckner 
89146eb7a6SReid Kleckner class Lexer {
90146eb7a6SReid Kleckner public:
91146eb7a6SReid Kleckner   Lexer(StringRef S) : Buf(S) {}
92146eb7a6SReid Kleckner 
93146eb7a6SReid Kleckner   Token lex() {
94146eb7a6SReid Kleckner     Buf = Buf.trim();
95146eb7a6SReid Kleckner     if (Buf.empty())
96146eb7a6SReid Kleckner       return Token(Eof);
97146eb7a6SReid Kleckner 
98146eb7a6SReid Kleckner     switch (Buf[0]) {
99146eb7a6SReid Kleckner     case '\0':
100146eb7a6SReid Kleckner       return Token(Eof);
101146eb7a6SReid Kleckner     case ';': {
102146eb7a6SReid Kleckner       size_t End = Buf.find('\n');
103146eb7a6SReid Kleckner       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
104146eb7a6SReid Kleckner       return lex();
105146eb7a6SReid Kleckner     }
106146eb7a6SReid Kleckner     case '=':
107146eb7a6SReid Kleckner       Buf = Buf.drop_front();
108*284ab80fSMartin Storsjo       if (Buf.startswith("=")) {
1091079ef8dSMartell Malone         Buf = Buf.drop_front();
110*284ab80fSMartin Storsjo         return Token(EqualEqual, "==");
111*284ab80fSMartin Storsjo       }
112146eb7a6SReid Kleckner       return Token(Equal, "=");
113146eb7a6SReid Kleckner     case ',':
114146eb7a6SReid Kleckner       Buf = Buf.drop_front();
115146eb7a6SReid Kleckner       return Token(Comma, ",");
116146eb7a6SReid Kleckner     case '"': {
117146eb7a6SReid Kleckner       StringRef S;
118146eb7a6SReid Kleckner       std::tie(S, Buf) = Buf.substr(1).split('"');
119146eb7a6SReid Kleckner       return Token(Identifier, S);
120146eb7a6SReid Kleckner     }
121146eb7a6SReid Kleckner     default: {
122efb5024eSRui Ueyama       size_t End = Buf.find_first_of("=,;\r\n \t\v");
123146eb7a6SReid Kleckner       StringRef Word = Buf.substr(0, End);
124146eb7a6SReid Kleckner       Kind K = llvm::StringSwitch<Kind>(Word)
125146eb7a6SReid Kleckner                    .Case("BASE", KwBase)
126146eb7a6SReid Kleckner                    .Case("CONSTANT", KwConstant)
127146eb7a6SReid Kleckner                    .Case("DATA", KwData)
128146eb7a6SReid Kleckner                    .Case("EXPORTS", KwExports)
129146eb7a6SReid Kleckner                    .Case("HEAPSIZE", KwHeapsize)
130146eb7a6SReid Kleckner                    .Case("LIBRARY", KwLibrary)
131146eb7a6SReid Kleckner                    .Case("NAME", KwName)
132146eb7a6SReid Kleckner                    .Case("NONAME", KwNoname)
133146eb7a6SReid Kleckner                    .Case("PRIVATE", KwPrivate)
134146eb7a6SReid Kleckner                    .Case("STACKSIZE", KwStacksize)
135146eb7a6SReid Kleckner                    .Case("VERSION", KwVersion)
136146eb7a6SReid Kleckner                    .Default(Identifier);
137146eb7a6SReid Kleckner       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
138146eb7a6SReid Kleckner       return Token(K, Word);
139146eb7a6SReid Kleckner     }
140146eb7a6SReid Kleckner     }
141146eb7a6SReid Kleckner   }
142146eb7a6SReid Kleckner 
143146eb7a6SReid Kleckner private:
144146eb7a6SReid Kleckner   StringRef Buf;
145146eb7a6SReid Kleckner };
146146eb7a6SReid Kleckner 
147146eb7a6SReid Kleckner class Parser {
148146eb7a6SReid Kleckner public:
1491079ef8dSMartell Malone   explicit Parser(StringRef S, MachineTypes M, bool B)
1501079ef8dSMartell Malone       : Lex(S), Machine(M), MingwDef(B) {}
151146eb7a6SReid Kleckner 
152146eb7a6SReid Kleckner   Expected<COFFModuleDefinition> parse() {
153146eb7a6SReid Kleckner     do {
154146eb7a6SReid Kleckner       if (Error Err = parseOne())
155146eb7a6SReid Kleckner         return std::move(Err);
156146eb7a6SReid Kleckner     } while (Tok.K != Eof);
157146eb7a6SReid Kleckner     return Info;
158146eb7a6SReid Kleckner   }
159146eb7a6SReid Kleckner 
160146eb7a6SReid Kleckner private:
161146eb7a6SReid Kleckner   void read() {
162146eb7a6SReid Kleckner     if (Stack.empty()) {
163146eb7a6SReid Kleckner       Tok = Lex.lex();
164146eb7a6SReid Kleckner       return;
165146eb7a6SReid Kleckner     }
166146eb7a6SReid Kleckner     Tok = Stack.back();
167146eb7a6SReid Kleckner     Stack.pop_back();
168146eb7a6SReid Kleckner   }
169146eb7a6SReid Kleckner 
170146eb7a6SReid Kleckner   Error readAsInt(uint64_t *I) {
171146eb7a6SReid Kleckner     read();
172146eb7a6SReid Kleckner     if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
173146eb7a6SReid Kleckner       return createError("integer expected");
174146eb7a6SReid Kleckner     return Error::success();
175146eb7a6SReid Kleckner   }
176146eb7a6SReid Kleckner 
177146eb7a6SReid Kleckner   Error expect(Kind Expected, StringRef Msg) {
178146eb7a6SReid Kleckner     read();
179146eb7a6SReid Kleckner     if (Tok.K != Expected)
180146eb7a6SReid Kleckner       return createError(Msg);
181146eb7a6SReid Kleckner     return Error::success();
182146eb7a6SReid Kleckner   }
183146eb7a6SReid Kleckner 
184146eb7a6SReid Kleckner   void unget() { Stack.push_back(Tok); }
185146eb7a6SReid Kleckner 
186146eb7a6SReid Kleckner   Error parseOne() {
187146eb7a6SReid Kleckner     read();
188146eb7a6SReid Kleckner     switch (Tok.K) {
189146eb7a6SReid Kleckner     case Eof:
190146eb7a6SReid Kleckner       return Error::success();
191146eb7a6SReid Kleckner     case KwExports:
192146eb7a6SReid Kleckner       for (;;) {
193146eb7a6SReid Kleckner         read();
194146eb7a6SReid Kleckner         if (Tok.K != Identifier) {
195146eb7a6SReid Kleckner           unget();
196146eb7a6SReid Kleckner           return Error::success();
197146eb7a6SReid Kleckner         }
198146eb7a6SReid Kleckner         if (Error Err = parseExport())
199146eb7a6SReid Kleckner           return Err;
200146eb7a6SReid Kleckner       }
201146eb7a6SReid Kleckner     case KwHeapsize:
202146eb7a6SReid Kleckner       return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
203146eb7a6SReid Kleckner     case KwStacksize:
204146eb7a6SReid Kleckner       return parseNumbers(&Info.StackReserve, &Info.StackCommit);
205146eb7a6SReid Kleckner     case KwLibrary:
206146eb7a6SReid Kleckner     case KwName: {
207146eb7a6SReid Kleckner       bool IsDll = Tok.K == KwLibrary; // Check before parseName.
208146eb7a6SReid Kleckner       std::string Name;
209146eb7a6SReid Kleckner       if (Error Err = parseName(&Name, &Info.ImageBase))
210146eb7a6SReid Kleckner         return Err;
21108e5f685SSaleem Abdulrasool 
21208e5f685SSaleem Abdulrasool       Info.ImportName = Name;
213146eb7a6SReid Kleckner 
214146eb7a6SReid Kleckner       // Set the output file, but don't override /out if it was already passed.
21508e5f685SSaleem Abdulrasool       if (Info.OutputFile.empty()) {
216146eb7a6SReid Kleckner         Info.OutputFile = Name;
21708e5f685SSaleem Abdulrasool         // Append the appropriate file extension if not already present.
21808e5f685SSaleem Abdulrasool         if (!sys::path::has_extension(Name))
21908e5f685SSaleem Abdulrasool           Info.OutputFile += IsDll ? ".dll" : ".exe";
22008e5f685SSaleem Abdulrasool       }
22108e5f685SSaleem Abdulrasool 
222146eb7a6SReid Kleckner       return Error::success();
223146eb7a6SReid Kleckner     }
224146eb7a6SReid Kleckner     case KwVersion:
225146eb7a6SReid Kleckner       return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
226146eb7a6SReid Kleckner     default:
227146eb7a6SReid Kleckner       return createError("unknown directive: " + Tok.Value);
228146eb7a6SReid Kleckner     }
229146eb7a6SReid Kleckner   }
230146eb7a6SReid Kleckner 
231146eb7a6SReid Kleckner   Error parseExport() {
232146eb7a6SReid Kleckner     COFFShortExport E;
233146eb7a6SReid Kleckner     E.Name = Tok.Value;
234146eb7a6SReid Kleckner     read();
235146eb7a6SReid Kleckner     if (Tok.K == Equal) {
236146eb7a6SReid Kleckner       read();
237146eb7a6SReid Kleckner       if (Tok.K != Identifier)
238146eb7a6SReid Kleckner         return createError("identifier expected, but got " + Tok.Value);
239146eb7a6SReid Kleckner       E.ExtName = E.Name;
240146eb7a6SReid Kleckner       E.Name = Tok.Value;
241146eb7a6SReid Kleckner     } else {
242146eb7a6SReid Kleckner       unget();
243146eb7a6SReid Kleckner     }
244146eb7a6SReid Kleckner 
245146eb7a6SReid Kleckner     if (Machine == IMAGE_FILE_MACHINE_I386) {
2461079ef8dSMartell Malone       if (!isDecorated(E.Name, MingwDef))
247146eb7a6SReid Kleckner         E.Name = (std::string("_").append(E.Name));
2481079ef8dSMartell Malone       if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
249146eb7a6SReid Kleckner         E.ExtName = (std::string("_").append(E.ExtName));
250146eb7a6SReid Kleckner     }
251146eb7a6SReid Kleckner 
252146eb7a6SReid Kleckner     for (;;) {
253146eb7a6SReid Kleckner       read();
254146eb7a6SReid Kleckner       if (Tok.K == Identifier && Tok.Value[0] == '@') {
2556c1fd299SMartin Storsjo         if (Tok.Value == "@") {
2566c1fd299SMartin Storsjo           // "foo @ 10"
2576c1fd299SMartin Storsjo           read();
2586c1fd299SMartin Storsjo           Tok.Value.getAsInteger(10, E.Ordinal);
2596c1fd299SMartin Storsjo         } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
2606c1fd299SMartin Storsjo           // "foo \n @bar" - Not an ordinal modifier at all, but the next
2616c1fd299SMartin Storsjo           // export (fastcall decorated) - complete the current one.
26258c9527eSMartin Storsjo           unget();
26358c9527eSMartin Storsjo           Info.Exports.push_back(E);
26458c9527eSMartin Storsjo           return Error::success();
26558c9527eSMartin Storsjo         }
2666c1fd299SMartin Storsjo         // "foo @10"
267146eb7a6SReid Kleckner         read();
268146eb7a6SReid Kleckner         if (Tok.K == KwNoname) {
269146eb7a6SReid Kleckner           E.Noname = true;
270146eb7a6SReid Kleckner         } else {
271146eb7a6SReid Kleckner           unget();
272146eb7a6SReid Kleckner         }
273146eb7a6SReid Kleckner         continue;
274146eb7a6SReid Kleckner       }
275146eb7a6SReid Kleckner       if (Tok.K == KwData) {
276146eb7a6SReid Kleckner         E.Data = true;
277146eb7a6SReid Kleckner         continue;
278146eb7a6SReid Kleckner       }
279146eb7a6SReid Kleckner       if (Tok.K == KwConstant) {
280146eb7a6SReid Kleckner         E.Constant = true;
281146eb7a6SReid Kleckner         continue;
282146eb7a6SReid Kleckner       }
283146eb7a6SReid Kleckner       if (Tok.K == KwPrivate) {
284146eb7a6SReid Kleckner         E.Private = true;
285146eb7a6SReid Kleckner         continue;
286146eb7a6SReid Kleckner       }
287*284ab80fSMartin Storsjo       if (Tok.K == EqualEqual) {
288*284ab80fSMartin Storsjo         read();
289*284ab80fSMartin Storsjo         E.AliasTarget = Tok.Value;
290*284ab80fSMartin Storsjo         if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef))
291*284ab80fSMartin Storsjo           E.AliasTarget = std::string("_").append(E.AliasTarget);
292*284ab80fSMartin Storsjo         continue;
293*284ab80fSMartin Storsjo       }
294146eb7a6SReid Kleckner       unget();
295146eb7a6SReid Kleckner       Info.Exports.push_back(E);
296146eb7a6SReid Kleckner       return Error::success();
297146eb7a6SReid Kleckner     }
298146eb7a6SReid Kleckner   }
299146eb7a6SReid Kleckner 
300146eb7a6SReid Kleckner   // HEAPSIZE/STACKSIZE reserve[,commit]
301146eb7a6SReid Kleckner   Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
302146eb7a6SReid Kleckner     if (Error Err = readAsInt(Reserve))
303146eb7a6SReid Kleckner       return Err;
304146eb7a6SReid Kleckner     read();
305146eb7a6SReid Kleckner     if (Tok.K != Comma) {
306146eb7a6SReid Kleckner       unget();
307146eb7a6SReid Kleckner       Commit = nullptr;
308146eb7a6SReid Kleckner       return Error::success();
309146eb7a6SReid Kleckner     }
310146eb7a6SReid Kleckner     if (Error Err = readAsInt(Commit))
311146eb7a6SReid Kleckner       return Err;
312146eb7a6SReid Kleckner     return Error::success();
313146eb7a6SReid Kleckner   }
314146eb7a6SReid Kleckner 
315146eb7a6SReid Kleckner   // NAME outputPath [BASE=address]
316146eb7a6SReid Kleckner   Error parseName(std::string *Out, uint64_t *Baseaddr) {
317146eb7a6SReid Kleckner     read();
318146eb7a6SReid Kleckner     if (Tok.K == Identifier) {
319146eb7a6SReid Kleckner       *Out = Tok.Value;
320146eb7a6SReid Kleckner     } else {
321146eb7a6SReid Kleckner       *Out = "";
322146eb7a6SReid Kleckner       unget();
323146eb7a6SReid Kleckner       return Error::success();
324146eb7a6SReid Kleckner     }
325146eb7a6SReid Kleckner     read();
326146eb7a6SReid Kleckner     if (Tok.K == KwBase) {
327146eb7a6SReid Kleckner       if (Error Err = expect(Equal, "'=' expected"))
328146eb7a6SReid Kleckner         return Err;
329146eb7a6SReid Kleckner       if (Error Err = readAsInt(Baseaddr))
330146eb7a6SReid Kleckner         return Err;
331146eb7a6SReid Kleckner     } else {
332146eb7a6SReid Kleckner       unget();
333146eb7a6SReid Kleckner       *Baseaddr = 0;
334146eb7a6SReid Kleckner     }
335146eb7a6SReid Kleckner     return Error::success();
336146eb7a6SReid Kleckner   }
337146eb7a6SReid Kleckner 
338146eb7a6SReid Kleckner   // VERSION major[.minor]
339146eb7a6SReid Kleckner   Error parseVersion(uint32_t *Major, uint32_t *Minor) {
340146eb7a6SReid Kleckner     read();
341146eb7a6SReid Kleckner     if (Tok.K != Identifier)
342146eb7a6SReid Kleckner       return createError("identifier expected, but got " + Tok.Value);
343146eb7a6SReid Kleckner     StringRef V1, V2;
344146eb7a6SReid Kleckner     std::tie(V1, V2) = Tok.Value.split('.');
345146eb7a6SReid Kleckner     if (V1.getAsInteger(10, *Major))
346146eb7a6SReid Kleckner       return createError("integer expected, but got " + Tok.Value);
347146eb7a6SReid Kleckner     if (V2.empty())
348146eb7a6SReid Kleckner       *Minor = 0;
349146eb7a6SReid Kleckner     else if (V2.getAsInteger(10, *Minor))
350146eb7a6SReid Kleckner       return createError("integer expected, but got " + Tok.Value);
351146eb7a6SReid Kleckner     return Error::success();
352146eb7a6SReid Kleckner   }
353146eb7a6SReid Kleckner 
354146eb7a6SReid Kleckner   Lexer Lex;
355146eb7a6SReid Kleckner   Token Tok;
356146eb7a6SReid Kleckner   std::vector<Token> Stack;
357146eb7a6SReid Kleckner   MachineTypes Machine;
358146eb7a6SReid Kleckner   COFFModuleDefinition Info;
3591079ef8dSMartell Malone   bool MingwDef;
360146eb7a6SReid Kleckner };
361146eb7a6SReid Kleckner 
362146eb7a6SReid Kleckner Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
3631079ef8dSMartell Malone                                                          MachineTypes Machine,
3641079ef8dSMartell Malone                                                          bool MingwDef) {
3651079ef8dSMartell Malone   return Parser(MB.getBuffer(), Machine, MingwDef).parse();
366146eb7a6SReid Kleckner }
367146eb7a6SReid Kleckner 
368146eb7a6SReid Kleckner } // namespace object
369146eb7a6SReid Kleckner } // namespace llvm
370