1146eb7a6SReid Kleckner //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2146eb7a6SReid Kleckner //
3146eb7a6SReid Kleckner //                     The LLVM Compiler Infrastructure
4146eb7a6SReid Kleckner //
5146eb7a6SReid Kleckner // This file is distributed under the University of Illinois Open Source
6146eb7a6SReid Kleckner // License. See LICENSE.TXT for details.
7146eb7a6SReid Kleckner //
8146eb7a6SReid Kleckner //===----------------------------------------------------------------------===//
9146eb7a6SReid Kleckner //
10146eb7a6SReid Kleckner // Windows-specific.
11146eb7a6SReid Kleckner // A parser for the module-definition file (.def file).
12146eb7a6SReid Kleckner //
// The format of module-definition files is described in this document:
14146eb7a6SReid Kleckner // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
15146eb7a6SReid Kleckner //
16146eb7a6SReid Kleckner //===----------------------------------------------------------------------===//
17146eb7a6SReid Kleckner 
18146eb7a6SReid Kleckner #include "llvm/Object/COFFModuleDefinition.h"
19146eb7a6SReid Kleckner #include "llvm/ADT/StringRef.h"
20146eb7a6SReid Kleckner #include "llvm/ADT/StringSwitch.h"
21146eb7a6SReid Kleckner #include "llvm/Object/COFF.h"
22146eb7a6SReid Kleckner #include "llvm/Object/COFFImportFile.h"
23146eb7a6SReid Kleckner #include "llvm/Object/Error.h"
24146eb7a6SReid Kleckner #include "llvm/Support/Error.h"
25e234901aSSaleem Abdulrasool #include "llvm/Support/Path.h"
26146eb7a6SReid Kleckner #include "llvm/Support/raw_ostream.h"
27146eb7a6SReid Kleckner 
28146eb7a6SReid Kleckner using namespace llvm::COFF;
29146eb7a6SReid Kleckner using namespace llvm;
30146eb7a6SReid Kleckner 
31146eb7a6SReid Kleckner namespace llvm {
32146eb7a6SReid Kleckner namespace object {
33146eb7a6SReid Kleckner 
/// Classification of the tokens produced by the .def file lexer.
enum Kind {
  // Generic tokens.
  Unknown = 0, // Default kind of a default-constructed Token.
  Eof,         // End of input.
  Identifier,  // Any word or quoted string that is not a keyword.
  Comma,       // ","
  Equal,       // "=" (the lexer also accepts "==").

  // Keywords; each corresponds to the upper-case spelling of its name.
  KwBase,      // BASE
  KwConstant,  // CONSTANT
  KwData,      // DATA
  KwExports,   // EXPORTS
  KwHeapsize,  // HEAPSIZE
  KwLibrary,   // LIBRARY
  KwName,      // NAME
  KwNoname,    // NONAME
  KwPrivate,   // PRIVATE
  KwStacksize, // STACKSIZE
  KwVersion,   // VERSION
};
52146eb7a6SReid Kleckner 
53146eb7a6SReid Kleckner struct Token {
54146eb7a6SReid Kleckner   explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
55146eb7a6SReid Kleckner   Kind K;
56146eb7a6SReid Kleckner   StringRef Value;
57146eb7a6SReid Kleckner };
58146eb7a6SReid Kleckner 
591079ef8dSMartell Malone static bool isDecorated(StringRef Sym, bool MingwDef) {
60*843cbbddSMartin Storsjo   // In def files, the symbols can either be listed decorated or undecorated.
61*843cbbddSMartin Storsjo   //
62*843cbbddSMartin Storsjo   // - For cdecl symbols, only the undecorated form is allowed.
63*843cbbddSMartin Storsjo   // - For fastcall and vectorcall symbols, both fully decorated or
64*843cbbddSMartin Storsjo   //   undecorated forms can be present.
65*843cbbddSMartin Storsjo   // - For stdcall symbols in non-MinGW environments, the decorated form is
66*843cbbddSMartin Storsjo   //   fully decorated with leading underscore and trailing stack argument
67*843cbbddSMartin Storsjo   //   size - like "_Func@0".
68*843cbbddSMartin Storsjo   // - In MinGW def files, a decorated stdcall symbol does not include the
69*843cbbddSMartin Storsjo   //   leading underscore though, like "Func@0".
70*843cbbddSMartin Storsjo 
71*843cbbddSMartin Storsjo   // This function controls whether a leading underscore should be added to
72*843cbbddSMartin Storsjo   // the given symbol name or not. For MinGW, treat a stdcall symbol name such
73*843cbbddSMartin Storsjo   // as "Func@0" as undecorated, i.e. a leading underscore must be added.
74*843cbbddSMartin Storsjo   // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
75*843cbbddSMartin Storsjo   // as decorated, i.e. don't add any more leading underscores.
76*843cbbddSMartin Storsjo   // We can't check for a leading underscore here, since function names
77*843cbbddSMartin Storsjo   // themselves can start with an underscore, while a second one still needs
78*843cbbddSMartin Storsjo   // to be added.
79*843cbbddSMartin Storsjo   return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
80*843cbbddSMartin Storsjo          (!MingwDef && Sym.contains('@'));
81146eb7a6SReid Kleckner }
82146eb7a6SReid Kleckner 
83146eb7a6SReid Kleckner static Error createError(const Twine &Err) {
84146eb7a6SReid Kleckner   return make_error<StringError>(StringRef(Err.str()),
85146eb7a6SReid Kleckner                                  object_error::parse_failed);
86146eb7a6SReid Kleckner }
87146eb7a6SReid Kleckner 
88146eb7a6SReid Kleckner class Lexer {
89146eb7a6SReid Kleckner public:
90146eb7a6SReid Kleckner   Lexer(StringRef S) : Buf(S) {}
91146eb7a6SReid Kleckner 
92146eb7a6SReid Kleckner   Token lex() {
93146eb7a6SReid Kleckner     Buf = Buf.trim();
94146eb7a6SReid Kleckner     if (Buf.empty())
95146eb7a6SReid Kleckner       return Token(Eof);
96146eb7a6SReid Kleckner 
97146eb7a6SReid Kleckner     switch (Buf[0]) {
98146eb7a6SReid Kleckner     case '\0':
99146eb7a6SReid Kleckner       return Token(Eof);
100146eb7a6SReid Kleckner     case ';': {
101146eb7a6SReid Kleckner       size_t End = Buf.find('\n');
102146eb7a6SReid Kleckner       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
103146eb7a6SReid Kleckner       return lex();
104146eb7a6SReid Kleckner     }
105146eb7a6SReid Kleckner     case '=':
106146eb7a6SReid Kleckner       Buf = Buf.drop_front();
1071079ef8dSMartell Malone       // GNU dlltool accepts both = and ==.
1081079ef8dSMartell Malone       if (Buf.startswith("="))
1091079ef8dSMartell Malone         Buf = Buf.drop_front();
110146eb7a6SReid Kleckner       return Token(Equal, "=");
111146eb7a6SReid Kleckner     case ',':
112146eb7a6SReid Kleckner       Buf = Buf.drop_front();
113146eb7a6SReid Kleckner       return Token(Comma, ",");
114146eb7a6SReid Kleckner     case '"': {
115146eb7a6SReid Kleckner       StringRef S;
116146eb7a6SReid Kleckner       std::tie(S, Buf) = Buf.substr(1).split('"');
117146eb7a6SReid Kleckner       return Token(Identifier, S);
118146eb7a6SReid Kleckner     }
119146eb7a6SReid Kleckner     default: {
120146eb7a6SReid Kleckner       size_t End = Buf.find_first_of("=,\r\n \t\v");
121146eb7a6SReid Kleckner       StringRef Word = Buf.substr(0, End);
122146eb7a6SReid Kleckner       Kind K = llvm::StringSwitch<Kind>(Word)
123146eb7a6SReid Kleckner                    .Case("BASE", KwBase)
124146eb7a6SReid Kleckner                    .Case("CONSTANT", KwConstant)
125146eb7a6SReid Kleckner                    .Case("DATA", KwData)
126146eb7a6SReid Kleckner                    .Case("EXPORTS", KwExports)
127146eb7a6SReid Kleckner                    .Case("HEAPSIZE", KwHeapsize)
128146eb7a6SReid Kleckner                    .Case("LIBRARY", KwLibrary)
129146eb7a6SReid Kleckner                    .Case("NAME", KwName)
130146eb7a6SReid Kleckner                    .Case("NONAME", KwNoname)
131146eb7a6SReid Kleckner                    .Case("PRIVATE", KwPrivate)
132146eb7a6SReid Kleckner                    .Case("STACKSIZE", KwStacksize)
133146eb7a6SReid Kleckner                    .Case("VERSION", KwVersion)
134146eb7a6SReid Kleckner                    .Default(Identifier);
135146eb7a6SReid Kleckner       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
136146eb7a6SReid Kleckner       return Token(K, Word);
137146eb7a6SReid Kleckner     }
138146eb7a6SReid Kleckner     }
139146eb7a6SReid Kleckner   }
140146eb7a6SReid Kleckner 
141146eb7a6SReid Kleckner private:
142146eb7a6SReid Kleckner   StringRef Buf;
143146eb7a6SReid Kleckner };
144146eb7a6SReid Kleckner 
145146eb7a6SReid Kleckner class Parser {
146146eb7a6SReid Kleckner public:
1471079ef8dSMartell Malone   explicit Parser(StringRef S, MachineTypes M, bool B)
1481079ef8dSMartell Malone       : Lex(S), Machine(M), MingwDef(B) {}
149146eb7a6SReid Kleckner 
150146eb7a6SReid Kleckner   Expected<COFFModuleDefinition> parse() {
151146eb7a6SReid Kleckner     do {
152146eb7a6SReid Kleckner       if (Error Err = parseOne())
153146eb7a6SReid Kleckner         return std::move(Err);
154146eb7a6SReid Kleckner     } while (Tok.K != Eof);
155146eb7a6SReid Kleckner     return Info;
156146eb7a6SReid Kleckner   }
157146eb7a6SReid Kleckner 
158146eb7a6SReid Kleckner private:
159146eb7a6SReid Kleckner   void read() {
160146eb7a6SReid Kleckner     if (Stack.empty()) {
161146eb7a6SReid Kleckner       Tok = Lex.lex();
162146eb7a6SReid Kleckner       return;
163146eb7a6SReid Kleckner     }
164146eb7a6SReid Kleckner     Tok = Stack.back();
165146eb7a6SReid Kleckner     Stack.pop_back();
166146eb7a6SReid Kleckner   }
167146eb7a6SReid Kleckner 
168146eb7a6SReid Kleckner   Error readAsInt(uint64_t *I) {
169146eb7a6SReid Kleckner     read();
170146eb7a6SReid Kleckner     if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
171146eb7a6SReid Kleckner       return createError("integer expected");
172146eb7a6SReid Kleckner     return Error::success();
173146eb7a6SReid Kleckner   }
174146eb7a6SReid Kleckner 
175146eb7a6SReid Kleckner   Error expect(Kind Expected, StringRef Msg) {
176146eb7a6SReid Kleckner     read();
177146eb7a6SReid Kleckner     if (Tok.K != Expected)
178146eb7a6SReid Kleckner       return createError(Msg);
179146eb7a6SReid Kleckner     return Error::success();
180146eb7a6SReid Kleckner   }
181146eb7a6SReid Kleckner 
182146eb7a6SReid Kleckner   void unget() { Stack.push_back(Tok); }
183146eb7a6SReid Kleckner 
184146eb7a6SReid Kleckner   Error parseOne() {
185146eb7a6SReid Kleckner     read();
186146eb7a6SReid Kleckner     switch (Tok.K) {
187146eb7a6SReid Kleckner     case Eof:
188146eb7a6SReid Kleckner       return Error::success();
189146eb7a6SReid Kleckner     case KwExports:
190146eb7a6SReid Kleckner       for (;;) {
191146eb7a6SReid Kleckner         read();
192146eb7a6SReid Kleckner         if (Tok.K != Identifier) {
193146eb7a6SReid Kleckner           unget();
194146eb7a6SReid Kleckner           return Error::success();
195146eb7a6SReid Kleckner         }
196146eb7a6SReid Kleckner         if (Error Err = parseExport())
197146eb7a6SReid Kleckner           return Err;
198146eb7a6SReid Kleckner       }
199146eb7a6SReid Kleckner     case KwHeapsize:
200146eb7a6SReid Kleckner       return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
201146eb7a6SReid Kleckner     case KwStacksize:
202146eb7a6SReid Kleckner       return parseNumbers(&Info.StackReserve, &Info.StackCommit);
203146eb7a6SReid Kleckner     case KwLibrary:
204146eb7a6SReid Kleckner     case KwName: {
205146eb7a6SReid Kleckner       bool IsDll = Tok.K == KwLibrary; // Check before parseName.
206146eb7a6SReid Kleckner       std::string Name;
207146eb7a6SReid Kleckner       if (Error Err = parseName(&Name, &Info.ImageBase))
208146eb7a6SReid Kleckner         return Err;
20908e5f685SSaleem Abdulrasool 
21008e5f685SSaleem Abdulrasool       Info.ImportName = Name;
211146eb7a6SReid Kleckner 
212146eb7a6SReid Kleckner       // Set the output file, but don't override /out if it was already passed.
21308e5f685SSaleem Abdulrasool       if (Info.OutputFile.empty()) {
214146eb7a6SReid Kleckner         Info.OutputFile = Name;
21508e5f685SSaleem Abdulrasool         // Append the appropriate file extension if not already present.
21608e5f685SSaleem Abdulrasool         if (!sys::path::has_extension(Name))
21708e5f685SSaleem Abdulrasool           Info.OutputFile += IsDll ? ".dll" : ".exe";
21808e5f685SSaleem Abdulrasool       }
21908e5f685SSaleem Abdulrasool 
220146eb7a6SReid Kleckner       return Error::success();
221146eb7a6SReid Kleckner     }
222146eb7a6SReid Kleckner     case KwVersion:
223146eb7a6SReid Kleckner       return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
224146eb7a6SReid Kleckner     default:
225146eb7a6SReid Kleckner       return createError("unknown directive: " + Tok.Value);
226146eb7a6SReid Kleckner     }
227146eb7a6SReid Kleckner   }
228146eb7a6SReid Kleckner 
229146eb7a6SReid Kleckner   Error parseExport() {
230146eb7a6SReid Kleckner     COFFShortExport E;
231146eb7a6SReid Kleckner     E.Name = Tok.Value;
232146eb7a6SReid Kleckner     read();
233146eb7a6SReid Kleckner     if (Tok.K == Equal) {
234146eb7a6SReid Kleckner       read();
235146eb7a6SReid Kleckner       if (Tok.K != Identifier)
236146eb7a6SReid Kleckner         return createError("identifier expected, but got " + Tok.Value);
237146eb7a6SReid Kleckner       E.ExtName = E.Name;
238146eb7a6SReid Kleckner       E.Name = Tok.Value;
239146eb7a6SReid Kleckner     } else {
240146eb7a6SReid Kleckner       unget();
241146eb7a6SReid Kleckner     }
242146eb7a6SReid Kleckner 
243146eb7a6SReid Kleckner     if (Machine == IMAGE_FILE_MACHINE_I386) {
2441079ef8dSMartell Malone       if (!isDecorated(E.Name, MingwDef))
245146eb7a6SReid Kleckner         E.Name = (std::string("_").append(E.Name));
2461079ef8dSMartell Malone       if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
247146eb7a6SReid Kleckner         E.ExtName = (std::string("_").append(E.ExtName));
248146eb7a6SReid Kleckner     }
249146eb7a6SReid Kleckner 
250146eb7a6SReid Kleckner     for (;;) {
251146eb7a6SReid Kleckner       read();
252146eb7a6SReid Kleckner       if (Tok.K == Identifier && Tok.Value[0] == '@') {
25358c9527eSMartin Storsjo         if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
25458c9527eSMartin Storsjo           // Not an ordinal modifier at all, but the next export (fastcall
25558c9527eSMartin Storsjo           // decorated) - complete the current one.
25658c9527eSMartin Storsjo           unget();
25758c9527eSMartin Storsjo           Info.Exports.push_back(E);
25858c9527eSMartin Storsjo           return Error::success();
25958c9527eSMartin Storsjo         }
260146eb7a6SReid Kleckner         read();
261146eb7a6SReid Kleckner         if (Tok.K == KwNoname) {
262146eb7a6SReid Kleckner           E.Noname = true;
263146eb7a6SReid Kleckner         } else {
264146eb7a6SReid Kleckner           unget();
265146eb7a6SReid Kleckner         }
266146eb7a6SReid Kleckner         continue;
267146eb7a6SReid Kleckner       }
268146eb7a6SReid Kleckner       if (Tok.K == KwData) {
269146eb7a6SReid Kleckner         E.Data = true;
270146eb7a6SReid Kleckner         continue;
271146eb7a6SReid Kleckner       }
272146eb7a6SReid Kleckner       if (Tok.K == KwConstant) {
273146eb7a6SReid Kleckner         E.Constant = true;
274146eb7a6SReid Kleckner         continue;
275146eb7a6SReid Kleckner       }
276146eb7a6SReid Kleckner       if (Tok.K == KwPrivate) {
277146eb7a6SReid Kleckner         E.Private = true;
278146eb7a6SReid Kleckner         continue;
279146eb7a6SReid Kleckner       }
280146eb7a6SReid Kleckner       unget();
281146eb7a6SReid Kleckner       Info.Exports.push_back(E);
282146eb7a6SReid Kleckner       return Error::success();
283146eb7a6SReid Kleckner     }
284146eb7a6SReid Kleckner   }
285146eb7a6SReid Kleckner 
286146eb7a6SReid Kleckner   // HEAPSIZE/STACKSIZE reserve[,commit]
287146eb7a6SReid Kleckner   Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
288146eb7a6SReid Kleckner     if (Error Err = readAsInt(Reserve))
289146eb7a6SReid Kleckner       return Err;
290146eb7a6SReid Kleckner     read();
291146eb7a6SReid Kleckner     if (Tok.K != Comma) {
292146eb7a6SReid Kleckner       unget();
293146eb7a6SReid Kleckner       Commit = nullptr;
294146eb7a6SReid Kleckner       return Error::success();
295146eb7a6SReid Kleckner     }
296146eb7a6SReid Kleckner     if (Error Err = readAsInt(Commit))
297146eb7a6SReid Kleckner       return Err;
298146eb7a6SReid Kleckner     return Error::success();
299146eb7a6SReid Kleckner   }
300146eb7a6SReid Kleckner 
301146eb7a6SReid Kleckner   // NAME outputPath [BASE=address]
302146eb7a6SReid Kleckner   Error parseName(std::string *Out, uint64_t *Baseaddr) {
303146eb7a6SReid Kleckner     read();
304146eb7a6SReid Kleckner     if (Tok.K == Identifier) {
305146eb7a6SReid Kleckner       *Out = Tok.Value;
306146eb7a6SReid Kleckner     } else {
307146eb7a6SReid Kleckner       *Out = "";
308146eb7a6SReid Kleckner       unget();
309146eb7a6SReid Kleckner       return Error::success();
310146eb7a6SReid Kleckner     }
311146eb7a6SReid Kleckner     read();
312146eb7a6SReid Kleckner     if (Tok.K == KwBase) {
313146eb7a6SReid Kleckner       if (Error Err = expect(Equal, "'=' expected"))
314146eb7a6SReid Kleckner         return Err;
315146eb7a6SReid Kleckner       if (Error Err = readAsInt(Baseaddr))
316146eb7a6SReid Kleckner         return Err;
317146eb7a6SReid Kleckner     } else {
318146eb7a6SReid Kleckner       unget();
319146eb7a6SReid Kleckner       *Baseaddr = 0;
320146eb7a6SReid Kleckner     }
321146eb7a6SReid Kleckner     return Error::success();
322146eb7a6SReid Kleckner   }
323146eb7a6SReid Kleckner 
324146eb7a6SReid Kleckner   // VERSION major[.minor]
325146eb7a6SReid Kleckner   Error parseVersion(uint32_t *Major, uint32_t *Minor) {
326146eb7a6SReid Kleckner     read();
327146eb7a6SReid Kleckner     if (Tok.K != Identifier)
328146eb7a6SReid Kleckner       return createError("identifier expected, but got " + Tok.Value);
329146eb7a6SReid Kleckner     StringRef V1, V2;
330146eb7a6SReid Kleckner     std::tie(V1, V2) = Tok.Value.split('.');
331146eb7a6SReid Kleckner     if (V1.getAsInteger(10, *Major))
332146eb7a6SReid Kleckner       return createError("integer expected, but got " + Tok.Value);
333146eb7a6SReid Kleckner     if (V2.empty())
334146eb7a6SReid Kleckner       *Minor = 0;
335146eb7a6SReid Kleckner     else if (V2.getAsInteger(10, *Minor))
336146eb7a6SReid Kleckner       return createError("integer expected, but got " + Tok.Value);
337146eb7a6SReid Kleckner     return Error::success();
338146eb7a6SReid Kleckner   }
339146eb7a6SReid Kleckner 
340146eb7a6SReid Kleckner   Lexer Lex;
341146eb7a6SReid Kleckner   Token Tok;
342146eb7a6SReid Kleckner   std::vector<Token> Stack;
343146eb7a6SReid Kleckner   MachineTypes Machine;
344146eb7a6SReid Kleckner   COFFModuleDefinition Info;
3451079ef8dSMartell Malone   bool MingwDef;
346146eb7a6SReid Kleckner };
347146eb7a6SReid Kleckner 
348146eb7a6SReid Kleckner Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
3491079ef8dSMartell Malone                                                          MachineTypes Machine,
3501079ef8dSMartell Malone                                                          bool MingwDef) {
3511079ef8dSMartell Malone   return Parser(MB.getBuffer(), Machine, MingwDef).parse();
352146eb7a6SReid Kleckner }
353146eb7a6SReid Kleckner 
354146eb7a6SReid Kleckner } // namespace object
355146eb7a6SReid Kleckner } // namespace llvm
356