1146eb7a6SReid Kleckner //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2146eb7a6SReid Kleckner //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6146eb7a6SReid Kleckner //
7146eb7a6SReid Kleckner //===----------------------------------------------------------------------===//
8146eb7a6SReid Kleckner //
9146eb7a6SReid Kleckner // Windows-specific.
10146eb7a6SReid Kleckner // A parser for the module-definition file (.def file).
11146eb7a6SReid Kleckner //
// The format of module-definition files is described in this document:
13146eb7a6SReid Kleckner // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14146eb7a6SReid Kleckner //
15146eb7a6SReid Kleckner //===----------------------------------------------------------------------===//
16146eb7a6SReid Kleckner 
17146eb7a6SReid Kleckner #include "llvm/Object/COFFModuleDefinition.h"
18146eb7a6SReid Kleckner #include "llvm/ADT/StringRef.h"
19146eb7a6SReid Kleckner #include "llvm/ADT/StringSwitch.h"
20146eb7a6SReid Kleckner #include "llvm/Object/COFFImportFile.h"
21146eb7a6SReid Kleckner #include "llvm/Object/Error.h"
22146eb7a6SReid Kleckner #include "llvm/Support/Error.h"
23e234901aSSaleem Abdulrasool #include "llvm/Support/Path.h"
24146eb7a6SReid Kleckner 
25146eb7a6SReid Kleckner using namespace llvm::COFF;
26146eb7a6SReid Kleckner using namespace llvm;
27146eb7a6SReid Kleckner 
28146eb7a6SReid Kleckner namespace llvm {
29146eb7a6SReid Kleckner namespace object {
30146eb7a6SReid Kleckner 
/// Token categories produced by the Lexer when scanning a .def file.
enum Kind {
  // Default kind of a default-constructed Token.
  Unknown,
  // End of input.
  Eof,

  // Any word or quoted string that is not one of the keywords below.
  Identifier,

  // Punctuation: ",", "=" and "==".
  Comma,
  Equal,
  EqualEqual,

  // Keywords. Each matches the upper-case spelling of its name exactly
  // (e.g. KwBase <-> "BASE").
  KwBase,
  KwConstant,
  KwData,
  KwExports,
  KwHeapsize,
  KwLibrary,
  KwName,
  KwNoname,
  KwPrivate,
  KwStacksize,
  KwVersion,
};
50146eb7a6SReid Kleckner 
51146eb7a6SReid Kleckner struct Token {
Tokenllvm::object::Token52146eb7a6SReid Kleckner   explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
53146eb7a6SReid Kleckner   Kind K;
54146eb7a6SReid Kleckner   StringRef Value;
55146eb7a6SReid Kleckner };
56146eb7a6SReid Kleckner 
isDecorated(StringRef Sym,bool MingwDef)571079ef8dSMartell Malone static bool isDecorated(StringRef Sym, bool MingwDef) {
58843cbbddSMartin Storsjo   // In def files, the symbols can either be listed decorated or undecorated.
59843cbbddSMartin Storsjo   //
60843cbbddSMartin Storsjo   // - For cdecl symbols, only the undecorated form is allowed.
61843cbbddSMartin Storsjo   // - For fastcall and vectorcall symbols, both fully decorated or
62843cbbddSMartin Storsjo   //   undecorated forms can be present.
63843cbbddSMartin Storsjo   // - For stdcall symbols in non-MinGW environments, the decorated form is
64843cbbddSMartin Storsjo   //   fully decorated with leading underscore and trailing stack argument
65843cbbddSMartin Storsjo   //   size - like "_Func@0".
66843cbbddSMartin Storsjo   // - In MinGW def files, a decorated stdcall symbol does not include the
67843cbbddSMartin Storsjo   //   leading underscore though, like "Func@0".
68843cbbddSMartin Storsjo 
69843cbbddSMartin Storsjo   // This function controls whether a leading underscore should be added to
70843cbbddSMartin Storsjo   // the given symbol name or not. For MinGW, treat a stdcall symbol name such
71843cbbddSMartin Storsjo   // as "Func@0" as undecorated, i.e. a leading underscore must be added.
72843cbbddSMartin Storsjo   // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
73843cbbddSMartin Storsjo   // as decorated, i.e. don't add any more leading underscores.
74843cbbddSMartin Storsjo   // We can't check for a leading underscore here, since function names
75843cbbddSMartin Storsjo   // themselves can start with an underscore, while a second one still needs
76843cbbddSMartin Storsjo   // to be added.
77843cbbddSMartin Storsjo   return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
78843cbbddSMartin Storsjo          (!MingwDef && Sym.contains('@'));
79146eb7a6SReid Kleckner }
80146eb7a6SReid Kleckner 
81146eb7a6SReid Kleckner class Lexer {
82146eb7a6SReid Kleckner public:
Lexer(StringRef S)83146eb7a6SReid Kleckner   Lexer(StringRef S) : Buf(S) {}
84146eb7a6SReid Kleckner 
lex()85146eb7a6SReid Kleckner   Token lex() {
86146eb7a6SReid Kleckner     Buf = Buf.trim();
87146eb7a6SReid Kleckner     if (Buf.empty())
88146eb7a6SReid Kleckner       return Token(Eof);
89146eb7a6SReid Kleckner 
90146eb7a6SReid Kleckner     switch (Buf[0]) {
91146eb7a6SReid Kleckner     case '\0':
92146eb7a6SReid Kleckner       return Token(Eof);
93146eb7a6SReid Kleckner     case ';': {
94146eb7a6SReid Kleckner       size_t End = Buf.find('\n');
95146eb7a6SReid Kleckner       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
96146eb7a6SReid Kleckner       return lex();
97146eb7a6SReid Kleckner     }
98146eb7a6SReid Kleckner     case '=':
99146eb7a6SReid Kleckner       Buf = Buf.drop_front();
100284ab80fSMartin Storsjo       if (Buf.startswith("=")) {
1011079ef8dSMartell Malone         Buf = Buf.drop_front();
102284ab80fSMartin Storsjo         return Token(EqualEqual, "==");
103284ab80fSMartin Storsjo       }
104146eb7a6SReid Kleckner       return Token(Equal, "=");
105146eb7a6SReid Kleckner     case ',':
106146eb7a6SReid Kleckner       Buf = Buf.drop_front();
107146eb7a6SReid Kleckner       return Token(Comma, ",");
108146eb7a6SReid Kleckner     case '"': {
109146eb7a6SReid Kleckner       StringRef S;
110146eb7a6SReid Kleckner       std::tie(S, Buf) = Buf.substr(1).split('"');
111146eb7a6SReid Kleckner       return Token(Identifier, S);
112146eb7a6SReid Kleckner     }
113146eb7a6SReid Kleckner     default: {
114efb5024eSRui Ueyama       size_t End = Buf.find_first_of("=,;\r\n \t\v");
115146eb7a6SReid Kleckner       StringRef Word = Buf.substr(0, End);
116146eb7a6SReid Kleckner       Kind K = llvm::StringSwitch<Kind>(Word)
117146eb7a6SReid Kleckner                    .Case("BASE", KwBase)
118146eb7a6SReid Kleckner                    .Case("CONSTANT", KwConstant)
119146eb7a6SReid Kleckner                    .Case("DATA", KwData)
120146eb7a6SReid Kleckner                    .Case("EXPORTS", KwExports)
121146eb7a6SReid Kleckner                    .Case("HEAPSIZE", KwHeapsize)
122146eb7a6SReid Kleckner                    .Case("LIBRARY", KwLibrary)
123146eb7a6SReid Kleckner                    .Case("NAME", KwName)
124146eb7a6SReid Kleckner                    .Case("NONAME", KwNoname)
125146eb7a6SReid Kleckner                    .Case("PRIVATE", KwPrivate)
126146eb7a6SReid Kleckner                    .Case("STACKSIZE", KwStacksize)
127146eb7a6SReid Kleckner                    .Case("VERSION", KwVersion)
128146eb7a6SReid Kleckner                    .Default(Identifier);
129146eb7a6SReid Kleckner       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
130146eb7a6SReid Kleckner       return Token(K, Word);
131146eb7a6SReid Kleckner     }
132146eb7a6SReid Kleckner     }
133146eb7a6SReid Kleckner   }
134146eb7a6SReid Kleckner 
135146eb7a6SReid Kleckner private:
136146eb7a6SReid Kleckner   StringRef Buf;
137146eb7a6SReid Kleckner };
138146eb7a6SReid Kleckner 
139146eb7a6SReid Kleckner class Parser {
140146eb7a6SReid Kleckner public:
Parser(StringRef S,MachineTypes M,bool B)1411079ef8dSMartell Malone   explicit Parser(StringRef S, MachineTypes M, bool B)
1421079ef8dSMartell Malone       : Lex(S), Machine(M), MingwDef(B) {}
143146eb7a6SReid Kleckner 
parse()144146eb7a6SReid Kleckner   Expected<COFFModuleDefinition> parse() {
145146eb7a6SReid Kleckner     do {
146146eb7a6SReid Kleckner       if (Error Err = parseOne())
147*c55cf4afSBill Wendling         return std::move(Err);
148146eb7a6SReid Kleckner     } while (Tok.K != Eof);
149146eb7a6SReid Kleckner     return Info;
150146eb7a6SReid Kleckner   }
151146eb7a6SReid Kleckner 
152146eb7a6SReid Kleckner private:
read()153146eb7a6SReid Kleckner   void read() {
154146eb7a6SReid Kleckner     if (Stack.empty()) {
155146eb7a6SReid Kleckner       Tok = Lex.lex();
156146eb7a6SReid Kleckner       return;
157146eb7a6SReid Kleckner     }
158146eb7a6SReid Kleckner     Tok = Stack.back();
159146eb7a6SReid Kleckner     Stack.pop_back();
160146eb7a6SReid Kleckner   }
161146eb7a6SReid Kleckner 
readAsInt(uint64_t * I)162146eb7a6SReid Kleckner   Error readAsInt(uint64_t *I) {
163146eb7a6SReid Kleckner     read();
164146eb7a6SReid Kleckner     if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
165146eb7a6SReid Kleckner       return createError("integer expected");
166146eb7a6SReid Kleckner     return Error::success();
167146eb7a6SReid Kleckner   }
168146eb7a6SReid Kleckner 
expect(Kind Expected,StringRef Msg)169146eb7a6SReid Kleckner   Error expect(Kind Expected, StringRef Msg) {
170146eb7a6SReid Kleckner     read();
171146eb7a6SReid Kleckner     if (Tok.K != Expected)
172146eb7a6SReid Kleckner       return createError(Msg);
173146eb7a6SReid Kleckner     return Error::success();
174146eb7a6SReid Kleckner   }
175146eb7a6SReid Kleckner 
unget()176146eb7a6SReid Kleckner   void unget() { Stack.push_back(Tok); }
177146eb7a6SReid Kleckner 
parseOne()178146eb7a6SReid Kleckner   Error parseOne() {
179146eb7a6SReid Kleckner     read();
180146eb7a6SReid Kleckner     switch (Tok.K) {
181146eb7a6SReid Kleckner     case Eof:
182146eb7a6SReid Kleckner       return Error::success();
183146eb7a6SReid Kleckner     case KwExports:
184146eb7a6SReid Kleckner       for (;;) {
185146eb7a6SReid Kleckner         read();
186146eb7a6SReid Kleckner         if (Tok.K != Identifier) {
187146eb7a6SReid Kleckner           unget();
188146eb7a6SReid Kleckner           return Error::success();
189146eb7a6SReid Kleckner         }
190146eb7a6SReid Kleckner         if (Error Err = parseExport())
191146eb7a6SReid Kleckner           return Err;
192146eb7a6SReid Kleckner       }
193146eb7a6SReid Kleckner     case KwHeapsize:
194146eb7a6SReid Kleckner       return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
195146eb7a6SReid Kleckner     case KwStacksize:
196146eb7a6SReid Kleckner       return parseNumbers(&Info.StackReserve, &Info.StackCommit);
197146eb7a6SReid Kleckner     case KwLibrary:
198146eb7a6SReid Kleckner     case KwName: {
199146eb7a6SReid Kleckner       bool IsDll = Tok.K == KwLibrary; // Check before parseName.
200146eb7a6SReid Kleckner       std::string Name;
201146eb7a6SReid Kleckner       if (Error Err = parseName(&Name, &Info.ImageBase))
202146eb7a6SReid Kleckner         return Err;
20308e5f685SSaleem Abdulrasool 
20408e5f685SSaleem Abdulrasool       Info.ImportName = Name;
205146eb7a6SReid Kleckner 
206146eb7a6SReid Kleckner       // Set the output file, but don't override /out if it was already passed.
20708e5f685SSaleem Abdulrasool       if (Info.OutputFile.empty()) {
208146eb7a6SReid Kleckner         Info.OutputFile = Name;
20908e5f685SSaleem Abdulrasool         // Append the appropriate file extension if not already present.
21008e5f685SSaleem Abdulrasool         if (!sys::path::has_extension(Name))
21108e5f685SSaleem Abdulrasool           Info.OutputFile += IsDll ? ".dll" : ".exe";
21208e5f685SSaleem Abdulrasool       }
21308e5f685SSaleem Abdulrasool 
214146eb7a6SReid Kleckner       return Error::success();
215146eb7a6SReid Kleckner     }
216146eb7a6SReid Kleckner     case KwVersion:
217146eb7a6SReid Kleckner       return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
218146eb7a6SReid Kleckner     default:
219146eb7a6SReid Kleckner       return createError("unknown directive: " + Tok.Value);
220146eb7a6SReid Kleckner     }
221146eb7a6SReid Kleckner   }
222146eb7a6SReid Kleckner 
parseExport()223146eb7a6SReid Kleckner   Error parseExport() {
224146eb7a6SReid Kleckner     COFFShortExport E;
225adcd0268SBenjamin Kramer     E.Name = std::string(Tok.Value);
226146eb7a6SReid Kleckner     read();
227146eb7a6SReid Kleckner     if (Tok.K == Equal) {
228146eb7a6SReid Kleckner       read();
229146eb7a6SReid Kleckner       if (Tok.K != Identifier)
230146eb7a6SReid Kleckner         return createError("identifier expected, but got " + Tok.Value);
231146eb7a6SReid Kleckner       E.ExtName = E.Name;
232adcd0268SBenjamin Kramer       E.Name = std::string(Tok.Value);
233146eb7a6SReid Kleckner     } else {
234146eb7a6SReid Kleckner       unget();
235146eb7a6SReid Kleckner     }
236146eb7a6SReid Kleckner 
237146eb7a6SReid Kleckner     if (Machine == IMAGE_FILE_MACHINE_I386) {
2381079ef8dSMartell Malone       if (!isDecorated(E.Name, MingwDef))
239146eb7a6SReid Kleckner         E.Name = (std::string("_").append(E.Name));
2401079ef8dSMartell Malone       if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
241146eb7a6SReid Kleckner         E.ExtName = (std::string("_").append(E.ExtName));
242146eb7a6SReid Kleckner     }
243146eb7a6SReid Kleckner 
244146eb7a6SReid Kleckner     for (;;) {
245146eb7a6SReid Kleckner       read();
246146eb7a6SReid Kleckner       if (Tok.K == Identifier && Tok.Value[0] == '@') {
2476c1fd299SMartin Storsjo         if (Tok.Value == "@") {
2486c1fd299SMartin Storsjo           // "foo @ 10"
2496c1fd299SMartin Storsjo           read();
2506c1fd299SMartin Storsjo           Tok.Value.getAsInteger(10, E.Ordinal);
2516c1fd299SMartin Storsjo         } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
2526c1fd299SMartin Storsjo           // "foo \n @bar" - Not an ordinal modifier at all, but the next
2536c1fd299SMartin Storsjo           // export (fastcall decorated) - complete the current one.
25458c9527eSMartin Storsjo           unget();
25558c9527eSMartin Storsjo           Info.Exports.push_back(E);
25658c9527eSMartin Storsjo           return Error::success();
25758c9527eSMartin Storsjo         }
2586c1fd299SMartin Storsjo         // "foo @10"
259146eb7a6SReid Kleckner         read();
260146eb7a6SReid Kleckner         if (Tok.K == KwNoname) {
261146eb7a6SReid Kleckner           E.Noname = true;
262146eb7a6SReid Kleckner         } else {
263146eb7a6SReid Kleckner           unget();
264146eb7a6SReid Kleckner         }
265146eb7a6SReid Kleckner         continue;
266146eb7a6SReid Kleckner       }
267146eb7a6SReid Kleckner       if (Tok.K == KwData) {
268146eb7a6SReid Kleckner         E.Data = true;
269146eb7a6SReid Kleckner         continue;
270146eb7a6SReid Kleckner       }
271146eb7a6SReid Kleckner       if (Tok.K == KwConstant) {
272146eb7a6SReid Kleckner         E.Constant = true;
273146eb7a6SReid Kleckner         continue;
274146eb7a6SReid Kleckner       }
275146eb7a6SReid Kleckner       if (Tok.K == KwPrivate) {
276146eb7a6SReid Kleckner         E.Private = true;
277146eb7a6SReid Kleckner         continue;
278146eb7a6SReid Kleckner       }
279284ab80fSMartin Storsjo       if (Tok.K == EqualEqual) {
280284ab80fSMartin Storsjo         read();
281adcd0268SBenjamin Kramer         E.AliasTarget = std::string(Tok.Value);
282284ab80fSMartin Storsjo         if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef))
283284ab80fSMartin Storsjo           E.AliasTarget = std::string("_").append(E.AliasTarget);
284284ab80fSMartin Storsjo         continue;
285284ab80fSMartin Storsjo       }
286146eb7a6SReid Kleckner       unget();
287146eb7a6SReid Kleckner       Info.Exports.push_back(E);
288146eb7a6SReid Kleckner       return Error::success();
289146eb7a6SReid Kleckner     }
290146eb7a6SReid Kleckner   }
291146eb7a6SReid Kleckner 
292146eb7a6SReid Kleckner   // HEAPSIZE/STACKSIZE reserve[,commit]
parseNumbers(uint64_t * Reserve,uint64_t * Commit)293146eb7a6SReid Kleckner   Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
294146eb7a6SReid Kleckner     if (Error Err = readAsInt(Reserve))
295146eb7a6SReid Kleckner       return Err;
296146eb7a6SReid Kleckner     read();
297146eb7a6SReid Kleckner     if (Tok.K != Comma) {
298146eb7a6SReid Kleckner       unget();
299146eb7a6SReid Kleckner       Commit = nullptr;
300146eb7a6SReid Kleckner       return Error::success();
301146eb7a6SReid Kleckner     }
302146eb7a6SReid Kleckner     if (Error Err = readAsInt(Commit))
303146eb7a6SReid Kleckner       return Err;
304146eb7a6SReid Kleckner     return Error::success();
305146eb7a6SReid Kleckner   }
306146eb7a6SReid Kleckner 
307146eb7a6SReid Kleckner   // NAME outputPath [BASE=address]
parseName(std::string * Out,uint64_t * Baseaddr)308146eb7a6SReid Kleckner   Error parseName(std::string *Out, uint64_t *Baseaddr) {
309146eb7a6SReid Kleckner     read();
310146eb7a6SReid Kleckner     if (Tok.K == Identifier) {
311adcd0268SBenjamin Kramer       *Out = std::string(Tok.Value);
312146eb7a6SReid Kleckner     } else {
313146eb7a6SReid Kleckner       *Out = "";
314146eb7a6SReid Kleckner       unget();
315146eb7a6SReid Kleckner       return Error::success();
316146eb7a6SReid Kleckner     }
317146eb7a6SReid Kleckner     read();
318146eb7a6SReid Kleckner     if (Tok.K == KwBase) {
319146eb7a6SReid Kleckner       if (Error Err = expect(Equal, "'=' expected"))
320146eb7a6SReid Kleckner         return Err;
321146eb7a6SReid Kleckner       if (Error Err = readAsInt(Baseaddr))
322146eb7a6SReid Kleckner         return Err;
323146eb7a6SReid Kleckner     } else {
324146eb7a6SReid Kleckner       unget();
325146eb7a6SReid Kleckner       *Baseaddr = 0;
326146eb7a6SReid Kleckner     }
327146eb7a6SReid Kleckner     return Error::success();
328146eb7a6SReid Kleckner   }
329146eb7a6SReid Kleckner 
330146eb7a6SReid Kleckner   // VERSION major[.minor]
parseVersion(uint32_t * Major,uint32_t * Minor)331146eb7a6SReid Kleckner   Error parseVersion(uint32_t *Major, uint32_t *Minor) {
332146eb7a6SReid Kleckner     read();
333146eb7a6SReid Kleckner     if (Tok.K != Identifier)
334146eb7a6SReid Kleckner       return createError("identifier expected, but got " + Tok.Value);
335146eb7a6SReid Kleckner     StringRef V1, V2;
336146eb7a6SReid Kleckner     std::tie(V1, V2) = Tok.Value.split('.');
337146eb7a6SReid Kleckner     if (V1.getAsInteger(10, *Major))
338146eb7a6SReid Kleckner       return createError("integer expected, but got " + Tok.Value);
339146eb7a6SReid Kleckner     if (V2.empty())
340146eb7a6SReid Kleckner       *Minor = 0;
341146eb7a6SReid Kleckner     else if (V2.getAsInteger(10, *Minor))
342146eb7a6SReid Kleckner       return createError("integer expected, but got " + Tok.Value);
343146eb7a6SReid Kleckner     return Error::success();
344146eb7a6SReid Kleckner   }
345146eb7a6SReid Kleckner 
346146eb7a6SReid Kleckner   Lexer Lex;
347146eb7a6SReid Kleckner   Token Tok;
348146eb7a6SReid Kleckner   std::vector<Token> Stack;
349146eb7a6SReid Kleckner   MachineTypes Machine;
350146eb7a6SReid Kleckner   COFFModuleDefinition Info;
3511079ef8dSMartell Malone   bool MingwDef;
352146eb7a6SReid Kleckner };
353146eb7a6SReid Kleckner 
parseCOFFModuleDefinition(MemoryBufferRef MB,MachineTypes Machine,bool MingwDef)354146eb7a6SReid Kleckner Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
3551079ef8dSMartell Malone                                                          MachineTypes Machine,
3561079ef8dSMartell Malone                                                          bool MingwDef) {
3571079ef8dSMartell Malone   return Parser(MB.getBuffer(), Machine, MingwDef).parse();
358146eb7a6SReid Kleckner }
359146eb7a6SReid Kleckner 
360146eb7a6SReid Kleckner } // namespace object
361146eb7a6SReid Kleckner } // namespace llvm
362