1146eb7a6SReid Kleckner //===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
2146eb7a6SReid Kleckner //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6146eb7a6SReid Kleckner //
7146eb7a6SReid Kleckner //===----------------------------------------------------------------------===//
8146eb7a6SReid Kleckner //
9146eb7a6SReid Kleckner // Windows-specific.
10146eb7a6SReid Kleckner // A parser for the module-definition file (.def file).
11146eb7a6SReid Kleckner //
// The format of module-definition files is described in this document:
13146eb7a6SReid Kleckner // https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
14146eb7a6SReid Kleckner //
15146eb7a6SReid Kleckner //===----------------------------------------------------------------------===//
16146eb7a6SReid Kleckner 
17146eb7a6SReid Kleckner #include "llvm/Object/COFFModuleDefinition.h"
18146eb7a6SReid Kleckner #include "llvm/ADT/StringRef.h"
19146eb7a6SReid Kleckner #include "llvm/ADT/StringSwitch.h"
20146eb7a6SReid Kleckner #include "llvm/Object/COFF.h"
21146eb7a6SReid Kleckner #include "llvm/Object/COFFImportFile.h"
22146eb7a6SReid Kleckner #include "llvm/Object/Error.h"
23146eb7a6SReid Kleckner #include "llvm/Support/Error.h"
24e234901aSSaleem Abdulrasool #include "llvm/Support/Path.h"
25146eb7a6SReid Kleckner #include "llvm/Support/raw_ostream.h"
26146eb7a6SReid Kleckner 
27146eb7a6SReid Kleckner using namespace llvm::COFF;
28146eb7a6SReid Kleckner using namespace llvm;
29146eb7a6SReid Kleckner 
30146eb7a6SReid Kleckner namespace llvm {
31146eb7a6SReid Kleckner namespace object {
32146eb7a6SReid Kleckner 
/// Kinds of token the .def lexer can hand to the parser.
///
/// Punctuation and each recognised keyword get a kind of their own; every
/// other word or quoted string is reported as Identifier.
enum Kind {
  Unknown,     // Kind of a default-constructed Token.
  Eof,         // End of the input buffer.
  Identifier,  // Bare word or quoted string that is not a keyword.
  Comma,       // ","
  Equal,       // "="
  EqualEqual,  // "=="
  KwBase,      // "BASE"
  KwConstant,  // "CONSTANT"
  KwData,      // "DATA"
  KwExports,   // "EXPORTS"
  KwHeapsize,  // "HEAPSIZE"
  KwLibrary,   // "LIBRARY"
  KwName,      // "NAME"
  KwNoname,    // "NONAME"
  KwPrivate,   // "PRIVATE"
  KwStacksize, // "STACKSIZE"
  KwVersion,   // "VERSION"
};
52146eb7a6SReid Kleckner 
53146eb7a6SReid Kleckner struct Token {
54146eb7a6SReid Kleckner   explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
55146eb7a6SReid Kleckner   Kind K;
56146eb7a6SReid Kleckner   StringRef Value;
57146eb7a6SReid Kleckner };
58146eb7a6SReid Kleckner 
591079ef8dSMartell Malone static bool isDecorated(StringRef Sym, bool MingwDef) {
60843cbbddSMartin Storsjo   // In def files, the symbols can either be listed decorated or undecorated.
61843cbbddSMartin Storsjo   //
62843cbbddSMartin Storsjo   // - For cdecl symbols, only the undecorated form is allowed.
63843cbbddSMartin Storsjo   // - For fastcall and vectorcall symbols, both fully decorated or
64843cbbddSMartin Storsjo   //   undecorated forms can be present.
65843cbbddSMartin Storsjo   // - For stdcall symbols in non-MinGW environments, the decorated form is
66843cbbddSMartin Storsjo   //   fully decorated with leading underscore and trailing stack argument
67843cbbddSMartin Storsjo   //   size - like "_Func@0".
68843cbbddSMartin Storsjo   // - In MinGW def files, a decorated stdcall symbol does not include the
69843cbbddSMartin Storsjo   //   leading underscore though, like "Func@0".
70843cbbddSMartin Storsjo 
71843cbbddSMartin Storsjo   // This function controls whether a leading underscore should be added to
72843cbbddSMartin Storsjo   // the given symbol name or not. For MinGW, treat a stdcall symbol name such
73843cbbddSMartin Storsjo   // as "Func@0" as undecorated, i.e. a leading underscore must be added.
74843cbbddSMartin Storsjo   // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
75843cbbddSMartin Storsjo   // as decorated, i.e. don't add any more leading underscores.
76843cbbddSMartin Storsjo   // We can't check for a leading underscore here, since function names
77843cbbddSMartin Storsjo   // themselves can start with an underscore, while a second one still needs
78843cbbddSMartin Storsjo   // to be added.
79843cbbddSMartin Storsjo   return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
80843cbbddSMartin Storsjo          (!MingwDef && Sym.contains('@'));
81146eb7a6SReid Kleckner }
82146eb7a6SReid Kleckner 
83146eb7a6SReid Kleckner static Error createError(const Twine &Err) {
84146eb7a6SReid Kleckner   return make_error<StringError>(StringRef(Err.str()),
85146eb7a6SReid Kleckner                                  object_error::parse_failed);
86146eb7a6SReid Kleckner }
87146eb7a6SReid Kleckner 
88146eb7a6SReid Kleckner class Lexer {
89146eb7a6SReid Kleckner public:
90146eb7a6SReid Kleckner   Lexer(StringRef S) : Buf(S) {}
91146eb7a6SReid Kleckner 
92146eb7a6SReid Kleckner   Token lex() {
93146eb7a6SReid Kleckner     Buf = Buf.trim();
94146eb7a6SReid Kleckner     if (Buf.empty())
95146eb7a6SReid Kleckner       return Token(Eof);
96146eb7a6SReid Kleckner 
97146eb7a6SReid Kleckner     switch (Buf[0]) {
98146eb7a6SReid Kleckner     case '\0':
99146eb7a6SReid Kleckner       return Token(Eof);
100146eb7a6SReid Kleckner     case ';': {
101146eb7a6SReid Kleckner       size_t End = Buf.find('\n');
102146eb7a6SReid Kleckner       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
103146eb7a6SReid Kleckner       return lex();
104146eb7a6SReid Kleckner     }
105146eb7a6SReid Kleckner     case '=':
106146eb7a6SReid Kleckner       Buf = Buf.drop_front();
107284ab80fSMartin Storsjo       if (Buf.startswith("=")) {
1081079ef8dSMartell Malone         Buf = Buf.drop_front();
109284ab80fSMartin Storsjo         return Token(EqualEqual, "==");
110284ab80fSMartin Storsjo       }
111146eb7a6SReid Kleckner       return Token(Equal, "=");
112146eb7a6SReid Kleckner     case ',':
113146eb7a6SReid Kleckner       Buf = Buf.drop_front();
114146eb7a6SReid Kleckner       return Token(Comma, ",");
115146eb7a6SReid Kleckner     case '"': {
116146eb7a6SReid Kleckner       StringRef S;
117146eb7a6SReid Kleckner       std::tie(S, Buf) = Buf.substr(1).split('"');
118146eb7a6SReid Kleckner       return Token(Identifier, S);
119146eb7a6SReid Kleckner     }
120146eb7a6SReid Kleckner     default: {
121efb5024eSRui Ueyama       size_t End = Buf.find_first_of("=,;\r\n \t\v");
122146eb7a6SReid Kleckner       StringRef Word = Buf.substr(0, End);
123146eb7a6SReid Kleckner       Kind K = llvm::StringSwitch<Kind>(Word)
124146eb7a6SReid Kleckner                    .Case("BASE", KwBase)
125146eb7a6SReid Kleckner                    .Case("CONSTANT", KwConstant)
126146eb7a6SReid Kleckner                    .Case("DATA", KwData)
127146eb7a6SReid Kleckner                    .Case("EXPORTS", KwExports)
128146eb7a6SReid Kleckner                    .Case("HEAPSIZE", KwHeapsize)
129146eb7a6SReid Kleckner                    .Case("LIBRARY", KwLibrary)
130146eb7a6SReid Kleckner                    .Case("NAME", KwName)
131146eb7a6SReid Kleckner                    .Case("NONAME", KwNoname)
132146eb7a6SReid Kleckner                    .Case("PRIVATE", KwPrivate)
133146eb7a6SReid Kleckner                    .Case("STACKSIZE", KwStacksize)
134146eb7a6SReid Kleckner                    .Case("VERSION", KwVersion)
135146eb7a6SReid Kleckner                    .Default(Identifier);
136146eb7a6SReid Kleckner       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
137146eb7a6SReid Kleckner       return Token(K, Word);
138146eb7a6SReid Kleckner     }
139146eb7a6SReid Kleckner     }
140146eb7a6SReid Kleckner   }
141146eb7a6SReid Kleckner 
142146eb7a6SReid Kleckner private:
143146eb7a6SReid Kleckner   StringRef Buf;
144146eb7a6SReid Kleckner };
145146eb7a6SReid Kleckner 
146146eb7a6SReid Kleckner class Parser {
147146eb7a6SReid Kleckner public:
1481079ef8dSMartell Malone   explicit Parser(StringRef S, MachineTypes M, bool B)
1491079ef8dSMartell Malone       : Lex(S), Machine(M), MingwDef(B) {}
150146eb7a6SReid Kleckner 
151146eb7a6SReid Kleckner   Expected<COFFModuleDefinition> parse() {
152146eb7a6SReid Kleckner     do {
153146eb7a6SReid Kleckner       if (Error Err = parseOne())
154146eb7a6SReid Kleckner         return std::move(Err);
155146eb7a6SReid Kleckner     } while (Tok.K != Eof);
156146eb7a6SReid Kleckner     return Info;
157146eb7a6SReid Kleckner   }
158146eb7a6SReid Kleckner 
159146eb7a6SReid Kleckner private:
160146eb7a6SReid Kleckner   void read() {
161146eb7a6SReid Kleckner     if (Stack.empty()) {
162146eb7a6SReid Kleckner       Tok = Lex.lex();
163146eb7a6SReid Kleckner       return;
164146eb7a6SReid Kleckner     }
165146eb7a6SReid Kleckner     Tok = Stack.back();
166146eb7a6SReid Kleckner     Stack.pop_back();
167146eb7a6SReid Kleckner   }
168146eb7a6SReid Kleckner 
169146eb7a6SReid Kleckner   Error readAsInt(uint64_t *I) {
170146eb7a6SReid Kleckner     read();
171146eb7a6SReid Kleckner     if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
172146eb7a6SReid Kleckner       return createError("integer expected");
173146eb7a6SReid Kleckner     return Error::success();
174146eb7a6SReid Kleckner   }
175146eb7a6SReid Kleckner 
176146eb7a6SReid Kleckner   Error expect(Kind Expected, StringRef Msg) {
177146eb7a6SReid Kleckner     read();
178146eb7a6SReid Kleckner     if (Tok.K != Expected)
179146eb7a6SReid Kleckner       return createError(Msg);
180146eb7a6SReid Kleckner     return Error::success();
181146eb7a6SReid Kleckner   }
182146eb7a6SReid Kleckner 
183146eb7a6SReid Kleckner   void unget() { Stack.push_back(Tok); }
184146eb7a6SReid Kleckner 
185146eb7a6SReid Kleckner   Error parseOne() {
186146eb7a6SReid Kleckner     read();
187146eb7a6SReid Kleckner     switch (Tok.K) {
188146eb7a6SReid Kleckner     case Eof:
189146eb7a6SReid Kleckner       return Error::success();
190146eb7a6SReid Kleckner     case KwExports:
191146eb7a6SReid Kleckner       for (;;) {
192146eb7a6SReid Kleckner         read();
193146eb7a6SReid Kleckner         if (Tok.K != Identifier) {
194146eb7a6SReid Kleckner           unget();
195146eb7a6SReid Kleckner           return Error::success();
196146eb7a6SReid Kleckner         }
197146eb7a6SReid Kleckner         if (Error Err = parseExport())
198146eb7a6SReid Kleckner           return Err;
199146eb7a6SReid Kleckner       }
200146eb7a6SReid Kleckner     case KwHeapsize:
201146eb7a6SReid Kleckner       return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
202146eb7a6SReid Kleckner     case KwStacksize:
203146eb7a6SReid Kleckner       return parseNumbers(&Info.StackReserve, &Info.StackCommit);
204146eb7a6SReid Kleckner     case KwLibrary:
205146eb7a6SReid Kleckner     case KwName: {
206146eb7a6SReid Kleckner       bool IsDll = Tok.K == KwLibrary; // Check before parseName.
207146eb7a6SReid Kleckner       std::string Name;
208146eb7a6SReid Kleckner       if (Error Err = parseName(&Name, &Info.ImageBase))
209146eb7a6SReid Kleckner         return Err;
21008e5f685SSaleem Abdulrasool 
21108e5f685SSaleem Abdulrasool       Info.ImportName = Name;
212146eb7a6SReid Kleckner 
213146eb7a6SReid Kleckner       // Set the output file, but don't override /out if it was already passed.
21408e5f685SSaleem Abdulrasool       if (Info.OutputFile.empty()) {
215146eb7a6SReid Kleckner         Info.OutputFile = Name;
21608e5f685SSaleem Abdulrasool         // Append the appropriate file extension if not already present.
21708e5f685SSaleem Abdulrasool         if (!sys::path::has_extension(Name))
21808e5f685SSaleem Abdulrasool           Info.OutputFile += IsDll ? ".dll" : ".exe";
21908e5f685SSaleem Abdulrasool       }
22008e5f685SSaleem Abdulrasool 
221146eb7a6SReid Kleckner       return Error::success();
222146eb7a6SReid Kleckner     }
223146eb7a6SReid Kleckner     case KwVersion:
224146eb7a6SReid Kleckner       return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
225146eb7a6SReid Kleckner     default:
226146eb7a6SReid Kleckner       return createError("unknown directive: " + Tok.Value);
227146eb7a6SReid Kleckner     }
228146eb7a6SReid Kleckner   }
229146eb7a6SReid Kleckner 
230146eb7a6SReid Kleckner   Error parseExport() {
231146eb7a6SReid Kleckner     COFFShortExport E;
232*adcd0268SBenjamin Kramer     E.Name = std::string(Tok.Value);
233146eb7a6SReid Kleckner     read();
234146eb7a6SReid Kleckner     if (Tok.K == Equal) {
235146eb7a6SReid Kleckner       read();
236146eb7a6SReid Kleckner       if (Tok.K != Identifier)
237146eb7a6SReid Kleckner         return createError("identifier expected, but got " + Tok.Value);
238146eb7a6SReid Kleckner       E.ExtName = E.Name;
239*adcd0268SBenjamin Kramer       E.Name = std::string(Tok.Value);
240146eb7a6SReid Kleckner     } else {
241146eb7a6SReid Kleckner       unget();
242146eb7a6SReid Kleckner     }
243146eb7a6SReid Kleckner 
244146eb7a6SReid Kleckner     if (Machine == IMAGE_FILE_MACHINE_I386) {
2451079ef8dSMartell Malone       if (!isDecorated(E.Name, MingwDef))
246146eb7a6SReid Kleckner         E.Name = (std::string("_").append(E.Name));
2471079ef8dSMartell Malone       if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
248146eb7a6SReid Kleckner         E.ExtName = (std::string("_").append(E.ExtName));
249146eb7a6SReid Kleckner     }
250146eb7a6SReid Kleckner 
251146eb7a6SReid Kleckner     for (;;) {
252146eb7a6SReid Kleckner       read();
253146eb7a6SReid Kleckner       if (Tok.K == Identifier && Tok.Value[0] == '@') {
2546c1fd299SMartin Storsjo         if (Tok.Value == "@") {
2556c1fd299SMartin Storsjo           // "foo @ 10"
2566c1fd299SMartin Storsjo           read();
2576c1fd299SMartin Storsjo           Tok.Value.getAsInteger(10, E.Ordinal);
2586c1fd299SMartin Storsjo         } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
2596c1fd299SMartin Storsjo           // "foo \n @bar" - Not an ordinal modifier at all, but the next
2606c1fd299SMartin Storsjo           // export (fastcall decorated) - complete the current one.
26158c9527eSMartin Storsjo           unget();
26258c9527eSMartin Storsjo           Info.Exports.push_back(E);
26358c9527eSMartin Storsjo           return Error::success();
26458c9527eSMartin Storsjo         }
2656c1fd299SMartin Storsjo         // "foo @10"
266146eb7a6SReid Kleckner         read();
267146eb7a6SReid Kleckner         if (Tok.K == KwNoname) {
268146eb7a6SReid Kleckner           E.Noname = true;
269146eb7a6SReid Kleckner         } else {
270146eb7a6SReid Kleckner           unget();
271146eb7a6SReid Kleckner         }
272146eb7a6SReid Kleckner         continue;
273146eb7a6SReid Kleckner       }
274146eb7a6SReid Kleckner       if (Tok.K == KwData) {
275146eb7a6SReid Kleckner         E.Data = true;
276146eb7a6SReid Kleckner         continue;
277146eb7a6SReid Kleckner       }
278146eb7a6SReid Kleckner       if (Tok.K == KwConstant) {
279146eb7a6SReid Kleckner         E.Constant = true;
280146eb7a6SReid Kleckner         continue;
281146eb7a6SReid Kleckner       }
282146eb7a6SReid Kleckner       if (Tok.K == KwPrivate) {
283146eb7a6SReid Kleckner         E.Private = true;
284146eb7a6SReid Kleckner         continue;
285146eb7a6SReid Kleckner       }
286284ab80fSMartin Storsjo       if (Tok.K == EqualEqual) {
287284ab80fSMartin Storsjo         read();
288*adcd0268SBenjamin Kramer         E.AliasTarget = std::string(Tok.Value);
289284ab80fSMartin Storsjo         if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef))
290284ab80fSMartin Storsjo           E.AliasTarget = std::string("_").append(E.AliasTarget);
291284ab80fSMartin Storsjo         continue;
292284ab80fSMartin Storsjo       }
293146eb7a6SReid Kleckner       unget();
294146eb7a6SReid Kleckner       Info.Exports.push_back(E);
295146eb7a6SReid Kleckner       return Error::success();
296146eb7a6SReid Kleckner     }
297146eb7a6SReid Kleckner   }
298146eb7a6SReid Kleckner 
299146eb7a6SReid Kleckner   // HEAPSIZE/STACKSIZE reserve[,commit]
300146eb7a6SReid Kleckner   Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
301146eb7a6SReid Kleckner     if (Error Err = readAsInt(Reserve))
302146eb7a6SReid Kleckner       return Err;
303146eb7a6SReid Kleckner     read();
304146eb7a6SReid Kleckner     if (Tok.K != Comma) {
305146eb7a6SReid Kleckner       unget();
306146eb7a6SReid Kleckner       Commit = nullptr;
307146eb7a6SReid Kleckner       return Error::success();
308146eb7a6SReid Kleckner     }
309146eb7a6SReid Kleckner     if (Error Err = readAsInt(Commit))
310146eb7a6SReid Kleckner       return Err;
311146eb7a6SReid Kleckner     return Error::success();
312146eb7a6SReid Kleckner   }
313146eb7a6SReid Kleckner 
314146eb7a6SReid Kleckner   // NAME outputPath [BASE=address]
315146eb7a6SReid Kleckner   Error parseName(std::string *Out, uint64_t *Baseaddr) {
316146eb7a6SReid Kleckner     read();
317146eb7a6SReid Kleckner     if (Tok.K == Identifier) {
318*adcd0268SBenjamin Kramer       *Out = std::string(Tok.Value);
319146eb7a6SReid Kleckner     } else {
320146eb7a6SReid Kleckner       *Out = "";
321146eb7a6SReid Kleckner       unget();
322146eb7a6SReid Kleckner       return Error::success();
323146eb7a6SReid Kleckner     }
324146eb7a6SReid Kleckner     read();
325146eb7a6SReid Kleckner     if (Tok.K == KwBase) {
326146eb7a6SReid Kleckner       if (Error Err = expect(Equal, "'=' expected"))
327146eb7a6SReid Kleckner         return Err;
328146eb7a6SReid Kleckner       if (Error Err = readAsInt(Baseaddr))
329146eb7a6SReid Kleckner         return Err;
330146eb7a6SReid Kleckner     } else {
331146eb7a6SReid Kleckner       unget();
332146eb7a6SReid Kleckner       *Baseaddr = 0;
333146eb7a6SReid Kleckner     }
334146eb7a6SReid Kleckner     return Error::success();
335146eb7a6SReid Kleckner   }
336146eb7a6SReid Kleckner 
337146eb7a6SReid Kleckner   // VERSION major[.minor]
338146eb7a6SReid Kleckner   Error parseVersion(uint32_t *Major, uint32_t *Minor) {
339146eb7a6SReid Kleckner     read();
340146eb7a6SReid Kleckner     if (Tok.K != Identifier)
341146eb7a6SReid Kleckner       return createError("identifier expected, but got " + Tok.Value);
342146eb7a6SReid Kleckner     StringRef V1, V2;
343146eb7a6SReid Kleckner     std::tie(V1, V2) = Tok.Value.split('.');
344146eb7a6SReid Kleckner     if (V1.getAsInteger(10, *Major))
345146eb7a6SReid Kleckner       return createError("integer expected, but got " + Tok.Value);
346146eb7a6SReid Kleckner     if (V2.empty())
347146eb7a6SReid Kleckner       *Minor = 0;
348146eb7a6SReid Kleckner     else if (V2.getAsInteger(10, *Minor))
349146eb7a6SReid Kleckner       return createError("integer expected, but got " + Tok.Value);
350146eb7a6SReid Kleckner     return Error::success();
351146eb7a6SReid Kleckner   }
352146eb7a6SReid Kleckner 
353146eb7a6SReid Kleckner   Lexer Lex;
354146eb7a6SReid Kleckner   Token Tok;
355146eb7a6SReid Kleckner   std::vector<Token> Stack;
356146eb7a6SReid Kleckner   MachineTypes Machine;
357146eb7a6SReid Kleckner   COFFModuleDefinition Info;
3581079ef8dSMartell Malone   bool MingwDef;
359146eb7a6SReid Kleckner };
360146eb7a6SReid Kleckner 
361146eb7a6SReid Kleckner Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
3621079ef8dSMartell Malone                                                          MachineTypes Machine,
3631079ef8dSMartell Malone                                                          bool MingwDef) {
3641079ef8dSMartell Malone   return Parser(MB.getBuffer(), Machine, MingwDef).parse();
365146eb7a6SReid Kleckner }
366146eb7a6SReid Kleckner 
367146eb7a6SReid Kleckner } // namespace object
368146eb7a6SReid Kleckner } // namespace llvm
369