//===--- COFFModuleDefinition.cpp - Simple DEF parser ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Windows-specific.
// A parser for the module-definition file (.def file).
//
// The format of module-definition files is described in this document:
// https://msdn.microsoft.com/en-us/library/28d6s79h.aspx
//
//===----------------------------------------------------------------------===//
17d8866befSDimitry Andric 
18d8866befSDimitry Andric #include "llvm/Object/COFFModuleDefinition.h"
19d8866befSDimitry Andric #include "llvm/ADT/StringRef.h"
20d8866befSDimitry Andric #include "llvm/ADT/StringSwitch.h"
21d8866befSDimitry Andric #include "llvm/Object/COFF.h"
22d8866befSDimitry Andric #include "llvm/Object/COFFImportFile.h"
23d8866befSDimitry Andric #include "llvm/Object/Error.h"
24d8866befSDimitry Andric #include "llvm/Support/Error.h"
25b40b48b8SDimitry Andric #include "llvm/Support/Path.h"
26d8866befSDimitry Andric #include "llvm/Support/raw_ostream.h"
27d8866befSDimitry Andric 
28d8866befSDimitry Andric using namespace llvm::COFF;
29d8866befSDimitry Andric using namespace llvm;
30d8866befSDimitry Andric 
31d8866befSDimitry Andric namespace llvm {
32d8866befSDimitry Andric namespace object {
33d8866befSDimitry Andric 
/// Kinds of token produced by the module-definition lexer.
///
/// The Kw* enumerators correspond to the upper-case keywords the lexer
/// recognizes (e.g. KwExports for "EXPORTS"); every other bare word or quoted
/// string is an Identifier.
enum Kind {
  Unknown,    // Kind of a default-constructed Token.
  Eof,        // End of input.
  Identifier, // Bare word or quoted string that is not a keyword.
  Comma,      // ","
  Equal,      // "="
  EqualEqual, // "=="
  KwBase,
  KwConstant,
  KwData,
  KwExports,
  KwHeapsize,
  KwLibrary,
  KwName,
  KwNoname,
  KwPrivate,
  KwStacksize,
  KwVersion,
};
53d8866befSDimitry Andric 
54d8866befSDimitry Andric struct Token {
Tokenllvm::object::Token55d8866befSDimitry Andric   explicit Token(Kind T = Unknown, StringRef S = "") : K(T), Value(S) {}
56d8866befSDimitry Andric   Kind K;
57d8866befSDimitry Andric   StringRef Value;
58d8866befSDimitry Andric };
59d8866befSDimitry Andric 
isDecorated(StringRef Sym,bool MingwDef)60b40b48b8SDimitry Andric static bool isDecorated(StringRef Sym, bool MingwDef) {
612cab237bSDimitry Andric   // In def files, the symbols can either be listed decorated or undecorated.
622cab237bSDimitry Andric   //
632cab237bSDimitry Andric   // - For cdecl symbols, only the undecorated form is allowed.
642cab237bSDimitry Andric   // - For fastcall and vectorcall symbols, both fully decorated or
652cab237bSDimitry Andric   //   undecorated forms can be present.
662cab237bSDimitry Andric   // - For stdcall symbols in non-MinGW environments, the decorated form is
672cab237bSDimitry Andric   //   fully decorated with leading underscore and trailing stack argument
682cab237bSDimitry Andric   //   size - like "_Func@0".
692cab237bSDimitry Andric   // - In MinGW def files, a decorated stdcall symbol does not include the
702cab237bSDimitry Andric   //   leading underscore though, like "Func@0".
712cab237bSDimitry Andric 
722cab237bSDimitry Andric   // This function controls whether a leading underscore should be added to
732cab237bSDimitry Andric   // the given symbol name or not. For MinGW, treat a stdcall symbol name such
742cab237bSDimitry Andric   // as "Func@0" as undecorated, i.e. a leading underscore must be added.
752cab237bSDimitry Andric   // For non-MinGW, look for '@' in the whole string and consider "_Func@0"
762cab237bSDimitry Andric   // as decorated, i.e. don't add any more leading underscores.
772cab237bSDimitry Andric   // We can't check for a leading underscore here, since function names
782cab237bSDimitry Andric   // themselves can start with an underscore, while a second one still needs
792cab237bSDimitry Andric   // to be added.
802cab237bSDimitry Andric   return Sym.startswith("@") || Sym.contains("@@") || Sym.startswith("?") ||
812cab237bSDimitry Andric          (!MingwDef && Sym.contains('@'));
82d8866befSDimitry Andric }
83d8866befSDimitry Andric 
createError(const Twine & Err)84d8866befSDimitry Andric static Error createError(const Twine &Err) {
85d8866befSDimitry Andric   return make_error<StringError>(StringRef(Err.str()),
86d8866befSDimitry Andric                                  object_error::parse_failed);
87d8866befSDimitry Andric }
88d8866befSDimitry Andric 
89d8866befSDimitry Andric class Lexer {
90d8866befSDimitry Andric public:
Lexer(StringRef S)91d8866befSDimitry Andric   Lexer(StringRef S) : Buf(S) {}
92d8866befSDimitry Andric 
lex()93d8866befSDimitry Andric   Token lex() {
94d8866befSDimitry Andric     Buf = Buf.trim();
95d8866befSDimitry Andric     if (Buf.empty())
96d8866befSDimitry Andric       return Token(Eof);
97d8866befSDimitry Andric 
98d8866befSDimitry Andric     switch (Buf[0]) {
99d8866befSDimitry Andric     case '\0':
100d8866befSDimitry Andric       return Token(Eof);
101d8866befSDimitry Andric     case ';': {
102d8866befSDimitry Andric       size_t End = Buf.find('\n');
103d8866befSDimitry Andric       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
104d8866befSDimitry Andric       return lex();
105d8866befSDimitry Andric     }
106d8866befSDimitry Andric     case '=':
107d8866befSDimitry Andric       Buf = Buf.drop_front();
108*4ba319b5SDimitry Andric       if (Buf.startswith("=")) {
109b40b48b8SDimitry Andric         Buf = Buf.drop_front();
110*4ba319b5SDimitry Andric         return Token(EqualEqual, "==");
111*4ba319b5SDimitry Andric       }
112d8866befSDimitry Andric       return Token(Equal, "=");
113d8866befSDimitry Andric     case ',':
114d8866befSDimitry Andric       Buf = Buf.drop_front();
115d8866befSDimitry Andric       return Token(Comma, ",");
116d8866befSDimitry Andric     case '"': {
117d8866befSDimitry Andric       StringRef S;
118d8866befSDimitry Andric       std::tie(S, Buf) = Buf.substr(1).split('"');
119d8866befSDimitry Andric       return Token(Identifier, S);
120d8866befSDimitry Andric     }
121d8866befSDimitry Andric     default: {
1222cab237bSDimitry Andric       size_t End = Buf.find_first_of("=,;\r\n \t\v");
123d8866befSDimitry Andric       StringRef Word = Buf.substr(0, End);
124d8866befSDimitry Andric       Kind K = llvm::StringSwitch<Kind>(Word)
125d8866befSDimitry Andric                    .Case("BASE", KwBase)
126d8866befSDimitry Andric                    .Case("CONSTANT", KwConstant)
127d8866befSDimitry Andric                    .Case("DATA", KwData)
128d8866befSDimitry Andric                    .Case("EXPORTS", KwExports)
129d8866befSDimitry Andric                    .Case("HEAPSIZE", KwHeapsize)
130d8866befSDimitry Andric                    .Case("LIBRARY", KwLibrary)
131d8866befSDimitry Andric                    .Case("NAME", KwName)
132d8866befSDimitry Andric                    .Case("NONAME", KwNoname)
133d8866befSDimitry Andric                    .Case("PRIVATE", KwPrivate)
134d8866befSDimitry Andric                    .Case("STACKSIZE", KwStacksize)
135d8866befSDimitry Andric                    .Case("VERSION", KwVersion)
136d8866befSDimitry Andric                    .Default(Identifier);
137d8866befSDimitry Andric       Buf = (End == Buf.npos) ? "" : Buf.drop_front(End);
138d8866befSDimitry Andric       return Token(K, Word);
139d8866befSDimitry Andric     }
140d8866befSDimitry Andric     }
141d8866befSDimitry Andric   }
142d8866befSDimitry Andric 
143d8866befSDimitry Andric private:
144d8866befSDimitry Andric   StringRef Buf;
145d8866befSDimitry Andric };
146d8866befSDimitry Andric 
147d8866befSDimitry Andric class Parser {
148d8866befSDimitry Andric public:
Parser(StringRef S,MachineTypes M,bool B)149b40b48b8SDimitry Andric   explicit Parser(StringRef S, MachineTypes M, bool B)
150b40b48b8SDimitry Andric       : Lex(S), Machine(M), MingwDef(B) {}
151d8866befSDimitry Andric 
parse()152d8866befSDimitry Andric   Expected<COFFModuleDefinition> parse() {
153d8866befSDimitry Andric     do {
154d8866befSDimitry Andric       if (Error Err = parseOne())
155d8866befSDimitry Andric         return std::move(Err);
156d8866befSDimitry Andric     } while (Tok.K != Eof);
157d8866befSDimitry Andric     return Info;
158d8866befSDimitry Andric   }
159d8866befSDimitry Andric 
160d8866befSDimitry Andric private:
read()161d8866befSDimitry Andric   void read() {
162d8866befSDimitry Andric     if (Stack.empty()) {
163d8866befSDimitry Andric       Tok = Lex.lex();
164d8866befSDimitry Andric       return;
165d8866befSDimitry Andric     }
166d8866befSDimitry Andric     Tok = Stack.back();
167d8866befSDimitry Andric     Stack.pop_back();
168d8866befSDimitry Andric   }
169d8866befSDimitry Andric 
readAsInt(uint64_t * I)170d8866befSDimitry Andric   Error readAsInt(uint64_t *I) {
171d8866befSDimitry Andric     read();
172d8866befSDimitry Andric     if (Tok.K != Identifier || Tok.Value.getAsInteger(10, *I))
173d8866befSDimitry Andric       return createError("integer expected");
174d8866befSDimitry Andric     return Error::success();
175d8866befSDimitry Andric   }
176d8866befSDimitry Andric 
expect(Kind Expected,StringRef Msg)177d8866befSDimitry Andric   Error expect(Kind Expected, StringRef Msg) {
178d8866befSDimitry Andric     read();
179d8866befSDimitry Andric     if (Tok.K != Expected)
180d8866befSDimitry Andric       return createError(Msg);
181d8866befSDimitry Andric     return Error::success();
182d8866befSDimitry Andric   }
183d8866befSDimitry Andric 
unget()184d8866befSDimitry Andric   void unget() { Stack.push_back(Tok); }
185d8866befSDimitry Andric 
parseOne()186d8866befSDimitry Andric   Error parseOne() {
187d8866befSDimitry Andric     read();
188d8866befSDimitry Andric     switch (Tok.K) {
189d8866befSDimitry Andric     case Eof:
190d8866befSDimitry Andric       return Error::success();
191d8866befSDimitry Andric     case KwExports:
192d8866befSDimitry Andric       for (;;) {
193d8866befSDimitry Andric         read();
194d8866befSDimitry Andric         if (Tok.K != Identifier) {
195d8866befSDimitry Andric           unget();
196d8866befSDimitry Andric           return Error::success();
197d8866befSDimitry Andric         }
198d8866befSDimitry Andric         if (Error Err = parseExport())
199d8866befSDimitry Andric           return Err;
200d8866befSDimitry Andric       }
201d8866befSDimitry Andric     case KwHeapsize:
202d8866befSDimitry Andric       return parseNumbers(&Info.HeapReserve, &Info.HeapCommit);
203d8866befSDimitry Andric     case KwStacksize:
204d8866befSDimitry Andric       return parseNumbers(&Info.StackReserve, &Info.StackCommit);
205d8866befSDimitry Andric     case KwLibrary:
206d8866befSDimitry Andric     case KwName: {
207d8866befSDimitry Andric       bool IsDll = Tok.K == KwLibrary; // Check before parseName.
208d8866befSDimitry Andric       std::string Name;
209d8866befSDimitry Andric       if (Error Err = parseName(&Name, &Info.ImageBase))
210d8866befSDimitry Andric         return Err;
211b40b48b8SDimitry Andric 
212b40b48b8SDimitry Andric       Info.ImportName = Name;
213d8866befSDimitry Andric 
214d8866befSDimitry Andric       // Set the output file, but don't override /out if it was already passed.
215b40b48b8SDimitry Andric       if (Info.OutputFile.empty()) {
216d8866befSDimitry Andric         Info.OutputFile = Name;
217b40b48b8SDimitry Andric         // Append the appropriate file extension if not already present.
218b40b48b8SDimitry Andric         if (!sys::path::has_extension(Name))
219b40b48b8SDimitry Andric           Info.OutputFile += IsDll ? ".dll" : ".exe";
220b40b48b8SDimitry Andric       }
221b40b48b8SDimitry Andric 
222d8866befSDimitry Andric       return Error::success();
223d8866befSDimitry Andric     }
224d8866befSDimitry Andric     case KwVersion:
225d8866befSDimitry Andric       return parseVersion(&Info.MajorImageVersion, &Info.MinorImageVersion);
226d8866befSDimitry Andric     default:
227d8866befSDimitry Andric       return createError("unknown directive: " + Tok.Value);
228d8866befSDimitry Andric     }
229d8866befSDimitry Andric   }
230d8866befSDimitry Andric 
parseExport()231d8866befSDimitry Andric   Error parseExport() {
232d8866befSDimitry Andric     COFFShortExport E;
233d8866befSDimitry Andric     E.Name = Tok.Value;
234d8866befSDimitry Andric     read();
235d8866befSDimitry Andric     if (Tok.K == Equal) {
236d8866befSDimitry Andric       read();
237d8866befSDimitry Andric       if (Tok.K != Identifier)
238d8866befSDimitry Andric         return createError("identifier expected, but got " + Tok.Value);
239d8866befSDimitry Andric       E.ExtName = E.Name;
240d8866befSDimitry Andric       E.Name = Tok.Value;
241d8866befSDimitry Andric     } else {
242d8866befSDimitry Andric       unget();
243d8866befSDimitry Andric     }
244d8866befSDimitry Andric 
245d8866befSDimitry Andric     if (Machine == IMAGE_FILE_MACHINE_I386) {
246b40b48b8SDimitry Andric       if (!isDecorated(E.Name, MingwDef))
247d8866befSDimitry Andric         E.Name = (std::string("_").append(E.Name));
248b40b48b8SDimitry Andric       if (!E.ExtName.empty() && !isDecorated(E.ExtName, MingwDef))
249d8866befSDimitry Andric         E.ExtName = (std::string("_").append(E.ExtName));
250d8866befSDimitry Andric     }
251d8866befSDimitry Andric 
252d8866befSDimitry Andric     for (;;) {
253d8866befSDimitry Andric       read();
254d8866befSDimitry Andric       if (Tok.K == Identifier && Tok.Value[0] == '@') {
2552cab237bSDimitry Andric         if (Tok.Value == "@") {
2562cab237bSDimitry Andric           // "foo @ 10"
2572cab237bSDimitry Andric           read();
2582cab237bSDimitry Andric           Tok.Value.getAsInteger(10, E.Ordinal);
2592cab237bSDimitry Andric         } else if (Tok.Value.drop_front().getAsInteger(10, E.Ordinal)) {
2602cab237bSDimitry Andric           // "foo \n @bar" - Not an ordinal modifier at all, but the next
2612cab237bSDimitry Andric           // export (fastcall decorated) - complete the current one.
2620fa43771SDimitry Andric           unget();
2630fa43771SDimitry Andric           Info.Exports.push_back(E);
2640fa43771SDimitry Andric           return Error::success();
2650fa43771SDimitry Andric         }
2662cab237bSDimitry Andric         // "foo @10"
267d8866befSDimitry Andric         read();
268d8866befSDimitry Andric         if (Tok.K == KwNoname) {
269d8866befSDimitry Andric           E.Noname = true;
270d8866befSDimitry Andric         } else {
271d8866befSDimitry Andric           unget();
272d8866befSDimitry Andric         }
273d8866befSDimitry Andric         continue;
274d8866befSDimitry Andric       }
275d8866befSDimitry Andric       if (Tok.K == KwData) {
276d8866befSDimitry Andric         E.Data = true;
277d8866befSDimitry Andric         continue;
278d8866befSDimitry Andric       }
279d8866befSDimitry Andric       if (Tok.K == KwConstant) {
280d8866befSDimitry Andric         E.Constant = true;
281d8866befSDimitry Andric         continue;
282d8866befSDimitry Andric       }
283d8866befSDimitry Andric       if (Tok.K == KwPrivate) {
284d8866befSDimitry Andric         E.Private = true;
285d8866befSDimitry Andric         continue;
286d8866befSDimitry Andric       }
287*4ba319b5SDimitry Andric       if (Tok.K == EqualEqual) {
288*4ba319b5SDimitry Andric         read();
289*4ba319b5SDimitry Andric         E.AliasTarget = Tok.Value;
290*4ba319b5SDimitry Andric         if (Machine == IMAGE_FILE_MACHINE_I386 && !isDecorated(E.AliasTarget, MingwDef))
291*4ba319b5SDimitry Andric           E.AliasTarget = std::string("_").append(E.AliasTarget);
292*4ba319b5SDimitry Andric         continue;
293*4ba319b5SDimitry Andric       }
294d8866befSDimitry Andric       unget();
295d8866befSDimitry Andric       Info.Exports.push_back(E);
296d8866befSDimitry Andric       return Error::success();
297d8866befSDimitry Andric     }
298d8866befSDimitry Andric   }
299d8866befSDimitry Andric 
300d8866befSDimitry Andric   // HEAPSIZE/STACKSIZE reserve[,commit]
parseNumbers(uint64_t * Reserve,uint64_t * Commit)301d8866befSDimitry Andric   Error parseNumbers(uint64_t *Reserve, uint64_t *Commit) {
302d8866befSDimitry Andric     if (Error Err = readAsInt(Reserve))
303d8866befSDimitry Andric       return Err;
304d8866befSDimitry Andric     read();
305d8866befSDimitry Andric     if (Tok.K != Comma) {
306d8866befSDimitry Andric       unget();
307d8866befSDimitry Andric       Commit = nullptr;
308d8866befSDimitry Andric       return Error::success();
309d8866befSDimitry Andric     }
310d8866befSDimitry Andric     if (Error Err = readAsInt(Commit))
311d8866befSDimitry Andric       return Err;
312d8866befSDimitry Andric     return Error::success();
313d8866befSDimitry Andric   }
314d8866befSDimitry Andric 
315d8866befSDimitry Andric   // NAME outputPath [BASE=address]
parseName(std::string * Out,uint64_t * Baseaddr)316d8866befSDimitry Andric   Error parseName(std::string *Out, uint64_t *Baseaddr) {
317d8866befSDimitry Andric     read();
318d8866befSDimitry Andric     if (Tok.K == Identifier) {
319d8866befSDimitry Andric       *Out = Tok.Value;
320d8866befSDimitry Andric     } else {
321d8866befSDimitry Andric       *Out = "";
322d8866befSDimitry Andric       unget();
323d8866befSDimitry Andric       return Error::success();
324d8866befSDimitry Andric     }
325d8866befSDimitry Andric     read();
326d8866befSDimitry Andric     if (Tok.K == KwBase) {
327d8866befSDimitry Andric       if (Error Err = expect(Equal, "'=' expected"))
328d8866befSDimitry Andric         return Err;
329d8866befSDimitry Andric       if (Error Err = readAsInt(Baseaddr))
330d8866befSDimitry Andric         return Err;
331d8866befSDimitry Andric     } else {
332d8866befSDimitry Andric       unget();
333d8866befSDimitry Andric       *Baseaddr = 0;
334d8866befSDimitry Andric     }
335d8866befSDimitry Andric     return Error::success();
336d8866befSDimitry Andric   }
337d8866befSDimitry Andric 
338d8866befSDimitry Andric   // VERSION major[.minor]
parseVersion(uint32_t * Major,uint32_t * Minor)339d8866befSDimitry Andric   Error parseVersion(uint32_t *Major, uint32_t *Minor) {
340d8866befSDimitry Andric     read();
341d8866befSDimitry Andric     if (Tok.K != Identifier)
342d8866befSDimitry Andric       return createError("identifier expected, but got " + Tok.Value);
343d8866befSDimitry Andric     StringRef V1, V2;
344d8866befSDimitry Andric     std::tie(V1, V2) = Tok.Value.split('.');
345d8866befSDimitry Andric     if (V1.getAsInteger(10, *Major))
346d8866befSDimitry Andric       return createError("integer expected, but got " + Tok.Value);
347d8866befSDimitry Andric     if (V2.empty())
348d8866befSDimitry Andric       *Minor = 0;
349d8866befSDimitry Andric     else if (V2.getAsInteger(10, *Minor))
350d8866befSDimitry Andric       return createError("integer expected, but got " + Tok.Value);
351d8866befSDimitry Andric     return Error::success();
352d8866befSDimitry Andric   }
353d8866befSDimitry Andric 
354d8866befSDimitry Andric   Lexer Lex;
355d8866befSDimitry Andric   Token Tok;
356d8866befSDimitry Andric   std::vector<Token> Stack;
357d8866befSDimitry Andric   MachineTypes Machine;
358d8866befSDimitry Andric   COFFModuleDefinition Info;
359b40b48b8SDimitry Andric   bool MingwDef;
360d8866befSDimitry Andric };
361d8866befSDimitry Andric 
parseCOFFModuleDefinition(MemoryBufferRef MB,MachineTypes Machine,bool MingwDef)362d8866befSDimitry Andric Expected<COFFModuleDefinition> parseCOFFModuleDefinition(MemoryBufferRef MB,
363b40b48b8SDimitry Andric                                                          MachineTypes Machine,
364b40b48b8SDimitry Andric                                                          bool MingwDef) {
365b40b48b8SDimitry Andric   return Parser(MB.getBuffer(), Machine, MingwDef).parse();
366d8866befSDimitry Andric }
367d8866befSDimitry Andric 
368d8866befSDimitry Andric } // namespace object
369d8866befSDimitry Andric } // namespace llvm
370