1e336b74cSManuel Klimek //===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===//
2e336b74cSManuel Klimek //
3c874dd53SChristopher Di Bella // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4c874dd53SChristopher Di Bella // See https://llvm.org/LICENSE.txt for license information.
5c874dd53SChristopher Di Bella // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e336b74cSManuel Klimek //
7e336b74cSManuel Klimek //===----------------------------------------------------------------------===//
8e336b74cSManuel Klimek ///
9e336b74cSManuel Klimek /// \file
10e336b74cSManuel Klimek /// This file contains the implementation of MacroExpander, which handles macro
11e336b74cSManuel Klimek /// configuration and expansion while formatting.
12e336b74cSManuel Klimek ///
13e336b74cSManuel Klimek //===----------------------------------------------------------------------===//
14e336b74cSManuel Klimek 
15e336b74cSManuel Klimek #include "Macros.h"
16e336b74cSManuel Klimek 
17e336b74cSManuel Klimek #include "Encoding.h"
18e336b74cSManuel Klimek #include "FormatToken.h"
19e336b74cSManuel Klimek #include "FormatTokenLexer.h"
20e336b74cSManuel Klimek #include "clang/Basic/TokenKinds.h"
21e336b74cSManuel Klimek #include "clang/Format/Format.h"
22e336b74cSManuel Klimek #include "clang/Lex/HeaderSearch.h"
23e336b74cSManuel Klimek #include "clang/Lex/HeaderSearchOptions.h"
24e336b74cSManuel Klimek #include "clang/Lex/Lexer.h"
25e336b74cSManuel Klimek #include "clang/Lex/ModuleLoader.h"
26e336b74cSManuel Klimek #include "clang/Lex/Preprocessor.h"
27e336b74cSManuel Klimek #include "clang/Lex/PreprocessorOptions.h"
28e336b74cSManuel Klimek #include "llvm/ADT/StringSet.h"
29e336b74cSManuel Klimek #include "llvm/Support/ErrorHandling.h"
30e336b74cSManuel Klimek 
31e336b74cSManuel Klimek namespace clang {
32e336b74cSManuel Klimek namespace format {
33e336b74cSManuel Klimek 
34e336b74cSManuel Klimek struct MacroExpander::Definition {
35e336b74cSManuel Klimek   StringRef Name;
36e336b74cSManuel Klimek   SmallVector<FormatToken *, 8> Params;
37e336b74cSManuel Klimek   SmallVector<FormatToken *, 8> Body;
38e336b74cSManuel Klimek 
39e336b74cSManuel Klimek   // Map from each argument's name to its position in the argument list.
40e336b74cSManuel Klimek   // With "M(x, y) x + y":
41e336b74cSManuel Klimek   //   x -> 0
42e336b74cSManuel Klimek   //   y -> 1
43e336b74cSManuel Klimek   llvm::StringMap<size_t> ArgMap;
44e336b74cSManuel Klimek 
45e336b74cSManuel Klimek   bool ObjectLike = true;
46e336b74cSManuel Klimek };
47e336b74cSManuel Klimek 
48e336b74cSManuel Klimek class MacroExpander::DefinitionParser {
49e336b74cSManuel Klimek public:
DefinitionParser(ArrayRef<FormatToken * > Tokens)50e336b74cSManuel Klimek   DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) {
51e336b74cSManuel Klimek     assert(!Tokens.empty());
52e336b74cSManuel Klimek     Current = Tokens[0];
53e336b74cSManuel Klimek   }
54e336b74cSManuel Klimek 
55f1191705SNico Weber   // Parse the token stream and return the corresponding Definition object.
56e336b74cSManuel Klimek   // Returns an empty definition object with a null-Name on error.
parse()57e336b74cSManuel Klimek   MacroExpander::Definition parse() {
58e336b74cSManuel Klimek     if (!Current->is(tok::identifier))
59e336b74cSManuel Klimek       return {};
60e336b74cSManuel Klimek     Def.Name = Current->TokenText;
61e336b74cSManuel Klimek     nextToken();
62e336b74cSManuel Klimek     if (Current->is(tok::l_paren)) {
63e336b74cSManuel Klimek       Def.ObjectLike = false;
64e336b74cSManuel Klimek       if (!parseParams())
65e336b74cSManuel Klimek         return {};
66e336b74cSManuel Klimek     }
67e336b74cSManuel Klimek     if (!parseExpansion())
68e336b74cSManuel Klimek       return {};
69e336b74cSManuel Klimek 
70e336b74cSManuel Klimek     return Def;
71e336b74cSManuel Klimek   }
72e336b74cSManuel Klimek 
73e336b74cSManuel Klimek private:
parseParams()74e336b74cSManuel Klimek   bool parseParams() {
75e336b74cSManuel Klimek     assert(Current->is(tok::l_paren));
76e336b74cSManuel Klimek     nextToken();
77e336b74cSManuel Klimek     while (Current->is(tok::identifier)) {
78e336b74cSManuel Klimek       Def.Params.push_back(Current);
79e336b74cSManuel Klimek       Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1;
80e336b74cSManuel Klimek       nextToken();
81e336b74cSManuel Klimek       if (Current->isNot(tok::comma))
82e336b74cSManuel Klimek         break;
83e336b74cSManuel Klimek       nextToken();
84e336b74cSManuel Klimek     }
85e336b74cSManuel Klimek     if (Current->isNot(tok::r_paren))
86e336b74cSManuel Klimek       return false;
87e336b74cSManuel Klimek     nextToken();
88e336b74cSManuel Klimek     return true;
89e336b74cSManuel Klimek   }
90e336b74cSManuel Klimek 
parseExpansion()91e336b74cSManuel Klimek   bool parseExpansion() {
92e336b74cSManuel Klimek     if (!Current->isOneOf(tok::equal, tok::eof))
93e336b74cSManuel Klimek       return false;
94e336b74cSManuel Klimek     if (Current->is(tok::equal))
95e336b74cSManuel Klimek       nextToken();
96e336b74cSManuel Klimek     parseTail();
97e336b74cSManuel Klimek     return true;
98e336b74cSManuel Klimek   }
99e336b74cSManuel Klimek 
parseTail()100e336b74cSManuel Klimek   void parseTail() {
101e336b74cSManuel Klimek     while (Current->isNot(tok::eof)) {
102e336b74cSManuel Klimek       Def.Body.push_back(Current);
103e336b74cSManuel Klimek       nextToken();
104e336b74cSManuel Klimek     }
105e336b74cSManuel Klimek     Def.Body.push_back(Current);
106e336b74cSManuel Klimek   }
107e336b74cSManuel Klimek 
nextToken()108e336b74cSManuel Klimek   void nextToken() {
109e336b74cSManuel Klimek     if (Pos + 1 < Tokens.size())
110e336b74cSManuel Klimek       ++Pos;
111e336b74cSManuel Klimek     Current = Tokens[Pos];
112e336b74cSManuel Klimek     Current->Finalized = true;
113e336b74cSManuel Klimek   }
114e336b74cSManuel Klimek 
115e336b74cSManuel Klimek   size_t Pos = 0;
116e336b74cSManuel Klimek   FormatToken *Current = nullptr;
117e336b74cSManuel Klimek   Definition Def;
118e336b74cSManuel Klimek   ArrayRef<FormatToken *> Tokens;
119e336b74cSManuel Klimek };
120e336b74cSManuel Klimek 
MacroExpander(const std::vector<std::string> & Macros,clang::SourceManager & SourceMgr,const FormatStyle & Style,llvm::SpecificBumpPtrAllocator<FormatToken> & Allocator,IdentifierTable & IdentTable)121e336b74cSManuel Klimek MacroExpander::MacroExpander(
122e336b74cSManuel Klimek     const std::vector<std::string> &Macros, clang::SourceManager &SourceMgr,
123e336b74cSManuel Klimek     const FormatStyle &Style,
124e336b74cSManuel Klimek     llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
125e336b74cSManuel Klimek     IdentifierTable &IdentTable)
126e336b74cSManuel Klimek     : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator),
127e336b74cSManuel Klimek       IdentTable(IdentTable) {
128*630c7360SMarek Kurdej   for (const std::string &Macro : Macros)
129e336b74cSManuel Klimek     parseDefinition(Macro);
130e336b74cSManuel Klimek }
131e336b74cSManuel Klimek 
132e336b74cSManuel Klimek MacroExpander::~MacroExpander() = default;
133e336b74cSManuel Klimek 
parseDefinition(const std::string & Macro)134e336b74cSManuel Klimek void MacroExpander::parseDefinition(const std::string &Macro) {
135e336b74cSManuel Klimek   Buffers.push_back(
136e336b74cSManuel Klimek       llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>"));
13751d1d585SDuncan P. N. Exon Smith   clang::FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef());
138e336b74cSManuel Klimek   FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8,
139e336b74cSManuel Klimek                        Allocator, IdentTable);
140e336b74cSManuel Klimek   const auto Tokens = Lex.lex();
141e336b74cSManuel Klimek   if (!Tokens.empty()) {
142e336b74cSManuel Klimek     DefinitionParser Parser(Tokens);
143e336b74cSManuel Klimek     auto Definition = Parser.parse();
144e336b74cSManuel Klimek     Definitions[Definition.Name] = std::move(Definition);
145e336b74cSManuel Klimek   }
146e336b74cSManuel Klimek }
147e336b74cSManuel Klimek 
defined(llvm::StringRef Name) const148e336b74cSManuel Klimek bool MacroExpander::defined(llvm::StringRef Name) const {
149e336b74cSManuel Klimek   return Definitions.find(Name) != Definitions.end();
150e336b74cSManuel Klimek }
151e336b74cSManuel Klimek 
objectLike(llvm::StringRef Name) const152e336b74cSManuel Klimek bool MacroExpander::objectLike(llvm::StringRef Name) const {
153e336b74cSManuel Klimek   return Definitions.find(Name)->second.ObjectLike;
154e336b74cSManuel Klimek }
155e336b74cSManuel Klimek 
expand(FormatToken * ID,ArgsList Args) const156e336b74cSManuel Klimek llvm::SmallVector<FormatToken *, 8> MacroExpander::expand(FormatToken *ID,
157e336b74cSManuel Klimek                                                           ArgsList Args) const {
158e336b74cSManuel Klimek   assert(defined(ID->TokenText));
159e336b74cSManuel Klimek   SmallVector<FormatToken *, 8> Result;
160e336b74cSManuel Klimek   const Definition &Def = Definitions.find(ID->TokenText)->second;
161e336b74cSManuel Klimek 
162e336b74cSManuel Klimek   // Expand each argument at most once.
163e336b74cSManuel Klimek   llvm::StringSet<> ExpandedArgs;
164e336b74cSManuel Klimek 
165e336b74cSManuel Klimek   // Adds the given token to Result.
166e336b74cSManuel Klimek   auto pushToken = [&](FormatToken *Tok) {
167e336b74cSManuel Klimek     Tok->MacroCtx->ExpandedFrom.push_back(ID);
168e336b74cSManuel Klimek     Result.push_back(Tok);
169e336b74cSManuel Klimek   };
170e336b74cSManuel Klimek 
171e336b74cSManuel Klimek   // If Tok references a parameter, adds the corresponding argument to Result.
172e336b74cSManuel Klimek   // Returns false if Tok does not reference a parameter.
173e336b74cSManuel Klimek   auto expandArgument = [&](FormatToken *Tok) -> bool {
174e336b74cSManuel Klimek     // If the current token references a parameter, expand the corresponding
175e336b74cSManuel Klimek     // argument.
176e336b74cSManuel Klimek     if (!Tok->is(tok::identifier) || ExpandedArgs.contains(Tok->TokenText))
177e336b74cSManuel Klimek       return false;
178e336b74cSManuel Klimek     ExpandedArgs.insert(Tok->TokenText);
179e336b74cSManuel Klimek     auto I = Def.ArgMap.find(Tok->TokenText);
180e336b74cSManuel Klimek     if (I == Def.ArgMap.end())
181e336b74cSManuel Klimek       return false;
182e336b74cSManuel Klimek     // If there are fewer arguments than referenced parameters, treat the
183e336b74cSManuel Klimek     // parameter as empty.
184e336b74cSManuel Klimek     // FIXME: Potentially fully abort the expansion instead.
185e336b74cSManuel Klimek     if (I->getValue() >= Args.size())
186e336b74cSManuel Klimek       return true;
187e336b74cSManuel Klimek     for (FormatToken *Arg : Args[I->getValue()]) {
188e336b74cSManuel Klimek       // A token can be part of a macro argument at multiple levels.
189e336b74cSManuel Klimek       // For example, with "ID(x) x":
190e336b74cSManuel Klimek       // in ID(ID(x)), 'x' is expanded first as argument to the inner
191e336b74cSManuel Klimek       // ID, then again as argument to the outer ID. We keep the macro
192e336b74cSManuel Klimek       // role the token had from the inner expansion.
193e336b74cSManuel Klimek       if (!Arg->MacroCtx)
194e336b74cSManuel Klimek         Arg->MacroCtx = MacroExpansion(MR_ExpandedArg);
195e336b74cSManuel Klimek       pushToken(Arg);
196e336b74cSManuel Klimek     }
197e336b74cSManuel Klimek     return true;
198e336b74cSManuel Klimek   };
199e336b74cSManuel Klimek 
200e336b74cSManuel Klimek   // Expand the definition into Result.
201e336b74cSManuel Klimek   for (FormatToken *Tok : Def.Body) {
202e336b74cSManuel Klimek     if (expandArgument(Tok))
203e336b74cSManuel Klimek       continue;
204e336b74cSManuel Klimek     // Create a copy of the tokens from the macro body, i.e. were not provided
205e336b74cSManuel Klimek     // by user code.
206e336b74cSManuel Klimek     FormatToken *New = new (Allocator.Allocate()) FormatToken;
207e336b74cSManuel Klimek     New->copyFrom(*Tok);
208e336b74cSManuel Klimek     assert(!New->MacroCtx);
209e336b74cSManuel Klimek     // Tokens that are not part of the user code are not formatted.
210e336b74cSManuel Klimek     New->MacroCtx = MacroExpansion(MR_Hidden);
211e336b74cSManuel Klimek     pushToken(New);
212e336b74cSManuel Klimek   }
213e336b74cSManuel Klimek   assert(Result.size() >= 1 && Result.back()->is(tok::eof));
214e336b74cSManuel Klimek   if (Result.size() > 1) {
215e336b74cSManuel Klimek     ++Result[0]->MacroCtx->StartOfExpansion;
216e336b74cSManuel Klimek     ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion;
217e336b74cSManuel Klimek   }
218e336b74cSManuel Klimek   return Result;
219e336b74cSManuel Klimek }
220e336b74cSManuel Klimek 
221e336b74cSManuel Klimek } // namespace format
222e336b74cSManuel Klimek } // namespace clang
223