//===--- MacroExpander.cpp - Format C++ code --------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the implementation of MacroExpander, which handles macro
/// configuration and expansion while formatting.
///
//===----------------------------------------------------------------------===//
14e336b74cSManuel Klimek
15e336b74cSManuel Klimek #include "Macros.h"
16e336b74cSManuel Klimek
17e336b74cSManuel Klimek #include "Encoding.h"
18e336b74cSManuel Klimek #include "FormatToken.h"
19e336b74cSManuel Klimek #include "FormatTokenLexer.h"
20e336b74cSManuel Klimek #include "clang/Basic/TokenKinds.h"
21e336b74cSManuel Klimek #include "clang/Format/Format.h"
22e336b74cSManuel Klimek #include "clang/Lex/HeaderSearch.h"
23e336b74cSManuel Klimek #include "clang/Lex/HeaderSearchOptions.h"
24e336b74cSManuel Klimek #include "clang/Lex/Lexer.h"
25e336b74cSManuel Klimek #include "clang/Lex/ModuleLoader.h"
26e336b74cSManuel Klimek #include "clang/Lex/Preprocessor.h"
27e336b74cSManuel Klimek #include "clang/Lex/PreprocessorOptions.h"
28e336b74cSManuel Klimek #include "llvm/ADT/StringSet.h"
29e336b74cSManuel Klimek #include "llvm/Support/ErrorHandling.h"
30e336b74cSManuel Klimek
31e336b74cSManuel Klimek namespace clang {
32e336b74cSManuel Klimek namespace format {
33e336b74cSManuel Klimek
34e336b74cSManuel Klimek struct MacroExpander::Definition {
35e336b74cSManuel Klimek StringRef Name;
36e336b74cSManuel Klimek SmallVector<FormatToken *, 8> Params;
37e336b74cSManuel Klimek SmallVector<FormatToken *, 8> Body;
38e336b74cSManuel Klimek
39e336b74cSManuel Klimek // Map from each argument's name to its position in the argument list.
40e336b74cSManuel Klimek // With "M(x, y) x + y":
41e336b74cSManuel Klimek // x -> 0
42e336b74cSManuel Klimek // y -> 1
43e336b74cSManuel Klimek llvm::StringMap<size_t> ArgMap;
44e336b74cSManuel Klimek
45e336b74cSManuel Klimek bool ObjectLike = true;
46e336b74cSManuel Klimek };
47e336b74cSManuel Klimek
48e336b74cSManuel Klimek class MacroExpander::DefinitionParser {
49e336b74cSManuel Klimek public:
DefinitionParser(ArrayRef<FormatToken * > Tokens)50e336b74cSManuel Klimek DefinitionParser(ArrayRef<FormatToken *> Tokens) : Tokens(Tokens) {
51e336b74cSManuel Klimek assert(!Tokens.empty());
52e336b74cSManuel Klimek Current = Tokens[0];
53e336b74cSManuel Klimek }
54e336b74cSManuel Klimek
55f1191705SNico Weber // Parse the token stream and return the corresponding Definition object.
56e336b74cSManuel Klimek // Returns an empty definition object with a null-Name on error.
parse()57e336b74cSManuel Klimek MacroExpander::Definition parse() {
58e336b74cSManuel Klimek if (!Current->is(tok::identifier))
59e336b74cSManuel Klimek return {};
60e336b74cSManuel Klimek Def.Name = Current->TokenText;
61e336b74cSManuel Klimek nextToken();
62e336b74cSManuel Klimek if (Current->is(tok::l_paren)) {
63e336b74cSManuel Klimek Def.ObjectLike = false;
64e336b74cSManuel Klimek if (!parseParams())
65e336b74cSManuel Klimek return {};
66e336b74cSManuel Klimek }
67e336b74cSManuel Klimek if (!parseExpansion())
68e336b74cSManuel Klimek return {};
69e336b74cSManuel Klimek
70e336b74cSManuel Klimek return Def;
71e336b74cSManuel Klimek }
72e336b74cSManuel Klimek
73e336b74cSManuel Klimek private:
parseParams()74e336b74cSManuel Klimek bool parseParams() {
75e336b74cSManuel Klimek assert(Current->is(tok::l_paren));
76e336b74cSManuel Klimek nextToken();
77e336b74cSManuel Klimek while (Current->is(tok::identifier)) {
78e336b74cSManuel Klimek Def.Params.push_back(Current);
79e336b74cSManuel Klimek Def.ArgMap[Def.Params.back()->TokenText] = Def.Params.size() - 1;
80e336b74cSManuel Klimek nextToken();
81e336b74cSManuel Klimek if (Current->isNot(tok::comma))
82e336b74cSManuel Klimek break;
83e336b74cSManuel Klimek nextToken();
84e336b74cSManuel Klimek }
85e336b74cSManuel Klimek if (Current->isNot(tok::r_paren))
86e336b74cSManuel Klimek return false;
87e336b74cSManuel Klimek nextToken();
88e336b74cSManuel Klimek return true;
89e336b74cSManuel Klimek }
90e336b74cSManuel Klimek
parseExpansion()91e336b74cSManuel Klimek bool parseExpansion() {
92e336b74cSManuel Klimek if (!Current->isOneOf(tok::equal, tok::eof))
93e336b74cSManuel Klimek return false;
94e336b74cSManuel Klimek if (Current->is(tok::equal))
95e336b74cSManuel Klimek nextToken();
96e336b74cSManuel Klimek parseTail();
97e336b74cSManuel Klimek return true;
98e336b74cSManuel Klimek }
99e336b74cSManuel Klimek
parseTail()100e336b74cSManuel Klimek void parseTail() {
101e336b74cSManuel Klimek while (Current->isNot(tok::eof)) {
102e336b74cSManuel Klimek Def.Body.push_back(Current);
103e336b74cSManuel Klimek nextToken();
104e336b74cSManuel Klimek }
105e336b74cSManuel Klimek Def.Body.push_back(Current);
106e336b74cSManuel Klimek }
107e336b74cSManuel Klimek
nextToken()108e336b74cSManuel Klimek void nextToken() {
109e336b74cSManuel Klimek if (Pos + 1 < Tokens.size())
110e336b74cSManuel Klimek ++Pos;
111e336b74cSManuel Klimek Current = Tokens[Pos];
112e336b74cSManuel Klimek Current->Finalized = true;
113e336b74cSManuel Klimek }
114e336b74cSManuel Klimek
115e336b74cSManuel Klimek size_t Pos = 0;
116e336b74cSManuel Klimek FormatToken *Current = nullptr;
117e336b74cSManuel Klimek Definition Def;
118e336b74cSManuel Klimek ArrayRef<FormatToken *> Tokens;
119e336b74cSManuel Klimek };
120e336b74cSManuel Klimek
MacroExpander(const std::vector<std::string> & Macros,clang::SourceManager & SourceMgr,const FormatStyle & Style,llvm::SpecificBumpPtrAllocator<FormatToken> & Allocator,IdentifierTable & IdentTable)121e336b74cSManuel Klimek MacroExpander::MacroExpander(
122e336b74cSManuel Klimek const std::vector<std::string> &Macros, clang::SourceManager &SourceMgr,
123e336b74cSManuel Klimek const FormatStyle &Style,
124e336b74cSManuel Klimek llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator,
125e336b74cSManuel Klimek IdentifierTable &IdentTable)
126e336b74cSManuel Klimek : SourceMgr(SourceMgr), Style(Style), Allocator(Allocator),
127e336b74cSManuel Klimek IdentTable(IdentTable) {
128*630c7360SMarek Kurdej for (const std::string &Macro : Macros)
129e336b74cSManuel Klimek parseDefinition(Macro);
130e336b74cSManuel Klimek }
131e336b74cSManuel Klimek
// Defaulted here rather than in the class body.
// NOTE(review): presumably because Definition is only a complete type in this
// file, which destroying the stored definitions requires - confirm against
// the header.
MacroExpander::~MacroExpander() = default;
133e336b74cSManuel Klimek
parseDefinition(const std::string & Macro)134e336b74cSManuel Klimek void MacroExpander::parseDefinition(const std::string &Macro) {
135e336b74cSManuel Klimek Buffers.push_back(
136e336b74cSManuel Klimek llvm::MemoryBuffer::getMemBufferCopy(Macro, "<scratch space>"));
13751d1d585SDuncan P. N. Exon Smith clang::FileID FID = SourceMgr.createFileID(Buffers.back()->getMemBufferRef());
138e336b74cSManuel Klimek FormatTokenLexer Lex(SourceMgr, FID, 0, Style, encoding::Encoding_UTF8,
139e336b74cSManuel Klimek Allocator, IdentTable);
140e336b74cSManuel Klimek const auto Tokens = Lex.lex();
141e336b74cSManuel Klimek if (!Tokens.empty()) {
142e336b74cSManuel Klimek DefinitionParser Parser(Tokens);
143e336b74cSManuel Klimek auto Definition = Parser.parse();
144e336b74cSManuel Klimek Definitions[Definition.Name] = std::move(Definition);
145e336b74cSManuel Klimek }
146e336b74cSManuel Klimek }
147e336b74cSManuel Klimek
defined(llvm::StringRef Name) const148e336b74cSManuel Klimek bool MacroExpander::defined(llvm::StringRef Name) const {
149e336b74cSManuel Klimek return Definitions.find(Name) != Definitions.end();
150e336b74cSManuel Klimek }
151e336b74cSManuel Klimek
objectLike(llvm::StringRef Name) const152e336b74cSManuel Klimek bool MacroExpander::objectLike(llvm::StringRef Name) const {
153e336b74cSManuel Klimek return Definitions.find(Name)->second.ObjectLike;
154e336b74cSManuel Klimek }
155e336b74cSManuel Klimek
expand(FormatToken * ID,ArgsList Args) const156e336b74cSManuel Klimek llvm::SmallVector<FormatToken *, 8> MacroExpander::expand(FormatToken *ID,
157e336b74cSManuel Klimek ArgsList Args) const {
158e336b74cSManuel Klimek assert(defined(ID->TokenText));
159e336b74cSManuel Klimek SmallVector<FormatToken *, 8> Result;
160e336b74cSManuel Klimek const Definition &Def = Definitions.find(ID->TokenText)->second;
161e336b74cSManuel Klimek
162e336b74cSManuel Klimek // Expand each argument at most once.
163e336b74cSManuel Klimek llvm::StringSet<> ExpandedArgs;
164e336b74cSManuel Klimek
165e336b74cSManuel Klimek // Adds the given token to Result.
166e336b74cSManuel Klimek auto pushToken = [&](FormatToken *Tok) {
167e336b74cSManuel Klimek Tok->MacroCtx->ExpandedFrom.push_back(ID);
168e336b74cSManuel Klimek Result.push_back(Tok);
169e336b74cSManuel Klimek };
170e336b74cSManuel Klimek
171e336b74cSManuel Klimek // If Tok references a parameter, adds the corresponding argument to Result.
172e336b74cSManuel Klimek // Returns false if Tok does not reference a parameter.
173e336b74cSManuel Klimek auto expandArgument = [&](FormatToken *Tok) -> bool {
174e336b74cSManuel Klimek // If the current token references a parameter, expand the corresponding
175e336b74cSManuel Klimek // argument.
176e336b74cSManuel Klimek if (!Tok->is(tok::identifier) || ExpandedArgs.contains(Tok->TokenText))
177e336b74cSManuel Klimek return false;
178e336b74cSManuel Klimek ExpandedArgs.insert(Tok->TokenText);
179e336b74cSManuel Klimek auto I = Def.ArgMap.find(Tok->TokenText);
180e336b74cSManuel Klimek if (I == Def.ArgMap.end())
181e336b74cSManuel Klimek return false;
182e336b74cSManuel Klimek // If there are fewer arguments than referenced parameters, treat the
183e336b74cSManuel Klimek // parameter as empty.
184e336b74cSManuel Klimek // FIXME: Potentially fully abort the expansion instead.
185e336b74cSManuel Klimek if (I->getValue() >= Args.size())
186e336b74cSManuel Klimek return true;
187e336b74cSManuel Klimek for (FormatToken *Arg : Args[I->getValue()]) {
188e336b74cSManuel Klimek // A token can be part of a macro argument at multiple levels.
189e336b74cSManuel Klimek // For example, with "ID(x) x":
190e336b74cSManuel Klimek // in ID(ID(x)), 'x' is expanded first as argument to the inner
191e336b74cSManuel Klimek // ID, then again as argument to the outer ID. We keep the macro
192e336b74cSManuel Klimek // role the token had from the inner expansion.
193e336b74cSManuel Klimek if (!Arg->MacroCtx)
194e336b74cSManuel Klimek Arg->MacroCtx = MacroExpansion(MR_ExpandedArg);
195e336b74cSManuel Klimek pushToken(Arg);
196e336b74cSManuel Klimek }
197e336b74cSManuel Klimek return true;
198e336b74cSManuel Klimek };
199e336b74cSManuel Klimek
200e336b74cSManuel Klimek // Expand the definition into Result.
201e336b74cSManuel Klimek for (FormatToken *Tok : Def.Body) {
202e336b74cSManuel Klimek if (expandArgument(Tok))
203e336b74cSManuel Klimek continue;
204e336b74cSManuel Klimek // Create a copy of the tokens from the macro body, i.e. were not provided
205e336b74cSManuel Klimek // by user code.
206e336b74cSManuel Klimek FormatToken *New = new (Allocator.Allocate()) FormatToken;
207e336b74cSManuel Klimek New->copyFrom(*Tok);
208e336b74cSManuel Klimek assert(!New->MacroCtx);
209e336b74cSManuel Klimek // Tokens that are not part of the user code are not formatted.
210e336b74cSManuel Klimek New->MacroCtx = MacroExpansion(MR_Hidden);
211e336b74cSManuel Klimek pushToken(New);
212e336b74cSManuel Klimek }
213e336b74cSManuel Klimek assert(Result.size() >= 1 && Result.back()->is(tok::eof));
214e336b74cSManuel Klimek if (Result.size() > 1) {
215e336b74cSManuel Klimek ++Result[0]->MacroCtx->StartOfExpansion;
216e336b74cSManuel Klimek ++Result[Result.size() - 2]->MacroCtx->EndOfExpansion;
217e336b74cSManuel Klimek }
218e336b74cSManuel Klimek return Result;
219e336b74cSManuel Klimek }
220e336b74cSManuel Klimek
221e336b74cSManuel Klimek } // namespace format
222e336b74cSManuel Klimek } // namespace clang
223