//===- Tokens.cpp - collect tokens from preprocessing ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "clang/Tooling/Syntax/Tokens.h"

#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/TokenKinds.h"
#include "clang/Lex/PPCallbacks.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/Token.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <string>
#include <utility>
#include <vector>

using namespace clang;
using namespace clang::syntax;

namespace {
// Finds the smallest consecutive subsequence of Toks that covers R.
llvm::ArrayRef<syntax::Token>
getTokensCovering(llvm::ArrayRef<syntax::Token> Toks, SourceRange R,
                  const SourceManager &SM) {
  if (R.isInvalid())
    return {};
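  // Toks is in source order, so we can binary-search (partition_point) for the
  // first token that is not before R.getBegin() and for the first token that
  // is strictly after R.getEnd().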
459619c2ccSKadir Cetinkaya const syntax::Token *Begin =
469619c2ccSKadir Cetinkaya llvm::partition_point(Toks, [&](const syntax::Token &T) {
479619c2ccSKadir Cetinkaya return SM.isBeforeInTranslationUnit(T.location(), R.getBegin());
489619c2ccSKadir Cetinkaya });
499619c2ccSKadir Cetinkaya const syntax::Token *End =
509619c2ccSKadir Cetinkaya llvm::partition_point(Toks, [&](const syntax::Token &T) {
519619c2ccSKadir Cetinkaya return !SM.isBeforeInTranslationUnit(R.getEnd(), T.location());
529619c2ccSKadir Cetinkaya });
539619c2ccSKadir Cetinkaya if (Begin > End)
549619c2ccSKadir Cetinkaya return {};
559619c2ccSKadir Cetinkaya return {Begin, End};
569619c2ccSKadir Cetinkaya }
579619c2ccSKadir Cetinkaya
// Finds the range within FID corresponding to expanded tokens [First, Last].
// Prev precedes First and Next follows Last; these must *not* be included.
// If no range satisfies the criteria, returns an invalid range.
//
// #define ID(x) x
// ID(ID(ID(a1) a2))
//          ~~ -> a1
//              ~~ -> a2
//       ~~~~~~~~~ -> a1 a2
SourceRange spelledForExpandedSlow(SourceLocation First, SourceLocation Last,
                                   SourceLocation Prev, SourceLocation Next,
                                   FileID TargetFile,
                                   const SourceManager &SM) {
  // There are two main parts to this algorithm:
  //  - identifying which spelled range covers the expanded tokens
  //  - validating that this range doesn't cover any extra tokens (First/Last)
  //
  // We do these in order. However as we transform the expanded range into the
  // spelled one, we adjust First/Last so the validation remains simple.

  assert(SM.getSLocEntry(TargetFile).isFile());
  // In most cases, to select First and Last we must return their expansion
  // range, i.e. the whole of any macros they are included in.
  //
  // When First and Last are part of the *same macro arg* of a macro written
  // in TargetFile, we want just that slice of the arg, i.e. their spelling
  // range.
  //
  // Unwrap such macro calls. If the target file has A(B(C)), the
  // SourceLocation stack of a token inside C shows us the expansion of A
  // first, then B, then any macros inside C's body, then C itself.
  // (This is the reverse of the order the PP applies the expansions in).
  while (First.isMacroID() && Last.isMacroID()) {
    auto DecFirst = SM.getDecomposedLoc(First);
    auto DecLast = SM.getDecomposedLoc(Last);
    auto &ExpFirst = SM.getSLocEntry(DecFirst.first).getExpansion();
    auto &ExpLast = SM.getSLocEntry(DecLast.first).getExpansion();

    if (!ExpFirst.isMacroArgExpansion() || !ExpLast.isMacroArgExpansion())
      break;
    // Locations are in the same macro arg if they expand to the same place.
    // (They may still have different FileIDs - an arg can have >1 chunks!)
    if (ExpFirst.getExpansionLocStart() != ExpLast.getExpansionLocStart())
      break;
    // Careful, given:
    //   #define HIDE ID(ID(a))
    //   ID(ID(HIDE))
    // The token `a` is wrapped in 4 arg-expansions, we only want to unwrap 2.
    // We distinguish them by whether the macro expands into the target file.
    // Fortunately, the target file ones will always appear first.
    auto &ExpMacro =
        SM.getSLocEntry(SM.getFileID(ExpFirst.getExpansionLocStart()))
            .getExpansion();
    if (ExpMacro.getExpansionLocStart().isMacroID())
      break;
    // Replace each endpoint with its spelling inside the macro arg.
    // (This is getImmediateSpellingLoc without repeating lookups).
    First = ExpFirst.getSpellingLoc().getLocWithOffset(DecFirst.second);
    Last = ExpLast.getSpellingLoc().getLocWithOffset(DecLast.second);

    // Now: how do we adjust the previous/next bounds? Three cases:
    //  A) If they are also part of the same macro arg, we translate them too.
    //     This will ensure that we don't select any macros nested within the
    //     macro arg that cover extra tokens. Critical case:
    //       #define ID(X) X
    //       ID(prev target)    // selecting 'target' succeeds
    //       #define LARGE ID(prev target)
    //       LARGE              // selecting 'target' fails.
    //  B) They are not in the macro at all, then their expansion range is a
    //     sibling to it, and we can safely substitute that.
    //       #define PREV prev
    //       #define ID(X) X
    //       PREV ID(target)    // selecting 'target' succeeds.
    //       #define LARGE PREV ID(target)
    //       LARGE              // selecting 'target' fails.
    //  C) They are in a different arg of this macro, or the macro body.
    //     Now selecting the whole macro arg is fine, but the whole macro is
    //     not. Model this by using the edge of the macro call as the bound.
    //       #define ID2(X, Y) X Y
    //       ID2(prev, target)  // selecting 'target' succeeds
    //       #define LARGE ID2(prev, target)
    //       LARGE              // selecting 'target' fails
    auto AdjustBound = [&](SourceLocation &Bound) {
      if (Bound.isInvalid() || !Bound.isMacroID()) // Non-macro must be case B.
        return;
      auto DecBound = SM.getDecomposedLoc(Bound);
      auto &ExpBound = SM.getSLocEntry(DecBound.first).getExpansion();
      if (ExpBound.isMacroArgExpansion() &&
          ExpBound.getExpansionLocStart() == ExpFirst.getExpansionLocStart()) {
        // Case A: translate to (spelling) loc within the macro arg.
        Bound = ExpBound.getSpellingLoc().getLocWithOffset(DecBound.second);
        return;
      }
      while (Bound.isMacroID()) {
        SourceRange Exp = SM.getImmediateExpansionRange(Bound).getAsRange();
        if (Exp.getBegin() == ExpMacro.getExpansionLocStart()) {
          // Case B: bounds become the macro call itself.
          Bound = (&Bound == &Prev) ? Exp.getBegin() : Exp.getEnd();
          return;
        }
        // Either case C, or expansion location will later find case B.
        // We choose the upper bound for Prev and the lower one for Next:
        //   ID(prev) target ID(next)
        //          ^        ^
        //     new-prev    new-next
        Bound = (&Bound == &Prev) ? Exp.getEnd() : Exp.getBegin();
      }
    };
    AdjustBound(Prev);
    AdjustBound(Next);
  }

  // In all remaining cases we need the full containing macros.
  // If this overlaps Prev or Next, then no range is possible.
  SourceRange Candidate =
      SM.getExpansionRange(SourceRange(First, Last)).getAsRange();
  auto DecFirst = SM.getDecomposedExpansionLoc(Candidate.getBegin());
  auto DecLast = SM.getDecomposedLoc(Candidate.getEnd());
  // Can end up in the wrong file due to bad input or token-pasting shenanigans.
  if (Candidate.isInvalid() || DecFirst.first != TargetFile ||
      DecLast.first != TargetFile)
    return SourceRange();
  // Check bounds, which may still be inside macros.
  if (Prev.isValid()) {
    auto Dec = SM.getDecomposedLoc(SM.getExpansionRange(Prev).getBegin());
    if (Dec.first != DecFirst.first || Dec.second >= DecFirst.second)
      return SourceRange();
  }
  if (Next.isValid()) {
    auto Dec = SM.getDecomposedLoc(SM.getExpansionRange(Next).getEnd());
    if (Dec.first != DecLast.first || Dec.second <= DecLast.second)
      return SourceRange();
  }
  // Now we know that Candidate is a file range that covers [First, Last]
  // without encroaching on {Prev, Next}. Ship it!
  return Candidate;
}

} // namespace

syntax::Token::Token(SourceLocation Location, unsigned Length,
                     tok::TokenKind Kind)
    : Location(Location), Length(Length), Kind(Kind) {
  assert(Location.isValid());
}

syntax::Token::Token(const clang::Token &T)
    : Token(T.getLocation(), T.getLength(), T.getKind()) {
  assert(!T.isAnnotation());
}

llvm::StringRef syntax::Token::text(const SourceManager &SM) const {
  bool Invalid = false;
  const char *Start = SM.getCharacterData(location(), &Invalid);
  assert(!Invalid);
  return llvm::StringRef(Start, length());
}

FileRange syntax::Token::range(const SourceManager &SM) const {
  assert(location().isFileID() && "must be a spelled token");
  FileID File;
  unsigned StartOffset;
  std::tie(File, StartOffset) = SM.getDecomposedLoc(location());
  return FileRange(File, StartOffset, StartOffset + length());
}

FileRange syntax::Token::range(const SourceManager &SM,
                               const syntax::Token &First,
                               const syntax::Token &Last) {
  auto F = First.range(SM);
  auto L = Last.range(SM);
  assert(F.file() == L.file() && "tokens from different files");
  assert((F == L || F.endOffset() <= L.beginOffset()) &&
         "wrong order of tokens");
  return FileRange(F.file(), F.beginOffset(), L.endOffset());
}

llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, const Token &T) {
  return OS << T.str();
}

FileRange::FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset)
    : File(File), Begin(BeginOffset), End(EndOffset) {
  assert(File.isValid());
  assert(BeginOffset <= EndOffset);
}

FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
                     unsigned Length) {
  assert(BeginLoc.isValid());
  assert(BeginLoc.isFileID());

  std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
  End = Begin + Length;
}
FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
                     SourceLocation EndLoc) {
  assert(BeginLoc.isValid());
  assert(BeginLoc.isFileID());
  assert(EndLoc.isValid());
  assert(EndLoc.isFileID());
  assert(SM.getFileID(BeginLoc) == SM.getFileID(EndLoc));
  assert(SM.getFileOffset(BeginLoc) <= SM.getFileOffset(EndLoc));

  std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
  End = SM.getFileOffset(EndLoc);
}

llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS,
                                      const FileRange &R) {
  return OS << llvm::formatv("FileRange(file = {0}, offsets = {1}-{2})",
                             R.file().getHashValue(), R.beginOffset(),
                             R.endOffset());
}

llvm::StringRef FileRange::text(const SourceManager &SM) const {
  bool Invalid = false;
  StringRef Text = SM.getBufferData(File, &Invalid);
  if (Invalid)
    return "";
  assert(Begin <= Text.size());
  assert(End <= Text.size());
  return Text.substr(Begin, length());
}

void TokenBuffer::indexExpandedTokens() {
  // No-op if the index is already created.
  if (!ExpandedTokIndex.empty())
    return;
  ExpandedTokIndex.reserve(ExpandedTokens.size());
  // Index ExpandedTokens for faster lookups by SourceLocation.
  for (size_t I = 0, E = ExpandedTokens.size(); I != E; ++I) {
    SourceLocation Loc = ExpandedTokens[I].location();
    if (Loc.isValid())
      ExpandedTokIndex[Loc] = I;
  }
}

llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const {
  if (R.isInvalid())
    return {};
  if (!ExpandedTokIndex.empty()) {
    // Quick lookup if `R` is a token range.
    // This is a huge win since the majority of users use ranges provided by
    // the AST. Ranges in the AST are token ranges from the expanded token
    // stream.
    const auto B = ExpandedTokIndex.find(R.getBegin());
    const auto E = ExpandedTokIndex.find(R.getEnd());
    if (B != ExpandedTokIndex.end() && E != ExpandedTokIndex.end()) {
      const Token *L = ExpandedTokens.data() + B->getSecond();
      // Add 1 to End to make a half-open range.
      const Token *R = ExpandedTokens.data() + E->getSecond() + 1;
      if (L > R)
        return {};
      return {L, R};
    }
  }
  // Slow case. Use `isBeforeInTranslationUnit` to binary search for the
  // required range.
  return getTokensCovering(expandedTokens(), R, *SourceMgr);
}

CharSourceRange FileRange::toCharRange(const SourceManager &SM) const {
  return CharSourceRange(
      SourceRange(SM.getComposedLoc(File, Begin), SM.getComposedLoc(File, End)),
      /*IsTokenRange=*/false);
}

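// Maps an expanded token to a corresponding spelled token. For tokens copied
// verbatim from a file this is the identical spelled token; for tokens inside
// a macro expansion it is the first spelled token of that expansion's mapping,
// and the mapping itself is returned as the second element of the pair.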
std::pair<const syntax::Token *, const TokenBuffer::Mapping *>
TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
  assert(Expanded);
  assert(ExpandedTokens.data() <= Expanded &&
         Expanded < ExpandedTokens.data() + ExpandedTokens.size());

  auto FileIt = Files.find(
      SourceMgr->getFileID(SourceMgr->getExpansionLoc(Expanded->location())));
  assert(FileIt != Files.end() && "no file for an expanded token");

  const MarkedFile &File = FileIt->second;

  unsigned ExpandedIndex = Expanded - ExpandedTokens.data();
  // Find the first mapping that produced tokens after \p Expanded.
  auto It = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
    return M.BeginExpanded <= ExpandedIndex;
  });
  // Our token could only be produced by the previous mapping.
  if (It == File.Mappings.begin()) {
    // No previous mapping, no need to modify offsets.
    return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded],
            /*Mapping=*/nullptr};
  }
  --It; // 'It' now points to the last mapping that started before our token.

  // Check if the token is part of the mapping.
  if (ExpandedIndex < It->EndExpanded)
    return {&File.SpelledTokens[It->BeginSpelled], /*Mapping=*/&*It};

  // Not part of the mapping; use the index from the previous mapping to
  // compute the corresponding spelled token.
  return {
      &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
      /*Mapping=*/nullptr};
}

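// Returns the last mapping in F whose spelled range begins at or before
// Spelled, or nullptr if there is no such mapping.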
const TokenBuffer::Mapping *
TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F,
                                          const syntax::Token *Spelled) {
  assert(F.SpelledTokens.data() <= Spelled);
  unsigned SpelledI = Spelled - F.SpelledTokens.data();
  assert(SpelledI < F.SpelledTokens.size());

  auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) {
    return M.BeginSpelled <= SpelledI;
  });
  if (It == F.Mappings.begin())
    return nullptr;
  --It;
  return &*It;
}

llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
  if (Spelled.empty())
    return {};
  const auto &File = fileForSpelled(Spelled);

  auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front());
  unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data();
  assert(SpelledFrontI < File.SpelledTokens.size());
  unsigned ExpandedBegin;
  if (!FrontMapping) {
    // No mapping starts before the first token of Spelled, so we don't have
    // to modify offsets.
    ExpandedBegin = File.BeginExpanded + SpelledFrontI;
  } else if (SpelledFrontI < FrontMapping->EndSpelled) {
    // This mapping applies to Spelled tokens.
    if (SpelledFrontI != FrontMapping->BeginSpelled) {
      // Spelled tokens don't cover the entire mapping; return an empty result.
      return {}; // FIXME: support macro arguments.
    }
    // Spelled tokens start at the beginning of this mapping.
    ExpandedBegin = FrontMapping->BeginExpanded;
  } else {
    // Spelled tokens start after the mapping ends (they start in the hole
    // between 2 mappings, or between a mapping and the end of the file).
    ExpandedBegin =
        FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled);
  }

  auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back());
  unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data();
  unsigned ExpandedEnd;
  if (!BackMapping) {
    // No mapping starts before the last token of Spelled, so we don't have to
    // modify offsets.
    ExpandedEnd = File.BeginExpanded + SpelledBackI + 1;
  } else if (SpelledBackI < BackMapping->EndSpelled) {
    // This mapping applies to Spelled tokens.
    if (SpelledBackI + 1 != BackMapping->EndSpelled) {
      // Spelled tokens don't cover the entire mapping; return an empty result.
      return {}; // FIXME: support macro arguments.
    }
    ExpandedEnd = BackMapping->EndExpanded;
  } else {
    // Spelled tokens end after the mapping ends.
    ExpandedEnd =
        BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1;
  }

  assert(ExpandedBegin < ExpandedTokens.size());
  assert(ExpandedEnd < ExpandedTokens.size());
  // Avoid returning empty ranges.
  if (ExpandedBegin == ExpandedEnd)
    return {};
  return {llvm::makeArrayRef(ExpandedTokens.data() + ExpandedBegin,
                             ExpandedTokens.data() + ExpandedEnd)};
}

llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const {
  auto It = Files.find(FID);
  assert(It != Files.end());
  return It->second.SpelledTokens;
}

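// Binary-searches the spelled tokens of the file containing Loc for a token
// that starts exactly at Loc; returns nullptr if there is none.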
const syntax::Token *TokenBuffer::spelledTokenAt(SourceLocation Loc) const {
  assert(Loc.isFileID());
  const auto *Tok = llvm::partition_point(
      spelledTokens(SourceMgr->getFileID(Loc)),
      [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
  if (!Tok || Tok->location() != Loc)
    return nullptr;
  return Tok;
}

std::string TokenBuffer::Mapping::str() const {
  return std::string(
      llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})",
                    BeginSpelled, EndSpelled, BeginExpanded, EndExpanded));
}

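// Usage sketch (hypothetical caller): given a TokenBuffer TB and a token range
// R taken from the AST, the file tokens one would edit to change that range
// are roughly
//   auto Spelled = TB.spelledForExpanded(TB.expandedTokens(R));
// with llvm::None returned when no such file range exists, e.g. when R covers
// only part of a macro expansion.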
llvm::Optional<llvm::ArrayRef<syntax::Token>>
TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
  // Mapping an empty range is ambiguous in case of empty mappings at either
  // end of the range; bail out in that case.
  if (Expanded.empty())
    return llvm::None;
  const syntax::Token *First = &Expanded.front();
  const syntax::Token *Last = &Expanded.back();
  const syntax::Token *FirstSpelled, *LastSpelled;
  const TokenBuffer::Mapping *FirstMapping, *LastMapping;
  std::tie(FirstSpelled, FirstMapping) = spelledForExpandedToken(First);
  std::tie(LastSpelled, LastMapping) = spelledForExpandedToken(Last);

  FileID FID = SourceMgr->getFileID(FirstSpelled->location());
  // FIXME: Handle multi-file changes by trying to map onto a common root.
  if (FID != SourceMgr->getFileID(LastSpelled->location()))
    return llvm::None;

  const MarkedFile &File = Files.find(FID)->second;

  // If the range is within one macro argument, the result may be only part of
  // a Mapping. We must use the general (SourceManager-based) algorithm.
  if (FirstMapping && FirstMapping == LastMapping &&
      SourceMgr->isMacroArgExpansion(First->location()) &&
      SourceMgr->isMacroArgExpansion(Last->location())) {
    // We use the excluded Prev/Next tokens for bounds checking.
    SourceLocation Prev = (First == &ExpandedTokens.front())
                              ? SourceLocation()
                              : (First - 1)->location();
    SourceLocation Next = (Last == &ExpandedTokens.back())
                              ? SourceLocation()
                              : (Last + 1)->location();
    SourceRange Range = spelledForExpandedSlow(
        First->location(), Last->location(), Prev, Next, FID, *SourceMgr);
    if (Range.isInvalid())
      return llvm::None;
    return getTokensCovering(File.SpelledTokens, Range, *SourceMgr);
  }

  // Otherwise, use the fast version based on Mappings.
  // Do not allow changes that don't cover the full expansion.
  unsigned FirstExpanded = Expanded.begin() - ExpandedTokens.data();
  unsigned LastExpanded = Expanded.end() - ExpandedTokens.data();
  if (FirstMapping && FirstExpanded != FirstMapping->BeginExpanded)
    return llvm::None;
  if (LastMapping && LastMapping->EndExpanded != LastExpanded)
    return llvm::None;
  return llvm::makeArrayRef(
      FirstMapping ? File.SpelledTokens.data() + FirstMapping->BeginSpelled
                   : FirstSpelled,
      LastMapping ? File.SpelledTokens.data() + LastMapping->EndSpelled
                  : LastSpelled + 1);
}

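// Builds the Expansion (paired spelled and expanded token ranges) described by
// a single Mapping within file F.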
TokenBuffer::Expansion TokenBuffer::makeExpansion(const MarkedFile &F,
                                                  const Mapping &M) const {
  Expansion E;
  E.Spelled = llvm::makeArrayRef(F.SpelledTokens.data() + M.BeginSpelled,
                                 F.SpelledTokens.data() + M.EndSpelled);
  E.Expanded = llvm::makeArrayRef(ExpandedTokens.data() + M.BeginExpanded,
                                  ExpandedTokens.data() + M.EndExpanded);
  return E;
}

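// Returns the MarkedFile owning the given spelled tokens; checks (in builds
// with assertions enabled) that the range lies entirely within that file.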
const TokenBuffer::MarkedFile &
TokenBuffer::fileForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
  assert(!Spelled.empty());
  assert(Spelled.front().location().isFileID() && "not a spelled token");
  auto FileIt = Files.find(SourceMgr->getFileID(Spelled.front().location()));
  assert(FileIt != Files.end() && "file not tracked by token buffer");
  const auto &File = FileIt->second;
  assert(File.SpelledTokens.data() <= Spelled.data() &&
         Spelled.end() <=
             (File.SpelledTokens.data() + File.SpelledTokens.size()) &&
         "Tokens not in spelled range");
#ifndef NDEBUG
  auto T1 = Spelled.back().location();
  auto T2 = File.SpelledTokens.back().location();
  assert(T1 == T2 || sourceManager().isBeforeInTranslationUnit(T1, T2));
#endif
  return File;
}

llvm::Optional<TokenBuffer::Expansion>
TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const {
  assert(Spelled);
  const auto &File = fileForSpelled(*Spelled);

  unsigned SpelledIndex = Spelled - File.SpelledTokens.data();
  auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
    return M.BeginSpelled < SpelledIndex;
  });
  if (M == File.Mappings.end() || M->BeginSpelled != SpelledIndex)
    return llvm::None;
  return makeExpansion(File, *M);
}

std::vector<TokenBuffer::Expansion> TokenBuffer::expansionsOverlapping(
    llvm::ArrayRef<syntax::Token> Spelled) const {
  if (Spelled.empty())
    return {};
  const auto &File = fileForSpelled(Spelled);

  // Find the first overlapping range, and then copy until we stop overlapping.
  unsigned SpelledBeginIndex = Spelled.begin() - File.SpelledTokens.data();
  unsigned SpelledEndIndex = Spelled.end() - File.SpelledTokens.data();
  auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
    return M.EndSpelled <= SpelledBeginIndex;
  });
  std::vector<TokenBuffer::Expansion> Expansions;
  for (; M != File.Mappings.end() && M->BeginSpelled < SpelledEndIndex; ++M)
    Expansions.push_back(makeExpansion(File, *M));
  return Expansions;
}

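// Returns the spelled tokens (at most two) whose ranges touch Loc: the token
// that ends at or covers Loc and/or the token that starts exactly at Loc.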
llvm::ArrayRef<syntax::Token>
syntax::spelledTokensTouching(SourceLocation Loc,
                              llvm::ArrayRef<syntax::Token> Tokens) {
  assert(Loc.isFileID());

  auto *Right = llvm::partition_point(
      Tokens, [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
  bool AcceptRight = Right != Tokens.end() && Right->location() <= Loc;
  bool AcceptLeft =
      Right != Tokens.begin() && (Right - 1)->endLocation() >= Loc;
  return llvm::makeArrayRef(Right - (AcceptLeft ? 1 : 0),
                            Right + (AcceptRight ? 1 : 0));
}

llvm::ArrayRef<syntax::Token>
syntax::spelledTokensTouching(SourceLocation Loc,
                              const syntax::TokenBuffer &Tokens) {
  return spelledTokensTouching(
      Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
}

const syntax::Token *
syntax::spelledIdentifierTouching(SourceLocation Loc,
                                  llvm::ArrayRef<syntax::Token> Tokens) {
  for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) {
    if (Tok.kind() == tok::identifier)
      return &Tok;
  }
  return nullptr;
}

const syntax::Token *
syntax::spelledIdentifierTouching(SourceLocation Loc,
                                  const syntax::TokenBuffer &Tokens) {
  return spelledIdentifierTouching(
      Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
}

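// Returns the first spelled token of every mapping in FID that begins with an
// identifier; in practice these are the names of macro expansions in the file.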
std::vector<const syntax::Token *>
TokenBuffer::macroExpansions(FileID FID) const {
  auto FileIt = Files.find(FID);
  assert(FileIt != Files.end() && "file not tracked by token buffer");
  auto &File = FileIt->second;
  std::vector<const syntax::Token *> Expansions;
  auto &Spelled = File.SpelledTokens;
  for (auto Mapping : File.Mappings) {
    const syntax::Token *Token = &Spelled[Mapping.BeginSpelled];
    if (Token->kind() == tok::TokenKind::identifier)
      Expansions.push_back(Token);
  }
  return Expansions;
}

std::vector<syntax::Token> syntax::tokenize(const FileRange &FR,
                                            const SourceManager &SM,
                                            const LangOptions &LO) {
  std::vector<syntax::Token> Tokens;
  IdentifierTable Identifiers(LO);
  auto AddToken = [&](clang::Token T) {
    // Fill the proper token kind for keywords, etc.
    if (T.getKind() == tok::raw_identifier && !T.needsCleaning() &&
        !T.hasUCN()) { // FIXME: support needsCleaning and hasUCN cases.
      clang::IdentifierInfo &II = Identifiers.get(T.getRawIdentifier());
      T.setIdentifierInfo(&II);
      T.setKind(II.getTokenID());
    }
    Tokens.push_back(syntax::Token(T));
  };

  auto SrcBuffer = SM.getBufferData(FR.file());
  Lexer L(SM.getLocForStartOfFile(FR.file()), LO, SrcBuffer.data(),
          SrcBuffer.data() + FR.beginOffset(),
          // We can't make BufEnd point to FR.endOffset, as Lexer requires a
          // null terminated buffer.
          SrcBuffer.data() + SrcBuffer.size());

  clang::Token T;
  while (!L.LexFromRawLexer(T) && L.getCurrentBufferOffset() < FR.endOffset())
    AddToken(T);
  // LexFromRawLexer returns true when it parses the last token of the file;
  // add it iff it starts within the range we are interested in.
  if (SM.getFileOffset(T.getLocation()) < FR.endOffset())
    AddToken(T);
  return Tokens;
}

std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM,
                                            const LangOptions &LO) {
  return tokenize(syntax::FileRange(FID, 0, SM.getFileIDSize(FID)), SM, LO);
}

/// Records information required to construct mappings for the token buffer
/// that we are collecting.
class TokenCollector::CollectPPExpansions : public PPCallbacks {
public:
  CollectPPExpansions(TokenCollector &C) : Collector(&C) {}

  /// A disabled instance will stop reporting anything to TokenCollector.
  /// This ensures that uses of the preprocessor after TokenCollector::consume()
  /// is called do not access the (possibly invalid) collector instance.
  void disable() { Collector = nullptr; }

  void MacroExpands(const clang::Token &MacroNameTok, const MacroDefinition &MD,
                    SourceRange Range, const MacroArgs *Args) override {
    if (!Collector)
      return;
    const auto &SM = Collector->PP.getSourceManager();
    // Only record top-level expansions that directly produce expanded tokens.
    // This excludes those where:
    //   - the macro use is inside a macro body,
    //   - the macro appears in an argument to another macro.
    // However macro expansion isn't really a tree, it's token rewrite rules,
    // so there are other cases, e.g.
    //   #define B(X) X
    //   #define A 1 + B
    //   A(2)
    // Both A and B produce expanded tokens, though the macro name 'B' comes
    // from an expansion. The best we can do is merge the mappings for both.

    // The *last* token of any top-level macro expansion must be in a file.
    // (In the example above, see the closing paren of the expansion of B).
    if (!Range.getEnd().isFileID())
      return;
    // If there's a current expansion that encloses this one, this one can't be
    // top-level.
    if (LastExpansionEnd.isValid() &&
        !SM.isBeforeInTranslationUnit(LastExpansionEnd, Range.getEnd()))
      return;

    // If the macro invocation (B) starts in a macro (A) but ends in a file,
    // we'll create a merged mapping for A + B by overwriting the endpoint for
    // A's startpoint.
    if (!Range.getBegin().isFileID()) {
      Range.setBegin(SM.getExpansionLoc(Range.getBegin()));
      assert(Collector->Expansions.count(Range.getBegin()) &&
             "Overlapping macros should have same expansion location");
    }

    Collector->Expansions[Range.getBegin()] = Range.getEnd();
    LastExpansionEnd = Range.getEnd();
  }
  // FIXME: handle directives like #pragma, #include, etc.
private:
  TokenCollector *Collector;
  /// Used to detect recursive macro expansions.
  SourceLocation LastExpansionEnd;
};

/// Fills in the TokenBuffer by tracing the run of a preprocessor. The
/// implementation tracks the tokens, macro expansions and directives coming
/// from the preprocessor and:
/// - for each token, figures out if it is a part of an expanded token stream,
///   spelled token stream or both. Stores the tokens appropriately.
/// - records mappings from the spelled to expanded token ranges, e.g. for
///   macro expansions.
/// FIXME: also properly record:
///   - #include directives,
///   - #pragma, #line and other PP directives,
///   - skipped pp regions,
///   - ...

TokenCollector::TokenCollector(Preprocessor &PP) : PP(PP) {
  // Collect the expanded token stream during preprocessing.
  PP.setTokenWatcher([this](const clang::Token &T) {
    if (T.isAnnotation())
      return;
    DEBUG_WITH_TYPE("collect-tokens", llvm::dbgs()
                                          << "Token: "
                                          << syntax::Token(T).dumpForTests(
                                                 this->PP.getSourceManager())
                                          << "\n"

    );
    Expanded.push_back(syntax::Token(T));
  });
  // And locations of macro calls, to properly recover boundaries of those in
  // case of empty expansions.
  auto CB = std::make_unique<CollectPPExpansions>(*this);
  this->Collector = CB.get();
  PP.addPPCallbacks(std::move(CB));
}

/// Builds mappings and spelled tokens in the TokenBuffer based on the expanded
/// token stream.
class TokenCollector::Builder {
public:
  Builder(std::vector<syntax::Token> Expanded, PPExpansions CollectedExpansions,
          const SourceManager &SM, const LangOptions &LangOpts)
      : Result(SM), CollectedExpansions(std::move(CollectedExpansions)), SM(SM),
        LangOpts(LangOpts) {
    Result.ExpandedTokens = std::move(Expanded);
  }

  TokenBuffer build() && {
    assert(!Result.ExpandedTokens.empty());
    assert(Result.ExpandedTokens.back().kind() == tok::eof);

    // Tokenize every file that contributed tokens to the expanded stream.
    buildSpelledTokens();

    // The expanded token stream consists of runs of tokens that came from
    // the same source (a macro expansion, part of a file etc).
    // Between these runs are the logical positions of spelled tokens that
    // didn't expand to anything.
    while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) {
      // Create empty mappings for spelled tokens that expanded to nothing here.
      // May advance NextSpelled, but NextExpanded is unchanged.
      discard();
      // Create mapping for a contiguous run of expanded tokens.
      // Advances NextExpanded past the run, and NextSpelled accordingly.
      unsigned OldPosition = NextExpanded;
      advance();
      if (NextExpanded == OldPosition)
        diagnoseAdvanceFailure();
    }
    // If any tokens remain in any of the files, they didn't expand to anything.
    // Create empty mappings up until the end of the file.
    for (const auto &File : Result.Files)
      discard(File.first);

#ifndef NDEBUG
    for (auto &pair : Result.Files) {
      auto &mappings = pair.second.Mappings;
      assert(llvm::is_sorted(mappings, [](const TokenBuffer::Mapping &M1,
                                          const TokenBuffer::Mapping &M2) {
        return M1.BeginSpelled < M2.BeginSpelled &&
               M1.EndSpelled < M2.EndSpelled &&
               M1.BeginExpanded < M2.BeginExpanded &&
               M1.EndExpanded < M2.EndExpanded;
      }));
    }
#endif

    return std::move(Result);
  }

private:
  // Consume a sequence of spelled tokens that didn't expand to anything.
  // In the simplest case, skips spelled tokens until finding one that produced
  // the NextExpanded token, and creates an empty mapping for them.
  // If Drain is provided, skips remaining tokens from that file instead.
  void discard(llvm::Optional<FileID> Drain = llvm::None) {
    SourceLocation Target =
        Drain ? SM.getLocForEndOfFile(*Drain)
              : SM.getExpansionLoc(
                    Result.ExpandedTokens[NextExpanded].location());
    FileID File = SM.getFileID(Target);
    const auto &SpelledTokens = Result.Files[File].SpelledTokens;
    auto &NextSpelled = this->NextSpelled[File];

    TokenBuffer::Mapping Mapping;
    Mapping.BeginSpelled = NextSpelled;
    // When dropping trailing tokens from a file, the empty mapping should
    // be positioned within the file's expanded-token range (at the end).
    Mapping.BeginExpanded = Mapping.EndExpanded =
        Drain ? Result.Files[*Drain].EndExpanded : NextExpanded;
    // We may want to split into several adjacent empty mappings.
    // FlushMapping() emits the current mapping and starts a new one.
    auto FlushMapping = [&, this] {
      Mapping.EndSpelled = NextSpelled;
      if (Mapping.BeginSpelled != Mapping.EndSpelled)
        Result.Files[File].Mappings.push_back(Mapping);
      Mapping.BeginSpelled = NextSpelled;
    };

    while (NextSpelled < SpelledTokens.size() &&
           SpelledTokens[NextSpelled].location() < Target) {
      // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion)
      // then we want to partition our (empty) mapping.
      //   [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target)
      SourceLocation KnownEnd =
          CollectedExpansions.lookup(SpelledTokens[NextSpelled].location());
      if (KnownEnd.isValid()) {
        FlushMapping(); // Emits [Start, NextSpelled)
        while (NextSpelled < SpelledTokens.size() &&
               SpelledTokens[NextSpelled].location() <= KnownEnd)
          ++NextSpelled;
        FlushMapping(); // Emits [NextSpelled, KnownEnd]
        // Now the loop continues and will emit (KnownEnd, Target).
      } else {
        ++NextSpelled;
      }
    }
    FlushMapping();
  }

856ec0b9908SSam McCall // Consumes the NextExpanded token and others that are part of the same run.
857ec0b9908SSam McCall // Increases NextExpanded and NextSpelled by at least one, and adds a mapping
858ec0b9908SSam McCall // (unless this is a run of file tokens, which we represent with no mapping).
advance()859ec0b9908SSam McCall void advance() {
860ec0b9908SSam McCall const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded];
861ec0b9908SSam McCall SourceLocation Expansion = SM.getExpansionLoc(Tok.location());
862ec0b9908SSam McCall FileID File = SM.getFileID(Expansion);
863ec0b9908SSam McCall const auto &SpelledTokens = Result.Files[File].SpelledTokens;
864ec0b9908SSam McCall auto &NextSpelled = this->NextSpelled[File];
865e7230ea7SIlya Biryukov
866ec0b9908SSam McCall if (Tok.location().isFileID()) {
867ec0b9908SSam McCall // A run of file tokens continues while the expanded/spelled tokens match.
868ec0b9908SSam McCall while (NextSpelled < SpelledTokens.size() &&
869ec0b9908SSam McCall NextExpanded < Result.ExpandedTokens.size() &&
870ec0b9908SSam McCall SpelledTokens[NextSpelled].location() ==
871ec0b9908SSam McCall Result.ExpandedTokens[NextExpanded].location()) {
872ec0b9908SSam McCall ++NextSpelled;
873ec0b9908SSam McCall ++NextExpanded;
874ec0b9908SSam McCall }
875ec0b9908SSam McCall // We need no mapping for file tokens copied to the expanded stream.
876ec0b9908SSam McCall } else {
877ec0b9908SSam McCall // We found a new macro expansion. We should have its spelling bounds.
87878194118SMikhail Maltsev auto End = CollectedExpansions.lookup(Expansion);
879ec0b9908SSam McCall assert(End.isValid() && "Macro expansion wasn't captured?");
880ec0b9908SSam McCall
881ec0b9908SSam McCall // Mapping starts here...
882ec0b9908SSam McCall TokenBuffer::Mapping Mapping;
883ec0b9908SSam McCall Mapping.BeginExpanded = NextExpanded;
884ec0b9908SSam McCall Mapping.BeginSpelled = NextSpelled;
885ec0b9908SSam McCall // ... consumes spelled tokens within bounds we captured ...
886ec0b9908SSam McCall while (NextSpelled < SpelledTokens.size() &&
887ec0b9908SSam McCall SpelledTokens[NextSpelled].location() <= End)
888ec0b9908SSam McCall ++NextSpelled;
889ec0b9908SSam McCall // ... consumes expanded tokens rooted at the same expansion ...
890ec0b9908SSam McCall while (NextExpanded < Result.ExpandedTokens.size() &&
891ec0b9908SSam McCall SM.getExpansionLoc(
892ec0b9908SSam McCall Result.ExpandedTokens[NextExpanded].location()) == Expansion)
893ec0b9908SSam McCall ++NextExpanded;
894ec0b9908SSam McCall // ... and ends here.
895ec0b9908SSam McCall Mapping.EndExpanded = NextExpanded;
896ec0b9908SSam McCall Mapping.EndSpelled = NextSpelled;
897ec0b9908SSam McCall Result.Files[File].Mappings.push_back(Mapping);
898e7230ea7SIlya Biryukov }
899e7230ea7SIlya Biryukov }
900e7230ea7SIlya Biryukov
901ec0b9908SSam McCall // advance() is supposed to consume at least one token - if not, we crash.
902ec0b9908SSam McCall   void diagnoseAdvanceFailure() {
903ec0b9908SSam McCall #ifndef NDEBUG
904ec0b9908SSam McCall // Show the failed-to-map token in context.
905ec0b9908SSam McCall for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10;
906ec0b9908SSam McCall I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) {
907ec0b9908SSam McCall const char *L =
908ec0b9908SSam McCall (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : " ";
909ec0b9908SSam McCall llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n";
910e7230ea7SIlya Biryukov }
911ec0b9908SSam McCall #endif
912ec0b9908SSam McCall llvm_unreachable("Couldn't map expanded token to spelled tokens!");
913e7230ea7SIlya Biryukov }
914e7230ea7SIlya Biryukov
915e7230ea7SIlya Biryukov /// Initializes TokenBuffer::Files and fills spelled tokens and expanded
916e7230ea7SIlya Biryukov /// ranges for each of the files.
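  /// A token is attributed to the FileID of its expansion location, so tokens
  /// produced by a macro are accounted to the file where the macro is invoked.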
917e7230ea7SIlya Biryukov   void buildSpelledTokens() {
918e7230ea7SIlya Biryukov for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) {
919ec0b9908SSam McCall const auto &Tok = Result.ExpandedTokens[I];
920ec0b9908SSam McCall auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location()));
921e7230ea7SIlya Biryukov auto It = Result.Files.try_emplace(FID);
922e7230ea7SIlya Biryukov TokenBuffer::MarkedFile &File = It.first->second;
923e7230ea7SIlya Biryukov
924ec0b9908SSam McCall // The eof token should not be considered part of the main-file's range.
925ec0b9908SSam McCall File.EndExpanded = Tok.kind() == tok::eof ? I : I + 1;
926ec0b9908SSam McCall
927e7230ea7SIlya Biryukov if (!It.second)
928e7230ea7SIlya Biryukov continue; // we have seen this file before.
929e7230ea7SIlya Biryukov // This is the first time we see this file.
930e7230ea7SIlya Biryukov File.BeginExpanded = I;
931e7230ea7SIlya Biryukov File.SpelledTokens = tokenize(FID, SM, LangOpts);
932e7230ea7SIlya Biryukov }
933e7230ea7SIlya Biryukov }
934e7230ea7SIlya Biryukov
935e7230ea7SIlya Biryukov TokenBuffer Result;
936ec0b9908SSam McCall unsigned NextExpanded = 0; // cursor in ExpandedTokens
937ec0b9908SSam McCall llvm::DenseMap<FileID, unsigned> NextSpelled; // cursor in SpelledTokens
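  // Maps the begin location of each macro invocation to the location of its
  // last spelled token, as recorded during preprocessing.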
9385e69f27eSIlya Biryukov PPExpansions CollectedExpansions;
939e7230ea7SIlya Biryukov const SourceManager &SM;
940e7230ea7SIlya Biryukov const LangOptions &LangOpts;
941e7230ea7SIlya Biryukov };
942e7230ea7SIlya Biryukov
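// Expected usage, a sketch based on the TokenCollector contract in Tokens.h:
// create the collector before preprocessing starts (it installs a token
// watcher on the Preprocessor), run the frontend action, then call consume()
// exactly once to obtain the populated TokenBuffer.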
943e7230ea7SIlya Biryukov TokenBuffer TokenCollector::consume() && {
944e7230ea7SIlya Biryukov PP.setTokenWatcher(nullptr);
9455e69f27eSIlya Biryukov Collector->disable();
9465e69f27eSIlya Biryukov return Builder(std::move(Expanded), std::move(Expansions),
9475e69f27eSIlya Biryukov PP.getSourceManager(), PP.getLangOpts())
948e7230ea7SIlya Biryukov .build();
949e7230ea7SIlya Biryukov }
950e7230ea7SIlya Biryukov
951e7230ea7SIlya Biryukov std::string syntax::Token::str() const {
952adcd0268SBenjamin Kramer return std::string(llvm::formatv("Token({0}, length = {1})",
953adcd0268SBenjamin Kramer tok::getTokenName(kind()), length()));
954e7230ea7SIlya Biryukov }
955e7230ea7SIlya Biryukov
956e7230ea7SIlya Biryukov std::string syntax::Token::dumpForTests(const SourceManager &SM) const {
957cdce2fe5SMarcel Hlopko return std::string(llvm::formatv("Token(`{0}`, {1}, length = {2})", text(SM),
958cdce2fe5SMarcel Hlopko tok::getTokenName(kind()), length()));
959e7230ea7SIlya Biryukov }
960e7230ea7SIlya Biryukov
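// Rough shape of the output, assembled from the format strings below:
//   expanded tokens:
//     <expanded tokens, without the trailing <eof>>
//   file '<name>'
//     spelled tokens:
//       <spelled tokens>
//     mappings:
//       ['<spelled>'_<index>, '<spelled>'_<index>) => ['<expanded>'_<index>, '<expanded>'_<index>)
// Files whose mappings are empty print "no mappings." instead.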
961e7230ea7SIlya Biryukov std::string TokenBuffer::dumpForTests() const {
962e7230ea7SIlya Biryukov auto PrintToken = [this](const syntax::Token &T) -> std::string {
963e7230ea7SIlya Biryukov if (T.kind() == tok::eof)
964e7230ea7SIlya Biryukov return "<eof>";
965adcd0268SBenjamin Kramer return std::string(T.text(*SourceMgr));
966e7230ea7SIlya Biryukov };
967e7230ea7SIlya Biryukov
968e7230ea7SIlya Biryukov auto DumpTokens = [this, &PrintToken](llvm::raw_ostream &OS,
969e7230ea7SIlya Biryukov llvm::ArrayRef<syntax::Token> Tokens) {
97026c066d6SIlya Biryukov if (Tokens.empty()) {
971e7230ea7SIlya Biryukov OS << "<empty>";
972e7230ea7SIlya Biryukov return;
973e7230ea7SIlya Biryukov }
974e7230ea7SIlya Biryukov OS << Tokens[0].text(*SourceMgr);
975e7230ea7SIlya Biryukov for (unsigned I = 1; I < Tokens.size(); ++I) {
976e7230ea7SIlya Biryukov if (Tokens[I].kind() == tok::eof)
977e7230ea7SIlya Biryukov continue;
978e7230ea7SIlya Biryukov OS << " " << PrintToken(Tokens[I]);
979e7230ea7SIlya Biryukov }
980e7230ea7SIlya Biryukov };
981e7230ea7SIlya Biryukov
982e7230ea7SIlya Biryukov std::string Dump;
983e7230ea7SIlya Biryukov llvm::raw_string_ostream OS(Dump);
984e7230ea7SIlya Biryukov
985e7230ea7SIlya Biryukov OS << "expanded tokens:\n"
986e7230ea7SIlya Biryukov << " ";
98726c066d6SIlya Biryukov // (!) we do not show '<eof>'.
98826c066d6SIlya Biryukov DumpTokens(OS, llvm::makeArrayRef(ExpandedTokens).drop_back());
989e7230ea7SIlya Biryukov OS << "\n";
990e7230ea7SIlya Biryukov
991e7230ea7SIlya Biryukov std::vector<FileID> Keys;
992e7230ea7SIlya Biryukov for (auto F : Files)
993e7230ea7SIlya Biryukov Keys.push_back(F.first);
994e7230ea7SIlya Biryukov llvm::sort(Keys);
995e7230ea7SIlya Biryukov
996e7230ea7SIlya Biryukov for (FileID ID : Keys) {
997e7230ea7SIlya Biryukov const MarkedFile &File = Files.find(ID)->second;
998e7230ea7SIlya Biryukov auto *Entry = SourceMgr->getFileEntryForID(ID);
999e7230ea7SIlya Biryukov if (!Entry)
1000e7230ea7SIlya Biryukov continue; // Skip builtin files.
1001e7230ea7SIlya Biryukov OS << llvm::formatv("file '{0}'\n", Entry->getName())
1002e7230ea7SIlya Biryukov << " spelled tokens:\n"
1003e7230ea7SIlya Biryukov << " ";
1004e7230ea7SIlya Biryukov DumpTokens(OS, File.SpelledTokens);
1005e7230ea7SIlya Biryukov OS << "\n";
1006e7230ea7SIlya Biryukov
1007e7230ea7SIlya Biryukov if (File.Mappings.empty()) {
1008e7230ea7SIlya Biryukov OS << " no mappings.\n";
1009e7230ea7SIlya Biryukov continue;
1010e7230ea7SIlya Biryukov }
1011e7230ea7SIlya Biryukov OS << " mappings:\n";
1012e7230ea7SIlya Biryukov for (auto &M : File.Mappings) {
1013e7230ea7SIlya Biryukov OS << llvm::formatv(
1014e7230ea7SIlya Biryukov " ['{0}'_{1}, '{2}'_{3}) => ['{4}'_{5}, '{6}'_{7})\n",
1015e7230ea7SIlya Biryukov PrintToken(File.SpelledTokens[M.BeginSpelled]), M.BeginSpelled,
1016e7230ea7SIlya Biryukov M.EndSpelled == File.SpelledTokens.size()
1017e7230ea7SIlya Biryukov ? "<eof>"
1018e7230ea7SIlya Biryukov : PrintToken(File.SpelledTokens[M.EndSpelled]),
1019e7230ea7SIlya Biryukov M.EndSpelled, PrintToken(ExpandedTokens[M.BeginExpanded]),
1020e7230ea7SIlya Biryukov M.BeginExpanded, PrintToken(ExpandedTokens[M.EndExpanded]),
1021e7230ea7SIlya Biryukov M.EndExpanded);
1022e7230ea7SIlya Biryukov }
1023e7230ea7SIlya Biryukov }
10245336befeSLogan Smith return Dump;
1025e7230ea7SIlya Biryukov }
1026