1e7230ea7SIlya Biryukov //===- Tokens.cpp - collect tokens from preprocessing ---------------------===//
2e7230ea7SIlya Biryukov //
3e7230ea7SIlya Biryukov // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e7230ea7SIlya Biryukov // See https://llvm.org/LICENSE.txt for license information.
5e7230ea7SIlya Biryukov // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e7230ea7SIlya Biryukov //
7e7230ea7SIlya Biryukov //===----------------------------------------------------------------------===//
8e7230ea7SIlya Biryukov #include "clang/Tooling/Syntax/Tokens.h"
9e7230ea7SIlya Biryukov 
10e7230ea7SIlya Biryukov #include "clang/Basic/Diagnostic.h"
11e7230ea7SIlya Biryukov #include "clang/Basic/IdentifierTable.h"
12e7230ea7SIlya Biryukov #include "clang/Basic/LLVM.h"
13e7230ea7SIlya Biryukov #include "clang/Basic/LangOptions.h"
14e7230ea7SIlya Biryukov #include "clang/Basic/SourceLocation.h"
15e7230ea7SIlya Biryukov #include "clang/Basic/SourceManager.h"
16e7230ea7SIlya Biryukov #include "clang/Basic/TokenKinds.h"
175e69f27eSIlya Biryukov #include "clang/Lex/PPCallbacks.h"
18e7230ea7SIlya Biryukov #include "clang/Lex/Preprocessor.h"
19e7230ea7SIlya Biryukov #include "clang/Lex/Token.h"
20e7230ea7SIlya Biryukov #include "llvm/ADT/ArrayRef.h"
21e7230ea7SIlya Biryukov #include "llvm/ADT/None.h"
22e7230ea7SIlya Biryukov #include "llvm/ADT/Optional.h"
23e7230ea7SIlya Biryukov #include "llvm/ADT/STLExtras.h"
24e7230ea7SIlya Biryukov #include "llvm/Support/Debug.h"
25e7230ea7SIlya Biryukov #include "llvm/Support/ErrorHandling.h"
26e7230ea7SIlya Biryukov #include "llvm/Support/FormatVariadic.h"
27e7230ea7SIlya Biryukov #include "llvm/Support/raw_ostream.h"
28e7230ea7SIlya Biryukov #include <algorithm>
29e7230ea7SIlya Biryukov #include <cassert>
30e7230ea7SIlya Biryukov #include <iterator>
31e7230ea7SIlya Biryukov #include <string>
32e7230ea7SIlya Biryukov #include <utility>
33e7230ea7SIlya Biryukov #include <vector>
34e7230ea7SIlya Biryukov 
35e7230ea7SIlya Biryukov using namespace clang;
36e7230ea7SIlya Biryukov using namespace clang::syntax;
37e7230ea7SIlya Biryukov 
389619c2ccSKadir Cetinkaya namespace {
399619c2ccSKadir Cetinkaya // Finds the smallest consecutive subsuquence of Toks that covers R.
409619c2ccSKadir Cetinkaya llvm::ArrayRef<syntax::Token>
getTokensCovering(llvm::ArrayRef<syntax::Token> Toks,SourceRange R,const SourceManager & SM)419619c2ccSKadir Cetinkaya getTokensCovering(llvm::ArrayRef<syntax::Token> Toks, SourceRange R,
429619c2ccSKadir Cetinkaya                   const SourceManager &SM) {
439619c2ccSKadir Cetinkaya   if (R.isInvalid())
449619c2ccSKadir Cetinkaya     return {};
459619c2ccSKadir Cetinkaya   const syntax::Token *Begin =
469619c2ccSKadir Cetinkaya       llvm::partition_point(Toks, [&](const syntax::Token &T) {
479619c2ccSKadir Cetinkaya         return SM.isBeforeInTranslationUnit(T.location(), R.getBegin());
489619c2ccSKadir Cetinkaya       });
499619c2ccSKadir Cetinkaya   const syntax::Token *End =
509619c2ccSKadir Cetinkaya       llvm::partition_point(Toks, [&](const syntax::Token &T) {
519619c2ccSKadir Cetinkaya         return !SM.isBeforeInTranslationUnit(R.getEnd(), T.location());
529619c2ccSKadir Cetinkaya       });
539619c2ccSKadir Cetinkaya   if (Begin > End)
549619c2ccSKadir Cetinkaya     return {};
559619c2ccSKadir Cetinkaya   return {Begin, End};
569619c2ccSKadir Cetinkaya }
579619c2ccSKadir Cetinkaya 
5827e075fcSSam McCall // Finds the range within FID corresponding to expanded tokens [First, Last].
5927e075fcSSam McCall // Prev precedes First and Next follows Last, these must *not* be included.
6027e075fcSSam McCall // If no range satisfies the criteria, returns an invalid range.
6127e075fcSSam McCall //
629619c2ccSKadir Cetinkaya // #define ID(x) x
639619c2ccSKadir Cetinkaya // ID(ID(ID(a1) a2))
649619c2ccSKadir Cetinkaya //          ~~       -> a1
659619c2ccSKadir Cetinkaya //              ~~   -> a2
669619c2ccSKadir Cetinkaya //       ~~~~~~~~~   -> a1 a2
spelledForExpandedSlow(SourceLocation First,SourceLocation Last,SourceLocation Prev,SourceLocation Next,FileID TargetFile,const SourceManager & SM)6727e075fcSSam McCall SourceRange spelledForExpandedSlow(SourceLocation First, SourceLocation Last,
6827e075fcSSam McCall                                    SourceLocation Prev, SourceLocation Next,
6927e075fcSSam McCall                                    FileID TargetFile,
709619c2ccSKadir Cetinkaya                                    const SourceManager &SM) {
7127e075fcSSam McCall   // There are two main parts to this algorithm:
7227e075fcSSam McCall   //  - identifying which spelled range covers the expanded tokens
7327e075fcSSam McCall   //  - validating that this range doesn't cover any extra tokens (First/Last)
7427e075fcSSam McCall   //
7527e075fcSSam McCall   // We do these in order. However as we transform the expanded range into the
7627e075fcSSam McCall   // spelled one, we adjust First/Last so the validation remains simple.
7727e075fcSSam McCall 
7827e075fcSSam McCall   assert(SM.getSLocEntry(TargetFile).isFile());
7927e075fcSSam McCall   // In most cases, to select First and Last we must return their expansion
8027e075fcSSam McCall   // range, i.e. the whole of any macros they are included in.
8127e075fcSSam McCall   //
8227e075fcSSam McCall   // When First and Last are part of the *same macro arg* of a macro written
8327e075fcSSam McCall   // in TargetFile, we that slice of the arg, i.e. their spelling range.
8427e075fcSSam McCall   //
8527e075fcSSam McCall   // Unwrap such macro calls. If the target file has A(B(C)), the
8627e075fcSSam McCall   // SourceLocation stack of a token inside C shows us the expansion of A first,
8727e075fcSSam McCall   // then B, then any macros inside C's body, then C itself.
8827e075fcSSam McCall   // (This is the reverse of the order the PP applies the expansions in).
8927e075fcSSam McCall   while (First.isMacroID() && Last.isMacroID()) {
9027e075fcSSam McCall     auto DecFirst = SM.getDecomposedLoc(First);
9127e075fcSSam McCall     auto DecLast = SM.getDecomposedLoc(Last);
9227e075fcSSam McCall     auto &ExpFirst = SM.getSLocEntry(DecFirst.first).getExpansion();
9327e075fcSSam McCall     auto &ExpLast = SM.getSLocEntry(DecLast.first).getExpansion();
9427e075fcSSam McCall 
9527e075fcSSam McCall     if (!ExpFirst.isMacroArgExpansion() || !ExpLast.isMacroArgExpansion())
969619c2ccSKadir Cetinkaya       break;
9727e075fcSSam McCall     // Locations are in the same macro arg if they expand to the same place.
9827e075fcSSam McCall     // (They may still have different FileIDs - an arg can have >1 chunks!)
9927e075fcSSam McCall     if (ExpFirst.getExpansionLocStart() != ExpLast.getExpansionLocStart())
1009619c2ccSKadir Cetinkaya       break;
10127e075fcSSam McCall     // Careful, given:
10227e075fcSSam McCall     //   #define HIDE ID(ID(a))
10327e075fcSSam McCall     //   ID(ID(HIDE))
10427e075fcSSam McCall     // The token `a` is wrapped in 4 arg-expansions, we only want to unwrap 2.
10527e075fcSSam McCall     // We distinguish them by whether the macro expands into the target file.
10627e075fcSSam McCall     // Fortunately, the target file ones will always appear first.
10727e075fcSSam McCall     auto &ExpMacro =
10827e075fcSSam McCall         SM.getSLocEntry(SM.getFileID(ExpFirst.getExpansionLocStart()))
10927e075fcSSam McCall             .getExpansion();
11027e075fcSSam McCall     if (ExpMacro.getExpansionLocStart().isMacroID())
11127e075fcSSam McCall       break;
11227e075fcSSam McCall     // Replace each endpoint with its spelling inside the macro arg.
11327e075fcSSam McCall     // (This is getImmediateSpellingLoc without repeating lookups).
11427e075fcSSam McCall     First = ExpFirst.getSpellingLoc().getLocWithOffset(DecFirst.second);
11527e075fcSSam McCall     Last = ExpLast.getSpellingLoc().getLocWithOffset(DecLast.second);
11627e075fcSSam McCall 
11727e075fcSSam McCall     // Now: how do we adjust the previous/next bounds? Three cases:
11827e075fcSSam McCall     // A) If they are also part of the same macro arg, we translate them too.
11927e075fcSSam McCall     //   This will ensure that we don't select any macros nested within the
12027e075fcSSam McCall     //   macro arg that cover extra tokens. Critical case:
12127e075fcSSam McCall     //      #define ID(X) X
12227e075fcSSam McCall     //      ID(prev target) // selecting 'target' succeeds
12327e075fcSSam McCall     //      #define LARGE ID(prev target)
12427e075fcSSam McCall     //      LARGE // selecting 'target' fails.
12527e075fcSSam McCall     // B) They are not in the macro at all, then their expansion range is a
12627e075fcSSam McCall     //    sibling to it, and we can safely substitute that.
12727e075fcSSam McCall     //      #define PREV prev
12827e075fcSSam McCall     //      #define ID(X) X
12927e075fcSSam McCall     //      PREV ID(target) // selecting 'target' succeeds.
13027e075fcSSam McCall     //      #define LARGE PREV ID(target)
13127e075fcSSam McCall     //      LARGE // selecting 'target' fails.
13227e075fcSSam McCall     // C) They are in a different arg of this macro, or the macro body.
13327e075fcSSam McCall     //    Now selecting the whole macro arg is fine, but the whole macro is not.
13427e075fcSSam McCall     //    Model this by setting using the edge of the macro call as the bound.
13527e075fcSSam McCall     //      #define ID2(X, Y) X Y
13627e075fcSSam McCall     //      ID2(prev, target) // selecting 'target' succeeds
13727e075fcSSam McCall     //      #define LARGE ID2(prev, target)
13827e075fcSSam McCall     //      LARGE // selecting 'target' fails
13927e075fcSSam McCall     auto AdjustBound = [&](SourceLocation &Bound) {
14027e075fcSSam McCall       if (Bound.isInvalid() || !Bound.isMacroID()) // Non-macro must be case B.
14127e075fcSSam McCall         return;
14227e075fcSSam McCall       auto DecBound = SM.getDecomposedLoc(Bound);
14327e075fcSSam McCall       auto &ExpBound = SM.getSLocEntry(DecBound.first).getExpansion();
14427e075fcSSam McCall       if (ExpBound.isMacroArgExpansion() &&
14527e075fcSSam McCall           ExpBound.getExpansionLocStart() == ExpFirst.getExpansionLocStart()) {
14627e075fcSSam McCall         // Case A: translate to (spelling) loc within the macro arg.
14727e075fcSSam McCall         Bound = ExpBound.getSpellingLoc().getLocWithOffset(DecBound.second);
14827e075fcSSam McCall         return;
1499619c2ccSKadir Cetinkaya       }
15027e075fcSSam McCall       while (Bound.isMacroID()) {
15127e075fcSSam McCall         SourceRange Exp = SM.getImmediateExpansionRange(Bound).getAsRange();
15227e075fcSSam McCall         if (Exp.getBegin() == ExpMacro.getExpansionLocStart()) {
15327e075fcSSam McCall           // Case B: bounds become the macro call itself.
15427e075fcSSam McCall           Bound = (&Bound == &Prev) ? Exp.getBegin() : Exp.getEnd();
15527e075fcSSam McCall           return;
15627e075fcSSam McCall         }
15727e075fcSSam McCall         // Either case C, or expansion location will later find case B.
15827e075fcSSam McCall         // We choose the upper bound for Prev and the lower one for Next:
15927e075fcSSam McCall         //   ID(prev) target ID(next)
16027e075fcSSam McCall         //          ^        ^
16127e075fcSSam McCall         //      new-prev  new-next
16227e075fcSSam McCall         Bound = (&Bound == &Prev) ? Exp.getEnd() : Exp.getBegin();
16327e075fcSSam McCall       }
16427e075fcSSam McCall     };
16527e075fcSSam McCall     AdjustBound(Prev);
16627e075fcSSam McCall     AdjustBound(Next);
16727e075fcSSam McCall   }
16827e075fcSSam McCall 
16927e075fcSSam McCall   // In all remaining cases we need the full containing macros.
17027e075fcSSam McCall   // If this overlaps Prev or Next, then no range is possible.
17127e075fcSSam McCall   SourceRange Candidate =
17227e075fcSSam McCall       SM.getExpansionRange(SourceRange(First, Last)).getAsRange();
17327e075fcSSam McCall   auto DecFirst = SM.getDecomposedExpansionLoc(Candidate.getBegin());
17427e075fcSSam McCall   auto DecLast = SM.getDecomposedLoc(Candidate.getEnd());
17527e075fcSSam McCall   // Can end up in the wrong file due to bad input or token-pasting shenanigans.
17627e075fcSSam McCall   if (Candidate.isInvalid() || DecFirst.first != TargetFile || DecLast.first != TargetFile)
17727e075fcSSam McCall     return SourceRange();
17827e075fcSSam McCall   // Check bounds, which may still be inside macros.
17927e075fcSSam McCall   if (Prev.isValid()) {
18027e075fcSSam McCall     auto Dec = SM.getDecomposedLoc(SM.getExpansionRange(Prev).getBegin());
18127e075fcSSam McCall     if (Dec.first != DecFirst.first || Dec.second >= DecFirst.second)
18227e075fcSSam McCall       return SourceRange();
18327e075fcSSam McCall   }
18427e075fcSSam McCall   if (Next.isValid()) {
18527e075fcSSam McCall     auto Dec = SM.getDecomposedLoc(SM.getExpansionRange(Next).getEnd());
18627e075fcSSam McCall     if (Dec.first != DecLast.first || Dec.second <= DecLast.second)
18727e075fcSSam McCall       return SourceRange();
18827e075fcSSam McCall   }
18927e075fcSSam McCall   // Now we know that Candidate is a file range that covers [First, Last]
19027e075fcSSam McCall   // without encroaching on {Prev, Next}. Ship it!
19127e075fcSSam McCall   return Candidate;
1929619c2ccSKadir Cetinkaya }
1939619c2ccSKadir Cetinkaya 
1949619c2ccSKadir Cetinkaya } // namespace
1959619c2ccSKadir Cetinkaya 
Token(SourceLocation Location,unsigned Length,tok::TokenKind Kind)196625a0f70SIlya Biryukov syntax::Token::Token(SourceLocation Location, unsigned Length,
197625a0f70SIlya Biryukov                      tok::TokenKind Kind)
198625a0f70SIlya Biryukov     : Location(Location), Length(Length), Kind(Kind) {
199625a0f70SIlya Biryukov   assert(Location.isValid());
200625a0f70SIlya Biryukov }
201625a0f70SIlya Biryukov 
Token(const clang::Token & T)202e7230ea7SIlya Biryukov syntax::Token::Token(const clang::Token &T)
203e7230ea7SIlya Biryukov     : Token(T.getLocation(), T.getLength(), T.getKind()) {
204e7230ea7SIlya Biryukov   assert(!T.isAnnotation());
205e7230ea7SIlya Biryukov }
206e7230ea7SIlya Biryukov 
text(const SourceManager & SM) const207e7230ea7SIlya Biryukov llvm::StringRef syntax::Token::text(const SourceManager &SM) const {
208e7230ea7SIlya Biryukov   bool Invalid = false;
209e7230ea7SIlya Biryukov   const char *Start = SM.getCharacterData(location(), &Invalid);
210e7230ea7SIlya Biryukov   assert(!Invalid);
211e7230ea7SIlya Biryukov   return llvm::StringRef(Start, length());
212e7230ea7SIlya Biryukov }
213e7230ea7SIlya Biryukov 
range(const SourceManager & SM) const214e7230ea7SIlya Biryukov FileRange syntax::Token::range(const SourceManager &SM) const {
215e7230ea7SIlya Biryukov   assert(location().isFileID() && "must be a spelled token");
216e7230ea7SIlya Biryukov   FileID File;
217e7230ea7SIlya Biryukov   unsigned StartOffset;
218e7230ea7SIlya Biryukov   std::tie(File, StartOffset) = SM.getDecomposedLoc(location());
219e7230ea7SIlya Biryukov   return FileRange(File, StartOffset, StartOffset + length());
220e7230ea7SIlya Biryukov }
221e7230ea7SIlya Biryukov 
range(const SourceManager & SM,const syntax::Token & First,const syntax::Token & Last)222e7230ea7SIlya Biryukov FileRange syntax::Token::range(const SourceManager &SM,
223e7230ea7SIlya Biryukov                                const syntax::Token &First,
224e7230ea7SIlya Biryukov                                const syntax::Token &Last) {
225e7230ea7SIlya Biryukov   auto F = First.range(SM);
226e7230ea7SIlya Biryukov   auto L = Last.range(SM);
227e7230ea7SIlya Biryukov   assert(F.file() == L.file() && "tokens from different files");
2288c2cf499SKadir Cetinkaya   assert((F == L || F.endOffset() <= L.beginOffset()) &&
2298c2cf499SKadir Cetinkaya          "wrong order of tokens");
230e7230ea7SIlya Biryukov   return FileRange(F.file(), F.beginOffset(), L.endOffset());
231e7230ea7SIlya Biryukov }
232e7230ea7SIlya Biryukov 
operator <<(llvm::raw_ostream & OS,const Token & T)233e7230ea7SIlya Biryukov llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, const Token &T) {
234e7230ea7SIlya Biryukov   return OS << T.str();
235e7230ea7SIlya Biryukov }
236e7230ea7SIlya Biryukov 
FileRange(FileID File,unsigned BeginOffset,unsigned EndOffset)237e7230ea7SIlya Biryukov FileRange::FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset)
238e7230ea7SIlya Biryukov     : File(File), Begin(BeginOffset), End(EndOffset) {
239e7230ea7SIlya Biryukov   assert(File.isValid());
240e7230ea7SIlya Biryukov   assert(BeginOffset <= EndOffset);
241e7230ea7SIlya Biryukov }
242e7230ea7SIlya Biryukov 
FileRange(const SourceManager & SM,SourceLocation BeginLoc,unsigned Length)243e7230ea7SIlya Biryukov FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
244e7230ea7SIlya Biryukov                      unsigned Length) {
245e7230ea7SIlya Biryukov   assert(BeginLoc.isValid());
246e7230ea7SIlya Biryukov   assert(BeginLoc.isFileID());
247e7230ea7SIlya Biryukov 
248e7230ea7SIlya Biryukov   std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
249e7230ea7SIlya Biryukov   End = Begin + Length;
250e7230ea7SIlya Biryukov }
FileRange(const SourceManager & SM,SourceLocation BeginLoc,SourceLocation EndLoc)251e7230ea7SIlya Biryukov FileRange::FileRange(const SourceManager &SM, SourceLocation BeginLoc,
252e7230ea7SIlya Biryukov                      SourceLocation EndLoc) {
253e7230ea7SIlya Biryukov   assert(BeginLoc.isValid());
254e7230ea7SIlya Biryukov   assert(BeginLoc.isFileID());
255e7230ea7SIlya Biryukov   assert(EndLoc.isValid());
256e7230ea7SIlya Biryukov   assert(EndLoc.isFileID());
257e7230ea7SIlya Biryukov   assert(SM.getFileID(BeginLoc) == SM.getFileID(EndLoc));
258e7230ea7SIlya Biryukov   assert(SM.getFileOffset(BeginLoc) <= SM.getFileOffset(EndLoc));
259e7230ea7SIlya Biryukov 
260e7230ea7SIlya Biryukov   std::tie(File, Begin) = SM.getDecomposedLoc(BeginLoc);
261e7230ea7SIlya Biryukov   End = SM.getFileOffset(EndLoc);
262e7230ea7SIlya Biryukov }
263e7230ea7SIlya Biryukov 
operator <<(llvm::raw_ostream & OS,const FileRange & R)264e7230ea7SIlya Biryukov llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS,
265e7230ea7SIlya Biryukov                                       const FileRange &R) {
266e7230ea7SIlya Biryukov   return OS << llvm::formatv("FileRange(file = {0}, offsets = {1}-{2})",
267e7230ea7SIlya Biryukov                              R.file().getHashValue(), R.beginOffset(),
268e7230ea7SIlya Biryukov                              R.endOffset());
269e7230ea7SIlya Biryukov }
270e7230ea7SIlya Biryukov 
text(const SourceManager & SM) const271e7230ea7SIlya Biryukov llvm::StringRef FileRange::text(const SourceManager &SM) const {
272e7230ea7SIlya Biryukov   bool Invalid = false;
273e7230ea7SIlya Biryukov   StringRef Text = SM.getBufferData(File, &Invalid);
274e7230ea7SIlya Biryukov   if (Invalid)
275e7230ea7SIlya Biryukov     return "";
276e7230ea7SIlya Biryukov   assert(Begin <= Text.size());
277e7230ea7SIlya Biryukov   assert(End <= Text.size());
278e7230ea7SIlya Biryukov   return Text.substr(Begin, length());
279e7230ea7SIlya Biryukov }
280e7230ea7SIlya Biryukov 
indexExpandedTokens()281aa979084SUtkarsh Saxena void TokenBuffer::indexExpandedTokens() {
282aa979084SUtkarsh Saxena   // No-op if the index is already created.
283aa979084SUtkarsh Saxena   if (!ExpandedTokIndex.empty())
284aa979084SUtkarsh Saxena     return;
285aa979084SUtkarsh Saxena   ExpandedTokIndex.reserve(ExpandedTokens.size());
286aa979084SUtkarsh Saxena   // Index ExpandedTokens for faster lookups by SourceLocation.
287cd824a48SUtkarsh Saxena   for (size_t I = 0, E = ExpandedTokens.size(); I != E; ++I) {
288cd824a48SUtkarsh Saxena     SourceLocation Loc = ExpandedTokens[I].location();
289cd824a48SUtkarsh Saxena     if (Loc.isValid())
290cd824a48SUtkarsh Saxena       ExpandedTokIndex[Loc] = I;
291cd824a48SUtkarsh Saxena   }
292aa979084SUtkarsh Saxena }
293aa979084SUtkarsh Saxena 
expandedTokens(SourceRange R) const294c9c714c7SSam McCall llvm::ArrayRef<syntax::Token> TokenBuffer::expandedTokens(SourceRange R) const {
295cd824a48SUtkarsh Saxena   if (R.isInvalid())
296cd824a48SUtkarsh Saxena     return {};
297aa979084SUtkarsh Saxena   if (!ExpandedTokIndex.empty()) {
298aa979084SUtkarsh Saxena     // Quick lookup if `R` is a token range.
299aa979084SUtkarsh Saxena     // This is a huge win since majority of the users use ranges provided by an
300aa979084SUtkarsh Saxena     // AST. Ranges in AST are token ranges from expanded token stream.
301aa979084SUtkarsh Saxena     const auto B = ExpandedTokIndex.find(R.getBegin());
302aa979084SUtkarsh Saxena     const auto E = ExpandedTokIndex.find(R.getEnd());
303aa979084SUtkarsh Saxena     if (B != ExpandedTokIndex.end() && E != ExpandedTokIndex.end()) {
304cd824a48SUtkarsh Saxena       const Token *L = ExpandedTokens.data() + B->getSecond();
305aa979084SUtkarsh Saxena       // Add 1 to End to make a half-open range.
306cd824a48SUtkarsh Saxena       const Token *R = ExpandedTokens.data() + E->getSecond() + 1;
307cd824a48SUtkarsh Saxena       if (L > R)
308cd824a48SUtkarsh Saxena         return {};
309cd824a48SUtkarsh Saxena       return {L, R};
310aa979084SUtkarsh Saxena     }
311aa979084SUtkarsh Saxena   }
312aa979084SUtkarsh Saxena   // Slow case. Use `isBeforeInTranslationUnit` to binary search for the
313aa979084SUtkarsh Saxena   // required range.
3149619c2ccSKadir Cetinkaya   return getTokensCovering(expandedTokens(), R, *SourceMgr);
315c9c714c7SSam McCall }
316c9c714c7SSam McCall 
toCharRange(const SourceManager & SM) const3171ad15046SIlya Biryukov CharSourceRange FileRange::toCharRange(const SourceManager &SM) const {
3181ad15046SIlya Biryukov   return CharSourceRange(
3191ad15046SIlya Biryukov       SourceRange(SM.getComposedLoc(File, Begin), SM.getComposedLoc(File, End)),
3201ad15046SIlya Biryukov       /*IsTokenRange=*/false);
3211ad15046SIlya Biryukov }
3221ad15046SIlya Biryukov 
323e7230ea7SIlya Biryukov std::pair<const syntax::Token *, const TokenBuffer::Mapping *>
spelledForExpandedToken(const syntax::Token * Expanded) const324e7230ea7SIlya Biryukov TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
325e7230ea7SIlya Biryukov   assert(Expanded);
326e7230ea7SIlya Biryukov   assert(ExpandedTokens.data() <= Expanded &&
327e7230ea7SIlya Biryukov          Expanded < ExpandedTokens.data() + ExpandedTokens.size());
328e7230ea7SIlya Biryukov 
329e7230ea7SIlya Biryukov   auto FileIt = Files.find(
330e7230ea7SIlya Biryukov       SourceMgr->getFileID(SourceMgr->getExpansionLoc(Expanded->location())));
331e7230ea7SIlya Biryukov   assert(FileIt != Files.end() && "no file for an expanded token");
332e7230ea7SIlya Biryukov 
333e7230ea7SIlya Biryukov   const MarkedFile &File = FileIt->second;
334e7230ea7SIlya Biryukov 
335e7230ea7SIlya Biryukov   unsigned ExpandedIndex = Expanded - ExpandedTokens.data();
336e7230ea7SIlya Biryukov   // Find the first mapping that produced tokens after \p Expanded.
33778ee2fbfSFangrui Song   auto It = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
33878ee2fbfSFangrui Song     return M.BeginExpanded <= ExpandedIndex;
339e7230ea7SIlya Biryukov   });
340e7230ea7SIlya Biryukov   // Our token could only be produced by the previous mapping.
341e7230ea7SIlya Biryukov   if (It == File.Mappings.begin()) {
342e7230ea7SIlya Biryukov     // No previous mapping, no need to modify offsets.
3431bf055c9SMarcel Hlopko     return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded],
3441bf055c9SMarcel Hlopko             /*Mapping=*/nullptr};
345e7230ea7SIlya Biryukov   }
346e7230ea7SIlya Biryukov   --It; // 'It' now points to last mapping that started before our token.
347e7230ea7SIlya Biryukov 
348e7230ea7SIlya Biryukov   // Check if the token is part of the mapping.
349e7230ea7SIlya Biryukov   if (ExpandedIndex < It->EndExpanded)
3501bf055c9SMarcel Hlopko     return {&File.SpelledTokens[It->BeginSpelled], /*Mapping=*/&*It};
351e7230ea7SIlya Biryukov 
352e7230ea7SIlya Biryukov   // Not part of the mapping, use the index from previous mapping to compute the
353e7230ea7SIlya Biryukov   // corresponding spelled token.
354e7230ea7SIlya Biryukov   return {
355e7230ea7SIlya Biryukov       &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
3561bf055c9SMarcel Hlopko       /*Mapping=*/nullptr};
3571bf055c9SMarcel Hlopko }
3581bf055c9SMarcel Hlopko 
3591bf055c9SMarcel Hlopko const TokenBuffer::Mapping *
mappingStartingBeforeSpelled(const MarkedFile & F,const syntax::Token * Spelled)3601bf055c9SMarcel Hlopko TokenBuffer::mappingStartingBeforeSpelled(const MarkedFile &F,
3611bf055c9SMarcel Hlopko                                           const syntax::Token *Spelled) {
3621bf055c9SMarcel Hlopko   assert(F.SpelledTokens.data() <= Spelled);
3631bf055c9SMarcel Hlopko   unsigned SpelledI = Spelled - F.SpelledTokens.data();
3641bf055c9SMarcel Hlopko   assert(SpelledI < F.SpelledTokens.size());
3651bf055c9SMarcel Hlopko 
3661bf055c9SMarcel Hlopko   auto It = llvm::partition_point(F.Mappings, [SpelledI](const Mapping &M) {
3671bf055c9SMarcel Hlopko     return M.BeginSpelled <= SpelledI;
3681bf055c9SMarcel Hlopko   });
3691bf055c9SMarcel Hlopko   if (It == F.Mappings.begin())
3701bf055c9SMarcel Hlopko     return nullptr;
3711bf055c9SMarcel Hlopko   --It;
3721bf055c9SMarcel Hlopko   return &*It;
3731bf055c9SMarcel Hlopko }
3741bf055c9SMarcel Hlopko 
3751bf055c9SMarcel Hlopko llvm::SmallVector<llvm::ArrayRef<syntax::Token>, 1>
expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const3761bf055c9SMarcel Hlopko TokenBuffer::expandedForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
3771bf055c9SMarcel Hlopko   if (Spelled.empty())
3781bf055c9SMarcel Hlopko     return {};
379f0ab336eSSam McCall   const auto &File = fileForSpelled(Spelled);
3801bf055c9SMarcel Hlopko 
3811bf055c9SMarcel Hlopko   auto *FrontMapping = mappingStartingBeforeSpelled(File, &Spelled.front());
3821bf055c9SMarcel Hlopko   unsigned SpelledFrontI = &Spelled.front() - File.SpelledTokens.data();
3831bf055c9SMarcel Hlopko   assert(SpelledFrontI < File.SpelledTokens.size());
3841bf055c9SMarcel Hlopko   unsigned ExpandedBegin;
3851bf055c9SMarcel Hlopko   if (!FrontMapping) {
3861bf055c9SMarcel Hlopko     // No mapping that starts before the first token of Spelled, we don't have
3871bf055c9SMarcel Hlopko     // to modify offsets.
3881bf055c9SMarcel Hlopko     ExpandedBegin = File.BeginExpanded + SpelledFrontI;
3891bf055c9SMarcel Hlopko   } else if (SpelledFrontI < FrontMapping->EndSpelled) {
3901bf055c9SMarcel Hlopko     // This mapping applies to Spelled tokens.
3911bf055c9SMarcel Hlopko     if (SpelledFrontI != FrontMapping->BeginSpelled) {
3921bf055c9SMarcel Hlopko       // Spelled tokens don't cover the entire mapping, returning empty result.
3931bf055c9SMarcel Hlopko       return {}; // FIXME: support macro arguments.
3941bf055c9SMarcel Hlopko     }
3951bf055c9SMarcel Hlopko     // Spelled tokens start at the beginning of this mapping.
3961bf055c9SMarcel Hlopko     ExpandedBegin = FrontMapping->BeginExpanded;
3971bf055c9SMarcel Hlopko   } else {
3981bf055c9SMarcel Hlopko     // Spelled tokens start after the mapping ends (they start in the hole
3991bf055c9SMarcel Hlopko     // between 2 mappings, or between a mapping and end of the file).
4001bf055c9SMarcel Hlopko     ExpandedBegin =
4011bf055c9SMarcel Hlopko         FrontMapping->EndExpanded + (SpelledFrontI - FrontMapping->EndSpelled);
4021bf055c9SMarcel Hlopko   }
4031bf055c9SMarcel Hlopko 
4041bf055c9SMarcel Hlopko   auto *BackMapping = mappingStartingBeforeSpelled(File, &Spelled.back());
4051bf055c9SMarcel Hlopko   unsigned SpelledBackI = &Spelled.back() - File.SpelledTokens.data();
4061bf055c9SMarcel Hlopko   unsigned ExpandedEnd;
4071bf055c9SMarcel Hlopko   if (!BackMapping) {
4081bf055c9SMarcel Hlopko     // No mapping that starts before the last token of Spelled, we don't have to
4091bf055c9SMarcel Hlopko     // modify offsets.
4101bf055c9SMarcel Hlopko     ExpandedEnd = File.BeginExpanded + SpelledBackI + 1;
4111bf055c9SMarcel Hlopko   } else if (SpelledBackI < BackMapping->EndSpelled) {
4121bf055c9SMarcel Hlopko     // This mapping applies to Spelled tokens.
4131bf055c9SMarcel Hlopko     if (SpelledBackI + 1 != BackMapping->EndSpelled) {
4141bf055c9SMarcel Hlopko       // Spelled tokens don't cover the entire mapping, returning empty result.
4151bf055c9SMarcel Hlopko       return {}; // FIXME: support macro arguments.
4161bf055c9SMarcel Hlopko     }
4171bf055c9SMarcel Hlopko     ExpandedEnd = BackMapping->EndExpanded;
4181bf055c9SMarcel Hlopko   } else {
4191bf055c9SMarcel Hlopko     // Spelled tokens end after the mapping ends.
4201bf055c9SMarcel Hlopko     ExpandedEnd =
4211bf055c9SMarcel Hlopko         BackMapping->EndExpanded + (SpelledBackI - BackMapping->EndSpelled) + 1;
4221bf055c9SMarcel Hlopko   }
4231bf055c9SMarcel Hlopko 
4241bf055c9SMarcel Hlopko   assert(ExpandedBegin < ExpandedTokens.size());
4251bf055c9SMarcel Hlopko   assert(ExpandedEnd < ExpandedTokens.size());
4261bf055c9SMarcel Hlopko   // Avoid returning empty ranges.
4271bf055c9SMarcel Hlopko   if (ExpandedBegin == ExpandedEnd)
4281bf055c9SMarcel Hlopko     return {};
4291bf055c9SMarcel Hlopko   return {llvm::makeArrayRef(ExpandedTokens.data() + ExpandedBegin,
4301bf055c9SMarcel Hlopko                              ExpandedTokens.data() + ExpandedEnd)};
431e7230ea7SIlya Biryukov }
432e7230ea7SIlya Biryukov 
spelledTokens(FileID FID) const433e7230ea7SIlya Biryukov llvm::ArrayRef<syntax::Token> TokenBuffer::spelledTokens(FileID FID) const {
434e7230ea7SIlya Biryukov   auto It = Files.find(FID);
435e7230ea7SIlya Biryukov   assert(It != Files.end());
436e7230ea7SIlya Biryukov   return It->second.SpelledTokens;
437e7230ea7SIlya Biryukov }
438e7230ea7SIlya Biryukov 
spelledTokenAt(SourceLocation Loc) const439cd9b2e18SKadir Cetinkaya const syntax::Token *TokenBuffer::spelledTokenAt(SourceLocation Loc) const {
440cd9b2e18SKadir Cetinkaya   assert(Loc.isFileID());
441cd9b2e18SKadir Cetinkaya   const auto *Tok = llvm::partition_point(
442cd9b2e18SKadir Cetinkaya       spelledTokens(SourceMgr->getFileID(Loc)),
443cd9b2e18SKadir Cetinkaya       [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
444cd9b2e18SKadir Cetinkaya   if (!Tok || Tok->location() != Loc)
445cd9b2e18SKadir Cetinkaya     return nullptr;
446cd9b2e18SKadir Cetinkaya   return Tok;
447cd9b2e18SKadir Cetinkaya }
448cd9b2e18SKadir Cetinkaya 
str() const449e7230ea7SIlya Biryukov std::string TokenBuffer::Mapping::str() const {
450adcd0268SBenjamin Kramer   return std::string(
451adcd0268SBenjamin Kramer       llvm::formatv("spelled tokens: [{0},{1}), expanded tokens: [{2},{3})",
452adcd0268SBenjamin Kramer                     BeginSpelled, EndSpelled, BeginExpanded, EndExpanded));
453e7230ea7SIlya Biryukov }
454e7230ea7SIlya Biryukov 
455e7230ea7SIlya Biryukov llvm::Optional<llvm::ArrayRef<syntax::Token>>
spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const456e7230ea7SIlya Biryukov TokenBuffer::spelledForExpanded(llvm::ArrayRef<syntax::Token> Expanded) const {
457e7230ea7SIlya Biryukov   // Mapping an empty range is ambiguous in case of empty mappings at either end
458e7230ea7SIlya Biryukov   // of the range, bail out in that case.
459e7230ea7SIlya Biryukov   if (Expanded.empty())
460e7230ea7SIlya Biryukov     return llvm::None;
46127e075fcSSam McCall   const syntax::Token *First = &Expanded.front();
46227e075fcSSam McCall   const syntax::Token *Last = &Expanded.back();
463*02129eabSSam McCall   const syntax::Token *FirstSpelled, *LastSpelled;
464*02129eabSSam McCall   const TokenBuffer::Mapping *FirstMapping, *LastMapping;
465*02129eabSSam McCall   std::tie(FirstSpelled, FirstMapping) = spelledForExpandedToken(First);
466*02129eabSSam McCall   std::tie(LastSpelled, LastMapping) = spelledForExpandedToken(Last);
467e7230ea7SIlya Biryukov 
46827e075fcSSam McCall   FileID FID = SourceMgr->getFileID(FirstSpelled->location());
469e7230ea7SIlya Biryukov   // FIXME: Handle multi-file changes by trying to map onto a common root.
470e7230ea7SIlya Biryukov   if (FID != SourceMgr->getFileID(LastSpelled->location()))
471e7230ea7SIlya Biryukov     return llvm::None;
472e7230ea7SIlya Biryukov 
473e7230ea7SIlya Biryukov   const MarkedFile &File = Files.find(FID)->second;
474e7230ea7SIlya Biryukov 
47527e075fcSSam McCall   // If the range is within one macro argument, the result may be only part of a
47627e075fcSSam McCall   // Mapping. We must use the general (SourceManager-based) algorithm.
47727e075fcSSam McCall   if (FirstMapping && FirstMapping == LastMapping &&
47827e075fcSSam McCall       SourceMgr->isMacroArgExpansion(First->location()) &&
47927e075fcSSam McCall       SourceMgr->isMacroArgExpansion(Last->location())) {
48027e075fcSSam McCall     // We use excluded Prev/Next token for bounds checking.
48127e075fcSSam McCall     SourceLocation Prev = (First == &ExpandedTokens.front())
48227e075fcSSam McCall                               ? SourceLocation()
48327e075fcSSam McCall                               : (First - 1)->location();
48427e075fcSSam McCall     SourceLocation Next = (Last == &ExpandedTokens.back())
48527e075fcSSam McCall                               ? SourceLocation()
48627e075fcSSam McCall                               : (Last + 1)->location();
48727e075fcSSam McCall     SourceRange Range = spelledForExpandedSlow(
48827e075fcSSam McCall         First->location(), Last->location(), Prev, Next, FID, *SourceMgr);
48927e075fcSSam McCall     if (Range.isInvalid())
49027e075fcSSam McCall       return llvm::None;
49127e075fcSSam McCall     return getTokensCovering(File.SpelledTokens, Range, *SourceMgr);
4929619c2ccSKadir Cetinkaya   }
4939619c2ccSKadir Cetinkaya 
49427e075fcSSam McCall   // Otherwise, use the fast version based on Mappings.
4959619c2ccSKadir Cetinkaya   // Do not allow changes that doesn't cover full expansion.
49627e075fcSSam McCall   unsigned FirstExpanded = Expanded.begin() - ExpandedTokens.data();
49727e075fcSSam McCall   unsigned LastExpanded = Expanded.end() - ExpandedTokens.data();
49827e075fcSSam McCall   if (FirstMapping && FirstExpanded != FirstMapping->BeginExpanded)
499e7230ea7SIlya Biryukov     return llvm::None;
50027e075fcSSam McCall   if (LastMapping && LastMapping->EndExpanded != LastExpanded)
501e7230ea7SIlya Biryukov     return llvm::None;
502e7230ea7SIlya Biryukov   return llvm::makeArrayRef(
50327e075fcSSam McCall       FirstMapping ? File.SpelledTokens.data() + FirstMapping->BeginSpelled
50427e075fcSSam McCall                    : FirstSpelled,
505e7230ea7SIlya Biryukov       LastMapping ? File.SpelledTokens.data() + LastMapping->EndSpelled
506e7230ea7SIlya Biryukov                   : LastSpelled + 1);
507e7230ea7SIlya Biryukov }
508e7230ea7SIlya Biryukov 
makeExpansion(const MarkedFile & F,const Mapping & M) const509f0ab336eSSam McCall TokenBuffer::Expansion TokenBuffer::makeExpansion(const MarkedFile &F,
510f0ab336eSSam McCall                                                   const Mapping &M) const {
511f0ab336eSSam McCall   Expansion E;
512f0ab336eSSam McCall   E.Spelled = llvm::makeArrayRef(F.SpelledTokens.data() + M.BeginSpelled,
513f0ab336eSSam McCall                                  F.SpelledTokens.data() + M.EndSpelled);
514f0ab336eSSam McCall   E.Expanded = llvm::makeArrayRef(ExpandedTokens.data() + M.BeginExpanded,
515f0ab336eSSam McCall                                   ExpandedTokens.data() + M.EndExpanded);
516f0ab336eSSam McCall   return E;
517f0ab336eSSam McCall }
518f0ab336eSSam McCall 
519f0ab336eSSam McCall const TokenBuffer::MarkedFile &
fileForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const520f0ab336eSSam McCall TokenBuffer::fileForSpelled(llvm::ArrayRef<syntax::Token> Spelled) const {
521f0ab336eSSam McCall   assert(!Spelled.empty());
522f0ab336eSSam McCall   assert(Spelled.front().location().isFileID() && "not a spelled token");
523f0ab336eSSam McCall   auto FileIt = Files.find(SourceMgr->getFileID(Spelled.front().location()));
524f0ab336eSSam McCall   assert(FileIt != Files.end() && "file not tracked by token buffer");
525f0ab336eSSam McCall   const auto &File = FileIt->second;
526f0ab336eSSam McCall   assert(File.SpelledTokens.data() <= Spelled.data() &&
527f0ab336eSSam McCall          Spelled.end() <=
528f0ab336eSSam McCall              (File.SpelledTokens.data() + File.SpelledTokens.size()) &&
529f0ab336eSSam McCall          "Tokens not in spelled range");
530f0ab336eSSam McCall #ifndef NDEBUG
531f0ab336eSSam McCall   auto T1 = Spelled.back().location();
532f0ab336eSSam McCall   auto T2 = File.SpelledTokens.back().location();
533f0ab336eSSam McCall   assert(T1 == T2 || sourceManager().isBeforeInTranslationUnit(T1, T2));
534f0ab336eSSam McCall #endif
535f0ab336eSSam McCall   return File;
536f0ab336eSSam McCall }
537f0ab336eSSam McCall 
5385aed309aSIlya Biryukov llvm::Optional<TokenBuffer::Expansion>
expansionStartingAt(const syntax::Token * Spelled) const5395aed309aSIlya Biryukov TokenBuffer::expansionStartingAt(const syntax::Token *Spelled) const {
5405aed309aSIlya Biryukov   assert(Spelled);
541f0ab336eSSam McCall   const auto &File = fileForSpelled(*Spelled);
5425aed309aSIlya Biryukov 
5435aed309aSIlya Biryukov   unsigned SpelledIndex = Spelled - File.SpelledTokens.data();
54478ee2fbfSFangrui Song   auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
54578ee2fbfSFangrui Song     return M.BeginSpelled < SpelledIndex;
5465aed309aSIlya Biryukov   });
5475aed309aSIlya Biryukov   if (M == File.Mappings.end() || M->BeginSpelled != SpelledIndex)
5485aed309aSIlya Biryukov     return llvm::None;
549f0ab336eSSam McCall   return makeExpansion(File, *M);
5505aed309aSIlya Biryukov }
551f0ab336eSSam McCall 
expansionsOverlapping(llvm::ArrayRef<syntax::Token> Spelled) const552f0ab336eSSam McCall std::vector<TokenBuffer::Expansion> TokenBuffer::expansionsOverlapping(
553f0ab336eSSam McCall     llvm::ArrayRef<syntax::Token> Spelled) const {
554f0ab336eSSam McCall   if (Spelled.empty())
555f0ab336eSSam McCall     return {};
556f0ab336eSSam McCall   const auto &File = fileForSpelled(Spelled);
557f0ab336eSSam McCall 
558f0ab336eSSam McCall   // Find the first overlapping range, and then copy until we stop overlapping.
559f0ab336eSSam McCall   unsigned SpelledBeginIndex = Spelled.begin() - File.SpelledTokens.data();
560f0ab336eSSam McCall   unsigned SpelledEndIndex = Spelled.end() - File.SpelledTokens.data();
561f0ab336eSSam McCall   auto M = llvm::partition_point(File.Mappings, [&](const Mapping &M) {
562f0ab336eSSam McCall     return M.EndSpelled <= SpelledBeginIndex;
563f0ab336eSSam McCall   });
564f0ab336eSSam McCall   std::vector<TokenBuffer::Expansion> Expansions;
565f0ab336eSSam McCall   for (; M != File.Mappings.end() && M->BeginSpelled < SpelledEndIndex; ++M)
566f0ab336eSSam McCall     Expansions.push_back(makeExpansion(File, *M));
567f0ab336eSSam McCall   return Expansions;
568f0ab336eSSam McCall }
569f0ab336eSSam McCall 
5703f8da5d0SSam McCall llvm::ArrayRef<syntax::Token>
spelledTokensTouching(SourceLocation Loc,llvm::ArrayRef<syntax::Token> Tokens)5713f8da5d0SSam McCall syntax::spelledTokensTouching(SourceLocation Loc,
5726bfc45cfSKirill Bobyrev                               llvm::ArrayRef<syntax::Token> Tokens) {
5733f8da5d0SSam McCall   assert(Loc.isFileID());
5746bfc45cfSKirill Bobyrev 
5753f8da5d0SSam McCall   auto *Right = llvm::partition_point(
5766bfc45cfSKirill Bobyrev       Tokens, [&](const syntax::Token &Tok) { return Tok.location() < Loc; });
5776bfc45cfSKirill Bobyrev   bool AcceptRight = Right != Tokens.end() && Right->location() <= Loc;
5786bfc45cfSKirill Bobyrev   bool AcceptLeft =
5796bfc45cfSKirill Bobyrev       Right != Tokens.begin() && (Right - 1)->endLocation() >= Loc;
5803f8da5d0SSam McCall   return llvm::makeArrayRef(Right - (AcceptLeft ? 1 : 0),
5813f8da5d0SSam McCall                             Right + (AcceptRight ? 1 : 0));
5823f8da5d0SSam McCall }
5833f8da5d0SSam McCall 
5846bfc45cfSKirill Bobyrev llvm::ArrayRef<syntax::Token>
spelledTokensTouching(SourceLocation Loc,const syntax::TokenBuffer & Tokens)5856bfc45cfSKirill Bobyrev syntax::spelledTokensTouching(SourceLocation Loc,
5866bfc45cfSKirill Bobyrev                               const syntax::TokenBuffer &Tokens) {
5876bfc45cfSKirill Bobyrev   return spelledTokensTouching(
5886bfc45cfSKirill Bobyrev       Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
5896bfc45cfSKirill Bobyrev }
5906bfc45cfSKirill Bobyrev 
5913f8da5d0SSam McCall const syntax::Token *
spelledIdentifierTouching(SourceLocation Loc,llvm::ArrayRef<syntax::Token> Tokens)5923f8da5d0SSam McCall syntax::spelledIdentifierTouching(SourceLocation Loc,
5936bfc45cfSKirill Bobyrev                                   llvm::ArrayRef<syntax::Token> Tokens) {
5943f8da5d0SSam McCall   for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) {
5953f8da5d0SSam McCall     if (Tok.kind() == tok::identifier)
5963f8da5d0SSam McCall       return &Tok;
5973f8da5d0SSam McCall   }
5983f8da5d0SSam McCall   return nullptr;
5993f8da5d0SSam McCall }
6003f8da5d0SSam McCall 
6016bfc45cfSKirill Bobyrev const syntax::Token *
spelledIdentifierTouching(SourceLocation Loc,const syntax::TokenBuffer & Tokens)6026bfc45cfSKirill Bobyrev syntax::spelledIdentifierTouching(SourceLocation Loc,
6036bfc45cfSKirill Bobyrev                                   const syntax::TokenBuffer &Tokens) {
6046bfc45cfSKirill Bobyrev   return spelledIdentifierTouching(
6056bfc45cfSKirill Bobyrev       Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)));
6066bfc45cfSKirill Bobyrev }
6076bfc45cfSKirill Bobyrev 
6086687fde0SJohan Vikstrom std::vector<const syntax::Token *>
macroExpansions(FileID FID) const6096687fde0SJohan Vikstrom TokenBuffer::macroExpansions(FileID FID) const {
6106687fde0SJohan Vikstrom   auto FileIt = Files.find(FID);
6116687fde0SJohan Vikstrom   assert(FileIt != Files.end() && "file not tracked by token buffer");
6126687fde0SJohan Vikstrom   auto &File = FileIt->second;
6136687fde0SJohan Vikstrom   std::vector<const syntax::Token *> Expansions;
6146687fde0SJohan Vikstrom   auto &Spelled = File.SpelledTokens;
6156687fde0SJohan Vikstrom   for (auto Mapping : File.Mappings) {
6166687fde0SJohan Vikstrom     const syntax::Token *Token = &Spelled[Mapping.BeginSpelled];
6176687fde0SJohan Vikstrom     if (Token->kind() == tok::TokenKind::identifier)
6186687fde0SJohan Vikstrom       Expansions.push_back(Token);
6196687fde0SJohan Vikstrom   }
6206687fde0SJohan Vikstrom   return Expansions;
6216687fde0SJohan Vikstrom }
6226687fde0SJohan Vikstrom 
tokenize(const FileRange & FR,const SourceManager & SM,const LangOptions & LO)6238c2cf499SKadir Cetinkaya std::vector<syntax::Token> syntax::tokenize(const FileRange &FR,
6248c2cf499SKadir Cetinkaya                                             const SourceManager &SM,
625e7230ea7SIlya Biryukov                                             const LangOptions &LO) {
626e7230ea7SIlya Biryukov   std::vector<syntax::Token> Tokens;
627e7230ea7SIlya Biryukov   IdentifierTable Identifiers(LO);
628e7230ea7SIlya Biryukov   auto AddToken = [&](clang::Token T) {
629e7230ea7SIlya Biryukov     // Fill the proper token kind for keywords, etc.
630e7230ea7SIlya Biryukov     if (T.getKind() == tok::raw_identifier && !T.needsCleaning() &&
631e7230ea7SIlya Biryukov         !T.hasUCN()) { // FIXME: support needsCleaning and hasUCN cases.
632e7230ea7SIlya Biryukov       clang::IdentifierInfo &II = Identifiers.get(T.getRawIdentifier());
633e7230ea7SIlya Biryukov       T.setIdentifierInfo(&II);
634e7230ea7SIlya Biryukov       T.setKind(II.getTokenID());
635e7230ea7SIlya Biryukov     }
636e7230ea7SIlya Biryukov     Tokens.push_back(syntax::Token(T));
637e7230ea7SIlya Biryukov   };
638e7230ea7SIlya Biryukov 
6398c2cf499SKadir Cetinkaya   auto SrcBuffer = SM.getBufferData(FR.file());
6408c2cf499SKadir Cetinkaya   Lexer L(SM.getLocForStartOfFile(FR.file()), LO, SrcBuffer.data(),
6418c2cf499SKadir Cetinkaya           SrcBuffer.data() + FR.beginOffset(),
6428c2cf499SKadir Cetinkaya           // We can't make BufEnd point to FR.endOffset, as Lexer requires a
6438c2cf499SKadir Cetinkaya           // null terminated buffer.
6448c2cf499SKadir Cetinkaya           SrcBuffer.data() + SrcBuffer.size());
645e7230ea7SIlya Biryukov 
646e7230ea7SIlya Biryukov   clang::Token T;
6478c2cf499SKadir Cetinkaya   while (!L.LexFromRawLexer(T) && L.getCurrentBufferOffset() < FR.endOffset())
648e7230ea7SIlya Biryukov     AddToken(T);
6498c2cf499SKadir Cetinkaya   // LexFromRawLexer returns true when it parses the last token of the file, add
6508c2cf499SKadir Cetinkaya   // it iff it starts within the range we are interested in.
6518c2cf499SKadir Cetinkaya   if (SM.getFileOffset(T.getLocation()) < FR.endOffset())
652e7230ea7SIlya Biryukov     AddToken(T);
653e7230ea7SIlya Biryukov   return Tokens;
654e7230ea7SIlya Biryukov }
655e7230ea7SIlya Biryukov 
tokenize(FileID FID,const SourceManager & SM,const LangOptions & LO)6568c2cf499SKadir Cetinkaya std::vector<syntax::Token> syntax::tokenize(FileID FID, const SourceManager &SM,
6578c2cf499SKadir Cetinkaya                                             const LangOptions &LO) {
6588c2cf499SKadir Cetinkaya   return tokenize(syntax::FileRange(FID, 0, SM.getFileIDSize(FID)), SM, LO);
6598c2cf499SKadir Cetinkaya }
6608c2cf499SKadir Cetinkaya 
6615e69f27eSIlya Biryukov /// Records information reqired to construct mappings for the token buffer that
6625e69f27eSIlya Biryukov /// we are collecting.
6635e69f27eSIlya Biryukov class TokenCollector::CollectPPExpansions : public PPCallbacks {
6645e69f27eSIlya Biryukov public:
CollectPPExpansions(TokenCollector & C)6655e69f27eSIlya Biryukov   CollectPPExpansions(TokenCollector &C) : Collector(&C) {}
6665e69f27eSIlya Biryukov 
6675e69f27eSIlya Biryukov   /// Disabled instance will stop reporting anything to TokenCollector.
6685e69f27eSIlya Biryukov   /// This ensures that uses of the preprocessor after TokenCollector::consume()
6695e69f27eSIlya Biryukov   /// is called do not access the (possibly invalid) collector instance.
disable()6705e69f27eSIlya Biryukov   void disable() { Collector = nullptr; }
6715e69f27eSIlya Biryukov 
MacroExpands(const clang::Token & MacroNameTok,const MacroDefinition & MD,SourceRange Range,const MacroArgs * Args)6725e69f27eSIlya Biryukov   void MacroExpands(const clang::Token &MacroNameTok, const MacroDefinition &MD,
6735e69f27eSIlya Biryukov                     SourceRange Range, const MacroArgs *Args) override {
6745e69f27eSIlya Biryukov     if (!Collector)
6755e69f27eSIlya Biryukov       return;
676d66afd6dSSam McCall     const auto &SM = Collector->PP.getSourceManager();
677d66afd6dSSam McCall     // Only record top-level expansions that directly produce expanded tokens.
678d66afd6dSSam McCall     // This excludes those where:
6795e69f27eSIlya Biryukov     //   - the macro use is inside a macro body,
6805e69f27eSIlya Biryukov     //   - the macro appears in an argument to another macro.
681d66afd6dSSam McCall     // However macro expansion isn't really a tree, it's token rewrite rules,
682d66afd6dSSam McCall     // so there are other cases, e.g.
683d66afd6dSSam McCall     //   #define B(X) X
684d66afd6dSSam McCall     //   #define A 1 + B
685d66afd6dSSam McCall     //   A(2)
686d66afd6dSSam McCall     // Both A and B produce expanded tokens, though the macro name 'B' comes
687d66afd6dSSam McCall     // from an expansion. The best we can do is merge the mappings for both.
688d66afd6dSSam McCall 
689d66afd6dSSam McCall     // The *last* token of any top-level macro expansion must be in a file.
690d66afd6dSSam McCall     // (In the example above, see the closing paren of the expansion of B).
691d66afd6dSSam McCall     if (!Range.getEnd().isFileID())
6925e69f27eSIlya Biryukov       return;
693d66afd6dSSam McCall     // If there's a current expansion that encloses this one, this one can't be
694d66afd6dSSam McCall     // top-level.
695d66afd6dSSam McCall     if (LastExpansionEnd.isValid() &&
696d66afd6dSSam McCall         !SM.isBeforeInTranslationUnit(LastExpansionEnd, Range.getEnd()))
697d66afd6dSSam McCall       return;
698d66afd6dSSam McCall 
699d66afd6dSSam McCall     // If the macro invocation (B) starts in a macro (A) but ends in a file,
700d66afd6dSSam McCall     // we'll create a merged mapping for A + B by overwriting the endpoint for
701d66afd6dSSam McCall     // A's startpoint.
702d66afd6dSSam McCall     if (!Range.getBegin().isFileID()) {
703d66afd6dSSam McCall       Range.setBegin(SM.getExpansionLoc(Range.getBegin()));
70478194118SMikhail Maltsev       assert(Collector->Expansions.count(Range.getBegin()) &&
705d66afd6dSSam McCall              "Overlapping macros should have same expansion location");
706d66afd6dSSam McCall     }
707d66afd6dSSam McCall 
70878194118SMikhail Maltsev     Collector->Expansions[Range.getBegin()] = Range.getEnd();
7095e69f27eSIlya Biryukov     LastExpansionEnd = Range.getEnd();
7105e69f27eSIlya Biryukov   }
7115e69f27eSIlya Biryukov   // FIXME: handle directives like #pragma, #include, etc.
7125e69f27eSIlya Biryukov private:
7135e69f27eSIlya Biryukov   TokenCollector *Collector;
7145e69f27eSIlya Biryukov   /// Used to detect recursive macro expansions.
7155e69f27eSIlya Biryukov   SourceLocation LastExpansionEnd;
7165e69f27eSIlya Biryukov };
7175e69f27eSIlya Biryukov 
718e7230ea7SIlya Biryukov /// Fills in the TokenBuffer by tracing the run of a preprocessor. The
719e7230ea7SIlya Biryukov /// implementation tracks the tokens, macro expansions and directives coming
720e7230ea7SIlya Biryukov /// from the preprocessor and:
721e7230ea7SIlya Biryukov /// - for each token, figures out if it is a part of an expanded token stream,
722e7230ea7SIlya Biryukov ///   spelled token stream or both. Stores the tokens appropriately.
723e7230ea7SIlya Biryukov /// - records mappings from the spelled to expanded token ranges, e.g. for macro
724e7230ea7SIlya Biryukov ///   expansions.
725e7230ea7SIlya Biryukov /// FIXME: also properly record:
726e7230ea7SIlya Biryukov ///          - #include directives,
727e7230ea7SIlya Biryukov ///          - #pragma, #line and other PP directives,
728e7230ea7SIlya Biryukov ///          - skipped pp regions,
729e7230ea7SIlya Biryukov ///          - ...
730e7230ea7SIlya Biryukov 
TokenCollector(Preprocessor & PP)731e7230ea7SIlya Biryukov TokenCollector::TokenCollector(Preprocessor &PP) : PP(PP) {
732e7230ea7SIlya Biryukov   // Collect the expanded token stream during preprocessing.
733e7230ea7SIlya Biryukov   PP.setTokenWatcher([this](const clang::Token &T) {
734e7230ea7SIlya Biryukov     if (T.isAnnotation())
735e7230ea7SIlya Biryukov       return;
736e7230ea7SIlya Biryukov     DEBUG_WITH_TYPE("collect-tokens", llvm::dbgs()
737e7230ea7SIlya Biryukov                                           << "Token: "
738e7230ea7SIlya Biryukov                                           << syntax::Token(T).dumpForTests(
739e7230ea7SIlya Biryukov                                                  this->PP.getSourceManager())
740e7230ea7SIlya Biryukov                                           << "\n"
741e7230ea7SIlya Biryukov 
742e7230ea7SIlya Biryukov     );
743e7230ea7SIlya Biryukov     Expanded.push_back(syntax::Token(T));
744e7230ea7SIlya Biryukov   });
7455e69f27eSIlya Biryukov   // And locations of macro calls, to properly recover boundaries of those in
7465e69f27eSIlya Biryukov   // case of empty expansions.
7472b3d49b6SJonas Devlieghere   auto CB = std::make_unique<CollectPPExpansions>(*this);
7485e69f27eSIlya Biryukov   this->Collector = CB.get();
7495e69f27eSIlya Biryukov   PP.addPPCallbacks(std::move(CB));
750e7230ea7SIlya Biryukov }
751e7230ea7SIlya Biryukov 
752e7230ea7SIlya Biryukov /// Builds mappings and spelled tokens in the TokenBuffer based on the expanded
753e7230ea7SIlya Biryukov /// token stream.
754e7230ea7SIlya Biryukov class TokenCollector::Builder {
755e7230ea7SIlya Biryukov public:
Builder(std::vector<syntax::Token> Expanded,PPExpansions CollectedExpansions,const SourceManager & SM,const LangOptions & LangOpts)7565e69f27eSIlya Biryukov   Builder(std::vector<syntax::Token> Expanded, PPExpansions CollectedExpansions,
7575e69f27eSIlya Biryukov           const SourceManager &SM, const LangOptions &LangOpts)
7585e69f27eSIlya Biryukov       : Result(SM), CollectedExpansions(std::move(CollectedExpansions)), SM(SM),
7595e69f27eSIlya Biryukov         LangOpts(LangOpts) {
760e7230ea7SIlya Biryukov     Result.ExpandedTokens = std::move(Expanded);
761e7230ea7SIlya Biryukov   }
762e7230ea7SIlya Biryukov 
build()763e7230ea7SIlya Biryukov   TokenBuffer build() && {
764e7230ea7SIlya Biryukov     assert(!Result.ExpandedTokens.empty());
765e7230ea7SIlya Biryukov     assert(Result.ExpandedTokens.back().kind() == tok::eof);
766ec0b9908SSam McCall 
767ec0b9908SSam McCall     // Tokenize every file that contributed tokens to the expanded stream.
768ec0b9908SSam McCall     buildSpelledTokens();
769ec0b9908SSam McCall 
770ec0b9908SSam McCall     // The expanded token stream consists of runs of tokens that came from
771ec0b9908SSam McCall     // the same source (a macro expansion, part of a file etc).
772ec0b9908SSam McCall     // Between these runs are the logical positions of spelled tokens that
773ec0b9908SSam McCall     // didn't expand to anything.
774ec0b9908SSam McCall     while (NextExpanded < Result.ExpandedTokens.size() - 1 /* eof */) {
775ec0b9908SSam McCall       // Create empty mappings for spelled tokens that expanded to nothing here.
776ec0b9908SSam McCall       // May advance NextSpelled, but NextExpanded is unchanged.
777ec0b9908SSam McCall       discard();
778ec0b9908SSam McCall       // Create mapping for a contiguous run of expanded tokens.
779ec0b9908SSam McCall       // Advances NextExpanded past the run, and NextSpelled accordingly.
780ec0b9908SSam McCall       unsigned OldPosition = NextExpanded;
781ec0b9908SSam McCall       advance();
782ec0b9908SSam McCall       if (NextExpanded == OldPosition)
783ec0b9908SSam McCall         diagnoseAdvanceFailure();
784e7230ea7SIlya Biryukov     }
785ec0b9908SSam McCall     // If any tokens remain in any of the files, they didn't expand to anything.
786ec0b9908SSam McCall     // Create empty mappings up until the end of the file.
787ec0b9908SSam McCall     for (const auto &File : Result.Files)
788ec0b9908SSam McCall       discard(File.first);
789e7230ea7SIlya Biryukov 
7901bf055c9SMarcel Hlopko #ifndef NDEBUG
7911bf055c9SMarcel Hlopko     for (auto &pair : Result.Files) {
7921bf055c9SMarcel Hlopko       auto &mappings = pair.second.Mappings;
7931647ff6eSGeorgii Rymar       assert(llvm::is_sorted(mappings, [](const TokenBuffer::Mapping &M1,
7941647ff6eSGeorgii Rymar                                           const TokenBuffer::Mapping &M2) {
7951bf055c9SMarcel Hlopko         return M1.BeginSpelled < M2.BeginSpelled &&
7961bf055c9SMarcel Hlopko                M1.EndSpelled < M2.EndSpelled &&
7971bf055c9SMarcel Hlopko                M1.BeginExpanded < M2.BeginExpanded &&
7981bf055c9SMarcel Hlopko                M1.EndExpanded < M2.EndExpanded;
7991bf055c9SMarcel Hlopko       }));
8001bf055c9SMarcel Hlopko     }
8011bf055c9SMarcel Hlopko #endif
8021bf055c9SMarcel Hlopko 
803e7230ea7SIlya Biryukov     return std::move(Result);
804e7230ea7SIlya Biryukov   }
805e7230ea7SIlya Biryukov 
806e7230ea7SIlya Biryukov private:
807ec0b9908SSam McCall   // Consume a sequence of spelled tokens that didn't expand to anything.
808ec0b9908SSam McCall   // In the simplest case, skips spelled tokens until finding one that produced
809ec0b9908SSam McCall   // the NextExpanded token, and creates an empty mapping for them.
810ec0b9908SSam McCall   // If Drain is provided, skips remaining tokens from that file instead.
discard(llvm::Optional<FileID> Drain=llvm::None)811ec0b9908SSam McCall   void discard(llvm::Optional<FileID> Drain = llvm::None) {
812ec0b9908SSam McCall     SourceLocation Target =
813ec0b9908SSam McCall         Drain ? SM.getLocForEndOfFile(*Drain)
814ec0b9908SSam McCall               : SM.getExpansionLoc(
815ec0b9908SSam McCall                     Result.ExpandedTokens[NextExpanded].location());
816ec0b9908SSam McCall     FileID File = SM.getFileID(Target);
817ec0b9908SSam McCall     const auto &SpelledTokens = Result.Files[File].SpelledTokens;
818ec0b9908SSam McCall     auto &NextSpelled = this->NextSpelled[File];
819ec0b9908SSam McCall 
820ec0b9908SSam McCall     TokenBuffer::Mapping Mapping;
821ec0b9908SSam McCall     Mapping.BeginSpelled = NextSpelled;
822ec0b9908SSam McCall     // When dropping trailing tokens from a file, the empty mapping should
823ec0b9908SSam McCall     // be positioned within the file's expanded-token range (at the end).
824ec0b9908SSam McCall     Mapping.BeginExpanded = Mapping.EndExpanded =
825ec0b9908SSam McCall         Drain ? Result.Files[*Drain].EndExpanded : NextExpanded;
826ec0b9908SSam McCall     // We may want to split into several adjacent empty mappings.
827ec0b9908SSam McCall     // FlushMapping() emits the current mapping and starts a new one.
828ec0b9908SSam McCall     auto FlushMapping = [&, this] {
829ec0b9908SSam McCall       Mapping.EndSpelled = NextSpelled;
830ec0b9908SSam McCall       if (Mapping.BeginSpelled != Mapping.EndSpelled)
831ec0b9908SSam McCall         Result.Files[File].Mappings.push_back(Mapping);
832ec0b9908SSam McCall       Mapping.BeginSpelled = NextSpelled;
833ec0b9908SSam McCall     };
834ec0b9908SSam McCall 
835ec0b9908SSam McCall     while (NextSpelled < SpelledTokens.size() &&
836ec0b9908SSam McCall            SpelledTokens[NextSpelled].location() < Target) {
837ec0b9908SSam McCall       // If we know mapping bounds at [NextSpelled, KnownEnd] (macro expansion)
838ec0b9908SSam McCall       // then we want to partition our (empty) mapping.
839ec0b9908SSam McCall       //   [Start, NextSpelled) [NextSpelled, KnownEnd] (KnownEnd, Target)
84078194118SMikhail Maltsev       SourceLocation KnownEnd =
84178194118SMikhail Maltsev           CollectedExpansions.lookup(SpelledTokens[NextSpelled].location());
842ec0b9908SSam McCall       if (KnownEnd.isValid()) {
843ec0b9908SSam McCall         FlushMapping(); // Emits [Start, NextSpelled)
844ec0b9908SSam McCall         while (NextSpelled < SpelledTokens.size() &&
845ec0b9908SSam McCall                SpelledTokens[NextSpelled].location() <= KnownEnd)
846ec0b9908SSam McCall           ++NextSpelled;
847ec0b9908SSam McCall         FlushMapping(); // Emits [NextSpelled, KnownEnd]
848ec0b9908SSam McCall         // Now the loop contitues and will emit (KnownEnd, Target).
849ec0b9908SSam McCall       } else {
850ec0b9908SSam McCall         ++NextSpelled;
851e7230ea7SIlya Biryukov       }
852ec0b9908SSam McCall     }
853ec0b9908SSam McCall     FlushMapping();
854ec0b9908SSam McCall   }
855e7230ea7SIlya Biryukov 
856ec0b9908SSam McCall   // Consumes the NextExpanded token and others that are part of the same run.
857ec0b9908SSam McCall   // Increases NextExpanded and NextSpelled by at least one, and adds a mapping
858ec0b9908SSam McCall   // (unless this is a run of file tokens, which we represent with no mapping).
advance()859ec0b9908SSam McCall   void advance() {
860ec0b9908SSam McCall     const syntax::Token &Tok = Result.ExpandedTokens[NextExpanded];
861ec0b9908SSam McCall     SourceLocation Expansion = SM.getExpansionLoc(Tok.location());
862ec0b9908SSam McCall     FileID File = SM.getFileID(Expansion);
863ec0b9908SSam McCall     const auto &SpelledTokens = Result.Files[File].SpelledTokens;
864ec0b9908SSam McCall     auto &NextSpelled = this->NextSpelled[File];
865e7230ea7SIlya Biryukov 
866ec0b9908SSam McCall     if (Tok.location().isFileID()) {
867ec0b9908SSam McCall       // A run of file tokens continues while the expanded/spelled tokens match.
868ec0b9908SSam McCall       while (NextSpelled < SpelledTokens.size() &&
869ec0b9908SSam McCall              NextExpanded < Result.ExpandedTokens.size() &&
870ec0b9908SSam McCall              SpelledTokens[NextSpelled].location() ==
871ec0b9908SSam McCall                  Result.ExpandedTokens[NextExpanded].location()) {
872ec0b9908SSam McCall         ++NextSpelled;
873ec0b9908SSam McCall         ++NextExpanded;
874ec0b9908SSam McCall       }
875ec0b9908SSam McCall       // We need no mapping for file tokens copied to the expanded stream.
876ec0b9908SSam McCall     } else {
877ec0b9908SSam McCall       // We found a new macro expansion. We should have its spelling bounds.
87878194118SMikhail Maltsev       auto End = CollectedExpansions.lookup(Expansion);
879ec0b9908SSam McCall       assert(End.isValid() && "Macro expansion wasn't captured?");
880ec0b9908SSam McCall 
881ec0b9908SSam McCall       // Mapping starts here...
882ec0b9908SSam McCall       TokenBuffer::Mapping Mapping;
883ec0b9908SSam McCall       Mapping.BeginExpanded = NextExpanded;
884ec0b9908SSam McCall       Mapping.BeginSpelled = NextSpelled;
885ec0b9908SSam McCall       // ... consumes spelled tokens within bounds we captured ...
886ec0b9908SSam McCall       while (NextSpelled < SpelledTokens.size() &&
887ec0b9908SSam McCall              SpelledTokens[NextSpelled].location() <= End)
888ec0b9908SSam McCall         ++NextSpelled;
889ec0b9908SSam McCall       // ... consumes expanded tokens rooted at the same expansion ...
890ec0b9908SSam McCall       while (NextExpanded < Result.ExpandedTokens.size() &&
891ec0b9908SSam McCall              SM.getExpansionLoc(
892ec0b9908SSam McCall                  Result.ExpandedTokens[NextExpanded].location()) == Expansion)
893ec0b9908SSam McCall         ++NextExpanded;
894ec0b9908SSam McCall       // ... and ends here.
895ec0b9908SSam McCall       Mapping.EndExpanded = NextExpanded;
896ec0b9908SSam McCall       Mapping.EndSpelled = NextSpelled;
897ec0b9908SSam McCall       Result.Files[File].Mappings.push_back(Mapping);
898e7230ea7SIlya Biryukov     }
899e7230ea7SIlya Biryukov   }
900e7230ea7SIlya Biryukov 
901ec0b9908SSam McCall   // advance() is supposed to consume at least one token - if not, we crash.
diagnoseAdvanceFailure()902ec0b9908SSam McCall   void diagnoseAdvanceFailure() {
903ec0b9908SSam McCall #ifndef NDEBUG
904ec0b9908SSam McCall     // Show the failed-to-map token in context.
905ec0b9908SSam McCall     for (unsigned I = (NextExpanded < 10) ? 0 : NextExpanded - 10;
906ec0b9908SSam McCall          I < NextExpanded + 5 && I < Result.ExpandedTokens.size(); ++I) {
907ec0b9908SSam McCall       const char *L =
908ec0b9908SSam McCall           (I == NextExpanded) ? "!! " : (I < NextExpanded) ? "ok " : "   ";
909ec0b9908SSam McCall       llvm::errs() << L << Result.ExpandedTokens[I].dumpForTests(SM) << "\n";
910e7230ea7SIlya Biryukov     }
911ec0b9908SSam McCall #endif
912ec0b9908SSam McCall     llvm_unreachable("Couldn't map expanded token to spelled tokens!");
913e7230ea7SIlya Biryukov   }
914e7230ea7SIlya Biryukov 
915e7230ea7SIlya Biryukov   /// Initializes TokenBuffer::Files and fills spelled tokens and expanded
916e7230ea7SIlya Biryukov   /// ranges for each of the files.
buildSpelledTokens()917e7230ea7SIlya Biryukov   void buildSpelledTokens() {
918e7230ea7SIlya Biryukov     for (unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) {
919ec0b9908SSam McCall       const auto &Tok = Result.ExpandedTokens[I];
920ec0b9908SSam McCall       auto FID = SM.getFileID(SM.getExpansionLoc(Tok.location()));
921e7230ea7SIlya Biryukov       auto It = Result.Files.try_emplace(FID);
922e7230ea7SIlya Biryukov       TokenBuffer::MarkedFile &File = It.first->second;
923e7230ea7SIlya Biryukov 
924ec0b9908SSam McCall       // The eof token should not be considered part of the main-file's range.
925ec0b9908SSam McCall       File.EndExpanded = Tok.kind() == tok::eof ? I : I + 1;
926ec0b9908SSam McCall 
927e7230ea7SIlya Biryukov       if (!It.second)
928e7230ea7SIlya Biryukov         continue; // we have seen this file before.
929e7230ea7SIlya Biryukov       // This is the first time we see this file.
930e7230ea7SIlya Biryukov       File.BeginExpanded = I;
931e7230ea7SIlya Biryukov       File.SpelledTokens = tokenize(FID, SM, LangOpts);
932e7230ea7SIlya Biryukov     }
933e7230ea7SIlya Biryukov   }
934e7230ea7SIlya Biryukov 
935e7230ea7SIlya Biryukov   TokenBuffer Result;
936ec0b9908SSam McCall   unsigned NextExpanded = 0;                    // cursor in ExpandedTokens
937ec0b9908SSam McCall   llvm::DenseMap<FileID, unsigned> NextSpelled; // cursor in SpelledTokens
9385e69f27eSIlya Biryukov   PPExpansions CollectedExpansions;
939e7230ea7SIlya Biryukov   const SourceManager &SM;
940e7230ea7SIlya Biryukov   const LangOptions &LangOpts;
941e7230ea7SIlya Biryukov };
942e7230ea7SIlya Biryukov 
consume()943e7230ea7SIlya Biryukov TokenBuffer TokenCollector::consume() && {
944e7230ea7SIlya Biryukov   PP.setTokenWatcher(nullptr);
9455e69f27eSIlya Biryukov   Collector->disable();
9465e69f27eSIlya Biryukov   return Builder(std::move(Expanded), std::move(Expansions),
9475e69f27eSIlya Biryukov                  PP.getSourceManager(), PP.getLangOpts())
948e7230ea7SIlya Biryukov       .build();
949e7230ea7SIlya Biryukov }
950e7230ea7SIlya Biryukov 
str() const951e7230ea7SIlya Biryukov std::string syntax::Token::str() const {
952adcd0268SBenjamin Kramer   return std::string(llvm::formatv("Token({0}, length = {1})",
953adcd0268SBenjamin Kramer                                    tok::getTokenName(kind()), length()));
954e7230ea7SIlya Biryukov }
955e7230ea7SIlya Biryukov 
dumpForTests(const SourceManager & SM) const956e7230ea7SIlya Biryukov std::string syntax::Token::dumpForTests(const SourceManager &SM) const {
957cdce2fe5SMarcel Hlopko   return std::string(llvm::formatv("Token(`{0}`, {1}, length = {2})", text(SM),
958cdce2fe5SMarcel Hlopko                                    tok::getTokenName(kind()), length()));
959e7230ea7SIlya Biryukov }
960e7230ea7SIlya Biryukov 
dumpForTests() const961e7230ea7SIlya Biryukov std::string TokenBuffer::dumpForTests() const {
962e7230ea7SIlya Biryukov   auto PrintToken = [this](const syntax::Token &T) -> std::string {
963e7230ea7SIlya Biryukov     if (T.kind() == tok::eof)
964e7230ea7SIlya Biryukov       return "<eof>";
965adcd0268SBenjamin Kramer     return std::string(T.text(*SourceMgr));
966e7230ea7SIlya Biryukov   };
967e7230ea7SIlya Biryukov 
968e7230ea7SIlya Biryukov   auto DumpTokens = [this, &PrintToken](llvm::raw_ostream &OS,
969e7230ea7SIlya Biryukov                                         llvm::ArrayRef<syntax::Token> Tokens) {
97026c066d6SIlya Biryukov     if (Tokens.empty()) {
971e7230ea7SIlya Biryukov       OS << "<empty>";
972e7230ea7SIlya Biryukov       return;
973e7230ea7SIlya Biryukov     }
974e7230ea7SIlya Biryukov     OS << Tokens[0].text(*SourceMgr);
975e7230ea7SIlya Biryukov     for (unsigned I = 1; I < Tokens.size(); ++I) {
976e7230ea7SIlya Biryukov       if (Tokens[I].kind() == tok::eof)
977e7230ea7SIlya Biryukov         continue;
978e7230ea7SIlya Biryukov       OS << " " << PrintToken(Tokens[I]);
979e7230ea7SIlya Biryukov     }
980e7230ea7SIlya Biryukov   };
981e7230ea7SIlya Biryukov 
982e7230ea7SIlya Biryukov   std::string Dump;
983e7230ea7SIlya Biryukov   llvm::raw_string_ostream OS(Dump);
984e7230ea7SIlya Biryukov 
985e7230ea7SIlya Biryukov   OS << "expanded tokens:\n"
986e7230ea7SIlya Biryukov      << "  ";
98726c066d6SIlya Biryukov   // (!) we do not show '<eof>'.
98826c066d6SIlya Biryukov   DumpTokens(OS, llvm::makeArrayRef(ExpandedTokens).drop_back());
989e7230ea7SIlya Biryukov   OS << "\n";
990e7230ea7SIlya Biryukov 
991e7230ea7SIlya Biryukov   std::vector<FileID> Keys;
992e7230ea7SIlya Biryukov   for (auto F : Files)
993e7230ea7SIlya Biryukov     Keys.push_back(F.first);
994e7230ea7SIlya Biryukov   llvm::sort(Keys);
995e7230ea7SIlya Biryukov 
996e7230ea7SIlya Biryukov   for (FileID ID : Keys) {
997e7230ea7SIlya Biryukov     const MarkedFile &File = Files.find(ID)->second;
998e7230ea7SIlya Biryukov     auto *Entry = SourceMgr->getFileEntryForID(ID);
999e7230ea7SIlya Biryukov     if (!Entry)
1000e7230ea7SIlya Biryukov       continue; // Skip builtin files.
1001e7230ea7SIlya Biryukov     OS << llvm::formatv("file '{0}'\n", Entry->getName())
1002e7230ea7SIlya Biryukov        << "  spelled tokens:\n"
1003e7230ea7SIlya Biryukov        << "    ";
1004e7230ea7SIlya Biryukov     DumpTokens(OS, File.SpelledTokens);
1005e7230ea7SIlya Biryukov     OS << "\n";
1006e7230ea7SIlya Biryukov 
1007e7230ea7SIlya Biryukov     if (File.Mappings.empty()) {
1008e7230ea7SIlya Biryukov       OS << "  no mappings.\n";
1009e7230ea7SIlya Biryukov       continue;
1010e7230ea7SIlya Biryukov     }
1011e7230ea7SIlya Biryukov     OS << "  mappings:\n";
1012e7230ea7SIlya Biryukov     for (auto &M : File.Mappings) {
1013e7230ea7SIlya Biryukov       OS << llvm::formatv(
1014e7230ea7SIlya Biryukov           "    ['{0}'_{1}, '{2}'_{3}) => ['{4}'_{5}, '{6}'_{7})\n",
1015e7230ea7SIlya Biryukov           PrintToken(File.SpelledTokens[M.BeginSpelled]), M.BeginSpelled,
1016e7230ea7SIlya Biryukov           M.EndSpelled == File.SpelledTokens.size()
1017e7230ea7SIlya Biryukov               ? "<eof>"
1018e7230ea7SIlya Biryukov               : PrintToken(File.SpelledTokens[M.EndSpelled]),
1019e7230ea7SIlya Biryukov           M.EndSpelled, PrintToken(ExpandedTokens[M.BeginExpanded]),
1020e7230ea7SIlya Biryukov           M.BeginExpanded, PrintToken(ExpandedTokens[M.EndExpanded]),
1021e7230ea7SIlya Biryukov           M.EndExpanded);
1022e7230ea7SIlya Biryukov     }
1023e7230ea7SIlya Biryukov   }
10245336befeSLogan Smith   return Dump;
1025e7230ea7SIlya Biryukov }
1026