1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file contains the implementation of the UnwrappedLineParser,
12 /// which turns a stream of tokens into UnwrappedLines.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #include "UnwrappedLineParser.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20 
21 #define DEBUG_TYPE "format-parser"
22 
23 namespace clang {
24 namespace format {
25 
26 class FormatTokenSource {
27 public:
28   virtual ~FormatTokenSource() {}
29   virtual FormatToken *getNextToken() = 0;
30 
31   virtual unsigned getPosition() = 0;
32   virtual FormatToken *setPosition(unsigned Position) = 0;
33 };
34 
35 namespace {
36 
37 class ScopedDeclarationState {
38 public:
39   ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
40                          bool MustBeDeclaration)
41       : Line(Line), Stack(Stack) {
42     Line.MustBeDeclaration = MustBeDeclaration;
43     Stack.push_back(MustBeDeclaration);
44   }
45   ~ScopedDeclarationState() {
46     Stack.pop_back();
47     if (!Stack.empty())
48       Line.MustBeDeclaration = Stack.back();
49     else
50       Line.MustBeDeclaration = true;
51   }
52 
53 private:
54   UnwrappedLine &Line;
55   std::vector<bool> &Stack;
56 };
57 
58 class ScopedMacroState : public FormatTokenSource {
59 public:
60   ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
61                    FormatToken *&ResetToken)
62       : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
63         PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
64         Token(nullptr) {
65     TokenSource = this;
66     Line.Level = 0;
67     Line.InPPDirective = true;
68   }
69 
70   ~ScopedMacroState() override {
71     TokenSource = PreviousTokenSource;
72     ResetToken = Token;
73     Line.InPPDirective = false;
74     Line.Level = PreviousLineLevel;
75   }
76 
77   FormatToken *getNextToken() override {
78     // The \c UnwrappedLineParser guards against this by never calling
79     // \c getNextToken() after it has encountered the first eof token.
80     assert(!eof());
81     Token = PreviousTokenSource->getNextToken();
82     if (eof())
83       return getFakeEOF();
84     return Token;
85   }
86 
87   unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
88 
89   FormatToken *setPosition(unsigned Position) override {
90     Token = PreviousTokenSource->setPosition(Position);
91     return Token;
92   }
93 
94 private:
95   bool eof() { return Token && Token->HasUnescapedNewline; }
96 
97   FormatToken *getFakeEOF() {
98     static bool EOFInitialized = false;
99     static FormatToken FormatTok;
100     if (!EOFInitialized) {
101       FormatTok.Tok.startToken();
102       FormatTok.Tok.setKind(tok::eof);
103       EOFInitialized = true;
104     }
105     return &FormatTok;
106   }
107 
108   UnwrappedLine &Line;
109   FormatTokenSource *&TokenSource;
110   FormatToken *&ResetToken;
111   unsigned PreviousLineLevel;
112   FormatTokenSource *PreviousTokenSource;
113 
114   FormatToken *Token;
115 };
116 
117 } // end anonymous namespace
118 
119 class ScopedLineState {
120 public:
121   ScopedLineState(UnwrappedLineParser &Parser,
122                   bool SwitchToPreprocessorLines = false)
123       : Parser(Parser), OriginalLines(Parser.CurrentLines) {
124     if (SwitchToPreprocessorLines)
125       Parser.CurrentLines = &Parser.PreprocessorDirectives;
126     else if (!Parser.Line->Tokens.empty())
127       Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
128     PreBlockLine = std::move(Parser.Line);
129     Parser.Line = llvm::make_unique<UnwrappedLine>();
130     Parser.Line->Level = PreBlockLine->Level;
131     Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
132   }
133 
134   ~ScopedLineState() {
135     if (!Parser.Line->Tokens.empty()) {
136       Parser.addUnwrappedLine();
137     }
138     assert(Parser.Line->Tokens.empty());
139     Parser.Line = std::move(PreBlockLine);
140     if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
141       Parser.MustBreakBeforeNextToken = true;
142     Parser.CurrentLines = OriginalLines;
143   }
144 
145 private:
146   UnwrappedLineParser &Parser;
147 
148   std::unique_ptr<UnwrappedLine> PreBlockLine;
149   SmallVectorImpl<UnwrappedLine> *OriginalLines;
150 };
151 
152 class CompoundStatementIndenter {
153 public:
154   CompoundStatementIndenter(UnwrappedLineParser *Parser,
155                             const FormatStyle &Style, unsigned &LineLevel)
156       : LineLevel(LineLevel), OldLineLevel(LineLevel) {
157     if (Style.BraceWrapping.AfterControlStatement)
158       Parser->addUnwrappedLine();
159     if (Style.BraceWrapping.IndentBraces)
160       ++LineLevel;
161   }
162   ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
163 
164 private:
165   unsigned &LineLevel;
166   unsigned OldLineLevel;
167 };
168 
169 namespace {
170 
171 class IndexedTokenSource : public FormatTokenSource {
172 public:
173   IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
174       : Tokens(Tokens), Position(-1) {}
175 
176   FormatToken *getNextToken() override {
177     ++Position;
178     return Tokens[Position];
179   }
180 
181   unsigned getPosition() override {
182     assert(Position >= 0);
183     return Position;
184   }
185 
186   FormatToken *setPosition(unsigned P) override {
187     Position = P;
188     return Tokens[Position];
189   }
190 
191   void reset() { Position = -1; }
192 
193 private:
194   ArrayRef<FormatToken *> Tokens;
195   int Position;
196 };
197 
198 } // end anonymous namespace
199 
200 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
201                                          const AdditionalKeywords &Keywords,
202                                          ArrayRef<FormatToken *> Tokens,
203                                          UnwrappedLineConsumer &Callback)
204     : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
205       CurrentLines(&Lines), Style(Style), Keywords(Keywords), Tokens(nullptr),
206       Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1) {}
207 
208 void UnwrappedLineParser::reset() {
209   PPBranchLevel = -1;
210   Line.reset(new UnwrappedLine);
211   CommentsBeforeNextToken.clear();
212   FormatTok = nullptr;
213   MustBreakBeforeNextToken = false;
214   PreprocessorDirectives.clear();
215   CurrentLines = &Lines;
216   DeclarationScopeStack.clear();
217   PPStack.clear();
218 }
219 
220 void UnwrappedLineParser::parse() {
221   IndexedTokenSource TokenSource(AllTokens);
222   do {
223     DEBUG(llvm::dbgs() << "----\n");
224     reset();
225     Tokens = &TokenSource;
226     TokenSource.reset();
227 
228     readToken();
229     parseFile();
230     // Create line with eof token.
231     pushToken(FormatTok);
232     addUnwrappedLine();
233 
234     for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
235                                                   E = Lines.end();
236          I != E; ++I) {
237       Callback.consumeUnwrappedLine(*I);
238     }
239     Callback.finishRun();
240     Lines.clear();
241     while (!PPLevelBranchIndex.empty() &&
242            PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
243       PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
244       PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
245     }
246     if (!PPLevelBranchIndex.empty()) {
247       ++PPLevelBranchIndex.back();
248       assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
249       assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
250     }
251   } while (!PPLevelBranchIndex.empty());
252 }
253 
254 void UnwrappedLineParser::parseFile() {
255   // The top-level context in a file always has declarations, except for pre-
256   // processor directives and JavaScript files.
257   bool MustBeDeclaration =
258       !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
259   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
260                                           MustBeDeclaration);
261   parseLevel(/*HasOpeningBrace=*/false);
262   // Make sure to format the remaining tokens.
263   flushComments(true);
264   addUnwrappedLine();
265 }
266 
267 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
268   bool SwitchLabelEncountered = false;
269   do {
270     tok::TokenKind kind = FormatTok->Tok.getKind();
271     if (FormatTok->Type == TT_MacroBlockBegin) {
272       kind = tok::l_brace;
273     } else if (FormatTok->Type == TT_MacroBlockEnd) {
274       kind = tok::r_brace;
275     }
276 
277     switch (kind) {
278     case tok::comment:
279       nextToken();
280       addUnwrappedLine();
281       break;
282     case tok::l_brace:
283       // FIXME: Add parameter whether this can happen - if this happens, we must
284       // be in a non-declaration context.
285       if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
286         continue;
287       parseBlock(/*MustBeDeclaration=*/false);
288       addUnwrappedLine();
289       break;
290     case tok::r_brace:
291       if (HasOpeningBrace)
292         return;
293       nextToken();
294       addUnwrappedLine();
295       break;
296     case tok::kw_default:
297     case tok::kw_case:
298       if (!SwitchLabelEncountered &&
299           (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
300         ++Line->Level;
301       SwitchLabelEncountered = true;
302       parseStructuralElement();
303       break;
304     default:
305       parseStructuralElement();
306       break;
307     }
308   } while (!eof());
309 }
310 
311 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
312   // We'll parse forward through the tokens until we hit
313   // a closing brace or eof - note that getNextToken() will
314   // parse macros, so this will magically work inside macro
315   // definitions, too.
316   unsigned StoredPosition = Tokens->getPosition();
317   FormatToken *Tok = FormatTok;
318   const FormatToken *PrevTok = getPreviousToken();
319   // Keep a stack of positions of lbrace tokens. We will
320   // update information about whether an lbrace starts a
321   // braced init list or a different block during the loop.
322   SmallVector<FormatToken *, 8> LBraceStack;
323   assert(Tok->Tok.is(tok::l_brace));
324   do {
325     // Get next non-comment token.
326     FormatToken *NextTok;
327     unsigned ReadTokens = 0;
328     do {
329       NextTok = Tokens->getNextToken();
330       ++ReadTokens;
331     } while (NextTok->is(tok::comment));
332 
333     switch (Tok->Tok.getKind()) {
334     case tok::l_brace:
335       if (Style.Language == FormatStyle::LK_JavaScript && PrevTok &&
336           PrevTok->is(tok::colon))
337         // In TypeScript's TypeMemberLists, there can be semicolons between the
338         // individual members.
339         Tok->BlockKind = BK_BracedInit;
340       else
341         Tok->BlockKind = BK_Unknown;
342       LBraceStack.push_back(Tok);
343       break;
344     case tok::r_brace:
345       if (LBraceStack.empty())
346         break;
347       if (LBraceStack.back()->BlockKind == BK_Unknown) {
348         bool ProbablyBracedList = false;
349         if (Style.Language == FormatStyle::LK_Proto) {
350           ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
351         } else {
352           // Using OriginalColumn to distinguish between ObjC methods and
353           // binary operators is a bit hacky.
354           bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
355                                   NextTok->OriginalColumn == 0;
356 
357           // If there is a comma, semicolon or right paren after the closing
358           // brace, we assume this is a braced initializer list.  Note that
359           // regardless how we mark inner braces here, we will overwrite the
360           // BlockKind later if we parse a braced list (where all blocks
361           // inside are by default braced lists), or when we explicitly detect
362           // blocks (for example while parsing lambdas).
363           //
364           // We exclude + and - as they can be ObjC visibility modifiers.
365           ProbablyBracedList =
366               NextTok->isOneOf(tok::comma, tok::period, tok::colon,
367                                tok::r_paren, tok::r_square, tok::l_brace,
368                                tok::l_square, tok::l_paren, tok::ellipsis) ||
369               (NextTok->is(tok::semi) &&
370                (!ExpectClassBody || LBraceStack.size() != 1)) ||
371               (NextTok->isBinaryOperator() && !NextIsObjCMethod);
372         }
373         if (ProbablyBracedList) {
374           Tok->BlockKind = BK_BracedInit;
375           LBraceStack.back()->BlockKind = BK_BracedInit;
376         } else {
377           Tok->BlockKind = BK_Block;
378           LBraceStack.back()->BlockKind = BK_Block;
379         }
380       }
381       LBraceStack.pop_back();
382       break;
383     case tok::at:
384     case tok::semi:
385     case tok::kw_if:
386     case tok::kw_while:
387     case tok::kw_for:
388     case tok::kw_switch:
389     case tok::kw_try:
390     case tok::kw___try:
391       if (!LBraceStack.empty() && LBraceStack.back()->BlockKind == BK_Unknown)
392         LBraceStack.back()->BlockKind = BK_Block;
393       break;
394     default:
395       break;
396     }
397     PrevTok = Tok;
398     Tok = NextTok;
399   } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
400 
401   // Assume other blocks for all unclosed opening braces.
402   for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
403     if (LBraceStack[i]->BlockKind == BK_Unknown)
404       LBraceStack[i]->BlockKind = BK_Block;
405   }
406 
407   FormatTok = Tokens->setPosition(StoredPosition);
408 }
409 
410 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, bool AddLevel,
411                                      bool MunchSemi) {
412   assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
413          "'{' or macro block token expected");
414   const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
415   FormatTok->BlockKind = BK_Block;
416 
417   unsigned InitialLevel = Line->Level;
418   nextToken();
419 
420   if (MacroBlock && FormatTok->is(tok::l_paren))
421     parseParens();
422 
423   addUnwrappedLine();
424 
425   ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
426                                           MustBeDeclaration);
427   if (AddLevel)
428     ++Line->Level;
429   parseLevel(/*HasOpeningBrace=*/true);
430 
431   if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
432                  : !FormatTok->is(tok::r_brace)) {
433     Line->Level = InitialLevel;
434     FormatTok->BlockKind = BK_Block;
435     return;
436   }
437 
438   nextToken(); // Munch the closing brace.
439 
440   if (MacroBlock && FormatTok->is(tok::l_paren))
441     parseParens();
442 
443   if (MunchSemi && FormatTok->Tok.is(tok::semi))
444     nextToken();
445   Line->Level = InitialLevel;
446 }
447 
448 static bool isGoogScope(const UnwrappedLine &Line) {
449   // FIXME: Closure-library specific stuff should not be hard-coded but be
450   // configurable.
451   if (Line.Tokens.size() < 4)
452     return false;
453   auto I = Line.Tokens.begin();
454   if (I->Tok->TokenText != "goog")
455     return false;
456   ++I;
457   if (I->Tok->isNot(tok::period))
458     return false;
459   ++I;
460   if (I->Tok->TokenText != "scope")
461     return false;
462   ++I;
463   return I->Tok->is(tok::l_paren);
464 }
465 
466 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
467                                    const FormatToken &InitialToken) {
468   if (InitialToken.is(tok::kw_namespace))
469     return Style.BraceWrapping.AfterNamespace;
470   if (InitialToken.is(tok::kw_class))
471     return Style.BraceWrapping.AfterClass;
472   if (InitialToken.is(tok::kw_union))
473     return Style.BraceWrapping.AfterUnion;
474   if (InitialToken.is(tok::kw_struct))
475     return Style.BraceWrapping.AfterStruct;
476   return false;
477 }
478 
479 void UnwrappedLineParser::parseChildBlock() {
480   FormatTok->BlockKind = BK_Block;
481   nextToken();
482   {
483     bool GoogScope =
484         Style.Language == FormatStyle::LK_JavaScript && isGoogScope(*Line);
485     ScopedLineState LineState(*this);
486     ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
487                                             /*MustBeDeclaration=*/false);
488     Line->Level += GoogScope ? 0 : 1;
489     parseLevel(/*HasOpeningBrace=*/true);
490     flushComments(isOnNewLine(*FormatTok));
491     Line->Level -= GoogScope ? 0 : 1;
492   }
493   nextToken();
494 }
495 
496 void UnwrappedLineParser::parsePPDirective() {
497   assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
498   ScopedMacroState MacroState(*Line, Tokens, FormatTok);
499   nextToken();
500 
501   if (!FormatTok->Tok.getIdentifierInfo()) {
502     parsePPUnknown();
503     return;
504   }
505 
506   switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
507   case tok::pp_define:
508     parsePPDefine();
509     return;
510   case tok::pp_if:
511     parsePPIf(/*IfDef=*/false);
512     break;
513   case tok::pp_ifdef:
514   case tok::pp_ifndef:
515     parsePPIf(/*IfDef=*/true);
516     break;
517   case tok::pp_else:
518     parsePPElse();
519     break;
520   case tok::pp_elif:
521     parsePPElIf();
522     break;
523   case tok::pp_endif:
524     parsePPEndIf();
525     break;
526   default:
527     parsePPUnknown();
528     break;
529   }
530 }
531 
532 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
533   if (Unreachable || (!PPStack.empty() && PPStack.back() == PP_Unreachable))
534     PPStack.push_back(PP_Unreachable);
535   else
536     PPStack.push_back(PP_Conditional);
537 }
538 
539 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
540   ++PPBranchLevel;
541   assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
542   if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
543     PPLevelBranchIndex.push_back(0);
544     PPLevelBranchCount.push_back(0);
545   }
546   PPChainBranchIndex.push(0);
547   bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
548   conditionalCompilationCondition(Unreachable || Skip);
549 }
550 
551 void UnwrappedLineParser::conditionalCompilationAlternative() {
552   if (!PPStack.empty())
553     PPStack.pop_back();
554   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
555   if (!PPChainBranchIndex.empty())
556     ++PPChainBranchIndex.top();
557   conditionalCompilationCondition(
558       PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
559       PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
560 }
561 
562 void UnwrappedLineParser::conditionalCompilationEnd() {
563   assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
564   if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
565     if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
566       PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
567     }
568   }
569   // Guard against #endif's without #if.
570   if (PPBranchLevel > 0)
571     --PPBranchLevel;
572   if (!PPChainBranchIndex.empty())
573     PPChainBranchIndex.pop();
574   if (!PPStack.empty())
575     PPStack.pop_back();
576 }
577 
578 void UnwrappedLineParser::parsePPIf(bool IfDef) {
579   nextToken();
580   bool IsLiteralFalse = (FormatTok->Tok.isLiteral() &&
581                          FormatTok->Tok.getLiteralData() != nullptr &&
582                          StringRef(FormatTok->Tok.getLiteralData(),
583                                    FormatTok->Tok.getLength()) == "0") ||
584                         FormatTok->Tok.is(tok::kw_false);
585   conditionalCompilationStart(!IfDef && IsLiteralFalse);
586   parsePPUnknown();
587 }
588 
589 void UnwrappedLineParser::parsePPElse() {
590   conditionalCompilationAlternative();
591   parsePPUnknown();
592 }
593 
594 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
595 
596 void UnwrappedLineParser::parsePPEndIf() {
597   conditionalCompilationEnd();
598   parsePPUnknown();
599 }
600 
601 void UnwrappedLineParser::parsePPDefine() {
602   nextToken();
603 
604   if (FormatTok->Tok.getKind() != tok::identifier) {
605     parsePPUnknown();
606     return;
607   }
608   nextToken();
609   if (FormatTok->Tok.getKind() == tok::l_paren &&
610       FormatTok->WhitespaceRange.getBegin() ==
611           FormatTok->WhitespaceRange.getEnd()) {
612     parseParens();
613   }
614   addUnwrappedLine();
615   Line->Level = 1;
616 
617   // Errors during a preprocessor directive can only affect the layout of the
618   // preprocessor directive, and thus we ignore them. An alternative approach
619   // would be to use the same approach we use on the file level (no
620   // re-indentation if there was a structural error) within the macro
621   // definition.
622   parseFile();
623 }
624 
625 void UnwrappedLineParser::parsePPUnknown() {
626   do {
627     nextToken();
628   } while (!eof());
629   addUnwrappedLine();
630 }
631 
632 // Here we blacklist certain tokens that are not usually the first token in an
633 // unwrapped line. This is used in attempt to distinguish macro calls without
634 // trailing semicolons from other constructs split to several lines.
635 static bool tokenCanStartNewLine(const clang::Token &Tok) {
636   // Semicolon can be a null-statement, l_square can be a start of a macro or
637   // a C++11 attribute, but this doesn't seem to be common.
638   return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
639          Tok.isNot(tok::l_square) &&
640          // Tokens that can only be used as binary operators and a part of
641          // overloaded operator names.
642          Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
643          Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
644          Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
645          Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
646          Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
647          Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
648          Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
649          Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
650          Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
651          Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
652          Tok.isNot(tok::lesslessequal) &&
653          // Colon is used in labels, base class lists, initializer lists,
654          // range-based for loops, ternary operator, but should never be the
655          // first token in an unwrapped line.
656          Tok.isNot(tok::colon) &&
657          // 'noexcept' is a trailing annotation.
658          Tok.isNot(tok::kw_noexcept);
659 }
660 
661 void UnwrappedLineParser::parseStructuralElement() {
662   assert(!FormatTok->is(tok::l_brace));
663   if (Style.Language == FormatStyle::LK_TableGen &&
664       FormatTok->is(tok::pp_include)) {
665     nextToken();
666     if (FormatTok->is(tok::string_literal))
667       nextToken();
668     addUnwrappedLine();
669     return;
670   }
671   switch (FormatTok->Tok.getKind()) {
672   case tok::at:
673     nextToken();
674     if (FormatTok->Tok.is(tok::l_brace)) {
675       parseBracedList();
676       break;
677     }
678     switch (FormatTok->Tok.getObjCKeywordID()) {
679     case tok::objc_public:
680     case tok::objc_protected:
681     case tok::objc_package:
682     case tok::objc_private:
683       return parseAccessSpecifier();
684     case tok::objc_interface:
685     case tok::objc_implementation:
686       return parseObjCInterfaceOrImplementation();
687     case tok::objc_protocol:
688       return parseObjCProtocol();
689     case tok::objc_end:
690       return; // Handled by the caller.
691     case tok::objc_optional:
692     case tok::objc_required:
693       nextToken();
694       addUnwrappedLine();
695       return;
696     case tok::objc_autoreleasepool:
697       nextToken();
698       if (FormatTok->Tok.is(tok::l_brace)) {
699         if (Style.BraceWrapping.AfterObjCDeclaration)
700           addUnwrappedLine();
701         parseBlock(/*MustBeDeclaration=*/false);
702       }
703       addUnwrappedLine();
704       return;
705     case tok::objc_try:
706       // This branch isn't strictly necessary (the kw_try case below would
707       // do this too after the tok::at is parsed above).  But be explicit.
708       parseTryCatch();
709       return;
710     default:
711       break;
712     }
713     break;
714   case tok::kw_asm:
715     nextToken();
716     if (FormatTok->is(tok::l_brace)) {
717       FormatTok->Type = TT_InlineASMBrace;
718       nextToken();
719       while (FormatTok && FormatTok->isNot(tok::eof)) {
720         if (FormatTok->is(tok::r_brace)) {
721           FormatTok->Type = TT_InlineASMBrace;
722           nextToken();
723           addUnwrappedLine();
724           break;
725         }
726         FormatTok->Finalized = true;
727         nextToken();
728       }
729     }
730     break;
731   case tok::kw_namespace:
732     parseNamespace();
733     return;
734   case tok::kw_inline:
735     nextToken();
736     if (FormatTok->Tok.is(tok::kw_namespace)) {
737       parseNamespace();
738       return;
739     }
740     break;
741   case tok::kw_public:
742   case tok::kw_protected:
743   case tok::kw_private:
744     if (Style.Language == FormatStyle::LK_Java ||
745         Style.Language == FormatStyle::LK_JavaScript)
746       nextToken();
747     else
748       parseAccessSpecifier();
749     return;
750   case tok::kw_if:
751     parseIfThenElse();
752     return;
753   case tok::kw_for:
754   case tok::kw_while:
755     parseForOrWhileLoop();
756     return;
757   case tok::kw_do:
758     parseDoWhile();
759     return;
760   case tok::kw_switch:
761     parseSwitch();
762     return;
763   case tok::kw_default:
764     nextToken();
765     parseLabel();
766     return;
767   case tok::kw_case:
768     parseCaseLabel();
769     return;
770   case tok::kw_try:
771   case tok::kw___try:
772     parseTryCatch();
773     return;
774   case tok::kw_extern:
775     nextToken();
776     if (FormatTok->Tok.is(tok::string_literal)) {
777       nextToken();
778       if (FormatTok->Tok.is(tok::l_brace)) {
779         parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/false);
780         addUnwrappedLine();
781         return;
782       }
783     }
784     break;
785   case tok::kw_export:
786     if (Style.Language == FormatStyle::LK_JavaScript) {
787       parseJavaScriptEs6ImportExport();
788       return;
789     }
790     break;
791   case tok::identifier:
792     if (FormatTok->is(TT_ForEachMacro)) {
793       parseForOrWhileLoop();
794       return;
795     }
796     if (FormatTok->is(TT_MacroBlockBegin)) {
797       parseBlock(/*MustBeDeclaration=*/false, /*AddLevel=*/true,
798                  /*MunchSemi=*/false);
799       return;
800     }
801     if (Style.Language == FormatStyle::LK_JavaScript &&
802         FormatTok->is(Keywords.kw_import)) {
803       parseJavaScriptEs6ImportExport();
804       return;
805     }
806     if (FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
807                            Keywords.kw_slots, Keywords.kw_qslots)) {
808       nextToken();
809       if (FormatTok->is(tok::colon)) {
810         nextToken();
811         addUnwrappedLine();
812       }
813       return;
814     }
815     // In all other cases, parse the declaration.
816     break;
817   default:
818     break;
819   }
820   do {
821     switch (FormatTok->Tok.getKind()) {
822     case tok::at:
823       nextToken();
824       if (FormatTok->Tok.is(tok::l_brace))
825         parseBracedList();
826       break;
827     case tok::kw_enum:
828       // parseEnum falls through and does not yet add an unwrapped line as an
829       // enum definition can start a structural element.
830       if (!parseEnum())
831         break;
832       // This only applies for C++.
833       if (Style.Language != FormatStyle::LK_Cpp) {
834         addUnwrappedLine();
835         return;
836       }
837       break;
838     case tok::kw_typedef:
839       nextToken();
840       if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
841                              Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS))
842         parseEnum();
843       break;
844     case tok::kw_struct:
845     case tok::kw_union:
846     case tok::kw_class:
847       // parseRecord falls through and does not yet add an unwrapped line as a
848       // record declaration or definition can start a structural element.
849       parseRecord();
850       // This does not apply for Java and JavaScript.
851       if (Style.Language == FormatStyle::LK_Java ||
852           Style.Language == FormatStyle::LK_JavaScript) {
853         if (FormatTok->is(tok::semi))
854           nextToken();
855         addUnwrappedLine();
856         return;
857       }
858       break;
859     case tok::period:
860       nextToken();
861       // In Java, classes have an implicit static member "class".
862       if (Style.Language == FormatStyle::LK_Java && FormatTok &&
863           FormatTok->is(tok::kw_class))
864         nextToken();
865       if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
866           FormatTok->Tok.getIdentifierInfo())
867         // JavaScript only has pseudo keywords, all keywords are allowed to
868         // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
869         nextToken();
870       break;
871     case tok::semi:
872       nextToken();
873       addUnwrappedLine();
874       return;
875     case tok::r_brace:
876       addUnwrappedLine();
877       return;
878     case tok::l_paren:
879       parseParens();
880       break;
881     case tok::kw_operator:
882       nextToken();
883       if (FormatTok->isBinaryOperator())
884         nextToken();
885       break;
886     case tok::caret:
887       nextToken();
888       if (FormatTok->Tok.isAnyIdentifier() ||
889           FormatTok->isSimpleTypeSpecifier())
890         nextToken();
891       if (FormatTok->is(tok::l_paren))
892         parseParens();
893       if (FormatTok->is(tok::l_brace))
894         parseChildBlock();
895       break;
896     case tok::l_brace:
897       if (!tryToParseBracedList()) {
898         // A block outside of parentheses must be the last part of a
899         // structural element.
900         // FIXME: Figure out cases where this is not true, and add projections
901         // for them (the one we know is missing are lambdas).
902         if (Style.BraceWrapping.AfterFunction)
903           addUnwrappedLine();
904         FormatTok->Type = TT_FunctionLBrace;
905         parseBlock(/*MustBeDeclaration=*/false);
906         addUnwrappedLine();
907         return;
908       }
909       // Otherwise this was a braced init list, and the structural
910       // element continues.
911       break;
912     case tok::kw_try:
913       // We arrive here when parsing function-try blocks.
914       parseTryCatch();
915       return;
916     case tok::identifier: {
917       if (FormatTok->is(TT_MacroBlockEnd)) {
918         addUnwrappedLine();
919         return;
920       }
921 
922       // Parse function literal unless 'function' is the first token in a line
923       // in which case this should be treated as a free-standing function.
924       if (Style.Language == FormatStyle::LK_JavaScript &&
925           FormatTok->is(Keywords.kw_function) && Line->Tokens.size() > 0) {
926         tryToParseJSFunction();
927         break;
928       }
929       if ((Style.Language == FormatStyle::LK_JavaScript ||
930            Style.Language == FormatStyle::LK_Java) &&
931           FormatTok->is(Keywords.kw_interface)) {
932         parseRecord();
933         addUnwrappedLine();
934         return;
935       }
936 
937       StringRef Text = FormatTok->TokenText;
938       nextToken();
939       if (Line->Tokens.size() == 1 &&
940           // JS doesn't have macros, and within classes colons indicate fields,
941           // not labels.
942           Style.Language != FormatStyle::LK_JavaScript) {
943         if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
944           parseLabel();
945           return;
946         }
947         // Recognize function-like macro usages without trailing semicolon as
948         // well as free-standing macros like Q_OBJECT.
949         bool FunctionLike = FormatTok->is(tok::l_paren);
950         if (FunctionLike)
951           parseParens();
952 
953         bool FollowedByNewline =
954             CommentsBeforeNextToken.empty()
955                 ? FormatTok->NewlinesBefore > 0
956                 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
957 
958         if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
959             tokenCanStartNewLine(FormatTok->Tok) && Text == Text.upper()) {
960           addUnwrappedLine();
961           return;
962         }
963       }
964       break;
965     }
966     case tok::equal:
967       // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
968       // TT_JsFatArrow. The always start an expression or a child block if
969       // followed by a curly.
970       if (FormatTok->is(TT_JsFatArrow)) {
971         nextToken();
972         if (FormatTok->is(tok::l_brace))
973           parseChildBlock();
974         break;
975       }
976 
977       nextToken();
978       if (FormatTok->Tok.is(tok::l_brace)) {
979         parseBracedList();
980       }
981       break;
982     case tok::l_square:
983       parseSquare();
984       break;
985     case tok::kw_new:
986       parseNew();
987       break;
988     default:
989       nextToken();
990       break;
991     }
992   } while (!eof());
993 }
994 
995 bool UnwrappedLineParser::tryToParseLambda() {
996   if (Style.Language != FormatStyle::LK_Cpp) {
997     nextToken();
998     return false;
999   }
1000   const FormatToken* Previous = getPreviousToken();
1001   if (Previous &&
1002       (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1003                          tok::kw_delete) ||
1004        Previous->closesScope() || Previous->isSimpleTypeSpecifier())) {
1005     nextToken();
1006     return false;
1007   }
1008   assert(FormatTok->is(tok::l_square));
1009   FormatToken &LSquare = *FormatTok;
1010   if (!tryToParseLambdaIntroducer())
1011     return false;
1012 
1013   while (FormatTok->isNot(tok::l_brace)) {
1014     if (FormatTok->isSimpleTypeSpecifier()) {
1015       nextToken();
1016       continue;
1017     }
1018     switch (FormatTok->Tok.getKind()) {
1019     case tok::l_brace:
1020       break;
1021     case tok::l_paren:
1022       parseParens();
1023       break;
1024     case tok::amp:
1025     case tok::star:
1026     case tok::kw_const:
1027     case tok::comma:
1028     case tok::less:
1029     case tok::greater:
1030     case tok::identifier:
1031     case tok::numeric_constant:
1032     case tok::coloncolon:
1033     case tok::kw_mutable:
1034       nextToken();
1035       break;
1036     case tok::arrow:
1037       FormatTok->Type = TT_LambdaArrow;
1038       nextToken();
1039       break;
1040     default:
1041       return true;
1042     }
1043   }
1044   LSquare.Type = TT_LambdaLSquare;
1045   parseChildBlock();
1046   return true;
1047 }
1048 
1049 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1050   nextToken();
1051   if (FormatTok->is(tok::equal)) {
1052     nextToken();
1053     if (FormatTok->is(tok::r_square)) {
1054       nextToken();
1055       return true;
1056     }
1057     if (FormatTok->isNot(tok::comma))
1058       return false;
1059     nextToken();
1060   } else if (FormatTok->is(tok::amp)) {
1061     nextToken();
1062     if (FormatTok->is(tok::r_square)) {
1063       nextToken();
1064       return true;
1065     }
1066     if (!FormatTok->isOneOf(tok::comma, tok::identifier)) {
1067       return false;
1068     }
1069     if (FormatTok->is(tok::comma))
1070       nextToken();
1071   } else if (FormatTok->is(tok::r_square)) {
1072     nextToken();
1073     return true;
1074   }
1075   do {
1076     if (FormatTok->is(tok::amp))
1077       nextToken();
1078     if (!FormatTok->isOneOf(tok::identifier, tok::kw_this))
1079       return false;
1080     nextToken();
1081     if (FormatTok->is(tok::ellipsis))
1082       nextToken();
1083     if (FormatTok->is(tok::comma)) {
1084       nextToken();
1085     } else if (FormatTok->is(tok::r_square)) {
1086       nextToken();
1087       return true;
1088     } else {
1089       return false;
1090     }
1091   } while (!eof());
1092   return false;
1093 }
1094 
1095 void UnwrappedLineParser::tryToParseJSFunction() {
1096   nextToken();
1097 
1098   // Consume function name.
1099   if (FormatTok->is(tok::identifier))
1100     nextToken();
1101 
1102   if (FormatTok->isNot(tok::l_paren))
1103     return;
1104 
1105   // Parse formal parameter list.
1106   parseParens();
1107 
1108   if (FormatTok->is(tok::colon)) {
1109     // Parse a type definition.
1110     nextToken();
1111 
1112     // Eat the type declaration. For braced inline object types, balance braces,
1113     // otherwise just parse until finding an l_brace for the function body.
1114     if (FormatTok->is(tok::l_brace))
1115       tryToParseBracedList();
1116     else
1117       while (FormatTok->isNot(tok::l_brace) && !eof())
1118         nextToken();
1119   }
1120 
1121   parseChildBlock();
1122 }
1123 
1124 bool UnwrappedLineParser::tryToParseBracedList() {
1125   if (FormatTok->BlockKind == BK_Unknown)
1126     calculateBraceTypes();
1127   assert(FormatTok->BlockKind != BK_Unknown);
1128   if (FormatTok->BlockKind == BK_Block)
1129     return false;
1130   parseBracedList();
1131   return true;
1132 }
1133 
1134 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons) {
1135   bool HasError = false;
1136   nextToken();
1137 
1138   // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1139   // replace this by using parseAssigmentExpression() inside.
1140   do {
1141     if (Style.Language == FormatStyle::LK_JavaScript) {
1142       if (FormatTok->is(Keywords.kw_function)) {
1143         tryToParseJSFunction();
1144         continue;
1145       }
1146       if (FormatTok->is(TT_JsFatArrow)) {
1147         nextToken();
1148         // Fat arrows can be followed by simple expressions or by child blocks
1149         // in curly braces.
1150         if (FormatTok->is(tok::l_brace)) {
1151           parseChildBlock();
1152           continue;
1153         }
1154       }
1155     }
1156     switch (FormatTok->Tok.getKind()) {
1157     case tok::caret:
1158       nextToken();
1159       if (FormatTok->is(tok::l_brace)) {
1160         parseChildBlock();
1161       }
1162       break;
1163     case tok::l_square:
1164       tryToParseLambda();
1165       break;
1166     case tok::l_brace:
1167       // Assume there are no blocks inside a braced init list apart
1168       // from the ones we explicitly parse out (like lambdas).
1169       FormatTok->BlockKind = BK_BracedInit;
1170       parseBracedList();
1171       break;
1172     case tok::l_paren:
1173       parseParens();
1174       // JavaScript can just have free standing methods and getters/setters in
1175       // object literals. Detect them by a "{" following ")".
1176       if (Style.Language == FormatStyle::LK_JavaScript) {
1177         if (FormatTok->is(tok::l_brace))
1178           parseChildBlock();
1179         break;
1180       }
1181       break;
1182     case tok::r_brace:
1183       nextToken();
1184       return !HasError;
1185     case tok::semi:
1186       // JavaScript (or more precisely TypeScript) can have semicolons in braced
1187       // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1188       // used for error recovery if we have otherwise determined that this is
1189       // a braced list.
1190       if (Style.Language == FormatStyle::LK_JavaScript) {
1191         nextToken();
1192         break;
1193       }
1194       HasError = true;
1195       if (!ContinueOnSemicolons)
1196         return !HasError;
1197       nextToken();
1198       break;
1199     case tok::comma:
1200       nextToken();
1201       break;
1202     default:
1203       nextToken();
1204       break;
1205     }
1206   } while (!eof());
1207   return false;
1208 }
1209 
1210 void UnwrappedLineParser::parseParens() {
1211   assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1212   nextToken();
1213   do {
1214     switch (FormatTok->Tok.getKind()) {
1215     case tok::l_paren:
1216       parseParens();
1217       if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1218         parseChildBlock();
1219       break;
1220     case tok::r_paren:
1221       nextToken();
1222       return;
1223     case tok::r_brace:
1224       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1225       return;
1226     case tok::l_square:
1227       tryToParseLambda();
1228       break;
1229     case tok::l_brace:
1230       if (!tryToParseBracedList())
1231         parseChildBlock();
1232       break;
1233     case tok::at:
1234       nextToken();
1235       if (FormatTok->Tok.is(tok::l_brace))
1236         parseBracedList();
1237       break;
1238     case tok::identifier:
1239       if (Style.Language == FormatStyle::LK_JavaScript &&
1240           FormatTok->is(Keywords.kw_function))
1241         tryToParseJSFunction();
1242       else
1243         nextToken();
1244       break;
1245     default:
1246       nextToken();
1247       break;
1248     }
1249   } while (!eof());
1250 }
1251 
1252 void UnwrappedLineParser::parseSquare() {
1253   assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
1254   if (tryToParseLambda())
1255     return;
1256   do {
1257     switch (FormatTok->Tok.getKind()) {
1258     case tok::l_paren:
1259       parseParens();
1260       break;
1261     case tok::r_square:
1262       nextToken();
1263       return;
1264     case tok::r_brace:
1265       // A "}" inside parenthesis is an error if there wasn't a matching "{".
1266       return;
1267     case tok::l_square:
1268       parseSquare();
1269       break;
1270     case tok::l_brace: {
1271       if (!tryToParseBracedList())
1272         parseChildBlock();
1273       break;
1274     }
1275     case tok::at:
1276       nextToken();
1277       if (FormatTok->Tok.is(tok::l_brace))
1278         parseBracedList();
1279       break;
1280     default:
1281       nextToken();
1282       break;
1283     }
1284   } while (!eof());
1285 }
1286 
1287 void UnwrappedLineParser::parseIfThenElse() {
1288   assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
1289   nextToken();
1290   if (FormatTok->Tok.is(tok::l_paren))
1291     parseParens();
1292   bool NeedsUnwrappedLine = false;
1293   if (FormatTok->Tok.is(tok::l_brace)) {
1294     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1295     parseBlock(/*MustBeDeclaration=*/false);
1296     if (Style.BraceWrapping.BeforeElse)
1297       addUnwrappedLine();
1298     else
1299       NeedsUnwrappedLine = true;
1300   } else {
1301     addUnwrappedLine();
1302     ++Line->Level;
1303     parseStructuralElement();
1304     --Line->Level;
1305   }
1306   if (FormatTok->Tok.is(tok::kw_else)) {
1307     nextToken();
1308     if (FormatTok->Tok.is(tok::l_brace)) {
1309       CompoundStatementIndenter Indenter(this, Style, Line->Level);
1310       parseBlock(/*MustBeDeclaration=*/false);
1311       addUnwrappedLine();
1312     } else if (FormatTok->Tok.is(tok::kw_if)) {
1313       parseIfThenElse();
1314     } else {
1315       addUnwrappedLine();
1316       ++Line->Level;
1317       parseStructuralElement();
1318       --Line->Level;
1319     }
1320   } else if (NeedsUnwrappedLine) {
1321     addUnwrappedLine();
1322   }
1323 }
1324 
1325 void UnwrappedLineParser::parseTryCatch() {
1326   assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
1327   nextToken();
1328   bool NeedsUnwrappedLine = false;
1329   if (FormatTok->is(tok::colon)) {
1330     // We are in a function try block, what comes is an initializer list.
1331     nextToken();
1332     while (FormatTok->is(tok::identifier)) {
1333       nextToken();
1334       if (FormatTok->is(tok::l_paren))
1335         parseParens();
1336       if (FormatTok->is(tok::comma))
1337         nextToken();
1338     }
1339   }
1340   // Parse try with resource.
1341   if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
1342     parseParens();
1343   }
1344   if (FormatTok->is(tok::l_brace)) {
1345     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1346     parseBlock(/*MustBeDeclaration=*/false);
1347     if (Style.BraceWrapping.BeforeCatch) {
1348       addUnwrappedLine();
1349     } else {
1350       NeedsUnwrappedLine = true;
1351     }
1352   } else if (!FormatTok->is(tok::kw_catch)) {
1353     // The C++ standard requires a compound-statement after a try.
1354     // If there's none, we try to assume there's a structuralElement
1355     // and try to continue.
1356     addUnwrappedLine();
1357     ++Line->Level;
1358     parseStructuralElement();
1359     --Line->Level;
1360   }
1361   while (1) {
1362     if (FormatTok->is(tok::at))
1363       nextToken();
1364     if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
1365                              tok::kw___finally) ||
1366           ((Style.Language == FormatStyle::LK_Java ||
1367             Style.Language == FormatStyle::LK_JavaScript) &&
1368            FormatTok->is(Keywords.kw_finally)) ||
1369           (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
1370            FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
1371       break;
1372     nextToken();
1373     while (FormatTok->isNot(tok::l_brace)) {
1374       if (FormatTok->is(tok::l_paren)) {
1375         parseParens();
1376         continue;
1377       }
1378       if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
1379         return;
1380       nextToken();
1381     }
1382     NeedsUnwrappedLine = false;
1383     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1384     parseBlock(/*MustBeDeclaration=*/false);
1385     if (Style.BraceWrapping.BeforeCatch)
1386       addUnwrappedLine();
1387     else
1388       NeedsUnwrappedLine = true;
1389   }
1390   if (NeedsUnwrappedLine)
1391     addUnwrappedLine();
1392 }
1393 
1394 void UnwrappedLineParser::parseNamespace() {
1395   assert(FormatTok->Tok.is(tok::kw_namespace) && "'namespace' expected");
1396 
1397   const FormatToken &InitialToken = *FormatTok;
1398   nextToken();
1399   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon))
1400     nextToken();
1401   if (FormatTok->Tok.is(tok::l_brace)) {
1402     if (ShouldBreakBeforeBrace(Style, InitialToken))
1403       addUnwrappedLine();
1404 
1405     bool AddLevel = Style.NamespaceIndentation == FormatStyle::NI_All ||
1406                     (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
1407                      DeclarationScopeStack.size() > 1);
1408     parseBlock(/*MustBeDeclaration=*/true, AddLevel);
1409     // Munch the semicolon after a namespace. This is more common than one would
1410     // think. Puttin the semicolon into its own line is very ugly.
1411     if (FormatTok->Tok.is(tok::semi))
1412       nextToken();
1413     addUnwrappedLine();
1414   }
1415   // FIXME: Add error handling.
1416 }
1417 
1418 void UnwrappedLineParser::parseNew() {
1419   assert(FormatTok->is(tok::kw_new) && "'new' expected");
1420   nextToken();
1421   if (Style.Language != FormatStyle::LK_Java)
1422     return;
1423 
1424   // In Java, we can parse everything up to the parens, which aren't optional.
1425   do {
1426     // There should not be a ;, { or } before the new's open paren.
1427     if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
1428       return;
1429 
1430     // Consume the parens.
1431     if (FormatTok->is(tok::l_paren)) {
1432       parseParens();
1433 
1434       // If there is a class body of an anonymous class, consume that as child.
1435       if (FormatTok->is(tok::l_brace))
1436         parseChildBlock();
1437       return;
1438     }
1439     nextToken();
1440   } while (!eof());
1441 }
1442 
1443 void UnwrappedLineParser::parseForOrWhileLoop() {
1444   assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
1445          "'for', 'while' or foreach macro expected");
1446   nextToken();
1447   if (FormatTok->Tok.is(tok::l_paren))
1448     parseParens();
1449   if (FormatTok->Tok.is(tok::l_brace)) {
1450     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1451     parseBlock(/*MustBeDeclaration=*/false);
1452     addUnwrappedLine();
1453   } else {
1454     addUnwrappedLine();
1455     ++Line->Level;
1456     parseStructuralElement();
1457     --Line->Level;
1458   }
1459 }
1460 
1461 void UnwrappedLineParser::parseDoWhile() {
1462   assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
1463   nextToken();
1464   if (FormatTok->Tok.is(tok::l_brace)) {
1465     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1466     parseBlock(/*MustBeDeclaration=*/false);
1467     if (Style.BraceWrapping.IndentBraces)
1468       addUnwrappedLine();
1469   } else {
1470     addUnwrappedLine();
1471     ++Line->Level;
1472     parseStructuralElement();
1473     --Line->Level;
1474   }
1475 
1476   // FIXME: Add error handling.
1477   if (!FormatTok->Tok.is(tok::kw_while)) {
1478     addUnwrappedLine();
1479     return;
1480   }
1481 
1482   nextToken();
1483   parseStructuralElement();
1484 }
1485 
1486 void UnwrappedLineParser::parseLabel() {
1487   nextToken();
1488   unsigned OldLineLevel = Line->Level;
1489   if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
1490     --Line->Level;
1491   if (CommentsBeforeNextToken.empty() && FormatTok->Tok.is(tok::l_brace)) {
1492     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1493     parseBlock(/*MustBeDeclaration=*/false);
1494     if (FormatTok->Tok.is(tok::kw_break)) {
1495       if (Style.BraceWrapping.AfterControlStatement)
1496         addUnwrappedLine();
1497       parseStructuralElement();
1498     }
1499     addUnwrappedLine();
1500   } else {
1501     if (FormatTok->is(tok::semi))
1502       nextToken();
1503     addUnwrappedLine();
1504   }
1505   Line->Level = OldLineLevel;
1506 }
1507 
1508 void UnwrappedLineParser::parseCaseLabel() {
1509   assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
1510   // FIXME: fix handling of complex expressions here.
1511   do {
1512     nextToken();
1513   } while (!eof() && !FormatTok->Tok.is(tok::colon));
1514   parseLabel();
1515 }
1516 
1517 void UnwrappedLineParser::parseSwitch() {
1518   assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
1519   nextToken();
1520   if (FormatTok->Tok.is(tok::l_paren))
1521     parseParens();
1522   if (FormatTok->Tok.is(tok::l_brace)) {
1523     CompoundStatementIndenter Indenter(this, Style, Line->Level);
1524     parseBlock(/*MustBeDeclaration=*/false);
1525     addUnwrappedLine();
1526   } else {
1527     addUnwrappedLine();
1528     ++Line->Level;
1529     parseStructuralElement();
1530     --Line->Level;
1531   }
1532 }
1533 
1534 void UnwrappedLineParser::parseAccessSpecifier() {
1535   nextToken();
1536   // Understand Qt's slots.
1537   if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
1538     nextToken();
1539   // Otherwise, we don't know what it is, and we'd better keep the next token.
1540   if (FormatTok->Tok.is(tok::colon))
1541     nextToken();
1542   addUnwrappedLine();
1543 }
1544 
1545 bool UnwrappedLineParser::parseEnum() {
1546   // Won't be 'enum' for NS_ENUMs.
1547   if (FormatTok->Tok.is(tok::kw_enum))
1548     nextToken();
1549 
1550   // In TypeScript, "enum" can also be used as property name, e.g. in interface
1551   // declarations. An "enum" keyword followed by a colon would be a syntax
1552   // error and thus assume it is just an identifier.
1553   if (Style.Language == FormatStyle::LK_JavaScript &&
1554       FormatTok->isOneOf(tok::colon, tok::question))
1555     return false;
1556 
1557   // Eat up enum class ...
1558   if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
1559     nextToken();
1560 
1561   while (FormatTok->Tok.getIdentifierInfo() ||
1562          FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
1563                             tok::greater, tok::comma, tok::question)) {
1564     nextToken();
1565     // We can have macros or attributes in between 'enum' and the enum name.
1566     if (FormatTok->is(tok::l_paren))
1567       parseParens();
1568     if (FormatTok->is(tok::identifier)) {
1569       nextToken();
1570       // If there are two identifiers in a row, this is likely an elaborate
1571       // return type. In Java, this can be "implements", etc.
1572       if (Style.Language == FormatStyle::LK_Cpp &&
1573           FormatTok->is(tok::identifier))
1574         return false;
1575     }
1576   }
1577 
1578   // Just a declaration or something is wrong.
1579   if (FormatTok->isNot(tok::l_brace))
1580     return true;
1581   FormatTok->BlockKind = BK_Block;
1582 
1583   if (Style.Language == FormatStyle::LK_Java) {
1584     // Java enums are different.
1585     parseJavaEnumBody();
1586     return true;
1587   }
1588   if (Style.Language == FormatStyle::LK_Proto) {
1589     parseBlock(/*MustBeDeclaration=*/true);
1590     return true;
1591   }
1592 
1593   // Parse enum body.
1594   bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true);
1595   if (HasError) {
1596     if (FormatTok->is(tok::semi))
1597       nextToken();
1598     addUnwrappedLine();
1599   }
1600   return true;
1601 
1602   // There is no addUnwrappedLine() here so that we fall through to parsing a
1603   // structural element afterwards. Thus, in "enum A {} n, m;",
1604   // "} n, m;" will end up in one unwrapped line.
1605 }
1606 
1607 void UnwrappedLineParser::parseJavaEnumBody() {
1608   // Determine whether the enum is simple, i.e. does not have a semicolon or
1609   // constants with class bodies. Simple enums can be formatted like braced
1610   // lists, contracted to a single line, etc.
1611   unsigned StoredPosition = Tokens->getPosition();
1612   bool IsSimple = true;
1613   FormatToken *Tok = Tokens->getNextToken();
1614   while (Tok) {
1615     if (Tok->is(tok::r_brace))
1616       break;
1617     if (Tok->isOneOf(tok::l_brace, tok::semi)) {
1618       IsSimple = false;
1619       break;
1620     }
1621     // FIXME: This will also mark enums with braces in the arguments to enum
1622     // constants as "not simple". This is probably fine in practice, though.
1623     Tok = Tokens->getNextToken();
1624   }
1625   FormatTok = Tokens->setPosition(StoredPosition);
1626 
1627   if (IsSimple) {
1628     parseBracedList();
1629     addUnwrappedLine();
1630     return;
1631   }
1632 
1633   // Parse the body of a more complex enum.
1634   // First add a line for everything up to the "{".
1635   nextToken();
1636   addUnwrappedLine();
1637   ++Line->Level;
1638 
1639   // Parse the enum constants.
1640   while (FormatTok) {
1641     if (FormatTok->is(tok::l_brace)) {
1642       // Parse the constant's class body.
1643       parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1644                  /*MunchSemi=*/false);
1645     } else if (FormatTok->is(tok::l_paren)) {
1646       parseParens();
1647     } else if (FormatTok->is(tok::comma)) {
1648       nextToken();
1649       addUnwrappedLine();
1650     } else if (FormatTok->is(tok::semi)) {
1651       nextToken();
1652       addUnwrappedLine();
1653       break;
1654     } else if (FormatTok->is(tok::r_brace)) {
1655       addUnwrappedLine();
1656       break;
1657     } else {
1658       nextToken();
1659     }
1660   }
1661 
1662   // Parse the class body after the enum's ";" if any.
1663   parseLevel(/*HasOpeningBrace=*/true);
1664   nextToken();
1665   --Line->Level;
1666   addUnwrappedLine();
1667 }
1668 
1669 void UnwrappedLineParser::parseRecord() {
1670   const FormatToken &InitialToken = *FormatTok;
1671   nextToken();
1672 
1673   // The actual identifier can be a nested name specifier, and in macros
1674   // it is often token-pasted.
1675   while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
1676                             tok::kw___attribute, tok::kw___declspec,
1677                             tok::kw_alignas) ||
1678          ((Style.Language == FormatStyle::LK_Java ||
1679            Style.Language == FormatStyle::LK_JavaScript) &&
1680           FormatTok->isOneOf(tok::period, tok::comma))) {
1681     bool IsNonMacroIdentifier =
1682         FormatTok->is(tok::identifier) &&
1683         FormatTok->TokenText != FormatTok->TokenText.upper();
1684     nextToken();
1685     // We can have macros or attributes in between 'class' and the class name.
1686     if (!IsNonMacroIdentifier && FormatTok->Tok.is(tok::l_paren))
1687       parseParens();
1688   }
1689 
1690   // Note that parsing away template declarations here leads to incorrectly
1691   // accepting function declarations as record declarations.
1692   // In general, we cannot solve this problem. Consider:
1693   // class A<int> B() {}
1694   // which can be a function definition or a class definition when B() is a
1695   // macro. If we find enough real-world cases where this is a problem, we
1696   // can parse for the 'template' keyword in the beginning of the statement,
1697   // and thus rule out the record production in case there is no template
1698   // (this would still leave us with an ambiguity between template function
1699   // and class declarations).
1700   if (FormatTok->isOneOf(tok::colon, tok::less)) {
1701     while (!eof()) {
1702       if (FormatTok->is(tok::l_brace)) {
1703         calculateBraceTypes(/*ExpectClassBody=*/true);
1704         if (!tryToParseBracedList())
1705           break;
1706       }
1707       if (FormatTok->Tok.is(tok::semi))
1708         return;
1709       nextToken();
1710     }
1711   }
1712   if (FormatTok->Tok.is(tok::l_brace)) {
1713     if (ShouldBreakBeforeBrace(Style, InitialToken))
1714       addUnwrappedLine();
1715 
1716     parseBlock(/*MustBeDeclaration=*/true, /*AddLevel=*/true,
1717                /*MunchSemi=*/false);
1718   }
1719   // There is no addUnwrappedLine() here so that we fall through to parsing a
1720   // structural element afterwards. Thus, in "class A {} n, m;",
1721   // "} n, m;" will end up in one unwrapped line.
1722 }
1723 
1724 void UnwrappedLineParser::parseObjCProtocolList() {
1725   assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
1726   do
1727     nextToken();
1728   while (!eof() && FormatTok->Tok.isNot(tok::greater));
1729   nextToken(); // Skip '>'.
1730 }
1731 
1732 void UnwrappedLineParser::parseObjCUntilAtEnd() {
1733   do {
1734     if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
1735       nextToken();
1736       addUnwrappedLine();
1737       break;
1738     }
1739     if (FormatTok->is(tok::l_brace)) {
1740       parseBlock(/*MustBeDeclaration=*/false);
1741       // In ObjC interfaces, nothing should be following the "}".
1742       addUnwrappedLine();
1743     } else if (FormatTok->is(tok::r_brace)) {
1744       // Ignore stray "}". parseStructuralElement doesn't consume them.
1745       nextToken();
1746       addUnwrappedLine();
1747     } else {
1748       parseStructuralElement();
1749     }
1750   } while (!eof());
1751 }
1752 
1753 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
1754   nextToken();
1755   nextToken(); // interface name
1756 
1757   // @interface can be followed by either a base class, or a category.
1758   if (FormatTok->Tok.is(tok::colon)) {
1759     nextToken();
1760     nextToken(); // base class name
1761   } else if (FormatTok->Tok.is(tok::l_paren))
1762     // Skip category, if present.
1763     parseParens();
1764 
1765   if (FormatTok->Tok.is(tok::less))
1766     parseObjCProtocolList();
1767 
1768   if (FormatTok->Tok.is(tok::l_brace)) {
1769     if (Style.BraceWrapping.AfterObjCDeclaration)
1770       addUnwrappedLine();
1771     parseBlock(/*MustBeDeclaration=*/true);
1772   }
1773 
1774   // With instance variables, this puts '}' on its own line.  Without instance
1775   // variables, this ends the @interface line.
1776   addUnwrappedLine();
1777 
1778   parseObjCUntilAtEnd();
1779 }
1780 
1781 void UnwrappedLineParser::parseObjCProtocol() {
1782   nextToken();
1783   nextToken(); // protocol name
1784 
1785   if (FormatTok->Tok.is(tok::less))
1786     parseObjCProtocolList();
1787 
1788   // Check for protocol declaration.
1789   if (FormatTok->Tok.is(tok::semi)) {
1790     nextToken();
1791     return addUnwrappedLine();
1792   }
1793 
1794   addUnwrappedLine();
1795   parseObjCUntilAtEnd();
1796 }
1797 
1798 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
1799   assert(FormatTok->isOneOf(Keywords.kw_import, tok::kw_export));
1800   nextToken();
1801 
1802   // Consume the "default" in "export default class/function".
1803   if (FormatTok->is(tok::kw_default))
1804     nextToken();
1805 
1806   // Consume "function" and "default function", so that these get parsed as
1807   // free-standing JS functions, i.e. do not require a trailing semicolon.
1808   if (FormatTok->is(Keywords.kw_function)) {
1809     nextToken();
1810     return;
1811   }
1812 
1813   // Consume the "abstract" in "export abstract class".
1814   if (FormatTok->is(Keywords.kw_abstract))
1815     nextToken();
1816 
1817   if (FormatTok->isOneOf(tok::kw_const, tok::kw_class, tok::kw_enum,
1818                          Keywords.kw_interface, Keywords.kw_let,
1819                          Keywords.kw_var))
1820     return; // Fall through to parsing the corresponding structure.
1821 
1822   while (!eof() && FormatTok->isNot(tok::semi)) {
1823     if (FormatTok->is(tok::l_brace)) {
1824       FormatTok->BlockKind = BK_Block;
1825       parseBracedList();
1826     } else {
1827       nextToken();
1828     }
1829   }
1830 }
1831 
1832 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
1833                                                  StringRef Prefix = "") {
1834   llvm::dbgs() << Prefix << "Line(" << Line.Level << ")"
1835                << (Line.InPPDirective ? " MACRO" : "") << ": ";
1836   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1837                                                     E = Line.Tokens.end();
1838        I != E; ++I) {
1839     llvm::dbgs() << I->Tok->Tok.getName() << "[" << I->Tok->Type << "] ";
1840   }
1841   for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
1842                                                     E = Line.Tokens.end();
1843        I != E; ++I) {
1844     const UnwrappedLineNode &Node = *I;
1845     for (SmallVectorImpl<UnwrappedLine>::const_iterator
1846              I = Node.Children.begin(),
1847              E = Node.Children.end();
1848          I != E; ++I) {
1849       printDebugInfo(*I, "\nChild: ");
1850     }
1851   }
1852   llvm::dbgs() << "\n";
1853 }
1854 
1855 void UnwrappedLineParser::addUnwrappedLine() {
1856   if (Line->Tokens.empty())
1857     return;
1858   DEBUG({
1859     if (CurrentLines == &Lines)
1860       printDebugInfo(*Line);
1861   });
1862   CurrentLines->push_back(std::move(*Line));
1863   Line->Tokens.clear();
1864   if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
1865     CurrentLines->append(
1866         std::make_move_iterator(PreprocessorDirectives.begin()),
1867         std::make_move_iterator(PreprocessorDirectives.end()));
1868     PreprocessorDirectives.clear();
1869   }
1870 }
1871 
1872 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
1873 
1874 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
1875   return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
1876          FormatTok.NewlinesBefore > 0;
1877 }
1878 
1879 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
1880   bool JustComments = Line->Tokens.empty();
1881   for (SmallVectorImpl<FormatToken *>::const_iterator
1882            I = CommentsBeforeNextToken.begin(),
1883            E = CommentsBeforeNextToken.end();
1884        I != E; ++I) {
1885     if (isOnNewLine(**I) && JustComments)
1886       addUnwrappedLine();
1887     pushToken(*I);
1888   }
1889   if (NewlineBeforeNext && JustComments)
1890     addUnwrappedLine();
1891   CommentsBeforeNextToken.clear();
1892 }
1893 
1894 void UnwrappedLineParser::nextToken() {
1895   if (eof())
1896     return;
1897   flushComments(isOnNewLine(*FormatTok));
1898   pushToken(FormatTok);
1899   readToken();
1900 }
1901 
1902 const FormatToken *UnwrappedLineParser::getPreviousToken() {
1903   // FIXME: This is a dirty way to access the previous token. Find a better
1904   // solution.
1905   if (!Line || Line->Tokens.empty())
1906     return nullptr;
1907   return Line->Tokens.back().Tok;
1908 }
1909 
1910 void UnwrappedLineParser::readToken() {
1911   bool CommentsInCurrentLine = true;
1912   do {
1913     FormatTok = Tokens->getNextToken();
1914     assert(FormatTok);
1915     while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
1916            (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
1917       // If there is an unfinished unwrapped line, we flush the preprocessor
1918       // directives only after that unwrapped line was finished later.
1919       bool SwitchToPreprocessorLines = !Line->Tokens.empty();
1920       ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
1921       // Comments stored before the preprocessor directive need to be output
1922       // before the preprocessor directive, at the same level as the
1923       // preprocessor directive, as we consider them to apply to the directive.
1924       flushComments(isOnNewLine(*FormatTok));
1925       parsePPDirective();
1926     }
1927     while (FormatTok->Type == TT_ConflictStart ||
1928            FormatTok->Type == TT_ConflictEnd ||
1929            FormatTok->Type == TT_ConflictAlternative) {
1930       if (FormatTok->Type == TT_ConflictStart) {
1931         conditionalCompilationStart(/*Unreachable=*/false);
1932       } else if (FormatTok->Type == TT_ConflictAlternative) {
1933         conditionalCompilationAlternative();
1934       } else if (FormatTok->Type == TT_ConflictEnd) {
1935         conditionalCompilationEnd();
1936       }
1937       FormatTok = Tokens->getNextToken();
1938       FormatTok->MustBreakBefore = true;
1939     }
1940 
1941     if (!PPStack.empty() && (PPStack.back() == PP_Unreachable) &&
1942         !Line->InPPDirective) {
1943       continue;
1944     }
1945 
1946     if (!FormatTok->Tok.is(tok::comment))
1947       return;
1948     if (isOnNewLine(*FormatTok) || FormatTok->IsFirst) {
1949       CommentsInCurrentLine = false;
1950     }
1951     if (CommentsInCurrentLine) {
1952       pushToken(FormatTok);
1953     } else {
1954       CommentsBeforeNextToken.push_back(FormatTok);
1955     }
1956   } while (!eof());
1957 }
1958 
1959 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
1960   Line->Tokens.push_back(UnwrappedLineNode(Tok));
1961   if (MustBreakBeforeNextToken) {
1962     Line->Tokens.back().Tok->MustBreakBefore = true;
1963     MustBreakBeforeNextToken = false;
1964   }
1965 }
1966 
1967 } // end namespace format
1968 } // end namespace clang
1969