//===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the implementation of the UnwrappedLineParser,
/// which turns a stream of tokens into UnwrappedLines.
///
//===----------------------------------------------------------------------===//
14
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/Support/Debug.h"
19 #include "llvm/Support/raw_ostream.h"
20
21 #include <algorithm>
22
23 #define DEBUG_TYPE "format-parser"
24
25 namespace clang {
26 namespace format {
27
28 class FormatTokenSource {
29 public:
~FormatTokenSource()30 virtual ~FormatTokenSource() {}
31 virtual FormatToken *getNextToken() = 0;
32
33 virtual unsigned getPosition() = 0;
34 virtual FormatToken *setPosition(unsigned Position) = 0;
35 };
36
37 namespace {
38
39 class ScopedDeclarationState {
40 public:
ScopedDeclarationState(UnwrappedLine & Line,std::vector<bool> & Stack,bool MustBeDeclaration)41 ScopedDeclarationState(UnwrappedLine &Line, std::vector<bool> &Stack,
42 bool MustBeDeclaration)
43 : Line(Line), Stack(Stack) {
44 Line.MustBeDeclaration = MustBeDeclaration;
45 Stack.push_back(MustBeDeclaration);
46 }
~ScopedDeclarationState()47 ~ScopedDeclarationState() {
48 Stack.pop_back();
49 if (!Stack.empty())
50 Line.MustBeDeclaration = Stack.back();
51 else
52 Line.MustBeDeclaration = true;
53 }
54
55 private:
56 UnwrappedLine &Line;
57 std::vector<bool> &Stack;
58 };
59
isLineComment(const FormatToken & FormatTok)60 static bool isLineComment(const FormatToken &FormatTok) {
61 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
62 }
63
64 // Checks if \p FormatTok is a line comment that continues the line comment
65 // \p Previous. The original column of \p MinColumnToken is used to determine
66 // whether \p FormatTok is indented enough to the right to continue \p Previous.
continuesLineComment(const FormatToken & FormatTok,const FormatToken * Previous,const FormatToken * MinColumnToken)67 static bool continuesLineComment(const FormatToken &FormatTok,
68 const FormatToken *Previous,
69 const FormatToken *MinColumnToken) {
70 if (!Previous || !MinColumnToken)
71 return false;
72 unsigned MinContinueColumn =
73 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
74 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
75 isLineComment(*Previous) &&
76 FormatTok.OriginalColumn >= MinContinueColumn;
77 }
78
79 class ScopedMacroState : public FormatTokenSource {
80 public:
ScopedMacroState(UnwrappedLine & Line,FormatTokenSource * & TokenSource,FormatToken * & ResetToken)81 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
82 FormatToken *&ResetToken)
83 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
84 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
85 Token(nullptr), PreviousToken(nullptr) {
86 FakeEOF.Tok.startToken();
87 FakeEOF.Tok.setKind(tok::eof);
88 TokenSource = this;
89 Line.Level = 0;
90 Line.InPPDirective = true;
91 }
92
~ScopedMacroState()93 ~ScopedMacroState() override {
94 TokenSource = PreviousTokenSource;
95 ResetToken = Token;
96 Line.InPPDirective = false;
97 Line.Level = PreviousLineLevel;
98 }
99
getNextToken()100 FormatToken *getNextToken() override {
101 // The \c UnwrappedLineParser guards against this by never calling
102 // \c getNextToken() after it has encountered the first eof token.
103 assert(!eof());
104 PreviousToken = Token;
105 Token = PreviousTokenSource->getNextToken();
106 if (eof())
107 return &FakeEOF;
108 return Token;
109 }
110
getPosition()111 unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
112
setPosition(unsigned Position)113 FormatToken *setPosition(unsigned Position) override {
114 PreviousToken = nullptr;
115 Token = PreviousTokenSource->setPosition(Position);
116 return Token;
117 }
118
119 private:
eof()120 bool eof() {
121 return Token && Token->HasUnescapedNewline &&
122 !continuesLineComment(*Token, PreviousToken,
123 /*MinColumnToken=*/PreviousToken);
124 }
125
126 FormatToken FakeEOF;
127 UnwrappedLine &Line;
128 FormatTokenSource *&TokenSource;
129 FormatToken *&ResetToken;
130 unsigned PreviousLineLevel;
131 FormatTokenSource *PreviousTokenSource;
132
133 FormatToken *Token;
134 FormatToken *PreviousToken;
135 };
136
137 } // end anonymous namespace
138
139 class ScopedLineState {
140 public:
ScopedLineState(UnwrappedLineParser & Parser,bool SwitchToPreprocessorLines=false)141 ScopedLineState(UnwrappedLineParser &Parser,
142 bool SwitchToPreprocessorLines = false)
143 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
144 if (SwitchToPreprocessorLines)
145 Parser.CurrentLines = &Parser.PreprocessorDirectives;
146 else if (!Parser.Line->Tokens.empty())
147 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
148 PreBlockLine = std::move(Parser.Line);
149 Parser.Line = std::make_unique<UnwrappedLine>();
150 Parser.Line->Level = PreBlockLine->Level;
151 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
152 }
153
~ScopedLineState()154 ~ScopedLineState() {
155 if (!Parser.Line->Tokens.empty()) {
156 Parser.addUnwrappedLine();
157 }
158 assert(Parser.Line->Tokens.empty());
159 Parser.Line = std::move(PreBlockLine);
160 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
161 Parser.MustBreakBeforeNextToken = true;
162 Parser.CurrentLines = OriginalLines;
163 }
164
165 private:
166 UnwrappedLineParser &Parser;
167
168 std::unique_ptr<UnwrappedLine> PreBlockLine;
169 SmallVectorImpl<UnwrappedLine> *OriginalLines;
170 };
171
172 class CompoundStatementIndenter {
173 public:
CompoundStatementIndenter(UnwrappedLineParser * Parser,const FormatStyle & Style,unsigned & LineLevel)174 CompoundStatementIndenter(UnwrappedLineParser *Parser,
175 const FormatStyle &Style, unsigned &LineLevel)
176 : CompoundStatementIndenter(Parser, LineLevel,
177 Style.BraceWrapping.AfterControlStatement,
178 Style.BraceWrapping.IndentBraces) {}
CompoundStatementIndenter(UnwrappedLineParser * Parser,unsigned & LineLevel,bool WrapBrace,bool IndentBrace)179 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
180 bool WrapBrace, bool IndentBrace)
181 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
182 if (WrapBrace)
183 Parser->addUnwrappedLine();
184 if (IndentBrace)
185 ++LineLevel;
186 }
~CompoundStatementIndenter()187 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
188
189 private:
190 unsigned &LineLevel;
191 unsigned OldLineLevel;
192 };
193
194 namespace {
195
196 class IndexedTokenSource : public FormatTokenSource {
197 public:
IndexedTokenSource(ArrayRef<FormatToken * > Tokens)198 IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
199 : Tokens(Tokens), Position(-1) {}
200
getNextToken()201 FormatToken *getNextToken() override {
202 ++Position;
203 return Tokens[Position];
204 }
205
getPosition()206 unsigned getPosition() override {
207 assert(Position >= 0);
208 return Position;
209 }
210
setPosition(unsigned P)211 FormatToken *setPosition(unsigned P) override {
212 Position = P;
213 return Tokens[Position];
214 }
215
reset()216 void reset() { Position = -1; }
217
218 private:
219 ArrayRef<FormatToken *> Tokens;
220 int Position;
221 };
222
223 } // end anonymous namespace
224
UnwrappedLineParser(const FormatStyle & Style,const AdditionalKeywords & Keywords,unsigned FirstStartColumn,ArrayRef<FormatToken * > Tokens,UnwrappedLineConsumer & Callback)225 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
226 const AdditionalKeywords &Keywords,
227 unsigned FirstStartColumn,
228 ArrayRef<FormatToken *> Tokens,
229 UnwrappedLineConsumer &Callback)
230 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
231 CurrentLines(&Lines), Style(Style), Keywords(Keywords),
232 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
233 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
234 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
235 ? IG_Rejected
236 : IG_Inited),
237 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
238
reset()239 void UnwrappedLineParser::reset() {
240 PPBranchLevel = -1;
241 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
242 ? IG_Rejected
243 : IG_Inited;
244 IncludeGuardToken = nullptr;
245 Line.reset(new UnwrappedLine);
246 CommentsBeforeNextToken.clear();
247 FormatTok = nullptr;
248 MustBreakBeforeNextToken = false;
249 PreprocessorDirectives.clear();
250 CurrentLines = &Lines;
251 DeclarationScopeStack.clear();
252 PPStack.clear();
253 Line->FirstStartColumn = FirstStartColumn;
254 }
255
parse()256 void UnwrappedLineParser::parse() {
257 IndexedTokenSource TokenSource(AllTokens);
258 Line->FirstStartColumn = FirstStartColumn;
259 do {
260 LLVM_DEBUG(llvm::dbgs() << "----\n");
261 reset();
262 Tokens = &TokenSource;
263 TokenSource.reset();
264
265 readToken();
266 parseFile();
267
268 // If we found an include guard then all preprocessor directives (other than
269 // the guard) are over-indented by one.
270 if (IncludeGuard == IG_Found)
271 for (auto &Line : Lines)
272 if (Line.InPPDirective && Line.Level > 0)
273 --Line.Level;
274
275 // Create line with eof token.
276 pushToken(FormatTok);
277 addUnwrappedLine();
278
279 for (SmallVectorImpl<UnwrappedLine>::iterator I = Lines.begin(),
280 E = Lines.end();
281 I != E; ++I) {
282 Callback.consumeUnwrappedLine(*I);
283 }
284 Callback.finishRun();
285 Lines.clear();
286 while (!PPLevelBranchIndex.empty() &&
287 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
288 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
289 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
290 }
291 if (!PPLevelBranchIndex.empty()) {
292 ++PPLevelBranchIndex.back();
293 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
294 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
295 }
296 } while (!PPLevelBranchIndex.empty());
297 }
298
parseFile()299 void UnwrappedLineParser::parseFile() {
300 // The top-level context in a file always has declarations, except for pre-
301 // processor directives and JavaScript files.
302 bool MustBeDeclaration =
303 !Line->InPPDirective && Style.Language != FormatStyle::LK_JavaScript;
304 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
305 MustBeDeclaration);
306 if (Style.Language == FormatStyle::LK_TextProto)
307 parseBracedList();
308 else
309 parseLevel(/*HasOpeningBrace=*/false);
310 // Make sure to format the remaining tokens.
311 //
312 // LK_TextProto is special since its top-level is parsed as the body of a
313 // braced list, which does not necessarily have natural line separators such
314 // as a semicolon. Comments after the last entry that have been determined to
315 // not belong to that line, as in:
316 // key: value
317 // // endfile comment
318 // do not have a chance to be put on a line of their own until this point.
319 // Here we add this newline before end-of-file comments.
320 if (Style.Language == FormatStyle::LK_TextProto &&
321 !CommentsBeforeNextToken.empty())
322 addUnwrappedLine();
323 flushComments(true);
324 addUnwrappedLine();
325 }
326
parseCSharpGenericTypeConstraint()327 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
328 do {
329 switch (FormatTok->Tok.getKind()) {
330 case tok::l_brace:
331 return;
332 default:
333 if (FormatTok->is(Keywords.kw_where)) {
334 addUnwrappedLine();
335 nextToken();
336 parseCSharpGenericTypeConstraint();
337 break;
338 }
339 nextToken();
340 break;
341 }
342 } while (!eof());
343 }
344
parseCSharpAttribute()345 void UnwrappedLineParser::parseCSharpAttribute() {
346 int UnpairedSquareBrackets = 1;
347 do {
348 switch (FormatTok->Tok.getKind()) {
349 case tok::r_square:
350 nextToken();
351 --UnpairedSquareBrackets;
352 if (UnpairedSquareBrackets == 0) {
353 addUnwrappedLine();
354 return;
355 }
356 break;
357 case tok::l_square:
358 ++UnpairedSquareBrackets;
359 nextToken();
360 break;
361 default:
362 nextToken();
363 break;
364 }
365 } while (!eof());
366 }
367
parseLevel(bool HasOpeningBrace)368 void UnwrappedLineParser::parseLevel(bool HasOpeningBrace) {
369 bool SwitchLabelEncountered = false;
370 do {
371 tok::TokenKind kind = FormatTok->Tok.getKind();
372 if (FormatTok->getType() == TT_MacroBlockBegin) {
373 kind = tok::l_brace;
374 } else if (FormatTok->getType() == TT_MacroBlockEnd) {
375 kind = tok::r_brace;
376 }
377
378 switch (kind) {
379 case tok::comment:
380 nextToken();
381 addUnwrappedLine();
382 break;
383 case tok::l_brace:
384 // FIXME: Add parameter whether this can happen - if this happens, we must
385 // be in a non-declaration context.
386 if (!FormatTok->is(TT_MacroBlockBegin) && tryToParseBracedList())
387 continue;
388 parseBlock(/*MustBeDeclaration=*/false);
389 addUnwrappedLine();
390 break;
391 case tok::r_brace:
392 if (HasOpeningBrace)
393 return;
394 nextToken();
395 addUnwrappedLine();
396 break;
397 case tok::kw_default: {
398 unsigned StoredPosition = Tokens->getPosition();
399 FormatToken *Next;
400 do {
401 Next = Tokens->getNextToken();
402 } while (Next && Next->is(tok::comment));
403 FormatTok = Tokens->setPosition(StoredPosition);
404 if (Next && Next->isNot(tok::colon)) {
405 // default not followed by ':' is not a case label; treat it like
406 // an identifier.
407 parseStructuralElement();
408 break;
409 }
410 // Else, if it is 'default:', fall through to the case handling.
411 LLVM_FALLTHROUGH;
412 }
413 case tok::kw_case:
414 if (Style.Language == FormatStyle::LK_JavaScript &&
415 Line->MustBeDeclaration) {
416 // A 'case: string' style field declaration.
417 parseStructuralElement();
418 break;
419 }
420 if (!SwitchLabelEncountered &&
421 (Style.IndentCaseLabels || (Line->InPPDirective && Line->Level == 1)))
422 ++Line->Level;
423 SwitchLabelEncountered = true;
424 parseStructuralElement();
425 break;
426 case tok::l_square:
427 if (Style.isCSharp()) {
428 nextToken();
429 parseCSharpAttribute();
430 break;
431 }
432 LLVM_FALLTHROUGH;
433 default:
434 parseStructuralElement(/*IsTopLevel=*/true);
435 break;
436 }
437 } while (!eof());
438 }
439
calculateBraceTypes(bool ExpectClassBody)440 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
441 // We'll parse forward through the tokens until we hit
442 // a closing brace or eof - note that getNextToken() will
443 // parse macros, so this will magically work inside macro
444 // definitions, too.
445 unsigned StoredPosition = Tokens->getPosition();
446 FormatToken *Tok = FormatTok;
447 const FormatToken *PrevTok = Tok->Previous;
448 // Keep a stack of positions of lbrace tokens. We will
449 // update information about whether an lbrace starts a
450 // braced init list or a different block during the loop.
451 SmallVector<FormatToken *, 8> LBraceStack;
452 assert(Tok->Tok.is(tok::l_brace));
453 do {
454 // Get next non-comment token.
455 FormatToken *NextTok;
456 unsigned ReadTokens = 0;
457 do {
458 NextTok = Tokens->getNextToken();
459 ++ReadTokens;
460 } while (NextTok->is(tok::comment));
461
462 switch (Tok->Tok.getKind()) {
463 case tok::l_brace:
464 if (Style.Language == FormatStyle::LK_JavaScript && PrevTok) {
465 if (PrevTok->isOneOf(tok::colon, tok::less))
466 // A ':' indicates this code is in a type, or a braced list
467 // following a label in an object literal ({a: {b: 1}}).
468 // A '<' could be an object used in a comparison, but that is nonsense
469 // code (can never return true), so more likely it is a generic type
470 // argument (`X<{a: string; b: number}>`).
471 // The code below could be confused by semicolons between the
472 // individual members in a type member list, which would normally
473 // trigger BK_Block. In both cases, this must be parsed as an inline
474 // braced init.
475 Tok->setBlockKind(BK_BracedInit);
476 else if (PrevTok->is(tok::r_paren))
477 // `) { }` can only occur in function or method declarations in JS.
478 Tok->setBlockKind(BK_Block);
479 } else {
480 Tok->setBlockKind(BK_Unknown);
481 }
482 LBraceStack.push_back(Tok);
483 break;
484 case tok::r_brace:
485 if (LBraceStack.empty())
486 break;
487 if (LBraceStack.back()->is(BK_Unknown)) {
488 bool ProbablyBracedList = false;
489 if (Style.Language == FormatStyle::LK_Proto) {
490 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
491 } else {
492 // Using OriginalColumn to distinguish between ObjC methods and
493 // binary operators is a bit hacky.
494 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
495 NextTok->OriginalColumn == 0;
496
497 // If there is a comma, semicolon or right paren after the closing
498 // brace, we assume this is a braced initializer list. Note that
499 // regardless how we mark inner braces here, we will overwrite the
500 // BlockKind later if we parse a braced list (where all blocks
501 // inside are by default braced lists), or when we explicitly detect
502 // blocks (for example while parsing lambdas).
503 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
504 // braced list in JS.
505 ProbablyBracedList =
506 (Style.Language == FormatStyle::LK_JavaScript &&
507 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
508 Keywords.kw_as)) ||
509 (Style.isCpp() && NextTok->is(tok::l_paren)) ||
510 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
511 tok::r_paren, tok::r_square, tok::l_brace,
512 tok::ellipsis) ||
513 (NextTok->is(tok::identifier) &&
514 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace)) ||
515 (NextTok->is(tok::semi) &&
516 (!ExpectClassBody || LBraceStack.size() != 1)) ||
517 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
518 if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
519 // We can have an array subscript after a braced init
520 // list, but C++11 attributes are expected after blocks.
521 NextTok = Tokens->getNextToken();
522 ++ReadTokens;
523 ProbablyBracedList = NextTok->isNot(tok::l_square);
524 }
525 }
526 if (ProbablyBracedList) {
527 Tok->setBlockKind(BK_BracedInit);
528 LBraceStack.back()->setBlockKind(BK_BracedInit);
529 } else {
530 Tok->setBlockKind(BK_Block);
531 LBraceStack.back()->setBlockKind(BK_Block);
532 }
533 }
534 LBraceStack.pop_back();
535 break;
536 case tok::identifier:
537 if (!Tok->is(TT_StatementMacro))
538 break;
539 LLVM_FALLTHROUGH;
540 case tok::at:
541 case tok::semi:
542 case tok::kw_if:
543 case tok::kw_while:
544 case tok::kw_for:
545 case tok::kw_switch:
546 case tok::kw_try:
547 case tok::kw___try:
548 if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
549 LBraceStack.back()->setBlockKind(BK_Block);
550 break;
551 default:
552 break;
553 }
554 PrevTok = Tok;
555 Tok = NextTok;
556 } while (Tok->Tok.isNot(tok::eof) && !LBraceStack.empty());
557
558 // Assume other blocks for all unclosed opening braces.
559 for (unsigned i = 0, e = LBraceStack.size(); i != e; ++i) {
560 if (LBraceStack[i]->is(BK_Unknown))
561 LBraceStack[i]->setBlockKind(BK_Block);
562 }
563
564 FormatTok = Tokens->setPosition(StoredPosition);
565 }
566
// Folds the std::hash of \p v into \p seed, mixing it with the constant
// 0x9e3779b9 and shifted copies of the previous seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
572
computePPHash() const573 size_t UnwrappedLineParser::computePPHash() const {
574 size_t h = 0;
575 for (const auto &i : PPStack) {
576 hash_combine(h, size_t(i.Kind));
577 hash_combine(h, i.Line);
578 }
579 return h;
580 }
581
parseBlock(bool MustBeDeclaration,unsigned AddLevels,bool MunchSemi,bool UnindentWhitesmithsBraces)582 void UnwrappedLineParser::parseBlock(bool MustBeDeclaration, unsigned AddLevels,
583 bool MunchSemi,
584 bool UnindentWhitesmithsBraces) {
585 assert(FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) &&
586 "'{' or macro block token expected");
587 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
588 FormatTok->setBlockKind(BK_Block);
589
590 // For Whitesmiths mode, jump to the next level prior to skipping over the
591 // braces.
592 if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
593 ++Line->Level;
594
595 size_t PPStartHash = computePPHash();
596
597 unsigned InitialLevel = Line->Level;
598 nextToken(/*LevelDifference=*/AddLevels);
599
600 if (MacroBlock && FormatTok->is(tok::l_paren))
601 parseParens();
602
603 size_t NbPreprocessorDirectives =
604 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
605 addUnwrappedLine();
606 size_t OpeningLineIndex =
607 CurrentLines->empty()
608 ? (UnwrappedLine::kInvalidIndex)
609 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
610
611 // Whitesmiths is weird here. The brace needs to be indented for the namespace
612 // block, but the block itself may not be indented depending on the style
613 // settings. This allows the format to back up one level in those cases.
614 if (UnindentWhitesmithsBraces)
615 --Line->Level;
616
617 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
618 MustBeDeclaration);
619 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
620 Line->Level += AddLevels;
621 parseLevel(/*HasOpeningBrace=*/true);
622
623 if (eof())
624 return;
625
626 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
627 : !FormatTok->is(tok::r_brace)) {
628 Line->Level = InitialLevel;
629 FormatTok->setBlockKind(BK_Block);
630 return;
631 }
632
633 size_t PPEndHash = computePPHash();
634
635 // Munch the closing brace.
636 nextToken(/*LevelDifference=*/-AddLevels);
637
638 if (MacroBlock && FormatTok->is(tok::l_paren))
639 parseParens();
640
641 if (FormatTok->is(tok::arrow)) {
642 // Following the } we can find a trailing return type arrow
643 // as part of an implicit conversion constraint.
644 nextToken();
645 parseStructuralElement();
646 }
647
648 if (MunchSemi && FormatTok->Tok.is(tok::semi))
649 nextToken();
650
651 Line->Level = InitialLevel;
652
653 if (PPStartHash == PPEndHash) {
654 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
655 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
656 // Update the opening line to add the forward reference as well
657 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
658 CurrentLines->size() - 1;
659 }
660 }
661 }
662
isGoogScope(const UnwrappedLine & Line)663 static bool isGoogScope(const UnwrappedLine &Line) {
664 // FIXME: Closure-library specific stuff should not be hard-coded but be
665 // configurable.
666 if (Line.Tokens.size() < 4)
667 return false;
668 auto I = Line.Tokens.begin();
669 if (I->Tok->TokenText != "goog")
670 return false;
671 ++I;
672 if (I->Tok->isNot(tok::period))
673 return false;
674 ++I;
675 if (I->Tok->TokenText != "scope")
676 return false;
677 ++I;
678 return I->Tok->is(tok::l_paren);
679 }
680
isIIFE(const UnwrappedLine & Line,const AdditionalKeywords & Keywords)681 static bool isIIFE(const UnwrappedLine &Line,
682 const AdditionalKeywords &Keywords) {
683 // Look for the start of an immediately invoked anonymous function.
684 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
685 // This is commonly done in JavaScript to create a new, anonymous scope.
686 // Example: (function() { ... })()
687 if (Line.Tokens.size() < 3)
688 return false;
689 auto I = Line.Tokens.begin();
690 if (I->Tok->isNot(tok::l_paren))
691 return false;
692 ++I;
693 if (I->Tok->isNot(Keywords.kw_function))
694 return false;
695 ++I;
696 return I->Tok->is(tok::l_paren);
697 }
698
ShouldBreakBeforeBrace(const FormatStyle & Style,const FormatToken & InitialToken)699 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
700 const FormatToken &InitialToken) {
701 if (InitialToken.isOneOf(tok::kw_namespace, TT_NamespaceMacro))
702 return Style.BraceWrapping.AfterNamespace;
703 if (InitialToken.is(tok::kw_class))
704 return Style.BraceWrapping.AfterClass;
705 if (InitialToken.is(tok::kw_union))
706 return Style.BraceWrapping.AfterUnion;
707 if (InitialToken.is(tok::kw_struct))
708 return Style.BraceWrapping.AfterStruct;
709 return false;
710 }
711
parseChildBlock()712 void UnwrappedLineParser::parseChildBlock() {
713 FormatTok->setBlockKind(BK_Block);
714 nextToken();
715 {
716 bool SkipIndent = (Style.Language == FormatStyle::LK_JavaScript &&
717 (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
718 ScopedLineState LineState(*this);
719 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
720 /*MustBeDeclaration=*/false);
721 Line->Level += SkipIndent ? 0 : 1;
722 parseLevel(/*HasOpeningBrace=*/true);
723 flushComments(isOnNewLine(*FormatTok));
724 Line->Level -= SkipIndent ? 0 : 1;
725 }
726 nextToken();
727 }
728
parsePPDirective()729 void UnwrappedLineParser::parsePPDirective() {
730 assert(FormatTok->Tok.is(tok::hash) && "'#' expected");
731 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
732
733 nextToken();
734
735 if (!FormatTok->Tok.getIdentifierInfo()) {
736 parsePPUnknown();
737 return;
738 }
739
740 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
741 case tok::pp_define:
742 parsePPDefine();
743 return;
744 case tok::pp_if:
745 parsePPIf(/*IfDef=*/false);
746 break;
747 case tok::pp_ifdef:
748 case tok::pp_ifndef:
749 parsePPIf(/*IfDef=*/true);
750 break;
751 case tok::pp_else:
752 parsePPElse();
753 break;
754 case tok::pp_elifdef:
755 case tok::pp_elifndef:
756 case tok::pp_elif:
757 parsePPElIf();
758 break;
759 case tok::pp_endif:
760 parsePPEndIf();
761 break;
762 default:
763 parsePPUnknown();
764 break;
765 }
766 }
767
conditionalCompilationCondition(bool Unreachable)768 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
769 size_t Line = CurrentLines->size();
770 if (CurrentLines == &PreprocessorDirectives)
771 Line += Lines.size();
772
773 if (Unreachable ||
774 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable))
775 PPStack.push_back({PP_Unreachable, Line});
776 else
777 PPStack.push_back({PP_Conditional, Line});
778 }
779
conditionalCompilationStart(bool Unreachable)780 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
781 ++PPBranchLevel;
782 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
783 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
784 PPLevelBranchIndex.push_back(0);
785 PPLevelBranchCount.push_back(0);
786 }
787 PPChainBranchIndex.push(0);
788 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
789 conditionalCompilationCondition(Unreachable || Skip);
790 }
791
conditionalCompilationAlternative()792 void UnwrappedLineParser::conditionalCompilationAlternative() {
793 if (!PPStack.empty())
794 PPStack.pop_back();
795 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
796 if (!PPChainBranchIndex.empty())
797 ++PPChainBranchIndex.top();
798 conditionalCompilationCondition(
799 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
800 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
801 }
802
conditionalCompilationEnd()803 void UnwrappedLineParser::conditionalCompilationEnd() {
804 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
805 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
806 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel]) {
807 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
808 }
809 }
810 // Guard against #endif's without #if.
811 if (PPBranchLevel > -1)
812 --PPBranchLevel;
813 if (!PPChainBranchIndex.empty())
814 PPChainBranchIndex.pop();
815 if (!PPStack.empty())
816 PPStack.pop_back();
817 }
818
parsePPIf(bool IfDef)819 void UnwrappedLineParser::parsePPIf(bool IfDef) {
820 bool IfNDef = FormatTok->is(tok::pp_ifndef);
821 nextToken();
822 bool Unreachable = false;
823 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
824 Unreachable = true;
825 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
826 Unreachable = true;
827 conditionalCompilationStart(Unreachable);
828 FormatToken *IfCondition = FormatTok;
829 // If there's a #ifndef on the first line, and the only lines before it are
830 // comments, it could be an include guard.
831 bool MaybeIncludeGuard = IfNDef;
832 if (IncludeGuard == IG_Inited && MaybeIncludeGuard)
833 for (auto &Line : Lines) {
834 if (!Line.Tokens.front().Tok->is(tok::comment)) {
835 MaybeIncludeGuard = false;
836 IncludeGuard = IG_Rejected;
837 break;
838 }
839 }
840 --PPBranchLevel;
841 parsePPUnknown();
842 ++PPBranchLevel;
843 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
844 IncludeGuard = IG_IfNdefed;
845 IncludeGuardToken = IfCondition;
846 }
847 }
848
parsePPElse()849 void UnwrappedLineParser::parsePPElse() {
850 // If a potential include guard has an #else, it's not an include guard.
851 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
852 IncludeGuard = IG_Rejected;
853 conditionalCompilationAlternative();
854 if (PPBranchLevel > -1)
855 --PPBranchLevel;
856 parsePPUnknown();
857 ++PPBranchLevel;
858 }
859
parsePPElIf()860 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
861
parsePPEndIf()862 void UnwrappedLineParser::parsePPEndIf() {
863 conditionalCompilationEnd();
864 parsePPUnknown();
865 // If the #endif of a potential include guard is the last thing in the file,
866 // then we found an include guard.
867 unsigned TokenPosition = Tokens->getPosition();
868 FormatToken *PeekNext = AllTokens[TokenPosition];
869 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 &&
870 PeekNext->is(tok::eof) &&
871 Style.IndentPPDirectives != FormatStyle::PPDIS_None)
872 IncludeGuard = IG_Found;
873 }
874
parsePPDefine()875 void UnwrappedLineParser::parsePPDefine() {
876 nextToken();
877
878 if (!FormatTok->Tok.getIdentifierInfo()) {
879 IncludeGuard = IG_Rejected;
880 IncludeGuardToken = nullptr;
881 parsePPUnknown();
882 return;
883 }
884
885 if (IncludeGuard == IG_IfNdefed &&
886 IncludeGuardToken->TokenText == FormatTok->TokenText) {
887 IncludeGuard = IG_Defined;
888 IncludeGuardToken = nullptr;
889 for (auto &Line : Lines) {
890 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
891 IncludeGuard = IG_Rejected;
892 break;
893 }
894 }
895 }
896
897 nextToken();
898 if (FormatTok->Tok.getKind() == tok::l_paren &&
899 FormatTok->WhitespaceRange.getBegin() ==
900 FormatTok->WhitespaceRange.getEnd()) {
901 parseParens();
902 }
903 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
904 Line->Level += PPBranchLevel + 1;
905 addUnwrappedLine();
906 ++Line->Level;
907
908 // Errors during a preprocessor directive can only affect the layout of the
909 // preprocessor directive, and thus we ignore them. An alternative approach
910 // would be to use the same approach we use on the file level (no
911 // re-indentation if there was a structural error) within the macro
912 // definition.
913 parseFile();
914 }
915
parsePPUnknown()916 void UnwrappedLineParser::parsePPUnknown() {
917 do {
918 nextToken();
919 } while (!eof());
920 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
921 Line->Level += PPBranchLevel + 1;
922 addUnwrappedLine();
923 }
924
925 // Here we exclude certain tokens that are not usually the first token in an
926 // unwrapped line. This is used in attempt to distinguish macro calls without
927 // trailing semicolons from other constructs split to several lines.
tokenCanStartNewLine(const FormatToken & Tok)928 static bool tokenCanStartNewLine(const FormatToken &Tok) {
929 // Semicolon can be a null-statement, l_square can be a start of a macro or
930 // a C++11 attribute, but this doesn't seem to be common.
931 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
932 Tok.isNot(TT_AttributeSquare) &&
933 // Tokens that can only be used as binary operators and a part of
934 // overloaded operator names.
935 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
936 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
937 Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
938 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
939 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
940 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
941 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
942 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
943 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
944 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
945 Tok.isNot(tok::lesslessequal) &&
946 // Colon is used in labels, base class lists, initializer lists,
947 // range-based for loops, ternary operator, but should never be the
948 // first token in an unwrapped line.
949 Tok.isNot(tok::colon) &&
950 // 'noexcept' is a trailing annotation.
951 Tok.isNot(tok::kw_noexcept);
952 }
953
mustBeJSIdent(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)954 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
955 const FormatToken *FormatTok) {
956 // FIXME: This returns true for C/C++ keywords like 'struct'.
957 return FormatTok->is(tok::identifier) &&
958 (FormatTok->Tok.getIdentifierInfo() == nullptr ||
959 !FormatTok->isOneOf(
960 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
961 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
962 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
963 Keywords.kw_let, Keywords.kw_var, tok::kw_const,
964 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
965 Keywords.kw_instanceof, Keywords.kw_interface, Keywords.kw_throws,
966 Keywords.kw_from));
967 }
968
mustBeJSIdentOrValue(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)969 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
970 const FormatToken *FormatTok) {
971 return FormatTok->Tok.isLiteral() ||
972 FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
973 mustBeJSIdent(Keywords, FormatTok);
974 }
975
976 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
977 // when encountered after a value (see mustBeJSIdentOrValue).
isJSDeclOrStmt(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)978 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
979 const FormatToken *FormatTok) {
980 return FormatTok->isOneOf(
981 tok::kw_return, Keywords.kw_yield,
982 // conditionals
983 tok::kw_if, tok::kw_else,
984 // loops
985 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
986 // switch/case
987 tok::kw_switch, tok::kw_case,
988 // exceptions
989 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
990 // declaration
991 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
992 Keywords.kw_async, Keywords.kw_function,
993 // import/export
994 Keywords.kw_import, tok::kw_export);
995 }
996
997 // Checks whether a token is a type in K&R C (aka C78).
isC78Type(const FormatToken & Tok)998 static bool isC78Type(const FormatToken &Tok) {
999 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1000 tok::kw_unsigned, tok::kw_float, tok::kw_double,
1001 tok::identifier);
1002 }
1003
1004 // This function checks whether a token starts the first parameter declaration
1005 // in a K&R C (aka C78) function definition, e.g.:
1006 // int f(a, b)
1007 // short a, b;
1008 // {
1009 // return a + b;
1010 // }
isC78ParameterDecl(const FormatToken * Tok,const FormatToken * Next,const FormatToken * FuncName)1011 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1012 const FormatToken *FuncName) {
1013 assert(Tok);
1014 assert(Next);
1015 assert(FuncName);
1016
1017 if (FuncName->isNot(tok::identifier))
1018 return false;
1019
1020 const FormatToken *Prev = FuncName->Previous;
1021 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1022 return false;
1023
1024 if (!isC78Type(*Tok) &&
1025 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union))
1026 return false;
1027
1028 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1029 return false;
1030
1031 Tok = Tok->Previous;
1032 if (!Tok || Tok->isNot(tok::r_paren))
1033 return false;
1034
1035 Tok = Tok->Previous;
1036 if (!Tok || Tok->isNot(tok::identifier))
1037 return false;
1038
1039 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1040 }
1041
1042 // readTokenWithJavaScriptASI reads the next token and terminates the current
1043 // line if JavaScript Automatic Semicolon Insertion must
1044 // happen between the current token and the next token.
1045 //
1046 // This method is conservative - it cannot cover all edge cases of JavaScript,
1047 // but only aims to correctly handle certain well known cases. It *must not*
1048 // return true in speculative cases.
readTokenWithJavaScriptASI()1049 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1050 FormatToken *Previous = FormatTok;
1051 readToken();
1052 FormatToken *Next = FormatTok;
1053
1054 bool IsOnSameLine =
1055 CommentsBeforeNextToken.empty()
1056 ? Next->NewlinesBefore == 0
1057 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1058 if (IsOnSameLine)
1059 return;
1060
1061 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1062 bool PreviousStartsTemplateExpr =
1063 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1064 if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1065 // If the line contains an '@' sign, the previous token might be an
1066 // annotation, which can precede another identifier/value.
1067 bool HasAt = std::find_if(Line->Tokens.begin(), Line->Tokens.end(),
1068 [](UnwrappedLineNode &LineNode) {
1069 return LineNode.Tok->is(tok::at);
1070 }) != Line->Tokens.end();
1071 if (HasAt)
1072 return;
1073 }
1074 if (Next->is(tok::exclaim) && PreviousMustBeValue)
1075 return addUnwrappedLine();
1076 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1077 bool NextEndsTemplateExpr =
1078 Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1079 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1080 (PreviousMustBeValue ||
1081 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1082 tok::minusminus)))
1083 return addUnwrappedLine();
1084 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1085 isJSDeclOrStmt(Keywords, Next))
1086 return addUnwrappedLine();
1087 }
1088
/// Parses one structural element starting at the current token, which must
/// not be '{'.
///
/// The function works in two phases:
///  1. A dispatch on the first token hands constructs that have a dedicated
///     parser (namespaces, control statements, labels, extern blocks, ...)
///     to that parser and returns.
///  2. Otherwise a loop consumes tokens one construct at a time until
///     something that finishes the element is reached (for instance ';',
///     '}', or a block), or until eof() is true.
///
/// \param IsTopLevel Only consulted in the '(' case of the loop, where it
///        gates the K&R C (C78) parameter declaration check.
void UnwrappedLineParser::parseStructuralElement(bool IsTopLevel) {
  assert(!FormatTok->is(tok::l_brace));
  // TableGen: an include keyword optionally followed by a string literal
  // forms a line of its own.
  if (Style.Language == FormatStyle::LK_TableGen &&
      FormatTok->is(tok::pp_include)) {
    nextToken();
    if (FormatTok->is(tok::string_literal))
      nextToken();
    addUnwrappedLine();
    return;
  }
  // Phase 1: dispatch on the first token of the element. Cases that 'break'
  // (instead of returning) continue with the generic loop below.
  switch (FormatTok->Tok.getKind()) {
  case tok::kw_asm:
    nextToken();
    if (FormatTok->is(tok::l_brace)) {
      FormatTok->setType(TT_InlineASMBrace);
      nextToken();
      // Every token up to the closing brace is marked Finalized; the closing
      // brace itself is typed TT_InlineASMBrace and ends the line.
      while (FormatTok && FormatTok->isNot(tok::eof)) {
        if (FormatTok->is(tok::r_brace)) {
          FormatTok->setType(TT_InlineASMBrace);
          nextToken();
          addUnwrappedLine();
          break;
        }
        FormatTok->Finalized = true;
        nextToken();
      }
    }
    break;
  case tok::kw_namespace:
    parseNamespace();
    return;
  case tok::kw_public:
  case tok::kw_protected:
  case tok::kw_private:
    // In Java, JavaScript and C# these are just consumed here; otherwise
    // they are handed to parseAccessSpecifier().
    if (Style.Language == FormatStyle::LK_Java ||
        Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp())
      nextToken();
    else
      parseAccessSpecifier();
    return;
  case tok::kw_if:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseIfThenElse();
    return;
  case tok::kw_for:
  case tok::kw_while:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseForOrWhileLoop();
    return;
  case tok::kw_do:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseDoWhile();
    return;
  case tok::kw_switch:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'switch: string' field declaration.
      break;
    parseSwitch();
    return;
  case tok::kw_default:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'default: string' field declaration.
      break;
    nextToken();
    if (FormatTok->is(tok::colon)) {
      parseLabel();
      return;
    }
    // e.g. "default void f() {}" in a Java interface.
    break;
  case tok::kw_case:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // 'case: string' field declaration.
      break;
    parseCaseLabel();
    return;
  case tok::kw_try:
  case tok::kw___try:
    if (Style.Language == FormatStyle::LK_JavaScript && Line->MustBeDeclaration)
      // field/method declaration.
      break;
    parseTryCatch();
    return;
  case tok::kw_extern:
    nextToken();
    // Only 'extern "..." {' is handled here; any other use of 'extern'
    // continues with the generic loop.
    if (FormatTok->Tok.is(tok::string_literal)) {
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // If IndentExternBlock is unset (converts to false), both the line
        // break before the brace and the block's indentation follow
        // BraceWrapping.AfterExternBlock; otherwise the block is indented
        // only for IEBS_Indent.
        if (!Style.IndentExternBlock) {
          if (Style.BraceWrapping.AfterExternBlock) {
            addUnwrappedLine();
          }
          unsigned AddLevels = Style.BraceWrapping.AfterExternBlock ? 1u : 0u;
          parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        } else {
          unsigned AddLevels =
              Style.IndentExternBlock == FormatStyle::IEBS_Indent ? 1u : 0u;
          parseBlock(/*MustBeDeclaration=*/true, AddLevels);
        }
        addUnwrappedLine();
        return;
      }
    }
    break;
  case tok::kw_export:
    if (Style.Language == FormatStyle::LK_JavaScript) {
      parseJavaScriptEs6ImportExport();
      return;
    }
    if (!Style.isCpp())
      break;
    // Handle C++ "(inline|export) namespace".
    LLVM_FALLTHROUGH;
  case tok::kw_inline:
    nextToken();
    if (FormatTok->Tok.is(tok::kw_namespace)) {
      parseNamespace();
      return;
    }
    break;
  case tok::identifier:
    if (FormatTok->is(TT_ForEachMacro)) {
      parseForOrWhileLoop();
      return;
    }
    if (FormatTok->is(TT_MacroBlockBegin)) {
      parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                 /*MunchSemi=*/false);
      return;
    }
    if (FormatTok->is(Keywords.kw_import)) {
      if (Style.Language == FormatStyle::LK_JavaScript) {
        parseJavaScriptEs6ImportExport();
        return;
      }
      // Proto: 'import' ['public'] "file" [';'].
      if (Style.Language == FormatStyle::LK_Proto) {
        nextToken();
        if (FormatTok->is(tok::kw_public))
          nextToken();
        // NOTE(review): this early return does not call addUnwrappedLine();
        // the tokens consumed so far stay in the current line.
        if (!FormatTok->is(tok::string_literal))
          return;
        nextToken();
        if (FormatTok->is(tok::semi))
          nextToken();
        addUnwrappedLine();
        return;
      }
    }
    // 'signals:' / 'slots:' style keywords followed by a colon form a line
    // of their own; without the colon the (already consumed) keyword is
    // treated as the start of an ordinary declaration.
    if (Style.isCpp() &&
        FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
                           Keywords.kw_slots, Keywords.kw_qslots)) {
      nextToken();
      if (FormatTok->is(tok::colon)) {
        nextToken();
        addUnwrappedLine();
        return;
      }
    }
    if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
      parseStatementMacro();
      return;
    }
    if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
      parseNamespace();
      return;
    }
    // In all other cases, parse the declaration.
    break;
  default:
    break;
  }
  // Phase 2: generic loop. Each iteration handles the construct starting at
  // the current token. 'break' inside the switch continues with the next
  // token; 'return' ends the structural element.
  do {
    // Token preceding the one examined in this iteration; used by the
    // 'enum' and '(' cases below.
    const FormatToken *Previous = FormatTok->Previous;
    switch (FormatTok->Tok.getKind()) {
    case tok::at:
      nextToken();
      // '@{' starts a braced list.
      if (FormatTok->Tok.is(tok::l_brace)) {
        nextToken();
        parseBracedList();
        break;
      } else if (Style.Language == FormatStyle::LK_Java &&
                 FormatTok->is(Keywords.kw_interface)) {
        // Java '@interface': just consume 'interface' and keep going.
        nextToken();
        break;
      }
      // Otherwise dispatch on the Objective-C keyword following the '@'.
      switch (FormatTok->Tok.getObjCKeywordID()) {
      case tok::objc_public:
      case tok::objc_protected:
      case tok::objc_package:
      case tok::objc_private:
        return parseAccessSpecifier();
      case tok::objc_interface:
      case tok::objc_implementation:
        return parseObjCInterfaceOrImplementation();
      case tok::objc_protocol:
        if (parseObjCProtocol())
          return;
        break;
      case tok::objc_end:
        return; // Handled by the caller.
      case tok::objc_optional:
      case tok::objc_required:
        nextToken();
        addUnwrappedLine();
        return;
      case tok::objc_autoreleasepool:
        nextToken();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_synchronized:
        nextToken();
        if (FormatTok->Tok.is(tok::l_paren))
          // Skip synchronization object
          parseParens();
        if (FormatTok->Tok.is(tok::l_brace)) {
          if (Style.BraceWrapping.AfterControlStatement ==
              FormatStyle::BWACS_Always)
            addUnwrappedLine();
          parseBlock(/*MustBeDeclaration=*/false);
        }
        addUnwrappedLine();
        return;
      case tok::objc_try:
        // This branch isn't strictly necessary (the kw_try case below would
        // do this too after the tok::at is parsed above). But be explicit.
        parseTryCatch();
        return;
      default:
        break;
      }
      break;
    case tok::kw_concept:
      parseConcept();
      break;
    case tok::kw_requires:
      parseRequires();
      break;
    case tok::kw_enum:
      // Ignore if this is part of "template <enum ...".
      if (Previous && Previous->is(tok::less)) {
        nextToken();
        break;
      }

      // parseEnum falls through and does not yet add an unwrapped line as an
      // enum definition can start a structural element.
      if (!parseEnum())
        break;
      // This only applies for C++.
      if (!Style.isCpp()) {
        addUnwrappedLine();
        return;
      }
      break;
    case tok::kw_typedef:
      nextToken();
      // 'typedef NS_ENUM(...)' and the related macros are parsed as enums.
      if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
                             Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
                             Keywords.kw_CF_CLOSED_ENUM,
                             Keywords.kw_NS_CLOSED_ENUM))
        parseEnum();
      break;
    case tok::kw_struct:
    case tok::kw_union:
    case tok::kw_class:
      if (parseStructLike()) {
        return;
      }
      break;
    case tok::period:
      nextToken();
      // In Java, classes have an implicit static member "class".
      if (Style.Language == FormatStyle::LK_Java && FormatTok &&
          FormatTok->is(tok::kw_class))
        nextToken();
      if (Style.Language == FormatStyle::LK_JavaScript && FormatTok &&
          FormatTok->Tok.getIdentifierInfo())
        // JavaScript only has pseudo keywords, all keywords are allowed to
        // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
        nextToken();
      break;
    case tok::semi:
      // A semicolon ends the structural element.
      nextToken();
      addUnwrappedLine();
      return;
    case tok::r_brace:
      // The closing brace is not consumed here; only the line collected so
      // far is emitted.
      addUnwrappedLine();
      return;
    case tok::l_paren: {
      parseParens();
      // Break the unwrapped line if a K&R C function definition has a parameter
      // declaration.
      if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
        break;
      // Index of the token following the current one, used as look-ahead
      // into AllTokens.
      const unsigned Position = Tokens->getPosition() + 1;
      assert(Position < AllTokens.size());
      if (isC78ParameterDecl(FormatTok, AllTokens[Position], Previous)) {
        addUnwrappedLine();
        return;
      }
      break;
    }
    case tok::kw_operator:
      nextToken();
      // Consume the operator symbol as part of the overloaded operator name.
      if (FormatTok->isBinaryOperator())
        nextToken();
      break;
    case tok::caret:
      // '^' optionally followed by a type or identifier, a parenthesized
      // list and a braced child block.
      nextToken();
      if (FormatTok->Tok.isAnyIdentifier() ||
          FormatTok->isSimpleTypeSpecifier())
        nextToken();
      if (FormatTok->is(tok::l_paren))
        parseParens();
      if (FormatTok->is(tok::l_brace))
        parseChildBlock();
      break;
    case tok::l_brace:
      if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
        // A block outside of parentheses must be the last part of a
        // structural element.
        // FIXME: Figure out cases where this is not true, and add projections
        // for them (the one we know is missing are lambdas).
        if (Style.BraceWrapping.AfterFunction)
          addUnwrappedLine();
        FormatTok->setType(TT_FunctionLBrace);
        parseBlock(/*MustBeDeclaration=*/false);
        addUnwrappedLine();
        return;
      }
      // Otherwise this was a braced init list, and the structural
      // element continues.
      break;
    case tok::kw_try:
      if (Style.Language == FormatStyle::LK_JavaScript &&
          Line->MustBeDeclaration) {
        // field/method declaration.
        nextToken();
        break;
      }
      // We arrive here when parsing function-try blocks.
      if (Style.BraceWrapping.AfterFunction)
        addUnwrappedLine();
      parseTryCatch();
      return;
    case tok::identifier: {
      // C#: a 'where' in a declaration context starts a generic type
      // constraint, which goes on a line of its own.
      if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
          Line->MustBeDeclaration) {
        addUnwrappedLine();
        parseCSharpGenericTypeConstraint();
        break;
      }
      // The block-end macro itself is not consumed here.
      if (FormatTok->is(TT_MacroBlockEnd)) {
        addUnwrappedLine();
        return;
      }

      // Function declarations (as opposed to function expressions) are parsed
      // on their own unwrapped line by continuing this loop. Function
      // expressions (functions that are not on their own line) must not create
      // a new unwrapped line, so they are special cased below.
      size_t TokenCount = Line->Tokens.size();
      if (Style.Language == FormatStyle::LK_JavaScript &&
          FormatTok->is(Keywords.kw_function) &&
          (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
                                                     Keywords.kw_async)))) {
        tryToParseJSFunction();
        break;
      }
      if ((Style.Language == FormatStyle::LK_JavaScript ||
           Style.Language == FormatStyle::LK_Java) &&
          FormatTok->is(Keywords.kw_interface)) {
        if (Style.Language == FormatStyle::LK_JavaScript) {
          // In JavaScript/TypeScript, "interface" can be used as a standalone
          // identifier, e.g. in `var interface = 1;`. If "interface" is
          // followed by another identifier, it is very likely to be an actual
          // interface declaration.
          // Peek at the next token, then rewind the token source.
          unsigned StoredPosition = Tokens->getPosition();
          FormatToken *Next = Tokens->getNextToken();
          FormatTok = Tokens->setPosition(StoredPosition);
          if (Next && !mustBeJSIdent(Keywords, Next)) {
            nextToken();
            break;
          }
        }
        parseRecord();
        addUnwrappedLine();
        return;
      }

      // 'interface' in any remaining language is parsed like a struct.
      if (FormatTok->is(Keywords.kw_interface)) {
        if (parseStructLike()) {
          return;
        }
        break;
      }

      if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
        parseStatementMacro();
        return;
      }

      // See if the following token should start a new unwrapped line.
      // Remember the identifier's spelling before moving past it; it is
      // needed for the macro heuristic below.
      StringRef Text = FormatTok->TokenText;
      nextToken();

      // JS doesn't have macros, and within classes colons indicate fields, not
      // labels.
      if (Style.Language == FormatStyle::LK_JavaScript)
        break;

      // The checks below only apply when the identifier is the first token
      // of the line, optionally preceded by a single comment.
      TokenCount = Line->Tokens.size();
      if (TokenCount == 1 ||
          (TokenCount == 2 && Line->Tokens.front().Tok->is(tok::comment))) {
        // 'identifier:' outside of a declaration context is a label.
        if (FormatTok->Tok.is(tok::colon) && !Line->MustBeDeclaration) {
          Line->Tokens.begin()->Tok->MustBreakBefore = true;
          parseLabel(!Style.IndentGotoLabels);
          return;
        }
        // Recognize function-like macro usages without trailing semicolon as
        // well as free-standing macros like Q_OBJECT.
        bool FunctionLike = FormatTok->is(tok::l_paren);
        if (FunctionLike)
          parseParens();

        // If comments precede the next token, the first comment decides
        // whether a line break follows.
        bool FollowedByNewline =
            CommentsBeforeNextToken.empty()
                ? FormatTok->NewlinesBefore > 0
                : CommentsBeforeNextToken.front()->NewlinesBefore > 0;

        // Treat the identifier as a macro ending the line only if it is
        // followed by a line break, is function-like or at least five
        // characters long, is unchanged by upper-casing, and the next token
        // can plausibly start a line.
        if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
            tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
          addUnwrappedLine();
          return;
        }
      }
      break;
    }
    case tok::equal:
      // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
      // TT_FatArrow. They always start an expression or a child block if
      // followed by a curly brace.
      if (FormatTok->is(TT_FatArrow)) {
        nextToken();
        if (FormatTok->is(tok::l_brace)) {
          // C# may break after => if the next character is a newline.
          if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) {
            // calling `addUnwrappedLine()` here causes odd parsing errors.
            FormatTok->MustBreakBefore = true;
          }
          parseChildBlock();
        }
        break;
      }

      // Plain '=': a following '{' (or '<' in Proto) opens a braced list.
      nextToken();
      if (FormatTok->Tok.is(tok::l_brace)) {
        // Block kind should probably be set to BK_BracedInit for any language.
        // C# needs this change to ensure that array initialisers and object
        // initialisers are indented the same way.
        if (Style.isCSharp())
          FormatTok->setBlockKind(BK_BracedInit);
        nextToken();
        parseBracedList();
      } else if (Style.Language == FormatStyle::LK_Proto &&
                 FormatTok->Tok.is(tok::less)) {
        nextToken();
        parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
                        /*ClosingBraceKind=*/tok::greater);
      }
      break;
    case tok::l_square:
      parseSquare();
      break;
    case tok::kw_new:
      parseNew();
      break;
    default:
      // Any other token is simply part of the current element.
      nextToken();
      break;
    }
  } while (!eof());
}
1584
tryToParsePropertyAccessor()1585 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1586 assert(FormatTok->is(tok::l_brace));
1587 if (!Style.isCSharp())
1588 return false;
1589 // See if it's a property accessor.
1590 if (FormatTok->Previous->isNot(tok::identifier))
1591 return false;
1592
1593 // See if we are inside a property accessor.
1594 //
1595 // Record the current tokenPosition so that we can advance and
1596 // reset the current token. `Next` is not set yet so we need
1597 // another way to advance along the token stream.
1598 unsigned int StoredPosition = Tokens->getPosition();
1599 FormatToken *Tok = Tokens->getNextToken();
1600
1601 // A trivial property accessor is of the form:
1602 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set] }
1603 // Track these as they do not require line breaks to be introduced.
1604 bool HasGetOrSet = false;
1605 bool IsTrivialPropertyAccessor = true;
1606 while (!eof()) {
1607 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
1608 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
1609 Keywords.kw_set)) {
1610 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_set))
1611 HasGetOrSet = true;
1612 Tok = Tokens->getNextToken();
1613 continue;
1614 }
1615 if (Tok->isNot(tok::r_brace))
1616 IsTrivialPropertyAccessor = false;
1617 break;
1618 }
1619
1620 if (!HasGetOrSet) {
1621 Tokens->setPosition(StoredPosition);
1622 return false;
1623 }
1624
1625 // Try to parse the property accessor:
1626 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
1627 Tokens->setPosition(StoredPosition);
1628 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction == true)
1629 addUnwrappedLine();
1630 nextToken();
1631 do {
1632 switch (FormatTok->Tok.getKind()) {
1633 case tok::r_brace:
1634 nextToken();
1635 if (FormatTok->is(tok::equal)) {
1636 while (!eof() && FormatTok->isNot(tok::semi))
1637 nextToken();
1638 nextToken();
1639 }
1640 addUnwrappedLine();
1641 return true;
1642 case tok::l_brace:
1643 ++Line->Level;
1644 parseBlock(/*MustBeDeclaration=*/true);
1645 addUnwrappedLine();
1646 --Line->Level;
1647 break;
1648 case tok::equal:
1649 if (FormatTok->is(TT_FatArrow)) {
1650 ++Line->Level;
1651 do {
1652 nextToken();
1653 } while (!eof() && FormatTok->isNot(tok::semi));
1654 nextToken();
1655 addUnwrappedLine();
1656 --Line->Level;
1657 break;
1658 }
1659 nextToken();
1660 break;
1661 default:
1662 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_set) &&
1663 !IsTrivialPropertyAccessor) {
1664 // Non-trivial get/set needs to be on its own line.
1665 addUnwrappedLine();
1666 }
1667 nextToken();
1668 }
1669 } while (!eof());
1670
1671 // Unreachable for well-formed code (paired '{' and '}').
1672 return true;
1673 }
1674
tryToParseLambda()1675 bool UnwrappedLineParser::tryToParseLambda() {
1676 if (!Style.isCpp()) {
1677 nextToken();
1678 return false;
1679 }
1680 assert(FormatTok->is(tok::l_square));
1681 FormatToken &LSquare = *FormatTok;
1682 if (!tryToParseLambdaIntroducer())
1683 return false;
1684
1685 bool SeenArrow = false;
1686
1687 while (FormatTok->isNot(tok::l_brace)) {
1688 if (FormatTok->isSimpleTypeSpecifier()) {
1689 nextToken();
1690 continue;
1691 }
1692 switch (FormatTok->Tok.getKind()) {
1693 case tok::l_brace:
1694 break;
1695 case tok::l_paren:
1696 parseParens();
1697 break;
1698 case tok::amp:
1699 case tok::star:
1700 case tok::kw_const:
1701 case tok::comma:
1702 case tok::less:
1703 case tok::greater:
1704 case tok::identifier:
1705 case tok::numeric_constant:
1706 case tok::coloncolon:
1707 case tok::kw_class:
1708 case tok::kw_mutable:
1709 case tok::kw_noexcept:
1710 case tok::kw_template:
1711 case tok::kw_typename:
1712 nextToken();
1713 break;
1714 // Specialization of a template with an integer parameter can contain
1715 // arithmetic, logical, comparison and ternary operators.
1716 //
1717 // FIXME: This also accepts sequences of operators that are not in the scope
1718 // of a template argument list.
1719 //
1720 // In a C++ lambda a template type can only occur after an arrow. We use
1721 // this as an heuristic to distinguish between Objective-C expressions
1722 // followed by an `a->b` expression, such as:
1723 // ([obj func:arg] + a->b)
1724 // Otherwise the code below would parse as a lambda.
1725 //
1726 // FIXME: This heuristic is incorrect for C++20 generic lambdas with
1727 // explicit template lists: []<bool b = true && false>(U &&u){}
1728 case tok::plus:
1729 case tok::minus:
1730 case tok::exclaim:
1731 case tok::tilde:
1732 case tok::slash:
1733 case tok::percent:
1734 case tok::lessless:
1735 case tok::pipe:
1736 case tok::pipepipe:
1737 case tok::ampamp:
1738 case tok::caret:
1739 case tok::equalequal:
1740 case tok::exclaimequal:
1741 case tok::greaterequal:
1742 case tok::lessequal:
1743 case tok::question:
1744 case tok::colon:
1745 case tok::ellipsis:
1746 case tok::kw_true:
1747 case tok::kw_false:
1748 if (SeenArrow) {
1749 nextToken();
1750 break;
1751 }
1752 return true;
1753 case tok::arrow:
1754 // This might or might not actually be a lambda arrow (this could be an
1755 // ObjC method invocation followed by a dereferencing arrow). We might
1756 // reset this back to TT_Unknown in TokenAnnotator.
1757 FormatTok->setType(TT_LambdaArrow);
1758 SeenArrow = true;
1759 nextToken();
1760 break;
1761 default:
1762 return true;
1763 }
1764 }
1765 FormatTok->setType(TT_LambdaLBrace);
1766 LSquare.setType(TT_LambdaLSquare);
1767 parseChildBlock();
1768 return true;
1769 }
1770
tryToParseLambdaIntroducer()1771 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
1772 const FormatToken *Previous = FormatTok->Previous;
1773 if (Previous &&
1774 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
1775 tok::kw_delete, tok::l_square) ||
1776 FormatTok->isCppStructuredBinding(Style) || Previous->closesScope() ||
1777 Previous->isSimpleTypeSpecifier())) {
1778 nextToken();
1779 return false;
1780 }
1781 nextToken();
1782 if (FormatTok->is(tok::l_square)) {
1783 return false;
1784 }
1785 parseSquare(/*LambdaIntroducer=*/true);
1786 return true;
1787 }
1788
tryToParseJSFunction()1789 void UnwrappedLineParser::tryToParseJSFunction() {
1790 assert(FormatTok->is(Keywords.kw_function) ||
1791 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
1792 if (FormatTok->is(Keywords.kw_async))
1793 nextToken();
1794 // Consume "function".
1795 nextToken();
1796
1797 // Consume * (generator function). Treat it like C++'s overloaded operators.
1798 if (FormatTok->is(tok::star)) {
1799 FormatTok->setType(TT_OverloadedOperator);
1800 nextToken();
1801 }
1802
1803 // Consume function name.
1804 if (FormatTok->is(tok::identifier))
1805 nextToken();
1806
1807 if (FormatTok->isNot(tok::l_paren))
1808 return;
1809
1810 // Parse formal parameter list.
1811 parseParens();
1812
1813 if (FormatTok->is(tok::colon)) {
1814 // Parse a type definition.
1815 nextToken();
1816
1817 // Eat the type declaration. For braced inline object types, balance braces,
1818 // otherwise just parse until finding an l_brace for the function body.
1819 if (FormatTok->is(tok::l_brace))
1820 tryToParseBracedList();
1821 else
1822 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
1823 nextToken();
1824 }
1825
1826 if (FormatTok->is(tok::semi))
1827 return;
1828
1829 parseChildBlock();
1830 }
1831
tryToParseBracedList()1832 bool UnwrappedLineParser::tryToParseBracedList() {
1833 if (FormatTok->is(BK_Unknown))
1834 calculateBraceTypes();
1835 assert(FormatTok->isNot(BK_Unknown));
1836 if (FormatTok->is(BK_Block))
1837 return false;
1838 nextToken();
1839 parseBracedList();
1840 return true;
1841 }
1842
parseBracedList(bool ContinueOnSemicolons,bool IsEnum,tok::TokenKind ClosingBraceKind)1843 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
1844 bool IsEnum,
1845 tok::TokenKind ClosingBraceKind) {
1846 bool HasError = false;
1847
1848 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
1849 // replace this by using parseAssignmentExpression() inside.
1850 do {
1851 if (Style.isCSharp()) {
1852 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType
1853 // TT_FatArrow. They always start an expression or a child block if
1854 // followed by a curly brace.
1855 if (FormatTok->is(TT_FatArrow)) {
1856 nextToken();
1857 if (FormatTok->is(tok::l_brace)) {
1858 // C# may break after => if the next character is a newline.
1859 if (Style.isCSharp() && Style.BraceWrapping.AfterFunction == true) {
1860 // calling `addUnwrappedLine()` here causes odd parsing errors.
1861 FormatTok->MustBreakBefore = true;
1862 }
1863 parseChildBlock();
1864 continue;
1865 }
1866 }
1867 }
1868 if (Style.Language == FormatStyle::LK_JavaScript) {
1869 if (FormatTok->is(Keywords.kw_function) ||
1870 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
1871 tryToParseJSFunction();
1872 continue;
1873 }
1874 if (FormatTok->is(TT_FatArrow)) {
1875 nextToken();
1876 // Fat arrows can be followed by simple expressions or by child blocks
1877 // in curly braces.
1878 if (FormatTok->is(tok::l_brace)) {
1879 parseChildBlock();
1880 continue;
1881 }
1882 }
1883 if (FormatTok->is(tok::l_brace)) {
1884 // Could be a method inside of a braced list `{a() { return 1; }}`.
1885 if (tryToParseBracedList())
1886 continue;
1887 parseChildBlock();
1888 }
1889 }
1890 if (FormatTok->Tok.getKind() == ClosingBraceKind) {
1891 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1892 addUnwrappedLine();
1893 nextToken();
1894 return !HasError;
1895 }
1896 switch (FormatTok->Tok.getKind()) {
1897 case tok::caret:
1898 nextToken();
1899 if (FormatTok->is(tok::l_brace)) {
1900 parseChildBlock();
1901 }
1902 break;
1903 case tok::l_square:
1904 if (Style.isCSharp())
1905 parseSquare();
1906 else
1907 tryToParseLambda();
1908 break;
1909 case tok::l_paren:
1910 parseParens();
1911 // JavaScript can just have free standing methods and getters/setters in
1912 // object literals. Detect them by a "{" following ")".
1913 if (Style.Language == FormatStyle::LK_JavaScript) {
1914 if (FormatTok->is(tok::l_brace))
1915 parseChildBlock();
1916 break;
1917 }
1918 break;
1919 case tok::l_brace:
1920 // Assume there are no blocks inside a braced init list apart
1921 // from the ones we explicitly parse out (like lambdas).
1922 FormatTok->setBlockKind(BK_BracedInit);
1923 nextToken();
1924 parseBracedList();
1925 break;
1926 case tok::less:
1927 if (Style.Language == FormatStyle::LK_Proto) {
1928 nextToken();
1929 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1930 /*ClosingBraceKind=*/tok::greater);
1931 } else {
1932 nextToken();
1933 }
1934 break;
1935 case tok::semi:
1936 // JavaScript (or more precisely TypeScript) can have semicolons in braced
1937 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
1938 // used for error recovery if we have otherwise determined that this is
1939 // a braced list.
1940 if (Style.Language == FormatStyle::LK_JavaScript) {
1941 nextToken();
1942 break;
1943 }
1944 HasError = true;
1945 if (!ContinueOnSemicolons)
1946 return !HasError;
1947 nextToken();
1948 break;
1949 case tok::comma:
1950 nextToken();
1951 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
1952 addUnwrappedLine();
1953 break;
1954 default:
1955 nextToken();
1956 break;
1957 }
1958 } while (!eof());
1959 return false;
1960 }
1961
parseParens()1962 void UnwrappedLineParser::parseParens() {
1963 assert(FormatTok->Tok.is(tok::l_paren) && "'(' expected.");
1964 nextToken();
1965 do {
1966 switch (FormatTok->Tok.getKind()) {
1967 case tok::l_paren:
1968 parseParens();
1969 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
1970 parseChildBlock();
1971 break;
1972 case tok::r_paren:
1973 nextToken();
1974 return;
1975 case tok::r_brace:
1976 // A "}" inside parenthesis is an error if there wasn't a matching "{".
1977 return;
1978 case tok::l_square:
1979 tryToParseLambda();
1980 break;
1981 case tok::l_brace:
1982 if (!tryToParseBracedList())
1983 parseChildBlock();
1984 break;
1985 case tok::at:
1986 nextToken();
1987 if (FormatTok->Tok.is(tok::l_brace)) {
1988 nextToken();
1989 parseBracedList();
1990 }
1991 break;
1992 case tok::equal:
1993 if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
1994 parseStructuralElement();
1995 else
1996 nextToken();
1997 break;
1998 case tok::kw_class:
1999 if (Style.Language == FormatStyle::LK_JavaScript)
2000 parseRecord(/*ParseAsExpr=*/true);
2001 else
2002 nextToken();
2003 break;
2004 case tok::identifier:
2005 if (Style.Language == FormatStyle::LK_JavaScript &&
2006 (FormatTok->is(Keywords.kw_function) ||
2007 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)))
2008 tryToParseJSFunction();
2009 else
2010 nextToken();
2011 break;
2012 default:
2013 nextToken();
2014 break;
2015 }
2016 } while (!eof());
2017 }
2018
parseSquare(bool LambdaIntroducer)2019 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2020 if (!LambdaIntroducer) {
2021 assert(FormatTok->Tok.is(tok::l_square) && "'[' expected.");
2022 if (tryToParseLambda())
2023 return;
2024 }
2025 do {
2026 switch (FormatTok->Tok.getKind()) {
2027 case tok::l_paren:
2028 parseParens();
2029 break;
2030 case tok::r_square:
2031 nextToken();
2032 return;
2033 case tok::r_brace:
2034 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2035 return;
2036 case tok::l_square:
2037 parseSquare();
2038 break;
2039 case tok::l_brace: {
2040 if (!tryToParseBracedList())
2041 parseChildBlock();
2042 break;
2043 }
2044 case tok::at:
2045 nextToken();
2046 if (FormatTok->Tok.is(tok::l_brace)) {
2047 nextToken();
2048 parseBracedList();
2049 }
2050 break;
2051 default:
2052 nextToken();
2053 break;
2054 }
2055 } while (!eof());
2056 }
2057
parseIfThenElse()2058 void UnwrappedLineParser::parseIfThenElse() {
2059 assert(FormatTok->Tok.is(tok::kw_if) && "'if' expected");
2060 nextToken();
2061 if (FormatTok->Tok.isOneOf(tok::kw_constexpr, tok::identifier))
2062 nextToken();
2063 if (FormatTok->Tok.is(tok::l_paren))
2064 parseParens();
2065 // handle [[likely]] / [[unlikely]]
2066 if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute())
2067 parseSquare();
2068 bool NeedsUnwrappedLine = false;
2069 if (FormatTok->Tok.is(tok::l_brace)) {
2070 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2071 parseBlock(/*MustBeDeclaration=*/false);
2072 if (Style.BraceWrapping.BeforeElse)
2073 addUnwrappedLine();
2074 else
2075 NeedsUnwrappedLine = true;
2076 } else {
2077 addUnwrappedLine();
2078 ++Line->Level;
2079 parseStructuralElement();
2080 --Line->Level;
2081 }
2082 if (FormatTok->Tok.is(tok::kw_else)) {
2083 nextToken();
2084 // handle [[likely]] / [[unlikely]]
2085 if (FormatTok->Tok.is(tok::l_square) && tryToParseSimpleAttribute())
2086 parseSquare();
2087 if (FormatTok->Tok.is(tok::l_brace)) {
2088 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2089 parseBlock(/*MustBeDeclaration=*/false);
2090 addUnwrappedLine();
2091 } else if (FormatTok->Tok.is(tok::kw_if)) {
2092 FormatToken *Previous = AllTokens[Tokens->getPosition() - 1];
2093 bool PrecededByComment = Previous->is(tok::comment);
2094 if (PrecededByComment) {
2095 addUnwrappedLine();
2096 ++Line->Level;
2097 }
2098 parseIfThenElse();
2099 if (PrecededByComment)
2100 --Line->Level;
2101 } else {
2102 addUnwrappedLine();
2103 ++Line->Level;
2104 parseStructuralElement();
2105 if (FormatTok->is(tok::eof))
2106 addUnwrappedLine();
2107 --Line->Level;
2108 }
2109 } else if (NeedsUnwrappedLine) {
2110 addUnwrappedLine();
2111 }
2112 }
2113
parseTryCatch()2114 void UnwrappedLineParser::parseTryCatch() {
2115 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2116 nextToken();
2117 bool NeedsUnwrappedLine = false;
2118 if (FormatTok->is(tok::colon)) {
2119 // We are in a function try block, what comes is an initializer list.
2120 nextToken();
2121
2122 // In case identifiers were removed by clang-tidy, what might follow is
2123 // multiple commas in sequence - before the first identifier.
2124 while (FormatTok->is(tok::comma))
2125 nextToken();
2126
2127 while (FormatTok->is(tok::identifier)) {
2128 nextToken();
2129 if (FormatTok->is(tok::l_paren))
2130 parseParens();
2131 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2132 FormatTok->is(tok::l_brace)) {
2133 do {
2134 nextToken();
2135 } while (!FormatTok->is(tok::r_brace));
2136 nextToken();
2137 }
2138
2139 // In case identifiers were removed by clang-tidy, what might follow is
2140 // multiple commas in sequence - after the first identifier.
2141 while (FormatTok->is(tok::comma))
2142 nextToken();
2143 }
2144 }
2145 // Parse try with resource.
2146 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren)) {
2147 parseParens();
2148 }
2149 if (FormatTok->is(tok::l_brace)) {
2150 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2151 parseBlock(/*MustBeDeclaration=*/false);
2152 if (Style.BraceWrapping.BeforeCatch) {
2153 addUnwrappedLine();
2154 } else {
2155 NeedsUnwrappedLine = true;
2156 }
2157 } else if (!FormatTok->is(tok::kw_catch)) {
2158 // The C++ standard requires a compound-statement after a try.
2159 // If there's none, we try to assume there's a structuralElement
2160 // and try to continue.
2161 addUnwrappedLine();
2162 ++Line->Level;
2163 parseStructuralElement();
2164 --Line->Level;
2165 }
2166 while (1) {
2167 if (FormatTok->is(tok::at))
2168 nextToken();
2169 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2170 tok::kw___finally) ||
2171 ((Style.Language == FormatStyle::LK_Java ||
2172 Style.Language == FormatStyle::LK_JavaScript) &&
2173 FormatTok->is(Keywords.kw_finally)) ||
2174 (FormatTok->Tok.isObjCAtKeyword(tok::objc_catch) ||
2175 FormatTok->Tok.isObjCAtKeyword(tok::objc_finally))))
2176 break;
2177 nextToken();
2178 while (FormatTok->isNot(tok::l_brace)) {
2179 if (FormatTok->is(tok::l_paren)) {
2180 parseParens();
2181 continue;
2182 }
2183 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof))
2184 return;
2185 nextToken();
2186 }
2187 NeedsUnwrappedLine = false;
2188 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2189 parseBlock(/*MustBeDeclaration=*/false);
2190 if (Style.BraceWrapping.BeforeCatch)
2191 addUnwrappedLine();
2192 else
2193 NeedsUnwrappedLine = true;
2194 }
2195 if (NeedsUnwrappedLine)
2196 addUnwrappedLine();
2197 }
2198
parseNamespace()2199 void UnwrappedLineParser::parseNamespace() {
2200 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2201 "'namespace' expected");
2202
2203 const FormatToken &InitialToken = *FormatTok;
2204 nextToken();
2205 if (InitialToken.is(TT_NamespaceMacro)) {
2206 parseParens();
2207 } else {
2208 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2209 tok::l_square)) {
2210 if (FormatTok->is(tok::l_square))
2211 parseSquare();
2212 else
2213 nextToken();
2214 }
2215 }
2216 if (FormatTok->Tok.is(tok::l_brace)) {
2217 if (ShouldBreakBeforeBrace(Style, InitialToken))
2218 addUnwrappedLine();
2219
2220 unsigned AddLevels =
2221 Style.NamespaceIndentation == FormatStyle::NI_All ||
2222 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2223 DeclarationScopeStack.size() > 1)
2224 ? 1u
2225 : 0u;
2226 bool ManageWhitesmithsBraces =
2227 AddLevels == 0u &&
2228 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2229
2230 // If we're in Whitesmiths mode, indent the brace if we're not indenting
2231 // the whole block.
2232 if (ManageWhitesmithsBraces)
2233 ++Line->Level;
2234
2235 parseBlock(/*MustBeDeclaration=*/true, AddLevels,
2236 /*MunchSemi=*/true,
2237 /*UnindentWhitesmithsBraces=*/ManageWhitesmithsBraces);
2238
2239 // Munch the semicolon after a namespace. This is more common than one would
2240 // think. Putting the semicolon into its own line is very ugly.
2241 if (FormatTok->Tok.is(tok::semi))
2242 nextToken();
2243
2244 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2245
2246 if (ManageWhitesmithsBraces)
2247 --Line->Level;
2248 }
2249 // FIXME: Add error handling.
2250 }
2251
parseNew()2252 void UnwrappedLineParser::parseNew() {
2253 assert(FormatTok->is(tok::kw_new) && "'new' expected");
2254 nextToken();
2255
2256 if (Style.isCSharp()) {
2257 do {
2258 if (FormatTok->is(tok::l_brace))
2259 parseBracedList();
2260
2261 if (FormatTok->isOneOf(tok::semi, tok::comma))
2262 return;
2263
2264 nextToken();
2265 } while (!eof());
2266 }
2267
2268 if (Style.Language != FormatStyle::LK_Java)
2269 return;
2270
2271 // In Java, we can parse everything up to the parens, which aren't optional.
2272 do {
2273 // There should not be a ;, { or } before the new's open paren.
2274 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2275 return;
2276
2277 // Consume the parens.
2278 if (FormatTok->is(tok::l_paren)) {
2279 parseParens();
2280
2281 // If there is a class body of an anonymous class, consume that as child.
2282 if (FormatTok->is(tok::l_brace))
2283 parseChildBlock();
2284 return;
2285 }
2286 nextToken();
2287 } while (!eof());
2288 }
2289
parseForOrWhileLoop()2290 void UnwrappedLineParser::parseForOrWhileLoop() {
2291 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2292 "'for', 'while' or foreach macro expected");
2293 nextToken();
2294 // JS' for await ( ...
2295 if (Style.Language == FormatStyle::LK_JavaScript &&
2296 FormatTok->is(Keywords.kw_await))
2297 nextToken();
2298 if (FormatTok->Tok.is(tok::l_paren))
2299 parseParens();
2300 if (FormatTok->Tok.is(tok::l_brace)) {
2301 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2302 parseBlock(/*MustBeDeclaration=*/false);
2303 addUnwrappedLine();
2304 } else {
2305 addUnwrappedLine();
2306 ++Line->Level;
2307 parseStructuralElement();
2308 --Line->Level;
2309 }
2310 }
2311
parseDoWhile()2312 void UnwrappedLineParser::parseDoWhile() {
2313 assert(FormatTok->Tok.is(tok::kw_do) && "'do' expected");
2314 nextToken();
2315 if (FormatTok->Tok.is(tok::l_brace)) {
2316 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2317 parseBlock(/*MustBeDeclaration=*/false);
2318 if (Style.BraceWrapping.BeforeWhile)
2319 addUnwrappedLine();
2320 } else {
2321 addUnwrappedLine();
2322 ++Line->Level;
2323 parseStructuralElement();
2324 --Line->Level;
2325 }
2326
2327 // FIXME: Add error handling.
2328 if (!FormatTok->Tok.is(tok::kw_while)) {
2329 addUnwrappedLine();
2330 return;
2331 }
2332
2333 // If in Whitesmiths mode, the line with the while() needs to be indented
2334 // to the same level as the block.
2335 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2336 ++Line->Level;
2337
2338 nextToken();
2339 parseStructuralElement();
2340 }
2341
parseLabel(bool LeftAlignLabel)2342 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2343 nextToken();
2344 unsigned OldLineLevel = Line->Level;
2345 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2346 --Line->Level;
2347 if (LeftAlignLabel)
2348 Line->Level = 0;
2349
2350 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2351 FormatTok->Tok.is(tok::l_brace)) {
2352
2353 CompoundStatementIndenter Indenter(this, Line->Level,
2354 Style.BraceWrapping.AfterCaseLabel,
2355 Style.BraceWrapping.IndentBraces);
2356 parseBlock(/*MustBeDeclaration=*/false);
2357 if (FormatTok->Tok.is(tok::kw_break)) {
2358 if (Style.BraceWrapping.AfterControlStatement ==
2359 FormatStyle::BWACS_Always) {
2360 addUnwrappedLine();
2361 if (!Style.IndentCaseBlocks &&
2362 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2363 Line->Level++;
2364 }
2365 }
2366 parseStructuralElement();
2367 }
2368 addUnwrappedLine();
2369 } else {
2370 if (FormatTok->is(tok::semi))
2371 nextToken();
2372 addUnwrappedLine();
2373 }
2374 Line->Level = OldLineLevel;
2375 if (FormatTok->isNot(tok::l_brace)) {
2376 parseStructuralElement();
2377 addUnwrappedLine();
2378 }
2379 }
2380
parseCaseLabel()2381 void UnwrappedLineParser::parseCaseLabel() {
2382 assert(FormatTok->Tok.is(tok::kw_case) && "'case' expected");
2383
2384 // FIXME: fix handling of complex expressions here.
2385 do {
2386 nextToken();
2387 } while (!eof() && !FormatTok->Tok.is(tok::colon));
2388 parseLabel();
2389 }
2390
parseSwitch()2391 void UnwrappedLineParser::parseSwitch() {
2392 assert(FormatTok->Tok.is(tok::kw_switch) && "'switch' expected");
2393 nextToken();
2394 if (FormatTok->Tok.is(tok::l_paren))
2395 parseParens();
2396 if (FormatTok->Tok.is(tok::l_brace)) {
2397 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2398 parseBlock(/*MustBeDeclaration=*/false);
2399 addUnwrappedLine();
2400 } else {
2401 addUnwrappedLine();
2402 ++Line->Level;
2403 parseStructuralElement();
2404 --Line->Level;
2405 }
2406 }
2407
parseAccessSpecifier()2408 void UnwrappedLineParser::parseAccessSpecifier() {
2409 nextToken();
2410 // Understand Qt's slots.
2411 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
2412 nextToken();
2413 // Otherwise, we don't know what it is, and we'd better keep the next token.
2414 if (FormatTok->Tok.is(tok::colon))
2415 nextToken();
2416 addUnwrappedLine();
2417 }
2418
parseConcept()2419 void UnwrappedLineParser::parseConcept() {
2420 assert(FormatTok->Tok.is(tok::kw_concept) && "'concept' expected");
2421 nextToken();
2422 if (!FormatTok->Tok.is(tok::identifier))
2423 return;
2424 nextToken();
2425 if (!FormatTok->Tok.is(tok::equal))
2426 return;
2427 nextToken();
2428 if (FormatTok->Tok.is(tok::kw_requires)) {
2429 nextToken();
2430 parseRequiresExpression(Line->Level);
2431 } else {
2432 parseConstraintExpression(Line->Level);
2433 }
2434 }
2435
parseRequiresExpression(unsigned int OriginalLevel)2436 void UnwrappedLineParser::parseRequiresExpression(unsigned int OriginalLevel) {
2437 // requires (R range)
2438 if (FormatTok->Tok.is(tok::l_paren)) {
2439 parseParens();
2440 if (Style.IndentRequires && OriginalLevel != Line->Level) {
2441 addUnwrappedLine();
2442 --Line->Level;
2443 }
2444 }
2445
2446 if (FormatTok->Tok.is(tok::l_brace)) {
2447 if (Style.BraceWrapping.AfterFunction)
2448 addUnwrappedLine();
2449 FormatTok->setType(TT_FunctionLBrace);
2450 parseBlock(/*MustBeDeclaration=*/false);
2451 addUnwrappedLine();
2452 } else {
2453 parseConstraintExpression(OriginalLevel);
2454 }
2455 }
2456
parseConstraintExpression(unsigned int OriginalLevel)2457 void UnwrappedLineParser::parseConstraintExpression(
2458 unsigned int OriginalLevel) {
2459 // requires Id<T> && Id<T> || Id<T>
2460 while (
2461 FormatTok->isOneOf(tok::identifier, tok::kw_requires, tok::coloncolon)) {
2462 nextToken();
2463 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::less,
2464 tok::greater, tok::comma, tok::ellipsis)) {
2465 if (FormatTok->Tok.is(tok::less)) {
2466 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2467 /*ClosingBraceKind=*/tok::greater);
2468 continue;
2469 }
2470 nextToken();
2471 }
2472 if (FormatTok->Tok.is(tok::kw_requires)) {
2473 parseRequiresExpression(OriginalLevel);
2474 }
2475 if (FormatTok->Tok.is(tok::less)) {
2476 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2477 /*ClosingBraceKind=*/tok::greater);
2478 }
2479
2480 if (FormatTok->Tok.is(tok::l_paren)) {
2481 parseParens();
2482 }
2483 if (FormatTok->Tok.is(tok::l_brace)) {
2484 if (Style.BraceWrapping.AfterFunction)
2485 addUnwrappedLine();
2486 FormatTok->setType(TT_FunctionLBrace);
2487 parseBlock(/*MustBeDeclaration=*/false);
2488 }
2489 if (FormatTok->Tok.is(tok::semi)) {
2490 // Eat any trailing semi.
2491 nextToken();
2492 addUnwrappedLine();
2493 }
2494 if (FormatTok->Tok.is(tok::colon)) {
2495 return;
2496 }
2497 if (!FormatTok->Tok.isOneOf(tok::ampamp, tok::pipepipe)) {
2498 if (FormatTok->Previous &&
2499 !FormatTok->Previous->isOneOf(tok::identifier, tok::kw_requires,
2500 tok::coloncolon)) {
2501 addUnwrappedLine();
2502 }
2503 if (Style.IndentRequires && OriginalLevel != Line->Level) {
2504 --Line->Level;
2505 }
2506 break;
2507 } else {
2508 FormatTok->setType(TT_ConstraintJunctions);
2509 }
2510
2511 nextToken();
2512 }
2513 }
2514
parseRequires()2515 void UnwrappedLineParser::parseRequires() {
2516 assert(FormatTok->Tok.is(tok::kw_requires) && "'requires' expected");
2517
2518 unsigned OriginalLevel = Line->Level;
2519 if (FormatTok->Previous && FormatTok->Previous->is(tok::greater)) {
2520 addUnwrappedLine();
2521 if (Style.IndentRequires) {
2522 Line->Level++;
2523 }
2524 }
2525 nextToken();
2526
2527 parseRequiresExpression(OriginalLevel);
2528 }
2529
parseEnum()2530 bool UnwrappedLineParser::parseEnum() {
2531 // Won't be 'enum' for NS_ENUMs.
2532 if (FormatTok->Tok.is(tok::kw_enum))
2533 nextToken();
2534
2535 // In TypeScript, "enum" can also be used as property name, e.g. in interface
2536 // declarations. An "enum" keyword followed by a colon would be a syntax
2537 // error and thus assume it is just an identifier.
2538 if (Style.Language == FormatStyle::LK_JavaScript &&
2539 FormatTok->isOneOf(tok::colon, tok::question))
2540 return false;
2541
2542 // In protobuf, "enum" can be used as a field name.
2543 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
2544 return false;
2545
2546 // Eat up enum class ...
2547 if (FormatTok->Tok.is(tok::kw_class) || FormatTok->Tok.is(tok::kw_struct))
2548 nextToken();
2549
2550 while (FormatTok->Tok.getIdentifierInfo() ||
2551 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
2552 tok::greater, tok::comma, tok::question)) {
2553 nextToken();
2554 // We can have macros or attributes in between 'enum' and the enum name.
2555 if (FormatTok->is(tok::l_paren))
2556 parseParens();
2557 if (FormatTok->is(tok::identifier)) {
2558 nextToken();
2559 // If there are two identifiers in a row, this is likely an elaborate
2560 // return type. In Java, this can be "implements", etc.
2561 if (Style.isCpp() && FormatTok->is(tok::identifier))
2562 return false;
2563 }
2564 }
2565
2566 // Just a declaration or something is wrong.
2567 if (FormatTok->isNot(tok::l_brace))
2568 return true;
2569 FormatTok->setBlockKind(BK_Block);
2570
2571 if (Style.Language == FormatStyle::LK_Java) {
2572 // Java enums are different.
2573 parseJavaEnumBody();
2574 return true;
2575 }
2576 if (Style.Language == FormatStyle::LK_Proto) {
2577 parseBlock(/*MustBeDeclaration=*/true);
2578 return true;
2579 }
2580
2581 if (!Style.AllowShortEnumsOnASingleLine)
2582 addUnwrappedLine();
2583 // Parse enum body.
2584 nextToken();
2585 if (!Style.AllowShortEnumsOnASingleLine) {
2586 addUnwrappedLine();
2587 Line->Level += 1;
2588 }
2589 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
2590 /*IsEnum=*/true);
2591 if (!Style.AllowShortEnumsOnASingleLine)
2592 Line->Level -= 1;
2593 if (HasError) {
2594 if (FormatTok->is(tok::semi))
2595 nextToken();
2596 addUnwrappedLine();
2597 }
2598 return true;
2599
2600 // There is no addUnwrappedLine() here so that we fall through to parsing a
2601 // structural element afterwards. Thus, in "enum A {} n, m;",
2602 // "} n, m;" will end up in one unwrapped line.
2603 }
2604
parseStructLike()2605 bool UnwrappedLineParser::parseStructLike() {
2606 // parseRecord falls through and does not yet add an unwrapped line as a
2607 // record declaration or definition can start a structural element.
2608 parseRecord();
2609 // This does not apply to Java, JavaScript and C#.
2610 if (Style.Language == FormatStyle::LK_Java ||
2611 Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp()) {
2612 if (FormatTok->is(tok::semi))
2613 nextToken();
2614 addUnwrappedLine();
2615 return true;
2616 }
2617 return false;
2618 }
2619
2620 namespace {
2621 // A class used to set and restore the Token position when peeking
2622 // ahead in the token source.
2623 class ScopedTokenPosition {
2624 unsigned StoredPosition;
2625 FormatTokenSource *Tokens;
2626
2627 public:
ScopedTokenPosition(FormatTokenSource * Tokens)2628 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
2629 assert(Tokens && "Tokens expected to not be null");
2630 StoredPosition = Tokens->getPosition();
2631 }
2632
~ScopedTokenPosition()2633 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
2634 };
2635 } // namespace
2636
2637 // Look to see if we have [[ by looking ahead, if
2638 // its not then rewind to the original position.
tryToParseSimpleAttribute()2639 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
2640 ScopedTokenPosition AutoPosition(Tokens);
2641 FormatToken *Tok = Tokens->getNextToken();
2642 // We already read the first [ check for the second.
2643 if (Tok && !Tok->is(tok::l_square)) {
2644 return false;
2645 }
2646 // Double check that the attribute is just something
2647 // fairly simple.
2648 while (Tok) {
2649 if (Tok->is(tok::r_square)) {
2650 break;
2651 }
2652 Tok = Tokens->getNextToken();
2653 }
2654 Tok = Tokens->getNextToken();
2655 if (Tok && !Tok->is(tok::r_square)) {
2656 return false;
2657 }
2658 Tok = Tokens->getNextToken();
2659 if (Tok && Tok->is(tok::semi)) {
2660 return false;
2661 }
2662 return true;
2663 }
2664
parseJavaEnumBody()2665 void UnwrappedLineParser::parseJavaEnumBody() {
2666 // Determine whether the enum is simple, i.e. does not have a semicolon or
2667 // constants with class bodies. Simple enums can be formatted like braced
2668 // lists, contracted to a single line, etc.
2669 unsigned StoredPosition = Tokens->getPosition();
2670 bool IsSimple = true;
2671 FormatToken *Tok = Tokens->getNextToken();
2672 while (Tok) {
2673 if (Tok->is(tok::r_brace))
2674 break;
2675 if (Tok->isOneOf(tok::l_brace, tok::semi)) {
2676 IsSimple = false;
2677 break;
2678 }
2679 // FIXME: This will also mark enums with braces in the arguments to enum
2680 // constants as "not simple". This is probably fine in practice, though.
2681 Tok = Tokens->getNextToken();
2682 }
2683 FormatTok = Tokens->setPosition(StoredPosition);
2684
2685 if (IsSimple) {
2686 nextToken();
2687 parseBracedList();
2688 addUnwrappedLine();
2689 return;
2690 }
2691
2692 // Parse the body of a more complex enum.
2693 // First add a line for everything up to the "{".
2694 nextToken();
2695 addUnwrappedLine();
2696 ++Line->Level;
2697
2698 // Parse the enum constants.
2699 while (FormatTok) {
2700 if (FormatTok->is(tok::l_brace)) {
2701 // Parse the constant's class body.
2702 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
2703 /*MunchSemi=*/false);
2704 } else if (FormatTok->is(tok::l_paren)) {
2705 parseParens();
2706 } else if (FormatTok->is(tok::comma)) {
2707 nextToken();
2708 addUnwrappedLine();
2709 } else if (FormatTok->is(tok::semi)) {
2710 nextToken();
2711 addUnwrappedLine();
2712 break;
2713 } else if (FormatTok->is(tok::r_brace)) {
2714 addUnwrappedLine();
2715 break;
2716 } else {
2717 nextToken();
2718 }
2719 }
2720
2721 // Parse the class body after the enum's ";" if any.
2722 parseLevel(/*HasOpeningBrace=*/true);
2723 nextToken();
2724 --Line->Level;
2725 addUnwrappedLine();
2726 }
2727
parseRecord(bool ParseAsExpr)2728 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
2729 const FormatToken &InitialToken = *FormatTok;
2730 nextToken();
2731
2732 // The actual identifier can be a nested name specifier, and in macros
2733 // it is often token-pasted.
2734 // An [[attribute]] can be before the identifier.
2735 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
2736 tok::kw___attribute, tok::kw___declspec,
2737 tok::kw_alignas, tok::l_square, tok::r_square) ||
2738 ((Style.Language == FormatStyle::LK_Java ||
2739 Style.Language == FormatStyle::LK_JavaScript) &&
2740 FormatTok->isOneOf(tok::period, tok::comma))) {
2741 if (Style.Language == FormatStyle::LK_JavaScript &&
2742 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
2743 // JavaScript/TypeScript supports inline object types in
2744 // extends/implements positions:
2745 // class Foo implements {bar: number} { }
2746 nextToken();
2747 if (FormatTok->is(tok::l_brace)) {
2748 tryToParseBracedList();
2749 continue;
2750 }
2751 }
2752 bool IsNonMacroIdentifier =
2753 FormatTok->is(tok::identifier) &&
2754 FormatTok->TokenText != FormatTok->TokenText.upper();
2755 nextToken();
2756 // We can have macros or attributes in between 'class' and the class name.
2757 if (!IsNonMacroIdentifier) {
2758 if (FormatTok->Tok.is(tok::l_paren)) {
2759 parseParens();
2760 } else if (FormatTok->is(TT_AttributeSquare)) {
2761 parseSquare();
2762 // Consume the closing TT_AttributeSquare.
2763 if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
2764 nextToken();
2765 }
2766 }
2767 }
2768
2769 // Note that parsing away template declarations here leads to incorrectly
2770 // accepting function declarations as record declarations.
2771 // In general, we cannot solve this problem. Consider:
2772 // class A<int> B() {}
2773 // which can be a function definition or a class definition when B() is a
2774 // macro. If we find enough real-world cases where this is a problem, we
2775 // can parse for the 'template' keyword in the beginning of the statement,
2776 // and thus rule out the record production in case there is no template
2777 // (this would still leave us with an ambiguity between template function
2778 // and class declarations).
2779 if (FormatTok->isOneOf(tok::colon, tok::less)) {
2780 while (!eof()) {
2781 if (FormatTok->is(tok::l_brace)) {
2782 calculateBraceTypes(/*ExpectClassBody=*/true);
2783 if (!tryToParseBracedList())
2784 break;
2785 }
2786 if (FormatTok->Tok.is(tok::semi))
2787 return;
2788 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
2789 addUnwrappedLine();
2790 nextToken();
2791 parseCSharpGenericTypeConstraint();
2792 break;
2793 }
2794 nextToken();
2795 }
2796 }
2797 if (FormatTok->Tok.is(tok::l_brace)) {
2798 if (ParseAsExpr) {
2799 parseChildBlock();
2800 } else {
2801 if (ShouldBreakBeforeBrace(Style, InitialToken))
2802 addUnwrappedLine();
2803
2804 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
2805 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
2806 }
2807 }
2808 // There is no addUnwrappedLine() here so that we fall through to parsing a
2809 // structural element afterwards. Thus, in "class A {} n, m;",
2810 // "} n, m;" will end up in one unwrapped line.
2811 }
2812
parseObjCMethod()2813 void UnwrappedLineParser::parseObjCMethod() {
2814 assert(FormatTok->Tok.isOneOf(tok::l_paren, tok::identifier) &&
2815 "'(' or identifier expected.");
2816 do {
2817 if (FormatTok->Tok.is(tok::semi)) {
2818 nextToken();
2819 addUnwrappedLine();
2820 return;
2821 } else if (FormatTok->Tok.is(tok::l_brace)) {
2822 if (Style.BraceWrapping.AfterFunction)
2823 addUnwrappedLine();
2824 parseBlock(/*MustBeDeclaration=*/false);
2825 addUnwrappedLine();
2826 return;
2827 } else {
2828 nextToken();
2829 }
2830 } while (!eof());
2831 }
2832
parseObjCProtocolList()2833 void UnwrappedLineParser::parseObjCProtocolList() {
2834 assert(FormatTok->Tok.is(tok::less) && "'<' expected.");
2835 do {
2836 nextToken();
2837 // Early exit in case someone forgot a close angle.
2838 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2839 FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2840 return;
2841 } while (!eof() && FormatTok->Tok.isNot(tok::greater));
2842 nextToken(); // Skip '>'.
2843 }
2844
parseObjCUntilAtEnd()2845 void UnwrappedLineParser::parseObjCUntilAtEnd() {
2846 do {
2847 if (FormatTok->Tok.isObjCAtKeyword(tok::objc_end)) {
2848 nextToken();
2849 addUnwrappedLine();
2850 break;
2851 }
2852 if (FormatTok->is(tok::l_brace)) {
2853 parseBlock(/*MustBeDeclaration=*/false);
2854 // In ObjC interfaces, nothing should be following the "}".
2855 addUnwrappedLine();
2856 } else if (FormatTok->is(tok::r_brace)) {
2857 // Ignore stray "}". parseStructuralElement doesn't consume them.
2858 nextToken();
2859 addUnwrappedLine();
2860 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
2861 nextToken();
2862 parseObjCMethod();
2863 } else {
2864 parseStructuralElement();
2865 }
2866 } while (!eof());
2867 }
2868
parseObjCInterfaceOrImplementation()2869 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
2870 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
2871 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
2872 nextToken();
2873 nextToken(); // interface name
2874
2875 // @interface can be followed by a lightweight generic
2876 // specialization list, then either a base class or a category.
2877 if (FormatTok->Tok.is(tok::less)) {
2878 parseObjCLightweightGenerics();
2879 }
2880 if (FormatTok->Tok.is(tok::colon)) {
2881 nextToken();
2882 nextToken(); // base class name
2883 // The base class can also have lightweight generics applied to it.
2884 if (FormatTok->Tok.is(tok::less)) {
2885 parseObjCLightweightGenerics();
2886 }
2887 } else if (FormatTok->Tok.is(tok::l_paren))
2888 // Skip category, if present.
2889 parseParens();
2890
2891 if (FormatTok->Tok.is(tok::less))
2892 parseObjCProtocolList();
2893
2894 if (FormatTok->Tok.is(tok::l_brace)) {
2895 if (Style.BraceWrapping.AfterObjCDeclaration)
2896 addUnwrappedLine();
2897 parseBlock(/*MustBeDeclaration=*/true);
2898 }
2899
2900 // With instance variables, this puts '}' on its own line. Without instance
2901 // variables, this ends the @interface line.
2902 addUnwrappedLine();
2903
2904 parseObjCUntilAtEnd();
2905 }
2906
parseObjCLightweightGenerics()2907 void UnwrappedLineParser::parseObjCLightweightGenerics() {
2908 assert(FormatTok->Tok.is(tok::less));
2909 // Unlike protocol lists, generic parameterizations support
2910 // nested angles:
2911 //
2912 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
2913 // NSObject <NSCopying, NSSecureCoding>
2914 //
2915 // so we need to count how many open angles we have left.
2916 unsigned NumOpenAngles = 1;
2917 do {
2918 nextToken();
2919 // Early exit in case someone forgot a close angle.
2920 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
2921 FormatTok->Tok.isObjCAtKeyword(tok::objc_end))
2922 break;
2923 if (FormatTok->Tok.is(tok::less))
2924 ++NumOpenAngles;
2925 else if (FormatTok->Tok.is(tok::greater)) {
2926 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
2927 --NumOpenAngles;
2928 }
2929 } while (!eof() && NumOpenAngles != 0);
2930 nextToken(); // Skip '>'.
2931 }
2932
2933 // Returns true for the declaration/definition form of @protocol,
2934 // false for the expression form.
parseObjCProtocol()2935 bool UnwrappedLineParser::parseObjCProtocol() {
2936 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
2937 nextToken();
2938
2939 if (FormatTok->is(tok::l_paren))
2940 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
2941 return false;
2942
2943 // The definition/declaration form,
2944 // @protocol Foo
2945 // - (int)someMethod;
2946 // @end
2947
2948 nextToken(); // protocol name
2949
2950 if (FormatTok->Tok.is(tok::less))
2951 parseObjCProtocolList();
2952
2953 // Check for protocol declaration.
2954 if (FormatTok->Tok.is(tok::semi)) {
2955 nextToken();
2956 addUnwrappedLine();
2957 return true;
2958 }
2959
2960 addUnwrappedLine();
2961 parseObjCUntilAtEnd();
2962 return true;
2963 }
2964
parseJavaScriptEs6ImportExport()2965 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
2966 bool IsImport = FormatTok->is(Keywords.kw_import);
2967 assert(IsImport || FormatTok->is(tok::kw_export));
2968 nextToken();
2969
2970 // Consume the "default" in "export default class/function".
2971 if (FormatTok->is(tok::kw_default))
2972 nextToken();
2973
2974 // Consume "async function", "function" and "default function", so that these
2975 // get parsed as free-standing JS functions, i.e. do not require a trailing
2976 // semicolon.
2977 if (FormatTok->is(Keywords.kw_async))
2978 nextToken();
2979 if (FormatTok->is(Keywords.kw_function)) {
2980 nextToken();
2981 return;
2982 }
2983
2984 // For imports, `export *`, `export {...}`, consume the rest of the line up
2985 // to the terminating `;`. For everything else, just return and continue
2986 // parsing the structural element, i.e. the declaration or expression for
2987 // `export default`.
2988 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
2989 !FormatTok->isStringLiteral())
2990 return;
2991
2992 while (!eof()) {
2993 if (FormatTok->is(tok::semi))
2994 return;
2995 if (Line->Tokens.empty()) {
2996 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
2997 // import statement should terminate.
2998 return;
2999 }
3000 if (FormatTok->is(tok::l_brace)) {
3001 FormatTok->setBlockKind(BK_Block);
3002 nextToken();
3003 parseBracedList();
3004 } else {
3005 nextToken();
3006 }
3007 }
3008 }
3009
parseStatementMacro()3010 void UnwrappedLineParser::parseStatementMacro() {
3011 nextToken();
3012 if (FormatTok->is(tok::l_paren))
3013 parseParens();
3014 if (FormatTok->is(tok::semi))
3015 nextToken();
3016 addUnwrappedLine();
3017 }
3018
printDebugInfo(const UnwrappedLine & Line,StringRef Prefix="")3019 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3020 StringRef Prefix = "") {
3021 llvm::dbgs() << Prefix << "Line(" << Line.Level
3022 << ", FSC=" << Line.FirstStartColumn << ")"
3023 << (Line.InPPDirective ? " MACRO" : "") << ": ";
3024 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
3025 E = Line.Tokens.end();
3026 I != E; ++I) {
3027 llvm::dbgs() << I->Tok->Tok.getName() << "["
3028 << "T=" << (unsigned)I->Tok->getType()
3029 << ", OC=" << I->Tok->OriginalColumn << "] ";
3030 }
3031 for (std::list<UnwrappedLineNode>::const_iterator I = Line.Tokens.begin(),
3032 E = Line.Tokens.end();
3033 I != E; ++I) {
3034 const UnwrappedLineNode &Node = *I;
3035 for (SmallVectorImpl<UnwrappedLine>::const_iterator
3036 I = Node.Children.begin(),
3037 E = Node.Children.end();
3038 I != E; ++I) {
3039 printDebugInfo(*I, "\nChild: ");
3040 }
3041 }
3042 llvm::dbgs() << "\n";
3043 }
3044
addUnwrappedLine(LineLevel AdjustLevel)3045 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
3046 if (Line->Tokens.empty())
3047 return;
3048 LLVM_DEBUG({
3049 if (CurrentLines == &Lines)
3050 printDebugInfo(*Line);
3051 });
3052
3053 // If this line closes a block when in Whitesmiths mode, remember that
3054 // information so that the level can be decreased after the line is added.
3055 // This has to happen after the addition of the line since the line itself
3056 // needs to be indented.
3057 bool ClosesWhitesmithsBlock =
3058 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
3059 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
3060
3061 CurrentLines->push_back(std::move(*Line));
3062 Line->Tokens.clear();
3063 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
3064 Line->FirstStartColumn = 0;
3065
3066 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
3067 --Line->Level;
3068 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
3069 CurrentLines->append(
3070 std::make_move_iterator(PreprocessorDirectives.begin()),
3071 std::make_move_iterator(PreprocessorDirectives.end()));
3072 PreprocessorDirectives.clear();
3073 }
3074 // Disconnect the current token from the last token on the previous line.
3075 FormatTok->Previous = nullptr;
3076 }
3077
eof() const3078 bool UnwrappedLineParser::eof() const { return FormatTok->Tok.is(tok::eof); }
3079
isOnNewLine(const FormatToken & FormatTok)3080 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
3081 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
3082 FormatTok.NewlinesBefore > 0;
3083 }
3084
3085 // Checks if \p FormatTok is a line comment that continues the line comment
3086 // section on \p Line.
3087 static bool
continuesLineCommentSection(const FormatToken & FormatTok,const UnwrappedLine & Line,const llvm::Regex & CommentPragmasRegex)3088 continuesLineCommentSection(const FormatToken &FormatTok,
3089 const UnwrappedLine &Line,
3090 const llvm::Regex &CommentPragmasRegex) {
3091 if (Line.Tokens.empty())
3092 return false;
3093
3094 StringRef IndentContent = FormatTok.TokenText;
3095 if (FormatTok.TokenText.startswith("//") ||
3096 FormatTok.TokenText.startswith("/*"))
3097 IndentContent = FormatTok.TokenText.substr(2);
3098 if (CommentPragmasRegex.match(IndentContent))
3099 return false;
3100
3101 // If Line starts with a line comment, then FormatTok continues the comment
3102 // section if its original column is greater or equal to the original start
3103 // column of the line.
3104 //
3105 // Define the min column token of a line as follows: if a line ends in '{' or
3106 // contains a '{' followed by a line comment, then the min column token is
3107 // that '{'. Otherwise, the min column token of the line is the first token of
3108 // the line.
3109 //
3110 // If Line starts with a token other than a line comment, then FormatTok
3111 // continues the comment section if its original column is greater than the
3112 // original start column of the min column token of the line.
3113 //
3114 // For example, the second line comment continues the first in these cases:
3115 //
3116 // // first line
3117 // // second line
3118 //
3119 // and:
3120 //
3121 // // first line
3122 // // second line
3123 //
3124 // and:
3125 //
3126 // int i; // first line
3127 // // second line
3128 //
3129 // and:
3130 //
3131 // do { // first line
3132 // // second line
3133 // int i;
3134 // } while (true);
3135 //
3136 // and:
3137 //
3138 // enum {
3139 // a, // first line
3140 // // second line
3141 // b
3142 // };
3143 //
3144 // The second line comment doesn't continue the first in these cases:
3145 //
3146 // // first line
3147 // // second line
3148 //
3149 // and:
3150 //
3151 // int i; // first line
3152 // // second line
3153 //
3154 // and:
3155 //
3156 // do { // first line
3157 // // second line
3158 // int i;
3159 // } while (true);
3160 //
3161 // and:
3162 //
3163 // enum {
3164 // a, // first line
3165 // // second line
3166 // };
3167 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
3168
3169 // Scan for '{//'. If found, use the column of '{' as a min column for line
3170 // comment section continuation.
3171 const FormatToken *PreviousToken = nullptr;
3172 for (const UnwrappedLineNode &Node : Line.Tokens) {
3173 if (PreviousToken && PreviousToken->is(tok::l_brace) &&
3174 isLineComment(*Node.Tok)) {
3175 MinColumnToken = PreviousToken;
3176 break;
3177 }
3178 PreviousToken = Node.Tok;
3179
3180 // Grab the last newline preceding a token in this unwrapped line.
3181 if (Node.Tok->NewlinesBefore > 0) {
3182 MinColumnToken = Node.Tok;
3183 }
3184 }
3185 if (PreviousToken && PreviousToken->is(tok::l_brace)) {
3186 MinColumnToken = PreviousToken;
3187 }
3188
3189 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
3190 MinColumnToken);
3191 }
3192
flushComments(bool NewlineBeforeNext)3193 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
3194 bool JustComments = Line->Tokens.empty();
3195 for (SmallVectorImpl<FormatToken *>::const_iterator
3196 I = CommentsBeforeNextToken.begin(),
3197 E = CommentsBeforeNextToken.end();
3198 I != E; ++I) {
3199 // Line comments that belong to the same line comment section are put on the
3200 // same line since later we might want to reflow content between them.
3201 // Additional fine-grained breaking of line comment sections is controlled
3202 // by the class BreakableLineCommentSection in case it is desirable to keep
3203 // several line comment sections in the same unwrapped line.
3204 //
3205 // FIXME: Consider putting separate line comment sections as children to the
3206 // unwrapped line instead.
3207 (*I)->ContinuesLineCommentSection =
3208 continuesLineCommentSection(**I, *Line, CommentPragmasRegex);
3209 if (isOnNewLine(**I) && JustComments && !(*I)->ContinuesLineCommentSection)
3210 addUnwrappedLine();
3211 pushToken(*I);
3212 }
3213 if (NewlineBeforeNext && JustComments)
3214 addUnwrappedLine();
3215 CommentsBeforeNextToken.clear();
3216 }
3217
nextToken(int LevelDifference)3218 void UnwrappedLineParser::nextToken(int LevelDifference) {
3219 if (eof())
3220 return;
3221 flushComments(isOnNewLine(*FormatTok));
3222 pushToken(FormatTok);
3223 FormatToken *Previous = FormatTok;
3224 if (Style.Language != FormatStyle::LK_JavaScript)
3225 readToken(LevelDifference);
3226 else
3227 readTokenWithJavaScriptASI();
3228 FormatTok->Previous = Previous;
3229 }
3230
distributeComments(const SmallVectorImpl<FormatToken * > & Comments,const FormatToken * NextTok)3231 void UnwrappedLineParser::distributeComments(
3232 const SmallVectorImpl<FormatToken *> &Comments,
3233 const FormatToken *NextTok) {
3234 // Whether or not a line comment token continues a line is controlled by
3235 // the method continuesLineCommentSection, with the following caveat:
3236 //
3237 // Define a trail of Comments to be a nonempty proper postfix of Comments such
3238 // that each comment line from the trail is aligned with the next token, if
3239 // the next token exists. If a trail exists, the beginning of the maximal
3240 // trail is marked as a start of a new comment section.
3241 //
3242 // For example in this code:
3243 //
3244 // int a; // line about a
3245 // // line 1 about b
3246 // // line 2 about b
3247 // int b;
3248 //
3249 // the two lines about b form a maximal trail, so there are two sections, the
3250 // first one consisting of the single comment "// line about a" and the
3251 // second one consisting of the next two comments.
3252 if (Comments.empty())
3253 return;
3254 bool ShouldPushCommentsInCurrentLine = true;
3255 bool HasTrailAlignedWithNextToken = false;
3256 unsigned StartOfTrailAlignedWithNextToken = 0;
3257 if (NextTok) {
3258 // We are skipping the first element intentionally.
3259 for (unsigned i = Comments.size() - 1; i > 0; --i) {
3260 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
3261 HasTrailAlignedWithNextToken = true;
3262 StartOfTrailAlignedWithNextToken = i;
3263 }
3264 }
3265 }
3266 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
3267 FormatToken *FormatTok = Comments[i];
3268 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
3269 FormatTok->ContinuesLineCommentSection = false;
3270 } else {
3271 FormatTok->ContinuesLineCommentSection =
3272 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
3273 }
3274 if (!FormatTok->ContinuesLineCommentSection &&
3275 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
3276 ShouldPushCommentsInCurrentLine = false;
3277 }
3278 if (ShouldPushCommentsInCurrentLine) {
3279 pushToken(FormatTok);
3280 } else {
3281 CommentsBeforeNextToken.push_back(FormatTok);
3282 }
3283 }
3284 }
3285
readToken(int LevelDifference)3286 void UnwrappedLineParser::readToken(int LevelDifference) {
3287 SmallVector<FormatToken *, 1> Comments;
3288 do {
3289 FormatTok = Tokens->getNextToken();
3290 assert(FormatTok);
3291 while (!Line->InPPDirective && FormatTok->Tok.is(tok::hash) &&
3292 (FormatTok->HasUnescapedNewline || FormatTok->IsFirst)) {
3293 distributeComments(Comments, FormatTok);
3294 Comments.clear();
3295 // If there is an unfinished unwrapped line, we flush the preprocessor
3296 // directives only after that unwrapped line was finished later.
3297 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
3298 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
3299 assert((LevelDifference >= 0 ||
3300 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
3301 "LevelDifference makes Line->Level negative");
3302 Line->Level += LevelDifference;
3303 // Comments stored before the preprocessor directive need to be output
3304 // before the preprocessor directive, at the same level as the
3305 // preprocessor directive, as we consider them to apply to the directive.
3306 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
3307 PPBranchLevel > 0)
3308 Line->Level += PPBranchLevel;
3309 flushComments(isOnNewLine(*FormatTok));
3310 parsePPDirective();
3311 }
3312 while (FormatTok->getType() == TT_ConflictStart ||
3313 FormatTok->getType() == TT_ConflictEnd ||
3314 FormatTok->getType() == TT_ConflictAlternative) {
3315 if (FormatTok->getType() == TT_ConflictStart) {
3316 conditionalCompilationStart(/*Unreachable=*/false);
3317 } else if (FormatTok->getType() == TT_ConflictAlternative) {
3318 conditionalCompilationAlternative();
3319 } else if (FormatTok->getType() == TT_ConflictEnd) {
3320 conditionalCompilationEnd();
3321 }
3322 FormatTok = Tokens->getNextToken();
3323 FormatTok->MustBreakBefore = true;
3324 }
3325
3326 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
3327 !Line->InPPDirective) {
3328 continue;
3329 }
3330
3331 if (!FormatTok->Tok.is(tok::comment)) {
3332 distributeComments(Comments, FormatTok);
3333 Comments.clear();
3334 return;
3335 }
3336
3337 Comments.push_back(FormatTok);
3338 } while (!eof());
3339
3340 distributeComments(Comments, nullptr);
3341 Comments.clear();
3342 }
3343
pushToken(FormatToken * Tok)3344 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
3345 Line->Tokens.push_back(UnwrappedLineNode(Tok));
3346 if (MustBreakBeforeNextToken) {
3347 Line->Tokens.back().Tok->MustBreakBefore = true;
3348 MustBreakBeforeNextToken = false;
3349 }
3350 }
3351
3352 } // end namespace format
3353 } // end namespace clang
3354