1 //===--- UnwrappedLineParser.cpp - Format C++ code ------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the implementation of the UnwrappedLineParser,
11 /// which turns a stream of tokens into UnwrappedLines.
12 ///
13 //===----------------------------------------------------------------------===//
14
15 #include "UnwrappedLineParser.h"
16 #include "FormatToken.h"
17 #include "TokenAnnotator.h"
18 #include "clang/Basic/TokenKinds.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/Support/Debug.h"
21 #include "llvm/Support/raw_ostream.h"
22
23 #include <algorithm>
24 #include <utility>
25
26 #define DEBUG_TYPE "format-parser"
27
28 namespace clang {
29 namespace format {
30
31 class FormatTokenSource {
32 public:
~FormatTokenSource()33 virtual ~FormatTokenSource() {}
34
35 // Returns the next token in the token stream.
36 virtual FormatToken *getNextToken() = 0;
37
38 // Returns the token preceding the token returned by the last call to
39 // getNextToken() in the token stream, or nullptr if no such token exists.
40 virtual FormatToken *getPreviousToken() = 0;
41
42 // Returns the token that would be returned by the next call to
43 // getNextToken().
44 virtual FormatToken *peekNextToken() = 0;
45
46 // Returns the token that would be returned after the next N calls to
47 // getNextToken(). N needs to be greater than zero, and small enough that
48 // there are still tokens. Check for tok::eof with N-1 before calling it with
49 // N.
50 virtual FormatToken *peekNextToken(int N) = 0;
51
52 // Returns whether we are at the end of the file.
53 // This can be different from whether getNextToken() returned an eof token
54 // when the FormatTokenSource is a view on a part of the token stream.
55 virtual bool isEOF() = 0;
56
57 // Gets the current position in the token stream, to be used by setPosition().
58 virtual unsigned getPosition() = 0;
59
60 // Resets the token stream to the state it was in when getPosition() returned
61 // Position, and return the token at that position in the stream.
62 virtual FormatToken *setPosition(unsigned Position) = 0;
63 };
64
65 namespace {
66
67 class ScopedDeclarationState {
68 public:
ScopedDeclarationState(UnwrappedLine & Line,llvm::BitVector & Stack,bool MustBeDeclaration)69 ScopedDeclarationState(UnwrappedLine &Line, llvm::BitVector &Stack,
70 bool MustBeDeclaration)
71 : Line(Line), Stack(Stack) {
72 Line.MustBeDeclaration = MustBeDeclaration;
73 Stack.push_back(MustBeDeclaration);
74 }
~ScopedDeclarationState()75 ~ScopedDeclarationState() {
76 Stack.pop_back();
77 if (!Stack.empty())
78 Line.MustBeDeclaration = Stack.back();
79 else
80 Line.MustBeDeclaration = true;
81 }
82
83 private:
84 UnwrappedLine &Line;
85 llvm::BitVector &Stack;
86 };
87
isLineComment(const FormatToken & FormatTok)88 static bool isLineComment(const FormatToken &FormatTok) {
89 return FormatTok.is(tok::comment) && !FormatTok.TokenText.startswith("/*");
90 }
91
92 // Checks if \p FormatTok is a line comment that continues the line comment
93 // \p Previous. The original column of \p MinColumnToken is used to determine
94 // whether \p FormatTok is indented enough to the right to continue \p Previous.
continuesLineComment(const FormatToken & FormatTok,const FormatToken * Previous,const FormatToken * MinColumnToken)95 static bool continuesLineComment(const FormatToken &FormatTok,
96 const FormatToken *Previous,
97 const FormatToken *MinColumnToken) {
98 if (!Previous || !MinColumnToken)
99 return false;
100 unsigned MinContinueColumn =
101 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1);
102 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 &&
103 isLineComment(*Previous) &&
104 FormatTok.OriginalColumn >= MinContinueColumn;
105 }
106
107 class ScopedMacroState : public FormatTokenSource {
108 public:
ScopedMacroState(UnwrappedLine & Line,FormatTokenSource * & TokenSource,FormatToken * & ResetToken)109 ScopedMacroState(UnwrappedLine &Line, FormatTokenSource *&TokenSource,
110 FormatToken *&ResetToken)
111 : Line(Line), TokenSource(TokenSource), ResetToken(ResetToken),
112 PreviousLineLevel(Line.Level), PreviousTokenSource(TokenSource),
113 Token(nullptr), PreviousToken(nullptr) {
114 FakeEOF.Tok.startToken();
115 FakeEOF.Tok.setKind(tok::eof);
116 TokenSource = this;
117 Line.Level = 0;
118 Line.InPPDirective = true;
119 }
120
~ScopedMacroState()121 ~ScopedMacroState() override {
122 TokenSource = PreviousTokenSource;
123 ResetToken = Token;
124 Line.InPPDirective = false;
125 Line.Level = PreviousLineLevel;
126 }
127
getNextToken()128 FormatToken *getNextToken() override {
129 // The \c UnwrappedLineParser guards against this by never calling
130 // \c getNextToken() after it has encountered the first eof token.
131 assert(!eof());
132 PreviousToken = Token;
133 Token = PreviousTokenSource->getNextToken();
134 if (eof())
135 return &FakeEOF;
136 return Token;
137 }
138
getPreviousToken()139 FormatToken *getPreviousToken() override {
140 return PreviousTokenSource->getPreviousToken();
141 }
142
peekNextToken()143 FormatToken *peekNextToken() override {
144 if (eof())
145 return &FakeEOF;
146 return PreviousTokenSource->peekNextToken();
147 }
148
peekNextToken(int N)149 FormatToken *peekNextToken(int N) override {
150 assert(N > 0);
151 if (eof())
152 return &FakeEOF;
153 return PreviousTokenSource->peekNextToken(N);
154 }
155
isEOF()156 bool isEOF() override { return PreviousTokenSource->isEOF(); }
157
getPosition()158 unsigned getPosition() override { return PreviousTokenSource->getPosition(); }
159
setPosition(unsigned Position)160 FormatToken *setPosition(unsigned Position) override {
161 PreviousToken = nullptr;
162 Token = PreviousTokenSource->setPosition(Position);
163 return Token;
164 }
165
166 private:
eof()167 bool eof() {
168 return Token && Token->HasUnescapedNewline &&
169 !continuesLineComment(*Token, PreviousToken,
170 /*MinColumnToken=*/PreviousToken);
171 }
172
173 FormatToken FakeEOF;
174 UnwrappedLine &Line;
175 FormatTokenSource *&TokenSource;
176 FormatToken *&ResetToken;
177 unsigned PreviousLineLevel;
178 FormatTokenSource *PreviousTokenSource;
179
180 FormatToken *Token;
181 FormatToken *PreviousToken;
182 };
183
184 } // end anonymous namespace
185
186 class ScopedLineState {
187 public:
ScopedLineState(UnwrappedLineParser & Parser,bool SwitchToPreprocessorLines=false)188 ScopedLineState(UnwrappedLineParser &Parser,
189 bool SwitchToPreprocessorLines = false)
190 : Parser(Parser), OriginalLines(Parser.CurrentLines) {
191 if (SwitchToPreprocessorLines)
192 Parser.CurrentLines = &Parser.PreprocessorDirectives;
193 else if (!Parser.Line->Tokens.empty())
194 Parser.CurrentLines = &Parser.Line->Tokens.back().Children;
195 PreBlockLine = std::move(Parser.Line);
196 Parser.Line = std::make_unique<UnwrappedLine>();
197 Parser.Line->Level = PreBlockLine->Level;
198 Parser.Line->InPPDirective = PreBlockLine->InPPDirective;
199 }
200
~ScopedLineState()201 ~ScopedLineState() {
202 if (!Parser.Line->Tokens.empty())
203 Parser.addUnwrappedLine();
204 assert(Parser.Line->Tokens.empty());
205 Parser.Line = std::move(PreBlockLine);
206 if (Parser.CurrentLines == &Parser.PreprocessorDirectives)
207 Parser.MustBreakBeforeNextToken = true;
208 Parser.CurrentLines = OriginalLines;
209 }
210
211 private:
212 UnwrappedLineParser &Parser;
213
214 std::unique_ptr<UnwrappedLine> PreBlockLine;
215 SmallVectorImpl<UnwrappedLine> *OriginalLines;
216 };
217
218 class CompoundStatementIndenter {
219 public:
CompoundStatementIndenter(UnwrappedLineParser * Parser,const FormatStyle & Style,unsigned & LineLevel)220 CompoundStatementIndenter(UnwrappedLineParser *Parser,
221 const FormatStyle &Style, unsigned &LineLevel)
222 : CompoundStatementIndenter(Parser, LineLevel,
223 Style.BraceWrapping.AfterControlStatement,
224 Style.BraceWrapping.IndentBraces) {}
CompoundStatementIndenter(UnwrappedLineParser * Parser,unsigned & LineLevel,bool WrapBrace,bool IndentBrace)225 CompoundStatementIndenter(UnwrappedLineParser *Parser, unsigned &LineLevel,
226 bool WrapBrace, bool IndentBrace)
227 : LineLevel(LineLevel), OldLineLevel(LineLevel) {
228 if (WrapBrace)
229 Parser->addUnwrappedLine();
230 if (IndentBrace)
231 ++LineLevel;
232 }
~CompoundStatementIndenter()233 ~CompoundStatementIndenter() { LineLevel = OldLineLevel; }
234
235 private:
236 unsigned &LineLevel;
237 unsigned OldLineLevel;
238 };
239
240 namespace {
241
242 class IndexedTokenSource : public FormatTokenSource {
243 public:
IndexedTokenSource(ArrayRef<FormatToken * > Tokens)244 IndexedTokenSource(ArrayRef<FormatToken *> Tokens)
245 : Tokens(Tokens), Position(-1) {}
246
getNextToken()247 FormatToken *getNextToken() override {
248 if (Position >= 0 && Tokens[Position]->is(tok::eof)) {
249 LLVM_DEBUG({
250 llvm::dbgs() << "Next ";
251 dbgToken(Position);
252 });
253 return Tokens[Position];
254 }
255 ++Position;
256 LLVM_DEBUG({
257 llvm::dbgs() << "Next ";
258 dbgToken(Position);
259 });
260 return Tokens[Position];
261 }
262
getPreviousToken()263 FormatToken *getPreviousToken() override {
264 return Position > 0 ? Tokens[Position - 1] : nullptr;
265 }
266
peekNextToken()267 FormatToken *peekNextToken() override {
268 int Next = Position + 1;
269 LLVM_DEBUG({
270 llvm::dbgs() << "Peeking ";
271 dbgToken(Next);
272 });
273 return Tokens[Next];
274 }
275
peekNextToken(int N)276 FormatToken *peekNextToken(int N) override {
277 assert(N > 0);
278 int Next = Position + N;
279 LLVM_DEBUG({
280 llvm::dbgs() << "Peeking (+" << (N - 1) << ") ";
281 dbgToken(Next);
282 });
283 return Tokens[Next];
284 }
285
isEOF()286 bool isEOF() override { return Tokens[Position]->is(tok::eof); }
287
getPosition()288 unsigned getPosition() override {
289 LLVM_DEBUG(llvm::dbgs() << "Getting Position: " << Position << "\n");
290 assert(Position >= 0);
291 return Position;
292 }
293
setPosition(unsigned P)294 FormatToken *setPosition(unsigned P) override {
295 LLVM_DEBUG(llvm::dbgs() << "Setting Position: " << P << "\n");
296 Position = P;
297 return Tokens[Position];
298 }
299
reset()300 void reset() { Position = -1; }
301
302 private:
dbgToken(int Position,llvm::StringRef Indent="")303 void dbgToken(int Position, llvm::StringRef Indent = "") {
304 FormatToken *Tok = Tokens[Position];
305 llvm::dbgs() << Indent << "[" << Position
306 << "] Token: " << Tok->Tok.getName() << " / " << Tok->TokenText
307 << ", Macro: " << !!Tok->MacroCtx << "\n";
308 }
309
310 ArrayRef<FormatToken *> Tokens;
311 int Position;
312 };
313
314 } // end anonymous namespace
315
UnwrappedLineParser(const FormatStyle & Style,const AdditionalKeywords & Keywords,unsigned FirstStartColumn,ArrayRef<FormatToken * > Tokens,UnwrappedLineConsumer & Callback)316 UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style,
317 const AdditionalKeywords &Keywords,
318 unsigned FirstStartColumn,
319 ArrayRef<FormatToken *> Tokens,
320 UnwrappedLineConsumer &Callback)
321 : Line(new UnwrappedLine), MustBreakBeforeNextToken(false),
322 CurrentLines(&Lines), Style(Style), Keywords(Keywords),
323 CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr),
324 Callback(Callback), AllTokens(Tokens), PPBranchLevel(-1),
325 IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None
326 ? IG_Rejected
327 : IG_Inited),
328 IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {}
329
reset()330 void UnwrappedLineParser::reset() {
331 PPBranchLevel = -1;
332 IncludeGuard = Style.IndentPPDirectives == FormatStyle::PPDIS_None
333 ? IG_Rejected
334 : IG_Inited;
335 IncludeGuardToken = nullptr;
336 Line.reset(new UnwrappedLine);
337 CommentsBeforeNextToken.clear();
338 FormatTok = nullptr;
339 MustBreakBeforeNextToken = false;
340 PreprocessorDirectives.clear();
341 CurrentLines = &Lines;
342 DeclarationScopeStack.clear();
343 NestedTooDeep.clear();
344 PPStack.clear();
345 Line->FirstStartColumn = FirstStartColumn;
346 }
347
parse()348 void UnwrappedLineParser::parse() {
349 IndexedTokenSource TokenSource(AllTokens);
350 Line->FirstStartColumn = FirstStartColumn;
351 do {
352 LLVM_DEBUG(llvm::dbgs() << "----\n");
353 reset();
354 Tokens = &TokenSource;
355 TokenSource.reset();
356
357 readToken();
358 parseFile();
359
360 // If we found an include guard then all preprocessor directives (other than
361 // the guard) are over-indented by one.
362 if (IncludeGuard == IG_Found) {
363 for (auto &Line : Lines)
364 if (Line.InPPDirective && Line.Level > 0)
365 --Line.Level;
366 }
367
368 // Create line with eof token.
369 pushToken(FormatTok);
370 addUnwrappedLine();
371
372 for (const UnwrappedLine &Line : Lines)
373 Callback.consumeUnwrappedLine(Line);
374
375 Callback.finishRun();
376 Lines.clear();
377 while (!PPLevelBranchIndex.empty() &&
378 PPLevelBranchIndex.back() + 1 >= PPLevelBranchCount.back()) {
379 PPLevelBranchIndex.resize(PPLevelBranchIndex.size() - 1);
380 PPLevelBranchCount.resize(PPLevelBranchCount.size() - 1);
381 }
382 if (!PPLevelBranchIndex.empty()) {
383 ++PPLevelBranchIndex.back();
384 assert(PPLevelBranchIndex.size() == PPLevelBranchCount.size());
385 assert(PPLevelBranchIndex.back() <= PPLevelBranchCount.back());
386 }
387 } while (!PPLevelBranchIndex.empty());
388 }
389
parseFile()390 void UnwrappedLineParser::parseFile() {
391 // The top-level context in a file always has declarations, except for pre-
392 // processor directives and JavaScript files.
393 bool MustBeDeclaration = !Line->InPPDirective && !Style.isJavaScript();
394 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
395 MustBeDeclaration);
396 if (Style.Language == FormatStyle::LK_TextProto)
397 parseBracedList();
398 else
399 parseLevel();
400 // Make sure to format the remaining tokens.
401 //
402 // LK_TextProto is special since its top-level is parsed as the body of a
403 // braced list, which does not necessarily have natural line separators such
404 // as a semicolon. Comments after the last entry that have been determined to
405 // not belong to that line, as in:
406 // key: value
407 // // endfile comment
408 // do not have a chance to be put on a line of their own until this point.
409 // Here we add this newline before end-of-file comments.
410 if (Style.Language == FormatStyle::LK_TextProto &&
411 !CommentsBeforeNextToken.empty()) {
412 addUnwrappedLine();
413 }
414 flushComments(true);
415 addUnwrappedLine();
416 }
417
parseCSharpGenericTypeConstraint()418 void UnwrappedLineParser::parseCSharpGenericTypeConstraint() {
419 do {
420 switch (FormatTok->Tok.getKind()) {
421 case tok::l_brace:
422 return;
423 default:
424 if (FormatTok->is(Keywords.kw_where)) {
425 addUnwrappedLine();
426 nextToken();
427 parseCSharpGenericTypeConstraint();
428 break;
429 }
430 nextToken();
431 break;
432 }
433 } while (!eof());
434 }
435
parseCSharpAttribute()436 void UnwrappedLineParser::parseCSharpAttribute() {
437 int UnpairedSquareBrackets = 1;
438 do {
439 switch (FormatTok->Tok.getKind()) {
440 case tok::r_square:
441 nextToken();
442 --UnpairedSquareBrackets;
443 if (UnpairedSquareBrackets == 0) {
444 addUnwrappedLine();
445 return;
446 }
447 break;
448 case tok::l_square:
449 ++UnpairedSquareBrackets;
450 nextToken();
451 break;
452 default:
453 nextToken();
454 break;
455 }
456 } while (!eof());
457 }
458
precededByCommentOrPPDirective() const459 bool UnwrappedLineParser::precededByCommentOrPPDirective() const {
460 if (!Lines.empty() && Lines.back().InPPDirective)
461 return true;
462
463 const FormatToken *Previous = Tokens->getPreviousToken();
464 return Previous && Previous->is(tok::comment) &&
465 (Previous->IsMultiline || Previous->NewlinesBefore > 0);
466 }
467
468 /// \brief Parses a level, that is ???.
469 /// \param OpeningBrace Opening brace (\p nullptr if absent) of that level
470 /// \param CanContainBracedList If the content can contain (at any level) a
471 /// braced list.
472 /// \param NextLBracesType The type for left brace found in this level.
473 /// \param IfKind The \p if statement kind in the level.
474 /// \param IfLeftBrace The left brace of the \p if block in the level.
475 /// \returns true if a simple block of if/else/for/while, or false otherwise.
476 /// (A simple block has a single statement.)
parseLevel(const FormatToken * OpeningBrace,bool CanContainBracedList,TokenType NextLBracesType,IfStmtKind * IfKind,FormatToken ** IfLeftBrace)477 bool UnwrappedLineParser::parseLevel(const FormatToken *OpeningBrace,
478 bool CanContainBracedList,
479 TokenType NextLBracesType,
480 IfStmtKind *IfKind,
481 FormatToken **IfLeftBrace) {
482 auto NextLevelLBracesType = NextLBracesType == TT_CompoundRequirementLBrace
483 ? TT_BracedListLBrace
484 : TT_Unknown;
485 const bool IsPrecededByCommentOrPPDirective =
486 !Style.RemoveBracesLLVM || precededByCommentOrPPDirective();
487 FormatToken *IfLBrace = nullptr;
488 bool HasDoWhile = false;
489 bool HasLabel = false;
490 unsigned StatementCount = 0;
491 bool SwitchLabelEncountered = false;
492
493 do {
494 if (FormatTok->getType() == TT_AttributeMacro) {
495 nextToken();
496 continue;
497 }
498 tok::TokenKind kind = FormatTok->Tok.getKind();
499 if (FormatTok->getType() == TT_MacroBlockBegin)
500 kind = tok::l_brace;
501 else if (FormatTok->getType() == TT_MacroBlockEnd)
502 kind = tok::r_brace;
503
504 auto ParseDefault = [this, OpeningBrace, NextLevelLBracesType, IfKind,
505 &IfLBrace, &HasDoWhile, &HasLabel, &StatementCount] {
506 parseStructuralElement(!OpeningBrace, NextLevelLBracesType, IfKind,
507 &IfLBrace, HasDoWhile ? nullptr : &HasDoWhile,
508 HasLabel ? nullptr : &HasLabel);
509 ++StatementCount;
510 assert(StatementCount > 0 && "StatementCount overflow!");
511 };
512
513 switch (kind) {
514 case tok::comment:
515 nextToken();
516 addUnwrappedLine();
517 break;
518 case tok::l_brace:
519 if (NextLBracesType != TT_Unknown) {
520 FormatTok->setFinalizedType(NextLBracesType);
521 } else if (FormatTok->Previous &&
522 FormatTok->Previous->ClosesRequiresClause) {
523 // We need the 'default' case here to correctly parse a function
524 // l_brace.
525 ParseDefault();
526 continue;
527 }
528 if (CanContainBracedList && !FormatTok->is(TT_MacroBlockBegin) &&
529 tryToParseBracedList()) {
530 continue;
531 }
532 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
533 /*MunchSemi=*/true, /*KeepBraces=*/true, /*IfKind=*/nullptr,
534 /*UnindentWhitesmithsBraces=*/false, CanContainBracedList,
535 NextLBracesType);
536 ++StatementCount;
537 assert(StatementCount > 0 && "StatementCount overflow!");
538 addUnwrappedLine();
539 break;
540 case tok::r_brace:
541 if (OpeningBrace) {
542 if (!Style.RemoveBracesLLVM || Line->InPPDirective ||
543 !OpeningBrace->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace)) {
544 return false;
545 }
546 if (FormatTok->isNot(tok::r_brace) || StatementCount != 1 || HasLabel ||
547 HasDoWhile || IsPrecededByCommentOrPPDirective ||
548 precededByCommentOrPPDirective()) {
549 return false;
550 }
551 const FormatToken *Next = Tokens->peekNextToken();
552 if (Next->is(tok::comment) && Next->NewlinesBefore == 0)
553 return false;
554 if (IfLeftBrace)
555 *IfLeftBrace = IfLBrace;
556 return true;
557 }
558 nextToken();
559 addUnwrappedLine();
560 break;
561 case tok::kw_default: {
562 unsigned StoredPosition = Tokens->getPosition();
563 FormatToken *Next;
564 do {
565 Next = Tokens->getNextToken();
566 assert(Next);
567 } while (Next->is(tok::comment));
568 FormatTok = Tokens->setPosition(StoredPosition);
569 if (Next->isNot(tok::colon)) {
570 // default not followed by ':' is not a case label; treat it like
571 // an identifier.
572 parseStructuralElement();
573 break;
574 }
575 // Else, if it is 'default:', fall through to the case handling.
576 LLVM_FALLTHROUGH;
577 }
578 case tok::kw_case:
579 if (Style.isJavaScript() && Line->MustBeDeclaration) {
580 // A 'case: string' style field declaration.
581 parseStructuralElement();
582 break;
583 }
584 if (!SwitchLabelEncountered &&
585 (Style.IndentCaseLabels ||
586 (Line->InPPDirective && Line->Level == 1))) {
587 ++Line->Level;
588 }
589 SwitchLabelEncountered = true;
590 parseStructuralElement();
591 break;
592 case tok::l_square:
593 if (Style.isCSharp()) {
594 nextToken();
595 parseCSharpAttribute();
596 break;
597 }
598 if (handleCppAttributes())
599 break;
600 LLVM_FALLTHROUGH;
601 default:
602 ParseDefault();
603 break;
604 }
605 } while (!eof());
606
607 return false;
608 }
609
calculateBraceTypes(bool ExpectClassBody)610 void UnwrappedLineParser::calculateBraceTypes(bool ExpectClassBody) {
611 // We'll parse forward through the tokens until we hit
612 // a closing brace or eof - note that getNextToken() will
613 // parse macros, so this will magically work inside macro
614 // definitions, too.
615 unsigned StoredPosition = Tokens->getPosition();
616 FormatToken *Tok = FormatTok;
617 const FormatToken *PrevTok = Tok->Previous;
618 // Keep a stack of positions of lbrace tokens. We will
619 // update information about whether an lbrace starts a
620 // braced init list or a different block during the loop.
621 SmallVector<FormatToken *, 8> LBraceStack;
622 assert(Tok->is(tok::l_brace));
623 do {
624 // Get next non-comment token.
625 FormatToken *NextTok;
626 do {
627 NextTok = Tokens->getNextToken();
628 } while (NextTok->is(tok::comment));
629
630 switch (Tok->Tok.getKind()) {
631 case tok::l_brace:
632 if (Style.isJavaScript() && PrevTok) {
633 if (PrevTok->isOneOf(tok::colon, tok::less)) {
634 // A ':' indicates this code is in a type, or a braced list
635 // following a label in an object literal ({a: {b: 1}}).
636 // A '<' could be an object used in a comparison, but that is nonsense
637 // code (can never return true), so more likely it is a generic type
638 // argument (`X<{a: string; b: number}>`).
639 // The code below could be confused by semicolons between the
640 // individual members in a type member list, which would normally
641 // trigger BK_Block. In both cases, this must be parsed as an inline
642 // braced init.
643 Tok->setBlockKind(BK_BracedInit);
644 } else if (PrevTok->is(tok::r_paren)) {
645 // `) { }` can only occur in function or method declarations in JS.
646 Tok->setBlockKind(BK_Block);
647 }
648 } else {
649 Tok->setBlockKind(BK_Unknown);
650 }
651 LBraceStack.push_back(Tok);
652 break;
653 case tok::r_brace:
654 if (LBraceStack.empty())
655 break;
656 if (LBraceStack.back()->is(BK_Unknown)) {
657 bool ProbablyBracedList = false;
658 if (Style.Language == FormatStyle::LK_Proto) {
659 ProbablyBracedList = NextTok->isOneOf(tok::comma, tok::r_square);
660 } else {
661 // Skip NextTok over preprocessor lines, otherwise we may not
662 // properly diagnose the block as a braced intializer
663 // if the comma separator appears after the pp directive.
664 while (NextTok->is(tok::hash)) {
665 ScopedMacroState MacroState(*Line, Tokens, NextTok);
666 do {
667 NextTok = Tokens->getNextToken();
668 } while (NextTok->isNot(tok::eof));
669 }
670
671 // Using OriginalColumn to distinguish between ObjC methods and
672 // binary operators is a bit hacky.
673 bool NextIsObjCMethod = NextTok->isOneOf(tok::plus, tok::minus) &&
674 NextTok->OriginalColumn == 0;
675
676 // Try to detect a braced list. Note that regardless how we mark inner
677 // braces here, we will overwrite the BlockKind later if we parse a
678 // braced list (where all blocks inside are by default braced lists),
679 // or when we explicitly detect blocks (for example while parsing
680 // lambdas).
681
682 // If we already marked the opening brace as braced list, the closing
683 // must also be part of it.
684 ProbablyBracedList = LBraceStack.back()->is(TT_BracedListLBrace);
685
686 ProbablyBracedList = ProbablyBracedList ||
687 (Style.isJavaScript() &&
688 NextTok->isOneOf(Keywords.kw_of, Keywords.kw_in,
689 Keywords.kw_as));
690 ProbablyBracedList = ProbablyBracedList ||
691 (Style.isCpp() && NextTok->is(tok::l_paren));
692
693 // If there is a comma, semicolon or right paren after the closing
694 // brace, we assume this is a braced initializer list.
695 // FIXME: Some of these do not apply to JS, e.g. "} {" can never be a
696 // braced list in JS.
697 ProbablyBracedList =
698 ProbablyBracedList ||
699 NextTok->isOneOf(tok::comma, tok::period, tok::colon,
700 tok::r_paren, tok::r_square, tok::l_brace,
701 tok::ellipsis);
702
703 ProbablyBracedList =
704 ProbablyBracedList ||
705 (NextTok->is(tok::identifier) &&
706 !PrevTok->isOneOf(tok::semi, tok::r_brace, tok::l_brace));
707
708 ProbablyBracedList = ProbablyBracedList ||
709 (NextTok->is(tok::semi) &&
710 (!ExpectClassBody || LBraceStack.size() != 1));
711
712 ProbablyBracedList =
713 ProbablyBracedList ||
714 (NextTok->isBinaryOperator() && !NextIsObjCMethod);
715
716 if (!Style.isCSharp() && NextTok->is(tok::l_square)) {
717 // We can have an array subscript after a braced init
718 // list, but C++11 attributes are expected after blocks.
719 NextTok = Tokens->getNextToken();
720 ProbablyBracedList = NextTok->isNot(tok::l_square);
721 }
722 }
723 if (ProbablyBracedList) {
724 Tok->setBlockKind(BK_BracedInit);
725 LBraceStack.back()->setBlockKind(BK_BracedInit);
726 } else {
727 Tok->setBlockKind(BK_Block);
728 LBraceStack.back()->setBlockKind(BK_Block);
729 }
730 }
731 LBraceStack.pop_back();
732 break;
733 case tok::identifier:
734 if (!Tok->is(TT_StatementMacro))
735 break;
736 LLVM_FALLTHROUGH;
737 case tok::at:
738 case tok::semi:
739 case tok::kw_if:
740 case tok::kw_while:
741 case tok::kw_for:
742 case tok::kw_switch:
743 case tok::kw_try:
744 case tok::kw___try:
745 if (!LBraceStack.empty() && LBraceStack.back()->is(BK_Unknown))
746 LBraceStack.back()->setBlockKind(BK_Block);
747 break;
748 default:
749 break;
750 }
751 PrevTok = Tok;
752 Tok = NextTok;
753 } while (Tok->isNot(tok::eof) && !LBraceStack.empty());
754
755 // Assume other blocks for all unclosed opening braces.
756 for (FormatToken *LBrace : LBraceStack)
757 if (LBrace->is(BK_Unknown))
758 LBrace->setBlockKind(BK_Block);
759
760 FormatTok = Tokens->setPosition(StoredPosition);
761 }
762
// Folds the std::hash of v into seed.
template <class T>
static inline void hash_combine(std::size_t &seed, const T &v) {
  const std::size_t Mixed =
      std::hash<T>{}(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
  seed ^= Mixed;
}
768
computePPHash() const769 size_t UnwrappedLineParser::computePPHash() const {
770 size_t h = 0;
771 for (const auto &i : PPStack) {
772 hash_combine(h, size_t(i.Kind));
773 hash_combine(h, i.Line);
774 }
775 return h;
776 }
777
778 // Checks whether \p ParsedLine might fit on a single line. If \p OpeningBrace
779 // is not null, subtracts its length (plus the preceding space) when computing
780 // the length of \p ParsedLine. We must clone the tokens of \p ParsedLine before
781 // running the token annotator on it so that we can restore them afterward.
mightFitOnOneLine(UnwrappedLine & ParsedLine,const FormatToken * OpeningBrace) const782 bool UnwrappedLineParser::mightFitOnOneLine(
783 UnwrappedLine &ParsedLine, const FormatToken *OpeningBrace) const {
784 const auto ColumnLimit = Style.ColumnLimit;
785 if (ColumnLimit == 0)
786 return true;
787
788 auto &Tokens = ParsedLine.Tokens;
789 assert(!Tokens.empty());
790
791 const auto *LastToken = Tokens.back().Tok;
792 assert(LastToken);
793
794 SmallVector<UnwrappedLineNode> SavedTokens(Tokens.size());
795
796 int Index = 0;
797 for (const auto &Token : Tokens) {
798 assert(Token.Tok);
799 auto &SavedToken = SavedTokens[Index++];
800 SavedToken.Tok = new FormatToken;
801 SavedToken.Tok->copyFrom(*Token.Tok);
802 SavedToken.Children = std::move(Token.Children);
803 }
804
805 AnnotatedLine Line(ParsedLine);
806 assert(Line.Last == LastToken);
807
808 TokenAnnotator Annotator(Style, Keywords);
809 Annotator.annotate(Line);
810 Annotator.calculateFormattingInformation(Line);
811
812 auto Length = LastToken->TotalLength;
813 if (OpeningBrace) {
814 assert(OpeningBrace != Tokens.front().Tok);
815 Length -= OpeningBrace->TokenText.size() + 1;
816 }
817
818 Index = 0;
819 for (auto &Token : Tokens) {
820 const auto &SavedToken = SavedTokens[Index++];
821 Token.Tok->copyFrom(*SavedToken.Tok);
822 Token.Children = std::move(SavedToken.Children);
823 delete SavedToken.Tok;
824 }
825
826 return Line.Level * Style.IndentWidth + Length <= ColumnLimit;
827 }
828
parseBlock(bool MustBeDeclaration,unsigned AddLevels,bool MunchSemi,bool KeepBraces,IfStmtKind * IfKind,bool UnindentWhitesmithsBraces,bool CanContainBracedList,TokenType NextLBracesType)829 FormatToken *UnwrappedLineParser::parseBlock(
830 bool MustBeDeclaration, unsigned AddLevels, bool MunchSemi, bool KeepBraces,
831 IfStmtKind *IfKind, bool UnindentWhitesmithsBraces,
832 bool CanContainBracedList, TokenType NextLBracesType) {
833 auto HandleVerilogBlockLabel = [this]() {
834 // ":" name
835 if (Style.isVerilog() && FormatTok->is(tok::colon)) {
836 nextToken();
837 if (Keywords.isVerilogIdentifier(*FormatTok))
838 nextToken();
839 }
840 };
841
842 assert((FormatTok->isOneOf(tok::l_brace, TT_MacroBlockBegin) ||
843 (Style.isVerilog() && Keywords.isVerilogBegin(*FormatTok))) &&
844 "'{' or macro block token expected");
845 FormatToken *Tok = FormatTok;
846 const bool FollowedByComment = Tokens->peekNextToken()->is(tok::comment);
847 auto Index = CurrentLines->size();
848 const bool MacroBlock = FormatTok->is(TT_MacroBlockBegin);
849 FormatTok->setBlockKind(BK_Block);
850
851 // For Whitesmiths mode, jump to the next level prior to skipping over the
852 // braces.
853 if (AddLevels > 0 && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
854 ++Line->Level;
855
856 size_t PPStartHash = computePPHash();
857
858 const unsigned InitialLevel = Line->Level;
859 nextToken(/*LevelDifference=*/AddLevels);
860 HandleVerilogBlockLabel();
861
862 // Bail out if there are too many levels. Otherwise, the stack might overflow.
863 if (Line->Level > 300)
864 return nullptr;
865
866 if (MacroBlock && FormatTok->is(tok::l_paren))
867 parseParens();
868
869 size_t NbPreprocessorDirectives =
870 CurrentLines == &Lines ? PreprocessorDirectives.size() : 0;
871 addUnwrappedLine();
872 size_t OpeningLineIndex =
873 CurrentLines->empty()
874 ? (UnwrappedLine::kInvalidIndex)
875 : (CurrentLines->size() - 1 - NbPreprocessorDirectives);
876
877 // Whitesmiths is weird here. The brace needs to be indented for the namespace
878 // block, but the block itself may not be indented depending on the style
879 // settings. This allows the format to back up one level in those cases.
880 if (UnindentWhitesmithsBraces)
881 --Line->Level;
882
883 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
884 MustBeDeclaration);
885 if (AddLevels > 0u && Style.BreakBeforeBraces != FormatStyle::BS_Whitesmiths)
886 Line->Level += AddLevels;
887
888 FormatToken *IfLBrace = nullptr;
889 const bool SimpleBlock =
890 parseLevel(Tok, CanContainBracedList, NextLBracesType, IfKind, &IfLBrace);
891
892 if (eof())
893 return IfLBrace;
894
895 if (MacroBlock ? !FormatTok->is(TT_MacroBlockEnd)
896 : !FormatTok->is(tok::r_brace)) {
897 Line->Level = InitialLevel;
898 FormatTok->setBlockKind(BK_Block);
899 return IfLBrace;
900 }
901
902 auto RemoveBraces = [=]() mutable {
903 if (!SimpleBlock)
904 return false;
905 assert(Tok->isOneOf(TT_ControlStatementLBrace, TT_ElseLBrace));
906 assert(FormatTok->is(tok::r_brace));
907 const bool WrappedOpeningBrace = !Tok->Previous;
908 if (WrappedOpeningBrace && FollowedByComment)
909 return false;
910 const bool HasRequiredIfBraces = IfLBrace && !IfLBrace->Optional;
911 if (KeepBraces && !HasRequiredIfBraces)
912 return false;
913 if (Tok->isNot(TT_ElseLBrace) || !HasRequiredIfBraces) {
914 const FormatToken *Previous = Tokens->getPreviousToken();
915 assert(Previous);
916 if (Previous->is(tok::r_brace) && !Previous->Optional)
917 return false;
918 }
919 assert(!CurrentLines->empty());
920 auto &LastLine = CurrentLines->back();
921 if (LastLine.Level == InitialLevel + 1 && !mightFitOnOneLine(LastLine))
922 return false;
923 if (Tok->is(TT_ElseLBrace))
924 return true;
925 if (WrappedOpeningBrace) {
926 assert(Index > 0);
927 --Index; // The line above the wrapped l_brace.
928 Tok = nullptr;
929 }
930 return mightFitOnOneLine((*CurrentLines)[Index], Tok);
931 };
932 if (RemoveBraces()) {
933 Tok->MatchingParen = FormatTok;
934 FormatTok->MatchingParen = Tok;
935 }
936
937 size_t PPEndHash = computePPHash();
938
939 // Munch the closing brace.
940 nextToken(/*LevelDifference=*/-AddLevels);
941 HandleVerilogBlockLabel();
942
943 if (MacroBlock && FormatTok->is(tok::l_paren))
944 parseParens();
945
946 if (FormatTok->is(tok::kw_noexcept)) {
947 // A noexcept in a requires expression.
948 nextToken();
949 }
950
951 if (FormatTok->is(tok::arrow)) {
952 // Following the } or noexcept we can find a trailing return type arrow
953 // as part of an implicit conversion constraint.
954 nextToken();
955 parseStructuralElement();
956 }
957
958 if (MunchSemi && FormatTok->is(tok::semi))
959 nextToken();
960
961 Line->Level = InitialLevel;
962
963 if (PPStartHash == PPEndHash) {
964 Line->MatchingOpeningBlockLineIndex = OpeningLineIndex;
965 if (OpeningLineIndex != UnwrappedLine::kInvalidIndex) {
966 // Update the opening line to add the forward reference as well
967 (*CurrentLines)[OpeningLineIndex].MatchingClosingBlockLineIndex =
968 CurrentLines->size() - 1;
969 }
970 }
971
972 return IfLBrace;
973 }
974
isGoogScope(const UnwrappedLine & Line)975 static bool isGoogScope(const UnwrappedLine &Line) {
976 // FIXME: Closure-library specific stuff should not be hard-coded but be
977 // configurable.
978 if (Line.Tokens.size() < 4)
979 return false;
980 auto I = Line.Tokens.begin();
981 if (I->Tok->TokenText != "goog")
982 return false;
983 ++I;
984 if (I->Tok->isNot(tok::period))
985 return false;
986 ++I;
987 if (I->Tok->TokenText != "scope")
988 return false;
989 ++I;
990 return I->Tok->is(tok::l_paren);
991 }
992
isIIFE(const UnwrappedLine & Line,const AdditionalKeywords & Keywords)993 static bool isIIFE(const UnwrappedLine &Line,
994 const AdditionalKeywords &Keywords) {
995 // Look for the start of an immediately invoked anonymous function.
996 // https://en.wikipedia.org/wiki/Immediately-invoked_function_expression
997 // This is commonly done in JavaScript to create a new, anonymous scope.
998 // Example: (function() { ... })()
999 if (Line.Tokens.size() < 3)
1000 return false;
1001 auto I = Line.Tokens.begin();
1002 if (I->Tok->isNot(tok::l_paren))
1003 return false;
1004 ++I;
1005 if (I->Tok->isNot(Keywords.kw_function))
1006 return false;
1007 ++I;
1008 return I->Tok->is(tok::l_paren);
1009 }
1010
ShouldBreakBeforeBrace(const FormatStyle & Style,const FormatToken & InitialToken)1011 static bool ShouldBreakBeforeBrace(const FormatStyle &Style,
1012 const FormatToken &InitialToken) {
1013 tok::TokenKind Kind = InitialToken.Tok.getKind();
1014 if (InitialToken.is(TT_NamespaceMacro))
1015 Kind = tok::kw_namespace;
1016
1017 switch (Kind) {
1018 case tok::kw_namespace:
1019 return Style.BraceWrapping.AfterNamespace;
1020 case tok::kw_class:
1021 return Style.BraceWrapping.AfterClass;
1022 case tok::kw_union:
1023 return Style.BraceWrapping.AfterUnion;
1024 case tok::kw_struct:
1025 return Style.BraceWrapping.AfterStruct;
1026 case tok::kw_enum:
1027 return Style.BraceWrapping.AfterEnum;
1028 default:
1029 return false;
1030 }
1031 }
1032
parseChildBlock(bool CanContainBracedList,clang::format::TokenType NextLBracesType)1033 void UnwrappedLineParser::parseChildBlock(
1034 bool CanContainBracedList, clang::format::TokenType NextLBracesType) {
1035 assert(FormatTok->is(tok::l_brace));
1036 FormatTok->setBlockKind(BK_Block);
1037 const FormatToken *OpeningBrace = FormatTok;
1038 nextToken();
1039 {
1040 bool SkipIndent = (Style.isJavaScript() &&
1041 (isGoogScope(*Line) || isIIFE(*Line, Keywords)));
1042 ScopedLineState LineState(*this);
1043 ScopedDeclarationState DeclarationState(*Line, DeclarationScopeStack,
1044 /*MustBeDeclaration=*/false);
1045 Line->Level += SkipIndent ? 0 : 1;
1046 parseLevel(OpeningBrace, CanContainBracedList, NextLBracesType);
1047 flushComments(isOnNewLine(*FormatTok));
1048 Line->Level -= SkipIndent ? 0 : 1;
1049 }
1050 nextToken();
1051 }
1052
parsePPDirective()1053 void UnwrappedLineParser::parsePPDirective() {
1054 assert(FormatTok->is(tok::hash) && "'#' expected");
1055 ScopedMacroState MacroState(*Line, Tokens, FormatTok);
1056
1057 nextToken();
1058
1059 if (!FormatTok->Tok.getIdentifierInfo()) {
1060 parsePPUnknown();
1061 return;
1062 }
1063
1064 switch (FormatTok->Tok.getIdentifierInfo()->getPPKeywordID()) {
1065 case tok::pp_define:
1066 parsePPDefine();
1067 return;
1068 case tok::pp_if:
1069 parsePPIf(/*IfDef=*/false);
1070 break;
1071 case tok::pp_ifdef:
1072 case tok::pp_ifndef:
1073 parsePPIf(/*IfDef=*/true);
1074 break;
1075 case tok::pp_else:
1076 parsePPElse();
1077 break;
1078 case tok::pp_elifdef:
1079 case tok::pp_elifndef:
1080 case tok::pp_elif:
1081 parsePPElIf();
1082 break;
1083 case tok::pp_endif:
1084 parsePPEndIf();
1085 break;
1086 default:
1087 parsePPUnknown();
1088 break;
1089 }
1090 }
1091
conditionalCompilationCondition(bool Unreachable)1092 void UnwrappedLineParser::conditionalCompilationCondition(bool Unreachable) {
1093 size_t Line = CurrentLines->size();
1094 if (CurrentLines == &PreprocessorDirectives)
1095 Line += Lines.size();
1096
1097 if (Unreachable ||
1098 (!PPStack.empty() && PPStack.back().Kind == PP_Unreachable)) {
1099 PPStack.push_back({PP_Unreachable, Line});
1100 } else {
1101 PPStack.push_back({PP_Conditional, Line});
1102 }
1103 }
1104
conditionalCompilationStart(bool Unreachable)1105 void UnwrappedLineParser::conditionalCompilationStart(bool Unreachable) {
1106 ++PPBranchLevel;
1107 assert(PPBranchLevel >= 0 && PPBranchLevel <= (int)PPLevelBranchIndex.size());
1108 if (PPBranchLevel == (int)PPLevelBranchIndex.size()) {
1109 PPLevelBranchIndex.push_back(0);
1110 PPLevelBranchCount.push_back(0);
1111 }
1112 PPChainBranchIndex.push(0);
1113 bool Skip = PPLevelBranchIndex[PPBranchLevel] > 0;
1114 conditionalCompilationCondition(Unreachable || Skip);
1115 }
1116
conditionalCompilationAlternative()1117 void UnwrappedLineParser::conditionalCompilationAlternative() {
1118 if (!PPStack.empty())
1119 PPStack.pop_back();
1120 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1121 if (!PPChainBranchIndex.empty())
1122 ++PPChainBranchIndex.top();
1123 conditionalCompilationCondition(
1124 PPBranchLevel >= 0 && !PPChainBranchIndex.empty() &&
1125 PPLevelBranchIndex[PPBranchLevel] != PPChainBranchIndex.top());
1126 }
1127
conditionalCompilationEnd()1128 void UnwrappedLineParser::conditionalCompilationEnd() {
1129 assert(PPBranchLevel < (int)PPLevelBranchIndex.size());
1130 if (PPBranchLevel >= 0 && !PPChainBranchIndex.empty()) {
1131 if (PPChainBranchIndex.top() + 1 > PPLevelBranchCount[PPBranchLevel])
1132 PPLevelBranchCount[PPBranchLevel] = PPChainBranchIndex.top() + 1;
1133 }
1134 // Guard against #endif's without #if.
1135 if (PPBranchLevel > -1)
1136 --PPBranchLevel;
1137 if (!PPChainBranchIndex.empty())
1138 PPChainBranchIndex.pop();
1139 if (!PPStack.empty())
1140 PPStack.pop_back();
1141 }
1142
parsePPIf(bool IfDef)1143 void UnwrappedLineParser::parsePPIf(bool IfDef) {
1144 bool IfNDef = FormatTok->is(tok::pp_ifndef);
1145 nextToken();
1146 bool Unreachable = false;
1147 if (!IfDef && (FormatTok->is(tok::kw_false) || FormatTok->TokenText == "0"))
1148 Unreachable = true;
1149 if (IfDef && !IfNDef && FormatTok->TokenText == "SWIG")
1150 Unreachable = true;
1151 conditionalCompilationStart(Unreachable);
1152 FormatToken *IfCondition = FormatTok;
1153 // If there's a #ifndef on the first line, and the only lines before it are
1154 // comments, it could be an include guard.
1155 bool MaybeIncludeGuard = IfNDef;
1156 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1157 for (auto &Line : Lines) {
1158 if (!Line.Tokens.front().Tok->is(tok::comment)) {
1159 MaybeIncludeGuard = false;
1160 IncludeGuard = IG_Rejected;
1161 break;
1162 }
1163 }
1164 }
1165 --PPBranchLevel;
1166 parsePPUnknown();
1167 ++PPBranchLevel;
1168 if (IncludeGuard == IG_Inited && MaybeIncludeGuard) {
1169 IncludeGuard = IG_IfNdefed;
1170 IncludeGuardToken = IfCondition;
1171 }
1172 }
1173
parsePPElse()1174 void UnwrappedLineParser::parsePPElse() {
1175 // If a potential include guard has an #else, it's not an include guard.
1176 if (IncludeGuard == IG_Defined && PPBranchLevel == 0)
1177 IncludeGuard = IG_Rejected;
1178 conditionalCompilationAlternative();
1179 if (PPBranchLevel > -1)
1180 --PPBranchLevel;
1181 parsePPUnknown();
1182 ++PPBranchLevel;
1183 }
1184
parsePPElIf()1185 void UnwrappedLineParser::parsePPElIf() { parsePPElse(); }
1186
parsePPEndIf()1187 void UnwrappedLineParser::parsePPEndIf() {
1188 conditionalCompilationEnd();
1189 parsePPUnknown();
1190 // If the #endif of a potential include guard is the last thing in the file,
1191 // then we found an include guard.
1192 if (IncludeGuard == IG_Defined && PPBranchLevel == -1 && Tokens->isEOF() &&
1193 Style.IndentPPDirectives != FormatStyle::PPDIS_None) {
1194 IncludeGuard = IG_Found;
1195 }
1196 }
1197
parsePPDefine()1198 void UnwrappedLineParser::parsePPDefine() {
1199 nextToken();
1200
1201 if (!FormatTok->Tok.getIdentifierInfo()) {
1202 IncludeGuard = IG_Rejected;
1203 IncludeGuardToken = nullptr;
1204 parsePPUnknown();
1205 return;
1206 }
1207
1208 if (IncludeGuard == IG_IfNdefed &&
1209 IncludeGuardToken->TokenText == FormatTok->TokenText) {
1210 IncludeGuard = IG_Defined;
1211 IncludeGuardToken = nullptr;
1212 for (auto &Line : Lines) {
1213 if (!Line.Tokens.front().Tok->isOneOf(tok::comment, tok::hash)) {
1214 IncludeGuard = IG_Rejected;
1215 break;
1216 }
1217 }
1218 }
1219
1220 // In the context of a define, even keywords should be treated as normal
1221 // identifiers. Setting the kind to identifier is not enough, because we need
1222 // to treat additional keywords like __except as well, which are already
1223 // identifiers. Setting the identifier info to null interferes with include
1224 // guard processing above, and changes preprocessing nesting.
1225 FormatTok->Tok.setKind(tok::identifier);
1226 FormatTok->Tok.setIdentifierInfo(Keywords.kw_internal_ident_after_define);
1227 nextToken();
1228 if (FormatTok->Tok.getKind() == tok::l_paren &&
1229 !FormatTok->hasWhitespaceBefore()) {
1230 parseParens();
1231 }
1232 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1233 Line->Level += PPBranchLevel + 1;
1234 addUnwrappedLine();
1235 ++Line->Level;
1236
1237 // Errors during a preprocessor directive can only affect the layout of the
1238 // preprocessor directive, and thus we ignore them. An alternative approach
1239 // would be to use the same approach we use on the file level (no
1240 // re-indentation if there was a structural error) within the macro
1241 // definition.
1242 parseFile();
1243 }
1244
parsePPUnknown()1245 void UnwrappedLineParser::parsePPUnknown() {
1246 do {
1247 nextToken();
1248 } while (!eof());
1249 if (Style.IndentPPDirectives != FormatStyle::PPDIS_None)
1250 Line->Level += PPBranchLevel + 1;
1251 addUnwrappedLine();
1252 }
1253
1254 // Here we exclude certain tokens that are not usually the first token in an
1255 // unwrapped line. This is used in attempt to distinguish macro calls without
1256 // trailing semicolons from other constructs split to several lines.
tokenCanStartNewLine(const FormatToken & Tok)1257 static bool tokenCanStartNewLine(const FormatToken &Tok) {
1258 // Semicolon can be a null-statement, l_square can be a start of a macro or
1259 // a C++11 attribute, but this doesn't seem to be common.
1260 return Tok.isNot(tok::semi) && Tok.isNot(tok::l_brace) &&
1261 Tok.isNot(TT_AttributeSquare) &&
1262 // Tokens that can only be used as binary operators and a part of
1263 // overloaded operator names.
1264 Tok.isNot(tok::period) && Tok.isNot(tok::periodstar) &&
1265 Tok.isNot(tok::arrow) && Tok.isNot(tok::arrowstar) &&
1266 Tok.isNot(tok::less) && Tok.isNot(tok::greater) &&
1267 Tok.isNot(tok::slash) && Tok.isNot(tok::percent) &&
1268 Tok.isNot(tok::lessless) && Tok.isNot(tok::greatergreater) &&
1269 Tok.isNot(tok::equal) && Tok.isNot(tok::plusequal) &&
1270 Tok.isNot(tok::minusequal) && Tok.isNot(tok::starequal) &&
1271 Tok.isNot(tok::slashequal) && Tok.isNot(tok::percentequal) &&
1272 Tok.isNot(tok::ampequal) && Tok.isNot(tok::pipeequal) &&
1273 Tok.isNot(tok::caretequal) && Tok.isNot(tok::greatergreaterequal) &&
1274 Tok.isNot(tok::lesslessequal) &&
1275 // Colon is used in labels, base class lists, initializer lists,
1276 // range-based for loops, ternary operator, but should never be the
1277 // first token in an unwrapped line.
1278 Tok.isNot(tok::colon) &&
1279 // 'noexcept' is a trailing annotation.
1280 Tok.isNot(tok::kw_noexcept);
1281 }
1282
mustBeJSIdent(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)1283 static bool mustBeJSIdent(const AdditionalKeywords &Keywords,
1284 const FormatToken *FormatTok) {
1285 // FIXME: This returns true for C/C++ keywords like 'struct'.
1286 return FormatTok->is(tok::identifier) &&
1287 (FormatTok->Tok.getIdentifierInfo() == nullptr ||
1288 !FormatTok->isOneOf(
1289 Keywords.kw_in, Keywords.kw_of, Keywords.kw_as, Keywords.kw_async,
1290 Keywords.kw_await, Keywords.kw_yield, Keywords.kw_finally,
1291 Keywords.kw_function, Keywords.kw_import, Keywords.kw_is,
1292 Keywords.kw_let, Keywords.kw_var, tok::kw_const,
1293 Keywords.kw_abstract, Keywords.kw_extends, Keywords.kw_implements,
1294 Keywords.kw_instanceof, Keywords.kw_interface,
1295 Keywords.kw_override, Keywords.kw_throws, Keywords.kw_from));
1296 }
1297
mustBeJSIdentOrValue(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)1298 static bool mustBeJSIdentOrValue(const AdditionalKeywords &Keywords,
1299 const FormatToken *FormatTok) {
1300 return FormatTok->Tok.isLiteral() ||
1301 FormatTok->isOneOf(tok::kw_true, tok::kw_false) ||
1302 mustBeJSIdent(Keywords, FormatTok);
1303 }
1304
1305 // isJSDeclOrStmt returns true if |FormatTok| starts a declaration or statement
1306 // when encountered after a value (see mustBeJSIdentOrValue).
isJSDeclOrStmt(const AdditionalKeywords & Keywords,const FormatToken * FormatTok)1307 static bool isJSDeclOrStmt(const AdditionalKeywords &Keywords,
1308 const FormatToken *FormatTok) {
1309 return FormatTok->isOneOf(
1310 tok::kw_return, Keywords.kw_yield,
1311 // conditionals
1312 tok::kw_if, tok::kw_else,
1313 // loops
1314 tok::kw_for, tok::kw_while, tok::kw_do, tok::kw_continue, tok::kw_break,
1315 // switch/case
1316 tok::kw_switch, tok::kw_case,
1317 // exceptions
1318 tok::kw_throw, tok::kw_try, tok::kw_catch, Keywords.kw_finally,
1319 // declaration
1320 tok::kw_const, tok::kw_class, Keywords.kw_var, Keywords.kw_let,
1321 Keywords.kw_async, Keywords.kw_function,
1322 // import/export
1323 Keywords.kw_import, tok::kw_export);
1324 }
1325
1326 // Checks whether a token is a type in K&R C (aka C78).
isC78Type(const FormatToken & Tok)1327 static bool isC78Type(const FormatToken &Tok) {
1328 return Tok.isOneOf(tok::kw_char, tok::kw_short, tok::kw_int, tok::kw_long,
1329 tok::kw_unsigned, tok::kw_float, tok::kw_double,
1330 tok::identifier);
1331 }
1332
1333 // This function checks whether a token starts the first parameter declaration
1334 // in a K&R C (aka C78) function definition, e.g.:
1335 // int f(a, b)
1336 // short a, b;
1337 // {
1338 // return a + b;
1339 // }
isC78ParameterDecl(const FormatToken * Tok,const FormatToken * Next,const FormatToken * FuncName)1340 static bool isC78ParameterDecl(const FormatToken *Tok, const FormatToken *Next,
1341 const FormatToken *FuncName) {
1342 assert(Tok);
1343 assert(Next);
1344 assert(FuncName);
1345
1346 if (FuncName->isNot(tok::identifier))
1347 return false;
1348
1349 const FormatToken *Prev = FuncName->Previous;
1350 if (!Prev || (Prev->isNot(tok::star) && !isC78Type(*Prev)))
1351 return false;
1352
1353 if (!isC78Type(*Tok) &&
1354 !Tok->isOneOf(tok::kw_register, tok::kw_struct, tok::kw_union)) {
1355 return false;
1356 }
1357
1358 if (Next->isNot(tok::star) && !Next->Tok.getIdentifierInfo())
1359 return false;
1360
1361 Tok = Tok->Previous;
1362 if (!Tok || Tok->isNot(tok::r_paren))
1363 return false;
1364
1365 Tok = Tok->Previous;
1366 if (!Tok || Tok->isNot(tok::identifier))
1367 return false;
1368
1369 return Tok->Previous && Tok->Previous->isOneOf(tok::l_paren, tok::comma);
1370 }
1371
parseModuleImport()1372 void UnwrappedLineParser::parseModuleImport() {
1373 nextToken();
1374 while (!eof()) {
1375 if (FormatTok->is(tok::colon)) {
1376 FormatTok->setFinalizedType(TT_ModulePartitionColon);
1377 }
1378 // Handle import <foo/bar.h> as we would an include statement.
1379 else if (FormatTok->is(tok::less)) {
1380 nextToken();
1381 while (!FormatTok->isOneOf(tok::semi, tok::greater, tok::eof)) {
1382 // Mark tokens up to the trailing line comments as implicit string
1383 // literals.
1384 if (FormatTok->isNot(tok::comment) &&
1385 !FormatTok->TokenText.startswith("//")) {
1386 FormatTok->setFinalizedType(TT_ImplicitStringLiteral);
1387 }
1388 nextToken();
1389 }
1390 }
1391 if (FormatTok->is(tok::semi)) {
1392 nextToken();
1393 break;
1394 }
1395 nextToken();
1396 }
1397
1398 addUnwrappedLine();
1399 }
1400
1401 // readTokenWithJavaScriptASI reads the next token and terminates the current
1402 // line if JavaScript Automatic Semicolon Insertion must
1403 // happen between the current token and the next token.
1404 //
1405 // This method is conservative - it cannot cover all edge cases of JavaScript,
1406 // but only aims to correctly handle certain well known cases. It *must not*
1407 // return true in speculative cases.
readTokenWithJavaScriptASI()1408 void UnwrappedLineParser::readTokenWithJavaScriptASI() {
1409 FormatToken *Previous = FormatTok;
1410 readToken();
1411 FormatToken *Next = FormatTok;
1412
1413 bool IsOnSameLine =
1414 CommentsBeforeNextToken.empty()
1415 ? Next->NewlinesBefore == 0
1416 : CommentsBeforeNextToken.front()->NewlinesBefore == 0;
1417 if (IsOnSameLine)
1418 return;
1419
1420 bool PreviousMustBeValue = mustBeJSIdentOrValue(Keywords, Previous);
1421 bool PreviousStartsTemplateExpr =
1422 Previous->is(TT_TemplateString) && Previous->TokenText.endswith("${");
1423 if (PreviousMustBeValue || Previous->is(tok::r_paren)) {
1424 // If the line contains an '@' sign, the previous token might be an
1425 // annotation, which can precede another identifier/value.
1426 bool HasAt = llvm::any_of(Line->Tokens, [](UnwrappedLineNode &LineNode) {
1427 return LineNode.Tok->is(tok::at);
1428 });
1429 if (HasAt)
1430 return;
1431 }
1432 if (Next->is(tok::exclaim) && PreviousMustBeValue)
1433 return addUnwrappedLine();
1434 bool NextMustBeValue = mustBeJSIdentOrValue(Keywords, Next);
1435 bool NextEndsTemplateExpr =
1436 Next->is(TT_TemplateString) && Next->TokenText.startswith("}");
1437 if (NextMustBeValue && !NextEndsTemplateExpr && !PreviousStartsTemplateExpr &&
1438 (PreviousMustBeValue ||
1439 Previous->isOneOf(tok::r_square, tok::r_paren, tok::plusplus,
1440 tok::minusminus))) {
1441 return addUnwrappedLine();
1442 }
1443 if ((PreviousMustBeValue || Previous->is(tok::r_paren)) &&
1444 isJSDeclOrStmt(Keywords, Next)) {
1445 return addUnwrappedLine();
1446 }
1447 }
1448
parseStructuralElement(bool IsTopLevel,TokenType NextLBracesType,IfStmtKind * IfKind,FormatToken ** IfLeftBrace,bool * HasDoWhile,bool * HasLabel)1449 void UnwrappedLineParser::parseStructuralElement(
1450 bool IsTopLevel, TokenType NextLBracesType, IfStmtKind *IfKind,
1451 FormatToken **IfLeftBrace, bool *HasDoWhile, bool *HasLabel) {
1452 if (Style.Language == FormatStyle::LK_TableGen &&
1453 FormatTok->is(tok::pp_include)) {
1454 nextToken();
1455 if (FormatTok->is(tok::string_literal))
1456 nextToken();
1457 addUnwrappedLine();
1458 return;
1459 }
1460 switch (FormatTok->Tok.getKind()) {
1461 case tok::kw_asm:
1462 nextToken();
1463 if (FormatTok->is(tok::l_brace)) {
1464 FormatTok->setFinalizedType(TT_InlineASMBrace);
1465 nextToken();
1466 while (FormatTok && FormatTok->isNot(tok::eof)) {
1467 if (FormatTok->is(tok::r_brace)) {
1468 FormatTok->setFinalizedType(TT_InlineASMBrace);
1469 nextToken();
1470 addUnwrappedLine();
1471 break;
1472 }
1473 FormatTok->Finalized = true;
1474 nextToken();
1475 }
1476 }
1477 break;
1478 case tok::kw_namespace:
1479 parseNamespace();
1480 return;
1481 case tok::kw_public:
1482 case tok::kw_protected:
1483 case tok::kw_private:
1484 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
1485 Style.isCSharp()) {
1486 nextToken();
1487 } else {
1488 parseAccessSpecifier();
1489 }
1490 return;
1491 case tok::kw_if: {
1492 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1493 // field/method declaration.
1494 break;
1495 }
1496 FormatToken *Tok = parseIfThenElse(IfKind);
1497 if (IfLeftBrace)
1498 *IfLeftBrace = Tok;
1499 return;
1500 }
1501 case tok::kw_for:
1502 case tok::kw_while:
1503 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1504 // field/method declaration.
1505 break;
1506 }
1507 parseForOrWhileLoop();
1508 return;
1509 case tok::kw_do:
1510 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1511 // field/method declaration.
1512 break;
1513 }
1514 parseDoWhile();
1515 if (HasDoWhile)
1516 *HasDoWhile = true;
1517 return;
1518 case tok::kw_switch:
1519 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1520 // 'switch: string' field declaration.
1521 break;
1522 }
1523 parseSwitch();
1524 return;
1525 case tok::kw_default:
1526 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1527 // 'default: string' field declaration.
1528 break;
1529 }
1530 nextToken();
1531 if (FormatTok->is(tok::colon)) {
1532 parseLabel();
1533 return;
1534 }
1535 // e.g. "default void f() {}" in a Java interface.
1536 break;
1537 case tok::kw_case:
1538 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1539 // 'case: string' field declaration.
1540 nextToken();
1541 break;
1542 }
1543 parseCaseLabel();
1544 return;
1545 case tok::kw_try:
1546 case tok::kw___try:
1547 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1548 // field/method declaration.
1549 break;
1550 }
1551 parseTryCatch();
1552 return;
1553 case tok::kw_extern:
1554 nextToken();
1555 if (FormatTok->is(tok::string_literal)) {
1556 nextToken();
1557 if (FormatTok->is(tok::l_brace)) {
1558 if (Style.BraceWrapping.AfterExternBlock)
1559 addUnwrappedLine();
1560 // Either we indent or for backwards compatibility we follow the
1561 // AfterExternBlock style.
1562 unsigned AddLevels =
1563 (Style.IndentExternBlock == FormatStyle::IEBS_Indent) ||
1564 (Style.BraceWrapping.AfterExternBlock &&
1565 Style.IndentExternBlock ==
1566 FormatStyle::IEBS_AfterExternBlock)
1567 ? 1u
1568 : 0u;
1569 parseBlock(/*MustBeDeclaration=*/true, AddLevels);
1570 addUnwrappedLine();
1571 return;
1572 }
1573 }
1574 break;
1575 case tok::kw_export:
1576 if (Style.isJavaScript()) {
1577 parseJavaScriptEs6ImportExport();
1578 return;
1579 }
1580 if (!Style.isCpp())
1581 break;
1582 // Handle C++ "(inline|export) namespace".
1583 LLVM_FALLTHROUGH;
1584 case tok::kw_inline:
1585 nextToken();
1586 if (FormatTok->is(tok::kw_namespace)) {
1587 parseNamespace();
1588 return;
1589 }
1590 break;
1591 case tok::identifier:
1592 if (FormatTok->is(TT_ForEachMacro)) {
1593 parseForOrWhileLoop();
1594 return;
1595 }
1596 if (FormatTok->is(TT_MacroBlockBegin)) {
1597 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
1598 /*MunchSemi=*/false);
1599 return;
1600 }
1601 if (FormatTok->is(Keywords.kw_import)) {
1602 if (Style.isJavaScript()) {
1603 parseJavaScriptEs6ImportExport();
1604 return;
1605 }
1606 if (Style.Language == FormatStyle::LK_Proto) {
1607 nextToken();
1608 if (FormatTok->is(tok::kw_public))
1609 nextToken();
1610 if (!FormatTok->is(tok::string_literal))
1611 return;
1612 nextToken();
1613 if (FormatTok->is(tok::semi))
1614 nextToken();
1615 addUnwrappedLine();
1616 return;
1617 }
1618 if (Style.isCpp()) {
1619 parseModuleImport();
1620 return;
1621 }
1622 }
1623 if (Style.isCpp() &&
1624 FormatTok->isOneOf(Keywords.kw_signals, Keywords.kw_qsignals,
1625 Keywords.kw_slots, Keywords.kw_qslots)) {
1626 nextToken();
1627 if (FormatTok->is(tok::colon)) {
1628 nextToken();
1629 addUnwrappedLine();
1630 return;
1631 }
1632 }
1633 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1634 parseStatementMacro();
1635 return;
1636 }
1637 if (Style.isCpp() && FormatTok->is(TT_NamespaceMacro)) {
1638 parseNamespace();
1639 return;
1640 }
1641 // In all other cases, parse the declaration.
1642 break;
1643 default:
1644 break;
1645 }
1646 do {
1647 const FormatToken *Previous = FormatTok->Previous;
1648 switch (FormatTok->Tok.getKind()) {
1649 case tok::at:
1650 nextToken();
1651 if (FormatTok->is(tok::l_brace)) {
1652 nextToken();
1653 parseBracedList();
1654 break;
1655 } else if (Style.Language == FormatStyle::LK_Java &&
1656 FormatTok->is(Keywords.kw_interface)) {
1657 nextToken();
1658 break;
1659 }
1660 switch (FormatTok->Tok.getObjCKeywordID()) {
1661 case tok::objc_public:
1662 case tok::objc_protected:
1663 case tok::objc_package:
1664 case tok::objc_private:
1665 return parseAccessSpecifier();
1666 case tok::objc_interface:
1667 case tok::objc_implementation:
1668 return parseObjCInterfaceOrImplementation();
1669 case tok::objc_protocol:
1670 if (parseObjCProtocol())
1671 return;
1672 break;
1673 case tok::objc_end:
1674 return; // Handled by the caller.
1675 case tok::objc_optional:
1676 case tok::objc_required:
1677 nextToken();
1678 addUnwrappedLine();
1679 return;
1680 case tok::objc_autoreleasepool:
1681 nextToken();
1682 if (FormatTok->is(tok::l_brace)) {
1683 if (Style.BraceWrapping.AfterControlStatement ==
1684 FormatStyle::BWACS_Always) {
1685 addUnwrappedLine();
1686 }
1687 parseBlock();
1688 }
1689 addUnwrappedLine();
1690 return;
1691 case tok::objc_synchronized:
1692 nextToken();
1693 if (FormatTok->is(tok::l_paren)) {
1694 // Skip synchronization object
1695 parseParens();
1696 }
1697 if (FormatTok->is(tok::l_brace)) {
1698 if (Style.BraceWrapping.AfterControlStatement ==
1699 FormatStyle::BWACS_Always) {
1700 addUnwrappedLine();
1701 }
1702 parseBlock();
1703 }
1704 addUnwrappedLine();
1705 return;
1706 case tok::objc_try:
1707 // This branch isn't strictly necessary (the kw_try case below would
1708 // do this too after the tok::at is parsed above). But be explicit.
1709 parseTryCatch();
1710 return;
1711 default:
1712 break;
1713 }
1714 break;
1715 case tok::kw_concept:
1716 parseConcept();
1717 return;
1718 case tok::kw_requires: {
1719 if (Style.isCpp()) {
1720 bool ParsedClause = parseRequires();
1721 if (ParsedClause)
1722 return;
1723 } else {
1724 nextToken();
1725 }
1726 break;
1727 }
1728 case tok::kw_enum:
1729 // Ignore if this is part of "template <enum ...".
1730 if (Previous && Previous->is(tok::less)) {
1731 nextToken();
1732 break;
1733 }
1734
1735 // parseEnum falls through and does not yet add an unwrapped line as an
1736 // enum definition can start a structural element.
1737 if (!parseEnum())
1738 break;
1739 // This only applies for C++.
1740 if (!Style.isCpp()) {
1741 addUnwrappedLine();
1742 return;
1743 }
1744 break;
1745 case tok::kw_typedef:
1746 nextToken();
1747 if (FormatTok->isOneOf(Keywords.kw_NS_ENUM, Keywords.kw_NS_OPTIONS,
1748 Keywords.kw_CF_ENUM, Keywords.kw_CF_OPTIONS,
1749 Keywords.kw_CF_CLOSED_ENUM,
1750 Keywords.kw_NS_CLOSED_ENUM)) {
1751 parseEnum();
1752 }
1753 break;
1754 case tok::kw_struct:
1755 case tok::kw_union:
1756 case tok::kw_class:
1757 if (parseStructLike())
1758 return;
1759 break;
1760 case tok::period:
1761 nextToken();
1762 // In Java, classes have an implicit static member "class".
1763 if (Style.Language == FormatStyle::LK_Java && FormatTok &&
1764 FormatTok->is(tok::kw_class)) {
1765 nextToken();
1766 }
1767 if (Style.isJavaScript() && FormatTok &&
1768 FormatTok->Tok.getIdentifierInfo()) {
1769 // JavaScript only has pseudo keywords, all keywords are allowed to
1770 // appear in "IdentifierName" positions. See http://es5.github.io/#x7.6
1771 nextToken();
1772 }
1773 break;
1774 case tok::semi:
1775 nextToken();
1776 addUnwrappedLine();
1777 return;
1778 case tok::r_brace:
1779 addUnwrappedLine();
1780 return;
1781 case tok::l_paren: {
1782 parseParens();
1783 // Break the unwrapped line if a K&R C function definition has a parameter
1784 // declaration.
1785 if (!IsTopLevel || !Style.isCpp() || !Previous || FormatTok->is(tok::eof))
1786 break;
1787 if (isC78ParameterDecl(FormatTok, Tokens->peekNextToken(), Previous)) {
1788 addUnwrappedLine();
1789 return;
1790 }
1791 break;
1792 }
1793 case tok::kw_operator:
1794 nextToken();
1795 if (FormatTok->isBinaryOperator())
1796 nextToken();
1797 break;
1798 case tok::caret:
1799 nextToken();
1800 if (FormatTok->Tok.isAnyIdentifier() ||
1801 FormatTok->isSimpleTypeSpecifier()) {
1802 nextToken();
1803 }
1804 if (FormatTok->is(tok::l_paren))
1805 parseParens();
1806 if (FormatTok->is(tok::l_brace))
1807 parseChildBlock();
1808 break;
1809 case tok::l_brace:
1810 if (NextLBracesType != TT_Unknown)
1811 FormatTok->setFinalizedType(NextLBracesType);
1812 if (!tryToParsePropertyAccessor() && !tryToParseBracedList()) {
1813 // A block outside of parentheses must be the last part of a
1814 // structural element.
1815 // FIXME: Figure out cases where this is not true, and add projections
1816 // for them (the one we know is missing are lambdas).
1817 if (Style.Language == FormatStyle::LK_Java &&
1818 Line->Tokens.front().Tok->is(Keywords.kw_synchronized)) {
1819 // If necessary, we could set the type to something different than
1820 // TT_FunctionLBrace.
1821 if (Style.BraceWrapping.AfterControlStatement ==
1822 FormatStyle::BWACS_Always) {
1823 addUnwrappedLine();
1824 }
1825 } else if (Style.BraceWrapping.AfterFunction) {
1826 addUnwrappedLine();
1827 }
1828 if (!Line->InPPDirective)
1829 FormatTok->setFinalizedType(TT_FunctionLBrace);
1830 parseBlock();
1831 addUnwrappedLine();
1832 return;
1833 }
1834 // Otherwise this was a braced init list, and the structural
1835 // element continues.
1836 break;
1837 case tok::kw_try:
1838 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1839 // field/method declaration.
1840 nextToken();
1841 break;
1842 }
1843 // We arrive here when parsing function-try blocks.
1844 if (Style.BraceWrapping.AfterFunction)
1845 addUnwrappedLine();
1846 parseTryCatch();
1847 return;
1848 case tok::identifier: {
1849 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where) &&
1850 Line->MustBeDeclaration) {
1851 addUnwrappedLine();
1852 parseCSharpGenericTypeConstraint();
1853 break;
1854 }
1855 if (FormatTok->is(TT_MacroBlockEnd)) {
1856 addUnwrappedLine();
1857 return;
1858 }
1859
1860 // Function declarations (as opposed to function expressions) are parsed
1861 // on their own unwrapped line by continuing this loop. Function
1862 // expressions (functions that are not on their own line) must not create
1863 // a new unwrapped line, so they are special cased below.
1864 size_t TokenCount = Line->Tokens.size();
1865 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_function) &&
1866 (TokenCount > 1 || (TokenCount == 1 && !Line->Tokens.front().Tok->is(
1867 Keywords.kw_async)))) {
1868 tryToParseJSFunction();
1869 break;
1870 }
1871 if ((Style.isJavaScript() || Style.Language == FormatStyle::LK_Java) &&
1872 FormatTok->is(Keywords.kw_interface)) {
1873 if (Style.isJavaScript()) {
1874 // In JavaScript/TypeScript, "interface" can be used as a standalone
1875 // identifier, e.g. in `var interface = 1;`. If "interface" is
1876 // followed by another identifier, it is very like to be an actual
1877 // interface declaration.
1878 unsigned StoredPosition = Tokens->getPosition();
1879 FormatToken *Next = Tokens->getNextToken();
1880 FormatTok = Tokens->setPosition(StoredPosition);
1881 if (!mustBeJSIdent(Keywords, Next)) {
1882 nextToken();
1883 break;
1884 }
1885 }
1886 parseRecord();
1887 addUnwrappedLine();
1888 return;
1889 }
1890
1891 if (FormatTok->is(Keywords.kw_interface)) {
1892 if (parseStructLike())
1893 return;
1894 break;
1895 }
1896
1897 if (Style.isCpp() && FormatTok->is(TT_StatementMacro)) {
1898 parseStatementMacro();
1899 return;
1900 }
1901
1902 // See if the following token should start a new unwrapped line.
1903 StringRef Text = FormatTok->TokenText;
1904
1905 FormatToken *PreviousToken = FormatTok;
1906 nextToken();
1907
1908 // JS doesn't have macros, and within classes colons indicate fields, not
1909 // labels.
1910 if (Style.isJavaScript())
1911 break;
1912
1913 auto OneTokenSoFar = [&]() {
1914 auto I = Line->Tokens.begin(), E = Line->Tokens.end();
1915 while (I != E && I->Tok->is(tok::comment))
1916 ++I;
1917 while (I != E && Style.isVerilog() && I->Tok->is(tok::hash))
1918 ++I;
1919 return I != E && (++I == E);
1920 };
1921 if (OneTokenSoFar()) {
1922 if (FormatTok->is(tok::colon) && !Line->MustBeDeclaration) {
1923 Line->Tokens.begin()->Tok->MustBreakBefore = true;
1924 parseLabel(!Style.IndentGotoLabels);
1925 if (HasLabel)
1926 *HasLabel = true;
1927 return;
1928 }
1929 // Recognize function-like macro usages without trailing semicolon as
1930 // well as free-standing macros like Q_OBJECT.
1931 bool FunctionLike = FormatTok->is(tok::l_paren);
1932 if (FunctionLike)
1933 parseParens();
1934
1935 bool FollowedByNewline =
1936 CommentsBeforeNextToken.empty()
1937 ? FormatTok->NewlinesBefore > 0
1938 : CommentsBeforeNextToken.front()->NewlinesBefore > 0;
1939
1940 if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) &&
1941 tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) {
1942 PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro);
1943 addUnwrappedLine();
1944 return;
1945 }
1946 }
1947 break;
1948 }
1949 case tok::equal:
1950 if ((Style.isJavaScript() || Style.isCSharp()) &&
1951 FormatTok->is(TT_FatArrow)) {
1952 tryToParseChildBlock();
1953 break;
1954 }
1955
1956 nextToken();
1957 if (FormatTok->is(tok::l_brace)) {
1958 // Block kind should probably be set to BK_BracedInit for any language.
1959 // C# needs this change to ensure that array initialisers and object
1960 // initialisers are indented the same way.
1961 if (Style.isCSharp())
1962 FormatTok->setBlockKind(BK_BracedInit);
1963 nextToken();
1964 parseBracedList();
1965 } else if (Style.Language == FormatStyle::LK_Proto &&
1966 FormatTok->is(tok::less)) {
1967 nextToken();
1968 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
1969 /*ClosingBraceKind=*/tok::greater);
1970 }
1971 break;
1972 case tok::l_square:
1973 parseSquare();
1974 break;
1975 case tok::kw_new:
1976 parseNew();
1977 break;
1978 case tok::kw_case:
1979 if (Style.isJavaScript() && Line->MustBeDeclaration) {
1980 // 'case: string' field declaration.
1981 nextToken();
1982 break;
1983 }
1984 parseCaseLabel();
1985 break;
1986 default:
1987 nextToken();
1988 break;
1989 }
1990 } while (!eof());
1991 }
1992
tryToParsePropertyAccessor()1993 bool UnwrappedLineParser::tryToParsePropertyAccessor() {
1994 assert(FormatTok->is(tok::l_brace));
1995 if (!Style.isCSharp())
1996 return false;
1997 // See if it's a property accessor.
1998 if (FormatTok->Previous->isNot(tok::identifier))
1999 return false;
2000
2001 // See if we are inside a property accessor.
2002 //
2003 // Record the current tokenPosition so that we can advance and
2004 // reset the current token. `Next` is not set yet so we need
2005 // another way to advance along the token stream.
2006 unsigned int StoredPosition = Tokens->getPosition();
2007 FormatToken *Tok = Tokens->getNextToken();
2008
2009 // A trivial property accessor is of the form:
2010 // { [ACCESS_SPECIFIER] [get]; [ACCESS_SPECIFIER] [set|init] }
2011 // Track these as they do not require line breaks to be introduced.
2012 bool HasSpecialAccessor = false;
2013 bool IsTrivialPropertyAccessor = true;
2014 while (!eof()) {
2015 if (Tok->isOneOf(tok::semi, tok::kw_public, tok::kw_private,
2016 tok::kw_protected, Keywords.kw_internal, Keywords.kw_get,
2017 Keywords.kw_init, Keywords.kw_set)) {
2018 if (Tok->isOneOf(Keywords.kw_get, Keywords.kw_init, Keywords.kw_set))
2019 HasSpecialAccessor = true;
2020 Tok = Tokens->getNextToken();
2021 continue;
2022 }
2023 if (Tok->isNot(tok::r_brace))
2024 IsTrivialPropertyAccessor = false;
2025 break;
2026 }
2027
2028 if (!HasSpecialAccessor) {
2029 Tokens->setPosition(StoredPosition);
2030 return false;
2031 }
2032
2033 // Try to parse the property accessor:
2034 // https://docs.microsoft.com/en-us/dotnet/csharp/programming-guide/classes-and-structs/properties
2035 Tokens->setPosition(StoredPosition);
2036 if (!IsTrivialPropertyAccessor && Style.BraceWrapping.AfterFunction)
2037 addUnwrappedLine();
2038 nextToken();
2039 do {
2040 switch (FormatTok->Tok.getKind()) {
2041 case tok::r_brace:
2042 nextToken();
2043 if (FormatTok->is(tok::equal)) {
2044 while (!eof() && FormatTok->isNot(tok::semi))
2045 nextToken();
2046 nextToken();
2047 }
2048 addUnwrappedLine();
2049 return true;
2050 case tok::l_brace:
2051 ++Line->Level;
2052 parseBlock(/*MustBeDeclaration=*/true);
2053 addUnwrappedLine();
2054 --Line->Level;
2055 break;
2056 case tok::equal:
2057 if (FormatTok->is(TT_FatArrow)) {
2058 ++Line->Level;
2059 do {
2060 nextToken();
2061 } while (!eof() && FormatTok->isNot(tok::semi));
2062 nextToken();
2063 addUnwrappedLine();
2064 --Line->Level;
2065 break;
2066 }
2067 nextToken();
2068 break;
2069 default:
2070 if (FormatTok->isOneOf(Keywords.kw_get, Keywords.kw_init,
2071 Keywords.kw_set) &&
2072 !IsTrivialPropertyAccessor) {
2073 // Non-trivial get/set needs to be on its own line.
2074 addUnwrappedLine();
2075 }
2076 nextToken();
2077 }
2078 } while (!eof());
2079
2080 // Unreachable for well-formed code (paired '{' and '}').
2081 return true;
2082 }
2083
tryToParseLambda()2084 bool UnwrappedLineParser::tryToParseLambda() {
2085 assert(FormatTok->is(tok::l_square));
2086 if (!Style.isCpp()) {
2087 nextToken();
2088 return false;
2089 }
2090 FormatToken &LSquare = *FormatTok;
2091 if (!tryToParseLambdaIntroducer())
2092 return false;
2093
2094 bool SeenArrow = false;
2095 bool InTemplateParameterList = false;
2096
2097 while (FormatTok->isNot(tok::l_brace)) {
2098 if (FormatTok->isSimpleTypeSpecifier()) {
2099 nextToken();
2100 continue;
2101 }
2102 switch (FormatTok->Tok.getKind()) {
2103 case tok::l_brace:
2104 break;
2105 case tok::l_paren:
2106 parseParens();
2107 break;
2108 case tok::l_square:
2109 parseSquare();
2110 break;
2111 case tok::kw_class:
2112 case tok::kw_template:
2113 case tok::kw_typename:
2114 assert(FormatTok->Previous);
2115 if (FormatTok->Previous->is(tok::less))
2116 InTemplateParameterList = true;
2117 nextToken();
2118 break;
2119 case tok::amp:
2120 case tok::star:
2121 case tok::kw_const:
2122 case tok::kw_constexpr:
2123 case tok::comma:
2124 case tok::less:
2125 case tok::greater:
2126 case tok::identifier:
2127 case tok::numeric_constant:
2128 case tok::coloncolon:
2129 case tok::kw_mutable:
2130 case tok::kw_noexcept:
2131 nextToken();
2132 break;
2133 // Specialization of a template with an integer parameter can contain
2134 // arithmetic, logical, comparison and ternary operators.
2135 //
2136 // FIXME: This also accepts sequences of operators that are not in the scope
2137 // of a template argument list.
2138 //
2139 // In a C++ lambda a template type can only occur after an arrow. We use
2140 // this as an heuristic to distinguish between Objective-C expressions
2141 // followed by an `a->b` expression, such as:
2142 // ([obj func:arg] + a->b)
2143 // Otherwise the code below would parse as a lambda.
2144 //
2145 // FIXME: This heuristic is incorrect for C++20 generic lambdas with
2146 // explicit template lists: []<bool b = true && false>(U &&u){}
2147 case tok::plus:
2148 case tok::minus:
2149 case tok::exclaim:
2150 case tok::tilde:
2151 case tok::slash:
2152 case tok::percent:
2153 case tok::lessless:
2154 case tok::pipe:
2155 case tok::pipepipe:
2156 case tok::ampamp:
2157 case tok::caret:
2158 case tok::equalequal:
2159 case tok::exclaimequal:
2160 case tok::greaterequal:
2161 case tok::lessequal:
2162 case tok::question:
2163 case tok::colon:
2164 case tok::ellipsis:
2165 case tok::kw_true:
2166 case tok::kw_false:
2167 if (SeenArrow || InTemplateParameterList) {
2168 nextToken();
2169 break;
2170 }
2171 return true;
2172 case tok::arrow:
2173 // This might or might not actually be a lambda arrow (this could be an
2174 // ObjC method invocation followed by a dereferencing arrow). We might
2175 // reset this back to TT_Unknown in TokenAnnotator.
2176 FormatTok->setFinalizedType(TT_LambdaArrow);
2177 SeenArrow = true;
2178 nextToken();
2179 break;
2180 default:
2181 return true;
2182 }
2183 }
2184 FormatTok->setFinalizedType(TT_LambdaLBrace);
2185 LSquare.setFinalizedType(TT_LambdaLSquare);
2186 parseChildBlock();
2187 return true;
2188 }
2189
tryToParseLambdaIntroducer()2190 bool UnwrappedLineParser::tryToParseLambdaIntroducer() {
2191 const FormatToken *Previous = FormatTok->Previous;
2192 const FormatToken *LeftSquare = FormatTok;
2193 nextToken();
2194 if (Previous &&
2195 (Previous->isOneOf(tok::identifier, tok::kw_operator, tok::kw_new,
2196 tok::kw_delete, tok::l_square) ||
2197 LeftSquare->isCppStructuredBinding(Style) || Previous->closesScope() ||
2198 Previous->isSimpleTypeSpecifier())) {
2199 return false;
2200 }
2201 if (FormatTok->is(tok::l_square))
2202 return false;
2203 if (FormatTok->is(tok::r_square)) {
2204 const FormatToken *Next = Tokens->peekNextToken();
2205 if (Next->is(tok::greater))
2206 return false;
2207 }
2208 parseSquare(/*LambdaIntroducer=*/true);
2209 return true;
2210 }
2211
tryToParseJSFunction()2212 void UnwrappedLineParser::tryToParseJSFunction() {
2213 assert(FormatTok->is(Keywords.kw_function) ||
2214 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function));
2215 if (FormatTok->is(Keywords.kw_async))
2216 nextToken();
2217 // Consume "function".
2218 nextToken();
2219
2220 // Consume * (generator function). Treat it like C++'s overloaded operators.
2221 if (FormatTok->is(tok::star)) {
2222 FormatTok->setFinalizedType(TT_OverloadedOperator);
2223 nextToken();
2224 }
2225
2226 // Consume function name.
2227 if (FormatTok->is(tok::identifier))
2228 nextToken();
2229
2230 if (FormatTok->isNot(tok::l_paren))
2231 return;
2232
2233 // Parse formal parameter list.
2234 parseParens();
2235
2236 if (FormatTok->is(tok::colon)) {
2237 // Parse a type definition.
2238 nextToken();
2239
2240 // Eat the type declaration. For braced inline object types, balance braces,
2241 // otherwise just parse until finding an l_brace for the function body.
2242 if (FormatTok->is(tok::l_brace))
2243 tryToParseBracedList();
2244 else
2245 while (!FormatTok->isOneOf(tok::l_brace, tok::semi) && !eof())
2246 nextToken();
2247 }
2248
2249 if (FormatTok->is(tok::semi))
2250 return;
2251
2252 parseChildBlock();
2253 }
2254
tryToParseBracedList()2255 bool UnwrappedLineParser::tryToParseBracedList() {
2256 if (FormatTok->is(BK_Unknown))
2257 calculateBraceTypes();
2258 assert(FormatTok->isNot(BK_Unknown));
2259 if (FormatTok->is(BK_Block))
2260 return false;
2261 nextToken();
2262 parseBracedList();
2263 return true;
2264 }
2265
tryToParseChildBlock()2266 bool UnwrappedLineParser::tryToParseChildBlock() {
2267 assert(Style.isJavaScript() || Style.isCSharp());
2268 assert(FormatTok->is(TT_FatArrow));
2269 // Fat arrows (=>) have tok::TokenKind tok::equal but TokenType TT_FatArrow.
2270 // They always start an expression or a child block if followed by a curly
2271 // brace.
2272 nextToken();
2273 if (FormatTok->isNot(tok::l_brace))
2274 return false;
2275 parseChildBlock();
2276 return true;
2277 }
2278
parseBracedList(bool ContinueOnSemicolons,bool IsEnum,tok::TokenKind ClosingBraceKind)2279 bool UnwrappedLineParser::parseBracedList(bool ContinueOnSemicolons,
2280 bool IsEnum,
2281 tok::TokenKind ClosingBraceKind) {
2282 bool HasError = false;
2283
2284 // FIXME: Once we have an expression parser in the UnwrappedLineParser,
2285 // replace this by using parseAssignmentExpression() inside.
2286 do {
2287 if (Style.isCSharp() && FormatTok->is(TT_FatArrow) &&
2288 tryToParseChildBlock()) {
2289 continue;
2290 }
2291 if (Style.isJavaScript()) {
2292 if (FormatTok->is(Keywords.kw_function) ||
2293 FormatTok->startsSequence(Keywords.kw_async, Keywords.kw_function)) {
2294 tryToParseJSFunction();
2295 continue;
2296 }
2297 if (FormatTok->is(tok::l_brace)) {
2298 // Could be a method inside of a braced list `{a() { return 1; }}`.
2299 if (tryToParseBracedList())
2300 continue;
2301 parseChildBlock();
2302 }
2303 }
2304 if (FormatTok->Tok.getKind() == ClosingBraceKind) {
2305 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2306 addUnwrappedLine();
2307 nextToken();
2308 return !HasError;
2309 }
2310 switch (FormatTok->Tok.getKind()) {
2311 case tok::l_square:
2312 if (Style.isCSharp())
2313 parseSquare();
2314 else
2315 tryToParseLambda();
2316 break;
2317 case tok::l_paren:
2318 parseParens();
2319 // JavaScript can just have free standing methods and getters/setters in
2320 // object literals. Detect them by a "{" following ")".
2321 if (Style.isJavaScript()) {
2322 if (FormatTok->is(tok::l_brace))
2323 parseChildBlock();
2324 break;
2325 }
2326 break;
2327 case tok::l_brace:
2328 // Assume there are no blocks inside a braced init list apart
2329 // from the ones we explicitly parse out (like lambdas).
2330 FormatTok->setBlockKind(BK_BracedInit);
2331 nextToken();
2332 parseBracedList();
2333 break;
2334 case tok::less:
2335 if (Style.Language == FormatStyle::LK_Proto ||
2336 ClosingBraceKind == tok::greater) {
2337 nextToken();
2338 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
2339 /*ClosingBraceKind=*/tok::greater);
2340 } else {
2341 nextToken();
2342 }
2343 break;
2344 case tok::semi:
2345 // JavaScript (or more precisely TypeScript) can have semicolons in braced
2346 // lists (in so-called TypeMemberLists). Thus, the semicolon cannot be
2347 // used for error recovery if we have otherwise determined that this is
2348 // a braced list.
2349 if (Style.isJavaScript()) {
2350 nextToken();
2351 break;
2352 }
2353 HasError = true;
2354 if (!ContinueOnSemicolons)
2355 return !HasError;
2356 nextToken();
2357 break;
2358 case tok::comma:
2359 nextToken();
2360 if (IsEnum && !Style.AllowShortEnumsOnASingleLine)
2361 addUnwrappedLine();
2362 break;
2363 default:
2364 nextToken();
2365 break;
2366 }
2367 } while (!eof());
2368 return false;
2369 }
2370
2371 /// \brief Parses a pair of parentheses (and everything between them).
2372 /// \param AmpAmpTokenType If different than TT_Unknown sets this type for all
2373 /// double ampersands. This only counts for the current parens scope.
parseParens(TokenType AmpAmpTokenType)2374 void UnwrappedLineParser::parseParens(TokenType AmpAmpTokenType) {
2375 assert(FormatTok->is(tok::l_paren) && "'(' expected.");
2376 nextToken();
2377 do {
2378 switch (FormatTok->Tok.getKind()) {
2379 case tok::l_paren:
2380 parseParens();
2381 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_brace))
2382 parseChildBlock();
2383 break;
2384 case tok::r_paren:
2385 nextToken();
2386 return;
2387 case tok::r_brace:
2388 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2389 return;
2390 case tok::l_square:
2391 tryToParseLambda();
2392 break;
2393 case tok::l_brace:
2394 if (!tryToParseBracedList())
2395 parseChildBlock();
2396 break;
2397 case tok::at:
2398 nextToken();
2399 if (FormatTok->is(tok::l_brace)) {
2400 nextToken();
2401 parseBracedList();
2402 }
2403 break;
2404 case tok::equal:
2405 if (Style.isCSharp() && FormatTok->is(TT_FatArrow))
2406 tryToParseChildBlock();
2407 else
2408 nextToken();
2409 break;
2410 case tok::kw_class:
2411 if (Style.isJavaScript())
2412 parseRecord(/*ParseAsExpr=*/true);
2413 else
2414 nextToken();
2415 break;
2416 case tok::identifier:
2417 if (Style.isJavaScript() &&
2418 (FormatTok->is(Keywords.kw_function) ||
2419 FormatTok->startsSequence(Keywords.kw_async,
2420 Keywords.kw_function))) {
2421 tryToParseJSFunction();
2422 } else {
2423 nextToken();
2424 }
2425 break;
2426 case tok::kw_requires: {
2427 auto RequiresToken = FormatTok;
2428 nextToken();
2429 parseRequiresExpression(RequiresToken);
2430 break;
2431 }
2432 case tok::ampamp:
2433 if (AmpAmpTokenType != TT_Unknown)
2434 FormatTok->setFinalizedType(AmpAmpTokenType);
2435 LLVM_FALLTHROUGH;
2436 default:
2437 nextToken();
2438 break;
2439 }
2440 } while (!eof());
2441 }
2442
parseSquare(bool LambdaIntroducer)2443 void UnwrappedLineParser::parseSquare(bool LambdaIntroducer) {
2444 if (!LambdaIntroducer) {
2445 assert(FormatTok->is(tok::l_square) && "'[' expected.");
2446 if (tryToParseLambda())
2447 return;
2448 }
2449 do {
2450 switch (FormatTok->Tok.getKind()) {
2451 case tok::l_paren:
2452 parseParens();
2453 break;
2454 case tok::r_square:
2455 nextToken();
2456 return;
2457 case tok::r_brace:
2458 // A "}" inside parenthesis is an error if there wasn't a matching "{".
2459 return;
2460 case tok::l_square:
2461 parseSquare();
2462 break;
2463 case tok::l_brace: {
2464 if (!tryToParseBracedList())
2465 parseChildBlock();
2466 break;
2467 }
2468 case tok::at:
2469 nextToken();
2470 if (FormatTok->is(tok::l_brace)) {
2471 nextToken();
2472 parseBracedList();
2473 }
2474 break;
2475 default:
2476 nextToken();
2477 break;
2478 }
2479 } while (!eof());
2480 }
2481
keepAncestorBraces()2482 void UnwrappedLineParser::keepAncestorBraces() {
2483 if (!Style.RemoveBracesLLVM)
2484 return;
2485
2486 const int MaxNestingLevels = 2;
2487 const int Size = NestedTooDeep.size();
2488 if (Size >= MaxNestingLevels)
2489 NestedTooDeep[Size - MaxNestingLevels] = true;
2490 NestedTooDeep.push_back(false);
2491 }
2492
getLastNonComment(const UnwrappedLine & Line)2493 static FormatToken *getLastNonComment(const UnwrappedLine &Line) {
2494 for (const auto &Token : llvm::reverse(Line.Tokens))
2495 if (Token.Tok->isNot(tok::comment))
2496 return Token.Tok;
2497
2498 return nullptr;
2499 }
2500
parseUnbracedBody(bool CheckEOF)2501 void UnwrappedLineParser::parseUnbracedBody(bool CheckEOF) {
2502 FormatToken *Tok = nullptr;
2503
2504 if (Style.InsertBraces && !Line->InPPDirective && !Line->Tokens.empty() &&
2505 PreprocessorDirectives.empty()) {
2506 Tok = getLastNonComment(*Line);
2507 assert(Tok);
2508 if (Tok->BraceCount < 0) {
2509 assert(Tok->BraceCount == -1);
2510 Tok = nullptr;
2511 } else {
2512 Tok->BraceCount = -1;
2513 }
2514 }
2515
2516 addUnwrappedLine();
2517 ++Line->Level;
2518 parseStructuralElement();
2519
2520 if (Tok) {
2521 assert(!Line->InPPDirective);
2522 Tok = nullptr;
2523 for (const auto &L : llvm::reverse(*CurrentLines)) {
2524 if (!L.InPPDirective && getLastNonComment(L)) {
2525 Tok = L.Tokens.back().Tok;
2526 break;
2527 }
2528 }
2529 assert(Tok);
2530 ++Tok->BraceCount;
2531 }
2532
2533 if (CheckEOF && FormatTok->is(tok::eof))
2534 addUnwrappedLine();
2535
2536 --Line->Level;
2537 }
2538
markOptionalBraces(FormatToken * LeftBrace)2539 static void markOptionalBraces(FormatToken *LeftBrace) {
2540 if (!LeftBrace)
2541 return;
2542
2543 assert(LeftBrace->is(tok::l_brace));
2544
2545 FormatToken *RightBrace = LeftBrace->MatchingParen;
2546 if (!RightBrace) {
2547 assert(!LeftBrace->Optional);
2548 return;
2549 }
2550
2551 assert(RightBrace->is(tok::r_brace));
2552 assert(RightBrace->MatchingParen == LeftBrace);
2553 assert(LeftBrace->Optional == RightBrace->Optional);
2554
2555 LeftBrace->Optional = true;
2556 RightBrace->Optional = true;
2557 }
2558
handleAttributes()2559 void UnwrappedLineParser::handleAttributes() {
2560 // Handle AttributeMacro, e.g. `if (x) UNLIKELY`.
2561 if (FormatTok->is(TT_AttributeMacro))
2562 nextToken();
2563 handleCppAttributes();
2564 }
2565
handleCppAttributes()2566 bool UnwrappedLineParser::handleCppAttributes() {
2567 // Handle [[likely]] / [[unlikely]] attributes.
2568 if (FormatTok->is(tok::l_square) && tryToParseSimpleAttribute()) {
2569 parseSquare();
2570 return true;
2571 }
2572 return false;
2573 }
2574
/// Parses an `if` statement starting at the `if` keyword, including its
/// condition, body and any `else` / `else if` branches.
///
/// When `RemoveBracesLLVM` is enabled, the function also decides whether the
/// braces of the if and else bodies may be removed and marks them as optional
/// if so.
///
/// \param IfKind if non-null and `RemoveBracesLLVM` is enabled, receives the
/// kind of statement that was parsed (`IfOnly`, `IfElse` or `IfElseIf`). It is
/// not written when `RemoveBracesLLVM` is disabled.
/// \param KeepBraces if true, the braces of the if body are kept even when
/// `RemoveBracesLLVM` is enabled.
/// \returns nullptr when `RemoveBracesLLVM` is disabled; otherwise the left
/// brace of the if body, or nullptr if the body is not braced.
FormatToken *UnwrappedLineParser::parseIfThenElse(IfStmtKind *IfKind,
                                                  bool KeepBraces) {
  assert(FormatTok->is(tok::kw_if) && "'if' expected");
  nextToken();
  // Skip a '!' directly after 'if' (presumably for `if !consteval`).
  if (FormatTok->is(tok::exclaim))
    nextToken();

  // For `if consteval` the braces are always kept.
  bool KeepIfBraces = true;
  if (FormatTok->is(tok::kw_consteval)) {
    nextToken();
  } else {
    KeepIfBraces = !Style.RemoveBracesLLVM || KeepBraces;
    // Skip `constexpr` or a single identifier between 'if' and the condition.
    if (FormatTok->isOneOf(tok::kw_constexpr, tok::identifier))
      nextToken();
    if (FormatTok->is(tok::l_paren))
      parseParens();
  }
  handleAttributes();

  // Set when the line after the if block still has to be finished; this is
  // deferred so that a following `else` can stay on the same line as the '}'.
  bool NeedsUnwrappedLine = false;
  keepAncestorBraces();

  FormatToken *IfLeftBrace = nullptr;
  IfStmtKind IfBlockKind = IfStmtKind::NotIf;

  if (Keywords.isBlockBegin(*FormatTok, Style)) {
    FormatTok->setFinalizedType(TT_ControlStatementLBrace);
    IfLeftBrace = FormatTok;
    CompoundStatementIndenter Indenter(this, Style, Line->Level);
    parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
               /*MunchSemi=*/true, KeepIfBraces, &IfBlockKind);
    if (Style.BraceWrapping.BeforeElse)
      addUnwrappedLine();
    else
      NeedsUnwrappedLine = true;
  } else {
    parseUnbracedBody();
  }

  if (Style.RemoveBracesLLVM) {
    assert(!NestedTooDeep.empty());
    // Keep the if braces if they were unmatched, if this statement is nested
    // too deep, or if the block reported an if without else or an if/else-if
    // chain via IfBlockKind.
    KeepIfBraces = KeepIfBraces ||
                   (IfLeftBrace && !IfLeftBrace->MatchingParen) ||
                   NestedTooDeep.back() || IfBlockKind == IfStmtKind::IfOnly ||
                   IfBlockKind == IfStmtKind::IfElseIf;
  }

  bool KeepElseBraces = KeepIfBraces;
  FormatToken *ElseLeftBrace = nullptr;
  IfStmtKind Kind = IfStmtKind::IfOnly;

  if (FormatTok->is(tok::kw_else)) {
    if (Style.RemoveBracesLLVM) {
      // The else branch starts with a fresh "too deep" state.
      NestedTooDeep.back() = false;
      Kind = IfStmtKind::IfElse;
    }
    nextToken();
    handleAttributes();
    if (Keywords.isBlockBegin(*FormatTok, Style)) {
      // Remember whether the else block starts with an `if`.
      const bool FollowedByIf = Tokens->peekNextToken()->is(tok::kw_if);
      FormatTok->setFinalizedType(TT_ElseLBrace);
      ElseLeftBrace = FormatTok;
      CompoundStatementIndenter Indenter(this, Style, Line->Level);
      IfStmtKind ElseBlockKind = IfStmtKind::NotIf;
      FormatToken *IfLBrace =
          parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
                     /*MunchSemi=*/true, KeepElseBraces, &ElseBlockKind);
      if (FormatTok->is(tok::kw_else)) {
        // Another `else` follows the else block.
        KeepElseBraces = KeepElseBraces ||
                         ElseBlockKind == IfStmtKind::IfOnly ||
                         ElseBlockKind == IfStmtKind::IfElseIf;
      } else if (FollowedByIf && IfLBrace && !IfLBrace->Optional) {
        // `else { if (...) { ... } }` whose inner if braces were not marked
        // optional: mark the else braces optional right away instead.
        KeepElseBraces = true;
        assert(ElseLeftBrace->MatchingParen);
        markOptionalBraces(ElseLeftBrace);
      }
      addUnwrappedLine();
    } else if (FormatTok->is(tok::kw_if)) {
      // `else if`: parse the nested if statement recursively.
      const FormatToken *Previous = Tokens->getPreviousToken();
      assert(Previous);
      // A comment between `else` and `if` puts the `if` on its own line, one
      // level deeper.
      const bool IsPrecededByComment = Previous->is(tok::comment);
      if (IsPrecededByComment) {
        addUnwrappedLine();
        ++Line->Level;
      }
      bool TooDeep = true;
      if (Style.RemoveBracesLLVM) {
        Kind = IfStmtKind::IfElseIf;
        // Take this statement's entry off the stack while the nested if is
        // parsed and put it back afterwards.
        TooDeep = NestedTooDeep.pop_back_val();
      }
      ElseLeftBrace = parseIfThenElse(/*IfKind=*/nullptr, KeepIfBraces);
      if (Style.RemoveBracesLLVM)
        NestedTooDeep.push_back(TooDeep);
      if (IsPrecededByComment)
        --Line->Level;
    } else {
      parseUnbracedBody(/*CheckEOF=*/true);
    }
  } else {
    // No else branch: keep the braces if the block reported an if/else via
    // IfBlockKind, and finish the line that was deferred above.
    KeepIfBraces = KeepIfBraces || IfBlockKind == IfStmtKind::IfElse;
    if (NeedsUnwrappedLine)
      addUnwrappedLine();
  }

  // Everything below is bookkeeping for brace removal only.
  if (!Style.RemoveBracesLLVM)
    return nullptr;

  assert(!NestedTooDeep.empty());
  KeepElseBraces = KeepElseBraces ||
                   (ElseLeftBrace && !ElseLeftBrace->MatchingParen) ||
                   NestedTooDeep.back();

  // Drop the entry pushed by keepAncestorBraces() above.
  NestedTooDeep.pop_back();

  if (!KeepIfBraces && !KeepElseBraces) {
    markOptionalBraces(IfLeftBrace);
    markOptionalBraces(ElseLeftBrace);
  } else if (IfLeftBrace) {
    // The braces are kept: unlink the if brace pair from each other.
    // NOTE(review): presumably so that later stages no longer see them as a
    // matched, removable pair - confirm against the users of MatchingParen.
    FormatToken *IfRightBrace = IfLeftBrace->MatchingParen;
    if (IfRightBrace) {
      assert(IfRightBrace->MatchingParen == IfLeftBrace);
      assert(!IfLeftBrace->Optional);
      assert(!IfRightBrace->Optional);
      IfLeftBrace->MatchingParen = nullptr;
      IfRightBrace->MatchingParen = nullptr;
    }
  }

  if (IfKind)
    *IfKind = Kind;

  return IfLeftBrace;
}
2708
parseTryCatch()2709 void UnwrappedLineParser::parseTryCatch() {
2710 assert(FormatTok->isOneOf(tok::kw_try, tok::kw___try) && "'try' expected");
2711 nextToken();
2712 bool NeedsUnwrappedLine = false;
2713 if (FormatTok->is(tok::colon)) {
2714 // We are in a function try block, what comes is an initializer list.
2715 nextToken();
2716
2717 // In case identifiers were removed by clang-tidy, what might follow is
2718 // multiple commas in sequence - before the first identifier.
2719 while (FormatTok->is(tok::comma))
2720 nextToken();
2721
2722 while (FormatTok->is(tok::identifier)) {
2723 nextToken();
2724 if (FormatTok->is(tok::l_paren))
2725 parseParens();
2726 if (FormatTok->Previous && FormatTok->Previous->is(tok::identifier) &&
2727 FormatTok->is(tok::l_brace)) {
2728 do {
2729 nextToken();
2730 } while (!FormatTok->is(tok::r_brace));
2731 nextToken();
2732 }
2733
2734 // In case identifiers were removed by clang-tidy, what might follow is
2735 // multiple commas in sequence - after the first identifier.
2736 while (FormatTok->is(tok::comma))
2737 nextToken();
2738 }
2739 }
2740 // Parse try with resource.
2741 if (Style.Language == FormatStyle::LK_Java && FormatTok->is(tok::l_paren))
2742 parseParens();
2743
2744 keepAncestorBraces();
2745
2746 if (FormatTok->is(tok::l_brace)) {
2747 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2748 parseBlock();
2749 if (Style.BraceWrapping.BeforeCatch)
2750 addUnwrappedLine();
2751 else
2752 NeedsUnwrappedLine = true;
2753 } else if (!FormatTok->is(tok::kw_catch)) {
2754 // The C++ standard requires a compound-statement after a try.
2755 // If there's none, we try to assume there's a structuralElement
2756 // and try to continue.
2757 addUnwrappedLine();
2758 ++Line->Level;
2759 parseStructuralElement();
2760 --Line->Level;
2761 }
2762 while (true) {
2763 if (FormatTok->is(tok::at))
2764 nextToken();
2765 if (!(FormatTok->isOneOf(tok::kw_catch, Keywords.kw___except,
2766 tok::kw___finally) ||
2767 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
2768 FormatTok->is(Keywords.kw_finally)) ||
2769 (FormatTok->isObjCAtKeyword(tok::objc_catch) ||
2770 FormatTok->isObjCAtKeyword(tok::objc_finally)))) {
2771 break;
2772 }
2773 nextToken();
2774 while (FormatTok->isNot(tok::l_brace)) {
2775 if (FormatTok->is(tok::l_paren)) {
2776 parseParens();
2777 continue;
2778 }
2779 if (FormatTok->isOneOf(tok::semi, tok::r_brace, tok::eof)) {
2780 if (Style.RemoveBracesLLVM)
2781 NestedTooDeep.pop_back();
2782 return;
2783 }
2784 nextToken();
2785 }
2786 NeedsUnwrappedLine = false;
2787 Line->MustBeDeclaration = false;
2788 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2789 parseBlock();
2790 if (Style.BraceWrapping.BeforeCatch)
2791 addUnwrappedLine();
2792 else
2793 NeedsUnwrappedLine = true;
2794 }
2795
2796 if (Style.RemoveBracesLLVM)
2797 NestedTooDeep.pop_back();
2798
2799 if (NeedsUnwrappedLine)
2800 addUnwrappedLine();
2801 }
2802
parseNamespace()2803 void UnwrappedLineParser::parseNamespace() {
2804 assert(FormatTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
2805 "'namespace' expected");
2806
2807 const FormatToken &InitialToken = *FormatTok;
2808 nextToken();
2809 if (InitialToken.is(TT_NamespaceMacro)) {
2810 parseParens();
2811 } else {
2812 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::kw_inline,
2813 tok::l_square, tok::period, tok::l_paren) ||
2814 (Style.isCSharp() && FormatTok->is(tok::kw_union))) {
2815 if (FormatTok->is(tok::l_square))
2816 parseSquare();
2817 else if (FormatTok->is(tok::l_paren))
2818 parseParens();
2819 else
2820 nextToken();
2821 }
2822 }
2823 if (FormatTok->is(tok::l_brace)) {
2824 if (ShouldBreakBeforeBrace(Style, InitialToken))
2825 addUnwrappedLine();
2826
2827 unsigned AddLevels =
2828 Style.NamespaceIndentation == FormatStyle::NI_All ||
2829 (Style.NamespaceIndentation == FormatStyle::NI_Inner &&
2830 DeclarationScopeStack.size() > 1)
2831 ? 1u
2832 : 0u;
2833 bool ManageWhitesmithsBraces =
2834 AddLevels == 0u &&
2835 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
2836
2837 // If we're in Whitesmiths mode, indent the brace if we're not indenting
2838 // the whole block.
2839 if (ManageWhitesmithsBraces)
2840 ++Line->Level;
2841
2842 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/true,
2843 /*KeepBraces=*/true, /*IfKind=*/nullptr,
2844 ManageWhitesmithsBraces);
2845
2846 // Munch the semicolon after a namespace. This is more common than one would
2847 // think. Putting the semicolon into its own line is very ugly.
2848 if (FormatTok->is(tok::semi))
2849 nextToken();
2850
2851 addUnwrappedLine(AddLevels > 0 ? LineLevel::Remove : LineLevel::Keep);
2852
2853 if (ManageWhitesmithsBraces)
2854 --Line->Level;
2855 }
2856 // FIXME: Add error handling.
2857 }
2858
parseNew()2859 void UnwrappedLineParser::parseNew() {
2860 assert(FormatTok->is(tok::kw_new) && "'new' expected");
2861 nextToken();
2862
2863 if (Style.isCSharp()) {
2864 do {
2865 if (FormatTok->is(tok::l_brace))
2866 parseBracedList();
2867
2868 if (FormatTok->isOneOf(tok::semi, tok::comma))
2869 return;
2870
2871 nextToken();
2872 } while (!eof());
2873 }
2874
2875 if (Style.Language != FormatStyle::LK_Java)
2876 return;
2877
2878 // In Java, we can parse everything up to the parens, which aren't optional.
2879 do {
2880 // There should not be a ;, { or } before the new's open paren.
2881 if (FormatTok->isOneOf(tok::semi, tok::l_brace, tok::r_brace))
2882 return;
2883
2884 // Consume the parens.
2885 if (FormatTok->is(tok::l_paren)) {
2886 parseParens();
2887
2888 // If there is a class body of an anonymous class, consume that as child.
2889 if (FormatTok->is(tok::l_brace))
2890 parseChildBlock();
2891 return;
2892 }
2893 nextToken();
2894 } while (!eof());
2895 }
2896
parseLoopBody(bool KeepBraces,bool WrapRightBrace)2897 void UnwrappedLineParser::parseLoopBody(bool KeepBraces, bool WrapRightBrace) {
2898 keepAncestorBraces();
2899
2900 if (Keywords.isBlockBegin(*FormatTok, Style)) {
2901 if (!KeepBraces)
2902 FormatTok->setFinalizedType(TT_ControlStatementLBrace);
2903 FormatToken *LeftBrace = FormatTok;
2904 CompoundStatementIndenter Indenter(this, Style, Line->Level);
2905 parseBlock(/*MustBeDeclaration=*/false, /*AddLevels=*/1u,
2906 /*MunchSemi=*/true, KeepBraces);
2907 if (!KeepBraces) {
2908 assert(!NestedTooDeep.empty());
2909 if (!NestedTooDeep.back())
2910 markOptionalBraces(LeftBrace);
2911 }
2912 if (WrapRightBrace)
2913 addUnwrappedLine();
2914 } else {
2915 parseUnbracedBody();
2916 }
2917
2918 if (!KeepBraces)
2919 NestedTooDeep.pop_back();
2920 }
2921
parseForOrWhileLoop()2922 void UnwrappedLineParser::parseForOrWhileLoop() {
2923 assert(FormatTok->isOneOf(tok::kw_for, tok::kw_while, TT_ForEachMacro) &&
2924 "'for', 'while' or foreach macro expected");
2925 const bool KeepBraces = !Style.RemoveBracesLLVM ||
2926 !FormatTok->isOneOf(tok::kw_for, tok::kw_while);
2927
2928 nextToken();
2929 // JS' for await ( ...
2930 if (Style.isJavaScript() && FormatTok->is(Keywords.kw_await))
2931 nextToken();
2932 if (Style.isCpp() && FormatTok->is(tok::kw_co_await))
2933 nextToken();
2934 if (FormatTok->is(tok::l_paren))
2935 parseParens();
2936
2937 handleAttributes();
2938 parseLoopBody(KeepBraces, /*WrapRightBrace=*/true);
2939 }
2940
parseDoWhile()2941 void UnwrappedLineParser::parseDoWhile() {
2942 assert(FormatTok->is(tok::kw_do) && "'do' expected");
2943 nextToken();
2944
2945 parseLoopBody(/*KeepBraces=*/true, Style.BraceWrapping.BeforeWhile);
2946
2947 // FIXME: Add error handling.
2948 if (!FormatTok->is(tok::kw_while)) {
2949 addUnwrappedLine();
2950 return;
2951 }
2952
2953 // If in Whitesmiths mode, the line with the while() needs to be indented
2954 // to the same level as the block.
2955 if (Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths)
2956 ++Line->Level;
2957
2958 nextToken();
2959 parseStructuralElement();
2960 }
2961
parseLabel(bool LeftAlignLabel)2962 void UnwrappedLineParser::parseLabel(bool LeftAlignLabel) {
2963 nextToken();
2964 unsigned OldLineLevel = Line->Level;
2965 if (Line->Level > 1 || (!Line->InPPDirective && Line->Level > 0))
2966 --Line->Level;
2967 if (LeftAlignLabel)
2968 Line->Level = 0;
2969
2970 if (!Style.IndentCaseBlocks && CommentsBeforeNextToken.empty() &&
2971 FormatTok->is(tok::l_brace)) {
2972
2973 CompoundStatementIndenter Indenter(this, Line->Level,
2974 Style.BraceWrapping.AfterCaseLabel,
2975 Style.BraceWrapping.IndentBraces);
2976 parseBlock();
2977 if (FormatTok->is(tok::kw_break)) {
2978 if (Style.BraceWrapping.AfterControlStatement ==
2979 FormatStyle::BWACS_Always) {
2980 addUnwrappedLine();
2981 if (!Style.IndentCaseBlocks &&
2982 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths) {
2983 ++Line->Level;
2984 }
2985 }
2986 parseStructuralElement();
2987 }
2988 addUnwrappedLine();
2989 } else {
2990 if (FormatTok->is(tok::semi))
2991 nextToken();
2992 addUnwrappedLine();
2993 }
2994 Line->Level = OldLineLevel;
2995 if (FormatTok->isNot(tok::l_brace)) {
2996 parseStructuralElement();
2997 addUnwrappedLine();
2998 }
2999 }
3000
parseCaseLabel()3001 void UnwrappedLineParser::parseCaseLabel() {
3002 assert(FormatTok->is(tok::kw_case) && "'case' expected");
3003
3004 // FIXME: fix handling of complex expressions here.
3005 do {
3006 nextToken();
3007 } while (!eof() && !FormatTok->is(tok::colon));
3008 parseLabel();
3009 }
3010
parseSwitch()3011 void UnwrappedLineParser::parseSwitch() {
3012 assert(FormatTok->is(tok::kw_switch) && "'switch' expected");
3013 nextToken();
3014 if (FormatTok->is(tok::l_paren))
3015 parseParens();
3016
3017 keepAncestorBraces();
3018
3019 if (FormatTok->is(tok::l_brace)) {
3020 CompoundStatementIndenter Indenter(this, Style, Line->Level);
3021 parseBlock();
3022 addUnwrappedLine();
3023 } else {
3024 addUnwrappedLine();
3025 ++Line->Level;
3026 parseStructuralElement();
3027 --Line->Level;
3028 }
3029
3030 if (Style.RemoveBracesLLVM)
3031 NestedTooDeep.pop_back();
3032 }
3033
3034 // Operators that can follow a C variable.
isCOperatorFollowingVar(tok::TokenKind kind)3035 static bool isCOperatorFollowingVar(tok::TokenKind kind) {
3036 switch (kind) {
3037 case tok::ampamp:
3038 case tok::ampequal:
3039 case tok::arrow:
3040 case tok::caret:
3041 case tok::caretequal:
3042 case tok::comma:
3043 case tok::ellipsis:
3044 case tok::equal:
3045 case tok::equalequal:
3046 case tok::exclaim:
3047 case tok::exclaimequal:
3048 case tok::greater:
3049 case tok::greaterequal:
3050 case tok::greatergreater:
3051 case tok::greatergreaterequal:
3052 case tok::l_paren:
3053 case tok::l_square:
3054 case tok::less:
3055 case tok::lessequal:
3056 case tok::lessless:
3057 case tok::lesslessequal:
3058 case tok::minus:
3059 case tok::minusequal:
3060 case tok::minusminus:
3061 case tok::percent:
3062 case tok::percentequal:
3063 case tok::period:
3064 case tok::pipe:
3065 case tok::pipeequal:
3066 case tok::pipepipe:
3067 case tok::plus:
3068 case tok::plusequal:
3069 case tok::plusplus:
3070 case tok::question:
3071 case tok::r_brace:
3072 case tok::r_paren:
3073 case tok::r_square:
3074 case tok::semi:
3075 case tok::slash:
3076 case tok::slashequal:
3077 case tok::star:
3078 case tok::starequal:
3079 return true;
3080 default:
3081 return false;
3082 }
3083 }
3084
parseAccessSpecifier()3085 void UnwrappedLineParser::parseAccessSpecifier() {
3086 FormatToken *AccessSpecifierCandidate = FormatTok;
3087 nextToken();
3088 // Understand Qt's slots.
3089 if (FormatTok->isOneOf(Keywords.kw_slots, Keywords.kw_qslots))
3090 nextToken();
3091 // Otherwise, we don't know what it is, and we'd better keep the next token.
3092 if (FormatTok->is(tok::colon)) {
3093 nextToken();
3094 addUnwrappedLine();
3095 } else if (!FormatTok->is(tok::coloncolon) &&
3096 !isCOperatorFollowingVar(FormatTok->Tok.getKind())) {
3097 // Not a variable name nor namespace name.
3098 addUnwrappedLine();
3099 } else if (AccessSpecifierCandidate) {
3100 // Consider the access specifier to be a C identifier.
3101 AccessSpecifierCandidate->Tok.setKind(tok::identifier);
3102 }
3103 }
3104
3105 /// \brief Parses a concept definition.
3106 /// \pre The current token has to be the concept keyword.
3107 ///
3108 /// Returns if either the concept has been completely parsed, or if it detects
3109 /// that the concept definition is incorrect.
parseConcept()3110 void UnwrappedLineParser::parseConcept() {
3111 assert(FormatTok->is(tok::kw_concept) && "'concept' expected");
3112 nextToken();
3113 if (!FormatTok->is(tok::identifier))
3114 return;
3115 nextToken();
3116 if (!FormatTok->is(tok::equal))
3117 return;
3118 nextToken();
3119 parseConstraintExpression();
3120 if (FormatTok->is(tok::semi))
3121 nextToken();
3122 addUnwrappedLine();
3123 }
3124
3125 /// \brief Parses a requires, decides if it is a clause or an expression.
3126 /// \pre The current token has to be the requires keyword.
3127 /// \returns true if it parsed a clause.
parseRequires()3128 bool clang::format::UnwrappedLineParser::parseRequires() {
3129 assert(FormatTok->is(tok::kw_requires) && "'requires' expected");
3130 auto RequiresToken = FormatTok;
3131
3132 // We try to guess if it is a requires clause, or a requires expression. For
3133 // that we first consume the keyword and check the next token.
3134 nextToken();
3135
3136 switch (FormatTok->Tok.getKind()) {
3137 case tok::l_brace:
3138 // This can only be an expression, never a clause.
3139 parseRequiresExpression(RequiresToken);
3140 return false;
3141 case tok::l_paren:
3142 // Clauses and expression can start with a paren, it's unclear what we have.
3143 break;
3144 default:
3145 // All other tokens can only be a clause.
3146 parseRequiresClause(RequiresToken);
3147 return true;
3148 }
3149
3150 // Looking forward we would have to decide if there are function declaration
3151 // like arguments to the requires expression:
3152 // requires (T t) {
3153 // Or there is a constraint expression for the requires clause:
3154 // requires (C<T> && ...
3155
3156 // But first let's look behind.
3157 auto *PreviousNonComment = RequiresToken->getPreviousNonComment();
3158
3159 if (!PreviousNonComment ||
3160 PreviousNonComment->is(TT_RequiresExpressionLBrace)) {
3161 // If there is no token, or an expression left brace, we are a requires
3162 // clause within a requires expression.
3163 parseRequiresClause(RequiresToken);
3164 return true;
3165 }
3166
3167 switch (PreviousNonComment->Tok.getKind()) {
3168 case tok::greater:
3169 case tok::r_paren:
3170 case tok::kw_noexcept:
3171 case tok::kw_const:
3172 // This is a requires clause.
3173 parseRequiresClause(RequiresToken);
3174 return true;
3175 case tok::amp:
3176 case tok::ampamp: {
3177 // This can be either:
3178 // if (... && requires (T t) ...)
3179 // Or
3180 // void member(...) && requires (C<T> ...
3181 // We check the one token before that for a const:
3182 // void member(...) const && requires (C<T> ...
3183 auto PrevPrev = PreviousNonComment->getPreviousNonComment();
3184 if (PrevPrev && PrevPrev->is(tok::kw_const)) {
3185 parseRequiresClause(RequiresToken);
3186 return true;
3187 }
3188 break;
3189 }
3190 default:
3191 if (PreviousNonComment->isTypeOrIdentifier()) {
3192 // This is a requires clause.
3193 parseRequiresClause(RequiresToken);
3194 return true;
3195 }
3196 // It's an expression.
3197 parseRequiresExpression(RequiresToken);
3198 return false;
3199 }
3200
3201 // Now we look forward and try to check if the paren content is a parameter
3202 // list. The parameters can be cv-qualified and contain references or
3203 // pointers.
3204 // So we want basically to check for TYPE NAME, but TYPE can contain all kinds
3205 // of stuff: typename, const, *, &, &&, ::, identifiers.
3206
3207 int NextTokenOffset = 1;
3208 auto NextToken = Tokens->peekNextToken(NextTokenOffset);
3209 auto PeekNext = [&NextTokenOffset, &NextToken, this] {
3210 ++NextTokenOffset;
3211 NextToken = Tokens->peekNextToken(NextTokenOffset);
3212 };
3213
3214 bool FoundType = false;
3215 bool LastWasColonColon = false;
3216 int OpenAngles = 0;
3217
3218 for (; NextTokenOffset < 50; PeekNext()) {
3219 switch (NextToken->Tok.getKind()) {
3220 case tok::kw_volatile:
3221 case tok::kw_const:
3222 case tok::comma:
3223 parseRequiresExpression(RequiresToken);
3224 return false;
3225 case tok::r_paren:
3226 case tok::pipepipe:
3227 parseRequiresClause(RequiresToken);
3228 return true;
3229 case tok::eof:
3230 // Break out of the loop.
3231 NextTokenOffset = 50;
3232 break;
3233 case tok::coloncolon:
3234 LastWasColonColon = true;
3235 break;
3236 case tok::identifier:
3237 if (FoundType && !LastWasColonColon && OpenAngles == 0) {
3238 parseRequiresExpression(RequiresToken);
3239 return false;
3240 }
3241 FoundType = true;
3242 LastWasColonColon = false;
3243 break;
3244 case tok::less:
3245 ++OpenAngles;
3246 break;
3247 case tok::greater:
3248 --OpenAngles;
3249 break;
3250 default:
3251 if (NextToken->isSimpleTypeSpecifier()) {
3252 parseRequiresExpression(RequiresToken);
3253 return false;
3254 }
3255 break;
3256 }
3257 }
3258
3259 // This seems to be a complicated expression, just assume it's a clause.
3260 parseRequiresClause(RequiresToken);
3261 return true;
3262 }
3263
3264 /// \brief Parses a requires clause.
3265 /// \param RequiresToken The requires keyword token, which starts this clause.
3266 /// \pre We need to be on the next token after the requires keyword.
3267 /// \sa parseRequiresExpression
3268 ///
3269 /// Returns if it either has finished parsing the clause, or it detects, that
3270 /// the clause is incorrect.
parseRequiresClause(FormatToken * RequiresToken)3271 void UnwrappedLineParser::parseRequiresClause(FormatToken *RequiresToken) {
3272 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3273 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3274
3275 // If there is no previous token, we are within a requires expression,
3276 // otherwise we will always have the template or function declaration in front
3277 // of it.
3278 bool InRequiresExpression =
3279 !RequiresToken->Previous ||
3280 RequiresToken->Previous->is(TT_RequiresExpressionLBrace);
3281
3282 RequiresToken->setFinalizedType(InRequiresExpression
3283 ? TT_RequiresClauseInARequiresExpression
3284 : TT_RequiresClause);
3285
3286 parseConstraintExpression();
3287
3288 if (!InRequiresExpression)
3289 FormatTok->Previous->ClosesRequiresClause = true;
3290 }
3291
3292 /// \brief Parses a requires expression.
3293 /// \param RequiresToken The requires keyword token, which starts this clause.
3294 /// \pre We need to be on the next token after the requires keyword.
3295 /// \sa parseRequiresClause
3296 ///
3297 /// Returns if it either has finished parsing the expression, or it detects,
3298 /// that the expression is incorrect.
parseRequiresExpression(FormatToken * RequiresToken)3299 void UnwrappedLineParser::parseRequiresExpression(FormatToken *RequiresToken) {
3300 assert(FormatTok->getPreviousNonComment() == RequiresToken);
3301 assert(RequiresToken->is(tok::kw_requires) && "'requires' expected");
3302
3303 RequiresToken->setFinalizedType(TT_RequiresExpression);
3304
3305 if (FormatTok->is(tok::l_paren)) {
3306 FormatTok->setFinalizedType(TT_RequiresExpressionLParen);
3307 parseParens();
3308 }
3309
3310 if (FormatTok->is(tok::l_brace)) {
3311 FormatTok->setFinalizedType(TT_RequiresExpressionLBrace);
3312 parseChildBlock(/*CanContainBracedList=*/false,
3313 /*NextLBracesType=*/TT_CompoundRequirementLBrace);
3314 }
3315 }
3316
3317 /// \brief Parses a constraint expression.
3318 ///
3319 /// This is either the definition of a concept, or the body of a requires
3320 /// clause. It returns, when the parsing is complete, or the expression is
3321 /// incorrect.
parseConstraintExpression()3322 void UnwrappedLineParser::parseConstraintExpression() {
3323 // The special handling for lambdas is needed since tryToParseLambda() eats a
3324 // token and if a requires expression is the last part of a requires clause
3325 // and followed by an attribute like [[nodiscard]] the ClosesRequiresClause is
3326 // not set on the correct token. Thus we need to be aware if we even expect a
3327 // lambda to be possible.
3328 // template <typename T> requires requires { ... } [[nodiscard]] ...;
3329 bool LambdaNextTimeAllowed = true;
3330 do {
3331 bool LambdaThisTimeAllowed = std::exchange(LambdaNextTimeAllowed, false);
3332
3333 switch (FormatTok->Tok.getKind()) {
3334 case tok::kw_requires: {
3335 auto RequiresToken = FormatTok;
3336 nextToken();
3337 parseRequiresExpression(RequiresToken);
3338 break;
3339 }
3340
3341 case tok::l_paren:
3342 parseParens(/*AmpAmpTokenType=*/TT_BinaryOperator);
3343 break;
3344
3345 case tok::l_square:
3346 if (!LambdaThisTimeAllowed || !tryToParseLambda())
3347 return;
3348 break;
3349
3350 case tok::kw_const:
3351 case tok::semi:
3352 case tok::kw_class:
3353 case tok::kw_struct:
3354 case tok::kw_union:
3355 return;
3356
3357 case tok::l_brace:
3358 // Potential function body.
3359 return;
3360
3361 case tok::ampamp:
3362 case tok::pipepipe:
3363 FormatTok->setFinalizedType(TT_BinaryOperator);
3364 nextToken();
3365 LambdaNextTimeAllowed = true;
3366 break;
3367
3368 case tok::comma:
3369 case tok::comment:
3370 LambdaNextTimeAllowed = LambdaThisTimeAllowed;
3371 nextToken();
3372 break;
3373
3374 case tok::kw_sizeof:
3375 case tok::greater:
3376 case tok::greaterequal:
3377 case tok::greatergreater:
3378 case tok::less:
3379 case tok::lessequal:
3380 case tok::lessless:
3381 case tok::equalequal:
3382 case tok::exclaim:
3383 case tok::exclaimequal:
3384 case tok::plus:
3385 case tok::minus:
3386 case tok::star:
3387 case tok::slash:
3388 case tok::kw_decltype:
3389 LambdaNextTimeAllowed = true;
3390 // Just eat them.
3391 nextToken();
3392 break;
3393
3394 case tok::numeric_constant:
3395 case tok::coloncolon:
3396 case tok::kw_true:
3397 case tok::kw_false:
3398 // Just eat them.
3399 nextToken();
3400 break;
3401
3402 case tok::kw_static_cast:
3403 case tok::kw_const_cast:
3404 case tok::kw_reinterpret_cast:
3405 case tok::kw_dynamic_cast:
3406 nextToken();
3407 if (!FormatTok->is(tok::less))
3408 return;
3409
3410 nextToken();
3411 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3412 /*ClosingBraceKind=*/tok::greater);
3413 break;
3414
3415 case tok::kw_bool:
3416 // bool is only allowed if it is directly followed by a paren for a cast:
3417 // concept C = bool(...);
3418 // and bool is the only type, all other types as cast must be inside a
3419 // cast to bool an thus are handled by the other cases.
3420 nextToken();
3421 if (FormatTok->isNot(tok::l_paren))
3422 return;
3423 parseParens();
3424 break;
3425
3426 default:
3427 if (!FormatTok->Tok.getIdentifierInfo()) {
3428 // Identifiers are part of the default case, we check for more then
3429 // tok::identifier to handle builtin type traits.
3430 return;
3431 }
3432
3433 // We need to differentiate identifiers for a template deduction guide,
3434 // variables, or function return types (the constraint expression has
3435 // ended before that), and basically all other cases. But it's easier to
3436 // check the other way around.
3437 assert(FormatTok->Previous);
3438 switch (FormatTok->Previous->Tok.getKind()) {
3439 case tok::coloncolon: // Nested identifier.
3440 case tok::ampamp: // Start of a function or variable for the
3441 case tok::pipepipe: // constraint expression.
3442 case tok::kw_requires: // Initial identifier of a requires clause.
3443 case tok::equal: // Initial identifier of a concept declaration.
3444 break;
3445 default:
3446 return;
3447 }
3448
3449 // Read identifier with optional template declaration.
3450 nextToken();
3451 if (FormatTok->is(tok::less)) {
3452 nextToken();
3453 parseBracedList(/*ContinueOnSemicolons=*/false, /*IsEnum=*/false,
3454 /*ClosingBraceKind=*/tok::greater);
3455 }
3456 break;
3457 }
3458 } while (!eof());
3459 }
3460
parseEnum()3461 bool UnwrappedLineParser::parseEnum() {
3462 const FormatToken &InitialToken = *FormatTok;
3463
3464 // Won't be 'enum' for NS_ENUMs.
3465 if (FormatTok->is(tok::kw_enum))
3466 nextToken();
3467
3468 // In TypeScript, "enum" can also be used as property name, e.g. in interface
3469 // declarations. An "enum" keyword followed by a colon would be a syntax
3470 // error and thus assume it is just an identifier.
3471 if (Style.isJavaScript() && FormatTok->isOneOf(tok::colon, tok::question))
3472 return false;
3473
3474 // In protobuf, "enum" can be used as a field name.
3475 if (Style.Language == FormatStyle::LK_Proto && FormatTok->is(tok::equal))
3476 return false;
3477
3478 // Eat up enum class ...
3479 if (FormatTok->isOneOf(tok::kw_class, tok::kw_struct))
3480 nextToken();
3481
3482 while (FormatTok->Tok.getIdentifierInfo() ||
3483 FormatTok->isOneOf(tok::colon, tok::coloncolon, tok::less,
3484 tok::greater, tok::comma, tok::question,
3485 tok::l_square, tok::r_square)) {
3486 nextToken();
3487 // We can have macros or attributes in between 'enum' and the enum name.
3488 if (FormatTok->is(tok::l_paren))
3489 parseParens();
3490 if (FormatTok->is(TT_AttributeSquare)) {
3491 parseSquare();
3492 // Consume the closing TT_AttributeSquare.
3493 if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
3494 nextToken();
3495 }
3496 if (FormatTok->is(tok::identifier)) {
3497 nextToken();
3498 // If there are two identifiers in a row, this is likely an elaborate
3499 // return type. In Java, this can be "implements", etc.
3500 if (Style.isCpp() && FormatTok->is(tok::identifier))
3501 return false;
3502 }
3503 }
3504
3505 // Just a declaration or something is wrong.
3506 if (FormatTok->isNot(tok::l_brace))
3507 return true;
3508 FormatTok->setFinalizedType(TT_EnumLBrace);
3509 FormatTok->setBlockKind(BK_Block);
3510
3511 if (Style.Language == FormatStyle::LK_Java) {
3512 // Java enums are different.
3513 parseJavaEnumBody();
3514 return true;
3515 }
3516 if (Style.Language == FormatStyle::LK_Proto) {
3517 parseBlock(/*MustBeDeclaration=*/true);
3518 return true;
3519 }
3520
3521 if (!Style.AllowShortEnumsOnASingleLine &&
3522 ShouldBreakBeforeBrace(Style, InitialToken)) {
3523 addUnwrappedLine();
3524 }
3525 // Parse enum body.
3526 nextToken();
3527 if (!Style.AllowShortEnumsOnASingleLine) {
3528 addUnwrappedLine();
3529 Line->Level += 1;
3530 }
3531 bool HasError = !parseBracedList(/*ContinueOnSemicolons=*/true,
3532 /*IsEnum=*/true);
3533 if (!Style.AllowShortEnumsOnASingleLine)
3534 Line->Level -= 1;
3535 if (HasError) {
3536 if (FormatTok->is(tok::semi))
3537 nextToken();
3538 addUnwrappedLine();
3539 }
3540 return true;
3541
3542 // There is no addUnwrappedLine() here so that we fall through to parsing a
3543 // structural element afterwards. Thus, in "enum A {} n, m;",
3544 // "} n, m;" will end up in one unwrapped line.
3545 }
3546
parseStructLike()3547 bool UnwrappedLineParser::parseStructLike() {
3548 // parseRecord falls through and does not yet add an unwrapped line as a
3549 // record declaration or definition can start a structural element.
3550 parseRecord();
3551 // This does not apply to Java, JavaScript and C#.
3552 if (Style.Language == FormatStyle::LK_Java || Style.isJavaScript() ||
3553 Style.isCSharp()) {
3554 if (FormatTok->is(tok::semi))
3555 nextToken();
3556 addUnwrappedLine();
3557 return true;
3558 }
3559 return false;
3560 }
3561
3562 namespace {
3563 // A class used to set and restore the Token position when peeking
3564 // ahead in the token source.
3565 class ScopedTokenPosition {
3566 unsigned StoredPosition;
3567 FormatTokenSource *Tokens;
3568
3569 public:
ScopedTokenPosition(FormatTokenSource * Tokens)3570 ScopedTokenPosition(FormatTokenSource *Tokens) : Tokens(Tokens) {
3571 assert(Tokens && "Tokens expected to not be null");
3572 StoredPosition = Tokens->getPosition();
3573 }
3574
~ScopedTokenPosition()3575 ~ScopedTokenPosition() { Tokens->setPosition(StoredPosition); }
3576 };
3577 } // namespace
3578
3579 // Look to see if we have [[ by looking ahead, if
3580 // its not then rewind to the original position.
tryToParseSimpleAttribute()3581 bool UnwrappedLineParser::tryToParseSimpleAttribute() {
3582 ScopedTokenPosition AutoPosition(Tokens);
3583 FormatToken *Tok = Tokens->getNextToken();
3584 // We already read the first [ check for the second.
3585 if (!Tok->is(tok::l_square))
3586 return false;
3587 // Double check that the attribute is just something
3588 // fairly simple.
3589 while (Tok->isNot(tok::eof)) {
3590 if (Tok->is(tok::r_square))
3591 break;
3592 Tok = Tokens->getNextToken();
3593 }
3594 if (Tok->is(tok::eof))
3595 return false;
3596 Tok = Tokens->getNextToken();
3597 if (!Tok->is(tok::r_square))
3598 return false;
3599 Tok = Tokens->getNextToken();
3600 if (Tok->is(tok::semi))
3601 return false;
3602 return true;
3603 }
3604
parseJavaEnumBody()3605 void UnwrappedLineParser::parseJavaEnumBody() {
3606 assert(FormatTok->is(tok::l_brace));
3607 const FormatToken *OpeningBrace = FormatTok;
3608
3609 // Determine whether the enum is simple, i.e. does not have a semicolon or
3610 // constants with class bodies. Simple enums can be formatted like braced
3611 // lists, contracted to a single line, etc.
3612 unsigned StoredPosition = Tokens->getPosition();
3613 bool IsSimple = true;
3614 FormatToken *Tok = Tokens->getNextToken();
3615 while (!Tok->is(tok::eof)) {
3616 if (Tok->is(tok::r_brace))
3617 break;
3618 if (Tok->isOneOf(tok::l_brace, tok::semi)) {
3619 IsSimple = false;
3620 break;
3621 }
3622 // FIXME: This will also mark enums with braces in the arguments to enum
3623 // constants as "not simple". This is probably fine in practice, though.
3624 Tok = Tokens->getNextToken();
3625 }
3626 FormatTok = Tokens->setPosition(StoredPosition);
3627
3628 if (IsSimple) {
3629 nextToken();
3630 parseBracedList();
3631 addUnwrappedLine();
3632 return;
3633 }
3634
3635 // Parse the body of a more complex enum.
3636 // First add a line for everything up to the "{".
3637 nextToken();
3638 addUnwrappedLine();
3639 ++Line->Level;
3640
3641 // Parse the enum constants.
3642 while (FormatTok->isNot(tok::eof)) {
3643 if (FormatTok->is(tok::l_brace)) {
3644 // Parse the constant's class body.
3645 parseBlock(/*MustBeDeclaration=*/true, /*AddLevels=*/1u,
3646 /*MunchSemi=*/false);
3647 } else if (FormatTok->is(tok::l_paren)) {
3648 parseParens();
3649 } else if (FormatTok->is(tok::comma)) {
3650 nextToken();
3651 addUnwrappedLine();
3652 } else if (FormatTok->is(tok::semi)) {
3653 nextToken();
3654 addUnwrappedLine();
3655 break;
3656 } else if (FormatTok->is(tok::r_brace)) {
3657 addUnwrappedLine();
3658 break;
3659 } else {
3660 nextToken();
3661 }
3662 }
3663
3664 // Parse the class body after the enum's ";" if any.
3665 parseLevel(OpeningBrace);
3666 nextToken();
3667 --Line->Level;
3668 addUnwrappedLine();
3669 }
3670
parseRecord(bool ParseAsExpr)3671 void UnwrappedLineParser::parseRecord(bool ParseAsExpr) {
3672 const FormatToken &InitialToken = *FormatTok;
3673 nextToken();
3674
3675 // The actual identifier can be a nested name specifier, and in macros
3676 // it is often token-pasted.
3677 // An [[attribute]] can be before the identifier.
3678 while (FormatTok->isOneOf(tok::identifier, tok::coloncolon, tok::hashhash,
3679 tok::kw___attribute, tok::kw___declspec,
3680 tok::kw_alignas, tok::l_square, tok::r_square) ||
3681 ((Style.Language == FormatStyle::LK_Java || Style.isJavaScript()) &&
3682 FormatTok->isOneOf(tok::period, tok::comma))) {
3683 if (Style.isJavaScript() &&
3684 FormatTok->isOneOf(Keywords.kw_extends, Keywords.kw_implements)) {
3685 // JavaScript/TypeScript supports inline object types in
3686 // extends/implements positions:
3687 // class Foo implements {bar: number} { }
3688 nextToken();
3689 if (FormatTok->is(tok::l_brace)) {
3690 tryToParseBracedList();
3691 continue;
3692 }
3693 }
3694 bool IsNonMacroIdentifier =
3695 FormatTok->is(tok::identifier) &&
3696 FormatTok->TokenText != FormatTok->TokenText.upper();
3697 nextToken();
3698 // We can have macros or attributes in between 'class' and the class name.
3699 if (!IsNonMacroIdentifier) {
3700 if (FormatTok->is(tok::l_paren)) {
3701 parseParens();
3702 } else if (FormatTok->is(TT_AttributeSquare)) {
3703 parseSquare();
3704 // Consume the closing TT_AttributeSquare.
3705 if (FormatTok->Next && FormatTok->is(TT_AttributeSquare))
3706 nextToken();
3707 }
3708 }
3709 }
3710
3711 // Note that parsing away template declarations here leads to incorrectly
3712 // accepting function declarations as record declarations.
3713 // In general, we cannot solve this problem. Consider:
3714 // class A<int> B() {}
3715 // which can be a function definition or a class definition when B() is a
3716 // macro. If we find enough real-world cases where this is a problem, we
3717 // can parse for the 'template' keyword in the beginning of the statement,
3718 // and thus rule out the record production in case there is no template
3719 // (this would still leave us with an ambiguity between template function
3720 // and class declarations).
3721 if (FormatTok->isOneOf(tok::colon, tok::less)) {
3722 do {
3723 if (FormatTok->is(tok::l_brace)) {
3724 calculateBraceTypes(/*ExpectClassBody=*/true);
3725 if (!tryToParseBracedList())
3726 break;
3727 }
3728 if (FormatTok->is(tok::l_square)) {
3729 FormatToken *Previous = FormatTok->Previous;
3730 if (!Previous ||
3731 !(Previous->is(tok::r_paren) || Previous->isTypeOrIdentifier())) {
3732 // Don't try parsing a lambda if we had a closing parenthesis before,
3733 // it was probably a pointer to an array: int (*)[].
3734 if (!tryToParseLambda())
3735 break;
3736 } else {
3737 parseSquare();
3738 continue;
3739 }
3740 }
3741 if (FormatTok->is(tok::semi))
3742 return;
3743 if (Style.isCSharp() && FormatTok->is(Keywords.kw_where)) {
3744 addUnwrappedLine();
3745 nextToken();
3746 parseCSharpGenericTypeConstraint();
3747 break;
3748 }
3749 nextToken();
3750 } while (!eof());
3751 }
3752
3753 auto GetBraceType = [](const FormatToken &RecordTok) {
3754 switch (RecordTok.Tok.getKind()) {
3755 case tok::kw_class:
3756 return TT_ClassLBrace;
3757 case tok::kw_struct:
3758 return TT_StructLBrace;
3759 case tok::kw_union:
3760 return TT_UnionLBrace;
3761 default:
3762 // Useful for e.g. interface.
3763 return TT_RecordLBrace;
3764 }
3765 };
3766 if (FormatTok->is(tok::l_brace)) {
3767 FormatTok->setFinalizedType(GetBraceType(InitialToken));
3768 if (ParseAsExpr) {
3769 parseChildBlock();
3770 } else {
3771 if (ShouldBreakBeforeBrace(Style, InitialToken))
3772 addUnwrappedLine();
3773
3774 unsigned AddLevels = Style.IndentAccessModifiers ? 2u : 1u;
3775 parseBlock(/*MustBeDeclaration=*/true, AddLevels, /*MunchSemi=*/false);
3776 }
3777 }
3778 // There is no addUnwrappedLine() here so that we fall through to parsing a
3779 // structural element afterwards. Thus, in "class A {} n, m;",
3780 // "} n, m;" will end up in one unwrapped line.
3781 }
3782
parseObjCMethod()3783 void UnwrappedLineParser::parseObjCMethod() {
3784 assert(FormatTok->isOneOf(tok::l_paren, tok::identifier) &&
3785 "'(' or identifier expected.");
3786 do {
3787 if (FormatTok->is(tok::semi)) {
3788 nextToken();
3789 addUnwrappedLine();
3790 return;
3791 } else if (FormatTok->is(tok::l_brace)) {
3792 if (Style.BraceWrapping.AfterFunction)
3793 addUnwrappedLine();
3794 parseBlock();
3795 addUnwrappedLine();
3796 return;
3797 } else {
3798 nextToken();
3799 }
3800 } while (!eof());
3801 }
3802
parseObjCProtocolList()3803 void UnwrappedLineParser::parseObjCProtocolList() {
3804 assert(FormatTok->is(tok::less) && "'<' expected.");
3805 do {
3806 nextToken();
3807 // Early exit in case someone forgot a close angle.
3808 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3809 FormatTok->isObjCAtKeyword(tok::objc_end)) {
3810 return;
3811 }
3812 } while (!eof() && FormatTok->isNot(tok::greater));
3813 nextToken(); // Skip '>'.
3814 }
3815
parseObjCUntilAtEnd()3816 void UnwrappedLineParser::parseObjCUntilAtEnd() {
3817 do {
3818 if (FormatTok->isObjCAtKeyword(tok::objc_end)) {
3819 nextToken();
3820 addUnwrappedLine();
3821 break;
3822 }
3823 if (FormatTok->is(tok::l_brace)) {
3824 parseBlock();
3825 // In ObjC interfaces, nothing should be following the "}".
3826 addUnwrappedLine();
3827 } else if (FormatTok->is(tok::r_brace)) {
3828 // Ignore stray "}". parseStructuralElement doesn't consume them.
3829 nextToken();
3830 addUnwrappedLine();
3831 } else if (FormatTok->isOneOf(tok::minus, tok::plus)) {
3832 nextToken();
3833 parseObjCMethod();
3834 } else {
3835 parseStructuralElement();
3836 }
3837 } while (!eof());
3838 }
3839
parseObjCInterfaceOrImplementation()3840 void UnwrappedLineParser::parseObjCInterfaceOrImplementation() {
3841 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_interface ||
3842 FormatTok->Tok.getObjCKeywordID() == tok::objc_implementation);
3843 nextToken();
3844 nextToken(); // interface name
3845
3846 // @interface can be followed by a lightweight generic
3847 // specialization list, then either a base class or a category.
3848 if (FormatTok->is(tok::less))
3849 parseObjCLightweightGenerics();
3850 if (FormatTok->is(tok::colon)) {
3851 nextToken();
3852 nextToken(); // base class name
3853 // The base class can also have lightweight generics applied to it.
3854 if (FormatTok->is(tok::less))
3855 parseObjCLightweightGenerics();
3856 } else if (FormatTok->is(tok::l_paren)) {
3857 // Skip category, if present.
3858 parseParens();
3859 }
3860
3861 if (FormatTok->is(tok::less))
3862 parseObjCProtocolList();
3863
3864 if (FormatTok->is(tok::l_brace)) {
3865 if (Style.BraceWrapping.AfterObjCDeclaration)
3866 addUnwrappedLine();
3867 parseBlock(/*MustBeDeclaration=*/true);
3868 }
3869
3870 // With instance variables, this puts '}' on its own line. Without instance
3871 // variables, this ends the @interface line.
3872 addUnwrappedLine();
3873
3874 parseObjCUntilAtEnd();
3875 }
3876
parseObjCLightweightGenerics()3877 void UnwrappedLineParser::parseObjCLightweightGenerics() {
3878 assert(FormatTok->is(tok::less));
3879 // Unlike protocol lists, generic parameterizations support
3880 // nested angles:
3881 //
3882 // @interface Foo<ValueType : id <NSCopying, NSSecureCoding>> :
3883 // NSObject <NSCopying, NSSecureCoding>
3884 //
3885 // so we need to count how many open angles we have left.
3886 unsigned NumOpenAngles = 1;
3887 do {
3888 nextToken();
3889 // Early exit in case someone forgot a close angle.
3890 if (FormatTok->isOneOf(tok::semi, tok::l_brace) ||
3891 FormatTok->isObjCAtKeyword(tok::objc_end)) {
3892 break;
3893 }
3894 if (FormatTok->is(tok::less)) {
3895 ++NumOpenAngles;
3896 } else if (FormatTok->is(tok::greater)) {
3897 assert(NumOpenAngles > 0 && "'>' makes NumOpenAngles negative");
3898 --NumOpenAngles;
3899 }
3900 } while (!eof() && NumOpenAngles != 0);
3901 nextToken(); // Skip '>'.
3902 }
3903
3904 // Returns true for the declaration/definition form of @protocol,
3905 // false for the expression form.
parseObjCProtocol()3906 bool UnwrappedLineParser::parseObjCProtocol() {
3907 assert(FormatTok->Tok.getObjCKeywordID() == tok::objc_protocol);
3908 nextToken();
3909
3910 if (FormatTok->is(tok::l_paren)) {
3911 // The expression form of @protocol, e.g. "Protocol* p = @protocol(foo);".
3912 return false;
3913 }
3914
3915 // The definition/declaration form,
3916 // @protocol Foo
3917 // - (int)someMethod;
3918 // @end
3919
3920 nextToken(); // protocol name
3921
3922 if (FormatTok->is(tok::less))
3923 parseObjCProtocolList();
3924
3925 // Check for protocol declaration.
3926 if (FormatTok->is(tok::semi)) {
3927 nextToken();
3928 addUnwrappedLine();
3929 return true;
3930 }
3931
3932 addUnwrappedLine();
3933 parseObjCUntilAtEnd();
3934 return true;
3935 }
3936
parseJavaScriptEs6ImportExport()3937 void UnwrappedLineParser::parseJavaScriptEs6ImportExport() {
3938 bool IsImport = FormatTok->is(Keywords.kw_import);
3939 assert(IsImport || FormatTok->is(tok::kw_export));
3940 nextToken();
3941
3942 // Consume the "default" in "export default class/function".
3943 if (FormatTok->is(tok::kw_default))
3944 nextToken();
3945
3946 // Consume "async function", "function" and "default function", so that these
3947 // get parsed as free-standing JS functions, i.e. do not require a trailing
3948 // semicolon.
3949 if (FormatTok->is(Keywords.kw_async))
3950 nextToken();
3951 if (FormatTok->is(Keywords.kw_function)) {
3952 nextToken();
3953 return;
3954 }
3955
3956 // For imports, `export *`, `export {...}`, consume the rest of the line up
3957 // to the terminating `;`. For everything else, just return and continue
3958 // parsing the structural element, i.e. the declaration or expression for
3959 // `export default`.
3960 if (!IsImport && !FormatTok->isOneOf(tok::l_brace, tok::star) &&
3961 !FormatTok->isStringLiteral()) {
3962 return;
3963 }
3964
3965 while (!eof()) {
3966 if (FormatTok->is(tok::semi))
3967 return;
3968 if (Line->Tokens.empty()) {
3969 // Common issue: Automatic Semicolon Insertion wrapped the line, so the
3970 // import statement should terminate.
3971 return;
3972 }
3973 if (FormatTok->is(tok::l_brace)) {
3974 FormatTok->setBlockKind(BK_Block);
3975 nextToken();
3976 parseBracedList();
3977 } else {
3978 nextToken();
3979 }
3980 }
3981 }
3982
parseStatementMacro()3983 void UnwrappedLineParser::parseStatementMacro() {
3984 nextToken();
3985 if (FormatTok->is(tok::l_paren))
3986 parseParens();
3987 if (FormatTok->is(tok::semi))
3988 nextToken();
3989 addUnwrappedLine();
3990 }
3991
printDebugInfo(const UnwrappedLine & Line,StringRef Prefix="")3992 LLVM_ATTRIBUTE_UNUSED static void printDebugInfo(const UnwrappedLine &Line,
3993 StringRef Prefix = "") {
3994 llvm::dbgs() << Prefix << "Line(" << Line.Level
3995 << ", FSC=" << Line.FirstStartColumn << ")"
3996 << (Line.InPPDirective ? " MACRO" : "") << ": ";
3997 for (const auto &Node : Line.Tokens) {
3998 llvm::dbgs() << Node.Tok->Tok.getName() << "["
3999 << "T=" << static_cast<unsigned>(Node.Tok->getType())
4000 << ", OC=" << Node.Tok->OriginalColumn << "] ";
4001 }
4002 for (const auto &Node : Line.Tokens)
4003 for (const auto &ChildNode : Node.Children)
4004 printDebugInfo(ChildNode, "\nChild: ");
4005
4006 llvm::dbgs() << "\n";
4007 }
4008
addUnwrappedLine(LineLevel AdjustLevel)4009 void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) {
4010 if (Line->Tokens.empty())
4011 return;
4012 LLVM_DEBUG({
4013 if (CurrentLines == &Lines)
4014 printDebugInfo(*Line);
4015 });
4016
4017 // If this line closes a block when in Whitesmiths mode, remember that
4018 // information so that the level can be decreased after the line is added.
4019 // This has to happen after the addition of the line since the line itself
4020 // needs to be indented.
4021 bool ClosesWhitesmithsBlock =
4022 Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex &&
4023 Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths;
4024
4025 CurrentLines->push_back(std::move(*Line));
4026 Line->Tokens.clear();
4027 Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex;
4028 Line->FirstStartColumn = 0;
4029
4030 if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove)
4031 --Line->Level;
4032 if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) {
4033 CurrentLines->append(
4034 std::make_move_iterator(PreprocessorDirectives.begin()),
4035 std::make_move_iterator(PreprocessorDirectives.end()));
4036 PreprocessorDirectives.clear();
4037 }
4038 // Disconnect the current token from the last token on the previous line.
4039 FormatTok->Previous = nullptr;
4040 }
4041
eof() const4042 bool UnwrappedLineParser::eof() const { return FormatTok->is(tok::eof); }
4043
isOnNewLine(const FormatToken & FormatTok)4044 bool UnwrappedLineParser::isOnNewLine(const FormatToken &FormatTok) {
4045 return (Line->InPPDirective || FormatTok.HasUnescapedNewline) &&
4046 FormatTok.NewlinesBefore > 0;
4047 }
4048
4049 // Checks if \p FormatTok is a line comment that continues the line comment
4050 // section on \p Line.
4051 static bool
continuesLineCommentSection(const FormatToken & FormatTok,const UnwrappedLine & Line,const llvm::Regex & CommentPragmasRegex)4052 continuesLineCommentSection(const FormatToken &FormatTok,
4053 const UnwrappedLine &Line,
4054 const llvm::Regex &CommentPragmasRegex) {
4055 if (Line.Tokens.empty())
4056 return false;
4057
4058 StringRef IndentContent = FormatTok.TokenText;
4059 if (FormatTok.TokenText.startswith("//") ||
4060 FormatTok.TokenText.startswith("/*")) {
4061 IndentContent = FormatTok.TokenText.substr(2);
4062 }
4063 if (CommentPragmasRegex.match(IndentContent))
4064 return false;
4065
4066 // If Line starts with a line comment, then FormatTok continues the comment
4067 // section if its original column is greater or equal to the original start
4068 // column of the line.
4069 //
4070 // Define the min column token of a line as follows: if a line ends in '{' or
4071 // contains a '{' followed by a line comment, then the min column token is
4072 // that '{'. Otherwise, the min column token of the line is the first token of
4073 // the line.
4074 //
4075 // If Line starts with a token other than a line comment, then FormatTok
4076 // continues the comment section if its original column is greater than the
4077 // original start column of the min column token of the line.
4078 //
4079 // For example, the second line comment continues the first in these cases:
4080 //
4081 // // first line
4082 // // second line
4083 //
4084 // and:
4085 //
4086 // // first line
4087 // // second line
4088 //
4089 // and:
4090 //
4091 // int i; // first line
4092 // // second line
4093 //
4094 // and:
4095 //
4096 // do { // first line
4097 // // second line
4098 // int i;
4099 // } while (true);
4100 //
4101 // and:
4102 //
4103 // enum {
4104 // a, // first line
4105 // // second line
4106 // b
4107 // };
4108 //
4109 // The second line comment doesn't continue the first in these cases:
4110 //
4111 // // first line
4112 // // second line
4113 //
4114 // and:
4115 //
4116 // int i; // first line
4117 // // second line
4118 //
4119 // and:
4120 //
4121 // do { // first line
4122 // // second line
4123 // int i;
4124 // } while (true);
4125 //
4126 // and:
4127 //
4128 // enum {
4129 // a, // first line
4130 // // second line
4131 // };
4132 const FormatToken *MinColumnToken = Line.Tokens.front().Tok;
4133
4134 // Scan for '{//'. If found, use the column of '{' as a min column for line
4135 // comment section continuation.
4136 const FormatToken *PreviousToken = nullptr;
4137 for (const UnwrappedLineNode &Node : Line.Tokens) {
4138 if (PreviousToken && PreviousToken->is(tok::l_brace) &&
4139 isLineComment(*Node.Tok)) {
4140 MinColumnToken = PreviousToken;
4141 break;
4142 }
4143 PreviousToken = Node.Tok;
4144
4145 // Grab the last newline preceding a token in this unwrapped line.
4146 if (Node.Tok->NewlinesBefore > 0)
4147 MinColumnToken = Node.Tok;
4148 }
4149 if (PreviousToken && PreviousToken->is(tok::l_brace))
4150 MinColumnToken = PreviousToken;
4151
4152 return continuesLineComment(FormatTok, /*Previous=*/Line.Tokens.back().Tok,
4153 MinColumnToken);
4154 }
4155
flushComments(bool NewlineBeforeNext)4156 void UnwrappedLineParser::flushComments(bool NewlineBeforeNext) {
4157 bool JustComments = Line->Tokens.empty();
4158 for (FormatToken *Tok : CommentsBeforeNextToken) {
4159 // Line comments that belong to the same line comment section are put on the
4160 // same line since later we might want to reflow content between them.
4161 // Additional fine-grained breaking of line comment sections is controlled
4162 // by the class BreakableLineCommentSection in case it is desirable to keep
4163 // several line comment sections in the same unwrapped line.
4164 //
4165 // FIXME: Consider putting separate line comment sections as children to the
4166 // unwrapped line instead.
4167 Tok->ContinuesLineCommentSection =
4168 continuesLineCommentSection(*Tok, *Line, CommentPragmasRegex);
4169 if (isOnNewLine(*Tok) && JustComments && !Tok->ContinuesLineCommentSection)
4170 addUnwrappedLine();
4171 pushToken(Tok);
4172 }
4173 if (NewlineBeforeNext && JustComments)
4174 addUnwrappedLine();
4175 CommentsBeforeNextToken.clear();
4176 }
4177
nextToken(int LevelDifference)4178 void UnwrappedLineParser::nextToken(int LevelDifference) {
4179 if (eof())
4180 return;
4181 flushComments(isOnNewLine(*FormatTok));
4182 pushToken(FormatTok);
4183 FormatToken *Previous = FormatTok;
4184 if (!Style.isJavaScript())
4185 readToken(LevelDifference);
4186 else
4187 readTokenWithJavaScriptASI();
4188 FormatTok->Previous = Previous;
4189 if (Style.isVerilog()) {
4190 // Blocks in Verilog can have `begin` and `end` instead of braces. For
4191 // keywords like `begin`, we can't treat them the same as left braces
4192 // because some contexts require one of them. For example structs use
4193 // braces and if blocks use keywords, and a left brace can occur in an if
4194 // statement, but it is not a block. For keywords like `end`, we simply
4195 // treat them the same as right braces.
4196 if (Keywords.isVerilogEnd(*FormatTok))
4197 FormatTok->Tok.setKind(tok::r_brace);
4198 }
4199 }
4200
distributeComments(const SmallVectorImpl<FormatToken * > & Comments,const FormatToken * NextTok)4201 void UnwrappedLineParser::distributeComments(
4202 const SmallVectorImpl<FormatToken *> &Comments,
4203 const FormatToken *NextTok) {
4204 // Whether or not a line comment token continues a line is controlled by
4205 // the method continuesLineCommentSection, with the following caveat:
4206 //
4207 // Define a trail of Comments to be a nonempty proper postfix of Comments such
4208 // that each comment line from the trail is aligned with the next token, if
4209 // the next token exists. If a trail exists, the beginning of the maximal
4210 // trail is marked as a start of a new comment section.
4211 //
4212 // For example in this code:
4213 //
4214 // int a; // line about a
4215 // // line 1 about b
4216 // // line 2 about b
4217 // int b;
4218 //
4219 // the two lines about b form a maximal trail, so there are two sections, the
4220 // first one consisting of the single comment "// line about a" and the
4221 // second one consisting of the next two comments.
4222 if (Comments.empty())
4223 return;
4224 bool ShouldPushCommentsInCurrentLine = true;
4225 bool HasTrailAlignedWithNextToken = false;
4226 unsigned StartOfTrailAlignedWithNextToken = 0;
4227 if (NextTok) {
4228 // We are skipping the first element intentionally.
4229 for (unsigned i = Comments.size() - 1; i > 0; --i) {
4230 if (Comments[i]->OriginalColumn == NextTok->OriginalColumn) {
4231 HasTrailAlignedWithNextToken = true;
4232 StartOfTrailAlignedWithNextToken = i;
4233 }
4234 }
4235 }
4236 for (unsigned i = 0, e = Comments.size(); i < e; ++i) {
4237 FormatToken *FormatTok = Comments[i];
4238 if (HasTrailAlignedWithNextToken && i == StartOfTrailAlignedWithNextToken) {
4239 FormatTok->ContinuesLineCommentSection = false;
4240 } else {
4241 FormatTok->ContinuesLineCommentSection =
4242 continuesLineCommentSection(*FormatTok, *Line, CommentPragmasRegex);
4243 }
4244 if (!FormatTok->ContinuesLineCommentSection &&
4245 (isOnNewLine(*FormatTok) || FormatTok->IsFirst)) {
4246 ShouldPushCommentsInCurrentLine = false;
4247 }
4248 if (ShouldPushCommentsInCurrentLine)
4249 pushToken(FormatTok);
4250 else
4251 CommentsBeforeNextToken.push_back(FormatTok);
4252 }
4253 }
4254
readToken(int LevelDifference)4255 void UnwrappedLineParser::readToken(int LevelDifference) {
4256 SmallVector<FormatToken *, 1> Comments;
4257 bool PreviousWasComment = false;
4258 bool FirstNonCommentOnLine = false;
4259 do {
4260 FormatTok = Tokens->getNextToken();
4261 assert(FormatTok);
4262 while (FormatTok->getType() == TT_ConflictStart ||
4263 FormatTok->getType() == TT_ConflictEnd ||
4264 FormatTok->getType() == TT_ConflictAlternative) {
4265 if (FormatTok->getType() == TT_ConflictStart)
4266 conditionalCompilationStart(/*Unreachable=*/false);
4267 else if (FormatTok->getType() == TT_ConflictAlternative)
4268 conditionalCompilationAlternative();
4269 else if (FormatTok->getType() == TT_ConflictEnd)
4270 conditionalCompilationEnd();
4271 FormatTok = Tokens->getNextToken();
4272 FormatTok->MustBreakBefore = true;
4273 }
4274
4275 auto IsFirstNonCommentOnLine = [](bool FirstNonCommentOnLine,
4276 const FormatToken &Tok,
4277 bool PreviousWasComment) {
4278 auto IsFirstOnLine = [](const FormatToken &Tok) {
4279 return Tok.HasUnescapedNewline || Tok.IsFirst;
4280 };
4281
4282 // Consider preprocessor directives preceded by block comments as first
4283 // on line.
4284 if (PreviousWasComment)
4285 return FirstNonCommentOnLine || IsFirstOnLine(Tok);
4286 return IsFirstOnLine(Tok);
4287 };
4288
4289 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4290 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4291 PreviousWasComment = FormatTok->is(tok::comment);
4292
4293 while (!Line->InPPDirective && FormatTok->is(tok::hash) &&
4294 (!Style.isVerilog() ||
4295 Keywords.isVerilogPPDirective(*Tokens->peekNextToken())) &&
4296 FirstNonCommentOnLine) {
4297 distributeComments(Comments, FormatTok);
4298 Comments.clear();
4299 // If there is an unfinished unwrapped line, we flush the preprocessor
4300 // directives only after that unwrapped line was finished later.
4301 bool SwitchToPreprocessorLines = !Line->Tokens.empty();
4302 ScopedLineState BlockState(*this, SwitchToPreprocessorLines);
4303 assert((LevelDifference >= 0 ||
4304 static_cast<unsigned>(-LevelDifference) <= Line->Level) &&
4305 "LevelDifference makes Line->Level negative");
4306 Line->Level += LevelDifference;
4307 // Comments stored before the preprocessor directive need to be output
4308 // before the preprocessor directive, at the same level as the
4309 // preprocessor directive, as we consider them to apply to the directive.
4310 if (Style.IndentPPDirectives == FormatStyle::PPDIS_BeforeHash &&
4311 PPBranchLevel > 0) {
4312 Line->Level += PPBranchLevel;
4313 }
4314 flushComments(isOnNewLine(*FormatTok));
4315 parsePPDirective();
4316 PreviousWasComment = FormatTok->is(tok::comment);
4317 FirstNonCommentOnLine = IsFirstNonCommentOnLine(
4318 FirstNonCommentOnLine, *FormatTok, PreviousWasComment);
4319 }
4320
4321 if (!PPStack.empty() && (PPStack.back().Kind == PP_Unreachable) &&
4322 !Line->InPPDirective) {
4323 continue;
4324 }
4325
4326 if (!FormatTok->is(tok::comment)) {
4327 distributeComments(Comments, FormatTok);
4328 Comments.clear();
4329 return;
4330 }
4331
4332 Comments.push_back(FormatTok);
4333 } while (!eof());
4334
4335 distributeComments(Comments, nullptr);
4336 Comments.clear();
4337 }
4338
pushToken(FormatToken * Tok)4339 void UnwrappedLineParser::pushToken(FormatToken *Tok) {
4340 Line->Tokens.push_back(UnwrappedLineNode(Tok));
4341 if (MustBreakBeforeNextToken) {
4342 Line->Tokens.back().Tok->MustBreakBefore = true;
4343 MustBreakBeforeNextToken = false;
4344 }
4345 }
4346
4347 } // end namespace format
4348 } // end namespace clang
4349