1 //===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// This file contains the declaration of the FormatToken, a wrapper 12 /// around Token with additional information related to formatting. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H 17 #define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H 18 19 #include "clang/Basic/IdentifierTable.h" 20 #include "clang/Basic/OperatorPrecedence.h" 21 #include "clang/Format/Format.h" 22 #include "clang/Lex/Lexer.h" 23 #include <memory> 24 #include <unordered_set> 25 26 namespace clang { 27 namespace format { 28 29 #define LIST_TOKEN_TYPES \ 30 TYPE(ArrayInitializerLSquare) \ 31 TYPE(ArraySubscriptLSquare) \ 32 TYPE(AttributeColon) \ 33 TYPE(AttributeParen) \ 34 TYPE(AttributeSquare) \ 35 TYPE(BinaryOperator) \ 36 TYPE(BitFieldColon) \ 37 TYPE(BlockComment) \ 38 TYPE(CastRParen) \ 39 TYPE(ConditionalExpr) \ 40 TYPE(ConflictAlternative) \ 41 TYPE(ConflictEnd) \ 42 TYPE(ConflictStart) \ 43 TYPE(CtorInitializerColon) \ 44 TYPE(CtorInitializerComma) \ 45 TYPE(DesignatedInitializerLSquare) \ 46 TYPE(DesignatedInitializerPeriod) \ 47 TYPE(DictLiteral) \ 48 TYPE(ForEachMacro) \ 49 TYPE(FunctionAnnotationRParen) \ 50 TYPE(FunctionDeclarationName) \ 51 TYPE(FunctionLBrace) \ 52 TYPE(FunctionTypeLParen) \ 53 TYPE(ImplicitStringLiteral) \ 54 TYPE(InheritanceColon) \ 55 TYPE(InheritanceComma) \ 56 TYPE(InlineASMBrace) \ 57 TYPE(InlineASMColon) \ 58 TYPE(JavaAnnotation) \ 59 TYPE(JsComputedPropertyName) \ 60 TYPE(JsExponentiation) \ 61 TYPE(JsExponentiationEqual) \ 62 TYPE(JsFatArrow) \ 63 TYPE(JsNonNullAssertion) \ 64 TYPE(JsTypeColon) \ 65 TYPE(JsTypeOperator) \ 66 TYPE(JsTypeOptionalQuestion) \ 67 TYPE(LambdaArrow) \ 68 TYPE(LambdaLSquare) \ 69 TYPE(LeadingJavaAnnotation) \ 70 TYPE(LineComment) \ 71 TYPE(MacroBlockBegin) \ 72 TYPE(MacroBlockEnd) \ 73 TYPE(ObjCBlockLBrace) \ 74 TYPE(ObjCBlockLParen) \ 75 TYPE(ObjCDecl) \ 76 TYPE(ObjCForIn) \ 77 TYPE(ObjCMethodExpr) \ 78 TYPE(ObjCMethodSpecifier) \ 79 TYPE(ObjCProperty) \ 80 TYPE(ObjCStringLiteral) \ 81 TYPE(OverloadedOperator) \ 82 TYPE(OverloadedOperatorLParen) \ 83 TYPE(PointerOrReference) \ 84 TYPE(PureVirtualSpecifier) \ 85 TYPE(RangeBasedForLoopColon) \ 86 TYPE(RegexLiteral) \ 87 TYPE(SelectorName) \ 88 TYPE(StartOfName) \ 89 TYPE(StatementMacro) \ 90 TYPE(StructuredBindingLSquare) \ 91 TYPE(TemplateCloser) \ 92 TYPE(TemplateOpener) \ 93 TYPE(TemplateString) \ 94 TYPE(ProtoExtensionLSquare) \ 95 TYPE(TrailingAnnotation) \ 96 TYPE(TrailingReturnArrow) \ 97 TYPE(TrailingUnaryOperator) \ 98 TYPE(UnaryOperator) \ 99 TYPE(Unknown) 100 101 enum TokenType { 102 #define TYPE(X) TT_##X, 103 LIST_TOKEN_TYPES 104 #undef TYPE 105 NUM_TOKEN_TYPES 106 }; 107 108 /// Determines the name of a token type. 109 const char *getTokenTypeName(TokenType Type); 110 111 // Represents what type of block a set of braces open. 112 enum BraceBlockKind { BK_Unknown, BK_Block, BK_BracedInit }; 113 114 // The packing kind of a function's parameters. 115 enum ParameterPackingKind { PPK_BinPacked, PPK_OnePerLine, PPK_Inconclusive }; 116 117 enum FormatDecision { FD_Unformatted, FD_Continue, FD_Break }; 118 119 class TokenRole; 120 class AnnotatedLine; 121 122 /// A wrapper around a \c Token storing information about the 123 /// whitespace characters preceding it. 124 struct FormatToken { FormatTokenFormatToken125 FormatToken() {} 126 127 /// The \c Token. 128 Token Tok; 129 130 /// The number of newlines immediately before the \c Token. 131 /// 132 /// This can be used to determine what the user wrote in the original code 133 /// and thereby e.g. leave an empty line between two function definitions. 134 unsigned NewlinesBefore = 0; 135 136 /// Whether there is at least one unescaped newline before the \c 137 /// Token. 138 bool HasUnescapedNewline = false; 139 140 /// The range of the whitespace immediately preceding the \c Token. 141 SourceRange WhitespaceRange; 142 143 /// The offset just past the last '\n' in this token's leading 144 /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. 145 unsigned LastNewlineOffset = 0; 146 147 /// The width of the non-whitespace parts of the token (or its first 148 /// line for multi-line tokens) in columns. 149 /// We need this to correctly measure number of columns a token spans. 150 unsigned ColumnWidth = 0; 151 152 /// Contains the width in columns of the last line of a multi-line 153 /// token. 154 unsigned LastLineColumnWidth = 0; 155 156 /// Whether the token text contains newlines (escaped or not). 157 bool IsMultiline = false; 158 159 /// Indicates that this is the first token of the file. 160 bool IsFirst = false; 161 162 /// Whether there must be a line break before this token. 163 /// 164 /// This happens for example when a preprocessor directive ended directly 165 /// before the token. 166 bool MustBreakBefore = false; 167 168 /// The raw text of the token. 169 /// 170 /// Contains the raw token text without leading whitespace and without leading 171 /// escaped newlines. 172 StringRef TokenText; 173 174 /// Set to \c true if this token is an unterminated literal. 175 bool IsUnterminatedLiteral = 0; 176 177 /// Contains the kind of block if this token is a brace. 178 BraceBlockKind BlockKind = BK_Unknown; 179 180 TokenType Type = TT_Unknown; 181 182 /// The number of spaces that should be inserted before this token. 183 unsigned SpacesRequiredBefore = 0; 184 185 /// \c true if it is allowed to break before this token. 186 bool CanBreakBefore = false; 187 188 /// \c true if this is the ">" of "template<..>". 189 bool ClosesTemplateDeclaration = false; 190 191 /// Number of parameters, if this is "(", "[" or "<". 192 unsigned ParameterCount = 0; 193 194 /// Number of parameters that are nested blocks, 195 /// if this is "(", "[" or "<". 196 unsigned BlockParameterCount = 0; 197 198 /// If this is a bracket ("<", "(", "[" or "{"), contains the kind of 199 /// the surrounding bracket. 200 tok::TokenKind ParentBracket = tok::unknown; 201 202 /// A token can have a special role that can carry extra information 203 /// about the token's formatting. 204 std::unique_ptr<TokenRole> Role; 205 206 /// If this is an opening parenthesis, how are the parameters packed? 207 ParameterPackingKind PackingKind = PPK_Inconclusive; 208 209 /// The total length of the unwrapped line up to and including this 210 /// token. 211 unsigned TotalLength = 0; 212 213 /// The original 0-based column of this token, including expanded tabs. 214 /// The configured TabWidth is used as tab width. 215 unsigned OriginalColumn = 0; 216 217 /// The length of following tokens until the next natural split point, 218 /// or the next token that can be broken. 219 unsigned UnbreakableTailLength = 0; 220 221 // FIXME: Come up with a 'cleaner' concept. 222 /// The binding strength of a token. This is a combined value of 223 /// operator precedence, parenthesis nesting, etc. 224 unsigned BindingStrength = 0; 225 226 /// The nesting level of this token, i.e. the number of surrounding (), 227 /// [], {} or <>. 228 unsigned NestingLevel = 0; 229 230 /// The indent level of this token. Copied from the surrounding line. 231 unsigned IndentLevel = 0; 232 233 /// Penalty for inserting a line break before this token. 234 unsigned SplitPenalty = 0; 235 236 /// If this is the first ObjC selector name in an ObjC method 237 /// definition or call, this contains the length of the longest name. 238 /// 239 /// This being set to 0 means that the selectors should not be colon-aligned, 240 /// e.g. because several of them are block-type. 241 unsigned LongestObjCSelectorName = 0; 242 243 /// If this is the first ObjC selector name in an ObjC method 244 /// definition or call, this contains the number of parts that the whole 245 /// selector consist of. 246 unsigned ObjCSelectorNameParts = 0; 247 248 /// The 0-based index of the parameter/argument. For ObjC it is set 249 /// for the selector name token. 250 /// For now calculated only for ObjC. 251 unsigned ParameterIndex = 0; 252 253 /// Stores the number of required fake parentheses and the 254 /// corresponding operator precedence. 255 /// 256 /// If multiple fake parentheses start at a token, this vector stores them in 257 /// reverse order, i.e. inner fake parenthesis first. 258 SmallVector<prec::Level, 4> FakeLParens; 259 /// Insert this many fake ) after this token for correct indentation. 260 unsigned FakeRParens = 0; 261 262 /// \c true if this token starts a binary expression, i.e. has at least 263 /// one fake l_paren with a precedence greater than prec::Unknown. 264 bool StartsBinaryExpression = false; 265 /// \c true if this token ends a binary expression. 266 bool EndsBinaryExpression = false; 267 268 /// If this is an operator (or "."/"->") in a sequence of operators 269 /// with the same precedence, contains the 0-based operator index. 270 unsigned OperatorIndex = 0; 271 272 /// If this is an operator (or "."/"->") in a sequence of operators 273 /// with the same precedence, points to the next operator. 274 FormatToken *NextOperator = nullptr; 275 276 /// Is this token part of a \c DeclStmt defining multiple variables? 277 /// 278 /// Only set if \c Type == \c TT_StartOfName. 279 bool PartOfMultiVariableDeclStmt = false; 280 281 /// Does this line comment continue a line comment section? 282 /// 283 /// Only set to true if \c Type == \c TT_LineComment. 284 bool ContinuesLineCommentSection = false; 285 286 /// If this is a bracket, this points to the matching one. 287 FormatToken *MatchingParen = nullptr; 288 289 /// The previous token in the unwrapped line. 290 FormatToken *Previous = nullptr; 291 292 /// The next token in the unwrapped line. 293 FormatToken *Next = nullptr; 294 295 /// If this token starts a block, this contains all the unwrapped lines 296 /// in it. 297 SmallVector<AnnotatedLine *, 1> Children; 298 299 /// Stores the formatting decision for the token once it was made. 300 FormatDecision Decision = FD_Unformatted; 301 302 /// If \c true, this token has been fully formatted (indented and 303 /// potentially re-formatted inside), and we do not allow further formatting 304 /// changes. 305 bool Finalized = false; 306 isFormatToken307 bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } isFormatToken308 bool is(TokenType TT) const { return Type == TT; } isFormatToken309 bool is(const IdentifierInfo *II) const { 310 return II && II == Tok.getIdentifierInfo(); 311 } isFormatToken312 bool is(tok::PPKeywordKind Kind) const { 313 return Tok.getIdentifierInfo() && 314 Tok.getIdentifierInfo()->getPPKeywordID() == Kind; 315 } isOneOfFormatToken316 template <typename A, typename B> bool isOneOf(A K1, B K2) const { 317 return is(K1) || is(K2); 318 } 319 template <typename A, typename B, typename... Ts> isOneOfFormatToken320 bool isOneOf(A K1, B K2, Ts... Ks) const { 321 return is(K1) || isOneOf(K2, Ks...); 322 } isNotFormatToken323 template <typename T> bool isNot(T Kind) const { return !is(Kind); } 324 closesScopeAfterBlockFormatToken325 bool closesScopeAfterBlock() const { 326 if (BlockKind == BK_Block) 327 return true; 328 if (closesScope()) 329 return Previous->closesScopeAfterBlock(); 330 return false; 331 } 332 333 /// \c true if this token starts a sequence with the given tokens in order, 334 /// following the ``Next`` pointers, ignoring comments. 335 template <typename A, typename... Ts> startsSequenceFormatToken336 bool startsSequence(A K1, Ts... Tokens) const { 337 return startsSequenceInternal(K1, Tokens...); 338 } 339 340 /// \c true if this token ends a sequence with the given tokens in order, 341 /// following the ``Previous`` pointers, ignoring comments. 342 template <typename A, typename... Ts> endsSequenceFormatToken343 bool endsSequence(A K1, Ts... Tokens) const { 344 return endsSequenceInternal(K1, Tokens...); 345 } 346 isStringLiteralFormatToken347 bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); } 348 isObjCAtKeywordFormatToken349 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { 350 return Tok.isObjCAtKeyword(Kind); 351 } 352 353 bool isAccessSpecifier(bool ColonRequired = true) const { 354 return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) && 355 (!ColonRequired || (Next && Next->is(tok::colon))); 356 } 357 358 /// Determine whether the token is a simple-type-specifier. 359 bool isSimpleTypeSpecifier() const; 360 isObjCAccessSpecifierFormatToken361 bool isObjCAccessSpecifier() const { 362 return is(tok::at) && Next && 363 (Next->isObjCAtKeyword(tok::objc_public) || 364 Next->isObjCAtKeyword(tok::objc_protected) || 365 Next->isObjCAtKeyword(tok::objc_package) || 366 Next->isObjCAtKeyword(tok::objc_private)); 367 } 368 369 /// Returns whether \p Tok is ([{ or an opening < of a template or in 370 /// protos. opensScopeFormatToken371 bool opensScope() const { 372 if (is(TT_TemplateString) && TokenText.endswith("${")) 373 return true; 374 if (is(TT_DictLiteral) && is(tok::less)) 375 return true; 376 return isOneOf(tok::l_paren, tok::l_brace, tok::l_square, 377 TT_TemplateOpener); 378 } 379 /// Returns whether \p Tok is )]} or a closing > of a template or in 380 /// protos. closesScopeFormatToken381 bool closesScope() const { 382 if (is(TT_TemplateString) && TokenText.startswith("}")) 383 return true; 384 if (is(TT_DictLiteral) && is(tok::greater)) 385 return true; 386 return isOneOf(tok::r_paren, tok::r_brace, tok::r_square, 387 TT_TemplateCloser); 388 } 389 390 /// Returns \c true if this is a "." or "->" accessing a member. isMemberAccessFormatToken391 bool isMemberAccess() const { 392 return isOneOf(tok::arrow, tok::period, tok::arrowstar) && 393 !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow, 394 TT_LambdaArrow); 395 } 396 isUnaryOperatorFormatToken397 bool isUnaryOperator() const { 398 switch (Tok.getKind()) { 399 case tok::plus: 400 case tok::plusplus: 401 case tok::minus: 402 case tok::minusminus: 403 case tok::exclaim: 404 case tok::tilde: 405 case tok::kw_sizeof: 406 case tok::kw_alignof: 407 return true; 408 default: 409 return false; 410 } 411 } 412 isBinaryOperatorFormatToken413 bool isBinaryOperator() const { 414 // Comma is a binary operator, but does not behave as such wrt. formatting. 415 return getPrecedence() > prec::Comma; 416 } 417 isTrailingCommentFormatToken418 bool isTrailingComment() const { 419 return is(tok::comment) && 420 (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0); 421 } 422 423 /// Returns \c true if this is a keyword that can be used 424 /// like a function call (e.g. sizeof, typeid, ...). isFunctionLikeKeywordFormatToken425 bool isFunctionLikeKeyword() const { 426 switch (Tok.getKind()) { 427 case tok::kw_throw: 428 case tok::kw_typeid: 429 case tok::kw_return: 430 case tok::kw_sizeof: 431 case tok::kw_alignof: 432 case tok::kw_alignas: 433 case tok::kw_decltype: 434 case tok::kw_noexcept: 435 case tok::kw_static_assert: 436 case tok::kw___attribute: 437 return true; 438 default: 439 return false; 440 } 441 } 442 443 /// Returns \c true if this is a string literal that's like a label, 444 /// e.g. ends with "=" or ":". isLabelStringFormatToken445 bool isLabelString() const { 446 if (!is(tok::string_literal)) 447 return false; 448 StringRef Content = TokenText; 449 if (Content.startswith("\"") || Content.startswith("'")) 450 Content = Content.drop_front(1); 451 if (Content.endswith("\"") || Content.endswith("'")) 452 Content = Content.drop_back(1); 453 Content = Content.trim(); 454 return Content.size() > 1 && 455 (Content.back() == ':' || Content.back() == '='); 456 } 457 458 /// Returns actual token start location without leading escaped 459 /// newlines and whitespace. 460 /// 461 /// This can be different to Tok.getLocation(), which includes leading escaped 462 /// newlines. getStartOfNonWhitespaceFormatToken463 SourceLocation getStartOfNonWhitespace() const { 464 return WhitespaceRange.getEnd(); 465 } 466 getPrecedenceFormatToken467 prec::Level getPrecedence() const { 468 return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true, 469 /*CPlusPlus11=*/true); 470 } 471 472 /// Returns the previous token ignoring comments. getPreviousNonCommentFormatToken473 FormatToken *getPreviousNonComment() const { 474 FormatToken *Tok = Previous; 475 while (Tok && Tok->is(tok::comment)) 476 Tok = Tok->Previous; 477 return Tok; 478 } 479 480 /// Returns the next token ignoring comments. getNextNonCommentFormatToken481 const FormatToken *getNextNonComment() const { 482 const FormatToken *Tok = Next; 483 while (Tok && Tok->is(tok::comment)) 484 Tok = Tok->Next; 485 return Tok; 486 } 487 488 /// Returns \c true if this tokens starts a block-type list, i.e. a 489 /// list that should be indented with a block indent. opensBlockOrBlockTypeListFormatToken490 bool opensBlockOrBlockTypeList(const FormatStyle &Style) const { 491 if (is(TT_TemplateString) && opensScope()) 492 return true; 493 return is(TT_ArrayInitializerLSquare) || 494 is(TT_ProtoExtensionLSquare) || 495 (is(tok::l_brace) && 496 (BlockKind == BK_Block || is(TT_DictLiteral) || 497 (!Style.Cpp11BracedListStyle && NestingLevel == 0))) || 498 (is(tok::less) && (Style.Language == FormatStyle::LK_Proto || 499 Style.Language == FormatStyle::LK_TextProto)); 500 } 501 502 /// Returns whether the token is the left square bracket of a C++ 503 /// structured binding declaration. isCppStructuredBindingFormatToken504 bool isCppStructuredBinding(const FormatStyle &Style) const { 505 if (!Style.isCpp() || isNot(tok::l_square)) 506 return false; 507 const FormatToken *T = this; 508 do { 509 T = T->getPreviousNonComment(); 510 } while (T && T->isOneOf(tok::kw_const, tok::kw_volatile, tok::amp, 511 tok::ampamp)); 512 return T && T->is(tok::kw_auto); 513 } 514 515 /// Same as opensBlockOrBlockTypeList, but for the closing token. closesBlockOrBlockTypeListFormatToken516 bool closesBlockOrBlockTypeList(const FormatStyle &Style) const { 517 if (is(TT_TemplateString) && closesScope()) 518 return true; 519 return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style); 520 } 521 522 /// Return the actual namespace token, if this token starts a namespace 523 /// block. getNamespaceTokenFormatToken524 const FormatToken *getNamespaceToken() const { 525 const FormatToken *NamespaceTok = this; 526 if (is(tok::comment)) 527 NamespaceTok = NamespaceTok->getNextNonComment(); 528 // Detect "(inline|export)? namespace" in the beginning of a line. 529 if (NamespaceTok && NamespaceTok->isOneOf(tok::kw_inline, tok::kw_export)) 530 NamespaceTok = NamespaceTok->getNextNonComment(); 531 return NamespaceTok && NamespaceTok->is(tok::kw_namespace) ? NamespaceTok 532 : nullptr; 533 } 534 535 private: 536 // Disallow copying. 537 FormatToken(const FormatToken &) = delete; 538 void operator=(const FormatToken &) = delete; 539 540 template <typename A, typename... Ts> startsSequenceInternalFormatToken541 bool startsSequenceInternal(A K1, Ts... Tokens) const { 542 if (is(tok::comment) && Next) 543 return Next->startsSequenceInternal(K1, Tokens...); 544 return is(K1) && Next && Next->startsSequenceInternal(Tokens...); 545 } 546 startsSequenceInternalFormatToken547 template <typename A> bool startsSequenceInternal(A K1) const { 548 if (is(tok::comment) && Next) 549 return Next->startsSequenceInternal(K1); 550 return is(K1); 551 } 552 endsSequenceInternalFormatToken553 template <typename A, typename... Ts> bool endsSequenceInternal(A K1) const { 554 if (is(tok::comment) && Previous) 555 return Previous->endsSequenceInternal(K1); 556 return is(K1); 557 } 558 559 template <typename A, typename... Ts> endsSequenceInternalFormatToken560 bool endsSequenceInternal(A K1, Ts... Tokens) const { 561 if (is(tok::comment) && Previous) 562 return Previous->endsSequenceInternal(K1, Tokens...); 563 return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...); 564 } 565 }; 566 567 class ContinuationIndenter; 568 struct LineState; 569 570 class TokenRole { 571 public: TokenRole(const FormatStyle & Style)572 TokenRole(const FormatStyle &Style) : Style(Style) {} 573 virtual ~TokenRole(); 574 575 /// After the \c TokenAnnotator has finished annotating all the tokens, 576 /// this function precomputes required information for formatting. 577 virtual void precomputeFormattingInfos(const FormatToken *Token); 578 579 /// Apply the special formatting that the given role demands. 580 /// 581 /// Assumes that the token having this role is already formatted. 582 /// 583 /// Continues formatting from \p State leaving indentation to \p Indenter and 584 /// returns the total penalty that this formatting incurs. formatFromToken(LineState & State,ContinuationIndenter * Indenter,bool DryRun)585 virtual unsigned formatFromToken(LineState &State, 586 ContinuationIndenter *Indenter, 587 bool DryRun) { 588 return 0; 589 } 590 591 /// Same as \c formatFromToken, but assumes that the first token has 592 /// already been set thereby deciding on the first line break. formatAfterToken(LineState & State,ContinuationIndenter * Indenter,bool DryRun)593 virtual unsigned formatAfterToken(LineState &State, 594 ContinuationIndenter *Indenter, 595 bool DryRun) { 596 return 0; 597 } 598 599 /// Notifies the \c Role that a comma was found. CommaFound(const FormatToken * Token)600 virtual void CommaFound(const FormatToken *Token) {} 601 lastComma()602 virtual const FormatToken *lastComma() { return nullptr; } 603 604 protected: 605 const FormatStyle &Style; 606 }; 607 608 class CommaSeparatedList : public TokenRole { 609 public: CommaSeparatedList(const FormatStyle & Style)610 CommaSeparatedList(const FormatStyle &Style) 611 : TokenRole(Style), HasNestedBracedList(false) {} 612 613 void precomputeFormattingInfos(const FormatToken *Token) override; 614 615 unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, 616 bool DryRun) override; 617 618 unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, 619 bool DryRun) override; 620 621 /// Adds \p Token as the next comma to the \c CommaSeparated list. CommaFound(const FormatToken * Token)622 void CommaFound(const FormatToken *Token) override { 623 Commas.push_back(Token); 624 } 625 lastComma()626 const FormatToken *lastComma() override { 627 if (Commas.empty()) 628 return nullptr; 629 return Commas.back(); 630 } 631 632 private: 633 /// A struct that holds information on how to format a given list with 634 /// a specific number of columns. 635 struct ColumnFormat { 636 /// The number of columns to use. 637 unsigned Columns; 638 639 /// The total width in characters. 640 unsigned TotalWidth; 641 642 /// The number of lines required for this format. 643 unsigned LineCount; 644 645 /// The size of each column in characters. 646 SmallVector<unsigned, 8> ColumnSizes; 647 }; 648 649 /// Calculate which \c ColumnFormat fits best into 650 /// \p RemainingCharacters. 651 const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const; 652 653 /// The ordered \c FormatTokens making up the commas of this list. 654 SmallVector<const FormatToken *, 8> Commas; 655 656 /// The length of each of the list's items in characters including the 657 /// trailing comma. 658 SmallVector<unsigned, 8> ItemLengths; 659 660 /// Precomputed formats that can be used for this list. 661 SmallVector<ColumnFormat, 4> Formats; 662 663 bool HasNestedBracedList; 664 }; 665 666 /// Encapsulates keywords that are context sensitive or for languages not 667 /// properly supported by Clang's lexer. 668 struct AdditionalKeywords { AdditionalKeywordsAdditionalKeywords669 AdditionalKeywords(IdentifierTable &IdentTable) { 670 kw_final = &IdentTable.get("final"); 671 kw_override = &IdentTable.get("override"); 672 kw_in = &IdentTable.get("in"); 673 kw_of = &IdentTable.get("of"); 674 kw_CF_ENUM = &IdentTable.get("CF_ENUM"); 675 kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS"); 676 kw_NS_ENUM = &IdentTable.get("NS_ENUM"); 677 kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS"); 678 679 kw_as = &IdentTable.get("as"); 680 kw_async = &IdentTable.get("async"); 681 kw_await = &IdentTable.get("await"); 682 kw_declare = &IdentTable.get("declare"); 683 kw_finally = &IdentTable.get("finally"); 684 kw_from = &IdentTable.get("from"); 685 kw_function = &IdentTable.get("function"); 686 kw_get = &IdentTable.get("get"); 687 kw_import = &IdentTable.get("import"); 688 kw_infer = &IdentTable.get("infer"); 689 kw_is = &IdentTable.get("is"); 690 kw_let = &IdentTable.get("let"); 691 kw_module = &IdentTable.get("module"); 692 kw_readonly = &IdentTable.get("readonly"); 693 kw_set = &IdentTable.get("set"); 694 kw_type = &IdentTable.get("type"); 695 kw_typeof = &IdentTable.get("typeof"); 696 kw_var = &IdentTable.get("var"); 697 kw_yield = &IdentTable.get("yield"); 698 699 kw_abstract = &IdentTable.get("abstract"); 700 kw_assert = &IdentTable.get("assert"); 701 kw_extends = &IdentTable.get("extends"); 702 kw_implements = &IdentTable.get("implements"); 703 kw_instanceof = &IdentTable.get("instanceof"); 704 kw_interface = &IdentTable.get("interface"); 705 kw_native = &IdentTable.get("native"); 706 kw_package = &IdentTable.get("package"); 707 kw_synchronized = &IdentTable.get("synchronized"); 708 kw_throws = &IdentTable.get("throws"); 709 kw___except = &IdentTable.get("__except"); 710 kw___has_include = &IdentTable.get("__has_include"); 711 kw___has_include_next = &IdentTable.get("__has_include_next"); 712 713 kw_mark = &IdentTable.get("mark"); 714 715 kw_extend = &IdentTable.get("extend"); 716 kw_option = &IdentTable.get("option"); 717 kw_optional = &IdentTable.get("optional"); 718 kw_repeated = &IdentTable.get("repeated"); 719 kw_required = &IdentTable.get("required"); 720 kw_returns = &IdentTable.get("returns"); 721 722 kw_signals = &IdentTable.get("signals"); 723 kw_qsignals = &IdentTable.get("Q_SIGNALS"); 724 kw_slots = &IdentTable.get("slots"); 725 kw_qslots = &IdentTable.get("Q_SLOTS"); 726 727 // Keep this at the end of the constructor to make sure everything here is 728 // already initialized. 729 JsExtraKeywords = std::unordered_set<IdentifierInfo *>( 730 {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, 731 kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly, 732 kw_set, kw_type, kw_typeof, kw_var, kw_yield, 733 // Keywords from the Java section. 734 kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); 735 } 736 737 // Context sensitive keywords. 738 IdentifierInfo *kw_final; 739 IdentifierInfo *kw_override; 740 IdentifierInfo *kw_in; 741 IdentifierInfo *kw_of; 742 IdentifierInfo *kw_CF_ENUM; 743 IdentifierInfo *kw_CF_OPTIONS; 744 IdentifierInfo *kw_NS_ENUM; 745 IdentifierInfo *kw_NS_OPTIONS; 746 IdentifierInfo *kw___except; 747 IdentifierInfo *kw___has_include; 748 IdentifierInfo *kw___has_include_next; 749 750 // JavaScript keywords. 751 IdentifierInfo *kw_as; 752 IdentifierInfo *kw_async; 753 IdentifierInfo *kw_await; 754 IdentifierInfo *kw_declare; 755 IdentifierInfo *kw_finally; 756 IdentifierInfo *kw_from; 757 IdentifierInfo *kw_function; 758 IdentifierInfo *kw_get; 759 IdentifierInfo *kw_import; 760 IdentifierInfo *kw_infer; 761 IdentifierInfo *kw_is; 762 IdentifierInfo *kw_let; 763 IdentifierInfo *kw_module; 764 IdentifierInfo *kw_readonly; 765 IdentifierInfo *kw_set; 766 IdentifierInfo *kw_type; 767 IdentifierInfo *kw_typeof; 768 IdentifierInfo *kw_var; 769 IdentifierInfo *kw_yield; 770 771 // Java keywords. 772 IdentifierInfo *kw_abstract; 773 IdentifierInfo *kw_assert; 774 IdentifierInfo *kw_extends; 775 IdentifierInfo *kw_implements; 776 IdentifierInfo *kw_instanceof; 777 IdentifierInfo *kw_interface; 778 IdentifierInfo *kw_native; 779 IdentifierInfo *kw_package; 780 IdentifierInfo *kw_synchronized; 781 IdentifierInfo *kw_throws; 782 783 // Pragma keywords. 784 IdentifierInfo *kw_mark; 785 786 // Proto keywords. 787 IdentifierInfo *kw_extend; 788 IdentifierInfo *kw_option; 789 IdentifierInfo *kw_optional; 790 IdentifierInfo *kw_repeated; 791 IdentifierInfo *kw_required; 792 IdentifierInfo *kw_returns; 793 794 // QT keywords. 795 IdentifierInfo *kw_signals; 796 IdentifierInfo *kw_qsignals; 797 IdentifierInfo *kw_slots; 798 IdentifierInfo *kw_qslots; 799 800 /// Returns \c true if \p Tok is a true JavaScript identifier, returns 801 /// \c false if it is a keyword or a pseudo keyword. IsJavaScriptIdentifierAdditionalKeywords802 bool IsJavaScriptIdentifier(const FormatToken &Tok) const { 803 return Tok.is(tok::identifier) && 804 JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) == 805 JsExtraKeywords.end(); 806 } 807 808 private: 809 /// The JavaScript keywords beyond the C++ keyword set. 810 std::unordered_set<IdentifierInfo *> JsExtraKeywords; 811 }; 812 813 } // namespace format 814 } // namespace clang 815 816 #endif 817