1 //===--- Format.cpp - Format C++ code -------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file implements functions declared in Format.h. This will be 12 /// split into separate files as we go. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "format-formatter" 17 18 #include "BreakableToken.h" 19 #include "TokenAnnotator.h" 20 #include "UnwrappedLineParser.h" 21 #include "WhitespaceManager.h" 22 #include "clang/Basic/Diagnostic.h" 23 #include "clang/Basic/OperatorPrecedence.h" 24 #include "clang/Basic/SourceManager.h" 25 #include "clang/Format/Format.h" 26 #include "clang/Lex/Lexer.h" 27 #include "llvm/ADT/STLExtras.h" 28 #include "llvm/Support/Allocator.h" 29 #include "llvm/Support/Debug.h" 30 #include "llvm/Support/YAMLTraits.h" 31 #include <queue> 32 #include <string> 33 34 namespace llvm { 35 namespace yaml { 36 template <> 37 struct ScalarEnumerationTraits<clang::format::FormatStyle::LanguageStandard> { 38 static void enumeration(IO &IO, 39 clang::format::FormatStyle::LanguageStandard &Value) { 40 IO.enumCase(Value, "C++03", clang::format::FormatStyle::LS_Cpp03); 41 IO.enumCase(Value, "C++11", clang::format::FormatStyle::LS_Cpp11); 42 IO.enumCase(Value, "Auto", clang::format::FormatStyle::LS_Auto); 43 } 44 }; 45 46 template <> 47 struct ScalarEnumerationTraits<clang::format::FormatStyle::BraceBreakingStyle> { 48 static void 49 enumeration(IO &IO, clang::format::FormatStyle::BraceBreakingStyle &Value) { 50 IO.enumCase(Value, "Attach", clang::format::FormatStyle::BS_Attach); 51 IO.enumCase(Value, "Linux", clang::format::FormatStyle::BS_Linux); 52 IO.enumCase(Value, "Stroustrup", clang::format::FormatStyle::BS_Stroustrup); 53 } 54 }; 55 56 template <> struct MappingTraits<clang::format::FormatStyle> { 57 static void mapping(llvm::yaml::IO &IO, clang::format::FormatStyle &Style) { 58 if (IO.outputting()) { 59 StringRef StylesArray[] = { "LLVM", "Google", "Chromium", "Mozilla" }; 60 ArrayRef<StringRef> Styles(StylesArray); 61 for (size_t i = 0, e = Styles.size(); i < e; ++i) { 62 StringRef StyleName(Styles[i]); 63 clang::format::FormatStyle PredefinedStyle; 64 if (clang::format::getPredefinedStyle(StyleName, &PredefinedStyle) && 65 Style == PredefinedStyle) { 66 IO.mapOptional("# BasedOnStyle", StyleName); 67 break; 68 } 69 } 70 } else { 71 StringRef BasedOnStyle; 72 IO.mapOptional("BasedOnStyle", BasedOnStyle); 73 if (!BasedOnStyle.empty()) 74 if (!clang::format::getPredefinedStyle(BasedOnStyle, &Style)) { 75 IO.setError(Twine("Unknown value for BasedOnStyle: ", BasedOnStyle)); 76 return; 77 } 78 } 79 80 IO.mapOptional("AccessModifierOffset", Style.AccessModifierOffset); 81 IO.mapOptional("AlignEscapedNewlinesLeft", Style.AlignEscapedNewlinesLeft); 82 IO.mapOptional("AllowAllParametersOfDeclarationOnNextLine", 83 Style.AllowAllParametersOfDeclarationOnNextLine); 84 IO.mapOptional("AllowShortIfStatementsOnASingleLine", 85 Style.AllowShortIfStatementsOnASingleLine); 86 IO.mapOptional("AllowShortLoopsOnASingleLine", 87 Style.AllowShortLoopsOnASingleLine); 88 IO.mapOptional("AlwaysBreakTemplateDeclarations", 89 Style.AlwaysBreakTemplateDeclarations); 90 IO.mapOptional("AlwaysBreakBeforeMultilineStrings", 91 Style.AlwaysBreakBeforeMultilineStrings); 92 IO.mapOptional("BinPackParameters", Style.BinPackParameters); 93 IO.mapOptional("ColumnLimit", Style.ColumnLimit); 94 IO.mapOptional("ConstructorInitializerAllOnOneLineOrOnePerLine", 95 Style.ConstructorInitializerAllOnOneLineOrOnePerLine); 96 IO.mapOptional("DerivePointerBinding", Style.DerivePointerBinding); 97 IO.mapOptional("IndentCaseLabels", Style.IndentCaseLabels); 98 IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep); 99 IO.mapOptional("ObjCSpaceBeforeProtocolList", 100 Style.ObjCSpaceBeforeProtocolList); 101 IO.mapOptional("PenaltyBreakComment", Style.PenaltyBreakComment); 102 IO.mapOptional("PenaltyBreakString", Style.PenaltyBreakString); 103 IO.mapOptional("PenaltyExcessCharacter", Style.PenaltyExcessCharacter); 104 IO.mapOptional("PenaltyReturnTypeOnItsOwnLine", 105 Style.PenaltyReturnTypeOnItsOwnLine); 106 IO.mapOptional("PointerBindsToType", Style.PointerBindsToType); 107 IO.mapOptional("SpacesBeforeTrailingComments", 108 Style.SpacesBeforeTrailingComments); 109 IO.mapOptional("SpacesInBracedLists", Style.SpacesInBracedLists); 110 IO.mapOptional("Standard", Style.Standard); 111 IO.mapOptional("IndentWidth", Style.IndentWidth); 112 IO.mapOptional("UseTab", Style.UseTab); 113 IO.mapOptional("BreakBeforeBraces", Style.BreakBeforeBraces); 114 IO.mapOptional("IndentFunctionDeclarationAfterType", 115 Style.IndentFunctionDeclarationAfterType); 116 } 117 }; 118 } 119 } 120 121 namespace clang { 122 namespace format { 123 124 FormatStyle getLLVMStyle() { 125 FormatStyle LLVMStyle; 126 LLVMStyle.AccessModifierOffset = -2; 127 LLVMStyle.AlignEscapedNewlinesLeft = false; 128 LLVMStyle.AllowAllParametersOfDeclarationOnNextLine = true; 129 LLVMStyle.AllowShortIfStatementsOnASingleLine = false; 130 LLVMStyle.AllowShortLoopsOnASingleLine = false; 131 LLVMStyle.AlwaysBreakTemplateDeclarations = false; 132 LLVMStyle.AlwaysBreakBeforeMultilineStrings = false; 133 LLVMStyle.BinPackParameters = true; 134 LLVMStyle.ColumnLimit = 80; 135 LLVMStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = false; 136 LLVMStyle.DerivePointerBinding = false; 137 LLVMStyle.IndentCaseLabels = false; 138 LLVMStyle.MaxEmptyLinesToKeep = 1; 139 LLVMStyle.ObjCSpaceBeforeProtocolList = true; 140 LLVMStyle.PenaltyBreakComment = 45; 141 LLVMStyle.PenaltyBreakString = 1000; 142 LLVMStyle.PenaltyExcessCharacter = 1000000; 143 LLVMStyle.PenaltyReturnTypeOnItsOwnLine = 60; 144 LLVMStyle.PointerBindsToType = false; 145 LLVMStyle.SpacesBeforeTrailingComments = 1; 146 LLVMStyle.SpacesInBracedLists = true; 147 LLVMStyle.Standard = FormatStyle::LS_Cpp03; 148 LLVMStyle.IndentWidth = 2; 149 LLVMStyle.UseTab = false; 150 LLVMStyle.BreakBeforeBraces = FormatStyle::BS_Attach; 151 LLVMStyle.IndentFunctionDeclarationAfterType = false; 152 return LLVMStyle; 153 } 154 155 FormatStyle getGoogleStyle() { 156 FormatStyle GoogleStyle; 157 GoogleStyle.AccessModifierOffset = -1; 158 GoogleStyle.AlignEscapedNewlinesLeft = true; 159 GoogleStyle.AllowAllParametersOfDeclarationOnNextLine = true; 160 GoogleStyle.AllowShortIfStatementsOnASingleLine = true; 161 GoogleStyle.AllowShortLoopsOnASingleLine = true; 162 GoogleStyle.AlwaysBreakTemplateDeclarations = true; 163 GoogleStyle.AlwaysBreakBeforeMultilineStrings = true; 164 GoogleStyle.BinPackParameters = true; 165 GoogleStyle.ColumnLimit = 80; 166 GoogleStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 167 GoogleStyle.DerivePointerBinding = true; 168 GoogleStyle.IndentCaseLabels = true; 169 GoogleStyle.MaxEmptyLinesToKeep = 1; 170 GoogleStyle.ObjCSpaceBeforeProtocolList = false; 171 GoogleStyle.PenaltyBreakComment = 45; 172 GoogleStyle.PenaltyBreakString = 1000; 173 GoogleStyle.PenaltyExcessCharacter = 1000000; 174 GoogleStyle.PenaltyReturnTypeOnItsOwnLine = 200; 175 GoogleStyle.PointerBindsToType = true; 176 GoogleStyle.SpacesBeforeTrailingComments = 2; 177 GoogleStyle.SpacesInBracedLists = false; 178 GoogleStyle.Standard = FormatStyle::LS_Auto; 179 GoogleStyle.IndentWidth = 2; 180 GoogleStyle.UseTab = false; 181 GoogleStyle.BreakBeforeBraces = FormatStyle::BS_Attach; 182 GoogleStyle.IndentFunctionDeclarationAfterType = true; 183 return GoogleStyle; 184 } 185 186 FormatStyle getChromiumStyle() { 187 FormatStyle ChromiumStyle = getGoogleStyle(); 188 ChromiumStyle.AllowAllParametersOfDeclarationOnNextLine = false; 189 ChromiumStyle.AllowShortIfStatementsOnASingleLine = false; 190 ChromiumStyle.AllowShortLoopsOnASingleLine = false; 191 ChromiumStyle.BinPackParameters = false; 192 ChromiumStyle.Standard = FormatStyle::LS_Cpp03; 193 ChromiumStyle.DerivePointerBinding = false; 194 return ChromiumStyle; 195 } 196 197 FormatStyle getMozillaStyle() { 198 FormatStyle MozillaStyle = getLLVMStyle(); 199 MozillaStyle.AllowAllParametersOfDeclarationOnNextLine = false; 200 MozillaStyle.ConstructorInitializerAllOnOneLineOrOnePerLine = true; 201 MozillaStyle.DerivePointerBinding = true; 202 MozillaStyle.IndentCaseLabels = true; 203 MozillaStyle.ObjCSpaceBeforeProtocolList = false; 204 MozillaStyle.PenaltyReturnTypeOnItsOwnLine = 200; 205 MozillaStyle.PointerBindsToType = true; 206 return MozillaStyle; 207 } 208 209 bool getPredefinedStyle(StringRef Name, FormatStyle *Style) { 210 if (Name.equals_lower("llvm")) 211 *Style = getLLVMStyle(); 212 else if (Name.equals_lower("chromium")) 213 *Style = getChromiumStyle(); 214 else if (Name.equals_lower("mozilla")) 215 *Style = getMozillaStyle(); 216 else if (Name.equals_lower("google")) 217 *Style = getGoogleStyle(); 218 else 219 return false; 220 221 return true; 222 } 223 224 llvm::error_code parseConfiguration(StringRef Text, FormatStyle *Style) { 225 if (Text.trim().empty()) 226 return llvm::make_error_code(llvm::errc::invalid_argument); 227 llvm::yaml::Input Input(Text); 228 Input >> *Style; 229 return Input.error(); 230 } 231 232 std::string configurationAsText(const FormatStyle &Style) { 233 std::string Text; 234 llvm::raw_string_ostream Stream(Text); 235 llvm::yaml::Output Output(Stream); 236 // We use the same mapping method for input and output, so we need a non-const 237 // reference here. 238 FormatStyle NonConstStyle = Style; 239 Output << NonConstStyle; 240 return Stream.str(); 241 } 242 243 // Returns the length of everything up to the first possible line break after 244 // the ), ], } or > matching \c Tok. 245 static unsigned getLengthToMatchingParen(const FormatToken &Tok) { 246 if (Tok.MatchingParen == NULL) 247 return 0; 248 FormatToken *End = Tok.MatchingParen; 249 while (End->Next && !End->Next->CanBreakBefore) { 250 End = End->Next; 251 } 252 return End->TotalLength - Tok.TotalLength + 1; 253 } 254 255 namespace { 256 257 class UnwrappedLineFormatter { 258 public: 259 UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr, 260 const AnnotatedLine &Line, unsigned FirstIndent, 261 const FormatToken *RootToken, 262 WhitespaceManager &Whitespaces, 263 encoding::Encoding Encoding) 264 : Style(Style), SourceMgr(SourceMgr), Line(Line), 265 FirstIndent(FirstIndent), RootToken(RootToken), 266 Whitespaces(Whitespaces), Count(0), Encoding(Encoding) {} 267 268 /// \brief Formats an \c UnwrappedLine. 269 void format(const AnnotatedLine *NextLine) { 270 // Initialize state dependent on indent. 271 LineState State; 272 State.Column = FirstIndent; 273 State.NextToken = RootToken; 274 State.Stack.push_back(ParenState(FirstIndent, FirstIndent, 275 /*AvoidBinPacking=*/false, 276 /*NoLineBreak=*/false)); 277 State.LineContainsContinuedForLoopSection = false; 278 State.ParenLevel = 0; 279 State.StartOfStringLiteral = 0; 280 State.StartOfLineLevel = State.ParenLevel; 281 State.LowestLevelOnLine = State.ParenLevel; 282 State.IgnoreStackForComparison = false; 283 284 // The first token has already been indented and thus consumed. 285 moveStateToNextToken(State, /*DryRun=*/false); 286 287 // If everything fits on a single line, just put it there. 288 unsigned ColumnLimit = Style.ColumnLimit; 289 if (NextLine && NextLine->InPPDirective && 290 !NextLine->First->HasUnescapedNewline) 291 ColumnLimit = getColumnLimit(); 292 if (Line.Last->TotalLength <= ColumnLimit - FirstIndent) { 293 while (State.NextToken != NULL) { 294 addTokenToState(false, false, State); 295 } 296 } 297 298 // If the ObjC method declaration does not fit on a line, we should format 299 // it with one arg per line. 300 if (Line.Type == LT_ObjCMethodDecl) 301 State.Stack.back().BreakBeforeParameter = true; 302 303 // Find best solution in solution space. 304 analyzeSolutionSpace(State); 305 } 306 307 private: 308 void DebugTokenState(const FormatToken &FormatTok) { 309 const Token &Tok = FormatTok.Tok; 310 llvm::dbgs() << StringRef(SourceMgr.getCharacterData(Tok.getLocation()), 311 Tok.getLength()); 312 llvm::dbgs(); 313 } 314 315 struct ParenState { 316 ParenState(unsigned Indent, unsigned LastSpace, bool AvoidBinPacking, 317 bool NoLineBreak) 318 : Indent(Indent), LastSpace(LastSpace), FirstLessLess(0), 319 BreakBeforeClosingBrace(false), QuestionColumn(0), 320 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), 321 NoLineBreak(NoLineBreak), ColonPos(0), StartOfFunctionCall(0), 322 NestedNameSpecifierContinuation(0), CallContinuation(0), 323 VariablePos(0), ContainsLineBreak(false) {} 324 325 /// \brief The position to which a specific parenthesis level needs to be 326 /// indented. 327 unsigned Indent; 328 329 /// \brief The position of the last space on each level. 330 /// 331 /// Used e.g. to break like: 332 /// functionCall(Parameter, otherCall( 333 /// OtherParameter)); 334 unsigned LastSpace; 335 336 /// \brief The position the first "<<" operator encountered on each level. 337 /// 338 /// Used to align "<<" operators. 0 if no such operator has been encountered 339 /// on a level. 340 unsigned FirstLessLess; 341 342 /// \brief Whether a newline needs to be inserted before the block's closing 343 /// brace. 344 /// 345 /// We only want to insert a newline before the closing brace if there also 346 /// was a newline after the beginning left brace. 347 bool BreakBeforeClosingBrace; 348 349 /// \brief The column of a \c ? in a conditional expression; 350 unsigned QuestionColumn; 351 352 /// \brief Avoid bin packing, i.e. multiple parameters/elements on multiple 353 /// lines, in this context. 354 bool AvoidBinPacking; 355 356 /// \brief Break after the next comma (or all the commas in this context if 357 /// \c AvoidBinPacking is \c true). 358 bool BreakBeforeParameter; 359 360 /// \brief Line breaking in this context would break a formatting rule. 361 bool NoLineBreak; 362 363 /// \brief The position of the colon in an ObjC method declaration/call. 364 unsigned ColonPos; 365 366 /// \brief The start of the most recent function in a builder-type call. 367 unsigned StartOfFunctionCall; 368 369 /// \brief If a nested name specifier was broken over multiple lines, this 370 /// contains the start column of the second line. Otherwise 0. 371 unsigned NestedNameSpecifierContinuation; 372 373 /// \brief If a call expression was broken over multiple lines, this 374 /// contains the start column of the second line. Otherwise 0. 375 unsigned CallContinuation; 376 377 /// \brief The column of the first variable name in a variable declaration. 378 /// 379 /// Used to align further variables if necessary. 380 unsigned VariablePos; 381 382 /// \brief \c true if this \c ParenState already contains a line-break. 383 /// 384 /// The first line break in a certain \c ParenState causes extra penalty so 385 /// that clang-format prefers similar breaks, i.e. breaks in the same 386 /// parenthesis. 387 bool ContainsLineBreak; 388 389 bool operator<(const ParenState &Other) const { 390 if (Indent != Other.Indent) 391 return Indent < Other.Indent; 392 if (LastSpace != Other.LastSpace) 393 return LastSpace < Other.LastSpace; 394 if (FirstLessLess != Other.FirstLessLess) 395 return FirstLessLess < Other.FirstLessLess; 396 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 397 return BreakBeforeClosingBrace; 398 if (QuestionColumn != Other.QuestionColumn) 399 return QuestionColumn < Other.QuestionColumn; 400 if (AvoidBinPacking != Other.AvoidBinPacking) 401 return AvoidBinPacking; 402 if (BreakBeforeParameter != Other.BreakBeforeParameter) 403 return BreakBeforeParameter; 404 if (NoLineBreak != Other.NoLineBreak) 405 return NoLineBreak; 406 if (ColonPos != Other.ColonPos) 407 return ColonPos < Other.ColonPos; 408 if (StartOfFunctionCall != Other.StartOfFunctionCall) 409 return StartOfFunctionCall < Other.StartOfFunctionCall; 410 if (CallContinuation != Other.CallContinuation) 411 return CallContinuation < Other.CallContinuation; 412 if (VariablePos != Other.VariablePos) 413 return VariablePos < Other.VariablePos; 414 if (ContainsLineBreak != Other.ContainsLineBreak) 415 return ContainsLineBreak < Other.ContainsLineBreak; 416 return false; 417 } 418 }; 419 420 /// \brief The current state when indenting a unwrapped line. 421 /// 422 /// As the indenting tries different combinations this is copied by value. 423 struct LineState { 424 /// \brief The number of used columns in the current line. 425 unsigned Column; 426 427 /// \brief The token that needs to be next formatted. 428 const FormatToken *NextToken; 429 430 /// \brief \c true if this line contains a continued for-loop section. 431 bool LineContainsContinuedForLoopSection; 432 433 /// \brief The level of nesting inside (), [], <> and {}. 434 unsigned ParenLevel; 435 436 /// \brief The \c ParenLevel at the start of this line. 437 unsigned StartOfLineLevel; 438 439 /// \brief The lowest \c ParenLevel on the current line. 440 unsigned LowestLevelOnLine; 441 442 /// \brief The start column of the string literal, if we're in a string 443 /// literal sequence, 0 otherwise. 444 unsigned StartOfStringLiteral; 445 446 /// \brief A stack keeping track of properties applying to parenthesis 447 /// levels. 448 std::vector<ParenState> Stack; 449 450 /// \brief Ignore the stack of \c ParenStates for state comparison. 451 /// 452 /// In long and deeply nested unwrapped lines, the current algorithm can 453 /// be insufficient for finding the best formatting with a reasonable amount 454 /// of time and memory. Setting this flag will effectively lead to the 455 /// algorithm not analyzing some combinations. However, these combinations 456 /// rarely contain the optimal solution: In short, accepting a higher 457 /// penalty early would need to lead to different values in the \c 458 /// ParenState stack (in an otherwise identical state) and these different 459 /// values would need to lead to a significant amount of avoided penalty 460 /// later. 461 /// 462 /// FIXME: Come up with a better algorithm instead. 463 bool IgnoreStackForComparison; 464 465 /// \brief Comparison operator to be able to used \c LineState in \c map. 466 bool operator<(const LineState &Other) const { 467 if (NextToken != Other.NextToken) 468 return NextToken < Other.NextToken; 469 if (Column != Other.Column) 470 return Column < Other.Column; 471 if (LineContainsContinuedForLoopSection != 472 Other.LineContainsContinuedForLoopSection) 473 return LineContainsContinuedForLoopSection; 474 if (ParenLevel != Other.ParenLevel) 475 return ParenLevel < Other.ParenLevel; 476 if (StartOfLineLevel != Other.StartOfLineLevel) 477 return StartOfLineLevel < Other.StartOfLineLevel; 478 if (LowestLevelOnLine != Other.LowestLevelOnLine) 479 return LowestLevelOnLine < Other.LowestLevelOnLine; 480 if (StartOfStringLiteral != Other.StartOfStringLiteral) 481 return StartOfStringLiteral < Other.StartOfStringLiteral; 482 if (IgnoreStackForComparison || Other.IgnoreStackForComparison) 483 return false; 484 return Stack < Other.Stack; 485 } 486 }; 487 488 /// \brief Appends the next token to \p State and updates information 489 /// necessary for indentation. 490 /// 491 /// Puts the token on the current line if \p Newline is \c false and adds a 492 /// line break and necessary indentation otherwise. 493 /// 494 /// If \p DryRun is \c false, also creates and stores the required 495 /// \c Replacement. 496 unsigned addTokenToState(bool Newline, bool DryRun, LineState &State) { 497 const FormatToken &Current = *State.NextToken; 498 const FormatToken &Previous = *State.NextToken->Previous; 499 500 if (State.Stack.size() == 0 || Current.Type == TT_ImplicitStringLiteral) { 501 // FIXME: Is this correct? 502 int WhitespaceLength = SourceMgr.getSpellingColumnNumber( 503 State.NextToken->WhitespaceRange.getEnd()) - 504 SourceMgr.getSpellingColumnNumber( 505 State.NextToken->WhitespaceRange.getBegin()); 506 State.Column += WhitespaceLength + State.NextToken->CodePointCount; 507 State.NextToken = State.NextToken->Next; 508 return 0; 509 } 510 511 // If we are continuing an expression, we want to indent an extra 4 spaces. 512 unsigned ContinuationIndent = 513 std::max(State.Stack.back().LastSpace, State.Stack.back().Indent) + 4; 514 if (Newline) { 515 State.Stack.back().ContainsLineBreak = true; 516 if (Current.is(tok::r_brace)) { 517 State.Column = Line.Level * Style.IndentWidth; 518 } else if (Current.is(tok::string_literal) && 519 State.StartOfStringLiteral != 0) { 520 State.Column = State.StartOfStringLiteral; 521 State.Stack.back().BreakBeforeParameter = true; 522 } else if (Current.is(tok::lessless) && 523 State.Stack.back().FirstLessLess != 0) { 524 State.Column = State.Stack.back().FirstLessLess; 525 } else if (Current.isOneOf(tok::period, tok::arrow) && 526 Current.Type != TT_DesignatedInitializerPeriod) { 527 if (State.Stack.back().CallContinuation == 0) { 528 State.Column = ContinuationIndent; 529 State.Stack.back().CallContinuation = State.Column; 530 } else { 531 State.Column = State.Stack.back().CallContinuation; 532 } 533 } else if (Current.Type == TT_ConditionalExpr) { 534 State.Column = State.Stack.back().QuestionColumn; 535 } else if (Previous.is(tok::comma) && 536 State.Stack.back().VariablePos != 0) { 537 State.Column = State.Stack.back().VariablePos; 538 } else if (Previous.ClosesTemplateDeclaration || 539 (Current.Type == TT_StartOfName && State.ParenLevel == 0 && 540 (!Style.IndentFunctionDeclarationAfterType || 541 Line.StartsDefinition))) { 542 State.Column = State.Stack.back().Indent; 543 } else if (Current.Type == TT_ObjCSelectorName) { 544 if (State.Stack.back().ColonPos > Current.CodePointCount) { 545 State.Column = State.Stack.back().ColonPos - Current.CodePointCount; 546 } else { 547 State.Column = State.Stack.back().Indent; 548 State.Stack.back().ColonPos = State.Column + Current.CodePointCount; 549 } 550 } else if (Current.Type == TT_StartOfName || 551 Previous.isOneOf(tok::coloncolon, tok::equal) || 552 Previous.Type == TT_ObjCMethodExpr) { 553 State.Column = ContinuationIndent; 554 } else { 555 State.Column = State.Stack.back().Indent; 556 // Ensure that we fall back to indenting 4 spaces instead of just 557 // flushing continuations left. 558 if (State.Column == FirstIndent) 559 State.Column += 4; 560 } 561 562 if (Current.is(tok::question)) 563 State.Stack.back().BreakBeforeParameter = true; 564 if ((Previous.isOneOf(tok::comma, tok::semi) && 565 !State.Stack.back().AvoidBinPacking) || 566 Previous.Type == TT_BinaryOperator) 567 State.Stack.back().BreakBeforeParameter = false; 568 if (Previous.Type == TT_TemplateCloser && State.ParenLevel == 0) 569 State.Stack.back().BreakBeforeParameter = false; 570 571 if (!DryRun) { 572 unsigned NewLines = 1; 573 if (Current.is(tok::comment)) 574 NewLines = std::max( 575 NewLines, 576 std::min(Current.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1)); 577 Whitespaces.replaceWhitespace(Current, NewLines, State.Column, 578 State.Column, Line.InPPDirective); 579 } 580 581 State.Stack.back().LastSpace = State.Column; 582 if (Current.isOneOf(tok::arrow, tok::period) && 583 Current.Type != TT_DesignatedInitializerPeriod) 584 State.Stack.back().LastSpace += Current.CodePointCount; 585 State.StartOfLineLevel = State.ParenLevel; 586 State.LowestLevelOnLine = State.ParenLevel; 587 588 // Any break on this level means that the parent level has been broken 589 // and we need to avoid bin packing there. 590 for (unsigned i = 0, e = State.Stack.size() - 1; i != e; ++i) { 591 State.Stack[i].BreakBeforeParameter = true; 592 } 593 const FormatToken *TokenBefore = Current.getPreviousNonComment(); 594 if (TokenBefore && !TokenBefore->isOneOf(tok::comma, tok::semi) && 595 TokenBefore->Type != TT_TemplateCloser && 596 TokenBefore->Type != TT_BinaryOperator && !TokenBefore->opensScope()) 597 State.Stack.back().BreakBeforeParameter = true; 598 599 // If we break after {, we should also break before the corresponding }. 600 if (Previous.is(tok::l_brace)) 601 State.Stack.back().BreakBeforeClosingBrace = true; 602 603 if (State.Stack.back().AvoidBinPacking) { 604 // If we are breaking after '(', '{', '<', this is not bin packing 605 // unless AllowAllParametersOfDeclarationOnNextLine is false. 606 if (!(Previous.isOneOf(tok::l_paren, tok::l_brace) || 607 Previous.Type == TT_BinaryOperator) || 608 (!Style.AllowAllParametersOfDeclarationOnNextLine && 609 Line.MustBeDeclaration)) 610 State.Stack.back().BreakBeforeParameter = true; 611 } 612 } else { 613 if (Current.is(tok::equal) && 614 (RootToken->is(tok::kw_for) || State.ParenLevel == 0) && 615 State.Stack.back().VariablePos == 0) { 616 State.Stack.back().VariablePos = State.Column; 617 // Move over * and & if they are bound to the variable name. 618 const FormatToken *Tok = &Previous; 619 while (Tok && State.Stack.back().VariablePos >= Tok->CodePointCount) { 620 State.Stack.back().VariablePos -= Tok->CodePointCount; 621 if (Tok->SpacesRequiredBefore != 0) 622 break; 623 Tok = Tok->Previous; 624 } 625 if (Previous.PartOfMultiVariableDeclStmt) 626 State.Stack.back().LastSpace = State.Stack.back().VariablePos; 627 } 628 629 unsigned Spaces = State.NextToken->SpacesRequiredBefore; 630 631 if (!DryRun) 632 Whitespaces.replaceWhitespace(Current, 0, Spaces, 633 State.Column + Spaces); 634 635 if (Current.Type == TT_ObjCSelectorName && 636 State.Stack.back().ColonPos == 0) { 637 if (State.Stack.back().Indent + Current.LongestObjCSelectorName > 638 State.Column + Spaces + Current.CodePointCount) 639 State.Stack.back().ColonPos = 640 State.Stack.back().Indent + Current.LongestObjCSelectorName; 641 else 642 State.Stack.back().ColonPos = 643 State.Column + Spaces + Current.CodePointCount; 644 } 645 646 if (Previous.opensScope() && Previous.Type != TT_ObjCMethodExpr && 647 Current.Type != TT_LineComment) 648 State.Stack.back().Indent = State.Column + Spaces; 649 if (Previous.is(tok::comma) && !Current.isTrailingComment() && 650 State.Stack.back().AvoidBinPacking) 651 State.Stack.back().NoLineBreak = true; 652 653 State.Column += Spaces; 654 if (Current.is(tok::l_paren) && Previous.isOneOf(tok::kw_if, tok::kw_for)) 655 // Treat the condition inside an if as if it was a second function 656 // parameter, i.e. let nested calls have an indent of 4. 657 State.Stack.back().LastSpace = State.Column + 1; // 1 is length of "(". 658 else if (Previous.is(tok::comma)) 659 State.Stack.back().LastSpace = State.Column; 660 else if ((Previous.Type == TT_BinaryOperator || 661 Previous.Type == TT_ConditionalExpr || 662 Previous.Type == TT_CtorInitializerColon) && 663 !(Previous.getPrecedence() == prec::Assignment && 664 Current.FakeLParens.empty())) 665 // Always indent relative to the RHS of the expression unless this is a 666 // simple assignment without binary expression on the RHS. 667 State.Stack.back().LastSpace = State.Column; 668 else if (Previous.Type == TT_InheritanceColon) 669 State.Stack.back().Indent = State.Column; 670 else if (Previous.opensScope() && !Current.FakeLParens.empty()) 671 // If this function has multiple parameters or a binary expression 672 // parameter, indent nested calls from the start of the first parameter. 673 State.Stack.back().LastSpace = State.Column; 674 } 675 676 return moveStateToNextToken(State, DryRun); 677 } 678 679 /// \brief Mark the next token as consumed in \p State and modify its stacks 680 /// accordingly. 681 unsigned moveStateToNextToken(LineState &State, bool DryRun) { 682 const FormatToken &Current = *State.NextToken; 683 assert(State.Stack.size()); 684 685 if (Current.Type == TT_InheritanceColon) 686 State.Stack.back().AvoidBinPacking = true; 687 if (Current.is(tok::lessless) && State.Stack.back().FirstLessLess == 0) 688 State.Stack.back().FirstLessLess = State.Column; 689 if (Current.is(tok::question)) 690 State.Stack.back().QuestionColumn = State.Column; 691 if (!Current.opensScope() && !Current.closesScope()) 692 State.LowestLevelOnLine = 693 std::min(State.LowestLevelOnLine, State.ParenLevel); 694 if (Current.isOneOf(tok::period, tok::arrow) && 695 Line.Type == LT_BuilderTypeCall && State.ParenLevel == 0) 696 State.Stack.back().StartOfFunctionCall = 697 Current.LastInChainOfCalls ? 0 698 : State.Column + Current.CodePointCount; 699 if (Current.Type == TT_CtorInitializerColon) { 700 // Indent 2 from the column, so: 701 // SomeClass::SomeClass() 702 // : First(...), ... 703 // Next(...) 704 // ^ line up here. 705 State.Stack.back().Indent = State.Column + 2; 706 if (Style.ConstructorInitializerAllOnOneLineOrOnePerLine) 707 State.Stack.back().AvoidBinPacking = true; 708 State.Stack.back().BreakBeforeParameter = false; 709 } 710 711 // If return returns a binary expression, align after it. 712 if (Current.is(tok::kw_return) && !Current.FakeLParens.empty()) 713 State.Stack.back().LastSpace = State.Column + 7; 714 715 // In ObjC method declaration we align on the ":" of parameters, but we need 716 // to ensure that we indent parameters on subsequent lines by at least 4. 717 if (Current.Type == TT_ObjCMethodSpecifier) 718 State.Stack.back().Indent += 4; 719 720 // Insert scopes created by fake parenthesis. 721 const FormatToken *Previous = Current.getPreviousNonComment(); 722 // Don't add extra indentation for the first fake parenthesis after 723 // 'return', assignements or opening <({[. The indentation for these cases 724 // is special cased. 725 bool SkipFirstExtraIndent = 726 Current.is(tok::kw_return) || 727 (Previous && (Previous->opensScope() || 728 Previous->getPrecedence() == prec::Assignment)); 729 for (SmallVectorImpl<prec::Level>::const_reverse_iterator 730 I = Current.FakeLParens.rbegin(), 731 E = Current.FakeLParens.rend(); 732 I != E; ++I) { 733 ParenState NewParenState = State.Stack.back(); 734 NewParenState.ContainsLineBreak = false; 735 NewParenState.Indent = 736 std::max(std::max(State.Column, NewParenState.Indent), 737 State.Stack.back().LastSpace); 738 739 // Always indent conditional expressions. Never indent expression where 740 // the 'operator' is ',', ';' or an assignment (i.e. *I <= 741 // prec::Assignment) as those have different indentation rules. Indent 742 // other expression, unless the indentation needs to be skipped. 743 if (*I == prec::Conditional || 744 (!SkipFirstExtraIndent && *I > prec::Assignment)) 745 NewParenState.Indent += 4; 746 if (Previous && !Previous->opensScope()) 747 NewParenState.BreakBeforeParameter = false; 748 State.Stack.push_back(NewParenState); 749 SkipFirstExtraIndent = false; 750 } 751 752 // If we encounter an opening (, [, { or <, we add a level to our stacks to 753 // prepare for the following tokens. 754 if (Current.opensScope()) { 755 unsigned NewIndent; 756 unsigned LastSpace = State.Stack.back().LastSpace; 757 bool AvoidBinPacking; 758 if (Current.is(tok::l_brace)) { 759 NewIndent = Style.IndentWidth + LastSpace; 760 const FormatToken *NextNoComment = Current.getNextNonComment(); 761 AvoidBinPacking = NextNoComment && 762 NextNoComment->Type == TT_DesignatedInitializerPeriod; 763 } else { 764 NewIndent = 765 4 + std::max(LastSpace, State.Stack.back().StartOfFunctionCall); 766 AvoidBinPacking = !Style.BinPackParameters; 767 } 768 769 State.Stack.push_back(ParenState(NewIndent, LastSpace, AvoidBinPacking, 770 State.Stack.back().NoLineBreak)); 771 ++State.ParenLevel; 772 } 773 774 // If this '[' opens an ObjC call, determine whether all parameters fit into 775 // one line and put one per line if they don't. 776 if (Current.is(tok::l_square) && Current.Type == TT_ObjCMethodExpr && 777 Current.MatchingParen != NULL) { 778 if (getLengthToMatchingParen(Current) + State.Column > getColumnLimit()) 779 State.Stack.back().BreakBeforeParameter = true; 780 } 781 782 // If we encounter a closing ), ], } or >, we can remove a level from our 783 // stacks. 784 if (Current.isOneOf(tok::r_paren, tok::r_square) || 785 (Current.is(tok::r_brace) && State.NextToken != RootToken) || 786 State.NextToken->Type == TT_TemplateCloser) { 787 State.Stack.pop_back(); 788 --State.ParenLevel; 789 } 790 791 // Remove scopes created by fake parenthesis. 792 for (unsigned i = 0, e = Current.FakeRParens; i != e; ++i) { 793 unsigned VariablePos = State.Stack.back().VariablePos; 794 State.Stack.pop_back(); 795 State.Stack.back().VariablePos = VariablePos; 796 } 797 798 if (Current.is(tok::string_literal) && State.StartOfStringLiteral == 0) { 799 State.StartOfStringLiteral = State.Column; 800 } else if (!Current.isOneOf(tok::comment, tok::identifier, tok::hash, 801 tok::string_literal)) { 802 State.StartOfStringLiteral = 0; 803 } 804 805 State.Column += Current.CodePointCount; 806 807 State.NextToken = State.NextToken->Next; 808 809 return breakProtrudingToken(Current, State, DryRun); 810 } 811 812 /// \brief If the current token sticks out over the end of the line, break 813 /// it if possible. 814 /// 815 /// \returns An extra penalty if a token was broken, otherwise 0. 816 /// 817 /// The returned penalty will cover the cost of the additional line breaks and 818 /// column limit violation in all lines except for the last one. The penalty 819 /// for the column limit violation in the last line (and in single line 820 /// tokens) is handled in \c addNextStateToQueue. 821 unsigned breakProtrudingToken(const FormatToken &Current, LineState &State, 822 bool DryRun) { 823 llvm::OwningPtr<BreakableToken> Token; 824 unsigned StartColumn = State.Column - Current.CodePointCount; 825 unsigned OriginalStartColumn = 826 SourceMgr.getSpellingColumnNumber(Current.getStartOfNonWhitespace()) - 827 1; 828 829 if (Current.is(tok::string_literal) && 830 Current.Type != TT_ImplicitStringLiteral) { 831 // Only break up default narrow strings. 832 if (!Current.TokenText.startswith("\"")) 833 return 0; 834 835 Token.reset(new BreakableStringLiteral(Current, StartColumn, 836 Line.InPPDirective, Encoding)); 837 } else if (Current.Type == TT_BlockComment) { 838 Token.reset(new BreakableBlockComment( 839 Style, Current, StartColumn, OriginalStartColumn, !Current.Previous, 840 Line.InPPDirective, Encoding)); 841 } else if (Current.Type == TT_LineComment && 842 (Current.Previous == NULL || 843 Current.Previous->Type != TT_ImplicitStringLiteral)) { 844 Token.reset(new BreakableLineComment(Current, StartColumn, 845 Line.InPPDirective, Encoding)); 846 } else { 847 return 0; 848 } 849 if (Current.UnbreakableTailLength >= getColumnLimit()) 850 return 0; 851 852 unsigned RemainingSpace = getColumnLimit() - Current.UnbreakableTailLength; 853 bool BreakInserted = false; 854 unsigned Penalty = 0; 855 unsigned RemainingTokenColumns = 0; 856 for (unsigned LineIndex = 0, EndIndex = Token->getLineCount(); 857 LineIndex != EndIndex; ++LineIndex) { 858 if (!DryRun) 859 Token->replaceWhitespaceBefore(LineIndex, Whitespaces); 860 unsigned TailOffset = 0; 861 RemainingTokenColumns = Token->getLineLengthAfterSplit( 862 LineIndex, TailOffset, StringRef::npos); 863 while (RemainingTokenColumns > RemainingSpace) { 864 BreakableToken::Split Split = 865 Token->getSplit(LineIndex, TailOffset, getColumnLimit()); 866 if (Split.first == StringRef::npos) { 867 // The last line's penalty is handled in addNextStateToQueue(). 868 if (LineIndex < EndIndex - 1) 869 Penalty += Style.PenaltyExcessCharacter * 870 (RemainingTokenColumns - RemainingSpace); 871 break; 872 } 873 assert(Split.first != 0); 874 unsigned NewRemainingTokenColumns = Token->getLineLengthAfterSplit( 875 LineIndex, TailOffset + Split.first + Split.second, 876 StringRef::npos); 877 assert(NewRemainingTokenColumns < RemainingTokenColumns); 878 if (!DryRun) 879 Token->insertBreak(LineIndex, TailOffset, Split, Whitespaces); 880 Penalty += Current.is(tok::string_literal) ? Style.PenaltyBreakString 881 : Style.PenaltyBreakComment; 882 unsigned ColumnsUsed = 883 Token->getLineLengthAfterSplit(LineIndex, TailOffset, Split.first); 884 if (ColumnsUsed > getColumnLimit()) { 885 Penalty += 886 Style.PenaltyExcessCharacter * (ColumnsUsed - getColumnLimit()); 887 } 888 TailOffset += Split.first + Split.second; 889 RemainingTokenColumns = NewRemainingTokenColumns; 890 BreakInserted = true; 891 } 892 } 893 894 State.Column = RemainingTokenColumns; 895 896 if (BreakInserted) { 897 // If we break the token inside a parameter list, we need to break before 898 // the next parameter on all levels, so that the next parameter is clearly 899 // visible. Line comments already introduce a break. 900 if (Current.Type != TT_LineComment) { 901 for (unsigned i = 0, e = State.Stack.size(); i != e; ++i) 902 State.Stack[i].BreakBeforeParameter = true; 903 } 904 905 State.Stack.back().LastSpace = StartColumn; 906 } 907 return Penalty; 908 } 909 910 unsigned getColumnLimit() { 911 // In preprocessor directives reserve two chars for trailing " \" 912 return Style.ColumnLimit - (Line.InPPDirective ? 2 : 0); 913 } 914 915 /// \brief An edge in the solution space from \c Previous->State to \c State, 916 /// inserting a newline dependent on the \c NewLine. 917 struct StateNode { 918 StateNode(const LineState &State, bool NewLine, StateNode *Previous) 919 : State(State), NewLine(NewLine), Previous(Previous) {} 920 LineState State; 921 bool NewLine; 922 StateNode *Previous; 923 }; 924 925 /// \brief A pair of <penalty, count> that is used to prioritize the BFS on. 926 /// 927 /// In case of equal penalties, we want to prefer states that were inserted 928 /// first. During state generation we make sure that we insert states first 929 /// that break the line as late as possible. 930 typedef std::pair<unsigned, unsigned> OrderedPenalty; 931 932 /// \brief An item in the prioritized BFS search queue. The \c StateNode's 933 /// \c State has the given \c OrderedPenalty. 934 typedef std::pair<OrderedPenalty, StateNode *> QueueItem; 935 936 /// \brief The BFS queue type. 937 typedef std::priority_queue<QueueItem, std::vector<QueueItem>, 938 std::greater<QueueItem> > QueueType; 939 940 /// \brief Analyze the entire solution space starting from \p InitialState. 941 /// 942 /// This implements a variant of Dijkstra's algorithm on the graph that spans 943 /// the solution space (\c LineStates are the nodes). The algorithm tries to 944 /// find the shortest path (the one with lowest penalty) from \p InitialState 945 /// to a state where all tokens are placed. 946 void analyzeSolutionSpace(LineState &InitialState) { 947 std::set<LineState> Seen; 948 949 // Insert start element into queue. 950 StateNode *Node = 951 new (Allocator.Allocate()) StateNode(InitialState, false, NULL); 952 Queue.push(QueueItem(OrderedPenalty(0, Count), Node)); 953 ++Count; 954 955 // While not empty, take first element and follow edges. 956 while (!Queue.empty()) { 957 unsigned Penalty = Queue.top().first.first; 958 StateNode *Node = Queue.top().second; 959 if (Node->State.NextToken == NULL) { 960 DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n"); 961 break; 962 } 963 Queue.pop(); 964 965 // Cut off the analysis of certain solutions if the analysis gets too 966 // complex. See description of IgnoreStackForComparison. 967 if (Count > 10000) 968 Node->State.IgnoreStackForComparison = true; 969 970 if (!Seen.insert(Node->State).second) 971 // State already examined with lower penalty. 972 continue; 973 974 addNextStateToQueue(Penalty, Node, /*NewLine=*/false); 975 addNextStateToQueue(Penalty, Node, /*NewLine=*/true); 976 } 977 978 if (Queue.empty()) 979 // We were unable to find a solution, do nothing. 980 // FIXME: Add diagnostic? 981 return; 982 983 // Reconstruct the solution. 984 reconstructPath(InitialState, Queue.top().second); 985 DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n"); 986 DEBUG(llvm::dbgs() << "---\n"); 987 } 988 989 void reconstructPath(LineState &State, StateNode *Current) { 990 std::deque<StateNode *> Path; 991 // We do not need a break before the initial token. 992 while (Current->Previous) { 993 Path.push_front(Current); 994 Current = Current->Previous; 995 } 996 for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end(); 997 I != E; ++I) { 998 DEBUG({ 999 if ((*I)->NewLine) { 1000 llvm::dbgs() << "Penalty for splitting before " 1001 << (*I)->Previous->State.NextToken->Tok.getName() << ": " 1002 << (*I)->Previous->State.NextToken->SplitPenalty << "\n"; 1003 } 1004 }); 1005 addTokenToState((*I)->NewLine, false, State); 1006 } 1007 } 1008 1009 /// \brief Add the following state to the analysis queue \c Queue. 1010 /// 1011 /// Assume the current state is \p PreviousNode and has been reached with a 1012 /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true. 1013 void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode, 1014 bool NewLine) { 1015 if (NewLine && !canBreak(PreviousNode->State)) 1016 return; 1017 if (!NewLine && mustBreak(PreviousNode->State)) 1018 return; 1019 if (NewLine) { 1020 if (!PreviousNode->State.Stack.back().ContainsLineBreak) 1021 Penalty += 15; 1022 Penalty += PreviousNode->State.NextToken->SplitPenalty; 1023 } 1024 1025 StateNode *Node = new (Allocator.Allocate()) 1026 StateNode(PreviousNode->State, NewLine, PreviousNode); 1027 Penalty += addTokenToState(NewLine, true, Node->State); 1028 if (Node->State.Column > getColumnLimit()) { 1029 unsigned ExcessCharacters = Node->State.Column - getColumnLimit(); 1030 Penalty += Style.PenaltyExcessCharacter * ExcessCharacters; 1031 } 1032 1033 Queue.push(QueueItem(OrderedPenalty(Penalty, Count), Node)); 1034 ++Count; 1035 } 1036 1037 /// \brief Returns \c true, if a line break after \p State is allowed. 1038 bool canBreak(const LineState &State) { 1039 const FormatToken &Current = *State.NextToken; 1040 const FormatToken &Previous = *Current.Previous; 1041 assert(&Previous == Current.Previous); 1042 if (!Current.CanBreakBefore && 1043 !(Current.is(tok::r_brace) && 1044 State.Stack.back().BreakBeforeClosingBrace)) 1045 return false; 1046 // The opening "{" of a braced list has to be on the same line as the first 1047 // element if it is nested in another braced init list or function call. 1048 if (!Current.MustBreakBefore && Previous.is(tok::l_brace) && 1049 Previous.Previous && 1050 Previous.Previous->isOneOf(tok::l_brace, tok::l_paren, tok::comma)) 1051 return false; 1052 // This prevents breaks like: 1053 // ... 1054 // SomeParameter, OtherParameter).DoSomething( 1055 // ... 1056 // As they hide "DoSomething" and are generally bad for readability. 1057 if (Previous.opensScope() && 1058 State.LowestLevelOnLine < State.StartOfLineLevel) 1059 return false; 1060 return !State.Stack.back().NoLineBreak; 1061 } 1062 1063 /// \brief Returns \c true, if a line break after \p State is mandatory. 1064 bool mustBreak(const LineState &State) { 1065 const FormatToken &Current = *State.NextToken; 1066 const FormatToken &Previous = *Current.Previous; 1067 if (Current.MustBreakBefore || Current.Type == TT_InlineASMColon) 1068 return true; 1069 if (Current.is(tok::r_brace) && State.Stack.back().BreakBeforeClosingBrace) 1070 return true; 1071 if (Previous.is(tok::semi) && State.LineContainsContinuedForLoopSection) 1072 return true; 1073 if ((Previous.isOneOf(tok::comma, tok::semi) || Current.is(tok::question) || 1074 Current.Type == TT_ConditionalExpr) && 1075 State.Stack.back().BreakBeforeParameter && 1076 !Current.isTrailingComment() && 1077 !Current.isOneOf(tok::r_paren, tok::r_brace)) 1078 return true; 1079 1080 // If we need to break somewhere inside the LHS of a binary expression, we 1081 // should also break after the operator. Otherwise, the formatting would 1082 // hide the operator precedence, e.g. in: 1083 // if (aaaaaaaaaaaaaa == 1084 // bbbbbbbbbbbbbb && c) {.. 1085 // For comparisons, we only apply this rule, if the LHS is a binary 1086 // expression itself as otherwise, the line breaks seem superfluous. 1087 // We need special cases for ">>" which we have split into two ">" while 1088 // lexing in order to make template parsing easier. 1089 bool IsComparison = (Previous.getPrecedence() == prec::Relational || 1090 Previous.getPrecedence() == prec::Equality) && 1091 Previous.Previous && 1092 Previous.Previous->Type != TT_BinaryOperator; // For >>. 1093 bool LHSIsBinaryExpr = 1094 Previous.Previous && Previous.Previous->FakeRParens > 0; 1095 if (Previous.Type == TT_BinaryOperator && 1096 (!IsComparison || LHSIsBinaryExpr) && 1097 Current.Type != TT_BinaryOperator && // For >>. 1098 !Current.isTrailingComment() && 1099 !Previous.isOneOf(tok::lessless, tok::question) && 1100 Previous.getPrecedence() != prec::Assignment && 1101 State.Stack.back().BreakBeforeParameter) 1102 return true; 1103 1104 // FIXME: Comparing LongestObjCSelectorName to 0 is a hacky way of finding 1105 // out whether it is the first parameter. Clean this up. 1106 if (Current.Type == TT_ObjCSelectorName && 1107 Current.LongestObjCSelectorName == 0 && 1108 State.Stack.back().BreakBeforeParameter) 1109 return true; 1110 if ((Current.Type == TT_CtorInitializerColon || 1111 (Previous.ClosesTemplateDeclaration && State.ParenLevel == 0))) 1112 return true; 1113 1114 if (Current.Type == TT_StartOfName && Line.MightBeFunctionDecl && 1115 State.Stack.back().BreakBeforeParameter && State.ParenLevel == 0) 1116 return true; 1117 return false; 1118 } 1119 1120 // Returns the total number of columns required for the remaining tokens. 1121 unsigned getRemainingLength(const LineState &State) { 1122 if (State.NextToken && State.NextToken->Previous) 1123 return Line.Last->TotalLength - State.NextToken->Previous->TotalLength; 1124 return 0; 1125 } 1126 1127 FormatStyle Style; 1128 SourceManager &SourceMgr; 1129 const AnnotatedLine &Line; 1130 const unsigned FirstIndent; 1131 const FormatToken *RootToken; 1132 WhitespaceManager &Whitespaces; 1133 1134 llvm::SpecificBumpPtrAllocator<StateNode> Allocator; 1135 QueueType Queue; 1136 // Increasing count of \c StateNode items we have created. This is used 1137 // to create a deterministic order independent of the container. 1138 unsigned Count; 1139 encoding::Encoding Encoding; 1140 }; 1141 1142 class FormatTokenLexer { 1143 public: 1144 FormatTokenLexer(Lexer &Lex, SourceManager &SourceMgr, 1145 encoding::Encoding Encoding) 1146 : FormatTok(NULL), GreaterStashed(false), TrailingWhitespace(0), Lex(Lex), 1147 SourceMgr(SourceMgr), IdentTable(Lex.getLangOpts()), 1148 Encoding(Encoding) { 1149 Lex.SetKeepWhitespaceMode(true); 1150 } 1151 1152 ArrayRef<FormatToken *> lex() { 1153 assert(Tokens.empty()); 1154 do { 1155 Tokens.push_back(getNextToken()); 1156 } while (Tokens.back()->Tok.isNot(tok::eof)); 1157 return Tokens; 1158 } 1159 1160 IdentifierTable &getIdentTable() { return IdentTable; } 1161 1162 private: 1163 FormatToken *getNextToken() { 1164 if (GreaterStashed) { 1165 // Create a synthesized second '>' token. 1166 Token Greater = FormatTok->Tok; 1167 FormatTok = new (Allocator.Allocate()) FormatToken; 1168 FormatTok->Tok = Greater; 1169 SourceLocation GreaterLocation = 1170 FormatTok->Tok.getLocation().getLocWithOffset(1); 1171 FormatTok->WhitespaceRange = 1172 SourceRange(GreaterLocation, GreaterLocation); 1173 FormatTok->TokenText = ">"; 1174 FormatTok->CodePointCount = 1; 1175 GreaterStashed = false; 1176 return FormatTok; 1177 } 1178 1179 FormatTok = new (Allocator.Allocate()) FormatToken; 1180 Lex.LexFromRawLexer(FormatTok->Tok); 1181 StringRef Text = rawTokenText(FormatTok->Tok); 1182 SourceLocation WhitespaceStart = 1183 FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace); 1184 if (SourceMgr.getFileOffset(WhitespaceStart) == 0) 1185 FormatTok->IsFirst = true; 1186 1187 // Consume and record whitespace until we find a significant token. 1188 unsigned WhitespaceLength = TrailingWhitespace; 1189 while (FormatTok->Tok.is(tok::unknown)) { 1190 unsigned Newlines = Text.count('\n'); 1191 if (Newlines > 0) 1192 FormatTok->LastNewlineOffset = WhitespaceLength + Text.rfind('\n') + 1; 1193 FormatTok->NewlinesBefore += Newlines; 1194 unsigned EscapedNewlines = Text.count("\\\n"); 1195 FormatTok->HasUnescapedNewline |= EscapedNewlines != Newlines; 1196 WhitespaceLength += FormatTok->Tok.getLength(); 1197 1198 Lex.LexFromRawLexer(FormatTok->Tok); 1199 Text = rawTokenText(FormatTok->Tok); 1200 } 1201 1202 // In case the token starts with escaped newlines, we want to 1203 // take them into account as whitespace - this pattern is quite frequent 1204 // in macro definitions. 1205 // FIXME: What do we want to do with other escaped spaces, and escaped 1206 // spaces or newlines in the middle of tokens? 1207 // FIXME: Add a more explicit test. 1208 while (Text.size() > 1 && Text[0] == '\\' && Text[1] == '\n') { 1209 // FIXME: ++FormatTok->NewlinesBefore is missing... 1210 WhitespaceLength += 2; 1211 Text = Text.substr(2); 1212 } 1213 1214 TrailingWhitespace = 0; 1215 if (FormatTok->Tok.is(tok::comment)) { 1216 StringRef UntrimmedText = Text; 1217 Text = Text.rtrim(); 1218 TrailingWhitespace = UntrimmedText.size() - Text.size(); 1219 } else if (FormatTok->Tok.is(tok::raw_identifier)) { 1220 IdentifierInfo &Info = IdentTable.get(Text); 1221 FormatTok->Tok.setIdentifierInfo(&Info); 1222 FormatTok->Tok.setKind(Info.getTokenID()); 1223 } else if (FormatTok->Tok.is(tok::greatergreater)) { 1224 FormatTok->Tok.setKind(tok::greater); 1225 Text = Text.substr(0, 1); 1226 GreaterStashed = true; 1227 } 1228 1229 // Now FormatTok is the next non-whitespace token. 1230 FormatTok->TokenText = Text; 1231 FormatTok->CodePointCount = encoding::getCodePointCount(Text, Encoding); 1232 1233 FormatTok->WhitespaceRange = SourceRange( 1234 WhitespaceStart, WhitespaceStart.getLocWithOffset(WhitespaceLength)); 1235 return FormatTok; 1236 } 1237 1238 FormatToken *FormatTok; 1239 bool GreaterStashed; 1240 unsigned TrailingWhitespace; 1241 Lexer &Lex; 1242 SourceManager &SourceMgr; 1243 IdentifierTable IdentTable; 1244 encoding::Encoding Encoding; 1245 llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; 1246 SmallVector<FormatToken *, 16> Tokens; 1247 1248 /// Returns the text of \c FormatTok. 1249 StringRef rawTokenText(Token &Tok) { 1250 return StringRef(SourceMgr.getCharacterData(Tok.getLocation()), 1251 Tok.getLength()); 1252 } 1253 }; 1254 1255 class Formatter : public UnwrappedLineConsumer { 1256 public: 1257 Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr, 1258 const std::vector<CharSourceRange> &Ranges) 1259 : Style(Style), Lex(Lex), SourceMgr(SourceMgr), 1260 Whitespaces(SourceMgr, Style), Ranges(Ranges), 1261 Encoding(encoding::detectEncoding(Lex.getBuffer())) { 1262 DEBUG(llvm::dbgs() 1263 << "File encoding: " 1264 << (Encoding == encoding::Encoding_UTF8 ? "UTF8" : "unknown") 1265 << "\n"); 1266 } 1267 1268 virtual ~Formatter() {} 1269 1270 tooling::Replacements format() { 1271 FormatTokenLexer Tokens(Lex, SourceMgr, Encoding); 1272 1273 UnwrappedLineParser Parser(Style, Tokens.lex(), *this); 1274 bool StructuralError = Parser.parse(); 1275 TokenAnnotator Annotator(Style, Tokens.getIdentTable().get("in")); 1276 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1277 Annotator.annotate(AnnotatedLines[i]); 1278 } 1279 deriveLocalStyle(); 1280 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1281 Annotator.calculateFormattingInformation(AnnotatedLines[i]); 1282 } 1283 1284 // Adapt level to the next line if this is a comment. 1285 // FIXME: Can/should this be done in the UnwrappedLineParser? 1286 const AnnotatedLine *NextNonCommentLine = NULL; 1287 for (unsigned i = AnnotatedLines.size() - 1; i > 0; --i) { 1288 if (NextNonCommentLine && AnnotatedLines[i].First->is(tok::comment) && 1289 !AnnotatedLines[i].First->Next) 1290 AnnotatedLines[i].Level = NextNonCommentLine->Level; 1291 else 1292 NextNonCommentLine = AnnotatedLines[i].First->isNot(tok::r_brace) 1293 ? &AnnotatedLines[i] 1294 : NULL; 1295 } 1296 1297 std::vector<int> IndentForLevel; 1298 bool PreviousLineWasTouched = false; 1299 const FormatToken *PreviousLineLastToken = 0; 1300 bool FormatPPDirective = false; 1301 for (std::vector<AnnotatedLine>::iterator I = AnnotatedLines.begin(), 1302 E = AnnotatedLines.end(); 1303 I != E; ++I) { 1304 const AnnotatedLine &TheLine = *I; 1305 const FormatToken *FirstTok = TheLine.First; 1306 int Offset = getIndentOffset(*TheLine.First); 1307 1308 // Check whether this line is part of a formatted preprocessor directive. 1309 if (FirstTok->HasUnescapedNewline) 1310 FormatPPDirective = false; 1311 if (!FormatPPDirective && TheLine.InPPDirective && 1312 (touchesLine(TheLine) || touchesPPDirective(I + 1, E))) 1313 FormatPPDirective = true; 1314 1315 // Determine indent and try to merge multiple unwrapped lines. 1316 while (IndentForLevel.size() <= TheLine.Level) 1317 IndentForLevel.push_back(-1); 1318 IndentForLevel.resize(TheLine.Level + 1); 1319 unsigned Indent = getIndent(IndentForLevel, TheLine.Level); 1320 if (static_cast<int>(Indent) + Offset >= 0) 1321 Indent += Offset; 1322 tryFitMultipleLinesInOne(Indent, I, E); 1323 1324 bool WasMoved = PreviousLineWasTouched && FirstTok->NewlinesBefore == 0; 1325 if (TheLine.First->is(tok::eof)) { 1326 if (PreviousLineWasTouched) { 1327 unsigned NewLines = std::min(FirstTok->NewlinesBefore, 1u); 1328 Whitespaces.replaceWhitespace(*TheLine.First, NewLines, /*Indent*/ 0, 1329 /*TargetColumn*/ 0); 1330 } 1331 } else if (TheLine.Type != LT_Invalid && 1332 (WasMoved || FormatPPDirective || touchesLine(TheLine))) { 1333 unsigned LevelIndent = getIndent(IndentForLevel, TheLine.Level); 1334 if (FirstTok->WhitespaceRange.isValid() && 1335 // Insert a break even if there is a structural error in case where 1336 // we break apart a line consisting of multiple unwrapped lines. 1337 (FirstTok->NewlinesBefore == 0 || !StructuralError)) { 1338 formatFirstToken(*TheLine.First, PreviousLineLastToken, Indent, 1339 TheLine.InPPDirective); 1340 } else { 1341 Indent = LevelIndent = 1342 SourceMgr.getSpellingColumnNumber(FirstTok->Tok.getLocation()) - 1343 1; 1344 } 1345 UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine, Indent, 1346 TheLine.First, Whitespaces, Encoding); 1347 Formatter.format(I + 1 != E ? &*(I + 1) : NULL); 1348 IndentForLevel[TheLine.Level] = LevelIndent; 1349 PreviousLineWasTouched = true; 1350 } else { 1351 // Format the first token if necessary, and notify the WhitespaceManager 1352 // about the unchanged whitespace. 1353 for (const FormatToken *Tok = TheLine.First; Tok != NULL; 1354 Tok = Tok->Next) { 1355 if (Tok == TheLine.First && 1356 (Tok->NewlinesBefore > 0 || Tok->IsFirst)) { 1357 unsigned LevelIndent = 1358 SourceMgr.getSpellingColumnNumber(Tok->Tok.getLocation()) - 1; 1359 // Remove trailing whitespace of the previous line if it was 1360 // touched. 1361 if (PreviousLineWasTouched || touchesEmptyLineBefore(TheLine)) { 1362 formatFirstToken(*Tok, PreviousLineLastToken, LevelIndent, 1363 TheLine.InPPDirective); 1364 } else { 1365 Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective); 1366 } 1367 1368 if (static_cast<int>(LevelIndent) - Offset >= 0) 1369 LevelIndent -= Offset; 1370 if (Tok->isNot(tok::comment)) 1371 IndentForLevel[TheLine.Level] = LevelIndent; 1372 } else { 1373 Whitespaces.addUntouchableToken(*Tok, TheLine.InPPDirective); 1374 } 1375 } 1376 // If we did not reformat this unwrapped line, the column at the end of 1377 // the last token is unchanged - thus, we can calculate the end of the 1378 // last token. 1379 PreviousLineWasTouched = false; 1380 } 1381 PreviousLineLastToken = I->Last; 1382 } 1383 return Whitespaces.generateReplacements(); 1384 } 1385 1386 private: 1387 void deriveLocalStyle() { 1388 unsigned CountBoundToVariable = 0; 1389 unsigned CountBoundToType = 0; 1390 bool HasCpp03IncompatibleFormat = false; 1391 for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) { 1392 if (!AnnotatedLines[i].First->Next) 1393 continue; 1394 FormatToken *Tok = AnnotatedLines[i].First->Next; 1395 while (Tok->Next) { 1396 if (Tok->Type == TT_PointerOrReference) { 1397 bool SpacesBefore = 1398 Tok->WhitespaceRange.getBegin() != Tok->WhitespaceRange.getEnd(); 1399 bool SpacesAfter = Tok->Next->WhitespaceRange.getBegin() != 1400 Tok->Next->WhitespaceRange.getEnd(); 1401 if (SpacesBefore && !SpacesAfter) 1402 ++CountBoundToVariable; 1403 else if (!SpacesBefore && SpacesAfter) 1404 ++CountBoundToType; 1405 } 1406 1407 if (Tok->Type == TT_TemplateCloser && 1408 Tok->Previous->Type == TT_TemplateCloser && 1409 Tok->WhitespaceRange.getBegin() == Tok->WhitespaceRange.getEnd()) 1410 HasCpp03IncompatibleFormat = true; 1411 Tok = Tok->Next; 1412 } 1413 } 1414 if (Style.DerivePointerBinding) { 1415 if (CountBoundToType > CountBoundToVariable) 1416 Style.PointerBindsToType = true; 1417 else if (CountBoundToType < CountBoundToVariable) 1418 Style.PointerBindsToType = false; 1419 } 1420 if (Style.Standard == FormatStyle::LS_Auto) { 1421 Style.Standard = HasCpp03IncompatibleFormat ? FormatStyle::LS_Cpp11 1422 : FormatStyle::LS_Cpp03; 1423 } 1424 } 1425 1426 /// \brief Get the indent of \p Level from \p IndentForLevel. 1427 /// 1428 /// \p IndentForLevel must contain the indent for the level \c l 1429 /// at \p IndentForLevel[l], or a value < 0 if the indent for 1430 /// that level is unknown. 1431 unsigned getIndent(const std::vector<int> IndentForLevel, unsigned Level) { 1432 if (IndentForLevel[Level] != -1) 1433 return IndentForLevel[Level]; 1434 if (Level == 0) 1435 return 0; 1436 return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth; 1437 } 1438 1439 /// \brief Get the offset of the line relatively to the level. 1440 /// 1441 /// For example, 'public:' labels in classes are offset by 1 or 2 1442 /// characters to the left from their level. 1443 int getIndentOffset(const FormatToken &RootToken) { 1444 if (RootToken.isAccessSpecifier(false) || RootToken.isObjCAccessSpecifier()) 1445 return Style.AccessModifierOffset; 1446 return 0; 1447 } 1448 1449 /// \brief Tries to merge lines into one. 1450 /// 1451 /// This will change \c Line and \c AnnotatedLine to contain the merged line, 1452 /// if possible; note that \c I will be incremented when lines are merged. 1453 void tryFitMultipleLinesInOne(unsigned Indent, 1454 std::vector<AnnotatedLine>::iterator &I, 1455 std::vector<AnnotatedLine>::iterator E) { 1456 // We can never merge stuff if there are trailing line comments. 1457 if (I->Last->Type == TT_LineComment) 1458 return; 1459 1460 unsigned Limit = Style.ColumnLimit - Indent; 1461 // If we already exceed the column limit, we set 'Limit' to 0. The different 1462 // tryMerge..() functions can then decide whether to still do merging. 1463 Limit = I->Last->TotalLength > Limit ? 0 : Limit - I->Last->TotalLength; 1464 1465 if (I + 1 == E || (I + 1)->Type == LT_Invalid) 1466 return; 1467 1468 if (I->Last->is(tok::l_brace)) { 1469 tryMergeSimpleBlock(I, E, Limit); 1470 } else if (Style.AllowShortIfStatementsOnASingleLine && 1471 I->First->is(tok::kw_if)) { 1472 tryMergeSimpleControlStatement(I, E, Limit); 1473 } else if (Style.AllowShortLoopsOnASingleLine && 1474 I->First->isOneOf(tok::kw_for, tok::kw_while)) { 1475 tryMergeSimpleControlStatement(I, E, Limit); 1476 } else if (I->InPPDirective && 1477 (I->First->HasUnescapedNewline || I->First->IsFirst)) { 1478 tryMergeSimplePPDirective(I, E, Limit); 1479 } 1480 } 1481 1482 void tryMergeSimplePPDirective(std::vector<AnnotatedLine>::iterator &I, 1483 std::vector<AnnotatedLine>::iterator E, 1484 unsigned Limit) { 1485 if (Limit == 0) 1486 return; 1487 AnnotatedLine &Line = *I; 1488 if (!(I + 1)->InPPDirective || (I + 1)->First->HasUnescapedNewline) 1489 return; 1490 if (I + 2 != E && (I + 2)->InPPDirective && 1491 !(I + 2)->First->HasUnescapedNewline) 1492 return; 1493 if (1 + (I + 1)->Last->TotalLength > Limit) 1494 return; 1495 join(Line, *(++I)); 1496 } 1497 1498 void tryMergeSimpleControlStatement(std::vector<AnnotatedLine>::iterator &I, 1499 std::vector<AnnotatedLine>::iterator E, 1500 unsigned Limit) { 1501 if (Limit == 0) 1502 return; 1503 if ((I + 1)->InPPDirective != I->InPPDirective || 1504 ((I + 1)->InPPDirective && (I + 1)->First->HasUnescapedNewline)) 1505 return; 1506 AnnotatedLine &Line = *I; 1507 if (Line.Last->isNot(tok::r_paren)) 1508 return; 1509 if (1 + (I + 1)->Last->TotalLength > Limit) 1510 return; 1511 if ((I + 1)->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, 1512 tok::kw_while) || 1513 (I + 1)->First->Type == TT_LineComment) 1514 return; 1515 // Only inline simple if's (no nested if or else). 1516 if (I + 2 != E && Line.First->is(tok::kw_if) && 1517 (I + 2)->First->is(tok::kw_else)) 1518 return; 1519 join(Line, *(++I)); 1520 } 1521 1522 void tryMergeSimpleBlock(std::vector<AnnotatedLine>::iterator &I, 1523 std::vector<AnnotatedLine>::iterator E, 1524 unsigned Limit) { 1525 // No merging if the brace already is on the next line. 1526 if (Style.BreakBeforeBraces != FormatStyle::BS_Attach) 1527 return; 1528 1529 // First, check that the current line allows merging. This is the case if 1530 // we're not in a control flow statement and the last token is an opening 1531 // brace. 1532 AnnotatedLine &Line = *I; 1533 if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::r_brace, 1534 tok::kw_else, tok::kw_try, tok::kw_catch, 1535 tok::kw_for, 1536 // This gets rid of all ObjC @ keywords and methods. 1537 tok::at, tok::minus, tok::plus)) 1538 return; 1539 1540 FormatToken *Tok = (I + 1)->First; 1541 if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore && 1542 (Tok->getNextNonComment() == NULL || 1543 Tok->getNextNonComment()->is(tok::semi))) { 1544 // We merge empty blocks even if the line exceeds the column limit. 1545 Tok->SpacesRequiredBefore = 0; 1546 Tok->CanBreakBefore = true; 1547 join(Line, *(I + 1)); 1548 I += 1; 1549 } else if (Limit != 0 && Line.First->isNot(tok::kw_namespace)) { 1550 // Check that we still have three lines and they fit into the limit. 1551 if (I + 2 == E || (I + 2)->Type == LT_Invalid || 1552 !nextTwoLinesFitInto(I, Limit)) 1553 return; 1554 1555 // Second, check that the next line does not contain any braces - if it 1556 // does, readability declines when putting it into a single line. 1557 if ((I + 1)->Last->Type == TT_LineComment || Tok->MustBreakBefore) 1558 return; 1559 do { 1560 if (Tok->isOneOf(tok::l_brace, tok::r_brace)) 1561 return; 1562 Tok = Tok->Next; 1563 } while (Tok != NULL); 1564 1565 // Last, check that the third line contains a single closing brace. 1566 Tok = (I + 2)->First; 1567 if (Tok->getNextNonComment() != NULL || Tok->isNot(tok::r_brace) || 1568 Tok->MustBreakBefore) 1569 return; 1570 1571 join(Line, *(I + 1)); 1572 join(Line, *(I + 2)); 1573 I += 2; 1574 } 1575 } 1576 1577 bool nextTwoLinesFitInto(std::vector<AnnotatedLine>::iterator I, 1578 unsigned Limit) { 1579 return 1 + (I + 1)->Last->TotalLength + 1 + (I + 2)->Last->TotalLength <= 1580 Limit; 1581 } 1582 1583 void join(AnnotatedLine &A, const AnnotatedLine &B) { 1584 assert(!A.Last->Next); 1585 assert(!B.First->Previous); 1586 A.Last->Next = B.First; 1587 B.First->Previous = A.Last; 1588 unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore; 1589 for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) { 1590 Tok->TotalLength += LengthA; 1591 A.Last = Tok; 1592 } 1593 } 1594 1595 bool touchesRanges(const CharSourceRange &Range) { 1596 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 1597 if (!SourceMgr.isBeforeInTranslationUnit(Range.getEnd(), 1598 Ranges[i].getBegin()) && 1599 !SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(), 1600 Range.getBegin())) 1601 return true; 1602 } 1603 return false; 1604 } 1605 1606 bool touchesLine(const AnnotatedLine &TheLine) { 1607 const FormatToken *First = TheLine.First; 1608 const FormatToken *Last = TheLine.Last; 1609 CharSourceRange LineRange = CharSourceRange::getCharRange( 1610 First->WhitespaceRange.getBegin().getLocWithOffset( 1611 First->LastNewlineOffset), 1612 Last->Tok.getLocation().getLocWithOffset(Last->TokenText.size() - 1)); 1613 return touchesRanges(LineRange); 1614 } 1615 1616 bool touchesPPDirective(std::vector<AnnotatedLine>::iterator I, 1617 std::vector<AnnotatedLine>::iterator E) { 1618 for (; I != E; ++I) { 1619 if (I->First->HasUnescapedNewline) 1620 return false; 1621 if (touchesLine(*I)) 1622 return true; 1623 } 1624 return false; 1625 } 1626 1627 bool touchesEmptyLineBefore(const AnnotatedLine &TheLine) { 1628 const FormatToken *First = TheLine.First; 1629 CharSourceRange LineRange = CharSourceRange::getCharRange( 1630 First->WhitespaceRange.getBegin(), 1631 First->WhitespaceRange.getBegin().getLocWithOffset( 1632 First->LastNewlineOffset)); 1633 return touchesRanges(LineRange); 1634 } 1635 1636 virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) { 1637 AnnotatedLines.push_back(AnnotatedLine(TheLine)); 1638 } 1639 1640 /// \brief Add a new line and the required indent before the first Token 1641 /// of the \c UnwrappedLine if there was no structural parsing error. 1642 /// Returns the indent level of the \c UnwrappedLine. 1643 void formatFirstToken(const FormatToken &RootToken, 1644 const FormatToken *PreviousToken, unsigned Indent, 1645 bool InPPDirective) { 1646 unsigned Newlines = 1647 std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1); 1648 // Remove empty lines before "}" where applicable. 1649 if (RootToken.is(tok::r_brace) && 1650 (!RootToken.Next || 1651 (RootToken.Next->is(tok::semi) && !RootToken.Next->Next))) 1652 Newlines = std::min(Newlines, 1u); 1653 if (Newlines == 0 && !RootToken.IsFirst) 1654 Newlines = 1; 1655 1656 // Insert extra new line before access specifiers. 1657 if (PreviousToken && PreviousToken->isOneOf(tok::semi, tok::r_brace) && 1658 RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1) 1659 ++Newlines; 1660 1661 Whitespaces.replaceWhitespace( 1662 RootToken, Newlines, Indent, Indent, 1663 InPPDirective && !RootToken.HasUnescapedNewline); 1664 } 1665 1666 FormatStyle Style; 1667 Lexer &Lex; 1668 SourceManager &SourceMgr; 1669 WhitespaceManager Whitespaces; 1670 std::vector<CharSourceRange> Ranges; 1671 std::vector<AnnotatedLine> AnnotatedLines; 1672 1673 encoding::Encoding Encoding; 1674 }; 1675 1676 } // end anonymous namespace 1677 1678 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex, 1679 SourceManager &SourceMgr, 1680 std::vector<CharSourceRange> Ranges) { 1681 Formatter formatter(Style, Lex, SourceMgr, Ranges); 1682 return formatter.format(); 1683 } 1684 1685 tooling::Replacements reformat(const FormatStyle &Style, StringRef Code, 1686 std::vector<tooling::Range> Ranges, 1687 StringRef FileName) { 1688 FileManager Files((FileSystemOptions())); 1689 DiagnosticsEngine Diagnostics( 1690 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), 1691 new DiagnosticOptions); 1692 SourceManager SourceMgr(Diagnostics, Files); 1693 llvm::MemoryBuffer *Buf = llvm::MemoryBuffer::getMemBuffer(Code, FileName); 1694 const clang::FileEntry *Entry = 1695 Files.getVirtualFile(FileName, Buf->getBufferSize(), 0); 1696 SourceMgr.overrideFileContents(Entry, Buf); 1697 FileID ID = 1698 SourceMgr.createFileID(Entry, SourceLocation(), clang::SrcMgr::C_User); 1699 Lexer Lex(ID, SourceMgr.getBuffer(ID), SourceMgr, 1700 getFormattingLangOpts(Style.Standard)); 1701 SourceLocation StartOfFile = SourceMgr.getLocForStartOfFile(ID); 1702 std::vector<CharSourceRange> CharRanges; 1703 for (unsigned i = 0, e = Ranges.size(); i != e; ++i) { 1704 SourceLocation Start = StartOfFile.getLocWithOffset(Ranges[i].getOffset()); 1705 SourceLocation End = Start.getLocWithOffset(Ranges[i].getLength()); 1706 CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); 1707 } 1708 return reformat(Style, Lex, SourceMgr, CharRanges); 1709 } 1710 1711 LangOptions getFormattingLangOpts(FormatStyle::LanguageStandard Standard) { 1712 LangOptions LangOpts; 1713 LangOpts.CPlusPlus = 1; 1714 LangOpts.CPlusPlus11 = Standard == FormatStyle::LS_Cpp03 ? 0 : 1; 1715 LangOpts.LineComment = 1; 1716 LangOpts.Bool = 1; 1717 LangOpts.ObjC1 = 1; 1718 LangOpts.ObjC2 = 1; 1719 return LangOpts; 1720 } 1721 1722 } // namespace format 1723 } // namespace clang 1724