1 //===--- ContinuationIndenter.h - Format C++ code ---------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// This file implements an indenter that manages the indentation of 12 /// continuations. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 17 #define LLVM_CLANG_LIB_FORMAT_CONTINUATIONINDENTER_H 18 19 #include "Encoding.h" 20 #include "FormatToken.h" 21 #include "clang/Format/Format.h" 22 #include "llvm/Support/Regex.h" 23 #include <map> 24 #include <tuple> 25 26 namespace clang { 27 class SourceManager; 28 29 namespace format { 30 31 class AnnotatedLine; 32 class BreakableToken; 33 struct FormatToken; 34 struct LineState; 35 struct ParenState; 36 struct RawStringFormatStyleManager; 37 class WhitespaceManager; 38 39 struct RawStringFormatStyleManager { 40 llvm::StringMap<FormatStyle> DelimiterStyle; 41 llvm::StringMap<FormatStyle> EnclosingFunctionStyle; 42 43 RawStringFormatStyleManager(const FormatStyle &CodeStyle); 44 45 llvm::Optional<FormatStyle> getDelimiterStyle(StringRef Delimiter) const; 46 47 llvm::Optional<FormatStyle> 48 getEnclosingFunctionStyle(StringRef EnclosingFunction) const; 49 }; 50 51 class ContinuationIndenter { 52 public: 53 /// Constructs a \c ContinuationIndenter to format \p Line starting in 54 /// column \p FirstIndent. 55 ContinuationIndenter(const FormatStyle &Style, 56 const AdditionalKeywords &Keywords, 57 const SourceManager &SourceMgr, 58 WhitespaceManager &Whitespaces, 59 encoding::Encoding Encoding, 60 bool BinPackInconclusiveFunctions); 61 62 /// Get the initial state, i.e. the state after placing \p Line's 63 /// first token at \p FirstIndent. When reformatting a fragment of code, as in 64 /// the case of formatting inside raw string literals, \p FirstStartColumn is 65 /// the column at which the state of the parent formatter is. 66 LineState getInitialState(unsigned FirstIndent, unsigned FirstStartColumn, 67 const AnnotatedLine *Line, bool DryRun); 68 69 // FIXME: canBreak and mustBreak aren't strictly indentation-related. Find a 70 // better home. 71 /// Returns \c true, if a line break after \p State is allowed. 72 bool canBreak(const LineState &State); 73 74 /// Returns \c true, if a line break after \p State is mandatory. 75 bool mustBreak(const LineState &State); 76 77 /// Appends the next token to \p State and updates information 78 /// necessary for indentation. 79 /// 80 /// Puts the token on the current line if \p Newline is \c false and adds a 81 /// line break and necessary indentation otherwise. 82 /// 83 /// If \p DryRun is \c false, also creates and stores the required 84 /// \c Replacement. 85 unsigned addTokenToState(LineState &State, bool Newline, bool DryRun, 86 unsigned ExtraSpaces = 0); 87 88 /// Get the column limit for this line. This is the style's column 89 /// limit, potentially reduced for preprocessor definitions. 90 unsigned getColumnLimit(const LineState &State) const; 91 92 private: 93 /// Mark the next token as consumed in \p State and modify its stacks 94 /// accordingly. 95 unsigned moveStateToNextToken(LineState &State, bool DryRun, bool Newline); 96 97 /// Update 'State' according to the next token's fake left parentheses. 98 void moveStatePastFakeLParens(LineState &State, bool Newline); 99 /// Update 'State' according to the next token's fake r_parens. 100 void moveStatePastFakeRParens(LineState &State); 101 102 /// Update 'State' according to the next token being one of "(<{[". 103 void moveStatePastScopeOpener(LineState &State, bool Newline); 104 /// Update 'State' according to the next token being one of ")>}]". 105 void moveStatePastScopeCloser(LineState &State); 106 /// Update 'State' with the next token opening a nested block. 107 void moveStateToNewBlock(LineState &State); 108 109 /// Reformats a raw string literal. 110 /// 111 /// \returns An extra penalty induced by reformatting the token. 112 unsigned reformatRawStringLiteral(const FormatToken &Current, 113 LineState &State, 114 const FormatStyle &RawStringStyle, 115 bool DryRun); 116 117 /// If the current token is at the end of the current line, handle 118 /// the transition to the next line. 119 unsigned handleEndOfLine(const FormatToken &Current, LineState &State, 120 bool DryRun, bool AllowBreak); 121 122 /// If \p Current is a raw string that is configured to be reformatted, 123 /// return the style to be used. 124 llvm::Optional<FormatStyle> getRawStringStyle(const FormatToken &Current, 125 const LineState &State); 126 127 /// If the current token sticks out over the end of the line, break 128 /// it if possible. 129 /// 130 /// \returns A pair (penalty, exceeded), where penalty is the extra penalty 131 /// when tokens are broken or lines exceed the column limit, and exceeded 132 /// indicates whether the algorithm purposefully left lines exceeding the 133 /// column limit. 134 /// 135 /// The returned penalty will cover the cost of the additional line breaks 136 /// and column limit violation in all lines except for the last one. The 137 /// penalty for the column limit violation in the last line (and in single 138 /// line tokens) is handled in \c addNextStateToQueue. 139 /// 140 /// \p Strict indicates whether reflowing is allowed to leave characters 141 /// protruding the column limit; if true, lines will be split strictly within 142 /// the column limit where possible; if false, words are allowed to protrude 143 /// over the column limit as long as the penalty is less than the penalty 144 /// of a break. 145 std::pair<unsigned, bool> breakProtrudingToken(const FormatToken &Current, 146 LineState &State, 147 bool AllowBreak, bool DryRun, 148 bool Strict); 149 150 /// Returns the \c BreakableToken starting at \p Current, or nullptr 151 /// if the current token cannot be broken. 152 std::unique_ptr<BreakableToken> 153 createBreakableToken(const FormatToken &Current, LineState &State, 154 bool AllowBreak); 155 156 /// Appends the next token to \p State and updates information 157 /// necessary for indentation. 158 /// 159 /// Puts the token on the current line. 160 /// 161 /// If \p DryRun is \c false, also creates and stores the required 162 /// \c Replacement. 163 void addTokenOnCurrentLine(LineState &State, bool DryRun, 164 unsigned ExtraSpaces); 165 166 /// Appends the next token to \p State and updates information 167 /// necessary for indentation. 168 /// 169 /// Adds a line break and necessary indentation. 170 /// 171 /// If \p DryRun is \c false, also creates and stores the required 172 /// \c Replacement. 173 unsigned addTokenOnNewLine(LineState &State, bool DryRun); 174 175 /// Calculate the new column for a line wrap before the next token. 176 unsigned getNewLineColumn(const LineState &State); 177 178 /// Adds a multiline token to the \p State. 179 /// 180 /// \returns Extra penalty for the first line of the literal: last line is 181 /// handled in \c addNextStateToQueue, and the penalty for other lines doesn't 182 /// matter, as we don't change them. 183 unsigned addMultilineToken(const FormatToken &Current, LineState &State); 184 185 /// Returns \c true if the next token starts a multiline string 186 /// literal. 187 /// 188 /// This includes implicitly concatenated strings, strings that will be broken 189 /// by clang-format and string literals with escaped newlines. 190 bool nextIsMultilineString(const LineState &State); 191 192 FormatStyle Style; 193 const AdditionalKeywords &Keywords; 194 const SourceManager &SourceMgr; 195 WhitespaceManager &Whitespaces; 196 encoding::Encoding Encoding; 197 bool BinPackInconclusiveFunctions; 198 llvm::Regex CommentPragmasRegex; 199 const RawStringFormatStyleManager RawStringFormats; 200 }; 201 202 struct ParenState { ParenStateParenState203 ParenState(const FormatToken *Tok, unsigned Indent, unsigned LastSpace, 204 bool AvoidBinPacking, bool NoLineBreak) 205 : Tok(Tok), Indent(Indent), LastSpace(LastSpace), 206 NestedBlockIndent(Indent), BreakBeforeClosingBrace(false), 207 AvoidBinPacking(AvoidBinPacking), BreakBeforeParameter(false), 208 NoLineBreak(NoLineBreak), NoLineBreakInOperand(false), 209 LastOperatorWrapped(true), ContainsLineBreak(false), 210 ContainsUnwrappedBuilder(false), AlignColons(true), 211 ObjCSelectorNameFound(false), HasMultipleNestedBlocks(false), 212 NestedBlockInlined(false), IsInsideObjCArrayLiteral(false) {} 213 214 /// \brief The token opening this parenthesis level, or nullptr if this level 215 /// is opened by fake parenthesis. 216 /// 217 /// Not considered for memoization as it will always have the same value at 218 /// the same token. 219 const FormatToken *Tok; 220 221 /// The position to which a specific parenthesis level needs to be 222 /// indented. 223 unsigned Indent; 224 225 /// The position of the last space on each level. 226 /// 227 /// Used e.g. to break like: 228 /// functionCall(Parameter, otherCall( 229 /// OtherParameter)); 230 unsigned LastSpace; 231 232 /// If a block relative to this parenthesis level gets wrapped, indent 233 /// it this much. 234 unsigned NestedBlockIndent; 235 236 /// The position the first "<<" operator encountered on each level. 237 /// 238 /// Used to align "<<" operators. 0 if no such operator has been encountered 239 /// on a level. 240 unsigned FirstLessLess = 0; 241 242 /// The column of a \c ? in a conditional expression; 243 unsigned QuestionColumn = 0; 244 245 /// The position of the colon in an ObjC method declaration/call. 246 unsigned ColonPos = 0; 247 248 /// The start of the most recent function in a builder-type call. 249 unsigned StartOfFunctionCall = 0; 250 251 /// Contains the start of array subscript expressions, so that they 252 /// can be aligned. 253 unsigned StartOfArraySubscripts = 0; 254 255 /// If a nested name specifier was broken over multiple lines, this 256 /// contains the start column of the second line. Otherwise 0. 257 unsigned NestedNameSpecifierContinuation = 0; 258 259 /// If a call expression was broken over multiple lines, this 260 /// contains the start column of the second line. Otherwise 0. 261 unsigned CallContinuation = 0; 262 263 /// The column of the first variable name in a variable declaration. 264 /// 265 /// Used to align further variables if necessary. 266 unsigned VariablePos = 0; 267 268 /// Whether a newline needs to be inserted before the block's closing 269 /// brace. 270 /// 271 /// We only want to insert a newline before the closing brace if there also 272 /// was a newline after the beginning left brace. 273 bool BreakBeforeClosingBrace : 1; 274 275 /// Avoid bin packing, i.e. multiple parameters/elements on multiple 276 /// lines, in this context. 277 bool AvoidBinPacking : 1; 278 279 /// Break after the next comma (or all the commas in this context if 280 /// \c AvoidBinPacking is \c true). 281 bool BreakBeforeParameter : 1; 282 283 /// Line breaking in this context would break a formatting rule. 284 bool NoLineBreak : 1; 285 286 /// Same as \c NoLineBreak, but is restricted until the end of the 287 /// operand (including the next ","). 288 bool NoLineBreakInOperand : 1; 289 290 /// True if the last binary operator on this level was wrapped to the 291 /// next line. 292 bool LastOperatorWrapped : 1; 293 294 /// \c true if this \c ParenState already contains a line-break. 295 /// 296 /// The first line break in a certain \c ParenState causes extra penalty so 297 /// that clang-format prefers similar breaks, i.e. breaks in the same 298 /// parenthesis. 299 bool ContainsLineBreak : 1; 300 301 /// \c true if this \c ParenState contains multiple segments of a 302 /// builder-type call on one line. 303 bool ContainsUnwrappedBuilder : 1; 304 305 /// \c true if the colons of the curren ObjC method expression should 306 /// be aligned. 307 /// 308 /// Not considered for memoization as it will always have the same value at 309 /// the same token. 310 bool AlignColons : 1; 311 312 /// \c true if at least one selector name was found in the current 313 /// ObjC method expression. 314 /// 315 /// Not considered for memoization as it will always have the same value at 316 /// the same token. 317 bool ObjCSelectorNameFound : 1; 318 319 /// \c true if there are multiple nested blocks inside these parens. 320 /// 321 /// Not considered for memoization as it will always have the same value at 322 /// the same token. 323 bool HasMultipleNestedBlocks : 1; 324 325 /// The start of a nested block (e.g. lambda introducer in C++ or 326 /// "function" in JavaScript) is not wrapped to a new line. 327 bool NestedBlockInlined : 1; 328 329 /// \c true if the current \c ParenState represents an Objective-C 330 /// array literal. 331 bool IsInsideObjCArrayLiteral : 1; 332 333 bool operator<(const ParenState &Other) const { 334 if (Indent != Other.Indent) 335 return Indent < Other.Indent; 336 if (LastSpace != Other.LastSpace) 337 return LastSpace < Other.LastSpace; 338 if (NestedBlockIndent != Other.NestedBlockIndent) 339 return NestedBlockIndent < Other.NestedBlockIndent; 340 if (FirstLessLess != Other.FirstLessLess) 341 return FirstLessLess < Other.FirstLessLess; 342 if (BreakBeforeClosingBrace != Other.BreakBeforeClosingBrace) 343 return BreakBeforeClosingBrace; 344 if (QuestionColumn != Other.QuestionColumn) 345 return QuestionColumn < Other.QuestionColumn; 346 if (AvoidBinPacking != Other.AvoidBinPacking) 347 return AvoidBinPacking; 348 if (BreakBeforeParameter != Other.BreakBeforeParameter) 349 return BreakBeforeParameter; 350 if (NoLineBreak != Other.NoLineBreak) 351 return NoLineBreak; 352 if (LastOperatorWrapped != Other.LastOperatorWrapped) 353 return LastOperatorWrapped; 354 if (ColonPos != Other.ColonPos) 355 return ColonPos < Other.ColonPos; 356 if (StartOfFunctionCall != Other.StartOfFunctionCall) 357 return StartOfFunctionCall < Other.StartOfFunctionCall; 358 if (StartOfArraySubscripts != Other.StartOfArraySubscripts) 359 return StartOfArraySubscripts < Other.StartOfArraySubscripts; 360 if (CallContinuation != Other.CallContinuation) 361 return CallContinuation < Other.CallContinuation; 362 if (VariablePos != Other.VariablePos) 363 return VariablePos < Other.VariablePos; 364 if (ContainsLineBreak != Other.ContainsLineBreak) 365 return ContainsLineBreak; 366 if (ContainsUnwrappedBuilder != Other.ContainsUnwrappedBuilder) 367 return ContainsUnwrappedBuilder; 368 if (NestedBlockInlined != Other.NestedBlockInlined) 369 return NestedBlockInlined; 370 return false; 371 } 372 }; 373 374 /// The current state when indenting a unwrapped line. 375 /// 376 /// As the indenting tries different combinations this is copied by value. 377 struct LineState { 378 /// The number of used columns in the current line. 379 unsigned Column; 380 381 /// The token that needs to be next formatted. 382 FormatToken *NextToken; 383 384 /// \c true if this line contains a continued for-loop section. 385 bool LineContainsContinuedForLoopSection; 386 387 /// \c true if \p NextToken should not continue this line. 388 bool NoContinuation; 389 390 /// The \c NestingLevel at the start of this line. 391 unsigned StartOfLineLevel; 392 393 /// The lowest \c NestingLevel on the current line. 394 unsigned LowestLevelOnLine; 395 396 /// The start column of the string literal, if we're in a string 397 /// literal sequence, 0 otherwise. 398 unsigned StartOfStringLiteral; 399 400 /// A stack keeping track of properties applying to parenthesis 401 /// levels. 402 std::vector<ParenState> Stack; 403 404 /// Ignore the stack of \c ParenStates for state comparison. 405 /// 406 /// In long and deeply nested unwrapped lines, the current algorithm can 407 /// be insufficient for finding the best formatting with a reasonable amount 408 /// of time and memory. Setting this flag will effectively lead to the 409 /// algorithm not analyzing some combinations. However, these combinations 410 /// rarely contain the optimal solution: In short, accepting a higher 411 /// penalty early would need to lead to different values in the \c 412 /// ParenState stack (in an otherwise identical state) and these different 413 /// values would need to lead to a significant amount of avoided penalty 414 /// later. 415 /// 416 /// FIXME: Come up with a better algorithm instead. 417 bool IgnoreStackForComparison; 418 419 /// The indent of the first token. 420 unsigned FirstIndent; 421 422 /// The line that is being formatted. 423 /// 424 /// Does not need to be considered for memoization because it doesn't change. 425 const AnnotatedLine *Line; 426 427 /// Comparison operator to be able to used \c LineState in \c map. 428 bool operator<(const LineState &Other) const { 429 if (NextToken != Other.NextToken) 430 return NextToken < Other.NextToken; 431 if (Column != Other.Column) 432 return Column < Other.Column; 433 if (LineContainsContinuedForLoopSection != 434 Other.LineContainsContinuedForLoopSection) 435 return LineContainsContinuedForLoopSection; 436 if (NoContinuation != Other.NoContinuation) 437 return NoContinuation; 438 if (StartOfLineLevel != Other.StartOfLineLevel) 439 return StartOfLineLevel < Other.StartOfLineLevel; 440 if (LowestLevelOnLine != Other.LowestLevelOnLine) 441 return LowestLevelOnLine < Other.LowestLevelOnLine; 442 if (StartOfStringLiteral != Other.StartOfStringLiteral) 443 return StartOfStringLiteral < Other.StartOfStringLiteral; 444 if (IgnoreStackForComparison || Other.IgnoreStackForComparison) 445 return false; 446 return Stack < Other.Stack; 447 } 448 }; 449 450 } // end namespace format 451 } // end namespace clang 452 453 #endif 454