1 //===--- BreakableToken.cpp - Format C++ code -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief Contains implementation of BreakableToken class and classes derived 12 /// from it. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "BreakableToken.h" 17 #include "ContinuationIndenter.h" 18 #include "clang/Basic/CharInfo.h" 19 #include "clang/Format/Format.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/Support/Debug.h" 22 #include <algorithm> 23 24 #define DEBUG_TYPE "format-token-breaker" 25 26 namespace clang { 27 namespace format { 28 29 static const char *const Blanks = " \t\v\f\r"; 30 static bool IsBlank(char C) { 31 switch (C) { 32 case ' ': 33 case '\t': 34 case '\v': 35 case '\f': 36 case '\r': 37 return true; 38 default: 39 return false; 40 } 41 } 42 43 static StringRef getLineCommentIndentPrefix(StringRef Comment) { 44 static const char *const KnownPrefixes[] = { 45 "///<", "//!<", "///", "//", "//!"}; 46 StringRef LongestPrefix; 47 for (StringRef KnownPrefix : KnownPrefixes) { 48 if (Comment.startswith(KnownPrefix)) { 49 size_t PrefixLength = KnownPrefix.size(); 50 while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ') 51 ++PrefixLength; 52 if (PrefixLength > LongestPrefix.size()) 53 LongestPrefix = Comment.substr(0, PrefixLength); 54 } 55 } 56 return LongestPrefix; 57 } 58 59 static BreakableToken::Split getCommentSplit(StringRef Text, 60 unsigned ContentStartColumn, 61 unsigned ColumnLimit, 62 unsigned TabWidth, 63 encoding::Encoding Encoding) { 64 if (ColumnLimit <= ContentStartColumn + 1) 65 return BreakableToken::Split(StringRef::npos, 0); 66 67 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1; 68 unsigned MaxSplitBytes = 0; 69 70 for (unsigned NumChars = 0; 71 NumChars < MaxSplit && MaxSplitBytes < Text.size();) { 72 unsigned BytesInChar = 73 encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding); 74 NumChars += 75 encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar), 76 ContentStartColumn, TabWidth, Encoding); 77 MaxSplitBytes += BytesInChar; 78 } 79 80 StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); 81 82 // Do not split before a number followed by a dot: this would be interpreted 83 // as a numbered list, which would prevent re-flowing in subsequent passes. 84 static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\."); 85 if (SpaceOffset != StringRef::npos && 86 kNumberedListRegexp.match(Text.substr(SpaceOffset).ltrim(Blanks))) 87 SpaceOffset = Text.find_last_of(Blanks, SpaceOffset); 88 89 if (SpaceOffset == StringRef::npos || 90 // Don't break at leading whitespace. 91 Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) { 92 // Make sure that we don't break at leading whitespace that 93 // reaches past MaxSplit. 94 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks); 95 if (FirstNonWhitespace == StringRef::npos) 96 // If the comment is only whitespace, we cannot split. 97 return BreakableToken::Split(StringRef::npos, 0); 98 SpaceOffset = Text.find_first_of( 99 Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace)); 100 } 101 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { 102 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks); 103 StringRef AfterCut = Text.substr(SpaceOffset).ltrim(Blanks); 104 return BreakableToken::Split(BeforeCut.size(), 105 AfterCut.begin() - BeforeCut.end()); 106 } 107 return BreakableToken::Split(StringRef::npos, 0); 108 } 109 110 static BreakableToken::Split 111 getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, 112 unsigned TabWidth, encoding::Encoding Encoding) { 113 // FIXME: Reduce unit test case. 114 if (Text.empty()) 115 return BreakableToken::Split(StringRef::npos, 0); 116 if (ColumnLimit <= UsedColumns) 117 return BreakableToken::Split(StringRef::npos, 0); 118 unsigned MaxSplit = ColumnLimit - UsedColumns; 119 StringRef::size_type SpaceOffset = 0; 120 StringRef::size_type SlashOffset = 0; 121 StringRef::size_type WordStartOffset = 0; 122 StringRef::size_type SplitPoint = 0; 123 for (unsigned Chars = 0;;) { 124 unsigned Advance; 125 if (Text[0] == '\\') { 126 Advance = encoding::getEscapeSequenceLength(Text); 127 Chars += Advance; 128 } else { 129 Advance = encoding::getCodePointNumBytes(Text[0], Encoding); 130 Chars += encoding::columnWidthWithTabs( 131 Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding); 132 } 133 134 if (Chars > MaxSplit || Text.size() <= Advance) 135 break; 136 137 if (IsBlank(Text[0])) 138 SpaceOffset = SplitPoint; 139 if (Text[0] == '/') 140 SlashOffset = SplitPoint; 141 if (Advance == 1 && !isAlphanumeric(Text[0])) 142 WordStartOffset = SplitPoint; 143 144 SplitPoint += Advance; 145 Text = Text.substr(Advance); 146 } 147 148 if (SpaceOffset != 0) 149 return BreakableToken::Split(SpaceOffset + 1, 0); 150 if (SlashOffset != 0) 151 return BreakableToken::Split(SlashOffset + 1, 0); 152 if (WordStartOffset != 0) 153 return BreakableToken::Split(WordStartOffset + 1, 0); 154 if (SplitPoint != 0) 155 return BreakableToken::Split(SplitPoint, 0); 156 return BreakableToken::Split(StringRef::npos, 0); 157 } 158 159 bool switchesFormatting(const FormatToken &Token) { 160 assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) && 161 "formatting regions are switched by comment tokens"); 162 StringRef Content = Token.TokenText.substr(2).ltrim(); 163 return Content.startswith("clang-format on") || 164 Content.startswith("clang-format off"); 165 } 166 167 unsigned 168 BreakableToken::getLineLengthAfterCompression(unsigned RemainingTokenColumns, 169 Split Split) const { 170 // Example: consider the content 171 // lala lala 172 // - RemainingTokenColumns is the original number of columns, 10; 173 // - Split is (4, 2), denoting the two spaces between the two words; 174 // 175 // We compute the number of columns when the split is compressed into a single 176 // space, like: 177 // lala lala 178 return RemainingTokenColumns + 1 - Split.second; 179 } 180 181 unsigned BreakableSingleLineToken::getLineCount() const { return 1; } 182 183 unsigned BreakableSingleLineToken::getLineLengthAfterSplit( 184 unsigned LineIndex, unsigned TailOffset, 185 StringRef::size_type Length) const { 186 return StartColumn + Prefix.size() + Postfix.size() + 187 encoding::columnWidthWithTabs(Line.substr(TailOffset, Length), 188 StartColumn + Prefix.size(), 189 Style.TabWidth, Encoding); 190 } 191 192 BreakableSingleLineToken::BreakableSingleLineToken( 193 const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, 194 StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding, 195 const FormatStyle &Style) 196 : BreakableToken(Tok, InPPDirective, Encoding, Style), 197 StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) { 198 assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix)); 199 Line = Tok.TokenText.substr( 200 Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); 201 } 202 203 BreakableStringLiteral::BreakableStringLiteral( 204 const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, 205 StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding, 206 const FormatStyle &Style) 207 : BreakableSingleLineToken(Tok, StartColumn, Prefix, Postfix, InPPDirective, 208 Encoding, Style) {} 209 210 BreakableToken::Split 211 BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, 212 unsigned ColumnLimit, 213 llvm::Regex &CommentPragmasRegex) const { 214 return getStringSplit(Line.substr(TailOffset), 215 StartColumn + Prefix.size() + Postfix.size(), 216 ColumnLimit, Style.TabWidth, Encoding); 217 } 218 219 void BreakableStringLiteral::insertBreak(unsigned LineIndex, 220 unsigned TailOffset, Split Split, 221 WhitespaceManager &Whitespaces) { 222 Whitespaces.replaceWhitespaceInToken( 223 Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, 224 Prefix, InPPDirective, 1, StartColumn); 225 } 226 227 BreakableComment::BreakableComment(const FormatToken &Token, 228 unsigned StartColumn, 229 bool InPPDirective, 230 encoding::Encoding Encoding, 231 const FormatStyle &Style) 232 : BreakableToken(Token, InPPDirective, Encoding, Style), 233 StartColumn(StartColumn) {} 234 235 unsigned BreakableComment::getLineCount() const { return Lines.size(); } 236 237 BreakableToken::Split 238 BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset, 239 unsigned ColumnLimit, 240 llvm::Regex &CommentPragmasRegex) const { 241 // Don't break lines matching the comment pragmas regex. 242 if (CommentPragmasRegex.match(Content[LineIndex])) 243 return Split(StringRef::npos, 0); 244 return getCommentSplit(Content[LineIndex].substr(TailOffset), 245 getContentStartColumn(LineIndex, TailOffset), 246 ColumnLimit, Style.TabWidth, Encoding); 247 } 248 249 void BreakableComment::compressWhitespace(unsigned LineIndex, 250 unsigned TailOffset, Split Split, 251 WhitespaceManager &Whitespaces) { 252 StringRef Text = Content[LineIndex].substr(TailOffset); 253 // Text is relative to the content line, but Whitespaces operates relative to 254 // the start of the corresponding token, so compute the start of the Split 255 // that needs to be compressed into a single space relative to the start of 256 // its token. 257 unsigned BreakOffsetInToken = 258 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; 259 unsigned CharsToRemove = Split.second; 260 Whitespaces.replaceWhitespaceInToken( 261 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "", 262 /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1); 263 } 264 265 BreakableToken::Split 266 BreakableComment::getReflowSplit(StringRef Text, StringRef ReflowPrefix, 267 unsigned PreviousEndColumn, 268 unsigned ColumnLimit) const { 269 unsigned ReflowStartColumn = PreviousEndColumn + ReflowPrefix.size(); 270 StringRef TrimmedText = Text.rtrim(Blanks); 271 // This is the width of the resulting line in case the full line of Text gets 272 // reflown up starting at ReflowStartColumn. 273 unsigned FullWidth = ReflowStartColumn + encoding::columnWidthWithTabs( 274 TrimmedText, ReflowStartColumn, 275 Style.TabWidth, Encoding); 276 // If the full line fits up, we return a reflow split after it, 277 // otherwise we compute the largest piece of text that fits after 278 // ReflowStartColumn. 279 Split ReflowSplit = 280 FullWidth <= ColumnLimit 281 ? Split(TrimmedText.size(), Text.size() - TrimmedText.size()) 282 : getCommentSplit(Text, ReflowStartColumn, ColumnLimit, 283 Style.TabWidth, Encoding); 284 285 // We need to be extra careful here, because while it's OK to keep a long line 286 // if it can't be broken into smaller pieces (like when the first word of a 287 // long line is longer than the column limit), it's not OK to reflow that long 288 // word up. So we recompute the size of the previous line after reflowing and 289 // only return the reflow split if that's under the line limit. 290 if (ReflowSplit.first != StringRef::npos && 291 // Check if the width of the newly reflown line is under the limit. 292 PreviousEndColumn + ReflowPrefix.size() + 293 encoding::columnWidthWithTabs(Text.substr(0, ReflowSplit.first), 294 PreviousEndColumn + 295 ReflowPrefix.size(), 296 Style.TabWidth, Encoding) <= 297 ColumnLimit) { 298 return ReflowSplit; 299 } 300 return Split(StringRef::npos, 0); 301 } 302 303 const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const { 304 return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok; 305 } 306 307 static bool mayReflowContent(StringRef Content) { 308 Content = Content.trim(Blanks); 309 // Lines starting with '@' commonly have special meaning. 310 // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists. 311 static const SmallVector<StringRef, 8> kSpecialMeaningPrefixes = { 312 "@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* " }; 313 bool hasSpecialMeaningPrefix = false; 314 for (StringRef Prefix : kSpecialMeaningPrefixes) { 315 if (Content.startswith(Prefix)) { 316 hasSpecialMeaningPrefix = true; 317 break; 318 } 319 } 320 321 // Numbered lists may also start with a number followed by '.' 322 // To avoid issues if a line starts with a number which is actually the end 323 // of a previous line, we only consider numbers with up to 2 digits. 324 static llvm::Regex kNumberedListRegexp = llvm::Regex("^[1-9][0-9]?\\. "); 325 hasSpecialMeaningPrefix = hasSpecialMeaningPrefix || 326 kNumberedListRegexp.match(Content); 327 328 // Simple heuristic for what to reflow: content should contain at least two 329 // characters and either the first or second character must be 330 // non-punctuation. 331 return Content.size() >= 2 && !hasSpecialMeaningPrefix && 332 !Content.endswith("\\") && 333 // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is 334 // true, then the first code point must be 1 byte long. 335 (!isPunctuation(Content[0]) || !isPunctuation(Content[1])); 336 } 337 338 BreakableBlockComment::BreakableBlockComment( 339 const FormatToken &Token, unsigned StartColumn, 340 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, 341 encoding::Encoding Encoding, const FormatStyle &Style) 342 : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) { 343 assert(Tok.is(TT_BlockComment) && 344 "block comment section must start with a block comment"); 345 346 StringRef TokenText(Tok.TokenText); 347 assert(TokenText.startswith("/*") && TokenText.endswith("*/")); 348 TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); 349 350 int IndentDelta = StartColumn - OriginalStartColumn; 351 Content.resize(Lines.size()); 352 Content[0] = Lines[0]; 353 ContentColumn.resize(Lines.size()); 354 // Account for the initial '/*'. 355 ContentColumn[0] = StartColumn + 2; 356 Tokens.resize(Lines.size()); 357 for (size_t i = 1; i < Lines.size(); ++i) 358 adjustWhitespace(i, IndentDelta); 359 360 // Align decorations with the column of the star on the first line, 361 // that is one column after the start "/*". 362 DecorationColumn = StartColumn + 1; 363 364 // Account for comment decoration patterns like this: 365 // 366 // /* 367 // ** blah blah blah 368 // */ 369 if (Lines.size() >= 2 && Content[1].startswith("**") && 370 static_cast<unsigned>(ContentColumn[1]) == StartColumn) { 371 DecorationColumn = StartColumn; 372 } 373 374 Decoration = "* "; 375 if (Lines.size() == 1 && !FirstInLine) { 376 // Comments for which FirstInLine is false can start on arbitrary column, 377 // and available horizontal space can be too small to align consecutive 378 // lines with the first one. 379 // FIXME: We could, probably, align them to current indentation level, but 380 // now we just wrap them without stars. 381 Decoration = ""; 382 } 383 for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) { 384 // If the last line is empty, the closing "*/" will have a star. 385 if (i + 1 == e && Content[i].empty()) 386 break; 387 if (!Content[i].empty() && i + 1 != e && 388 Decoration.startswith(Content[i])) 389 continue; 390 while (!Content[i].startswith(Decoration)) 391 Decoration = Decoration.substr(0, Decoration.size() - 1); 392 } 393 394 LastLineNeedsDecoration = true; 395 IndentAtLineBreak = ContentColumn[0] + 1; 396 for (size_t i = 1, e = Lines.size(); i < e; ++i) { 397 if (Content[i].empty()) { 398 if (i + 1 == e) { 399 // Empty last line means that we already have a star as a part of the 400 // trailing */. We also need to preserve whitespace, so that */ is 401 // correctly indented. 402 LastLineNeedsDecoration = false; 403 // Align the star in the last '*/' with the stars on the previous lines. 404 if (e >= 2 && !Decoration.empty()) { 405 ContentColumn[i] = DecorationColumn; 406 } 407 } else if (Decoration.empty()) { 408 // For all other lines, set the start column to 0 if they're empty, so 409 // we do not insert trailing whitespace anywhere. 410 ContentColumn[i] = 0; 411 } 412 continue; 413 } 414 415 // The first line already excludes the star. 416 // The last line excludes the star if LastLineNeedsDecoration is false. 417 // For all other lines, adjust the line to exclude the star and 418 // (optionally) the first whitespace. 419 unsigned DecorationSize = Decoration.startswith(Content[i]) 420 ? Content[i].size() 421 : Decoration.size(); 422 if (DecorationSize) { 423 ContentColumn[i] = DecorationColumn + DecorationSize; 424 } 425 Content[i] = Content[i].substr(DecorationSize); 426 if (!Decoration.startswith(Content[i])) 427 IndentAtLineBreak = 428 std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i])); 429 } 430 IndentAtLineBreak = 431 std::max<unsigned>(IndentAtLineBreak, Decoration.size()); 432 433 DEBUG({ 434 llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n"; 435 for (size_t i = 0; i < Lines.size(); ++i) { 436 llvm::dbgs() << i << " |" << Content[i] << "| " 437 << "CC=" << ContentColumn[i] << "| " 438 << "IN=" << (Content[i].data() - Lines[i].data()) << "\n"; 439 } 440 }); 441 } 442 443 void BreakableBlockComment::adjustWhitespace(unsigned LineIndex, 444 int IndentDelta) { 445 // When in a preprocessor directive, the trailing backslash in a block comment 446 // is not needed, but can serve a purpose of uniformity with necessary escaped 447 // newlines outside the comment. In this case we remove it here before 448 // trimming the trailing whitespace. The backslash will be re-added later when 449 // inserting a line break. 450 size_t EndOfPreviousLine = Lines[LineIndex - 1].size(); 451 if (InPPDirective && Lines[LineIndex - 1].endswith("\\")) 452 --EndOfPreviousLine; 453 454 // Calculate the end of the non-whitespace text in the previous line. 455 EndOfPreviousLine = 456 Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine); 457 if (EndOfPreviousLine == StringRef::npos) 458 EndOfPreviousLine = 0; 459 else 460 ++EndOfPreviousLine; 461 // Calculate the start of the non-whitespace text in the current line. 462 size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks); 463 if (StartOfLine == StringRef::npos) 464 StartOfLine = Lines[LineIndex].rtrim("\r\n").size(); 465 466 StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine); 467 // Adjust Lines to only contain relevant text. 468 size_t PreviousContentOffset = 469 Content[LineIndex - 1].data() - Lines[LineIndex - 1].data(); 470 Content[LineIndex - 1] = Lines[LineIndex - 1].substr( 471 PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset); 472 Content[LineIndex] = Lines[LineIndex].substr(StartOfLine); 473 474 // Adjust the start column uniformly across all lines. 475 ContentColumn[LineIndex] = 476 encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) + 477 IndentDelta; 478 } 479 480 unsigned BreakableBlockComment::getLineLengthAfterSplit( 481 unsigned LineIndex, unsigned TailOffset, 482 StringRef::size_type Length) const { 483 unsigned ContentStartColumn = getContentStartColumn(LineIndex, TailOffset); 484 unsigned LineLength = 485 ContentStartColumn + encoding::columnWidthWithTabs( 486 Content[LineIndex].substr(TailOffset, Length), 487 ContentStartColumn, Style.TabWidth, Encoding); 488 // The last line gets a "*/" postfix. 489 if (LineIndex + 1 == Lines.size()) { 490 LineLength += 2; 491 // We never need a decoration when breaking just the trailing "*/" postfix. 492 // Note that checking that Length == 0 is not enough, since Length could 493 // also be StringRef::npos. 494 if (Content[LineIndex].substr(TailOffset, Length).empty()) { 495 LineLength -= Decoration.size(); 496 } 497 } 498 return LineLength; 499 } 500 501 void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, 502 Split Split, 503 WhitespaceManager &Whitespaces) { 504 StringRef Text = Content[LineIndex].substr(TailOffset); 505 StringRef Prefix = Decoration; 506 // We need this to account for the case when we have a decoration "* " for all 507 // the lines except for the last one, where the star in "*/" acts as a 508 // decoration. 509 unsigned LocalIndentAtLineBreak = IndentAtLineBreak; 510 if (LineIndex + 1 == Lines.size() && 511 Text.size() == Split.first + Split.second) { 512 // For the last line we need to break before "*/", but not to add "* ". 513 Prefix = ""; 514 if (LocalIndentAtLineBreak >= 2) 515 LocalIndentAtLineBreak -= 2; 516 } 517 // The split offset is from the beginning of the line. Convert it to an offset 518 // from the beginning of the token text. 519 unsigned BreakOffsetInToken = 520 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; 521 unsigned CharsToRemove = Split.second; 522 assert(LocalIndentAtLineBreak >= Prefix.size()); 523 Whitespaces.replaceWhitespaceInToken( 524 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", Prefix, 525 InPPDirective, /*Newlines=*/1, 526 /*Spaces=*/LocalIndentAtLineBreak - Prefix.size()); 527 } 528 529 BreakableToken::Split BreakableBlockComment::getSplitBefore( 530 unsigned LineIndex, 531 unsigned PreviousEndColumn, 532 unsigned ColumnLimit, 533 llvm::Regex &CommentPragmasRegex) const { 534 if (!mayReflow(LineIndex, CommentPragmasRegex)) 535 return Split(StringRef::npos, 0); 536 StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); 537 return getReflowSplit(TrimmedContent, ReflowPrefix, PreviousEndColumn, 538 ColumnLimit); 539 } 540 541 unsigned BreakableBlockComment::getReflownColumn( 542 StringRef Content, 543 unsigned LineIndex, 544 unsigned PreviousEndColumn) const { 545 unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size(); 546 // If this is the last line, it will carry around its '*/' postfix. 547 unsigned PostfixLength = (LineIndex + 1 == Lines.size() ? 2 : 0); 548 // The line is composed of previous text, reflow prefix, reflown text and 549 // postfix. 550 unsigned ReflownColumn = 551 StartColumn + encoding::columnWidthWithTabs(Content, StartColumn, 552 Style.TabWidth, Encoding) + 553 PostfixLength; 554 return ReflownColumn; 555 } 556 557 unsigned BreakableBlockComment::getLineLengthAfterSplitBefore( 558 unsigned LineIndex, unsigned TailOffset, 559 unsigned PreviousEndColumn, 560 unsigned ColumnLimit, 561 Split SplitBefore) const { 562 if (SplitBefore.first == StringRef::npos || 563 // Block comment line contents contain the trailing whitespace after the 564 // decoration, so the need of left trim. Note that this behavior is 565 // consistent with the breaking of block comments where the indentation of 566 // a broken line is uniform across all the lines of the block comment. 567 SplitBefore.first + SplitBefore.second < 568 Content[LineIndex].ltrim().size()) { 569 // A piece of line, not the whole, gets reflown. 570 return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); 571 } else { 572 // The whole line gets reflown, need to check if we need to insert a break 573 // for the postfix or not. 574 StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); 575 unsigned ReflownColumn = 576 getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn); 577 if (ReflownColumn <= ColumnLimit) { 578 return ReflownColumn; 579 } 580 return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); 581 } 582 } 583 void BreakableBlockComment::replaceWhitespaceBefore( 584 unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, 585 Split SplitBefore, WhitespaceManager &Whitespaces) { 586 if (LineIndex == 0) return; 587 StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks); 588 if (SplitBefore.first != StringRef::npos) { 589 // Here we need to reflow. 590 assert(Tokens[LineIndex - 1] == Tokens[LineIndex] && 591 "Reflowing whitespace within a token"); 592 // This is the offset of the end of the last line relative to the start of 593 // the token text in the token. 594 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() + 595 Content[LineIndex - 1].size() - 596 tokenAt(LineIndex).TokenText.data(); 597 unsigned WhitespaceLength = TrimmedContent.data() - 598 tokenAt(LineIndex).TokenText.data() - 599 WhitespaceOffsetInToken; 600 Whitespaces.replaceWhitespaceInToken( 601 tokenAt(LineIndex), WhitespaceOffsetInToken, 602 /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"", 603 /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0, 604 /*Spaces=*/0); 605 // Check if we need to also insert a break at the whitespace range. 606 // For this we first adapt the reflow split relative to the beginning of the 607 // content. 608 // Note that we don't need a penalty for this break, since it doesn't change 609 // the total number of lines. 610 Split BreakSplit = SplitBefore; 611 BreakSplit.first += TrimmedContent.data() - Content[LineIndex].data(); 612 unsigned ReflownColumn = 613 getReflownColumn(TrimmedContent, LineIndex, PreviousEndColumn); 614 if (ReflownColumn > ColumnLimit) { 615 insertBreak(LineIndex, 0, BreakSplit, Whitespaces); 616 } 617 return; 618 } 619 620 // Here no reflow with the previous line will happen. 621 // Fix the decoration of the line at LineIndex. 622 StringRef Prefix = Decoration; 623 if (Content[LineIndex].empty()) { 624 if (LineIndex + 1 == Lines.size()) { 625 if (!LastLineNeedsDecoration) { 626 // If the last line was empty, we don't need a prefix, as the */ will 627 // line up with the decoration (if it exists). 628 Prefix = ""; 629 } 630 } else if (!Decoration.empty()) { 631 // For other empty lines, if we do have a decoration, adapt it to not 632 // contain a trailing whitespace. 633 Prefix = Prefix.substr(0, 1); 634 } 635 } else { 636 if (ContentColumn[LineIndex] == 1) { 637 // This line starts immediately after the decorating *. 638 Prefix = Prefix.substr(0, 1); 639 } 640 } 641 // This is the offset of the end of the last line relative to the start of the 642 // token text in the token. 643 unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() + 644 Content[LineIndex - 1].size() - 645 tokenAt(LineIndex).TokenText.data(); 646 unsigned WhitespaceLength = Content[LineIndex].data() - 647 tokenAt(LineIndex).TokenText.data() - 648 WhitespaceOffsetInToken; 649 Whitespaces.replaceWhitespaceInToken( 650 tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix, 651 InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size()); 652 } 653 654 bool BreakableBlockComment::mayReflow(unsigned LineIndex, 655 llvm::Regex &CommentPragmasRegex) const { 656 // Content[LineIndex] may exclude the indent after the '*' decoration. In that 657 // case, we compute the start of the comment pragma manually. 658 StringRef IndentContent = Content[LineIndex]; 659 if (Lines[LineIndex].ltrim(Blanks).startswith("*")) { 660 IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1); 661 } 662 return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) && 663 mayReflowContent(Content[LineIndex]) && !Tok.Finalized && 664 !switchesFormatting(tokenAt(LineIndex)); 665 } 666 667 unsigned 668 BreakableBlockComment::getContentStartColumn(unsigned LineIndex, 669 unsigned TailOffset) const { 670 // If we break, we always break at the predefined indent. 671 if (TailOffset != 0) 672 return IndentAtLineBreak; 673 return std::max(0, ContentColumn[LineIndex]); 674 } 675 676 BreakableLineCommentSection::BreakableLineCommentSection( 677 const FormatToken &Token, unsigned StartColumn, 678 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, 679 encoding::Encoding Encoding, const FormatStyle &Style) 680 : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) { 681 assert(Tok.is(TT_LineComment) && 682 "line comment section must start with a line comment"); 683 FormatToken *LineTok = nullptr; 684 for (const FormatToken *CurrentTok = &Tok; 685 CurrentTok && CurrentTok->is(TT_LineComment); 686 CurrentTok = CurrentTok->Next) { 687 LastLineTok = LineTok; 688 StringRef TokenText(CurrentTok->TokenText); 689 assert(TokenText.startswith("//")); 690 size_t FirstLineIndex = Lines.size(); 691 TokenText.split(Lines, "\n"); 692 Content.resize(Lines.size()); 693 ContentColumn.resize(Lines.size()); 694 OriginalContentColumn.resize(Lines.size()); 695 Tokens.resize(Lines.size()); 696 Prefix.resize(Lines.size()); 697 OriginalPrefix.resize(Lines.size()); 698 for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) { 699 // We need to trim the blanks in case this is not the first line in a 700 // multiline comment. Then the indent is included in Lines[i]. 701 StringRef IndentPrefix = 702 getLineCommentIndentPrefix(Lines[i].ltrim(Blanks)); 703 assert(IndentPrefix.startswith("//")); 704 OriginalPrefix[i] = Prefix[i] = IndentPrefix; 705 if (Lines[i].size() > Prefix[i].size() && 706 isAlphanumeric(Lines[i][Prefix[i].size()])) { 707 if (Prefix[i] == "//") 708 Prefix[i] = "// "; 709 else if (Prefix[i] == "///") 710 Prefix[i] = "/// "; 711 else if (Prefix[i] == "//!") 712 Prefix[i] = "//! "; 713 else if (Prefix[i] == "///<") 714 Prefix[i] = "///< "; 715 else if (Prefix[i] == "//!<") 716 Prefix[i] = "//!< "; 717 } 718 719 Tokens[i] = LineTok; 720 Content[i] = Lines[i].substr(IndentPrefix.size()); 721 OriginalContentColumn[i] = 722 StartColumn + 723 encoding::columnWidthWithTabs(OriginalPrefix[i], 724 StartColumn, 725 Style.TabWidth, 726 Encoding); 727 ContentColumn[i] = 728 StartColumn + 729 encoding::columnWidthWithTabs(Prefix[i], 730 StartColumn, 731 Style.TabWidth, 732 Encoding); 733 734 // Calculate the end of the non-whitespace text in this line. 735 size_t EndOfLine = Content[i].find_last_not_of(Blanks); 736 if (EndOfLine == StringRef::npos) 737 EndOfLine = Content[i].size(); 738 else 739 ++EndOfLine; 740 Content[i] = Content[i].substr(0, EndOfLine); 741 } 742 LineTok = CurrentTok->Next; 743 if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) { 744 // A line comment section needs to broken by a line comment that is 745 // preceded by at least two newlines. Note that we put this break here 746 // instead of breaking at a previous stage during parsing, since that 747 // would split the contents of the enum into two unwrapped lines in this 748 // example, which is undesirable: 749 // enum A { 750 // a, // comment about a 751 // 752 // // comment about b 753 // b 754 // }; 755 // 756 // FIXME: Consider putting separate line comment sections as children to 757 // the unwrapped line instead. 758 break; 759 } 760 } 761 } 762 763 unsigned BreakableLineCommentSection::getLineLengthAfterSplit( 764 unsigned LineIndex, unsigned TailOffset, 765 StringRef::size_type Length) const { 766 unsigned ContentStartColumn = 767 (TailOffset == 0 ? ContentColumn[LineIndex] 768 : OriginalContentColumn[LineIndex]); 769 return ContentStartColumn + encoding::columnWidthWithTabs( 770 Content[LineIndex].substr(TailOffset, Length), 771 ContentStartColumn, Style.TabWidth, Encoding); 772 } 773 774 void BreakableLineCommentSection::insertBreak(unsigned LineIndex, 775 unsigned TailOffset, Split Split, 776 WhitespaceManager &Whitespaces) { 777 StringRef Text = Content[LineIndex].substr(TailOffset); 778 // Compute the offset of the split relative to the beginning of the token 779 // text. 780 unsigned BreakOffsetInToken = 781 Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first; 782 unsigned CharsToRemove = Split.second; 783 // Compute the size of the new indent, including the size of the new prefix of 784 // the newly broken line. 785 unsigned IndentAtLineBreak = OriginalContentColumn[LineIndex] + 786 Prefix[LineIndex].size() - 787 OriginalPrefix[LineIndex].size(); 788 assert(IndentAtLineBreak >= Prefix[LineIndex].size()); 789 Whitespaces.replaceWhitespaceInToken( 790 tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", 791 Prefix[LineIndex], InPPDirective, /*Newlines=*/1, 792 /*Spaces=*/IndentAtLineBreak - Prefix[LineIndex].size()); 793 } 794 795 BreakableComment::Split BreakableLineCommentSection::getSplitBefore( 796 unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, 797 llvm::Regex &CommentPragmasRegex) const { 798 if (!mayReflow(LineIndex, CommentPragmasRegex)) 799 return Split(StringRef::npos, 0); 800 return getReflowSplit(Content[LineIndex], ReflowPrefix, PreviousEndColumn, 801 ColumnLimit); 802 } 803 804 unsigned BreakableLineCommentSection::getLineLengthAfterSplitBefore( 805 unsigned LineIndex, unsigned TailOffset, 806 unsigned PreviousEndColumn, 807 unsigned ColumnLimit, 808 Split SplitBefore) const { 809 if (SplitBefore.first == StringRef::npos || 810 SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) { 811 // A piece of line, not the whole line, gets reflown. 812 return getLineLengthAfterSplit(LineIndex, TailOffset, StringRef::npos); 813 } else { 814 // The whole line gets reflown. 815 unsigned StartColumn = PreviousEndColumn + ReflowPrefix.size(); 816 return StartColumn + encoding::columnWidthWithTabs(Content[LineIndex], 817 StartColumn, 818 Style.TabWidth, 819 Encoding); 820 } 821 } 822 823 void BreakableLineCommentSection::replaceWhitespaceBefore( 824 unsigned LineIndex, unsigned PreviousEndColumn, unsigned ColumnLimit, 825 Split SplitBefore, WhitespaceManager &Whitespaces) { 826 // If this is the first line of a token, we need to inform Whitespace Manager 827 // about it: either adapt the whitespace range preceding it, or mark it as an 828 // untouchable token. 829 // This happens for instance here: 830 // // line 1 \ 831 // // line 2 832 if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) { 833 if (SplitBefore.first != StringRef::npos) { 834 // Reflow happens between tokens. Replace the whitespace between the 835 // tokens by the empty string. 836 Whitespaces.replaceWhitespace( 837 *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0, 838 /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false); 839 // Replace the indent and prefix of the token with the reflow prefix. 840 unsigned WhitespaceLength = 841 Content[LineIndex].data() - tokenAt(LineIndex).TokenText.data(); 842 Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], 843 /*Offset=*/0, 844 /*ReplaceChars=*/WhitespaceLength, 845 /*PreviousPostfix=*/"", 846 /*CurrentPrefix=*/ReflowPrefix, 847 /*InPPDirective=*/false, 848 /*Newlines=*/0, 849 /*Spaces=*/0); 850 } else { 851 // This is the first line for the current token, but no reflow with the 852 // previous token is necessary. However, we still may need to adjust the 853 // start column. Note that ContentColumn[LineIndex] is the expected 854 // content column after a possible update to the prefix, hence the prefix 855 // length change is included. 856 unsigned LineColumn = 857 ContentColumn[LineIndex] - 858 (Content[LineIndex].data() - Lines[LineIndex].data()) + 859 (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size()); 860 861 // We always want to create a replacement instead of adding an untouchable 862 // token, even if LineColumn is the same as the original column of the 863 // token. This is because WhitespaceManager doesn't align trailing 864 // comments if they are untouchable. 865 Whitespaces.replaceWhitespace(*Tokens[LineIndex], 866 /*Newlines=*/1, 867 /*Spaces=*/LineColumn, 868 /*StartOfTokenColumn=*/LineColumn, 869 /*InPPDirective=*/false); 870 } 871 } 872 if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) { 873 // Adjust the prefix if necessary. 874 875 // Take care of the space possibly introduced after a decoration. 876 assert(Prefix[LineIndex] == (OriginalPrefix[LineIndex] + " ").str() && 877 "Expecting a line comment prefix to differ from original by at most " 878 "a space"); 879 Whitespaces.replaceWhitespaceInToken( 880 tokenAt(LineIndex), OriginalPrefix[LineIndex].size(), 0, "", "", 881 /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1); 882 } 883 // Add a break after a reflow split has been introduced, if necessary. 884 // Note that this break doesn't need to be penalized, since it doesn't change 885 // the number of lines. 886 if (SplitBefore.first != StringRef::npos && 887 SplitBefore.first + SplitBefore.second < Content[LineIndex].size()) { 888 insertBreak(LineIndex, 0, SplitBefore, Whitespaces); 889 } 890 } 891 892 void BreakableLineCommentSection::updateNextToken(LineState& State) const { 893 if (LastLineTok) { 894 State.NextToken = LastLineTok->Next; 895 } 896 } 897 898 bool BreakableLineCommentSection::mayReflow( 899 unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const { 900 // Line comments have the indent as part of the prefix, so we need to 901 // recompute the start of the line. 902 StringRef IndentContent = Content[LineIndex]; 903 if (Lines[LineIndex].startswith("//")) { 904 IndentContent = Lines[LineIndex].substr(2); 905 } 906 return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) && 907 mayReflowContent(Content[LineIndex]) && !Tok.Finalized && 908 !switchesFormatting(tokenAt(LineIndex)) && 909 OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1]; 910 } 911 912 unsigned 913 BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex, 914 unsigned TailOffset) const { 915 if (TailOffset != 0) { 916 return OriginalContentColumn[LineIndex]; 917 } 918 return ContentColumn[LineIndex]; 919 } 920 921 } // namespace format 922 } // namespace clang 923