1 //===--- BreakableToken.cpp - Format C++ code -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief Contains implementation of BreakableToken class and classes derived 12 /// from it. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #include "BreakableToken.h" 17 #include "Comments.h" 18 #include "clang/Basic/CharInfo.h" 19 #include "clang/Format/Format.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/Support/Debug.h" 22 #include <algorithm> 23 24 #define DEBUG_TYPE "format-token-breaker" 25 26 namespace clang { 27 namespace format { 28 29 static const char *const Blanks = " \t\v\f\r"; 30 static bool IsBlank(char C) { 31 switch (C) { 32 case ' ': 33 case '\t': 34 case '\v': 35 case '\f': 36 case '\r': 37 return true; 38 default: 39 return false; 40 } 41 } 42 43 static BreakableToken::Split getCommentSplit(StringRef Text, 44 unsigned ContentStartColumn, 45 unsigned ColumnLimit, 46 unsigned TabWidth, 47 encoding::Encoding Encoding) { 48 if (ColumnLimit <= ContentStartColumn + 1) 49 return BreakableToken::Split(StringRef::npos, 0); 50 51 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1; 52 unsigned MaxSplitBytes = 0; 53 54 for (unsigned NumChars = 0; 55 NumChars < MaxSplit && MaxSplitBytes < Text.size();) { 56 unsigned BytesInChar = 57 encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding); 58 NumChars += 59 encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar), 60 ContentStartColumn, TabWidth, Encoding); 61 MaxSplitBytes += BytesInChar; 62 } 63 64 StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); 65 if (SpaceOffset == StringRef::npos || 66 // Don't break at leading whitespace. 67 Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) { 68 // Make sure that we don't break at leading whitespace that 69 // reaches past MaxSplit. 70 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks); 71 if (FirstNonWhitespace == StringRef::npos) 72 // If the comment is only whitespace, we cannot split. 73 return BreakableToken::Split(StringRef::npos, 0); 74 SpaceOffset = Text.find_first_of( 75 Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace)); 76 } 77 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { 78 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks); 79 StringRef AfterCut = Text.substr(SpaceOffset).ltrim(Blanks); 80 return BreakableToken::Split(BeforeCut.size(), 81 AfterCut.begin() - BeforeCut.end()); 82 } 83 return BreakableToken::Split(StringRef::npos, 0); 84 } 85 86 static BreakableToken::Split 87 getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit, 88 unsigned TabWidth, encoding::Encoding Encoding) { 89 // FIXME: Reduce unit test case. 90 if (Text.empty()) 91 return BreakableToken::Split(StringRef::npos, 0); 92 if (ColumnLimit <= UsedColumns) 93 return BreakableToken::Split(StringRef::npos, 0); 94 unsigned MaxSplit = ColumnLimit - UsedColumns; 95 StringRef::size_type SpaceOffset = 0; 96 StringRef::size_type SlashOffset = 0; 97 StringRef::size_type WordStartOffset = 0; 98 StringRef::size_type SplitPoint = 0; 99 for (unsigned Chars = 0;;) { 100 unsigned Advance; 101 if (Text[0] == '\\') { 102 Advance = encoding::getEscapeSequenceLength(Text); 103 Chars += Advance; 104 } else { 105 Advance = encoding::getCodePointNumBytes(Text[0], Encoding); 106 Chars += encoding::columnWidthWithTabs( 107 Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding); 108 } 109 110 if (Chars > MaxSplit || Text.size() <= Advance) 111 break; 112 113 if (IsBlank(Text[0])) 114 SpaceOffset = SplitPoint; 115 if (Text[0] == '/') 116 SlashOffset = SplitPoint; 117 if (Advance == 1 && !isAlphanumeric(Text[0])) 118 WordStartOffset = SplitPoint; 119 120 SplitPoint += Advance; 121 Text = Text.substr(Advance); 122 } 123 124 if (SpaceOffset != 0) 125 return BreakableToken::Split(SpaceOffset + 1, 0); 126 if (SlashOffset != 0) 127 return BreakableToken::Split(SlashOffset + 1, 0); 128 if (WordStartOffset != 0) 129 return BreakableToken::Split(WordStartOffset + 1, 0); 130 if (SplitPoint != 0) 131 return BreakableToken::Split(SplitPoint, 0); 132 return BreakableToken::Split(StringRef::npos, 0); 133 } 134 135 unsigned BreakableSingleLineToken::getLineCount() const { return 1; } 136 137 unsigned BreakableSingleLineToken::getLineLengthAfterSplit( 138 unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { 139 return StartColumn + Prefix.size() + Postfix.size() + 140 encoding::columnWidthWithTabs(Line.substr(Offset, Length), 141 StartColumn + Prefix.size(), 142 Style.TabWidth, Encoding); 143 } 144 145 BreakableSingleLineToken::BreakableSingleLineToken( 146 const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn, 147 StringRef Prefix, StringRef Postfix, bool InPPDirective, 148 encoding::Encoding Encoding, const FormatStyle &Style) 149 : BreakableToken(Tok, IndentLevel, InPPDirective, Encoding, Style), 150 StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) { 151 assert(Tok.TokenText.endswith(Postfix)); 152 Line = Tok.TokenText.substr( 153 Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); 154 } 155 156 BreakableStringLiteral::BreakableStringLiteral( 157 const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn, 158 StringRef Prefix, StringRef Postfix, bool InPPDirective, 159 encoding::Encoding Encoding, const FormatStyle &Style) 160 : BreakableSingleLineToken(Tok, IndentLevel, StartColumn, Prefix, Postfix, 161 InPPDirective, Encoding, Style) {} 162 163 BreakableToken::Split 164 BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, 165 unsigned ColumnLimit) const { 166 return getStringSplit(Line.substr(TailOffset), 167 StartColumn + Prefix.size() + Postfix.size(), 168 ColumnLimit, Style.TabWidth, Encoding); 169 } 170 171 void BreakableStringLiteral::insertBreak(unsigned LineIndex, 172 unsigned TailOffset, Split Split, 173 WhitespaceManager &Whitespaces) { 174 unsigned LeadingSpaces = StartColumn; 175 // The '@' of an ObjC string literal (@"Test") does not become part of the 176 // string token. 177 // FIXME: It might be a cleaner solution to merge the tokens as a 178 // precomputation step. 179 if (Prefix.startswith("@")) 180 --LeadingSpaces; 181 Whitespaces.replaceWhitespaceInToken( 182 Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, 183 Prefix, InPPDirective, 1, IndentLevel, LeadingSpaces); 184 } 185 186 BreakableLineComment::BreakableLineComment( 187 const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, 188 bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) 189 : BreakableSingleLineToken(Token, IndentLevel, StartColumn, 190 getLineCommentIndentPrefix(Token.TokenText), "", 191 InPPDirective, Encoding, Style) { 192 OriginalPrefix = Prefix; 193 if (Token.TokenText.size() > Prefix.size() && 194 isAlphanumeric(Token.TokenText[Prefix.size()])) { 195 if (Prefix == "//") 196 Prefix = "// "; 197 else if (Prefix == "///") 198 Prefix = "/// "; 199 else if (Prefix == "//!") 200 Prefix = "//! "; 201 } 202 } 203 204 BreakableToken::Split 205 BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset, 206 unsigned ColumnLimit) const { 207 return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(), 208 ColumnLimit, Style.TabWidth, Encoding); 209 } 210 211 void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset, 212 Split Split, 213 WhitespaceManager &Whitespaces) { 214 Whitespaces.replaceWhitespaceInToken( 215 Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, 216 Postfix, Prefix, InPPDirective, /*Newlines=*/1, IndentLevel, StartColumn); 217 } 218 219 void BreakableLineComment::replaceWhitespace(unsigned LineIndex, 220 unsigned TailOffset, Split Split, 221 WhitespaceManager &Whitespaces) { 222 Whitespaces.replaceWhitespaceInToken( 223 Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, "", 224 "", /*InPPDirective=*/false, /*Newlines=*/0, /*IndentLevel=*/0, 225 /*Spaces=*/1); 226 } 227 228 void BreakableLineComment::replaceWhitespaceBefore( 229 unsigned LineIndex, WhitespaceManager &Whitespaces) { 230 if (OriginalPrefix != Prefix) { 231 Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "", 232 /*InPPDirective=*/false, 233 /*Newlines=*/0, /*IndentLevel=*/0, 234 /*Spaces=*/1); 235 } 236 } 237 238 BreakableBlockComment::BreakableBlockComment( 239 const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, 240 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, 241 encoding::Encoding Encoding, const FormatStyle &Style) 242 : BreakableToken(Token, IndentLevel, InPPDirective, Encoding, Style) { 243 StringRef TokenText(Token.TokenText); 244 assert(TokenText.startswith("/*") && TokenText.endswith("*/")); 245 TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); 246 247 int IndentDelta = StartColumn - OriginalStartColumn; 248 LeadingWhitespace.resize(Lines.size()); 249 StartOfLineColumn.resize(Lines.size()); 250 StartOfLineColumn[0] = StartColumn + 2; 251 for (size_t i = 1; i < Lines.size(); ++i) 252 adjustWhitespace(i, IndentDelta); 253 254 Decoration = "* "; 255 if (Lines.size() == 1 && !FirstInLine) { 256 // Comments for which FirstInLine is false can start on arbitrary column, 257 // and available horizontal space can be too small to align consecutive 258 // lines with the first one. 259 // FIXME: We could, probably, align them to current indentation level, but 260 // now we just wrap them without stars. 261 Decoration = ""; 262 } 263 for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) { 264 // If the last line is empty, the closing "*/" will have a star. 265 if (i + 1 == e && Lines[i].empty()) 266 break; 267 if (!Lines[i].empty() && i + 1 != e && Decoration.startswith(Lines[i])) 268 continue; 269 while (!Lines[i].startswith(Decoration)) 270 Decoration = Decoration.substr(0, Decoration.size() - 1); 271 } 272 273 LastLineNeedsDecoration = true; 274 IndentAtLineBreak = StartOfLineColumn[0] + 1; 275 for (size_t i = 1; i < Lines.size(); ++i) { 276 if (Lines[i].empty()) { 277 if (i + 1 == Lines.size()) { 278 // Empty last line means that we already have a star as a part of the 279 // trailing */. We also need to preserve whitespace, so that */ is 280 // correctly indented. 281 LastLineNeedsDecoration = false; 282 } else if (Decoration.empty()) { 283 // For all other lines, set the start column to 0 if they're empty, so 284 // we do not insert trailing whitespace anywhere. 285 StartOfLineColumn[i] = 0; 286 } 287 continue; 288 } 289 290 // The first line already excludes the star. 291 // For all other lines, adjust the line to exclude the star and 292 // (optionally) the first whitespace. 293 unsigned DecorationSize = 294 Decoration.startswith(Lines[i]) ? Lines[i].size() : Decoration.size(); 295 StartOfLineColumn[i] += DecorationSize; 296 Lines[i] = Lines[i].substr(DecorationSize); 297 LeadingWhitespace[i] += DecorationSize; 298 if (!Decoration.startswith(Lines[i])) 299 IndentAtLineBreak = 300 std::min<int>(IndentAtLineBreak, std::max(0, StartOfLineColumn[i])); 301 } 302 IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size()); 303 DEBUG({ 304 llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n"; 305 for (size_t i = 0; i < Lines.size(); ++i) { 306 llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i] 307 << "\n"; 308 } 309 }); 310 } 311 312 void BreakableBlockComment::adjustWhitespace(unsigned LineIndex, 313 int IndentDelta) { 314 // When in a preprocessor directive, the trailing backslash in a block comment 315 // is not needed, but can serve a purpose of uniformity with necessary escaped 316 // newlines outside the comment. In this case we remove it here before 317 // trimming the trailing whitespace. The backslash will be re-added later when 318 // inserting a line break. 319 size_t EndOfPreviousLine = Lines[LineIndex - 1].size(); 320 if (InPPDirective && Lines[LineIndex - 1].endswith("\\")) 321 --EndOfPreviousLine; 322 323 // Calculate the end of the non-whitespace text in the previous line. 324 EndOfPreviousLine = 325 Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine); 326 if (EndOfPreviousLine == StringRef::npos) 327 EndOfPreviousLine = 0; 328 else 329 ++EndOfPreviousLine; 330 // Calculate the start of the non-whitespace text in the current line. 331 size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks); 332 if (StartOfLine == StringRef::npos) 333 StartOfLine = Lines[LineIndex].rtrim("\r\n").size(); 334 335 StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine); 336 // Adjust Lines to only contain relevant text. 337 Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine); 338 Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine); 339 // Adjust LeadingWhitespace to account all whitespace between the lines 340 // to the current line. 341 LeadingWhitespace[LineIndex] = 342 Lines[LineIndex].begin() - Lines[LineIndex - 1].end(); 343 344 // Adjust the start column uniformly across all lines. 345 StartOfLineColumn[LineIndex] = 346 encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) + 347 IndentDelta; 348 } 349 350 unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); } 351 352 unsigned BreakableBlockComment::getLineLengthAfterSplit( 353 unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { 354 unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset); 355 return ContentStartColumn + 356 encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length), 357 ContentStartColumn, Style.TabWidth, 358 Encoding) + 359 // The last line gets a "*/" postfix. 360 (LineIndex + 1 == Lines.size() ? 2 : 0); 361 } 362 363 BreakableToken::Split 364 BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset, 365 unsigned ColumnLimit) const { 366 return getCommentSplit(Lines[LineIndex].substr(TailOffset), 367 getContentStartColumn(LineIndex, TailOffset), 368 ColumnLimit, Style.TabWidth, Encoding); 369 } 370 371 void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, 372 Split Split, 373 WhitespaceManager &Whitespaces) { 374 StringRef Text = Lines[LineIndex].substr(TailOffset); 375 StringRef Prefix = Decoration; 376 if (LineIndex + 1 == Lines.size() && 377 Text.size() == Split.first + Split.second) { 378 // For the last line we need to break before "*/", but not to add "* ". 379 Prefix = ""; 380 } 381 382 unsigned BreakOffsetInToken = 383 Text.data() - Tok.TokenText.data() + Split.first; 384 unsigned CharsToRemove = Split.second; 385 assert(IndentAtLineBreak >= Decoration.size()); 386 Whitespaces.replaceWhitespaceInToken( 387 Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, InPPDirective, 1, 388 IndentLevel, IndentAtLineBreak - Decoration.size()); 389 } 390 391 void BreakableBlockComment::replaceWhitespace(unsigned LineIndex, 392 unsigned TailOffset, Split Split, 393 WhitespaceManager &Whitespaces) { 394 StringRef Text = Lines[LineIndex].substr(TailOffset); 395 unsigned BreakOffsetInToken = 396 Text.data() - Tok.TokenText.data() + Split.first; 397 unsigned CharsToRemove = Split.second; 398 Whitespaces.replaceWhitespaceInToken( 399 Tok, BreakOffsetInToken, CharsToRemove, "", "", /*InPPDirective=*/false, 400 /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1); 401 } 402 403 void BreakableBlockComment::replaceWhitespaceBefore( 404 unsigned LineIndex, WhitespaceManager &Whitespaces) { 405 if (LineIndex == 0) 406 return; 407 StringRef Prefix = Decoration; 408 if (Lines[LineIndex].empty()) { 409 if (LineIndex + 1 == Lines.size()) { 410 if (!LastLineNeedsDecoration) { 411 // If the last line was empty, we don't need a prefix, as the */ will 412 // line up with the decoration (if it exists). 413 Prefix = ""; 414 } 415 } else if (!Decoration.empty()) { 416 // For other empty lines, if we do have a decoration, adapt it to not 417 // contain a trailing whitespace. 418 Prefix = Prefix.substr(0, 1); 419 } 420 } else { 421 if (StartOfLineColumn[LineIndex] == 1) { 422 // This line starts immediately after the decorating *. 423 Prefix = Prefix.substr(0, 1); 424 } 425 } 426 427 unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() - 428 Tok.TokenText.data() - 429 LeadingWhitespace[LineIndex]; 430 Whitespaces.replaceWhitespaceInToken( 431 Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix, 432 InPPDirective, 1, IndentLevel, 433 StartOfLineColumn[LineIndex] - Prefix.size()); 434 } 435 436 unsigned 437 BreakableBlockComment::getContentStartColumn(unsigned LineIndex, 438 unsigned TailOffset) const { 439 // If we break, we always break at the predefined indent. 440 if (TailOffset != 0) 441 return IndentAtLineBreak; 442 return std::max(0, StartOfLineColumn[LineIndex]); 443 } 444 445 } // namespace format 446 } // namespace clang 447