1 //===--- BreakableToken.cpp - Format C++ code -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief Contains implementation of BreakableToken class and classes derived 12 /// from it. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "format-token-breaker" 17 18 #include "BreakableToken.h" 19 #include "clang/Basic/CharInfo.h" 20 #include "clang/Format/Format.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/Support/Debug.h" 23 #include <algorithm> 24 25 namespace clang { 26 namespace format { 27 28 static const char *const Blanks = " \t\v\f\r"; 29 static bool IsBlank(char C) { 30 switch (C) { 31 case ' ': 32 case '\t': 33 case '\v': 34 case '\f': 35 case '\r': 36 return true; 37 default: 38 return false; 39 } 40 } 41 42 static BreakableToken::Split getCommentSplit(StringRef Text, 43 unsigned ContentStartColumn, 44 unsigned ColumnLimit, 45 unsigned TabWidth, 46 encoding::Encoding Encoding) { 47 if (ColumnLimit <= ContentStartColumn + 1) 48 return BreakableToken::Split(StringRef::npos, 0); 49 50 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1; 51 unsigned MaxSplitBytes = 0; 52 53 for (unsigned NumChars = 0; 54 NumChars < MaxSplit && MaxSplitBytes < Text.size();) { 55 unsigned BytesInChar = 56 encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding); 57 NumChars += 58 encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar), 59 ContentStartColumn, TabWidth, Encoding); 60 MaxSplitBytes += BytesInChar; 61 } 62 63 StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); 64 if (SpaceOffset == StringRef::npos || 65 // Don't break at leading whitespace. 66 Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) { 67 // Make sure that we don't break at leading whitespace that 68 // reaches past MaxSplit. 69 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks); 70 if (FirstNonWhitespace == StringRef::npos) 71 // If the comment is only whitespace, we cannot split. 72 return BreakableToken::Split(StringRef::npos, 0); 73 SpaceOffset = Text.find_first_of( 74 Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace)); 75 } 76 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { 77 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks); 78 StringRef AfterCut = Text.substr(SpaceOffset).ltrim(Blanks); 79 return BreakableToken::Split(BeforeCut.size(), 80 AfterCut.begin() - BeforeCut.end()); 81 } 82 return BreakableToken::Split(StringRef::npos, 0); 83 } 84 85 static BreakableToken::Split getStringSplit(StringRef Text, 86 unsigned UsedColumns, 87 unsigned ColumnLimit, 88 unsigned TabWidth, 89 encoding::Encoding Encoding) { 90 // FIXME: Reduce unit test case. 91 if (Text.empty()) 92 return BreakableToken::Split(StringRef::npos, 0); 93 if (ColumnLimit <= UsedColumns) 94 return BreakableToken::Split(StringRef::npos, 0); 95 unsigned MaxSplit = ColumnLimit - UsedColumns; 96 StringRef::size_type SpaceOffset = 0; 97 StringRef::size_type SlashOffset = 0; 98 StringRef::size_type WordStartOffset = 0; 99 StringRef::size_type SplitPoint = 0; 100 for (unsigned Chars = 0;;) { 101 unsigned Advance; 102 if (Text[0] == '\\') { 103 Advance = encoding::getEscapeSequenceLength(Text); 104 Chars += Advance; 105 } else { 106 Advance = encoding::getCodePointNumBytes(Text[0], Encoding); 107 Chars += encoding::columnWidthWithTabs( 108 Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding); 109 } 110 111 if (Chars > MaxSplit || Text.size() == Advance) 112 break; 113 114 if (IsBlank(Text[0])) 115 SpaceOffset = SplitPoint; 116 if (Text[0] == '/') 117 SlashOffset = SplitPoint; 118 if (Advance == 1 && !isAlphanumeric(Text[0])) 119 WordStartOffset = SplitPoint; 120 121 SplitPoint += Advance; 122 Text = Text.substr(Advance); 123 } 124 125 if (SpaceOffset != 0) 126 return BreakableToken::Split(SpaceOffset + 1, 0); 127 if (SlashOffset != 0) 128 return BreakableToken::Split(SlashOffset + 1, 0); 129 if (WordStartOffset != 0) 130 return BreakableToken::Split(WordStartOffset + 1, 0); 131 if (SplitPoint != 0) 132 return BreakableToken::Split(SplitPoint, 0); 133 return BreakableToken::Split(StringRef::npos, 0); 134 } 135 136 unsigned BreakableSingleLineToken::getLineCount() const { return 1; } 137 138 unsigned BreakableSingleLineToken::getLineLengthAfterSplit( 139 unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { 140 return StartColumn + Prefix.size() + Postfix.size() + 141 encoding::columnWidthWithTabs(Line.substr(Offset, Length), 142 StartColumn + Prefix.size(), 143 Style.TabWidth, Encoding); 144 } 145 146 BreakableSingleLineToken::BreakableSingleLineToken( 147 const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn, 148 StringRef Prefix, StringRef Postfix, bool InPPDirective, 149 encoding::Encoding Encoding, const FormatStyle &Style) 150 : BreakableToken(Tok, IndentLevel, InPPDirective, Encoding, Style), 151 StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) { 152 assert(Tok.TokenText.endswith(Postfix)); 153 Line = Tok.TokenText.substr( 154 Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); 155 } 156 157 BreakableStringLiteral::BreakableStringLiteral( 158 const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn, 159 StringRef Prefix, StringRef Postfix, bool InPPDirective, 160 encoding::Encoding Encoding, const FormatStyle &Style) 161 : BreakableSingleLineToken(Tok, IndentLevel, StartColumn, Prefix, Postfix, 162 InPPDirective, Encoding, Style) {} 163 164 BreakableToken::Split 165 BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, 166 unsigned ColumnLimit) const { 167 return getStringSplit(Line.substr(TailOffset), 168 StartColumn + Prefix.size() + Postfix.size(), 169 ColumnLimit, Style.TabWidth, Encoding); 170 } 171 172 void BreakableStringLiteral::insertBreak(unsigned LineIndex, 173 unsigned TailOffset, Split Split, 174 WhitespaceManager &Whitespaces) { 175 unsigned LeadingSpaces = StartColumn; 176 // The '@' of an ObjC string literal (@"Test") does not become part of the 177 // string token. 178 // FIXME: It might be a cleaner solution to merge the tokens as a 179 // precomputation step. 180 if (Prefix.startswith("@")) 181 --LeadingSpaces; 182 Whitespaces.replaceWhitespaceInToken( 183 Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, 184 Prefix, InPPDirective, 1, IndentLevel, LeadingSpaces); 185 } 186 187 static StringRef getLineCommentPrefix(StringRef Comment) { 188 static const char *const KnownPrefixes[] = { "/// ", "///", "// ", "//" }; 189 for (size_t i = 0, e = llvm::array_lengthof(KnownPrefixes); i != e; ++i) 190 if (Comment.startswith(KnownPrefixes[i])) 191 return KnownPrefixes[i]; 192 return ""; 193 } 194 195 BreakableLineComment::BreakableLineComment( 196 const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, 197 bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) 198 : BreakableSingleLineToken(Token, IndentLevel, StartColumn, 199 getLineCommentPrefix(Token.TokenText), "", 200 InPPDirective, Encoding, Style) { 201 OriginalPrefix = Prefix; 202 if (Token.TokenText.size() > Prefix.size() && 203 isAlphanumeric(Token.TokenText[Prefix.size()])) { 204 if (Prefix == "//") 205 Prefix = "// "; 206 else if (Prefix == "///") 207 Prefix = "/// "; 208 } 209 } 210 211 BreakableToken::Split 212 BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset, 213 unsigned ColumnLimit) const { 214 return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(), 215 ColumnLimit, Style.TabWidth, Encoding); 216 } 217 218 void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset, 219 Split Split, 220 WhitespaceManager &Whitespaces) { 221 Whitespaces.replaceWhitespaceInToken( 222 Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, 223 Postfix, Prefix, InPPDirective, /*Newlines=*/1, IndentLevel, StartColumn); 224 } 225 226 void BreakableLineComment::replaceWhitespace(unsigned LineIndex, 227 unsigned TailOffset, Split Split, 228 WhitespaceManager &Whitespaces) { 229 Whitespaces.replaceWhitespaceInToken( 230 Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, "", 231 "", /*InPPDirective=*/false, /*Newlines=*/0, /*IndentLevel=*/0, 232 /*Spaces=*/1); 233 } 234 235 void 236 BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex, 237 WhitespaceManager &Whitespaces) { 238 if (OriginalPrefix != Prefix) { 239 Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "", 240 /*InPPDirective=*/false, 241 /*Newlines=*/0, /*IndentLevel=*/0, 242 /*Spaces=*/1); 243 } 244 } 245 246 BreakableBlockComment::BreakableBlockComment( 247 const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, 248 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, 249 encoding::Encoding Encoding, const FormatStyle &Style) 250 : BreakableToken(Token, IndentLevel, InPPDirective, Encoding, Style) { 251 StringRef TokenText(Token.TokenText); 252 assert(TokenText.startswith("/*") && TokenText.endswith("*/")); 253 TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); 254 255 int IndentDelta = StartColumn - OriginalStartColumn; 256 LeadingWhitespace.resize(Lines.size()); 257 StartOfLineColumn.resize(Lines.size()); 258 StartOfLineColumn[0] = StartColumn + 2; 259 for (size_t i = 1; i < Lines.size(); ++i) 260 adjustWhitespace(i, IndentDelta); 261 262 Decoration = "* "; 263 if (Lines.size() == 1 && !FirstInLine) { 264 // Comments for which FirstInLine is false can start on arbitrary column, 265 // and available horizontal space can be too small to align consecutive 266 // lines with the first one. 267 // FIXME: We could, probably, align them to current indentation level, but 268 // now we just wrap them without stars. 269 Decoration = ""; 270 } 271 for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) { 272 // If the last line is empty, the closing "*/" will have a star. 273 if (i + 1 == e && Lines[i].empty()) 274 break; 275 while (!Lines[i].startswith(Decoration)) 276 Decoration = Decoration.substr(0, Decoration.size() - 1); 277 } 278 279 LastLineNeedsDecoration = true; 280 IndentAtLineBreak = StartOfLineColumn[0] + 1; 281 for (size_t i = 1; i < Lines.size(); ++i) { 282 if (Lines[i].empty()) { 283 if (i + 1 == Lines.size()) { 284 // Empty last line means that we already have a star as a part of the 285 // trailing */. We also need to preserve whitespace, so that */ is 286 // correctly indented. 287 LastLineNeedsDecoration = false; 288 } else if (Decoration.empty()) { 289 // For all other lines, set the start column to 0 if they're empty, so 290 // we do not insert trailing whitespace anywhere. 291 StartOfLineColumn[i] = 0; 292 } 293 continue; 294 } 295 // The first line already excludes the star. 296 // For all other lines, adjust the line to exclude the star and 297 // (optionally) the first whitespace. 298 StartOfLineColumn[i] += Decoration.size(); 299 Lines[i] = Lines[i].substr(Decoration.size()); 300 LeadingWhitespace[i] += Decoration.size(); 301 IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]); 302 } 303 IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size()); 304 DEBUG({ 305 llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n"; 306 for (size_t i = 0; i < Lines.size(); ++i) { 307 llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i] 308 << "\n"; 309 } 310 }); 311 } 312 313 void BreakableBlockComment::adjustWhitespace(unsigned LineIndex, 314 int IndentDelta) { 315 // When in a preprocessor directive, the trailing backslash in a block comment 316 // is not needed, but can serve a purpose of uniformity with necessary escaped 317 // newlines outside the comment. In this case we remove it here before 318 // trimming the trailing whitespace. The backslash will be re-added later when 319 // inserting a line break. 320 size_t EndOfPreviousLine = Lines[LineIndex - 1].size(); 321 if (InPPDirective && Lines[LineIndex - 1].endswith("\\")) 322 --EndOfPreviousLine; 323 324 // Calculate the end of the non-whitespace text in the previous line. 325 EndOfPreviousLine = 326 Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine); 327 if (EndOfPreviousLine == StringRef::npos) 328 EndOfPreviousLine = 0; 329 else 330 ++EndOfPreviousLine; 331 // Calculate the start of the non-whitespace text in the current line. 332 size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks); 333 if (StartOfLine == StringRef::npos) 334 StartOfLine = Lines[LineIndex].size(); 335 336 StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine); 337 // Adjust Lines to only contain relevant text. 338 Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine); 339 Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine); 340 // Adjust LeadingWhitespace to account all whitespace between the lines 341 // to the current line. 342 LeadingWhitespace[LineIndex] = 343 Lines[LineIndex].begin() - Lines[LineIndex - 1].end(); 344 345 // Adjust the start column uniformly across all lines. 346 StartOfLineColumn[LineIndex] = std::max<int>( 347 0, 348 encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) + 349 IndentDelta); 350 } 351 352 unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); } 353 354 unsigned BreakableBlockComment::getLineLengthAfterSplit( 355 unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { 356 unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset); 357 return ContentStartColumn + 358 encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length), 359 ContentStartColumn, Style.TabWidth, 360 Encoding) + 361 // The last line gets a "*/" postfix. 362 (LineIndex + 1 == Lines.size() ? 2 : 0); 363 } 364 365 BreakableToken::Split 366 BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset, 367 unsigned ColumnLimit) const { 368 return getCommentSplit(Lines[LineIndex].substr(TailOffset), 369 getContentStartColumn(LineIndex, TailOffset), 370 ColumnLimit, Style.TabWidth, Encoding); 371 } 372 373 void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, 374 Split Split, 375 WhitespaceManager &Whitespaces) { 376 StringRef Text = Lines[LineIndex].substr(TailOffset); 377 StringRef Prefix = Decoration; 378 if (LineIndex + 1 == Lines.size() && 379 Text.size() == Split.first + Split.second) { 380 // For the last line we need to break before "*/", but not to add "* ". 381 Prefix = ""; 382 } 383 384 unsigned BreakOffsetInToken = 385 Text.data() - Tok.TokenText.data() + Split.first; 386 unsigned CharsToRemove = Split.second; 387 assert(IndentAtLineBreak >= Decoration.size()); 388 Whitespaces.replaceWhitespaceInToken( 389 Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, InPPDirective, 1, 390 IndentLevel, IndentAtLineBreak - Decoration.size()); 391 } 392 393 void BreakableBlockComment::replaceWhitespace(unsigned LineIndex, 394 unsigned TailOffset, Split Split, 395 WhitespaceManager &Whitespaces) { 396 StringRef Text = Lines[LineIndex].substr(TailOffset); 397 unsigned BreakOffsetInToken = 398 Text.data() - Tok.TokenText.data() + Split.first; 399 unsigned CharsToRemove = Split.second; 400 Whitespaces.replaceWhitespaceInToken( 401 Tok, BreakOffsetInToken, CharsToRemove, "", "", /*InPPDirective=*/false, 402 /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1); 403 } 404 405 void 406 BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex, 407 WhitespaceManager &Whitespaces) { 408 if (LineIndex == 0) 409 return; 410 StringRef Prefix = Decoration; 411 if (Lines[LineIndex].empty()) { 412 if (LineIndex + 1 == Lines.size()) { 413 if (!LastLineNeedsDecoration) { 414 // If the last line was empty, we don't need a prefix, as the */ will 415 // line up with the decoration (if it exists). 416 Prefix = ""; 417 } 418 } else if (!Decoration.empty()) { 419 // For other empty lines, if we do have a decoration, adapt it to not 420 // contain a trailing whitespace. 421 Prefix = Prefix.substr(0, 1); 422 } 423 } else { 424 if (StartOfLineColumn[LineIndex] == 1) { 425 // This line starts immediately after the decorating *. 426 Prefix = Prefix.substr(0, 1); 427 } 428 } 429 430 unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() - 431 Tok.TokenText.data() - 432 LeadingWhitespace[LineIndex]; 433 assert(StartOfLineColumn[LineIndex] >= Prefix.size()); 434 Whitespaces.replaceWhitespaceInToken( 435 Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix, 436 InPPDirective, 1, IndentLevel, 437 StartOfLineColumn[LineIndex] - Prefix.size()); 438 } 439 440 unsigned 441 BreakableBlockComment::getContentStartColumn(unsigned LineIndex, 442 unsigned TailOffset) const { 443 // If we break, we always break at the predefined indent. 444 if (TailOffset != 0) 445 return IndentAtLineBreak; 446 return StartOfLineColumn[LineIndex]; 447 } 448 449 } // namespace format 450 } // namespace clang 451