1 //===--- BreakableToken.cpp - Format C++ code -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief Contains implementation of BreakableToken class and classes derived 12 /// from it. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "format-token-breaker" 17 18 #include "BreakableToken.h" 19 #include "clang/Basic/CharInfo.h" 20 #include "clang/Format/Format.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/Support/Debug.h" 23 #include <algorithm> 24 25 namespace clang { 26 namespace format { 27 28 static const char *const Blanks = " \t\v\f"; 29 static bool IsBlank(char C) { 30 switch (C) { 31 case ' ': 32 case '\t': 33 case '\v': 34 case '\f': 35 return true; 36 default: 37 return false; 38 } 39 } 40 41 static BreakableToken::Split getCommentSplit(StringRef Text, 42 unsigned ContentStartColumn, 43 unsigned ColumnLimit, 44 unsigned TabWidth, 45 encoding::Encoding Encoding) { 46 if (ColumnLimit <= ContentStartColumn + 1) 47 return BreakableToken::Split(StringRef::npos, 0); 48 49 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1; 50 unsigned MaxSplitBytes = 0; 51 52 for (unsigned NumChars = 0; 53 NumChars < MaxSplit && MaxSplitBytes < Text.size();) { 54 unsigned BytesInChar = 55 encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding); 56 NumChars += 57 encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar), 58 ContentStartColumn, TabWidth, Encoding); 59 MaxSplitBytes += BytesInChar; 60 } 61 62 StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); 63 if (SpaceOffset == StringRef::npos || 64 // Don't break at leading whitespace. 65 Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) { 66 // Make sure that we don't break at leading whitespace that 67 // reaches past MaxSplit. 68 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks); 69 if (FirstNonWhitespace == StringRef::npos) 70 // If the comment is only whitespace, we cannot split. 71 return BreakableToken::Split(StringRef::npos, 0); 72 SpaceOffset = Text.find_first_of( 73 Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace)); 74 } 75 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { 76 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks); 77 StringRef AfterCut = Text.substr(SpaceOffset).ltrim(Blanks); 78 return BreakableToken::Split(BeforeCut.size(), 79 AfterCut.begin() - BeforeCut.end()); 80 } 81 return BreakableToken::Split(StringRef::npos, 0); 82 } 83 84 static BreakableToken::Split getStringSplit(StringRef Text, 85 unsigned UsedColumns, 86 unsigned ColumnLimit, 87 unsigned TabWidth, 88 encoding::Encoding Encoding) { 89 // FIXME: Reduce unit test case. 90 if (Text.empty()) 91 return BreakableToken::Split(StringRef::npos, 0); 92 if (ColumnLimit <= UsedColumns) 93 return BreakableToken::Split(StringRef::npos, 0); 94 unsigned MaxSplit = std::min<unsigned>( 95 ColumnLimit - UsedColumns, 96 encoding::columnWidthWithTabs(Text, UsedColumns, TabWidth, Encoding) - 1); 97 StringRef::size_type SpaceOffset = 0; 98 StringRef::size_type SlashOffset = 0; 99 StringRef::size_type WordStartOffset = 0; 100 StringRef::size_type SplitPoint = 0; 101 for (unsigned Chars = 0;;) { 102 unsigned Advance; 103 if (Text[0] == '\\') { 104 Advance = encoding::getEscapeSequenceLength(Text); 105 Chars += Advance; 106 } else { 107 Advance = encoding::getCodePointNumBytes(Text[0], Encoding); 108 Chars += encoding::columnWidthWithTabs( 109 Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding); 110 } 111 112 if (Chars > MaxSplit) 113 break; 114 115 if (IsBlank(Text[0])) 116 SpaceOffset = SplitPoint; 117 if (Text[0] == '/') 118 SlashOffset = SplitPoint; 119 if (Advance == 1 && !isAlphanumeric(Text[0])) 120 WordStartOffset = SplitPoint; 121 122 SplitPoint += Advance; 123 Text = Text.substr(Advance); 124 } 125 126 if (SpaceOffset != 0) 127 return BreakableToken::Split(SpaceOffset + 1, 0); 128 if (SlashOffset != 0) 129 return BreakableToken::Split(SlashOffset + 1, 0); 130 if (WordStartOffset != 0) 131 return BreakableToken::Split(WordStartOffset + 1, 0); 132 if (SplitPoint != 0) 133 return BreakableToken::Split(SplitPoint, 0); 134 return BreakableToken::Split(StringRef::npos, 0); 135 } 136 137 unsigned BreakableSingleLineToken::getLineCount() const { return 1; } 138 139 unsigned BreakableSingleLineToken::getLineLengthAfterSplit( 140 unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { 141 return StartColumn + Prefix.size() + Postfix.size() + 142 encoding::columnWidthWithTabs(Line.substr(Offset, Length), 143 StartColumn + Prefix.size(), 144 Style.TabWidth, Encoding); 145 } 146 147 BreakableSingleLineToken::BreakableSingleLineToken( 148 const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn, 149 StringRef Prefix, StringRef Postfix, bool InPPDirective, 150 encoding::Encoding Encoding, const FormatStyle &Style) 151 : BreakableToken(Tok, IndentLevel, InPPDirective, Encoding, Style), 152 StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix) { 153 assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix)); 154 Line = Tok.TokenText.substr( 155 Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); 156 } 157 158 BreakableStringLiteral::BreakableStringLiteral( 159 const FormatToken &Tok, unsigned IndentLevel, unsigned StartColumn, 160 StringRef Prefix, StringRef Postfix, bool InPPDirective, 161 encoding::Encoding Encoding, const FormatStyle &Style) 162 : BreakableSingleLineToken(Tok, IndentLevel, StartColumn, Prefix, Postfix, 163 InPPDirective, Encoding, Style) {} 164 165 BreakableToken::Split 166 BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, 167 unsigned ColumnLimit) const { 168 return getStringSplit(Line.substr(TailOffset), 169 StartColumn + Prefix.size() + Postfix.size(), 170 ColumnLimit, Style.TabWidth, Encoding); 171 } 172 173 void BreakableStringLiteral::insertBreak(unsigned LineIndex, 174 unsigned TailOffset, Split Split, 175 WhitespaceManager &Whitespaces) { 176 Whitespaces.replaceWhitespaceInToken( 177 Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, 178 Prefix, InPPDirective, 1, IndentLevel, StartColumn); 179 } 180 181 static StringRef getLineCommentPrefix(StringRef Comment) { 182 static const char *const KnownPrefixes[] = { "/// ", "///", "// ", "//" }; 183 for (size_t i = 0, e = llvm::array_lengthof(KnownPrefixes); i != e; ++i) 184 if (Comment.startswith(KnownPrefixes[i])) 185 return KnownPrefixes[i]; 186 return ""; 187 } 188 189 BreakableLineComment::BreakableLineComment( 190 const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, 191 bool InPPDirective, encoding::Encoding Encoding, const FormatStyle &Style) 192 : BreakableSingleLineToken(Token, IndentLevel, StartColumn, 193 getLineCommentPrefix(Token.TokenText), "", 194 InPPDirective, Encoding, Style) { 195 OriginalPrefix = Prefix; 196 if (Token.TokenText.size() > Prefix.size() && 197 isAlphanumeric(Token.TokenText[Prefix.size()])) { 198 if (Prefix == "//") 199 Prefix = "// "; 200 else if (Prefix == "///") 201 Prefix = "/// "; 202 } 203 } 204 205 BreakableToken::Split 206 BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset, 207 unsigned ColumnLimit) const { 208 return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(), 209 ColumnLimit, Style.TabWidth, Encoding); 210 } 211 212 void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset, 213 Split Split, 214 WhitespaceManager &Whitespaces) { 215 Whitespaces.replaceWhitespaceInToken( 216 Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, 217 Postfix, Prefix, InPPDirective, 1, IndentLevel, StartColumn); 218 } 219 220 void 221 BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex, 222 WhitespaceManager &Whitespaces) { 223 if (OriginalPrefix != Prefix) { 224 Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "", 225 false, 0, /*IndentLevel=*/0, 1); 226 } 227 } 228 229 BreakableBlockComment::BreakableBlockComment( 230 const FormatToken &Token, unsigned IndentLevel, unsigned StartColumn, 231 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, 232 encoding::Encoding Encoding, const FormatStyle &Style) 233 : BreakableToken(Token, IndentLevel, InPPDirective, Encoding, Style) { 234 StringRef TokenText(Token.TokenText); 235 assert(TokenText.startswith("/*") && TokenText.endswith("*/")); 236 TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); 237 238 int IndentDelta = StartColumn - OriginalStartColumn; 239 LeadingWhitespace.resize(Lines.size()); 240 StartOfLineColumn.resize(Lines.size()); 241 StartOfLineColumn[0] = StartColumn + 2; 242 for (size_t i = 1; i < Lines.size(); ++i) 243 adjustWhitespace(i, IndentDelta); 244 245 Decoration = "* "; 246 if (Lines.size() == 1 && !FirstInLine) { 247 // Comments for which FirstInLine is false can start on arbitrary column, 248 // and available horizontal space can be too small to align consecutive 249 // lines with the first one. 250 // FIXME: We could, probably, align them to current indentation level, but 251 // now we just wrap them without stars. 252 Decoration = ""; 253 } 254 for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) { 255 // If the last line is empty, the closing "*/" will have a star. 256 if (i + 1 == e && Lines[i].empty()) 257 break; 258 while (!Lines[i].startswith(Decoration)) 259 Decoration = Decoration.substr(0, Decoration.size() - 1); 260 } 261 262 LastLineNeedsDecoration = true; 263 IndentAtLineBreak = StartOfLineColumn[0] + 1; 264 for (size_t i = 1; i < Lines.size(); ++i) { 265 if (Lines[i].empty()) { 266 if (i + 1 == Lines.size()) { 267 // Empty last line means that we already have a star as a part of the 268 // trailing */. We also need to preserve whitespace, so that */ is 269 // correctly indented. 270 LastLineNeedsDecoration = false; 271 } else if (Decoration.empty()) { 272 // For all other lines, set the start column to 0 if they're empty, so 273 // we do not insert trailing whitespace anywhere. 274 StartOfLineColumn[i] = 0; 275 } 276 continue; 277 } 278 // The first line already excludes the star. 279 // For all other lines, adjust the line to exclude the star and 280 // (optionally) the first whitespace. 281 StartOfLineColumn[i] += Decoration.size(); 282 Lines[i] = Lines[i].substr(Decoration.size()); 283 LeadingWhitespace[i] += Decoration.size(); 284 IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]); 285 } 286 IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size()); 287 DEBUG({ 288 llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n"; 289 for (size_t i = 0; i < Lines.size(); ++i) { 290 llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i] 291 << "\n"; 292 } 293 }); 294 } 295 296 void BreakableBlockComment::adjustWhitespace(unsigned LineIndex, 297 int IndentDelta) { 298 // When in a preprocessor directive, the trailing backslash in a block comment 299 // is not needed, but can serve a purpose of uniformity with necessary escaped 300 // newlines outside the comment. In this case we remove it here before 301 // trimming the trailing whitespace. The backslash will be re-added later when 302 // inserting a line break. 303 size_t EndOfPreviousLine = Lines[LineIndex - 1].size(); 304 if (InPPDirective && Lines[LineIndex - 1].endswith("\\")) 305 --EndOfPreviousLine; 306 307 // Calculate the end of the non-whitespace text in the previous line. 308 EndOfPreviousLine = 309 Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine); 310 if (EndOfPreviousLine == StringRef::npos) 311 EndOfPreviousLine = 0; 312 else 313 ++EndOfPreviousLine; 314 // Calculate the start of the non-whitespace text in the current line. 315 size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks); 316 if (StartOfLine == StringRef::npos) 317 StartOfLine = Lines[LineIndex].size(); 318 319 StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine); 320 // Adjust Lines to only contain relevant text. 321 Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine); 322 Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine); 323 // Adjust LeadingWhitespace to account all whitespace between the lines 324 // to the current line. 325 LeadingWhitespace[LineIndex] = 326 Lines[LineIndex].begin() - Lines[LineIndex - 1].end(); 327 328 // Adjust the start column uniformly accross all lines. 329 StartOfLineColumn[LineIndex] = std::max<int>( 330 0, 331 encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) + 332 IndentDelta); 333 } 334 335 unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); } 336 337 unsigned BreakableBlockComment::getLineLengthAfterSplit( 338 unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { 339 unsigned ContentStartColumn = getContentStartColumn(LineIndex, Offset); 340 return ContentStartColumn + 341 encoding::columnWidthWithTabs(Lines[LineIndex].substr(Offset, Length), 342 ContentStartColumn, Style.TabWidth, 343 Encoding) + 344 // The last line gets a "*/" postfix. 345 (LineIndex + 1 == Lines.size() ? 2 : 0); 346 } 347 348 BreakableToken::Split 349 BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset, 350 unsigned ColumnLimit) const { 351 return getCommentSplit(Lines[LineIndex].substr(TailOffset), 352 getContentStartColumn(LineIndex, TailOffset), 353 ColumnLimit, Style.TabWidth, Encoding); 354 } 355 356 void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, 357 Split Split, 358 WhitespaceManager &Whitespaces) { 359 StringRef Text = Lines[LineIndex].substr(TailOffset); 360 StringRef Prefix = Decoration; 361 if (LineIndex + 1 == Lines.size() && 362 Text.size() == Split.first + Split.second) { 363 // For the last line we need to break before "*/", but not to add "* ". 364 Prefix = ""; 365 } 366 367 unsigned BreakOffsetInToken = 368 Text.data() - Tok.TokenText.data() + Split.first; 369 unsigned CharsToRemove = Split.second; 370 assert(IndentAtLineBreak >= Decoration.size()); 371 Whitespaces.replaceWhitespaceInToken( 372 Tok, BreakOffsetInToken, CharsToRemove, "", Prefix, InPPDirective, 1, 373 IndentLevel, IndentAtLineBreak - Decoration.size()); 374 } 375 376 void 377 BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex, 378 WhitespaceManager &Whitespaces) { 379 if (LineIndex == 0) 380 return; 381 StringRef Prefix = Decoration; 382 if (Lines[LineIndex].empty()) { 383 if (LineIndex + 1 == Lines.size()) { 384 if (!LastLineNeedsDecoration) { 385 // If the last line was empty, we don't need a prefix, as the */ will 386 // line up with the decoration (if it exists). 387 Prefix = ""; 388 } 389 } else if (!Decoration.empty()) { 390 // For other empty lines, if we do have a decoration, adapt it to not 391 // contain a trailing whitespace. 392 Prefix = Prefix.substr(0, 1); 393 } 394 } else { 395 if (StartOfLineColumn[LineIndex] == 1) { 396 // This line starts immediately after the decorating *. 397 Prefix = Prefix.substr(0, 1); 398 } 399 } 400 401 unsigned WhitespaceOffsetInToken = Lines[LineIndex].data() - 402 Tok.TokenText.data() - 403 LeadingWhitespace[LineIndex]; 404 assert(StartOfLineColumn[LineIndex] >= Prefix.size()); 405 Whitespaces.replaceWhitespaceInToken( 406 Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix, 407 InPPDirective, 1, IndentLevel, 408 StartOfLineColumn[LineIndex] - Prefix.size()); 409 } 410 411 unsigned 412 BreakableBlockComment::getContentStartColumn(unsigned LineIndex, 413 unsigned TailOffset) const { 414 // If we break, we always break at the predefined indent. 415 if (TailOffset != 0) 416 return IndentAtLineBreak; 417 return StartOfLineColumn[LineIndex]; 418 } 419 420 } // namespace format 421 } // namespace clang 422