1 //===--- BreakableToken.cpp - Format C++ code -----------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief Contains implementation of BreakableToken class and classes derived 12 /// from it. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "format-token-breaker" 17 18 #include "BreakableToken.h" 19 #include "clang/Basic/CharInfo.h" 20 #include "clang/Format/Format.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/Support/Debug.h" 23 #include <algorithm> 24 25 namespace clang { 26 namespace format { 27 namespace { 28 29 static const char *Blanks = " \t\v\f"; 30 static bool IsBlank(char C) { 31 switch (C) { 32 case ' ': 33 case '\t': 34 case '\v': 35 case '\f': 36 return true; 37 default: 38 return false; 39 } 40 } 41 42 BreakableToken::Split getCommentSplit(StringRef Text, 43 unsigned ContentStartColumn, 44 unsigned ColumnLimit, 45 encoding::Encoding Encoding) { 46 if (ColumnLimit <= ContentStartColumn + 1) 47 return BreakableToken::Split(StringRef::npos, 0); 48 49 unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1; 50 unsigned MaxSplitBytes = 0; 51 52 for (unsigned NumChars = 0; 53 NumChars < MaxSplit && MaxSplitBytes < Text.size(); ++NumChars) 54 MaxSplitBytes += 55 encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding); 56 57 StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes); 58 if (SpaceOffset == StringRef::npos || 59 // Don't break at leading whitespace. 60 Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) { 61 // Make sure that we don't break at leading whitespace that 62 // reaches past MaxSplit. 63 StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks); 64 if (FirstNonWhitespace == StringRef::npos) 65 // If the comment is only whitespace, we cannot split. 66 return BreakableToken::Split(StringRef::npos, 0); 67 SpaceOffset = Text.find_first_of( 68 Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace)); 69 } 70 if (SpaceOffset != StringRef::npos && SpaceOffset != 0) { 71 StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks); 72 StringRef AfterCut = Text.substr(SpaceOffset).ltrim(Blanks); 73 return BreakableToken::Split(BeforeCut.size(), 74 AfterCut.begin() - BeforeCut.end()); 75 } 76 return BreakableToken::Split(StringRef::npos, 0); 77 } 78 79 BreakableToken::Split getStringSplit(StringRef Text, 80 unsigned ContentStartColumn, 81 unsigned ColumnLimit, 82 encoding::Encoding Encoding) { 83 // FIXME: Reduce unit test case. 84 if (Text.empty()) 85 return BreakableToken::Split(StringRef::npos, 0); 86 if (ColumnLimit <= ContentStartColumn) 87 return BreakableToken::Split(StringRef::npos, 0); 88 unsigned MaxSplit = 89 std::min<unsigned>(ColumnLimit - ContentStartColumn, 90 encoding::getCodePointCount(Text, Encoding) - 1); 91 StringRef::size_type SpaceOffset = 0; 92 StringRef::size_type SlashOffset = 0; 93 StringRef::size_type WordStartOffset = 0; 94 StringRef::size_type SplitPoint = 0; 95 for (unsigned Chars = 0;;) { 96 unsigned Advance; 97 if (Text[0] == '\\') { 98 Advance = encoding::getEscapeSequenceLength(Text); 99 Chars += Advance; 100 } else { 101 Advance = encoding::getCodePointNumBytes(Text[0], Encoding); 102 Chars += 1; 103 } 104 105 if (Chars > MaxSplit) 106 break; 107 108 if (IsBlank(Text[0])) 109 SpaceOffset = SplitPoint; 110 if (Text[0] == '/') 111 SlashOffset = SplitPoint; 112 if (Advance == 1 && !isAlphanumeric(Text[0])) 113 WordStartOffset = SplitPoint; 114 115 SplitPoint += Advance; 116 Text = Text.substr(Advance); 117 } 118 119 if (SpaceOffset != 0) 120 return BreakableToken::Split(SpaceOffset + 1, 0); 121 if (SlashOffset != 0) 122 return BreakableToken::Split(SlashOffset + 1, 0); 123 if (WordStartOffset != 0) 124 return BreakableToken::Split(WordStartOffset + 1, 0); 125 if (SplitPoint != 0) 126 return BreakableToken::Split(SplitPoint, 0); 127 return BreakableToken::Split(StringRef::npos, 0); 128 } 129 130 } // namespace 131 132 unsigned BreakableSingleLineToken::getLineCount() const { return 1; } 133 134 unsigned BreakableSingleLineToken::getLineLengthAfterSplit( 135 unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { 136 return StartColumn + Prefix.size() + Postfix.size() + 137 encoding::getCodePointCount(Line.substr(Offset, Length), Encoding); 138 } 139 140 BreakableSingleLineToken::BreakableSingleLineToken( 141 const FormatToken &Tok, unsigned StartColumn, StringRef Prefix, 142 StringRef Postfix, bool InPPDirective, encoding::Encoding Encoding) 143 : BreakableToken(Tok, InPPDirective, Encoding), StartColumn(StartColumn), 144 Prefix(Prefix), Postfix(Postfix) { 145 assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix)); 146 Line = Tok.TokenText.substr( 147 Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size()); 148 } 149 150 BreakableStringLiteral::BreakableStringLiteral(const FormatToken &Tok, 151 unsigned StartColumn, 152 bool InPPDirective, 153 encoding::Encoding Encoding) 154 : BreakableSingleLineToken(Tok, StartColumn, "\"", "\"", InPPDirective, 155 Encoding) {} 156 157 BreakableToken::Split 158 BreakableStringLiteral::getSplit(unsigned LineIndex, unsigned TailOffset, 159 unsigned ColumnLimit) const { 160 return getStringSplit(Line.substr(TailOffset), StartColumn + 2, ColumnLimit, 161 Encoding); 162 } 163 164 void BreakableStringLiteral::insertBreak(unsigned LineIndex, 165 unsigned TailOffset, Split Split, 166 WhitespaceManager &Whitespaces) { 167 Whitespaces.replaceWhitespaceInToken( 168 Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix, 169 Prefix, InPPDirective, 1, StartColumn); 170 } 171 172 static StringRef getLineCommentPrefix(StringRef Comment) { 173 const char *KnownPrefixes[] = { "/// ", "///", "// ", "//" }; 174 for (size_t i = 0, e = llvm::array_lengthof(KnownPrefixes); i != e; ++i) 175 if (Comment.startswith(KnownPrefixes[i])) 176 return KnownPrefixes[i]; 177 return ""; 178 } 179 180 BreakableLineComment::BreakableLineComment(const FormatToken &Token, 181 unsigned StartColumn, 182 bool InPPDirective, 183 encoding::Encoding Encoding) 184 : BreakableSingleLineToken(Token, StartColumn, 185 getLineCommentPrefix(Token.TokenText), "", 186 InPPDirective, Encoding) { 187 OriginalPrefix = Prefix; 188 if (Token.TokenText.size() > Prefix.size() && 189 isAlphanumeric(Token.TokenText[Prefix.size()])) { 190 if (Prefix == "//") 191 Prefix = "// "; 192 else if (Prefix == "///") 193 Prefix = "/// "; 194 } 195 } 196 197 BreakableToken::Split 198 BreakableLineComment::getSplit(unsigned LineIndex, unsigned TailOffset, 199 unsigned ColumnLimit) const { 200 return getCommentSplit(Line.substr(TailOffset), StartColumn + Prefix.size(), 201 ColumnLimit, Encoding); 202 } 203 204 void BreakableLineComment::insertBreak(unsigned LineIndex, unsigned TailOffset, 205 Split Split, 206 WhitespaceManager &Whitespaces) { 207 Whitespaces.replaceWhitespaceInToken( 208 Tok, OriginalPrefix.size() + TailOffset + Split.first, Split.second, 209 Postfix, Prefix, InPPDirective, 1, StartColumn); 210 } 211 212 void 213 BreakableLineComment::replaceWhitespaceBefore(unsigned LineIndex, 214 WhitespaceManager &Whitespaces) { 215 if (OriginalPrefix != Prefix) { 216 Whitespaces.replaceWhitespaceInToken(Tok, OriginalPrefix.size(), 0, "", "", 217 false, 0, 1); 218 } 219 } 220 221 BreakableBlockComment::BreakableBlockComment( 222 const FormatStyle &Style, const FormatToken &Token, unsigned StartColumn, 223 unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective, 224 encoding::Encoding Encoding) 225 : BreakableToken(Token, InPPDirective, Encoding) { 226 StringRef TokenText(Token.TokenText); 227 assert(TokenText.startswith("/*") && TokenText.endswith("*/")); 228 TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n"); 229 230 int IndentDelta = StartColumn - OriginalStartColumn; 231 bool NeedsStar = true; 232 LeadingWhitespace.resize(Lines.size()); 233 StartOfLineColumn.resize(Lines.size()); 234 if (Lines.size() == 1 && !FirstInLine) { 235 // Comments for which FirstInLine is false can start on arbitrary column, 236 // and available horizontal space can be too small to align consecutive 237 // lines with the first one. 238 // FIXME: We could, probably, align them to current indentation level, but 239 // now we just wrap them without stars. 240 NeedsStar = false; 241 } 242 StartOfLineColumn[0] = StartColumn + 2; 243 for (size_t i = 1; i < Lines.size(); ++i) { 244 adjustWhitespace(Style, i, IndentDelta); 245 if (Lines[i].empty()) 246 // If the last line is empty, the closing "*/" will have a star. 247 NeedsStar = NeedsStar && i + 1 == Lines.size(); 248 else 249 NeedsStar = NeedsStar && Lines[i][0] == '*'; 250 } 251 Decoration = NeedsStar ? "* " : ""; 252 IndentAtLineBreak = StartOfLineColumn[0] + 1; 253 for (size_t i = 1; i < Lines.size(); ++i) { 254 if (Lines[i].empty()) { 255 if (!NeedsStar && i + 1 != Lines.size()) 256 // For all but the last line (which always ends in */), set the 257 // start column to 0 if they're empty, so we do not insert 258 // trailing whitespace anywhere. 259 StartOfLineColumn[i] = 0; 260 continue; 261 } 262 if (NeedsStar) { 263 // The first line already excludes the star. 264 // For all other lines, adjust the line to exclude the star and 265 // (optionally) the first whitespace. 266 int Offset = Lines[i].startswith("* ") ? 2 : 1; 267 StartOfLineColumn[i] += Offset; 268 Lines[i] = Lines[i].substr(Offset); 269 LeadingWhitespace[i] += Offset; 270 } 271 IndentAtLineBreak = std::min<int>(IndentAtLineBreak, StartOfLineColumn[i]); 272 } 273 IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size()); 274 DEBUG({ 275 for (size_t i = 0; i < Lines.size(); ++i) { 276 llvm::dbgs() << i << " |" << Lines[i] << "| " << LeadingWhitespace[i] 277 << "\n"; 278 } 279 }); 280 } 281 282 void BreakableBlockComment::adjustWhitespace(const FormatStyle &Style, 283 unsigned LineIndex, 284 int IndentDelta) { 285 // When in a preprocessor directive, the trailing backslash in a block comment 286 // is not needed, but can serve a purpose of uniformity with necessary escaped 287 // newlines outside the comment. In this case we remove it here before 288 // trimming the trailing whitespace. The backslash will be re-added later when 289 // inserting a line break. 290 size_t EndOfPreviousLine = Lines[LineIndex - 1].size(); 291 if (InPPDirective && Lines[LineIndex - 1].endswith("\\")) 292 --EndOfPreviousLine; 293 294 // Calculate the end of the non-whitespace text in the previous line. 295 EndOfPreviousLine = 296 Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine); 297 if (EndOfPreviousLine == StringRef::npos) 298 EndOfPreviousLine = 0; 299 else 300 ++EndOfPreviousLine; 301 // Calculate the start of the non-whitespace text in the current line. 302 size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks); 303 if (StartOfLine == StringRef::npos) 304 StartOfLine = Lines[LineIndex].size(); 305 306 // Adjust Lines to only contain relevant text. 307 Lines[LineIndex - 1] = Lines[LineIndex - 1].substr(0, EndOfPreviousLine); 308 Lines[LineIndex] = Lines[LineIndex].substr(StartOfLine); 309 // Adjust LeadingWhitespace to account all whitespace between the lines 310 // to the current line. 311 LeadingWhitespace[LineIndex] = 312 Lines[LineIndex].begin() - Lines[LineIndex - 1].end(); 313 314 // FIXME: We currently count tabs as 1 character. To solve this, we need to 315 // get the correct indentation width of the start of the comment, which 316 // requires correct counting of the tab expansions before the comment, and 317 // a configurable tab width. Since the current implementation only breaks 318 // if leading tabs are intermixed with spaces, that is not a high priority. 319 320 // Adjust the start column uniformly accross all lines. 321 StartOfLineColumn[LineIndex] = std::max<int>(0, StartOfLine + IndentDelta); 322 } 323 324 unsigned BreakableBlockComment::getLineCount() const { return Lines.size(); } 325 326 unsigned BreakableBlockComment::getLineLengthAfterSplit( 327 unsigned LineIndex, unsigned Offset, StringRef::size_type Length) const { 328 return getContentStartColumn(LineIndex, Offset) + 329 encoding::getCodePointCount(Lines[LineIndex].substr(Offset, Length), 330 Encoding) + 331 // The last line gets a "*/" postfix. 332 (LineIndex + 1 == Lines.size() ? 2 : 0); 333 } 334 335 BreakableToken::Split 336 BreakableBlockComment::getSplit(unsigned LineIndex, unsigned TailOffset, 337 unsigned ColumnLimit) const { 338 return getCommentSplit(Lines[LineIndex].substr(TailOffset), 339 getContentStartColumn(LineIndex, TailOffset), 340 ColumnLimit, Encoding); 341 } 342 343 void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset, 344 Split Split, 345 WhitespaceManager &Whitespaces) { 346 StringRef Text = Lines[LineIndex].substr(TailOffset); 347 StringRef Prefix = Decoration; 348 if (LineIndex + 1 == Lines.size() && 349 Text.size() == Split.first + Split.second) { 350 // For the last line we need to break before "*/", but not to add "* ". 351 Prefix = ""; 352 } 353 354 unsigned BreakOffsetInToken = 355 Text.data() - Tok.TokenText.data() + Split.first; 356 unsigned CharsToRemove = Split.second; 357 assert(IndentAtLineBreak >= Decoration.size()); 358 Whitespaces.replaceWhitespaceInToken(Tok, BreakOffsetInToken, CharsToRemove, 359 "", Prefix, InPPDirective, 1, 360 IndentAtLineBreak - Decoration.size()); 361 } 362 363 void 364 BreakableBlockComment::replaceWhitespaceBefore(unsigned LineIndex, 365 WhitespaceManager &Whitespaces) { 366 if (LineIndex == 0) 367 return; 368 StringRef Prefix = Decoration; 369 if (Lines[LineIndex].empty()) { 370 if (LineIndex + 1 == Lines.size()) { 371 // If the last line is empty, we don't need a prefix, as the */ will line 372 // up with the decoration (if it exists). 373 Prefix = ""; 374 } else if (!Decoration.empty()) { 375 // For other empty lines, if we do have a decoration, adapt it to not 376 // contain a trailing whitespace. 377 Prefix = Prefix.substr(0, 1); 378 } 379 } else { 380 if (StartOfLineColumn[LineIndex] == 1) { 381 // This lines starts immediately after the decorating *. 382 Prefix = Prefix.substr(0, 1); 383 } 384 } 385 386 unsigned WhitespaceOffsetInToken = 387 Lines[LineIndex].data() - Tok.TokenText.data() - 388 LeadingWhitespace[LineIndex]; 389 assert(StartOfLineColumn[LineIndex] >= Prefix.size()); 390 Whitespaces.replaceWhitespaceInToken( 391 Tok, WhitespaceOffsetInToken, LeadingWhitespace[LineIndex], "", Prefix, 392 InPPDirective, 1, StartOfLineColumn[LineIndex] - Prefix.size()); 393 } 394 395 unsigned 396 BreakableBlockComment::getContentStartColumn(unsigned LineIndex, 397 unsigned TailOffset) const { 398 // If we break, we always break at the predefined indent. 399 if (TailOffset != 0) 400 return IndentAtLineBreak; 401 return StartOfLineColumn[LineIndex]; 402 } 403 404 } // namespace format 405 } // namespace clang 406