1 //===--- WhitespaceManager.cpp - Format C++ code --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements WhitespaceManager class.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "WhitespaceManager.h"
15 #include "llvm/ADT/STLExtras.h"
16 
17 namespace clang {
18 namespace format {
19 
20 bool WhitespaceManager::Change::IsBeforeInFile::
21 operator()(const Change &C1, const Change &C2) const {
22   return SourceMgr.isBeforeInTranslationUnit(
23       C1.OriginalWhitespaceRange.getBegin(),
24       C2.OriginalWhitespaceRange.getBegin());
25 }
26 
27 WhitespaceManager::Change::Change(const FormatToken &Tok,
28                                   bool CreateReplacement,
29                                   SourceRange OriginalWhitespaceRange,
30                                   int Spaces, unsigned StartOfTokenColumn,
31                                   unsigned NewlinesBefore,
32                                   StringRef PreviousLinePostfix,
33                                   StringRef CurrentLinePrefix,
34                                   bool ContinuesPPDirective, bool IsInsideToken)
35     : Tok(&Tok), CreateReplacement(CreateReplacement),
36       OriginalWhitespaceRange(OriginalWhitespaceRange),
37       StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore),
38       PreviousLinePostfix(PreviousLinePostfix),
39       CurrentLinePrefix(CurrentLinePrefix),
40       ContinuesPPDirective(ContinuesPPDirective), Spaces(Spaces),
41       IsInsideToken(IsInsideToken), IsTrailingComment(false), TokenLength(0),
42       PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0),
43       StartOfBlockComment(nullptr), IndentationOffset(0) {}
44 
45 void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines,
46                                           unsigned Spaces,
47                                           unsigned StartOfTokenColumn,
48                                           bool InPPDirective) {
49   if (Tok.Finalized)
50     return;
51   Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue;
52   Changes.push_back(Change(Tok, /*CreateReplacement=*/true, Tok.WhitespaceRange,
53                            Spaces, StartOfTokenColumn, Newlines, "", "",
54                            InPPDirective && !Tok.IsFirst,
55                            /*IsInsideToken=*/false));
56 }
57 
58 void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
59                                             bool InPPDirective) {
60   if (Tok.Finalized)
61     return;
62   Changes.push_back(Change(Tok, /*CreateReplacement=*/false,
63                            Tok.WhitespaceRange, /*Spaces=*/0,
64                            Tok.OriginalColumn, Tok.NewlinesBefore, "", "",
65                            InPPDirective && !Tok.IsFirst,
66                            /*IsInsideToken=*/false));
67 }
68 
69 llvm::Error
70 WhitespaceManager::addReplacement(const tooling::Replacement &Replacement) {
71   return Replaces.add(Replacement);
72 }
73 
74 void WhitespaceManager::replaceWhitespaceInToken(
75     const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars,
76     StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective,
77     unsigned Newlines, int Spaces) {
78   if (Tok.Finalized)
79     return;
80   SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset);
81   Changes.push_back(
82       Change(Tok, /*CreateReplacement=*/true,
83              SourceRange(Start, Start.getLocWithOffset(ReplaceChars)), Spaces,
84              std::max(0, Spaces), Newlines, PreviousPostfix, CurrentPrefix,
85              InPPDirective && !Tok.IsFirst, /*IsInsideToken=*/true));
86 }
87 
88 const tooling::Replacements &WhitespaceManager::generateReplacements() {
89   if (Changes.empty())
90     return Replaces;
91 
92   llvm::sort(Changes, Change::IsBeforeInFile(SourceMgr));
93   calculateLineBreakInformation();
94   alignConsecutiveDeclarations();
95   alignConsecutiveAssignments();
96   alignTrailingComments();
97   alignEscapedNewlines();
98   generateChanges();
99 
100   return Replaces;
101 }
102 
103 void WhitespaceManager::calculateLineBreakInformation() {
104   Changes[0].PreviousEndOfTokenColumn = 0;
105   Change *LastOutsideTokenChange = &Changes[0];
106   for (unsigned i = 1, e = Changes.size(); i != e; ++i) {
107     SourceLocation OriginalWhitespaceStart =
108         Changes[i].OriginalWhitespaceRange.getBegin();
109     SourceLocation PreviousOriginalWhitespaceEnd =
110         Changes[i - 1].OriginalWhitespaceRange.getEnd();
111     unsigned OriginalWhitespaceStartOffset =
112         SourceMgr.getFileOffset(OriginalWhitespaceStart);
113     unsigned PreviousOriginalWhitespaceEndOffset =
114         SourceMgr.getFileOffset(PreviousOriginalWhitespaceEnd);
115     assert(PreviousOriginalWhitespaceEndOffset <=
116            OriginalWhitespaceStartOffset);
117     const char *const PreviousOriginalWhitespaceEndData =
118         SourceMgr.getCharacterData(PreviousOriginalWhitespaceEnd);
119     StringRef Text(PreviousOriginalWhitespaceEndData,
120                    SourceMgr.getCharacterData(OriginalWhitespaceStart) -
121                        PreviousOriginalWhitespaceEndData);
122     // Usually consecutive changes would occur in consecutive tokens. This is
123     // not the case however when analyzing some preprocessor runs of the
124     // annotated lines. For example, in this code:
125     //
126     // #if A // line 1
127     // int i = 1;
128     // #else B // line 2
129     // int i = 2;
130     // #endif // line 3
131     //
132     // one of the runs will produce the sequence of lines marked with line 1, 2
133     // and 3. So the two consecutive whitespace changes just before '// line 2'
134     // and before '#endif // line 3' span multiple lines and tokens:
135     //
136     // #else B{change X}[// line 2
137     // int i = 2;
138     // ]{change Y}#endif // line 3
139     //
140     // For this reason, if the text between consecutive changes spans multiple
141     // newlines, the token length must be adjusted to the end of the original
142     // line of the token.
143     auto NewlinePos = Text.find_first_of('\n');
144     if (NewlinePos == StringRef::npos) {
145       Changes[i - 1].TokenLength = OriginalWhitespaceStartOffset -
146                                    PreviousOriginalWhitespaceEndOffset +
147                                    Changes[i].PreviousLinePostfix.size() +
148                                    Changes[i - 1].CurrentLinePrefix.size();
149     } else {
150       Changes[i - 1].TokenLength =
151           NewlinePos + Changes[i - 1].CurrentLinePrefix.size();
152     }
153 
154     // If there are multiple changes in this token, sum up all the changes until
155     // the end of the line.
156     if (Changes[i - 1].IsInsideToken && Changes[i - 1].NewlinesBefore == 0)
157       LastOutsideTokenChange->TokenLength +=
158           Changes[i - 1].TokenLength + Changes[i - 1].Spaces;
159     else
160       LastOutsideTokenChange = &Changes[i - 1];
161 
162     Changes[i].PreviousEndOfTokenColumn =
163         Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength;
164 
165     Changes[i - 1].IsTrailingComment =
166         (Changes[i].NewlinesBefore > 0 || Changes[i].Tok->is(tok::eof) ||
167          (Changes[i].IsInsideToken && Changes[i].Tok->is(tok::comment))) &&
168         Changes[i - 1].Tok->is(tok::comment) &&
169         // FIXME: This is a dirty hack. The problem is that
170         // BreakableLineCommentSection does comment reflow changes and here is
171         // the aligning of trailing comments. Consider the case where we reflow
172         // the second line up in this example:
173         //
174         // // line 1
175         // // line 2
176         //
177         // That amounts to 2 changes by BreakableLineCommentSection:
178         //  - the first, delimited by (), for the whitespace between the tokens,
179         //  - and second, delimited by [], for the whitespace at the beginning
180         //  of the second token:
181         //
182         // // line 1(
183         // )[// ]line 2
184         //
185         // So in the end we have two changes like this:
186         //
187         // // line1()[ ]line 2
188         //
189         // Note that the OriginalWhitespaceStart of the second change is the
190         // same as the PreviousOriginalWhitespaceEnd of the first change.
191         // In this case, the below check ensures that the second change doesn't
192         // get treated as a trailing comment change here, since this might
193         // trigger additional whitespace to be wrongly inserted before "line 2"
194         // by the comment aligner here.
195         //
196         // For a proper solution we need a mechanism to say to WhitespaceManager
197         // that a particular change breaks the current sequence of trailing
198         // comments.
199         OriginalWhitespaceStart != PreviousOriginalWhitespaceEnd;
200   }
201   // FIXME: The last token is currently not always an eof token; in those
202   // cases, setting TokenLength of the last token to 0 is wrong.
203   Changes.back().TokenLength = 0;
204   Changes.back().IsTrailingComment = Changes.back().Tok->is(tok::comment);
205 
206   const WhitespaceManager::Change *LastBlockComment = nullptr;
207   for (auto &Change : Changes) {
208     // Reset the IsTrailingComment flag for changes inside of trailing comments
209     // so they don't get realigned later. Comment line breaks however still need
210     // to be aligned.
211     if (Change.IsInsideToken && Change.NewlinesBefore == 0)
212       Change.IsTrailingComment = false;
213     Change.StartOfBlockComment = nullptr;
214     Change.IndentationOffset = 0;
215     if (Change.Tok->is(tok::comment)) {
216       if (Change.Tok->is(TT_LineComment) || !Change.IsInsideToken)
217         LastBlockComment = &Change;
218       else {
219         if ((Change.StartOfBlockComment = LastBlockComment))
220           Change.IndentationOffset =
221               Change.StartOfTokenColumn -
222               Change.StartOfBlockComment->StartOfTokenColumn;
223       }
224     } else {
225       LastBlockComment = nullptr;
226     }
227   }
228 }
229 
230 // Align a single sequence of tokens, see AlignTokens below.
231 template <typename F>
232 static void
233 AlignTokenSequence(unsigned Start, unsigned End, unsigned Column, F &&Matches,
234                    SmallVector<WhitespaceManager::Change, 16> &Changes) {
235   bool FoundMatchOnLine = false;
236   int Shift = 0;
237 
238   // ScopeStack keeps track of the current scope depth. It contains indices of
239   // the first token on each scope.
240   // We only run the "Matches" function on tokens from the outer-most scope.
241   // However, we do need to pay special attention to one class of tokens
242   // that are not in the outer-most scope, and that is function parameters
243   // which are split across multiple lines, as illustrated by this example:
244   //   double a(int x);
245   //   int    b(int  y,
246   //          double z);
247   // In the above example, we need to take special care to ensure that
248   // 'double z' is indented along with it's owning function 'b'.
249   SmallVector<unsigned, 16> ScopeStack;
250 
251   for (unsigned i = Start; i != End; ++i) {
252     if (ScopeStack.size() != 0 &&
253         Changes[i].indentAndNestingLevel() <
254             Changes[ScopeStack.back()].indentAndNestingLevel())
255       ScopeStack.pop_back();
256 
257     // Compare current token to previous non-comment token to ensure whether
258     // it is in a deeper scope or not.
259     unsigned PreviousNonComment = i - 1;
260     while (PreviousNonComment > Start &&
261            Changes[PreviousNonComment].Tok->is(tok::comment))
262       PreviousNonComment--;
263     if (i != Start && Changes[i].indentAndNestingLevel() >
264                           Changes[PreviousNonComment].indentAndNestingLevel())
265       ScopeStack.push_back(i);
266 
267     bool InsideNestedScope = ScopeStack.size() != 0;
268 
269     if (Changes[i].NewlinesBefore > 0 && !InsideNestedScope) {
270       Shift = 0;
271       FoundMatchOnLine = false;
272     }
273 
274     // If this is the first matching token to be aligned, remember by how many
275     // spaces it has to be shifted, so the rest of the changes on the line are
276     // shifted by the same amount
277     if (!FoundMatchOnLine && !InsideNestedScope && Matches(Changes[i])) {
278       FoundMatchOnLine = true;
279       Shift = Column - Changes[i].StartOfTokenColumn;
280       Changes[i].Spaces += Shift;
281     }
282 
283     // This is for function parameters that are split across multiple lines,
284     // as mentioned in the ScopeStack comment.
285     if (InsideNestedScope && Changes[i].NewlinesBefore > 0) {
286       unsigned ScopeStart = ScopeStack.back();
287       if (Changes[ScopeStart - 1].Tok->is(TT_FunctionDeclarationName) ||
288           (ScopeStart > Start + 1 &&
289            Changes[ScopeStart - 2].Tok->is(TT_FunctionDeclarationName)))
290         Changes[i].Spaces += Shift;
291     }
292 
293     assert(Shift >= 0);
294     Changes[i].StartOfTokenColumn += Shift;
295     if (i + 1 != Changes.size())
296       Changes[i + 1].PreviousEndOfTokenColumn += Shift;
297   }
298 }
299 
300 // Walk through a subset of the changes, starting at StartAt, and find
301 // sequences of matching tokens to align. To do so, keep track of the lines and
302 // whether or not a matching token was found on a line. If a matching token is
303 // found, extend the current sequence. If the current line cannot be part of a
304 // sequence, e.g. because there is an empty line before it or it contains only
305 // non-matching tokens, finalize the previous sequence.
306 // The value returned is the token on which we stopped, either because we
307 // exhausted all items inside Changes, or because we hit a scope level higher
308 // than our initial scope.
309 // This function is recursive. Each invocation processes only the scope level
310 // equal to the initial level, which is the level of Changes[StartAt].
311 // If we encounter a scope level greater than the initial level, then we call
312 // ourselves recursively, thereby avoiding the pollution of the current state
313 // with the alignment requirements of the nested sub-level. This recursive
314 // behavior is necessary for aligning function prototypes that have one or more
315 // arguments.
316 // If this function encounters a scope level less than the initial level,
317 // it returns the current position.
318 // There is a non-obvious subtlety in the recursive behavior: Even though we
319 // defer processing of nested levels to recursive invocations of this
320 // function, when it comes time to align a sequence of tokens, we run the
321 // alignment on the entire sequence, including the nested levels.
322 // When doing so, most of the nested tokens are skipped, because their
323 // alignment was already handled by the recursive invocations of this function.
324 // However, the special exception is that we do NOT skip function parameters
325 // that are split across multiple lines. See the test case in FormatTest.cpp
326 // that mentions "split function parameter alignment" for an example of this.
327 template <typename F>
328 static unsigned AlignTokens(const FormatStyle &Style, F &&Matches,
329                             SmallVector<WhitespaceManager::Change, 16> &Changes,
330                             unsigned StartAt) {
331   unsigned MinColumn = 0;
332   unsigned MaxColumn = UINT_MAX;
333 
334   // Line number of the start and the end of the current token sequence.
335   unsigned StartOfSequence = 0;
336   unsigned EndOfSequence = 0;
337 
338   // Measure the scope level (i.e. depth of (), [], {}) of the first token, and
339   // abort when we hit any token in a higher scope than the starting one.
340   auto IndentAndNestingLevel = StartAt < Changes.size()
341                                    ? Changes[StartAt].indentAndNestingLevel()
342                                    : std::pair<unsigned, unsigned>(0, 0);
343 
344   // Keep track of the number of commas before the matching tokens, we will only
345   // align a sequence of matching tokens if they are preceded by the same number
346   // of commas.
347   unsigned CommasBeforeLastMatch = 0;
348   unsigned CommasBeforeMatch = 0;
349 
350   // Whether a matching token has been found on the current line.
351   bool FoundMatchOnLine = false;
352 
353   // Aligns a sequence of matching tokens, on the MinColumn column.
354   //
355   // Sequences start from the first matching token to align, and end at the
356   // first token of the first line that doesn't need to be aligned.
357   //
358   // We need to adjust the StartOfTokenColumn of each Change that is on a line
359   // containing any matching token to be aligned and located after such token.
360   auto AlignCurrentSequence = [&] {
361     if (StartOfSequence > 0 && StartOfSequence < EndOfSequence)
362       AlignTokenSequence(StartOfSequence, EndOfSequence, MinColumn, Matches,
363                          Changes);
364     MinColumn = 0;
365     MaxColumn = UINT_MAX;
366     StartOfSequence = 0;
367     EndOfSequence = 0;
368   };
369 
370   unsigned i = StartAt;
371   for (unsigned e = Changes.size(); i != e; ++i) {
372     if (Changes[i].indentAndNestingLevel() < IndentAndNestingLevel)
373       break;
374 
375     if (Changes[i].NewlinesBefore != 0) {
376       CommasBeforeMatch = 0;
377       EndOfSequence = i;
378       // If there is a blank line, or if the last line didn't contain any
379       // matching token, the sequence ends here.
380       if (Changes[i].NewlinesBefore > 1 || !FoundMatchOnLine)
381         AlignCurrentSequence();
382 
383       FoundMatchOnLine = false;
384     }
385 
386     if (Changes[i].Tok->is(tok::comma)) {
387       ++CommasBeforeMatch;
388     } else if (Changes[i].indentAndNestingLevel() > IndentAndNestingLevel) {
389       // Call AlignTokens recursively, skipping over this scope block.
390       unsigned StoppedAt = AlignTokens(Style, Matches, Changes, i);
391       i = StoppedAt - 1;
392       continue;
393     }
394 
395     if (!Matches(Changes[i]))
396       continue;
397 
398     // If there is more than one matching token per line, or if the number of
399     // preceding commas, do not match anymore, end the sequence.
400     if (FoundMatchOnLine || CommasBeforeMatch != CommasBeforeLastMatch)
401       AlignCurrentSequence();
402 
403     CommasBeforeLastMatch = CommasBeforeMatch;
404     FoundMatchOnLine = true;
405 
406     if (StartOfSequence == 0)
407       StartOfSequence = i;
408 
409     unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
410     int LineLengthAfter = -Changes[i].Spaces;
411     for (unsigned j = i; j != e && Changes[j].NewlinesBefore == 0; ++j)
412       LineLengthAfter += Changes[j].Spaces + Changes[j].TokenLength;
413     unsigned ChangeMaxColumn = Style.ColumnLimit - LineLengthAfter;
414 
415     // If we are restricted by the maximum column width, end the sequence.
416     if (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn ||
417         CommasBeforeLastMatch != CommasBeforeMatch) {
418       AlignCurrentSequence();
419       StartOfSequence = i;
420     }
421 
422     MinColumn = std::max(MinColumn, ChangeMinColumn);
423     MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
424   }
425 
426   EndOfSequence = i;
427   AlignCurrentSequence();
428   return i;
429 }
430 
431 void WhitespaceManager::alignConsecutiveAssignments() {
432   if (!Style.AlignConsecutiveAssignments)
433     return;
434 
435   AlignTokens(
436       Style,
437       [&](const Change &C) {
438         // Do not align on equal signs that are first on a line.
439         if (C.NewlinesBefore > 0)
440           return false;
441 
442         // Do not align on equal signs that are last on a line.
443         if (&C != &Changes.back() && (&C + 1)->NewlinesBefore > 0)
444           return false;
445 
446         return C.Tok->is(tok::equal);
447       },
448       Changes, /*StartAt=*/0);
449 }
450 
451 void WhitespaceManager::alignConsecutiveDeclarations() {
452   if (!Style.AlignConsecutiveDeclarations)
453     return;
454 
455   // FIXME: Currently we don't handle properly the PointerAlignment: Right
456   // The * and & are not aligned and are left dangling. Something has to be done
457   // about it, but it raises the question of alignment of code like:
458   //   const char* const* v1;
459   //   float const* v2;
460   //   SomeVeryLongType const& v3;
461   AlignTokens(
462       Style,
463       [](Change const &C) {
464         // tok::kw_operator is necessary for aligning operator overload
465         // definitions.
466         if (C.Tok->isOneOf(TT_FunctionDeclarationName, tok::kw_operator))
467           return true;
468         if (C.Tok->isNot(TT_StartOfName))
469           return false;
470         // Check if there is a subsequent name that starts the same declaration.
471         for (FormatToken *Next = C.Tok->Next; Next; Next = Next->Next) {
472           if (Next->is(tok::comment))
473             continue;
474           if (!Next->Tok.getIdentifierInfo())
475             break;
476           if (Next->isOneOf(TT_StartOfName, TT_FunctionDeclarationName,
477                             tok::kw_operator))
478             return false;
479         }
480         return true;
481       },
482       Changes, /*StartAt=*/0);
483 }
484 
485 void WhitespaceManager::alignTrailingComments() {
486   unsigned MinColumn = 0;
487   unsigned MaxColumn = UINT_MAX;
488   unsigned StartOfSequence = 0;
489   bool BreakBeforeNext = false;
490   unsigned Newlines = 0;
491   for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
492     if (Changes[i].StartOfBlockComment)
493       continue;
494     Newlines += Changes[i].NewlinesBefore;
495     if (!Changes[i].IsTrailingComment)
496       continue;
497 
498     unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
499     unsigned ChangeMaxColumn;
500 
501     if (Style.ColumnLimit == 0)
502       ChangeMaxColumn = UINT_MAX;
503     else if (Style.ColumnLimit >= Changes[i].TokenLength)
504       ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength;
505     else
506       ChangeMaxColumn = ChangeMinColumn;
507 
508     // If we don't create a replacement for this change, we have to consider
509     // it to be immovable.
510     if (!Changes[i].CreateReplacement)
511       ChangeMaxColumn = ChangeMinColumn;
512 
513     if (i + 1 != e && Changes[i + 1].ContinuesPPDirective)
514       ChangeMaxColumn -= 2;
515     // If this comment follows an } in column 0, it probably documents the
516     // closing of a namespace and we don't want to align it.
517     bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 &&
518                                   Changes[i - 1].Tok->is(tok::r_brace) &&
519                                   Changes[i - 1].StartOfTokenColumn == 0;
520     bool WasAlignedWithStartOfNextLine = false;
521     if (Changes[i].NewlinesBefore == 1) { // A comment on its own line.
522       unsigned CommentColumn = SourceMgr.getSpellingColumnNumber(
523           Changes[i].OriginalWhitespaceRange.getEnd());
524       for (unsigned j = i + 1; j != e; ++j) {
525         if (Changes[j].Tok->is(tok::comment))
526           continue;
527 
528         unsigned NextColumn = SourceMgr.getSpellingColumnNumber(
529             Changes[j].OriginalWhitespaceRange.getEnd());
530         // The start of the next token was previously aligned with the
531         // start of this comment.
532         WasAlignedWithStartOfNextLine =
533             CommentColumn == NextColumn ||
534             CommentColumn == NextColumn + Style.IndentWidth;
535         break;
536       }
537     }
538     if (!Style.AlignTrailingComments || FollowsRBraceInColumn0) {
539       alignTrailingComments(StartOfSequence, i, MinColumn);
540       MinColumn = ChangeMinColumn;
541       MaxColumn = ChangeMinColumn;
542       StartOfSequence = i;
543     } else if (BreakBeforeNext || Newlines > 1 ||
544                (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn) ||
545                // Break the comment sequence if the previous line did not end
546                // in a trailing comment.
547                (Changes[i].NewlinesBefore == 1 && i > 0 &&
548                 !Changes[i - 1].IsTrailingComment) ||
549                WasAlignedWithStartOfNextLine) {
550       alignTrailingComments(StartOfSequence, i, MinColumn);
551       MinColumn = ChangeMinColumn;
552       MaxColumn = ChangeMaxColumn;
553       StartOfSequence = i;
554     } else {
555       MinColumn = std::max(MinColumn, ChangeMinColumn);
556       MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
557     }
558     BreakBeforeNext = (i == 0) || (Changes[i].NewlinesBefore > 1) ||
559                       // Never start a sequence with a comment at the beginning
560                       // of the line.
561                       (Changes[i].NewlinesBefore == 1 && StartOfSequence == i);
562     Newlines = 0;
563   }
564   alignTrailingComments(StartOfSequence, Changes.size(), MinColumn);
565 }
566 
567 void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End,
568                                               unsigned Column) {
569   for (unsigned i = Start; i != End; ++i) {
570     int Shift = 0;
571     if (Changes[i].IsTrailingComment) {
572       Shift = Column - Changes[i].StartOfTokenColumn;
573     }
574     if (Changes[i].StartOfBlockComment) {
575       Shift = Changes[i].IndentationOffset +
576               Changes[i].StartOfBlockComment->StartOfTokenColumn -
577               Changes[i].StartOfTokenColumn;
578     }
579     assert(Shift >= 0);
580     Changes[i].Spaces += Shift;
581     if (i + 1 != Changes.size())
582       Changes[i + 1].PreviousEndOfTokenColumn += Shift;
583     Changes[i].StartOfTokenColumn += Shift;
584   }
585 }
586 
587 void WhitespaceManager::alignEscapedNewlines() {
588   if (Style.AlignEscapedNewlines == FormatStyle::ENAS_DontAlign)
589     return;
590 
591   bool AlignLeft = Style.AlignEscapedNewlines == FormatStyle::ENAS_Left;
592   unsigned MaxEndOfLine = AlignLeft ? 0 : Style.ColumnLimit;
593   unsigned StartOfMacro = 0;
594   for (unsigned i = 1, e = Changes.size(); i < e; ++i) {
595     Change &C = Changes[i];
596     if (C.NewlinesBefore > 0) {
597       if (C.ContinuesPPDirective) {
598         MaxEndOfLine = std::max(C.PreviousEndOfTokenColumn + 2, MaxEndOfLine);
599       } else {
600         alignEscapedNewlines(StartOfMacro + 1, i, MaxEndOfLine);
601         MaxEndOfLine = AlignLeft ? 0 : Style.ColumnLimit;
602         StartOfMacro = i;
603       }
604     }
605   }
606   alignEscapedNewlines(StartOfMacro + 1, Changes.size(), MaxEndOfLine);
607 }
608 
609 void WhitespaceManager::alignEscapedNewlines(unsigned Start, unsigned End,
610                                              unsigned Column) {
611   for (unsigned i = Start; i < End; ++i) {
612     Change &C = Changes[i];
613     if (C.NewlinesBefore > 0) {
614       assert(C.ContinuesPPDirective);
615       if (C.PreviousEndOfTokenColumn + 1 > Column)
616         C.EscapedNewlineColumn = 0;
617       else
618         C.EscapedNewlineColumn = Column;
619     }
620   }
621 }
622 
623 void WhitespaceManager::generateChanges() {
624   for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
625     const Change &C = Changes[i];
626     if (i > 0) {
627       assert(Changes[i - 1].OriginalWhitespaceRange.getBegin() !=
628                  C.OriginalWhitespaceRange.getBegin() &&
629              "Generating two replacements for the same location");
630     }
631     if (C.CreateReplacement) {
632       std::string ReplacementText = C.PreviousLinePostfix;
633       if (C.ContinuesPPDirective)
634         appendEscapedNewlineText(ReplacementText, C.NewlinesBefore,
635                                  C.PreviousEndOfTokenColumn,
636                                  C.EscapedNewlineColumn);
637       else
638         appendNewlineText(ReplacementText, C.NewlinesBefore);
639       appendIndentText(ReplacementText, C.Tok->IndentLevel,
640                        std::max(0, C.Spaces),
641                        C.StartOfTokenColumn - std::max(0, C.Spaces));
642       ReplacementText.append(C.CurrentLinePrefix);
643       storeReplacement(C.OriginalWhitespaceRange, ReplacementText);
644     }
645   }
646 }
647 
648 void WhitespaceManager::storeReplacement(SourceRange Range, StringRef Text) {
649   unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) -
650                               SourceMgr.getFileOffset(Range.getBegin());
651   // Don't create a replacement, if it does not change anything.
652   if (StringRef(SourceMgr.getCharacterData(Range.getBegin()),
653                 WhitespaceLength) == Text)
654     return;
655   auto Err = Replaces.add(tooling::Replacement(
656       SourceMgr, CharSourceRange::getCharRange(Range), Text));
657   // FIXME: better error handling. For now, just print an error message in the
658   // release version.
659   if (Err) {
660     llvm::errs() << llvm::toString(std::move(Err)) << "\n";
661     assert(false);
662   }
663 }
664 
665 void WhitespaceManager::appendNewlineText(std::string &Text,
666                                           unsigned Newlines) {
667   for (unsigned i = 0; i < Newlines; ++i)
668     Text.append(UseCRLF ? "\r\n" : "\n");
669 }
670 
671 void WhitespaceManager::appendEscapedNewlineText(
672     std::string &Text, unsigned Newlines, unsigned PreviousEndOfTokenColumn,
673     unsigned EscapedNewlineColumn) {
674   if (Newlines > 0) {
675     unsigned Spaces =
676         std::max<int>(1, EscapedNewlineColumn - PreviousEndOfTokenColumn - 1);
677     for (unsigned i = 0; i < Newlines; ++i) {
678       Text.append(Spaces, ' ');
679       Text.append(UseCRLF ? "\\\r\n" : "\\\n");
680       Spaces = std::max<int>(0, EscapedNewlineColumn - 1);
681     }
682   }
683 }
684 
685 void WhitespaceManager::appendIndentText(std::string &Text,
686                                          unsigned IndentLevel, unsigned Spaces,
687                                          unsigned WhitespaceStartColumn) {
688   switch (Style.UseTab) {
689   case FormatStyle::UT_Never:
690     Text.append(Spaces, ' ');
691     break;
692   case FormatStyle::UT_Always: {
693     unsigned FirstTabWidth =
694         Style.TabWidth - WhitespaceStartColumn % Style.TabWidth;
695     // Insert only spaces when we want to end up before the next tab.
696     if (Spaces < FirstTabWidth || Spaces == 1) {
697       Text.append(Spaces, ' ');
698       break;
699     }
700     // Align to the next tab.
701     Spaces -= FirstTabWidth;
702     Text.append("\t");
703 
704     Text.append(Spaces / Style.TabWidth, '\t');
705     Text.append(Spaces % Style.TabWidth, ' ');
706     break;
707   }
708   case FormatStyle::UT_ForIndentation:
709     if (WhitespaceStartColumn == 0) {
710       unsigned Indentation = IndentLevel * Style.IndentWidth;
711       // This happens, e.g. when a line in a block comment is indented less than
712       // the first one.
713       if (Indentation > Spaces)
714         Indentation = Spaces;
715       unsigned Tabs = Indentation / Style.TabWidth;
716       Text.append(Tabs, '\t');
717       Spaces -= Tabs * Style.TabWidth;
718     }
719     Text.append(Spaces, ' ');
720     break;
721   case FormatStyle::UT_ForContinuationAndIndentation:
722     if (WhitespaceStartColumn == 0) {
723       unsigned Tabs = Spaces / Style.TabWidth;
724       Text.append(Tabs, '\t');
725       Spaces -= Tabs * Style.TabWidth;
726     }
727     Text.append(Spaces, ' ');
728     break;
729   }
730 }
731 
732 } // namespace format
733 } // namespace clang
734