1 //===--- WhitespaceManager.cpp - Format C++ code --------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
/// \brief This file implements the WhitespaceManager class.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "WhitespaceManager.h"
16 #include "llvm/ADT/STLExtras.h"
17 
18 namespace clang {
19 namespace format {
20 
21 bool WhitespaceManager::Change::IsBeforeInFile::
22 operator()(const Change &C1, const Change &C2) const {
23   return SourceMgr.isBeforeInTranslationUnit(
24       C1.OriginalWhitespaceRange.getBegin(),
25       C2.OriginalWhitespaceRange.getBegin());
26 }
27 
28 WhitespaceManager::Change::Change(
29     bool CreateReplacement, const SourceRange &OriginalWhitespaceRange,
30     unsigned IndentLevel, int Spaces, unsigned StartOfTokenColumn,
31     unsigned NewlinesBefore, StringRef PreviousLinePostfix,
32     StringRef CurrentLinePrefix, tok::TokenKind Kind, bool ContinuesPPDirective)
33     : CreateReplacement(CreateReplacement),
34       OriginalWhitespaceRange(OriginalWhitespaceRange),
35       StartOfTokenColumn(StartOfTokenColumn), NewlinesBefore(NewlinesBefore),
36       PreviousLinePostfix(PreviousLinePostfix),
37       CurrentLinePrefix(CurrentLinePrefix), Kind(Kind),
38       ContinuesPPDirective(ContinuesPPDirective), IndentLevel(IndentLevel),
39       Spaces(Spaces), IsTrailingComment(false), TokenLength(0),
40       PreviousEndOfTokenColumn(0), EscapedNewlineColumn(0),
41       StartOfBlockComment(nullptr), IndentationOffset(0) {}
42 
43 void WhitespaceManager::reset() {
44   Changes.clear();
45   Replaces.clear();
46 }
47 
48 void WhitespaceManager::replaceWhitespace(FormatToken &Tok, unsigned Newlines,
49                                           unsigned IndentLevel, unsigned Spaces,
50                                           unsigned StartOfTokenColumn,
51                                           bool InPPDirective) {
52   if (Tok.Finalized)
53     return;
54   Tok.Decision = (Newlines > 0) ? FD_Break : FD_Continue;
55   Changes.push_back(Change(true, Tok.WhitespaceRange, IndentLevel, Spaces,
56                            StartOfTokenColumn, Newlines, "", "",
57                            Tok.Tok.getKind(), InPPDirective && !Tok.IsFirst));
58 }
59 
60 void WhitespaceManager::addUntouchableToken(const FormatToken &Tok,
61                                             bool InPPDirective) {
62   if (Tok.Finalized)
63     return;
64   Changes.push_back(Change(false, Tok.WhitespaceRange, /*IndentLevel=*/0,
65                            /*Spaces=*/0, Tok.OriginalColumn, Tok.NewlinesBefore,
66                            "", "", Tok.Tok.getKind(),
67                            InPPDirective && !Tok.IsFirst));
68 }
69 
70 void WhitespaceManager::replaceWhitespaceInToken(
71     const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars,
72     StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective,
73     unsigned Newlines, unsigned IndentLevel, int Spaces) {
74   if (Tok.Finalized)
75     return;
76   SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset);
77   Changes.push_back(Change(
78       true, SourceRange(Start, Start.getLocWithOffset(ReplaceChars)),
79       IndentLevel, Spaces, std::max(0, Spaces), Newlines, PreviousPostfix,
80       CurrentPrefix,
81       // If we don't add a newline this change doesn't start a comment. Thus,
82       // when we align line comments, we don't need to treat this change as one.
83       // FIXME: We still need to take this change in account to properly
84       // calculate the new length of the comment and to calculate the changes
85       // for which to do the alignment when aligning comments.
86       Tok.is(TT_LineComment) && Newlines > 0 ? tok::comment : tok::unknown,
87       InPPDirective && !Tok.IsFirst));
88 }
89 
90 const tooling::Replacements &WhitespaceManager::generateReplacements() {
91   if (Changes.empty())
92     return Replaces;
93 
94   std::sort(Changes.begin(), Changes.end(), Change::IsBeforeInFile(SourceMgr));
95   calculateLineBreakInformation();
96   alignConsecutiveAssignments();
97   alignTrailingComments();
98   alignEscapedNewlines();
99   generateChanges();
100 
101   return Replaces;
102 }
103 
104 void WhitespaceManager::calculateLineBreakInformation() {
105   Changes[0].PreviousEndOfTokenColumn = 0;
106   for (unsigned i = 1, e = Changes.size(); i != e; ++i) {
107     unsigned OriginalWhitespaceStart =
108         SourceMgr.getFileOffset(Changes[i].OriginalWhitespaceRange.getBegin());
109     unsigned PreviousOriginalWhitespaceEnd = SourceMgr.getFileOffset(
110         Changes[i - 1].OriginalWhitespaceRange.getEnd());
111     Changes[i - 1].TokenLength = OriginalWhitespaceStart -
112                                  PreviousOriginalWhitespaceEnd +
113                                  Changes[i].PreviousLinePostfix.size() +
114                                  Changes[i - 1].CurrentLinePrefix.size();
115 
116     Changes[i].PreviousEndOfTokenColumn =
117         Changes[i - 1].StartOfTokenColumn + Changes[i - 1].TokenLength;
118 
119     Changes[i - 1].IsTrailingComment =
120         (Changes[i].NewlinesBefore > 0 || Changes[i].Kind == tok::eof) &&
121         Changes[i - 1].Kind == tok::comment;
122   }
123   // FIXME: The last token is currently not always an eof token; in those
124   // cases, setting TokenLength of the last token to 0 is wrong.
125   Changes.back().TokenLength = 0;
126   Changes.back().IsTrailingComment = Changes.back().Kind == tok::comment;
127 
128   const WhitespaceManager::Change *LastBlockComment = nullptr;
129   for (auto &Change : Changes) {
130     Change.StartOfBlockComment = nullptr;
131     Change.IndentationOffset = 0;
132     if (Change.Kind == tok::comment) {
133       LastBlockComment = &Change;
134     } else if (Change.Kind == tok::unknown) {
135       if ((Change.StartOfBlockComment = LastBlockComment))
136         Change.IndentationOffset =
137             Change.StartOfTokenColumn -
138             Change.StartOfBlockComment->StartOfTokenColumn;
139     } else {
140       LastBlockComment = nullptr;
141     }
142   }
143 }
144 
145 // Walk through all of the changes and find sequences of "=" to align.  To do
146 // so, keep track of the lines and whether or not an "=" was found on align. If
147 // a "=" is found on a line, extend the current sequence. If the current line
148 // cannot be part of a sequence, e.g. because there is an empty line before it
149 // or it contains non-assignments, finalize the previous sequence.
150 void WhitespaceManager::alignConsecutiveAssignments() {
151   if (!Style.AlignConsecutiveAssignments)
152     return;
153 
154   unsigned MinColumn = 0;
155   unsigned StartOfSequence = 0;
156   unsigned EndOfSequence = 0;
157   bool FoundAssignmentOnLine = false;
158   bool FoundLeftParenOnLine = false;
159 
160   // Aligns a sequence of assignment tokens, on the MinColumn column.
161   //
162   // Sequences start from the first assignment token to align, and end at the
163   // first token of the first line that doesn't need to be aligned.
164   //
165   // We need to adjust the StartOfTokenColumn of each Change that is on a line
166   // containing any assignment to be aligned and located after such assignment
167   auto AlignSequence = [&] {
168     if (StartOfSequence > 0 && StartOfSequence < EndOfSequence)
169       alignConsecutiveAssignments(StartOfSequence, EndOfSequence, MinColumn);
170     MinColumn = 0;
171     StartOfSequence = 0;
172     EndOfSequence = 0;
173   };
174 
175   for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
176     if (Changes[i].NewlinesBefore > 0) {
177       EndOfSequence = i;
178       // If there is a blank line or if the last line didn't contain any
179       // assignment, the sequence ends here.
180       if (Changes[i].NewlinesBefore > 1 || !FoundAssignmentOnLine) {
181         // NB: In the latter case, the sequence should end at the beggining of
182         // the previous line, but it doesn't really matter as there is no
183         // assignment on it
184         AlignSequence();
185       }
186 
187       FoundAssignmentOnLine = false;
188       FoundLeftParenOnLine = false;
189     }
190 
191     // If there is more than one "=" per line, or if the "=" appears first on
192     // the line of if it appears last, end the sequence
193     if (Changes[i].Kind == tok::equal &&
194         (FoundAssignmentOnLine || Changes[i].NewlinesBefore > 0 ||
195          Changes[i + 1].NewlinesBefore > 0)) {
196       AlignSequence();
197     } else if (!FoundLeftParenOnLine && Changes[i].Kind == tok::r_paren) {
198       AlignSequence();
199     } else if (Changes[i].Kind == tok::l_paren) {
200       FoundLeftParenOnLine = true;
201       if (!FoundAssignmentOnLine)
202         AlignSequence();
203     } else if (!FoundAssignmentOnLine && !FoundLeftParenOnLine &&
204                Changes[i].Kind == tok::equal) {
205       FoundAssignmentOnLine = true;
206       if (StartOfSequence == 0)
207         StartOfSequence = i;
208 
209       unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
210       MinColumn = std::max(MinColumn, ChangeMinColumn);
211     }
212   }
213 
214   EndOfSequence = Changes.size();
215   AlignSequence();
216 }
217 
218 void WhitespaceManager::alignConsecutiveAssignments(unsigned Start,
219                                                     unsigned End,
220                                                     unsigned Column) {
221   bool FoundAssignmentOnLine = false;
222   int Shift = 0;
223   for (unsigned i = Start; i != End; ++i) {
224     if (Changes[i].NewlinesBefore > 0) {
225       FoundAssignmentOnLine = false;
226       Shift = 0;
227     }
228 
229     // If this is the first assignment to be aligned, remember by how many
230     // spaces it has to be shifted, so the rest of the changes on the line are
231     // shifted by the same amount
232     if (!FoundAssignmentOnLine && Changes[i].Kind == tok::equal) {
233       FoundAssignmentOnLine = true;
234       Shift = Column - Changes[i].StartOfTokenColumn;
235       Changes[i].Spaces += Shift;
236     }
237 
238     assert(Shift >= 0);
239     Changes[i].StartOfTokenColumn += Shift;
240     if (i + 1 != Changes.size())
241       Changes[i + 1].PreviousEndOfTokenColumn += Shift;
242   }
243 }
244 
245 void WhitespaceManager::alignTrailingComments() {
246   unsigned MinColumn = 0;
247   unsigned MaxColumn = UINT_MAX;
248   unsigned StartOfSequence = 0;
249   bool BreakBeforeNext = false;
250   unsigned Newlines = 0;
251   for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
252     if (Changes[i].StartOfBlockComment)
253       continue;
254     Newlines += Changes[i].NewlinesBefore;
255     if (!Changes[i].IsTrailingComment)
256       continue;
257 
258     unsigned ChangeMinColumn = Changes[i].StartOfTokenColumn;
259     unsigned ChangeMaxColumn = Style.ColumnLimit - Changes[i].TokenLength;
260     if (i + 1 != e && Changes[i + 1].ContinuesPPDirective)
261       ChangeMaxColumn -= 2;
262     // If this comment follows an } in column 0, it probably documents the
263     // closing of a namespace and we don't want to align it.
264     bool FollowsRBraceInColumn0 = i > 0 && Changes[i].NewlinesBefore == 0 &&
265                                   Changes[i - 1].Kind == tok::r_brace &&
266                                   Changes[i - 1].StartOfTokenColumn == 0;
267     bool WasAlignedWithStartOfNextLine = false;
268     if (Changes[i].NewlinesBefore == 1) { // A comment on its own line.
269       unsigned CommentColumn = SourceMgr.getSpellingColumnNumber(
270           Changes[i].OriginalWhitespaceRange.getEnd());
271       for (unsigned j = i + 1; j != e; ++j) {
272         if (Changes[j].Kind != tok::comment) { // Skip over comments.
273           unsigned NextColumn = SourceMgr.getSpellingColumnNumber(
274               Changes[j].OriginalWhitespaceRange.getEnd());
275           // The start of the next token was previously aligned with the
276           // start of this comment.
277           WasAlignedWithStartOfNextLine =
278               CommentColumn == NextColumn ||
279               CommentColumn == NextColumn + Style.IndentWidth;
280           break;
281         }
282       }
283     }
284     if (!Style.AlignTrailingComments || FollowsRBraceInColumn0) {
285       alignTrailingComments(StartOfSequence, i, MinColumn);
286       MinColumn = ChangeMinColumn;
287       MaxColumn = ChangeMinColumn;
288       StartOfSequence = i;
289     } else if (BreakBeforeNext || Newlines > 1 ||
290                (ChangeMinColumn > MaxColumn || ChangeMaxColumn < MinColumn) ||
291                // Break the comment sequence if the previous line did not end
292                // in a trailing comment.
293                (Changes[i].NewlinesBefore == 1 && i > 0 &&
294                 !Changes[i - 1].IsTrailingComment) ||
295                WasAlignedWithStartOfNextLine) {
296       alignTrailingComments(StartOfSequence, i, MinColumn);
297       MinColumn = ChangeMinColumn;
298       MaxColumn = ChangeMaxColumn;
299       StartOfSequence = i;
300     } else {
301       MinColumn = std::max(MinColumn, ChangeMinColumn);
302       MaxColumn = std::min(MaxColumn, ChangeMaxColumn);
303     }
304     BreakBeforeNext =
305         (i == 0) || (Changes[i].NewlinesBefore > 1) ||
306         // Never start a sequence with a comment at the beginning of
307         // the line.
308         (Changes[i].NewlinesBefore == 1 && StartOfSequence == i);
309     Newlines = 0;
310   }
311   alignTrailingComments(StartOfSequence, Changes.size(), MinColumn);
312 }
313 
314 void WhitespaceManager::alignTrailingComments(unsigned Start, unsigned End,
315                                               unsigned Column) {
316   for (unsigned i = Start; i != End; ++i) {
317     int Shift = 0;
318     if (Changes[i].IsTrailingComment) {
319       Shift = Column - Changes[i].StartOfTokenColumn;
320     }
321     if (Changes[i].StartOfBlockComment) {
322       Shift = Changes[i].IndentationOffset +
323               Changes[i].StartOfBlockComment->StartOfTokenColumn -
324               Changes[i].StartOfTokenColumn;
325     }
326     assert(Shift >= 0);
327     Changes[i].Spaces += Shift;
328     if (i + 1 != End)
329       Changes[i + 1].PreviousEndOfTokenColumn += Shift;
330     Changes[i].StartOfTokenColumn += Shift;
331   }
332 }
333 
334 void WhitespaceManager::alignEscapedNewlines() {
335   unsigned MaxEndOfLine =
336       Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit;
337   unsigned StartOfMacro = 0;
338   for (unsigned i = 1, e = Changes.size(); i < e; ++i) {
339     Change &C = Changes[i];
340     if (C.NewlinesBefore > 0) {
341       if (C.ContinuesPPDirective) {
342         MaxEndOfLine = std::max(C.PreviousEndOfTokenColumn + 2, MaxEndOfLine);
343       } else {
344         alignEscapedNewlines(StartOfMacro + 1, i, MaxEndOfLine);
345         MaxEndOfLine = Style.AlignEscapedNewlinesLeft ? 0 : Style.ColumnLimit;
346         StartOfMacro = i;
347       }
348     }
349   }
350   alignEscapedNewlines(StartOfMacro + 1, Changes.size(), MaxEndOfLine);
351 }
352 
353 void WhitespaceManager::alignEscapedNewlines(unsigned Start, unsigned End,
354                                              unsigned Column) {
355   for (unsigned i = Start; i < End; ++i) {
356     Change &C = Changes[i];
357     if (C.NewlinesBefore > 0) {
358       assert(C.ContinuesPPDirective);
359       if (C.PreviousEndOfTokenColumn + 1 > Column)
360         C.EscapedNewlineColumn = 0;
361       else
362         C.EscapedNewlineColumn = Column;
363     }
364   }
365 }
366 
367 void WhitespaceManager::generateChanges() {
368   for (unsigned i = 0, e = Changes.size(); i != e; ++i) {
369     const Change &C = Changes[i];
370     if (i > 0) {
371       assert(Changes[i - 1].OriginalWhitespaceRange.getBegin() !=
372                  C.OriginalWhitespaceRange.getBegin() &&
373              "Generating two replacements for the same location");
374     }
375     if (C.CreateReplacement) {
376       std::string ReplacementText = C.PreviousLinePostfix;
377       if (C.ContinuesPPDirective)
378         appendNewlineText(ReplacementText, C.NewlinesBefore,
379                           C.PreviousEndOfTokenColumn, C.EscapedNewlineColumn);
380       else
381         appendNewlineText(ReplacementText, C.NewlinesBefore);
382       appendIndentText(ReplacementText, C.IndentLevel, std::max(0, C.Spaces),
383                        C.StartOfTokenColumn - std::max(0, C.Spaces));
384       ReplacementText.append(C.CurrentLinePrefix);
385       storeReplacement(C.OriginalWhitespaceRange, ReplacementText);
386     }
387   }
388 }
389 
390 void WhitespaceManager::storeReplacement(const SourceRange &Range,
391                                          StringRef Text) {
392   unsigned WhitespaceLength = SourceMgr.getFileOffset(Range.getEnd()) -
393                               SourceMgr.getFileOffset(Range.getBegin());
394   // Don't create a replacement, if it does not change anything.
395   if (StringRef(SourceMgr.getCharacterData(Range.getBegin()),
396                 WhitespaceLength) == Text)
397     return;
398   Replaces.insert(tooling::Replacement(
399       SourceMgr, CharSourceRange::getCharRange(Range), Text));
400 }
401 
402 void WhitespaceManager::appendNewlineText(std::string &Text,
403                                           unsigned Newlines) {
404   for (unsigned i = 0; i < Newlines; ++i)
405     Text.append(UseCRLF ? "\r\n" : "\n");
406 }
407 
408 void WhitespaceManager::appendNewlineText(std::string &Text, unsigned Newlines,
409                                           unsigned PreviousEndOfTokenColumn,
410                                           unsigned EscapedNewlineColumn) {
411   if (Newlines > 0) {
412     unsigned Offset =
413         std::min<int>(EscapedNewlineColumn - 1, PreviousEndOfTokenColumn);
414     for (unsigned i = 0; i < Newlines; ++i) {
415       Text.append(EscapedNewlineColumn - Offset - 1, ' ');
416       Text.append(UseCRLF ? "\\\r\n" : "\\\n");
417       Offset = 0;
418     }
419   }
420 }
421 
422 void WhitespaceManager::appendIndentText(std::string &Text,
423                                          unsigned IndentLevel, unsigned Spaces,
424                                          unsigned WhitespaceStartColumn) {
425   switch (Style.UseTab) {
426   case FormatStyle::UT_Never:
427     Text.append(Spaces, ' ');
428     break;
429   case FormatStyle::UT_Always: {
430     unsigned FirstTabWidth =
431         Style.TabWidth - WhitespaceStartColumn % Style.TabWidth;
432     // Indent with tabs only when there's at least one full tab.
433     if (FirstTabWidth + Style.TabWidth <= Spaces) {
434       Spaces -= FirstTabWidth;
435       Text.append("\t");
436     }
437     Text.append(Spaces / Style.TabWidth, '\t');
438     Text.append(Spaces % Style.TabWidth, ' ');
439     break;
440   }
441   case FormatStyle::UT_ForIndentation:
442     if (WhitespaceStartColumn == 0) {
443       unsigned Indentation = IndentLevel * Style.IndentWidth;
444       // This happens, e.g. when a line in a block comment is indented less than
445       // the first one.
446       if (Indentation > Spaces)
447         Indentation = Spaces;
448       unsigned Tabs = Indentation / Style.TabWidth;
449       Text.append(Tabs, '\t');
450       Spaces -= Tabs * Style.TabWidth;
451     }
452     Text.append(Spaces, ' ');
453     break;
454   }
455 }
456 
457 } // namespace format
458 } // namespace clang
459