1 //===--- Format.cpp - Format C++ code -------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// \brief This file implements functions declared in Format.h. This will be
12 /// split into separate files as we go.
13 ///
14 /// This is EXPERIMENTAL code under heavy development. It is not in a state yet,
15 /// where it can be used to format real code.
16 ///
17 //===----------------------------------------------------------------------===//
18 
19 #include "clang/Format/Format.h"
20 #include "UnwrappedLineParser.h"
21 #include "clang/Basic/OperatorPrecedence.h"
22 #include "clang/Basic/SourceManager.h"
23 #include "clang/Lex/Lexer.h"
24 #include <string>
25 
26 namespace clang {
27 namespace format {
28 
29 // FIXME: Move somewhere sane.
30 struct TokenAnnotation {
31   enum TokenType {
32     TT_Unknown,
33     TT_TemplateOpener,
34     TT_TemplateCloser,
35     TT_BinaryOperator,
36     TT_UnaryOperator,
37     TT_TrailingUnaryOperator,
38     TT_OverloadedOperator,
39     TT_PointerOrReference,
40     TT_ConditionalExpr,
41     TT_CtorInitializerColon,
42     TT_LineComment,
43     TT_BlockComment,
44     TT_DirectorySeparator,
45     TT_ObjCMethodSpecifier
46   };
47 
48   TokenType Type;
49 
50   bool SpaceRequiredBefore;
51   bool CanBreakBefore;
52   bool MustBreakBefore;
53 
54   bool ClosesTemplateDeclaration;
55 };
56 
57 static prec::Level getPrecedence(const FormatToken &Tok) {
58   return getBinOpPrecedence(Tok.Tok.getKind(), true, true);
59 }
60 
61 using llvm::MutableArrayRef;
62 
63 FormatStyle getLLVMStyle() {
64   FormatStyle LLVMStyle;
65   LLVMStyle.ColumnLimit = 80;
66   LLVMStyle.MaxEmptyLinesToKeep = 1;
67   LLVMStyle.PointerAndReferenceBindToType = false;
68   LLVMStyle.AccessModifierOffset = -2;
69   LLVMStyle.SplitTemplateClosingGreater = true;
70   LLVMStyle.IndentCaseLabels = false;
71   return LLVMStyle;
72 }
73 
74 FormatStyle getGoogleStyle() {
75   FormatStyle GoogleStyle;
76   GoogleStyle.ColumnLimit = 80;
77   GoogleStyle.MaxEmptyLinesToKeep = 1;
78   GoogleStyle.PointerAndReferenceBindToType = true;
79   GoogleStyle.AccessModifierOffset = -1;
80   GoogleStyle.SplitTemplateClosingGreater = false;
81   GoogleStyle.IndentCaseLabels = true;
82   return GoogleStyle;
83 }
84 
85 struct OptimizationParameters {
86   unsigned PenaltyIndentLevel;
87   unsigned PenaltyLevelDecrease;
88 };
89 
90 class UnwrappedLineFormatter {
91 public:
92   UnwrappedLineFormatter(const FormatStyle &Style, SourceManager &SourceMgr,
93                          const UnwrappedLine &Line,
94                          const std::vector<TokenAnnotation> &Annotations,
95                          tooling::Replacements &Replaces, bool StructuralError)
96       : Style(Style), SourceMgr(SourceMgr), Line(Line),
97         Annotations(Annotations), Replaces(Replaces),
98         StructuralError(StructuralError) {
99     Parameters.PenaltyIndentLevel = 15;
100     Parameters.PenaltyLevelDecrease = 10;
101   }
102 
103   void format() {
104     // Format first token and initialize indent.
105     unsigned Indent = formatFirstToken();
106 
107     // Initialize state dependent on indent.
108     IndentState State;
109     State.Column = Indent;
110     State.ConsumedTokens = 0;
111     State.Indent.push_back(Indent + 4);
112     State.LastSpace.push_back(Indent);
113     State.FirstLessLess.push_back(0);
114     State.ForLoopVariablePos = 0;
115     State.LineContainsContinuedForLoopSection = false;
116     State.StartOfLineLevel = 1;
117 
118     // The first token has already been indented and thus consumed.
119     moveStateToNextToken(State);
120 
121     // Check whether the UnwrappedLine can be put onto a single line. If so,
122     // this is bound to be the optimal solution (by definition) and we don't
123     // need to analyze the entire solution space.
124     unsigned Columns = State.Column;
125     bool FitsOnALine = true;
126     for (unsigned i = 1, n = Line.Tokens.size(); i != n; ++i) {
127       Columns += (Annotations[i].SpaceRequiredBefore ? 1 : 0) +
128                  Line.Tokens[i].Tok.getLength();
129       // A special case for the colon of a constructor initializer as this only
130       // needs to be put on a new line if the line needs to be split.
131       if (Columns > Style.ColumnLimit ||
132           (Annotations[i].MustBreakBefore &&
133            Annotations[i].Type != TokenAnnotation::TT_CtorInitializerColon)) {
134         FitsOnALine = false;
135         break;
136       }
137     }
138 
139     // Start iterating at 1 as we have correctly formatted of Token #0 above.
140     for (unsigned i = 1, n = Line.Tokens.size(); i != n; ++i) {
141       if (FitsOnALine) {
142         addTokenToState(false, false, State);
143       } else {
144         unsigned NoBreak = calcPenalty(State, false, UINT_MAX);
145         unsigned Break = calcPenalty(State, true, NoBreak);
146         addTokenToState(Break < NoBreak, false, State);
147       }
148     }
149   }
150 
151 private:
152   /// \brief The current state when indenting a unwrapped line.
153   ///
154   /// As the indenting tries different combinations this is copied by value.
155   struct IndentState {
156     /// \brief The number of used columns in the current line.
157     unsigned Column;
158 
159     /// \brief The number of tokens already consumed.
160     unsigned ConsumedTokens;
161 
162     /// \brief The parenthesis level of the first token on the current line.
163     unsigned StartOfLineLevel;
164 
165     /// \brief The position to which a specific parenthesis level needs to be
166     /// indented.
167     std::vector<unsigned> Indent;
168 
169     /// \brief The position of the last space on each level.
170     ///
171     /// Used e.g. to break like:
172     /// functionCall(Parameter, otherCall(
173     ///                             OtherParameter));
174     std::vector<unsigned> LastSpace;
175 
176     /// \brief The position the first "<<" operator encountered on each level.
177     ///
178     /// Used to align "<<" operators. 0 if no such operator has been encountered
179     /// on a level.
180     std::vector<unsigned> FirstLessLess;
181 
182     /// \brief The column of the first variable in a for-loop declaration.
183     ///
184     /// Used to align the second variable if necessary.
185     unsigned ForLoopVariablePos;
186 
187     /// \brief \c true if this line contains a continued for-loop section.
188     bool LineContainsContinuedForLoopSection;
189 
190     /// \brief Comparison operator to be able to used \c IndentState in \c map.
191     bool operator<(const IndentState &Other) const {
192       if (Other.ConsumedTokens != ConsumedTokens)
193         return Other.ConsumedTokens > ConsumedTokens;
194       if (Other.Column != Column)
195         return Other.Column > Column;
196       if (Other.StartOfLineLevel != StartOfLineLevel)
197         return Other.StartOfLineLevel > StartOfLineLevel;
198       if (Other.Indent.size() != Indent.size())
199         return Other.Indent.size() > Indent.size();
200       for (int i = 0, e = Indent.size(); i != e; ++i) {
201         if (Other.Indent[i] != Indent[i])
202           return Other.Indent[i] > Indent[i];
203       }
204       if (Other.LastSpace.size() != LastSpace.size())
205         return Other.LastSpace.size() > LastSpace.size();
206       for (int i = 0, e = LastSpace.size(); i != e; ++i) {
207         if (Other.LastSpace[i] != LastSpace[i])
208           return Other.LastSpace[i] > LastSpace[i];
209       }
210       if (Other.FirstLessLess.size() != FirstLessLess.size())
211         return Other.FirstLessLess.size() > FirstLessLess.size();
212       for (int i = 0, e = FirstLessLess.size(); i != e; ++i) {
213         if (Other.FirstLessLess[i] != FirstLessLess[i])
214           return Other.FirstLessLess[i] > FirstLessLess[i];
215       }
216       if (Other.ForLoopVariablePos != ForLoopVariablePos)
217         return Other.ForLoopVariablePos < ForLoopVariablePos;
218       if (Other.LineContainsContinuedForLoopSection !=
219           LineContainsContinuedForLoopSection)
220         return LineContainsContinuedForLoopSection;
221       return false;
222     }
223   };
224 
225   /// \brief Appends the next token to \p State and updates information
226   /// necessary for indentation.
227   ///
228   /// Puts the token on the current line if \p Newline is \c true and adds a
229   /// line break and necessary indentation otherwise.
230   ///
231   /// If \p DryRun is \c false, also creates and stores the required
232   /// \c Replacement.
233   void addTokenToState(bool Newline, bool DryRun, IndentState &State) {
234     unsigned Index = State.ConsumedTokens;
235     const FormatToken &Current = Line.Tokens[Index];
236     const FormatToken &Previous = Line.Tokens[Index - 1];
237     unsigned ParenLevel = State.Indent.size() - 1;
238 
239     if (Newline) {
240       unsigned WhitespaceStartColumn = State.Column;
241       if (Current.Tok.is(tok::string_literal) &&
242           Previous.Tok.is(tok::string_literal)) {
243         State.Column = State.Column - Previous.Tok.getLength();
244       } else if (Current.Tok.is(tok::lessless) &&
245                  State.FirstLessLess[ParenLevel] != 0) {
246         State.Column = State.FirstLessLess[ParenLevel];
247       } else if (ParenLevel != 0 &&
248                  (Previous.Tok.is(tok::equal) || Current.Tok.is(tok::arrow) ||
249                   Current.Tok.is(tok::period))) {
250         // Indent and extra 4 spaces after '=' as it continues an expression.
251         // Don't do that on the top level, as we already indent 4 there.
252         State.Column = State.Indent[ParenLevel] + 4;
253       } else if (
254           Line.Tokens[0].Tok.is(tok::kw_for) && Previous.Tok.is(tok::comma)) {
255         State.Column = State.ForLoopVariablePos;
256       } else if (Annotations[Index - 1].ClosesTemplateDeclaration) {
257         State.Column = State.Indent[ParenLevel] - 4;
258       } else {
259         State.Column = State.Indent[ParenLevel];
260       }
261 
262       State.StartOfLineLevel = ParenLevel + 1;
263 
264       if (Line.Tokens[0].Tok.is(tok::kw_for))
265         State.LineContainsContinuedForLoopSection =
266             Previous.Tok.isNot(tok::semi);
267 
268       if (!DryRun) {
269         if (!Line.InPPDirective)
270           replaceWhitespace(Current, 1, State.Column);
271         else
272           replacePPWhitespace(Current, 1, State.Column, WhitespaceStartColumn);
273       }
274 
275       State.LastSpace[ParenLevel] = State.Indent[ParenLevel];
276       if (Current.Tok.is(tok::colon) &&
277           Annotations[Index].Type != TokenAnnotation::TT_ConditionalExpr &&
278           Annotations[0].Type != TokenAnnotation::TT_ObjCMethodSpecifier)
279         State.Indent[ParenLevel] += 2;
280     } else {
281       if (Current.Tok.is(tok::equal) && Line.Tokens[0].Tok.is(tok::kw_for))
282         State.ForLoopVariablePos = State.Column - Previous.Tok.getLength();
283 
284       unsigned Spaces = Annotations[Index].SpaceRequiredBefore ? 1 : 0;
285       if (Annotations[Index].Type == TokenAnnotation::TT_LineComment)
286         Spaces = 2;
287 
288       if (!DryRun)
289         replaceWhitespace(Current, 0, Spaces);
290 
291       // FIXME: Look into using this alignment at other ParenLevels.
292       if (ParenLevel == 0 && (getPrecedence(Previous) == prec::Assignment ||
293                               Previous.Tok.is(tok::kw_return)))
294         State.Indent[ParenLevel] = State.Column + Spaces;
295       if (Previous.Tok.is(tok::l_paren) ||
296           Annotations[Index - 1].Type == TokenAnnotation::TT_TemplateOpener)
297         State.Indent[ParenLevel] = State.Column;
298 
299       // Top-level spaces are exempt as that mostly leads to better results.
300       State.Column += Spaces;
301       if (Spaces > 0 && ParenLevel != 0)
302         State.LastSpace[ParenLevel] = State.Column;
303     }
304     moveStateToNextToken(State);
305   }
306 
307   /// \brief Mark the next token as consumed in \p State and modify its stacks
308   /// accordingly.
309   void moveStateToNextToken(IndentState &State) {
310     unsigned Index = State.ConsumedTokens;
311     const FormatToken &Current = Line.Tokens[Index];
312     unsigned ParenLevel = State.Indent.size() - 1;
313 
314     if (Current.Tok.is(tok::lessless) && State.FirstLessLess[ParenLevel] == 0)
315       State.FirstLessLess[ParenLevel] = State.Column;
316 
317     State.Column += Current.Tok.getLength();
318 
319     // If we encounter an opening (, [, { or <, we add a level to our stacks to
320     // prepare for the following tokens.
321     if (Current.Tok.is(tok::l_paren) || Current.Tok.is(tok::l_square) ||
322         Current.Tok.is(tok::l_brace) ||
323         Annotations[Index].Type == TokenAnnotation::TT_TemplateOpener) {
324       State.Indent.push_back(4 + State.LastSpace.back());
325       State.LastSpace.push_back(State.LastSpace.back());
326       State.FirstLessLess.push_back(0);
327     }
328 
329     // If we encounter a closing ), ], } or >, we can remove a level from our
330     // stacks.
331     if (Current.Tok.is(tok::r_paren) || Current.Tok.is(tok::r_square) ||
332         (Current.Tok.is(tok::r_brace) && State.ConsumedTokens > 0) ||
333         Annotations[Index].Type == TokenAnnotation::TT_TemplateCloser) {
334       State.Indent.pop_back();
335       State.LastSpace.pop_back();
336       State.FirstLessLess.pop_back();
337     }
338 
339     ++State.ConsumedTokens;
340   }
341 
342   /// \brief Calculate the panelty for splitting after the token at \p Index.
343   unsigned splitPenalty(unsigned Index) {
344     assert(Index < Line.Tokens.size() &&
345            "Tried to calculate penalty for splitting after the last token");
346     const FormatToken &Left = Line.Tokens[Index];
347     const FormatToken &Right = Line.Tokens[Index + 1];
348 
349     // In for-loops, prefer breaking at ',' and ';'.
350     if (Line.Tokens[0].Tok.is(tok::kw_for) &&
351         (Left.Tok.isNot(tok::comma) && Left.Tok.isNot(tok::semi)))
352       return 20;
353 
354     if (Left.Tok.is(tok::semi) || Left.Tok.is(tok::comma) ||
355         Annotations[Index].ClosesTemplateDeclaration)
356       return 0;
357     if (Left.Tok.is(tok::l_paren))
358       return 20;
359 
360     prec::Level Level = getPrecedence(Line.Tokens[Index]);
361     if (Level != prec::Unknown)
362       return Level;
363 
364     if (Right.Tok.is(tok::arrow) || Right.Tok.is(tok::period))
365       return 50;
366 
367     return 3;
368   }
369 
370   /// \brief Calculate the number of lines needed to format the remaining part
371   /// of the unwrapped line.
372   ///
373   /// Assumes the formatting so far has led to
374   /// the \c IndentState \p State. If \p NewLine is set, a new line will be
375   /// added after the previous token.
376   ///
377   /// \param StopAt is used for optimization. If we can determine that we'll
378   /// definitely need at least \p StopAt additional lines, we already know of a
379   /// better solution.
380   unsigned calcPenalty(IndentState State, bool NewLine, unsigned StopAt) {
381     // We are at the end of the unwrapped line, so we don't need any more lines.
382     if (State.ConsumedTokens >= Line.Tokens.size())
383       return 0;
384 
385     if (!NewLine && Annotations[State.ConsumedTokens].MustBreakBefore)
386       return UINT_MAX;
387     if (NewLine && !Annotations[State.ConsumedTokens].CanBreakBefore)
388       return UINT_MAX;
389     if (!NewLine && Line.Tokens[State.ConsumedTokens - 1].Tok.is(tok::semi) &&
390         State.LineContainsContinuedForLoopSection)
391       return UINT_MAX;
392 
393     unsigned CurrentPenalty = 0;
394     if (NewLine) {
395       CurrentPenalty += Parameters.PenaltyIndentLevel * State.Indent.size() +
396                         splitPenalty(State.ConsumedTokens - 1);
397     } else {
398       if (State.Indent.size() < State.StartOfLineLevel)
399         CurrentPenalty += Parameters.PenaltyLevelDecrease *
400                           (State.StartOfLineLevel - State.Indent.size());
401     }
402 
403     addTokenToState(NewLine, true, State);
404 
405     // Exceeding column limit is bad.
406     if (State.Column > Style.ColumnLimit - (Line.InPPDirective ? 1 : 0))
407       return UINT_MAX;
408 
409     if (StopAt <= CurrentPenalty)
410       return UINT_MAX;
411     StopAt -= CurrentPenalty;
412 
413     StateMap::iterator I = Memory.find(State);
414     if (I != Memory.end()) {
415       // If this state has already been examined, we can safely return the
416       // previous result if we
417       // - have not hit the optimatization (and thus returned UINT_MAX) OR
418       // - are now computing for a smaller or equal StopAt.
419       unsigned SavedResult = I->second.first;
420       unsigned SavedStopAt = I->second.second;
421       if (SavedResult != UINT_MAX)
422         return SavedResult + CurrentPenalty;
423       else if (StopAt <= SavedStopAt)
424         return UINT_MAX;
425     }
426 
427     unsigned NoBreak = calcPenalty(State, false, StopAt);
428     unsigned WithBreak = calcPenalty(State, true, std::min(StopAt, NoBreak));
429     unsigned Result = std::min(NoBreak, WithBreak);
430 
431     // We have to store 'Result' without adding 'CurrentPenalty' as the latter
432     // can depend on 'NewLine'.
433     Memory[State] = std::pair<unsigned, unsigned>(Result, StopAt);
434 
435     return Result == UINT_MAX ? UINT_MAX : Result + CurrentPenalty;
436   }
437 
438   /// \brief Replaces the whitespace in front of \p Tok. Only call once for
439   /// each \c FormatToken.
440   void replaceWhitespace(const FormatToken &Tok, unsigned NewLines,
441                          unsigned Spaces) {
442     Replaces.insert(tooling::Replacement(
443         SourceMgr, Tok.WhiteSpaceStart, Tok.WhiteSpaceLength,
444         std::string(NewLines, '\n') + std::string(Spaces, ' ')));
445   }
446 
447   /// \brief Like \c replaceWhitespace, but additionally adds right-aligned
448   /// backslashes to escape newlines inside a preprocessor directive.
449   ///
450   /// This function and \c replaceWhitespace have the same behavior if
451   /// \c Newlines == 0.
452   void replacePPWhitespace(const FormatToken &Tok, unsigned NewLines,
453                            unsigned Spaces, unsigned WhitespaceStartColumn) {
454     std::string NewLineText;
455     if (NewLines > 0) {
456       unsigned Offset =
457           std::min<int>(Style.ColumnLimit - 1, WhitespaceStartColumn);
458       for (unsigned i = 0; i < NewLines; ++i) {
459         NewLineText += std::string(Style.ColumnLimit - Offset - 1, ' ');
460         NewLineText += "\\\n";
461         Offset = 0;
462       }
463     }
464     Replaces.insert(tooling::Replacement(
465         SourceMgr, Tok.WhiteSpaceStart, Tok.WhiteSpaceLength,
466         NewLineText + std::string(Spaces, ' ')));
467   }
468 
469   /// \brief Add a new line and the required indent before the first Token
470   /// of the \c UnwrappedLine if there was no structural parsing error.
471   /// Returns the indent level of the \c UnwrappedLine.
472   unsigned formatFirstToken() {
473     const FormatToken &Token = Line.Tokens[0];
474     if (!Token.WhiteSpaceStart.isValid() || StructuralError)
475       return SourceMgr.getSpellingColumnNumber(Token.Tok.getLocation()) - 1;
476 
477     unsigned Newlines =
478         std::min(Token.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
479     unsigned Offset = SourceMgr.getFileOffset(Token.WhiteSpaceStart);
480     if (Newlines == 0 && Offset != 0)
481       Newlines = 1;
482     unsigned Indent = Line.Level * 2;
483     if ((Token.Tok.is(tok::kw_public) || Token.Tok.is(tok::kw_protected) ||
484          Token.Tok.is(tok::kw_private)) &&
485         static_cast<int>(Indent) + Style.AccessModifierOffset >= 0)
486       Indent += Style.AccessModifierOffset;
487     if (!Line.InPPDirective || Token.HasUnescapedNewline)
488       replaceWhitespace(Token, Newlines, Indent);
489     else
490       // FIXME: Figure out how to get the previous end-of-line column.
491       replacePPWhitespace(Token, Newlines, Indent, 0);
492     return Indent;
493   }
494 
495   FormatStyle Style;
496   SourceManager &SourceMgr;
497   const UnwrappedLine &Line;
498   const std::vector<TokenAnnotation> &Annotations;
499   tooling::Replacements &Replaces;
500   bool StructuralError;
501 
502   // A map from an indent state to a pair (Result, Used-StopAt).
503   typedef std::map<IndentState, std::pair<unsigned, unsigned> > StateMap;
504   StateMap Memory;
505 
506   OptimizationParameters Parameters;
507 };
508 
509 /// \brief Determines extra information about the tokens comprising an
510 /// \c UnwrappedLine.
511 class TokenAnnotator {
512 public:
513   TokenAnnotator(const UnwrappedLine &Line, const FormatStyle &Style,
514                  SourceManager &SourceMgr)
515       : Line(Line), Style(Style), SourceMgr(SourceMgr) {
516   }
517 
518   /// \brief A parser that gathers additional information about tokens.
519   ///
520   /// The \c TokenAnnotator tries to matches parenthesis and square brakets and
521   /// store a parenthesis levels. It also tries to resolve matching "<" and ">"
522   /// into template parameter lists.
523   class AnnotatingParser {
524   public:
525     AnnotatingParser(const SmallVector<FormatToken, 16> &Tokens,
526                      std::vector<TokenAnnotation> &Annotations)
527         : Tokens(Tokens), Annotations(Annotations), Index(0) {
528     }
529 
530     bool parseAngle() {
531       while (Index < Tokens.size()) {
532         if (Tokens[Index].Tok.is(tok::greater)) {
533           Annotations[Index].Type = TokenAnnotation::TT_TemplateCloser;
534           next();
535           return true;
536         }
537         if (Tokens[Index].Tok.is(tok::r_paren) ||
538             Tokens[Index].Tok.is(tok::r_square) ||
539             Tokens[Index].Tok.is(tok::r_brace))
540           return false;
541         if (Tokens[Index].Tok.is(tok::pipepipe) ||
542             Tokens[Index].Tok.is(tok::ampamp) ||
543             Tokens[Index].Tok.is(tok::question) ||
544             Tokens[Index].Tok.is(tok::colon))
545           return false;
546         if (!consumeToken())
547           return false;
548       }
549       return false;
550     }
551 
552     bool parseParens() {
553       while (Index < Tokens.size()) {
554         if (Tokens[Index].Tok.is(tok::r_paren)) {
555           next();
556           return true;
557         }
558         if (Tokens[Index].Tok.is(tok::r_square) ||
559             Tokens[Index].Tok.is(tok::r_brace))
560           return false;
561         if (!consumeToken())
562           return false;
563       }
564       return false;
565     }
566 
567     bool parseSquare() {
568       while (Index < Tokens.size()) {
569         if (Tokens[Index].Tok.is(tok::r_square)) {
570           next();
571           return true;
572         }
573         if (Tokens[Index].Tok.is(tok::r_paren) ||
574             Tokens[Index].Tok.is(tok::r_brace))
575           return false;
576         if (!consumeToken())
577           return false;
578       }
579       return false;
580     }
581 
582     bool parseConditional() {
583       while (Index < Tokens.size()) {
584         if (Tokens[Index].Tok.is(tok::colon)) {
585           Annotations[Index].Type = TokenAnnotation::TT_ConditionalExpr;
586           next();
587           return true;
588         }
589         if (!consumeToken())
590           return false;
591       }
592       return false;
593     }
594 
595     bool parseTemplateDeclaration() {
596       if (Index < Tokens.size() && Tokens[Index].Tok.is(tok::less)) {
597         Annotations[Index].Type = TokenAnnotation::TT_TemplateOpener;
598         next();
599         if (!parseAngle())
600           return false;
601         Annotations[Index - 1].ClosesTemplateDeclaration = true;
602         parseLine();
603         return true;
604       }
605       return false;
606     }
607 
608     bool consumeToken() {
609       unsigned CurrentIndex = Index;
610       next();
611       switch (Tokens[CurrentIndex].Tok.getKind()) {
612       case tok::l_paren:
613         if (!parseParens())
614           return false;
615         if (Index < Tokens.size() && Tokens[Index].Tok.is(tok::colon)) {
616           Annotations[Index].Type = TokenAnnotation::TT_CtorInitializerColon;
617           next();
618         }
619         break;
620       case tok::l_square:
621         if (!parseSquare())
622           return false;
623         break;
624       case tok::less:
625         if (parseAngle())
626           Annotations[CurrentIndex].Type = TokenAnnotation::TT_TemplateOpener;
627         else {
628           Annotations[CurrentIndex].Type = TokenAnnotation::TT_BinaryOperator;
629           Index = CurrentIndex + 1;
630         }
631         break;
632       case tok::r_paren:
633       case tok::r_square:
634         return false;
635       case tok::greater:
636         Annotations[CurrentIndex].Type = TokenAnnotation::TT_BinaryOperator;
637         break;
638       case tok::kw_operator:
639         if (Tokens[Index].Tok.is(tok::l_paren)) {
640           Annotations[Index].Type = TokenAnnotation::TT_OverloadedOperator;
641           next();
642           if (Index < Tokens.size() && Tokens[Index].Tok.is(tok::r_paren)) {
643             Annotations[Index].Type = TokenAnnotation::TT_OverloadedOperator;
644             next();
645           }
646         } else {
647           while (Index < Tokens.size() && !Tokens[Index].Tok.is(tok::l_paren)) {
648             Annotations[Index].Type = TokenAnnotation::TT_OverloadedOperator;
649             next();
650           }
651         }
652         break;
653       case tok::question:
654         parseConditional();
655         break;
656       case tok::kw_template:
657         parseTemplateDeclaration();
658         break;
659       default:
660         break;
661       }
662       return true;
663     }
664 
665     void parseIncludeDirective() {
666       while (Index < Tokens.size()) {
667         if (Tokens[Index].Tok.is(tok::slash))
668           Annotations[Index].Type = TokenAnnotation::TT_DirectorySeparator;
669         else if (Tokens[Index].Tok.is(tok::less))
670           Annotations[Index].Type = TokenAnnotation::TT_TemplateOpener;
671         else if (Tokens[Index].Tok.is(tok::greater))
672           Annotations[Index].Type = TokenAnnotation::TT_TemplateCloser;
673         next();
674       }
675     }
676 
677     void parsePreprocessorDirective() {
678       next();
679       if (Index >= Tokens.size())
680         return;
681       // It is the responsibility of the UnwrappedLineParser to make sure
682       // this sequence is not produced inside an unwrapped line.
683       assert(Tokens[Index].Tok.getIdentifierInfo() != NULL);
684       switch (Tokens[Index].Tok.getIdentifierInfo()->getPPKeywordID()) {
685       case tok::pp_include:
686       case tok::pp_import:
687         parseIncludeDirective();
688         break;
689       default:
690         break;
691       }
692     }
693 
694     bool parseLine() {
695       if (Tokens[Index].Tok.is(tok::hash)) {
696         parsePreprocessorDirective();
697         return true;
698       }
699       while (Index < Tokens.size()) {
700         if (!consumeToken())
701           return false;
702       }
703       return true;
704     }
705 
706     void next() {
707       ++Index;
708     }
709 
710   private:
711     const SmallVector<FormatToken, 16> &Tokens;
712     std::vector<TokenAnnotation> &Annotations;
713     unsigned Index;
714   };
715 
716   bool annotate() {
717     Annotations.clear();
718     for (int i = 0, e = Line.Tokens.size(); i != e; ++i) {
719       Annotations.push_back(TokenAnnotation());
720     }
721 
722     AnnotatingParser Parser(Line.Tokens, Annotations);
723     if (!Parser.parseLine())
724       return false;
725 
726     determineTokenTypes();
727     bool IsObjCMethodDecl =
728         (Line.Tokens.size() > 0 &&
729          (Annotations[0].Type == TokenAnnotation::TT_ObjCMethodSpecifier));
730     for (int i = 1, e = Line.Tokens.size(); i != e; ++i) {
731       TokenAnnotation &Annotation = Annotations[i];
732 
733       Annotation.CanBreakBefore = canBreakBefore(i);
734 
735       if (Annotation.Type == TokenAnnotation::TT_CtorInitializerColon) {
736         Annotation.MustBreakBefore = true;
737         Annotation.SpaceRequiredBefore = true;
738       } else if (Annotation.Type == TokenAnnotation::TT_OverloadedOperator) {
739         Annotation.SpaceRequiredBefore =
740             Line.Tokens[i].Tok.is(tok::identifier) ||
741             Line.Tokens[i].Tok.is(tok::kw_new) ||
742             Line.Tokens[i].Tok.is(tok::kw_delete);
743       } else if (
744           Annotations[i - 1].Type == TokenAnnotation::TT_OverloadedOperator) {
745         Annotation.SpaceRequiredBefore = false;
746       } else if (IsObjCMethodDecl && Line.Tokens[i].Tok.is(tok::identifier) &&
747                  (i != e - 1) && Line.Tokens[i + 1].Tok.is(tok::colon) &&
748                  Line.Tokens[i - 1].Tok.is(tok::identifier)) {
749         Annotation.CanBreakBefore = true;
750         Annotation.SpaceRequiredBefore = true;
751       } else if (IsObjCMethodDecl && Line.Tokens[i].Tok.is(tok::identifier) &&
752                  Line.Tokens[i - 1].Tok.is(tok::l_paren) &&
753                  Line.Tokens[i - 2].Tok.is(tok::colon)) {
754         // Don't break this identifier as ':' or identifier
755         // before it will break.
756         Annotation.CanBreakBefore = false;
757       } else if (Line.Tokens[i].Tok.is(tok::at) &&
758                  Line.Tokens[i - 2].Tok.is(tok::at)) {
759         // Don't put two objc's '@' on the same line. This could happen,
760         // as in, @optional @property ...
761         Annotation.MustBreakBefore = true;
762       } else if (Line.Tokens[i].Tok.is(tok::colon)) {
763         Annotation.SpaceRequiredBefore =
764             Line.Tokens[0].Tok.isNot(tok::kw_case) && !IsObjCMethodDecl &&
765             (i != e - 1);
766         // Don't break at ':' if identifier before it can beak.
767         if (IsObjCMethodDecl && Line.Tokens[i - 1].Tok.is(tok::identifier) &&
768             Annotations[i - 1].CanBreakBefore)
769           Annotation.CanBreakBefore = false;
770       } else if (
771           Annotations[i - 1].Type == TokenAnnotation::TT_ObjCMethodSpecifier) {
772         Annotation.SpaceRequiredBefore = true;
773       } else if (Annotations[i - 1].Type == TokenAnnotation::TT_UnaryOperator) {
774         Annotation.SpaceRequiredBefore = false;
775       } else if (Annotation.Type == TokenAnnotation::TT_UnaryOperator) {
776         Annotation.SpaceRequiredBefore =
777             Line.Tokens[i - 1].Tok.isNot(tok::l_paren) &&
778             Line.Tokens[i - 1].Tok.isNot(tok::l_square);
779       } else if (Line.Tokens[i - 1].Tok.is(tok::greater) &&
780                  Line.Tokens[i].Tok.is(tok::greater)) {
781         if (Annotation.Type == TokenAnnotation::TT_TemplateCloser &&
782             Annotations[i - 1].Type == TokenAnnotation::TT_TemplateCloser)
783           Annotation.SpaceRequiredBefore = Style.SplitTemplateClosingGreater;
784         else
785           Annotation.SpaceRequiredBefore = false;
786       } else if (
787           Annotation.Type == TokenAnnotation::TT_DirectorySeparator ||
788           Annotations[i - 1].Type == TokenAnnotation::TT_DirectorySeparator) {
789         Annotation.SpaceRequiredBefore = false;
790       } else if (
791           Annotation.Type == TokenAnnotation::TT_BinaryOperator ||
792           Annotations[i - 1].Type == TokenAnnotation::TT_BinaryOperator) {
793         Annotation.SpaceRequiredBefore = true;
794       } else if (
795           Annotations[i - 1].Type == TokenAnnotation::TT_TemplateCloser &&
796           Line.Tokens[i].Tok.is(tok::l_paren)) {
797         Annotation.SpaceRequiredBefore = false;
798       } else if (Line.Tokens[i].Tok.is(tok::less) &&
799                  Line.Tokens[0].Tok.is(tok::hash)) {
800         Annotation.SpaceRequiredBefore = true;
801       } else if (IsObjCMethodDecl && Line.Tokens[i - 1].Tok.is(tok::r_paren) &&
802                  Line.Tokens[i].Tok.is(tok::identifier)) {
803         // Don't space between ')' and <id>
804         Annotation.SpaceRequiredBefore = false;
805       } else if (IsObjCMethodDecl && Line.Tokens[i - 1].Tok.is(tok::colon) &&
806                  Line.Tokens[i].Tok.is(tok::l_paren)) {
807         // Don't space between ':' and '('
808         Annotation.SpaceRequiredBefore = false;
809       } else if (Annotation.Type == TokenAnnotation::TT_TrailingUnaryOperator) {
810         Annotation.SpaceRequiredBefore = false;
811       } else {
812         Annotation.SpaceRequiredBefore =
813             spaceRequiredBetween(Line.Tokens[i - 1].Tok, Line.Tokens[i].Tok);
814       }
815 
816       if (Annotations[i - 1].Type == TokenAnnotation::TT_LineComment ||
817           (Line.Tokens[i].Tok.is(tok::string_literal) &&
818            Line.Tokens[i - 1].Tok.is(tok::string_literal))) {
819         Annotation.MustBreakBefore = true;
820       }
821 
822       if (Annotation.MustBreakBefore)
823         Annotation.CanBreakBefore = true;
824     }
825     return true;
826   }
827 
828   const std::vector<TokenAnnotation> &getAnnotations() {
829     return Annotations;
830   }
831 
832 private:
833   void determineTokenTypes() {
834     bool IsRHS = false;
835     for (int i = 0, e = Line.Tokens.size(); i != e; ++i) {
836       TokenAnnotation &Annotation = Annotations[i];
837       const FormatToken &Tok = Line.Tokens[i];
838 
839       if (getPrecedence(Tok) == prec::Assignment)
840         IsRHS = true;
841       else if (Tok.Tok.is(tok::kw_return))
842         IsRHS = true;
843 
844       if (Annotation.Type != TokenAnnotation::TT_Unknown)
845         continue;
846 
847       if (Tok.Tok.is(tok::star) || Tok.Tok.is(tok::amp)) {
848         Annotation.Type = determineStarAmpUsage(i, IsRHS);
849       } else if (Tok.Tok.is(tok::minus) || Tok.Tok.is(tok::plus)) {
850         Annotation.Type = determinePlusMinusUsage(i);
851       } else if (Tok.Tok.is(tok::minusminus) || Tok.Tok.is(tok::plusplus)) {
852         Annotation.Type = determineIncrementUsage(i);
853       } else if (Tok.Tok.is(tok::exclaim)) {
854         Annotation.Type = TokenAnnotation::TT_UnaryOperator;
855       } else if (isBinaryOperator(Line.Tokens[i])) {
856         Annotation.Type = TokenAnnotation::TT_BinaryOperator;
857       } else if (Tok.Tok.is(tok::comment)) {
858         StringRef Data(SourceMgr.getCharacterData(Tok.Tok.getLocation()),
859                        Tok.Tok.getLength());
860         if (Data.startswith("//"))
861           Annotation.Type = TokenAnnotation::TT_LineComment;
862         else
863           Annotation.Type = TokenAnnotation::TT_BlockComment;
864       }
865     }
866   }
867 
868   bool isBinaryOperator(const FormatToken &Tok) {
869     // Comma is a binary operator, but does not behave as such wrt. formatting.
870     return getPrecedence(Tok) > prec::Comma;
871   }
872 
873   TokenAnnotation::TokenType determineStarAmpUsage(unsigned Index, bool IsRHS) {
874     if (Index == 0)
875       return TokenAnnotation::TT_UnaryOperator;
876     if (Index == Annotations.size())
877       return TokenAnnotation::TT_Unknown;
878     const FormatToken &PrevToken = Line.Tokens[Index - 1];
879     const FormatToken &NextToken = Line.Tokens[Index + 1];
880 
881     if (PrevToken.Tok.is(tok::l_paren) || PrevToken.Tok.is(tok::comma) ||
882         PrevToken.Tok.is(tok::kw_return) || PrevToken.Tok.is(tok::colon) ||
883         Annotations[Index - 1].Type == TokenAnnotation::TT_BinaryOperator)
884       return TokenAnnotation::TT_UnaryOperator;
885 
886     if (PrevToken.Tok.isLiteral() || NextToken.Tok.isLiteral() ||
887         NextToken.Tok.is(tok::plus) || NextToken.Tok.is(tok::minus) ||
888         NextToken.Tok.is(tok::plusplus) || NextToken.Tok.is(tok::minusminus) ||
889         NextToken.Tok.is(tok::tilde) || NextToken.Tok.is(tok::exclaim) ||
890         NextToken.Tok.is(tok::kw_alignof) || NextToken.Tok.is(tok::kw_sizeof))
891       return TokenAnnotation::TT_BinaryOperator;
892 
893     if (NextToken.Tok.is(tok::comma) || NextToken.Tok.is(tok::r_paren) ||
894         NextToken.Tok.is(tok::greater))
895       return TokenAnnotation::TT_PointerOrReference;
896 
897     // It is very unlikely that we are going to find a pointer or reference type
898     // definition on the RHS of an assignment.
899     if (IsRHS)
900       return TokenAnnotation::TT_BinaryOperator;
901 
902     return TokenAnnotation::TT_PointerOrReference;
903   }
904 
905   TokenAnnotation::TokenType determinePlusMinusUsage(unsigned Index) {
906     // At the start of the line, +/- specific ObjectiveC method declarations.
907     if (Index == 0)
908       return TokenAnnotation::TT_ObjCMethodSpecifier;
909 
910     // Use heuristics to recognize unary operators.
911     const Token &PreviousTok = Line.Tokens[Index - 1].Tok;
912     if (PreviousTok.is(tok::equal) || PreviousTok.is(tok::l_paren) ||
913         PreviousTok.is(tok::comma) || PreviousTok.is(tok::l_square) ||
914         PreviousTok.is(tok::question) || PreviousTok.is(tok::colon) ||
915         PreviousTok.is(tok::kw_return) || PreviousTok.is(tok::kw_case))
916       return TokenAnnotation::TT_UnaryOperator;
917 
918     // There can't be to consecutive binary operators.
919     if (Annotations[Index - 1].Type == TokenAnnotation::TT_BinaryOperator)
920       return TokenAnnotation::TT_UnaryOperator;
921 
922     // Fall back to marking the token as binary operator.
923     return TokenAnnotation::TT_BinaryOperator;
924   }
925 
926   /// \brief Determine whether ++/-- are pre- or post-increments/-decrements.
927   TokenAnnotation::TokenType determineIncrementUsage(unsigned Index) {
928     if (Index != 0 && Line.Tokens[Index - 1].Tok.is(tok::identifier))
929       return TokenAnnotation::TT_TrailingUnaryOperator;
930 
931     return TokenAnnotation::TT_UnaryOperator;
932   }
933 
934   bool spaceRequiredBetween(Token Left, Token Right) {
935     if (Right.is(tok::r_paren) || Right.is(tok::semi) || Right.is(tok::comma))
936       return false;
937     if (Left.is(tok::kw_template) && Right.is(tok::less))
938       return true;
939     if (Left.is(tok::arrow) || Right.is(tok::arrow))
940       return false;
941     if (Left.is(tok::exclaim) || Left.is(tok::tilde))
942       return false;
943     if (Left.is(tok::at) && Right.is(tok::identifier))
944       return false;
945     if (Left.is(tok::less) || Right.is(tok::greater) || Right.is(tok::less))
946       return false;
947     if (Right.is(tok::amp) || Right.is(tok::star))
948       return Left.isLiteral() ||
949              (Left.isNot(tok::star) && Left.isNot(tok::amp) &&
950               !Style.PointerAndReferenceBindToType);
951     if (Left.is(tok::amp) || Left.is(tok::star))
952       return Right.isLiteral() || Style.PointerAndReferenceBindToType;
953     if (Right.is(tok::star) && Left.is(tok::l_paren))
954       return false;
955     if (Left.is(tok::l_square) || Right.is(tok::l_square) ||
956         Right.is(tok::r_square))
957       return false;
958     if (Left.is(tok::coloncolon) ||
959         (Right.is(tok::coloncolon) &&
960          (Left.is(tok::identifier) || Left.is(tok::greater))))
961       return false;
962     if (Left.is(tok::period) || Right.is(tok::period))
963       return false;
964     if (Left.is(tok::colon) || Right.is(tok::colon))
965       return true;
966     if (Left.is(tok::l_paren))
967       return false;
968     if (Left.is(tok::hash))
969       return false;
970     if (Right.is(tok::l_paren)) {
971       return Left.is(tok::kw_if) || Left.is(tok::kw_for) ||
972              Left.is(tok::kw_while) || Left.is(tok::kw_switch) ||
973              (Left.isNot(tok::identifier) && Left.isNot(tok::kw_sizeof) &&
974               Left.isNot(tok::kw_typeof) && Left.isNot(tok::kw_alignof));
975     }
976     return true;
977   }
978 
979   bool canBreakBefore(unsigned i) {
980     if (Annotations[i - 1].ClosesTemplateDeclaration)
981       return true;
982     if (Annotations[i - 1].Type == TokenAnnotation::TT_PointerOrReference ||
983         Annotations[i - 1].Type == TokenAnnotation::TT_TemplateCloser ||
984         Annotations[i].Type == TokenAnnotation::TT_ConditionalExpr) {
985       return false;
986     }
987     const FormatToken &Left = Line.Tokens[i - 1];
988     const FormatToken &Right = Line.Tokens[i];
989     if (Right.Tok.is(tok::r_paren) || Right.Tok.is(tok::l_brace) ||
990         Right.Tok.is(tok::comment) || Right.Tok.is(tok::greater))
991       return false;
992     return (isBinaryOperator(Left) && Left.Tok.isNot(tok::lessless)) ||
993            Left.Tok.is(tok::comma) || Right.Tok.is(tok::lessless) ||
994            Right.Tok.is(tok::arrow) || Right.Tok.is(tok::period) ||
995            Right.Tok.is(tok::colon) || Left.Tok.is(tok::semi) ||
996            Left.Tok.is(tok::l_brace) ||
997            (Left.Tok.is(tok::l_paren) && !Right.Tok.is(tok::r_paren));
998   }
999 
1000   const UnwrappedLine &Line;
1001   FormatStyle Style;
1002   SourceManager &SourceMgr;
1003   std::vector<TokenAnnotation> Annotations;
1004 };
1005 
1006 class LexerBasedFormatTokenSource : public FormatTokenSource {
1007 public:
1008   LexerBasedFormatTokenSource(Lexer &Lex, SourceManager &SourceMgr)
1009       : GreaterStashed(false), Lex(Lex), SourceMgr(SourceMgr),
1010         IdentTable(Lex.getLangOpts()) {
1011     Lex.SetKeepWhitespaceMode(true);
1012   }
1013 
1014   virtual FormatToken getNextToken() {
1015     if (GreaterStashed) {
1016       FormatTok.NewlinesBefore = 0;
1017       FormatTok.WhiteSpaceStart =
1018           FormatTok.Tok.getLocation().getLocWithOffset(1);
1019       FormatTok.WhiteSpaceLength = 0;
1020       GreaterStashed = false;
1021       return FormatTok;
1022     }
1023 
1024     FormatTok = FormatToken();
1025     Lex.LexFromRawLexer(FormatTok.Tok);
1026     FormatTok.WhiteSpaceStart = FormatTok.Tok.getLocation();
1027 
1028     // Consume and record whitespace until we find a significant token.
1029     while (FormatTok.Tok.is(tok::unknown)) {
1030       StringRef Text = tokenText(FormatTok.Tok);
1031       FormatTok.NewlinesBefore += Text.count('\n');
1032       FormatTok.HasUnescapedNewline =
1033           Text.count("\\\n") != FormatTok.NewlinesBefore;
1034       FormatTok.WhiteSpaceLength += FormatTok.Tok.getLength();
1035 
1036       if (FormatTok.Tok.is(tok::eof))
1037         return FormatTok;
1038       Lex.LexFromRawLexer(FormatTok.Tok);
1039     }
1040 
1041     if (FormatTok.Tok.is(tok::raw_identifier)) {
1042       IdentifierInfo &Info = IdentTable.get(tokenText(FormatTok.Tok));
1043       FormatTok.Tok.setIdentifierInfo(&Info);
1044       FormatTok.Tok.setKind(Info.getTokenID());
1045     }
1046 
1047     if (FormatTok.Tok.is(tok::greatergreater)) {
1048       FormatTok.Tok.setKind(tok::greater);
1049       GreaterStashed = true;
1050     }
1051 
1052     return FormatTok;
1053   }
1054 
1055 private:
1056   FormatToken FormatTok;
1057   bool GreaterStashed;
1058   Lexer &Lex;
1059   SourceManager &SourceMgr;
1060   IdentifierTable IdentTable;
1061 
1062   /// Returns the text of \c FormatTok.
1063   StringRef tokenText(Token &Tok) {
1064     return StringRef(SourceMgr.getCharacterData(Tok.getLocation()),
1065                      Tok.getLength());
1066   }
1067 };
1068 
1069 class Formatter : public UnwrappedLineConsumer {
1070 public:
1071   Formatter(const FormatStyle &Style, Lexer &Lex, SourceManager &SourceMgr,
1072             const std::vector<CharSourceRange> &Ranges)
1073       : Style(Style), Lex(Lex), SourceMgr(SourceMgr), Ranges(Ranges),
1074         StructuralError(false) {
1075   }
1076 
1077   virtual ~Formatter() {
1078   }
1079 
1080   tooling::Replacements format() {
1081     LexerBasedFormatTokenSource Tokens(Lex, SourceMgr);
1082     UnwrappedLineParser Parser(Style, Tokens, *this);
1083     StructuralError = Parser.parse();
1084     for (std::vector<UnwrappedLine>::iterator I = UnwrappedLines.begin(),
1085                                               E = UnwrappedLines.end();
1086          I != E; ++I)
1087       formatUnwrappedLine(*I);
1088     return Replaces;
1089   }
1090 
1091 private:
1092   virtual void consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1093     UnwrappedLines.push_back(TheLine);
1094   }
1095 
1096   void formatUnwrappedLine(const UnwrappedLine &TheLine) {
1097     if (TheLine.Tokens.size() == 0)
1098       return;
1099 
1100     CharSourceRange LineRange =
1101         CharSourceRange::getTokenRange(TheLine.Tokens.front().Tok.getLocation(),
1102                                        TheLine.Tokens.back().Tok.getLocation());
1103 
1104     for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
1105       if (SourceMgr.isBeforeInTranslationUnit(LineRange.getEnd(),
1106                                               Ranges[i].getBegin()) ||
1107           SourceMgr.isBeforeInTranslationUnit(Ranges[i].getEnd(),
1108                                               LineRange.getBegin()))
1109         continue;
1110 
1111       TokenAnnotator Annotator(TheLine, Style, SourceMgr);
1112       if (!Annotator.annotate())
1113         return;
1114       UnwrappedLineFormatter Formatter(Style, SourceMgr, TheLine,
1115                                        Annotator.getAnnotations(), Replaces,
1116                                        StructuralError);
1117       Formatter.format();
1118       return;
1119     }
1120   }
1121 
1122   FormatStyle Style;
1123   Lexer &Lex;
1124   SourceManager &SourceMgr;
1125   tooling::Replacements Replaces;
1126   std::vector<CharSourceRange> Ranges;
1127   std::vector<UnwrappedLine> UnwrappedLines;
1128   bool StructuralError;
1129 };
1130 
1131 tooling::Replacements reformat(const FormatStyle &Style, Lexer &Lex,
1132                                SourceManager &SourceMgr,
1133                                std::vector<CharSourceRange> Ranges) {
1134   Formatter formatter(Style, Lex, SourceMgr, Ranges);
1135   return formatter.format();
1136 }
1137 
1138 }  // namespace format
1139 }  // namespace clang
1140