1 //===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that
11 /// fixes namespace end comments.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "NamespaceEndCommentsFixer.h"
16 #include "llvm/Support/Debug.h"
17 #include "llvm/Support/Regex.h"
18 
19 #define DEBUG_TYPE "namespace-end-comments-fixer"
20 
21 namespace clang {
22 namespace format {
23 
24 namespace {
25 // Computes the name of a namespace given the namespace token.
26 // Returns "" for anonymous namespace.
27 std::string computeName(const FormatToken *NamespaceTok) {
28   assert(NamespaceTok &&
29          NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) &&
30          "expecting a namespace token");
31   std::string name;
32   const FormatToken *Tok = NamespaceTok->getNextNonComment();
33   if (NamespaceTok->is(TT_NamespaceMacro)) {
34     // Collects all the non-comment tokens between opening parenthesis
35     // and closing parenthesis or comma.
36     assert(Tok && Tok->is(tok::l_paren) && "expected an opening parenthesis");
37     Tok = Tok->getNextNonComment();
38     while (Tok && !Tok->isOneOf(tok::r_paren, tok::comma)) {
39       name += Tok->TokenText;
40       Tok = Tok->getNextNonComment();
41     }
42   } else {
43     // Skip attributes.
44     if (Tok && Tok->is(tok::l_square)) {
45       for (int NestLevel = 1; NestLevel > 0;) {
46         Tok = Tok->getNextNonComment();
47         if (!Tok)
48           break;
49         if (Tok->is(tok::l_square))
50           ++NestLevel;
51         else if (Tok->is(tok::r_square))
52           --NestLevel;
53       }
54       if (Tok)
55         Tok = Tok->getNextNonComment();
56     }
57 
58     // Use the string after `namespace` as a name candidate until `{` or `::` or
59     // `(`. If the name is empty, use the candicate.
60     std::string FirstNSName;
61     // For `namespace [[foo]] A::B::inline C {` or
62     // `namespace MACRO1 MACRO2 A::B::inline C {`, returns "A::B::inline C".
63     // Peek for the first '::' (or '{' or '(')) and then return all tokens from
64     // one token before that up until the '{'. A '(' might be a macro with
65     // arguments.
66     const FormatToken *FirstNSTok = Tok;
67     while (Tok && !Tok->isOneOf(tok::l_brace, tok::coloncolon, tok::l_paren)) {
68       FirstNSName += FirstNSTok->TokenText;
69       FirstNSTok = Tok;
70       Tok = Tok->getNextNonComment();
71     }
72 
73     Tok = FirstNSTok;
74     while (Tok && !Tok->is(tok::l_brace)) {
75       name += Tok->TokenText;
76       if (Tok->is(tok::kw_inline))
77         name += " ";
78       Tok = Tok->getNextNonComment();
79     }
80     if (name.empty())
81       name = FirstNSName;
82   }
83   return name;
84 }
85 
86 std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline,
87                                   const FormatToken *NamespaceTok,
88                                   unsigned SpacesToAdd) {
89   std::string text = "//";
90   text.append(SpacesToAdd, ' ');
91   text += NamespaceTok->TokenText;
92   if (NamespaceTok->is(TT_NamespaceMacro))
93     text += "(";
94   else if (!NamespaceName.empty())
95     text += ' ';
96   text += NamespaceName;
97   if (NamespaceTok->is(TT_NamespaceMacro))
98     text += ")";
99   if (AddNewline)
100     text += '\n';
101   return text;
102 }
103 
104 bool hasEndComment(const FormatToken *RBraceTok) {
105   return RBraceTok->Next && RBraceTok->Next->is(tok::comment);
106 }
107 
108 bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName,
109                      const FormatToken *NamespaceTok) {
110   assert(hasEndComment(RBraceTok));
111   const FormatToken *Comment = RBraceTok->Next;
112 
113   // Matches a valid namespace end comment.
114   // Valid namespace end comments don't need to be edited.
115   static const llvm::Regex NamespaceCommentPattern =
116       llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
117                   "namespace( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$",
118                   llvm::Regex::IgnoreCase);
119   static const llvm::Regex NamespaceMacroCommentPattern =
120       llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *"
121                   "([a-zA-Z0-9_]+)\\(([a-zA-Z0-9:_]*)\\)\\.? *(\\*/)?$",
122                   llvm::Regex::IgnoreCase);
123 
124   SmallVector<StringRef, 8> Groups;
125   if (NamespaceTok->is(TT_NamespaceMacro) &&
126       NamespaceMacroCommentPattern.match(Comment->TokenText, &Groups)) {
127     StringRef NamespaceTokenText = Groups.size() > 4 ? Groups[4] : "";
128     // The name of the macro must be used.
129     if (NamespaceTokenText != NamespaceTok->TokenText)
130       return false;
131   } else if (NamespaceTok->isNot(tok::kw_namespace) ||
132              !NamespaceCommentPattern.match(Comment->TokenText, &Groups)) {
133     // Comment does not match regex.
134     return false;
135   }
136   StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5] : "";
137   // Anonymous namespace comments must not mention a namespace name.
138   if (NamespaceName.empty() && !NamespaceNameInComment.empty())
139     return false;
140   StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : "";
141   // Named namespace comments must not mention anonymous namespace.
142   if (!NamespaceName.empty() && !AnonymousInComment.empty())
143     return false;
144   if (NamespaceNameInComment == NamespaceName)
145     return true;
146 
147   // Has namespace comment flowed onto the next line.
148   // } // namespace
149   //   // verylongnamespacenamethatdidnotfitonthepreviouscommentline
150   if (!(Comment->Next && Comment->Next->is(TT_LineComment)))
151     return false;
152 
153   static const llvm::Regex CommentPattern = llvm::Regex(
154       "^/[/*] *( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", llvm::Regex::IgnoreCase);
155 
156   // Pull out just the comment text.
157   if (!CommentPattern.match(Comment->Next->TokenText, &Groups))
158     return false;
159   NamespaceNameInComment = Groups.size() > 2 ? Groups[2] : "";
160 
161   return NamespaceNameInComment == NamespaceName;
162 }
163 
164 void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
165                    const SourceManager &SourceMgr,
166                    tooling::Replacements *Fixes) {
167   auto EndLoc = RBraceTok->Tok.getEndLoc();
168   auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc);
169   auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
170   if (Err) {
171     llvm::errs() << "Error while adding namespace end comment: "
172                  << llvm::toString(std::move(Err)) << "\n";
173   }
174 }
175 
176 void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText,
177                       const SourceManager &SourceMgr,
178                       tooling::Replacements *Fixes) {
179   assert(hasEndComment(RBraceTok));
180   const FormatToken *Comment = RBraceTok->Next;
181   auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(),
182                                              Comment->Tok.getEndLoc());
183   auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText));
184   if (Err) {
185     llvm::errs() << "Error while updating namespace end comment: "
186                  << llvm::toString(std::move(Err)) << "\n";
187   }
188 }
189 } // namespace
190 
191 const FormatToken *
192 getNamespaceToken(const AnnotatedLine *Line,
193                   const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
194   if (!Line->Affected || Line->InPPDirective || !Line->startsWith(tok::r_brace))
195     return nullptr;
196   size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex;
197   if (StartLineIndex == UnwrappedLine::kInvalidIndex)
198     return nullptr;
199   assert(StartLineIndex < AnnotatedLines.size());
200   const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First;
201   if (NamespaceTok->is(tok::l_brace)) {
202     // "namespace" keyword can be on the line preceding '{', e.g. in styles
203     // where BraceWrapping.AfterNamespace is true.
204     if (StartLineIndex > 0) {
205       NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First;
206       if (AnnotatedLines[StartLineIndex - 1]->endsWith(tok::semi))
207         return nullptr;
208     }
209   }
210 
211   return NamespaceTok->getNamespaceToken();
212 }
213 
214 StringRef
215 getNamespaceTokenText(const AnnotatedLine *Line,
216                       const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) {
217   const FormatToken *NamespaceTok = getNamespaceToken(Line, AnnotatedLines);
218   return NamespaceTok ? NamespaceTok->TokenText : StringRef();
219 }
220 
// Constructs the fixer. Both arguments are passed unchanged to the
// TokenAnalyzer base class; no other state is initialized here.
NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env,
                                                     const FormatStyle &Style)
    : TokenAnalyzer(Env, Style) {}
224 
// Walks all annotated lines and collects the replacements that add, rewrite
// or (when Style.CompactNamespaces is set) delete namespace end comments.
//
// Annotator and Tokens are not used by this implementation.
//
// Returns the collected replacements paired with 0. If the '{' and '}'
// tokens in AnnotatedLines do not balance, no replacements are produced.
std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze(
    TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines,
    FormatTokenLexer &Tokens) {
  const SourceManager &SourceMgr = Env.getSourceManager();
  AffectedRangeMgr.computeAffectedLines(AnnotatedLines);
  tooling::Replacements Fixes;

  // Spin through the lines and ensure we have balanced braces.
  // Each '{' counts +1 and each '}' counts -1.
  int Braces = 0;
  for (AnnotatedLine *Line : AnnotatedLines) {
    FormatToken *Tok = Line->First;
    while (Tok) {
      Braces += Tok->is(tok::l_brace) ? 1 : Tok->is(tok::r_brace) ? -1 : 0;
      Tok = Tok->Next;
    }
  }
  // Don't attempt to comment unbalanced braces or this can
  // lead to comments being placed on the closing brace which isn't
  // the matching brace of the namespace. (occurs during incomplete editing).
  if (Braces != 0)
    return {Fixes, 0};

  // State carried across loop iterations:
  // "::"-prefixed names of the namespaces whose end comment was deferred to a
  // later closing brace (compacted namespaces only).
  std::string AllNamespaceNames;
  // Opening-line index recorded at the first closing brace since the last
  // emitted (or skipped-as-short) end comment; SIZE_MAX means "not set".
  size_t StartLineIndex = SIZE_MAX;
  // Text of the namespace token seen when CompactedNamespacesCount was 0.
  StringRef NamespaceTokenText;
  // Number of closing braces whose end comment has been deferred so far.
  unsigned int CompactedNamespacesCount = 0;
  for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) {
    const AnnotatedLine *EndLine = AnnotatedLines[I];
    // Only lines that close a namespace block are of interest.
    const FormatToken *NamespaceTok =
        getNamespaceToken(EndLine, AnnotatedLines);
    if (!NamespaceTok)
      continue;
    FormatToken *RBraceTok = EndLine->First;
    // Skip braces that are already marked as finalized, and mark the ones
    // handled here.
    // NOTE(review): Finalized is presumably also consulted by other passes;
    // confirm before changing where it is set.
    if (RBraceTok->Finalized)
      continue;
    RBraceTok->Finalized = true;
    const FormatToken *EndCommentPrevTok = RBraceTok;
    // Namespaces often end with '};'. In that case, attach namespace end
    // comments to the semicolon tokens.
    if (RBraceTok->Next && RBraceTok->Next->is(tok::semi))
      EndCommentPrevTok = RBraceTok->Next;
    if (StartLineIndex == SIZE_MAX)
      StartLineIndex = EndLine->MatchingOpeningBlockLineIndex;
    std::string NamespaceName = computeName(NamespaceTok);
    if (Style.CompactNamespaces) {
      if (CompactedNamespacesCount == 0)
        NamespaceTokenText = NamespaceTok->TokenText;
      // Defer this namespace's comment to the next line if that line also
      // closes a namespace introduced by a token with the same text, its
      // opening line index is exactly one less than the previous opening line
      // in this run, and its first token is not finalized yet.
      if ((I + 1 < E) &&
          NamespaceTokenText ==
              getNamespaceTokenText(AnnotatedLines[I + 1], AnnotatedLines) &&
          StartLineIndex - CompactedNamespacesCount - 1 ==
              AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex &&
          !AnnotatedLines[I + 1]->First->Finalized) {
        if (hasEndComment(EndCommentPrevTok)) {
          // remove end comment, it will be merged in next one
          updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes);
        }
        ++CompactedNamespacesCount;
        // Prepend, so that names deferred later come first in the merged
        // name.
        if (!NamespaceName.empty())
          AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames;
        continue;
      }
      // This brace ends the run: its comment carries its own name followed by
      // all deferred names, and the compaction state starts over.
      NamespaceName += AllNamespaceNames;
      CompactedNamespacesCount = 0;
      AllNamespaceNames = std::string();
    }
    // The next token in the token stream after the place where the end comment
    // token must be. This is either the next token on the current line or the
    // first token on the next line.
    const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next;
    if (EndCommentNextTok && EndCommentNextTok->is(tok::comment))
      EndCommentNextTok = EndCommentNextTok->Next;
    if (!EndCommentNextTok && I + 1 < E)
      EndCommentNextTok = AnnotatedLines[I + 1]->First;
    // The comment text gets its own trailing newline when a following token
    // exists, has no newline before it, and is not the end of file.
    bool AddNewline = EndCommentNextTok &&
                      EndCommentNextTok->NewlinesBefore == 0 &&
                      EndCommentNextTok->isNot(tok::eof);
    const std::string EndCommentText =
        computeEndCommentText(NamespaceName, AddNewline, NamespaceTok,
                              Style.SpacesInLineCommentPrefix.Minimum);
    if (!hasEndComment(EndCommentPrevTok)) {
      // No comment yet: add one unless the distance in annotated lines
      // between the recorded opening line and this closing line is at most
      // Style.ShortNamespaceLines + 1.
      bool isShort = I - StartLineIndex <= Style.ShortNamespaceLines + 1;
      if (!isShort)
        addEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
    } else if (!validEndComment(EndCommentPrevTok, NamespaceName,
                                NamespaceTok)) {
      // An existing comment that does not pass validation is rewritten.
      updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes);
    }
    // This run is finished; the next closing brace records a new start line.
    StartLineIndex = SIZE_MAX;
  }
  return {Fixes, 0};
}
317 
318 } // namespace format
319 } // namespace clang
320