1 //===--- NamespaceEndCommentsFixer.cpp --------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements NamespaceEndCommentsFixer, a TokenAnalyzer that 11 /// fixes namespace end comments. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #include "NamespaceEndCommentsFixer.h" 16 #include "llvm/Support/Debug.h" 17 #include "llvm/Support/Regex.h" 18 19 #define DEBUG_TYPE "namespace-end-comments-fixer" 20 21 namespace clang { 22 namespace format { 23 24 namespace { 25 // Computes the name of a namespace given the namespace token. 26 // Returns "" for anonymous namespace. 27 std::string computeName(const FormatToken *NamespaceTok) { 28 assert(NamespaceTok && 29 NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) && 30 "expecting a namespace token"); 31 std::string name; 32 const FormatToken *Tok = NamespaceTok->getNextNonComment(); 33 if (NamespaceTok->is(TT_NamespaceMacro)) { 34 // Collects all the non-comment tokens between opening parenthesis 35 // and closing parenthesis or comma. 36 assert(Tok && Tok->is(tok::l_paren) && "expected an opening parenthesis"); 37 Tok = Tok->getNextNonComment(); 38 while (Tok && !Tok->isOneOf(tok::r_paren, tok::comma)) { 39 name += Tok->TokenText; 40 Tok = Tok->getNextNonComment(); 41 } 42 } else { 43 // Skip attributes. 44 if (Tok && Tok->is(tok::l_square)) { 45 for (int NestLevel = 1; NestLevel > 0;) { 46 Tok = Tok->getNextNonComment(); 47 if (!Tok) 48 break; 49 if (Tok->is(tok::l_square)) 50 ++NestLevel; 51 else if (Tok->is(tok::r_square)) 52 --NestLevel; 53 } 54 if (Tok) 55 Tok = Tok->getNextNonComment(); 56 } 57 58 // Use the string after `namespace` as a name candidate until `{` or `::` or 59 // `(`. If the name is empty, use the candicate. 60 std::string FirstNSName; 61 // For `namespace [[foo]] A::B::inline C {` or 62 // `namespace MACRO1 MACRO2 A::B::inline C {`, returns "A::B::inline C". 63 // Peek for the first '::' (or '{' or '(')) and then return all tokens from 64 // one token before that up until the '{'. A '(' might be a macro with 65 // arguments. 66 const FormatToken *FirstNSTok = Tok; 67 while (Tok && !Tok->isOneOf(tok::l_brace, tok::coloncolon, tok::l_paren)) { 68 FirstNSName += FirstNSTok->TokenText; 69 FirstNSTok = Tok; 70 Tok = Tok->getNextNonComment(); 71 } 72 73 Tok = FirstNSTok; 74 while (Tok && !Tok->is(tok::l_brace)) { 75 name += Tok->TokenText; 76 if (Tok->is(tok::kw_inline)) 77 name += " "; 78 Tok = Tok->getNextNonComment(); 79 } 80 if (name.empty()) 81 name = FirstNSName; 82 } 83 return name; 84 } 85 86 std::string computeEndCommentText(StringRef NamespaceName, bool AddNewline, 87 const FormatToken *NamespaceTok, 88 unsigned SpacesToAdd) { 89 std::string text = "//"; 90 text.append(SpacesToAdd, ' '); 91 text += NamespaceTok->TokenText; 92 if (NamespaceTok->is(TT_NamespaceMacro)) 93 text += "("; 94 else if (!NamespaceName.empty()) 95 text += ' '; 96 text += NamespaceName; 97 if (NamespaceTok->is(TT_NamespaceMacro)) 98 text += ")"; 99 if (AddNewline) 100 text += '\n'; 101 return text; 102 } 103 104 bool hasEndComment(const FormatToken *RBraceTok) { 105 return RBraceTok->Next && RBraceTok->Next->is(tok::comment); 106 } 107 108 bool validEndComment(const FormatToken *RBraceTok, StringRef NamespaceName, 109 const FormatToken *NamespaceTok) { 110 assert(hasEndComment(RBraceTok)); 111 const FormatToken *Comment = RBraceTok->Next; 112 113 // Matches a valid namespace end comment. 114 // Valid namespace end comments don't need to be edited. 115 static const llvm::Regex NamespaceCommentPattern = 116 llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *" 117 "namespace( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", 118 llvm::Regex::IgnoreCase); 119 static const llvm::Regex NamespaceMacroCommentPattern = 120 llvm::Regex("^/[/*] *(end (of )?)? *(anonymous|unnamed)? *" 121 "([a-zA-Z0-9_]+)\\(([a-zA-Z0-9:_]*)\\)\\.? *(\\*/)?$", 122 llvm::Regex::IgnoreCase); 123 124 SmallVector<StringRef, 8> Groups; 125 if (NamespaceTok->is(TT_NamespaceMacro) && 126 NamespaceMacroCommentPattern.match(Comment->TokenText, &Groups)) { 127 StringRef NamespaceTokenText = Groups.size() > 4 ? Groups[4] : ""; 128 // The name of the macro must be used. 129 if (NamespaceTokenText != NamespaceTok->TokenText) 130 return false; 131 } else if (NamespaceTok->isNot(tok::kw_namespace) || 132 !NamespaceCommentPattern.match(Comment->TokenText, &Groups)) { 133 // Comment does not match regex. 134 return false; 135 } 136 StringRef NamespaceNameInComment = Groups.size() > 5 ? Groups[5] : ""; 137 // Anonymous namespace comments must not mention a namespace name. 138 if (NamespaceName.empty() && !NamespaceNameInComment.empty()) 139 return false; 140 StringRef AnonymousInComment = Groups.size() > 3 ? Groups[3] : ""; 141 // Named namespace comments must not mention anonymous namespace. 142 if (!NamespaceName.empty() && !AnonymousInComment.empty()) 143 return false; 144 if (NamespaceNameInComment == NamespaceName) 145 return true; 146 147 // Has namespace comment flowed onto the next line. 148 // } // namespace 149 // // verylongnamespacenamethatdidnotfitonthepreviouscommentline 150 if (!(Comment->Next && Comment->Next->is(TT_LineComment))) 151 return false; 152 153 static const llvm::Regex CommentPattern = llvm::Regex( 154 "^/[/*] *( +([a-zA-Z0-9:_]+))?\\.? *(\\*/)?$", llvm::Regex::IgnoreCase); 155 156 // Pull out just the comment text. 157 if (!CommentPattern.match(Comment->Next->TokenText, &Groups)) 158 return false; 159 NamespaceNameInComment = Groups.size() > 2 ? Groups[2] : ""; 160 161 return NamespaceNameInComment == NamespaceName; 162 } 163 164 void addEndComment(const FormatToken *RBraceTok, StringRef EndCommentText, 165 const SourceManager &SourceMgr, 166 tooling::Replacements *Fixes) { 167 auto EndLoc = RBraceTok->Tok.getEndLoc(); 168 auto Range = CharSourceRange::getCharRange(EndLoc, EndLoc); 169 auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText)); 170 if (Err) { 171 llvm::errs() << "Error while adding namespace end comment: " 172 << llvm::toString(std::move(Err)) << "\n"; 173 } 174 } 175 176 void updateEndComment(const FormatToken *RBraceTok, StringRef EndCommentText, 177 const SourceManager &SourceMgr, 178 tooling::Replacements *Fixes) { 179 assert(hasEndComment(RBraceTok)); 180 const FormatToken *Comment = RBraceTok->Next; 181 auto Range = CharSourceRange::getCharRange(Comment->getStartOfNonWhitespace(), 182 Comment->Tok.getEndLoc()); 183 auto Err = Fixes->add(tooling::Replacement(SourceMgr, Range, EndCommentText)); 184 if (Err) { 185 llvm::errs() << "Error while updating namespace end comment: " 186 << llvm::toString(std::move(Err)) << "\n"; 187 } 188 } 189 } // namespace 190 191 const FormatToken * 192 getNamespaceToken(const AnnotatedLine *Line, 193 const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { 194 if (!Line->Affected || Line->InPPDirective || !Line->startsWith(tok::r_brace)) 195 return nullptr; 196 size_t StartLineIndex = Line->MatchingOpeningBlockLineIndex; 197 if (StartLineIndex == UnwrappedLine::kInvalidIndex) 198 return nullptr; 199 assert(StartLineIndex < AnnotatedLines.size()); 200 const FormatToken *NamespaceTok = AnnotatedLines[StartLineIndex]->First; 201 if (NamespaceTok->is(tok::l_brace)) { 202 // "namespace" keyword can be on the line preceding '{', e.g. in styles 203 // where BraceWrapping.AfterNamespace is true. 204 if (StartLineIndex > 0) { 205 NamespaceTok = AnnotatedLines[StartLineIndex - 1]->First; 206 if (AnnotatedLines[StartLineIndex - 1]->endsWith(tok::semi)) 207 return nullptr; 208 } 209 } 210 211 return NamespaceTok->getNamespaceToken(); 212 } 213 214 StringRef 215 getNamespaceTokenText(const AnnotatedLine *Line, 216 const SmallVectorImpl<AnnotatedLine *> &AnnotatedLines) { 217 const FormatToken *NamespaceTok = getNamespaceToken(Line, AnnotatedLines); 218 return NamespaceTok ? NamespaceTok->TokenText : StringRef(); 219 } 220 221 NamespaceEndCommentsFixer::NamespaceEndCommentsFixer(const Environment &Env, 222 const FormatStyle &Style) 223 : TokenAnalyzer(Env, Style) {} 224 225 std::pair<tooling::Replacements, unsigned> NamespaceEndCommentsFixer::analyze( 226 TokenAnnotator &Annotator, SmallVectorImpl<AnnotatedLine *> &AnnotatedLines, 227 FormatTokenLexer &Tokens) { 228 const SourceManager &SourceMgr = Env.getSourceManager(); 229 AffectedRangeMgr.computeAffectedLines(AnnotatedLines); 230 tooling::Replacements Fixes; 231 232 // Spin through the lines and ensure we have balanced braces. 233 int Braces = 0; 234 for (AnnotatedLine *Line : AnnotatedLines) { 235 FormatToken *Tok = Line->First; 236 while (Tok) { 237 Braces += Tok->is(tok::l_brace) ? 1 : Tok->is(tok::r_brace) ? -1 : 0; 238 Tok = Tok->Next; 239 } 240 } 241 // Don't attempt to comment unbalanced braces or this can 242 // lead to comments being placed on the closing brace which isn't 243 // the matching brace of the namespace. (occurs during incomplete editing). 244 if (Braces != 0) 245 return {Fixes, 0}; 246 247 std::string AllNamespaceNames; 248 size_t StartLineIndex = SIZE_MAX; 249 StringRef NamespaceTokenText; 250 unsigned int CompactedNamespacesCount = 0; 251 for (size_t I = 0, E = AnnotatedLines.size(); I != E; ++I) { 252 const AnnotatedLine *EndLine = AnnotatedLines[I]; 253 const FormatToken *NamespaceTok = 254 getNamespaceToken(EndLine, AnnotatedLines); 255 if (!NamespaceTok) 256 continue; 257 FormatToken *RBraceTok = EndLine->First; 258 if (RBraceTok->Finalized) 259 continue; 260 RBraceTok->Finalized = true; 261 const FormatToken *EndCommentPrevTok = RBraceTok; 262 // Namespaces often end with '};'. In that case, attach namespace end 263 // comments to the semicolon tokens. 264 if (RBraceTok->Next && RBraceTok->Next->is(tok::semi)) 265 EndCommentPrevTok = RBraceTok->Next; 266 if (StartLineIndex == SIZE_MAX) 267 StartLineIndex = EndLine->MatchingOpeningBlockLineIndex; 268 std::string NamespaceName = computeName(NamespaceTok); 269 if (Style.CompactNamespaces) { 270 if (CompactedNamespacesCount == 0) 271 NamespaceTokenText = NamespaceTok->TokenText; 272 if ((I + 1 < E) && 273 NamespaceTokenText == 274 getNamespaceTokenText(AnnotatedLines[I + 1], AnnotatedLines) && 275 StartLineIndex - CompactedNamespacesCount - 1 == 276 AnnotatedLines[I + 1]->MatchingOpeningBlockLineIndex && 277 !AnnotatedLines[I + 1]->First->Finalized) { 278 if (hasEndComment(EndCommentPrevTok)) { 279 // remove end comment, it will be merged in next one 280 updateEndComment(EndCommentPrevTok, std::string(), SourceMgr, &Fixes); 281 } 282 ++CompactedNamespacesCount; 283 if (!NamespaceName.empty()) 284 AllNamespaceNames = "::" + NamespaceName + AllNamespaceNames; 285 continue; 286 } 287 NamespaceName += AllNamespaceNames; 288 CompactedNamespacesCount = 0; 289 AllNamespaceNames = std::string(); 290 } 291 // The next token in the token stream after the place where the end comment 292 // token must be. This is either the next token on the current line or the 293 // first token on the next line. 294 const FormatToken *EndCommentNextTok = EndCommentPrevTok->Next; 295 if (EndCommentNextTok && EndCommentNextTok->is(tok::comment)) 296 EndCommentNextTok = EndCommentNextTok->Next; 297 if (!EndCommentNextTok && I + 1 < E) 298 EndCommentNextTok = AnnotatedLines[I + 1]->First; 299 bool AddNewline = EndCommentNextTok && 300 EndCommentNextTok->NewlinesBefore == 0 && 301 EndCommentNextTok->isNot(tok::eof); 302 const std::string EndCommentText = 303 computeEndCommentText(NamespaceName, AddNewline, NamespaceTok, 304 Style.SpacesInLineCommentPrefix.Minimum); 305 if (!hasEndComment(EndCommentPrevTok)) { 306 bool isShort = I - StartLineIndex <= Style.ShortNamespaceLines + 1; 307 if (!isShort) 308 addEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes); 309 } else if (!validEndComment(EndCommentPrevTok, NamespaceName, 310 NamespaceTok)) { 311 updateEndComment(EndCommentPrevTok, EndCommentText, SourceMgr, &Fixes); 312 } 313 StartLineIndex = SIZE_MAX; 314 } 315 return {Fixes, 0}; 316 } 317 318 } // namespace format 319 } // namespace clang 320