1 //===--- SourceCode.h - Manipulating source code as strings -----*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 #include "SourceCode.h" 9 10 #include "FuzzyMatch.h" 11 #include "Preamble.h" 12 #include "Protocol.h" 13 #include "refactor/Tweak.h" 14 #include "support/Context.h" 15 #include "support/Logger.h" 16 #include "support/Threading.h" 17 #include "clang/AST/ASTContext.h" 18 #include "clang/Basic/LangOptions.h" 19 #include "clang/Basic/SourceLocation.h" 20 #include "clang/Basic/SourceManager.h" 21 #include "clang/Basic/TokenKinds.h" 22 #include "clang/Driver/Types.h" 23 #include "clang/Format/Format.h" 24 #include "clang/Lex/Lexer.h" 25 #include "clang/Lex/Preprocessor.h" 26 #include "clang/Lex/Token.h" 27 #include "clang/Tooling/Core/Replacement.h" 28 #include "clang/Tooling/Syntax/Tokens.h" 29 #include "llvm/ADT/ArrayRef.h" 30 #include "llvm/ADT/None.h" 31 #include "llvm/ADT/STLExtras.h" 32 #include "llvm/ADT/StringExtras.h" 33 #include "llvm/ADT/StringMap.h" 34 #include "llvm/ADT/StringRef.h" 35 #include "llvm/Support/Compiler.h" 36 #include "llvm/Support/Errc.h" 37 #include "llvm/Support/Error.h" 38 #include "llvm/Support/ErrorHandling.h" 39 #include "llvm/Support/LineIterator.h" 40 #include "llvm/Support/MemoryBuffer.h" 41 #include "llvm/Support/Path.h" 42 #include "llvm/Support/SHA1.h" 43 #include "llvm/Support/VirtualFileSystem.h" 44 #include "llvm/Support/xxhash.h" 45 #include <algorithm> 46 #include <cstddef> 47 #include <string> 48 #include <vector> 49 50 namespace clang { 51 namespace clangd { 52 53 // Here be dragons. LSP positions use columns measured in *UTF-16 code units*! 54 // Clangd uses UTF-8 and byte-offsets internally, so conversion is nontrivial. 
55 56 // Iterates over unicode codepoints in the (UTF-8) string. For each, 57 // invokes CB(UTF-8 length, UTF-16 length), and breaks if it returns true. 58 // Returns true if CB returned true, false if we hit the end of string. 59 // 60 // If the string is not valid UTF-8, we log this error and "decode" the 61 // text in some arbitrary way. This is pretty sad, but this tends to happen deep 62 // within indexing of headers where clang misdetected the encoding, and 63 // propagating the error all the way back up is (probably?) not be worth it. 64 template <typename Callback> 65 static bool iterateCodepoints(llvm::StringRef U8, const Callback &CB) { 66 bool LoggedInvalid = false; 67 // A codepoint takes two UTF-16 code unit if it's astral (outside BMP). 68 // Astral codepoints are encoded as 4 bytes in UTF-8, starting with 11110xxx. 69 for (size_t I = 0; I < U8.size();) { 70 unsigned char C = static_cast<unsigned char>(U8[I]); 71 if (LLVM_LIKELY(!(C & 0x80))) { // ASCII character. 72 if (CB(1, 1)) 73 return true; 74 ++I; 75 continue; 76 } 77 // This convenient property of UTF-8 holds for all non-ASCII characters. 78 size_t UTF8Length = llvm::countLeadingOnes(C); 79 // 0xxx is ASCII, handled above. 10xxx is a trailing byte, invalid here. 80 // 11111xxx is not valid UTF-8 at all, maybe some ISO-8859-*. 81 if (LLVM_UNLIKELY(UTF8Length < 2 || UTF8Length > 4)) { 82 if (!LoggedInvalid) { 83 elog("File has invalid UTF-8 near offset {0}: {1}", I, llvm::toHex(U8)); 84 LoggedInvalid = true; 85 } 86 // We can't give a correct result, but avoid returning something wild. 87 // Pretend this is a valid ASCII byte, for lack of better options. 88 // (Too late to get ISO-8859-* right, we've skipped some bytes already). 89 if (CB(1, 1)) 90 return true; 91 ++I; 92 continue; 93 } 94 I += UTF8Length; // Skip over all trailing bytes. 95 // A codepoint takes two UTF-16 code unit if it's astral (outside BMP). 96 // Astral codepoints are encoded as 4 bytes in UTF-8 (11110xxx ...) 
97 if (CB(UTF8Length, UTF8Length == 4 ? 2 : 1)) 98 return true; 99 } 100 return false; 101 } 102 103 // Returns the byte offset into the string that is an offset of \p Units in 104 // the specified encoding. 105 // Conceptually, this converts to the encoding, truncates to CodeUnits, 106 // converts back to UTF-8, and returns the length in bytes. 107 static size_t measureUnits(llvm::StringRef U8, int Units, OffsetEncoding Enc, 108 bool &Valid) { 109 Valid = Units >= 0; 110 if (Units <= 0) 111 return 0; 112 size_t Result = 0; 113 switch (Enc) { 114 case OffsetEncoding::UTF8: 115 Result = Units; 116 break; 117 case OffsetEncoding::UTF16: 118 Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) { 119 Result += U8Len; 120 Units -= U16Len; 121 return Units <= 0; 122 }); 123 if (Units < 0) // Offset in the middle of a surrogate pair. 124 Valid = false; 125 break; 126 case OffsetEncoding::UTF32: 127 Valid = iterateCodepoints(U8, [&](int U8Len, int U16Len) { 128 Result += U8Len; 129 Units--; 130 return Units <= 0; 131 }); 132 break; 133 case OffsetEncoding::UnsupportedEncoding: 134 llvm_unreachable("unsupported encoding"); 135 } 136 // Don't return an out-of-range index if we overran. 137 if (Result > U8.size()) { 138 Valid = false; 139 return U8.size(); 140 } 141 return Result; 142 } 143 144 Key<OffsetEncoding> kCurrentOffsetEncoding; 145 static OffsetEncoding lspEncoding() { 146 auto *Enc = Context::current().get(kCurrentOffsetEncoding); 147 return Enc ? *Enc : OffsetEncoding::UTF16; 148 } 149 150 // Like most strings in clangd, the input is UTF-8 encoded. 
// Returns the length of Code measured in units of the current LSP offset
// encoding (see lspEncoding()): bytes for UTF-8, UTF-16 code units for UTF-16,
// and one per codepoint callback for UTF-32.
size_t lspLength(llvm::StringRef Code) {
  size_t Count = 0;
  switch (lspEncoding()) {
  case OffsetEncoding::UTF8:
    Count = Code.size();
    break;
  case OffsetEncoding::UTF16:
    iterateCodepoints(Code, [&](int U8Len, int U16Len) {
      Count += U16Len;
      return false;
    });
    break;
  case OffsetEncoding::UTF32:
    iterateCodepoints(Code, [&](int U8Len, int U16Len) {
      ++Count;
      return false;
    });
    break;
  case OffsetEncoding::UnsupportedEncoding:
    llvm_unreachable("unsupported encoding");
  }
  return Count;
}

// Converts the LSP position P (zero-based line, column in the current LSP
// encoding) into a byte offset into Code.
// Returns an invalid_argument error if the line or character is negative, if
// the line is past the last line of Code, or - unless
// AllowColumnsBeyondLineLength is set - if measureUnits() reports the column
// as invalid for that line. When AllowColumnsBeyondLineLength is set, the
// (clamped) offset computed by measureUnits() is used regardless.
llvm::Expected<size_t> positionToOffset(llvm::StringRef Code, Position P,
                                        bool AllowColumnsBeyondLineLength) {
  if (P.line < 0)
    return error(llvm::errc::invalid_argument,
                 "Line value can't be negative ({0})", P.line);
  if (P.character < 0)
    return error(llvm::errc::invalid_argument,
                 "Character value can't be negative ({0})", P.character);
  // Skip P.line newlines to find where the requested line begins.
  size_t StartOfLine = 0;
  for (int I = 0; I != P.line; ++I) {
    size_t NextNL = Code.find('\n', StartOfLine);
    if (NextNL == llvm::StringRef::npos)
      return error(llvm::errc::invalid_argument,
                   "Line value is out of range ({0})", P.line);
    StartOfLine = NextNL + 1;
  }
  // The line's text, excluding the terminating newline.
  StringRef Line =
      Code.substr(StartOfLine).take_until([](char C) { return C == '\n'; });

  // P.character may be in UTF-16, transcode if necessary.
  bool Valid;
  size_t ByteInLine = measureUnits(Line, P.character, lspEncoding(), Valid);
  if (!Valid && !AllowColumnsBeyondLineLength)
    return error(llvm::errc::invalid_argument,
                 "{0} offset {1} is invalid for line {2}", lspEncoding(),
                 P.character, P.line);
  return StartOfLine + ByteInLine;
}

// Converts a byte offset into Code to an LSP position (zero-based line, column
// measured with lspLength()). Offsets past the end of Code are clamped to
// Code.size().
Position offsetToPosition(llvm::StringRef Code, size_t Offset) {
  Offset = std::min(Code.size(), Offset);
  llvm::StringRef Before = Code.substr(0, Offset);
  int Lines = Before.count('\n');
  size_t PrevNL = Before.rfind('\n');
  size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1);
  Position Pos;
  Pos.line = Lines;
  Pos.character = lspLength(Before.substr(StartOfLine));
  return Pos;
}

// Converts the spelling location of Loc to an LSP position.
// If the buffer data cannot be obtained, only the line is filled in and
// P.character keeps whatever value Position default-initializes it to.
Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc) {
  // We use the SourceManager's line tables, but its column number is in bytes.
  FileID FID;
  unsigned Offset;
  std::tie(FID, Offset) = SM.getDecomposedSpellingLoc(Loc);
  Position P;
  P.line = static_cast<int>(SM.getLineNumber(FID, Offset)) - 1;
  bool Invalid = false;
  llvm::StringRef Code = SM.getBufferData(FID, &Invalid);
  if (!Invalid) {
    auto ColumnInBytes = SM.getColumnNumber(FID, Offset) - 1;
    // The text between the start of the line and Offset, to be re-measured in
    // the LSP encoding.
    auto LineSoFar = Code.substr(Offset - ColumnInBytes, ColumnInBytes);
    P.character = lspLength(LineSoFar);
  }
  return P;
}

// Returns false for macro locations whose spelling location prints as starting
// with "<scratch" or "<command line>"; returns true for everything else,
// including all non-macro locations.
bool isSpelledInSource(SourceLocation Loc, const SourceManager &SM) {
  if (Loc.isMacroID()) {
    std::string PrintLoc = SM.getSpellingLoc(Loc).printToString(SM);
    if (llvm::StringRef(PrintLoc).startswith("<scratch") ||
        llvm::StringRef(PrintLoc).startswith("<command line>"))
      return false;
  }
  return true;
}

// Returns true if both endpoints of R are valid, decompose to the same valid
// FileID, and the begin offset does not exceed the end offset.
bool isValidFileRange(const SourceManager &Mgr, SourceRange R) {
  if (!R.getBegin().isValid() || !R.getEnd().isValid())
    return false;

  FileID BeginFID;
  size_t BeginOffset = 0;
  std::tie(BeginFID, BeginOffset) = Mgr.getDecomposedLoc(R.getBegin());

  FileID EndFID;
  size_t EndOffset = 0;
  std::tie(EndFID, EndOffset) = Mgr.getDecomposedLoc(R.getEnd());

  return BeginFID.isValid() && BeginFID == EndFID && BeginOffset <= EndOffset;
}

// Returns the location of the '#' that starts the directive which included
// IncludedFile. Returns an invalid location if the including buffer cannot be
// read, or if no '#' is found between the include location and the start of
// its line (or of the file).
SourceLocation includeHashLoc(FileID IncludedFile, const SourceManager &SM) {
  assert(SM.getLocForEndOfFile(IncludedFile).isFileID());
  FileID IncludingFile;
  unsigned Offset;
  std::tie(IncludingFile, Offset) =
      SM.getDecomposedExpansionLoc(SM.getIncludeLoc(IncludedFile));
  bool Invalid = false;
  llvm::StringRef Buf = SM.getBufferData(IncludingFile, &Invalid);
  if (Invalid)
    return SourceLocation();
  // Now buf is "...\n#include <foo>\n..."
  // and Offset points here:   ^
  // Rewind to the preceding # on the line.
  assert(Offset < Buf.size());
  for (;; --Offset) {
    if (Buf[Offset] == '#')
      return SM.getComposedLoc(IncludingFile, Offset);
    if (Buf[Offset] == '\n' || Offset == 0) // no hash, what's going on?
      return SourceLocation();
  }
}

// Returns the length of the raw token at Loc, or 0 when Lexer::getRawToken()
// returns true (i.e. it reports that no token could be lexed there).
// A '>>' token is reported with length 1; see the FIXME below.
static unsigned getTokenLengthAtLoc(SourceLocation Loc, const SourceManager &SM,
                                    const LangOptions &LangOpts) {
  Token TheTok;
  if (Lexer::getRawToken(Loc, TheTok, SM, LangOpts))
    return 0;
  // FIXME: Here we check whether the token at the location is a greatergreater
  // (>>) token and consider it as a single greater (>). This is to get it
  // working for templates but it isn't correct for the right shift operator. We
  // can avoid this by using half open char ranges in getFileRange() but getting
  // token ending is not well supported in macroIDs.
  if (TheTok.is(tok::greatergreater))
    return 1;
  return TheTok.getLength();
}

// Returns location of the last character of the token at a given loc
// (BeginLoc itself if the token length is reported as 0).
static SourceLocation getLocForTokenEnd(SourceLocation BeginLoc,
                                        const SourceManager &SM,
                                        const LangOptions &LangOpts) {
  unsigned Len = getTokenLengthAtLoc(BeginLoc, SM, LangOpts);
  return BeginLoc.getLocWithOffset(Len ? Len - 1 : 0);
}

// Returns location of the starting of the token at a given EndLoc
// The result is EndLoc moved back by the length of the token lexed at EndLoc.
static SourceLocation getLocForTokenBegin(SourceLocation EndLoc,
                                          const SourceManager &SM,
                                          const LangOptions &LangOpts) {
  return EndLoc.getLocWithOffset(
      -(signed)getTokenLengthAtLoc(EndLoc, SM, LangOpts));
}

// Converts a char source range to a token range.
// A range that is already a token range is returned unchanged.
static SourceRange toTokenRange(CharSourceRange Range, const SourceManager &SM,
                                const LangOptions &LangOpts) {
  if (!Range.isTokenRange())
    Range.setEnd(getLocForTokenBegin(Range.getEnd(), SM, LangOpts));
  return Range.getAsRange();
}
// Returns the union of two token ranges.
// To find the maximum of the Ends of the ranges, we compare the location of the
// last character of the token.
static SourceRange unionTokenRange(SourceRange R1, SourceRange R2,
                                   const SourceManager &SM,
                                   const LangOptions &LangOpts) {
  SourceLocation Begin =
      SM.isBeforeInTranslationUnit(R1.getBegin(), R2.getBegin())
          ? R1.getBegin()
          : R2.getBegin();
  SourceLocation End =
      SM.isBeforeInTranslationUnit(getLocForTokenEnd(R1.getEnd(), SM, LangOpts),
                                   getLocForTokenEnd(R2.getEnd(), SM, LangOpts))
          ? R2.getEnd()
          : R1.getEnd();
  return SourceRange(Begin, End);
}

// Given a range whose endpoints may be in different expansions or files,
// tries to find a range within a common file by following up the expansion and
// include location in each.
339 static SourceRange rangeInCommonFile(SourceRange R, const SourceManager &SM, 340 const LangOptions &LangOpts) { 341 // Fast path for most common cases. 342 if (SM.isWrittenInSameFile(R.getBegin(), R.getEnd())) 343 return R; 344 // Record the stack of expansion locations for the beginning, keyed by FileID. 345 llvm::DenseMap<FileID, SourceLocation> BeginExpansions; 346 for (SourceLocation Begin = R.getBegin(); Begin.isValid(); 347 Begin = Begin.isFileID() 348 ? includeHashLoc(SM.getFileID(Begin), SM) 349 : SM.getImmediateExpansionRange(Begin).getBegin()) { 350 BeginExpansions[SM.getFileID(Begin)] = Begin; 351 } 352 // Move up the stack of expansion locations for the end until we find the 353 // location in BeginExpansions with that has the same file id. 354 for (SourceLocation End = R.getEnd(); End.isValid(); 355 End = End.isFileID() ? includeHashLoc(SM.getFileID(End), SM) 356 : toTokenRange(SM.getImmediateExpansionRange(End), 357 SM, LangOpts) 358 .getEnd()) { 359 auto It = BeginExpansions.find(SM.getFileID(End)); 360 if (It != BeginExpansions.end()) { 361 if (SM.getFileOffset(It->second) > SM.getFileOffset(End)) 362 return SourceLocation(); 363 return {It->second, End}; 364 } 365 } 366 return SourceRange(); 367 } 368 369 // Find an expansion range (not necessarily immediate) the ends of which are in 370 // the same file id. 371 static SourceRange 372 getExpansionTokenRangeInSameFile(SourceLocation Loc, const SourceManager &SM, 373 const LangOptions &LangOpts) { 374 return rangeInCommonFile( 375 toTokenRange(SM.getImmediateExpansionRange(Loc), SM, LangOpts), SM, 376 LangOpts); 377 } 378 379 // Returns the file range for a given Location as a Token Range 380 // This is quite similar to getFileLoc in SourceManager as both use 381 // getImmediateExpansionRange and getImmediateSpellingLoc (for macro IDs). 382 // However: 383 // - We want to maintain the full range information as we move from one file to 384 // the next. 
//   getFileLoc only uses the BeginLoc of getImmediateExpansionRange.
// - We want to split '>>' tokens as the lexer parses the '>>' in nested
//   template instantiations as a '>>' instead of two '>'s.
// There is also getExpansionRange but it simply calls
// getImmediateExpansionRange on the begin and ends separately which is wrong.
static SourceRange getTokenFileRange(SourceLocation Loc,
                                     const SourceManager &SM,
                                     const LangOptions &LangOpts) {
  SourceRange FileRange = Loc;
  // Keep stepping outwards until the range begins at a file location.
  while (!FileRange.getBegin().isFileID()) {
    if (SM.isMacroArgExpansion(FileRange.getBegin())) {
      FileRange = unionTokenRange(
          SM.getImmediateSpellingLoc(FileRange.getBegin()),
          SM.getImmediateSpellingLoc(FileRange.getEnd()), SM, LangOpts);
      assert(SM.isWrittenInSameFile(FileRange.getBegin(), FileRange.getEnd()));
    } else {
      SourceRange ExpansionRangeForBegin =
          getExpansionTokenRangeInSameFile(FileRange.getBegin(), SM, LangOpts);
      SourceRange ExpansionRangeForEnd =
          getExpansionTokenRangeInSameFile(FileRange.getEnd(), SM, LangOpts);
      if (ExpansionRangeForBegin.isInvalid() ||
          ExpansionRangeForEnd.isInvalid())
        return SourceRange();
      assert(SM.isWrittenInSameFile(ExpansionRangeForBegin.getBegin(),
                                    ExpansionRangeForEnd.getBegin()) &&
             "Both Expansion ranges should be in same file.");
      FileRange = unionTokenRange(ExpansionRangeForBegin, ExpansionRangeForEnd,
                                  SM, LangOpts);
    }
  }
  return FileRange;
}

// Returns true if the expansion location of Loc lies in the main file or in
// the preamble file. Invalid locations yield false.
bool isInsideMainFile(SourceLocation Loc, const SourceManager &SM) {
  if (!Loc.isValid())
    return false;
  FileID FID = SM.getFileID(SM.getExpansionLoc(Loc));
  return FID == SM.getMainFileID() || FID == SM.getPreambleFileID();
}

// Turns R into a half-open (char) range whose endpoints are in one file.
// Returns None if the file range of either endpoint, or the combined result,
// fails isValidFileRange().
llvm::Optional<SourceRange> toHalfOpenFileRange(const SourceManager &SM,
                                                const LangOptions &LangOpts,
                                                SourceRange R) {
  SourceRange R1 = getTokenFileRange(R.getBegin(), SM, LangOpts);
  if (!isValidFileRange(SM, R1))
    return llvm::None;

  SourceRange R2 = getTokenFileRange(R.getEnd(), SM, LangOpts);
  if (!isValidFileRange(SM, R2))
    return llvm::None;

  SourceRange Result =
      rangeInCommonFile(unionTokenRange(R1, R2, SM, LangOpts), SM, LangOpts);
  unsigned TokLen = getTokenLengthAtLoc(Result.getEnd(), SM, LangOpts);
  // Convert from closed token range to half-open (char) range
  Result.setEnd(Result.getEnd().getLocWithOffset(TokLen));
  if (!isValidFileRange(SM, Result))
    return llvm::None;

  return Result;
}

// Returns the text of the buffer between the begin and end offsets of R.
// R must satisfy isValidFileRange() and its buffer must be available; both
// are asserted.
llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R) {
  assert(isValidFileRange(SM, R));
  auto Buf = SM.getBufferOrNone(SM.getFileID(R.getBegin()));
  assert(Buf);

  size_t BeginOffset = SM.getFileOffset(R.getBegin());
  size_t EndOffset = SM.getFileOffset(R.getEnd());
  return Buf->getBuffer().substr(BeginOffset, EndOffset - BeginOffset);
}

// Converts the LSP position P into a SourceLocation in the main file.
// Columns beyond the end of the line are rejected; any error from
// positionToOffset() is returned to the caller.
llvm::Expected<SourceLocation> sourceLocationInMainFile(const SourceManager &SM,
                                                        Position P) {
  llvm::StringRef Code = SM.getBufferOrFake(SM.getMainFileID()).getBuffer();
  auto Offset =
      positionToOffset(Code, P, /*AllowColumnsBeyondLineLength=*/false);
  if (!Offset)
    return Offset.takeError();
  return SM.getLocForStartOfFile(SM.getMainFileID()).getLocWithOffset(*Offset);
}

// Converts both endpoints of the half-open range R to LSP positions.
Range halfOpenToRange(const SourceManager &SM, CharSourceRange R) {
  // Clang is 1-based, LSP uses 0-based indexes.
  Position Begin = sourceLocToPosition(SM, R.getBegin());
  Position End = sourceLocToPosition(SM, R.getEnd());

  return {Begin, End};
}

// Grows A in place so that it also covers B.
void unionRanges(Range &A, Range B) {
  if (B.start < A.start)
    A.start = B.start;
  if (A.end < B.end)
    A.end = B.end;
}

// Returns the 1-based (line, column) of the byte Offset in Code, with the
// column counted in bytes. Offsets past the end are clamped to Code.size().
std::pair<size_t, size_t> offsetToClangLineColumn(llvm::StringRef Code,
                                                  size_t Offset) {
  Offset = std::min(Code.size(), Offset);
  llvm::StringRef Before = Code.substr(0, Offset);
  int Lines = Before.count('\n');
  size_t PrevNL = Before.rfind('\n');
  size_t StartOfLine = (PrevNL == llvm::StringRef::npos) ? 0 : (PrevNL + 1);
  return {Lines + 1, Offset - StartOfLine + 1};
}

// Splits QName at its last "::" into (scope, unqualified name). The scope
// keeps its trailing "::"; it is empty when QName contains no "::".
std::pair<StringRef, StringRef> splitQualifiedName(StringRef QName) {
  size_t Pos = QName.rfind("::");
  if (Pos == llvm::StringRef::npos)
    return {llvm::StringRef(), QName};
  return {QName.substr(0, Pos + 2), QName.substr(Pos + 2)};
}

// Converts a byte-offset based replacement over Code into an LSP TextEdit.
TextEdit replacementToEdit(llvm::StringRef Code,
                           const tooling::Replacement &R) {
  Range ReplacementRange = {
      offsetToPosition(Code, R.getOffset()),
      offsetToPosition(Code, R.getOffset() + R.getLength())};
  return {ReplacementRange, std::string(R.getReplacementText())};
}

// Converts every replacement in Repls with replacementToEdit(), preserving the
// iteration order of Repls.
std::vector<TextEdit> replacementsToEdits(llvm::StringRef Code,
                                          const tooling::Replacements &Repls) {
  std::vector<TextEdit> Edits;
  for (const auto &R : Repls)
    Edits.push_back(replacementToEdit(Code, R));
  return Edits;
}

// Returns an absolute path for F with its parent directory replaced by the
// FileManager's canonical name for that directory.
// Returns None if F is null or a relative path cannot be made absolute. If
// the parent directory cannot be looked up, the absolute (but not
// canonicalized) path is returned.
llvm::Optional<std::string> getCanonicalPath(const FileEntry *F,
                                             const SourceManager &SourceMgr) {
  if (!F)
    return None;

  llvm::SmallString<128> FilePath = F->getName();
  if (!llvm::sys::path::is_absolute(FilePath)) {
    if (auto EC =
            SourceMgr.getFileManager().getVirtualFileSystem().makeAbsolute(
                FilePath)) {
      elog("Could not turn relative path '{0}' to absolute: {1}", FilePath,
           EC.message());
      return None;
    }
  }

  // Handle the symbolic link path case where the current working directory
  // (getCurrentWorkingDirectory) is a symlink. We always want the real
  // file path (instead of the symlink path) for the C++ symbols.
  //
  // Consider the following example:
  //
  //   src dir: /project/src/foo.h
  //   current working directory (symlink): /tmp/build -> /project/src/
  //
  //  The file path of Symbol is "/project/src/foo.h" instead of
  //  "/tmp/build/foo.h"
  if (auto Dir = SourceMgr.getFileManager().getDirectory(
          llvm::sys::path::parent_path(FilePath))) {
    llvm::SmallString<128> RealPath;
    llvm::StringRef DirName = SourceMgr.getFileManager().getCanonicalName(*Dir);
    llvm::sys::path::append(RealPath, DirName,
                            llvm::sys::path::filename(FilePath));
    return RealPath.str().str();
  }

  return FilePath.str().str();
}

// Converts a FixItHint into an LSP TextEdit: the range is the hint's
// RemoveRange mapped to a file char range, the new text is its CodeToInsert.
TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M,
                    const LangOptions &L) {
  TextEdit Result;
  Result.range =
      halfOpenToRange(M, Lexer::makeFileCharRange(FixIt.RemoveRange, M, L));
  Result.newText = FixIt.CodeToInsert;
  return Result;
}

// Computes the xxHash64 of Content and stores it into the digest one byte at
// a time, least-significant byte first.
FileDigest digest(llvm::StringRef Content) {
  uint64_t Hash{llvm::xxHash64(Content)};
  FileDigest Result;
  for (unsigned I = 0; I < Result.size(); ++I) {
    Result[I] = uint8_t(Hash);
    Hash >>= 8;
  }
  return Result;
}

// Returns the digest of the buffer contents of FID, or None if the buffer
// data is invalid.
llvm::Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {
  bool Invalid = false;
  llvm::StringRef Content = SM.getBufferData(FID, &Invalid);
  if (Invalid)
    return None;
  return digest(Content);
}

// Asks clang-format for the style that applies to File (with the given
// Content), using the filesystem view provided by TFS. If that fails, the
// error is logged and LLVM style is returned instead.
format::FormatStyle getFormatStyleForFile(llvm::StringRef File,
                                          llvm::StringRef Content,
                                          const ThreadsafeFS &TFS) {
  auto Style = format::getStyle(format::DefaultFormatStyle, File,
                                format::DefaultFallbackStyle, Content,
                                TFS.view(/*CWD=*/llvm::None).get());
  if (!Style) {
    log("getStyle() failed for file {0}: {1}. Fallback is LLVM style.", File,
        Style.takeError());
    return format::getLLVMStyle();
  }
  return *Style;
}

// Runs cleanupAroundReplacements() and then formatReplacements() over
// Replaces. If the cleanup step fails, its error is returned and no formatting
// is attempted.
llvm::Expected<tooling::Replacements>
cleanupAndFormat(StringRef Code, const tooling::Replacements &Replaces,
                 const format::FormatStyle &Style) {
  auto CleanReplaces = cleanupAroundReplacements(Code, Replaces, Style);
  if (!CleanReplaces)
    return CleanReplaces;
  return formatReplacements(Code, std::move(*CleanReplaces), Style);
}

// Tokenizes Code as a standalone in-memory file and invokes Action on every
// token, together with the SourceManager that owns the token locations.
static void
lex(llvm::StringRef Code, const LangOptions &LangOpts,
    llvm::function_ref<void(const syntax::Token &, const SourceManager &SM)>
        Action) {
  // FIXME: InMemoryFileAdapter crashes unless the buffer is null terminated!
  std::string NullTerminatedCode = Code.str();
  SourceManagerForFile FileSM("mock_file_name.cpp", NullTerminatedCode);
  auto &SM = FileSM.get();
  for (const auto &Tok : syntax::tokenize(SM.getMainFileID(), SM, LangOpts))
    Action(Tok, SM);
}

// Lexes Content with the language options derived from Style and counts how
// often each identifier occurs. Tokens that have a keyword spelling are
// counted as well, under that spelling.
llvm::StringMap<unsigned> collectIdentifiers(llvm::StringRef Content,
                                             const format::FormatStyle &Style) {
  llvm::StringMap<unsigned> Identifiers;
  auto LangOpt = format::getFormattingLangOpts(Style);
  lex(Content, LangOpt, [&](const syntax::Token &Tok, const SourceManager &SM) {
    if (Tok.kind() == tok::identifier)
      ++Identifiers[Tok.text(SM)];
    // FIXME: Should this function really return keywords too ?
    else if (const auto *Keyword = tok::getKeywordSpelling(Tok.kind()))
      ++Identifiers[Keyword];
  });
  return Identifiers;
}

// Lexes Content and returns the range of every identifier token whose text
// equals Identifier, in lexing order.
std::vector<Range> collectIdentifierRanges(llvm::StringRef Identifier,
                                           llvm::StringRef Content,
                                           const LangOptions &LangOpts) {
  std::vector<Range> Ranges;
  lex(Content, LangOpts,
      [&](const syntax::Token &Tok, const SourceManager &SM) {
        if (Tok.kind() != tok::identifier || Tok.text(SM) != Identifier)
          return;
        Ranges.push_back(halfOpenToRange(SM, Tok.range(SM).toCharRange(SM)));
      });
  return Ranges;
}

// Returns true if NewName is present in an IdentifierTable freshly built for
// LangOpts.
bool isKeyword(llvm::StringRef NewName, const LangOptions &LangOpts) {
  // Keywords are initialized in constructor.
  clang::IdentifierTable KeywordsTable(LangOpts);
  return KeywordsTable.find(NewName) != KeywordsTable.end();
}

namespace {
// One change to the set of visible namespaces, as reported by
// parseNamespaceEvents(). Pos is the position of the token that completed the
// construct.
struct NamespaceEvent {
  enum {
    BeginNamespace, // namespace <ns> {.     Payload is resolved <ns>.
    EndNamespace,   // } // namespace <ns>.  Payload is resolved *outer*
                    // namespace.
    UsingDirective  // using namespace <ns>. Payload is unresolved <ns>.
  } Trigger;
  std::string Payload;
  Position Pos;
};
// Scans C++ source code for constructs that change the visible namespaces.
// This is a token-level state machine, not a parser: any token that does not
// fit the construct being recognized resets the state to Default.
void parseNamespaceEvents(llvm::StringRef Code, const LangOptions &LangOpts,
                          llvm::function_ref<void(NamespaceEvent)> Callback) {

  // Stack of enclosing namespaces, e.g. {"clang", "clangd"}
  std::vector<std::string> Enclosing; // Contains e.g. "clang", "clangd"
  // Stack counts open braces. true if the brace opened a namespace.
  std::vector<bool> BraceStack;

  enum {
    Default,
    Namespace,          // just saw 'namespace'
    NamespaceName,      // just saw 'namespace' NSName
    Using,              // just saw 'using'
    UsingNamespace,     // just saw 'using namespace'
    UsingNamespaceName, // just saw 'using namespace' NSName
  } State = Default;
  // Accumulates the (possibly qualified) name currently being read.
  std::string NSName;

  NamespaceEvent Event;
  lex(Code, LangOpts, [&](const syntax::Token &Tok, const SourceManager &SM) {
    Event.Pos = sourceLocToPosition(SM, Tok.location());
    switch (Tok.kind()) {
    case tok::kw_using:
      State = State == Default ? Using : Default;
      break;
    case tok::kw_namespace:
      switch (State) {
      case Using:
        State = UsingNamespace;
        break;
      case Default:
        State = Namespace;
        break;
      default:
        State = Default;
        break;
      }
      break;
    case tok::identifier:
      switch (State) {
      case UsingNamespace:
        NSName.clear();
        LLVM_FALLTHROUGH;
      case UsingNamespaceName:
        NSName.append(Tok.text(SM).str());
        State = UsingNamespaceName;
        break;
      case Namespace:
        NSName.clear();
        LLVM_FALLTHROUGH;
      case NamespaceName:
        NSName.append(Tok.text(SM).str());
        State = NamespaceName;
        break;
      case Using:
      case Default:
        State = Default;
        break;
      }
      break;
    case tok::coloncolon:
      // This can come at the beginning or in the middle of a namespace
      // name.
      switch (State) {
      case UsingNamespace:
        NSName.clear();
        LLVM_FALLTHROUGH;
      case UsingNamespaceName:
        NSName.append("::");
        State = UsingNamespaceName;
        break;
      case NamespaceName:
        NSName.append("::");
        State = NamespaceName;
        break;
      case Namespace: // Not legal here.
      case Using:
      case Default:
        State = Default;
        break;
      }
      break;
    case tok::l_brace:
      // Record which { started a namespace, so we know when } ends one.
      if (State == NamespaceName) {
        // Parsed: namespace <name> {
        BraceStack.push_back(true);
        Enclosing.push_back(NSName);
        Event.Trigger = NamespaceEvent::BeginNamespace;
        Event.Payload = llvm::join(Enclosing, "::");
        Callback(Event);
      } else {
        // This case includes anonymous namespaces (State = Namespace).
        // For our purposes, they're not namespaces and we ignore them.
        BraceStack.push_back(false);
      }
      State = Default;
      break;
    case tok::r_brace:
      // If braces are unmatched, we're going to be confused, but don't
      // crash.
      if (!BraceStack.empty()) {
        if (BraceStack.back()) {
          // Parsed: } // namespace
          Enclosing.pop_back();
          Event.Trigger = NamespaceEvent::EndNamespace;
          Event.Payload = llvm::join(Enclosing, "::");
          Callback(Event);
        }
        BraceStack.pop_back();
      }
      break;
    case tok::semi:
      if (State == UsingNamespaceName) {
        // Parsed: using namespace <name> ;
        Event.Trigger = NamespaceEvent::UsingDirective;
        // NSName is left moved-from here; it is cleared again before the next
        // name is accumulated.
        Event.Payload = std::move(NSName);
        Callback(Event);
      }
      State = Default;
      break;
    default:
      State = Default;
      break;
    }
  });
}

// Returns the prefix namespaces of NS: {"" ... NS}.
// Each result is a prefix of NS that ends at the end of one of its
// "::"-separated components.
llvm::SmallVector<llvm::StringRef> ancestorNamespaces(llvm::StringRef NS) {
  llvm::SmallVector<llvm::StringRef> Results;
  Results.push_back(NS.take_front(0));
  NS.split(Results, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
  // Widen every component so that it starts at the beginning of NS.
  for (llvm::StringRef &R : Results)
    R = NS.take_front(R.end() - NS.begin());
  return Results;
}

} // namespace

// Returns the namespaces visible at the end of Code: the namespace that is
// open there, all of its ancestors, and the namespaces named by using
// directives recorded for any of those. The innermost open namespace is
// sorted first, the rest alphabetically; duplicates are removed.
std::vector<std::string> visibleNamespaces(llvm::StringRef Code,
                                           const LangOptions &LangOpts) {
  std::string Current;
  // Map from namespace to (resolved) namespaces introduced via using directive.
  llvm::StringMap<llvm::StringSet<>> UsingDirectives;

  parseNamespaceEvents(Code, LangOpts, [&](NamespaceEvent Event) {
    llvm::StringRef NS = Event.Payload;
    switch (Event.Trigger) {
    case NamespaceEvent::BeginNamespace:
    case NamespaceEvent::EndNamespace:
      Current = std::move(Event.Payload);
      break;
    case NamespaceEvent::UsingDirective:
      // A leading "::" means the name is already fully qualified.
      if (NS.consume_front("::"))
        UsingDirectives[Current].insert(NS);
      else {
        // Otherwise the name may resolve relative to any enclosing namespace,
        // so record every candidate.
        for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) {
          if (Enclosing.empty())
            UsingDirectives[Current].insert(NS);
          else
            UsingDirectives[Current].insert((Enclosing + "::" + NS).str());
        }
      }
      break;
    }
  });

  std::vector<std::string> Found;
  for (llvm::StringRef Enclosing : ancestorNamespaces(Current)) {
    Found.push_back(std::string(Enclosing));
    auto It = UsingDirectives.find(Enclosing);
    if (It != UsingDirectives.end())
      for (const auto &Used : It->second)
        Found.push_back(std::string(Used.getKey()));
  }

  // Current sorts before everything else; the remainder is ordered
  // lexicographically.
  llvm::sort(Found, [&](const std::string &LHS, const std::string &RHS) {
    if (Current == RHS)
      return false;
    if (Current == LHS)
      return true;
    return LHS < RHS;
  });
  Found.erase(std::unique(Found.begin(), Found.end()), Found.end());
  return Found;
}

// Splits Content into words using the character roles computed by
// calculateRoles(), and returns the lowercased words that have at least
// MinWordLength characters.
llvm::StringSet<> collectWords(llvm::StringRef Content) {
  // We assume short words are not significant.
  // We may want to consider other stopwords, e.g. language keywords.
  // (A very naive implementation showed no benefit, but lexing might do better)
  static constexpr int MinWordLength = 4;

  std::vector<CharRole> Roles(Content.size());
  calculateRoles(Content, Roles);

  llvm::StringSet<> Result;
  llvm::SmallString<256> Word;
  // Stores the word accumulated so far (if it is long enough) and starts a
  // new one.
  auto Flush = [&] {
    if (Word.size() >= MinWordLength) {
      for (char &C : Word)
        C = llvm::toLower(C);
      Result.insert(Word);
    }
    Word.clear();
  };
  for (unsigned I = 0; I < Content.size(); ++I) {
    switch (Roles[I]) {
    case Head:
      // A head character ends the previous word and begins the next one.
      Flush();
      LLVM_FALLTHROUGH;
    case Tail:
      Word.push_back(Content[I]);
      break;
    case Unknown:
    case Separator:
      Flush();
      break;
    }
  }
  Flush();

  return Result;
}

// Heuristically decides whether Word, found in non-code text between Before
// and After, looks like it names an identifier.
static bool isLikelyIdentifier(llvm::StringRef Word, llvm::StringRef Before,
                               llvm::StringRef After) {
  // `foo` is an identifier.
  if (Before.endswith("`") && After.startswith("`"))
    return true;
  // In foo::bar, both foo and bar are identifiers.
  if (Before.endswith("::") || After.startswith("::"))
    return true;
  // Doxygen tags like \c foo indicate identifiers.
  // This duplicates clang's doxygen parser, revisit if it gets complicated.
  Before = Before.take_back(100); // Don't search too far back.
  auto Pos = Before.find_last_of("\\@");
  if (Pos != llvm::StringRef::npos) {
    llvm::StringRef Tag = Before.substr(Pos + 1).rtrim(' ');
    if (Tag == "p" || Tag == "c" || Tag == "class" || Tag == "tparam" ||
        Tag == "param" || Tag == "param[in]" || Tag == "param[out]" ||
        Tag == "param[in,out]" || Tag == "retval" || Tag == "throw" ||
        Tag == "throws" || Tag == "link")
      return true;
  }

  // Word contains underscore.
  // This handles things like snake_case and MACRO_CASE.
  if (Word.contains('_')) {
    return true;
  }
  // Word contains capital letter other than at beginning.
  // This handles things like lowerCamel and UpperCamel.
  // The check for also containing a lowercase letter is to rule out
  // initialisms like "HTTP".
  bool HasLower = Word.find_if(clang::isLowercase) != StringRef::npos;
  bool HasUpper = Word.substr(1).find_if(clang::isUppercase) != StringRef::npos;
  if (HasLower && HasUpper) {
    return true;
  }
  // FIXME: consider mid-sentence Capitalization?
  return false;
}

// Finds the word touching SpelledLoc.
// If an identifier or keyword token touches the location, that token is used
// directly. Otherwise the run of ASCII identifier characters around the
// location is taken from the raw buffer text, and isLikelyIdentifier() decides
// whether it is treated as an identifier. Returns None if the buffer is
// unavailable or no identifier character touches the location.
llvm::Optional<SpelledWord> SpelledWord::touching(SourceLocation SpelledLoc,
                                                  const syntax::TokenBuffer &TB,
                                                  const LangOptions &LangOpts) {
  const auto &SM = TB.sourceManager();
  auto Touching = syntax::spelledTokensTouching(SpelledLoc, TB);
  for (const auto &T : Touching) {
    // If the token is an identifier or a keyword, don't use any heuristics.
    if (tok::isAnyIdentifier(T.kind()) || tok::getKeywordSpelling(T.kind())) {
      SpelledWord Result;
      Result.Location = T.location();
      Result.Text = T.text(SM);
      Result.LikelyIdentifier = tok::isAnyIdentifier(T.kind());
      Result.PartOfSpelledToken = &T;
      Result.SpelledToken = &T;
      // Only record an expanded token if the spelled token maps to exactly
      // one expanded token with the same text.
      auto Expanded =
          TB.expandedTokens(SM.getMacroArgExpandedLocation(T.location()));
      if (Expanded.size() == 1 && Expanded.front().text(SM) == Result.Text)
        Result.ExpandedToken = &Expanded.front();
      return Result;
    }
  }
  FileID File;
  unsigned Offset;
  std::tie(File, Offset) = SM.getDecomposedLoc(SpelledLoc);
  bool Invalid = false;
  llvm::StringRef Code = SM.getBufferData(File, &Invalid);
  if (Invalid)
    return llvm::None;
  // Extend [B, E) in both directions over identifier characters.
  unsigned B = Offset, E = Offset;
  while (B > 0 && isAsciiIdentifierContinue(Code[B - 1]))
    --B;
  while (E < Code.size() && isAsciiIdentifierContinue(Code[E]))
    ++E;
  if (B == E)
    return llvm::None;

  SpelledWord Result;
  Result.Location = SM.getComposedLoc(File, B);
  Result.Text = Code.slice(B, E);
  Result.LikelyIdentifier =
      isLikelyIdentifier(Result.Text, Code.substr(0, B), Code.substr(E)) &&
      // should not be a keyword
      tok::isAnyIdentifier(
          IdentifierTable(LangOpts).get(Result.Text).getTokenID());
  // The last touching token that starts at or before the word is the one
  // recorded as containing it.
  for (const auto &T : Touching)
    if (T.location() <= Result.Location)
      Result.PartOfSpelledToken = &T;
  return Result;
}

llvm::Optional<DefinedMacro> locateMacroAt(const syntax::Token &SpelledTok,
                                           Preprocessor &PP) {
  if (SpelledTok.kind() != tok::identifier)
    return None;
  SourceLocation Loc = SpelledTok.location();
  assert(Loc.isFileID());
  const auto &SM = PP.getSourceManager();
  IdentifierInfo *IdentifierInfo = PP.getIdentifierInfo(SpelledTok.text(SM));
  if (!IdentifierInfo || !IdentifierInfo->hadMacroDefinition())
    return None;

  // We need to take special case to handle #define and #undef.
  // Preprocessor::getMacroDefinitionAtLoc() only considers a macro
  // definition to be in scope *after* the location of the macro name in a
  // #define that introduces it, and *before* the location of the macro name
  // in an #undef that undefines it. To handle these cases, we check for
  // the macro being in scope either just after or just before the location
  // of the token. In getting the location before, we also take care to check
  // for start-of-file.
995 FileID FID = SM.getFileID(Loc); 996 assert(Loc != SM.getLocForEndOfFile(FID)); 997 SourceLocation JustAfterToken = Loc.getLocWithOffset(1); 998 auto *MacroInfo = 999 PP.getMacroDefinitionAtLoc(IdentifierInfo, JustAfterToken).getMacroInfo(); 1000 if (!MacroInfo && SM.getLocForStartOfFile(FID) != Loc) { 1001 SourceLocation JustBeforeToken = Loc.getLocWithOffset(-1); 1002 MacroInfo = PP.getMacroDefinitionAtLoc(IdentifierInfo, JustBeforeToken) 1003 .getMacroInfo(); 1004 } 1005 if (!MacroInfo) { 1006 return None; 1007 } 1008 return DefinedMacro{ 1009 IdentifierInfo->getName(), MacroInfo, 1010 translatePreamblePatchLocation(MacroInfo->getDefinitionLoc(), SM)}; 1011 } 1012 1013 llvm::Expected<std::string> Edit::apply() const { 1014 return tooling::applyAllReplacements(InitialCode, Replacements); 1015 } 1016 1017 std::vector<TextEdit> Edit::asTextEdits() const { 1018 return replacementsToEdits(InitialCode, Replacements); 1019 } 1020 1021 bool Edit::canApplyTo(llvm::StringRef Code) const { 1022 // Create line iterators, since line numbers are important while applying our 1023 // edit we cannot skip blank lines. 1024 auto LHS = llvm::MemoryBuffer::getMemBuffer(Code); 1025 llvm::line_iterator LHSIt(*LHS, /*SkipBlanks=*/false); 1026 1027 auto RHS = llvm::MemoryBuffer::getMemBuffer(InitialCode); 1028 llvm::line_iterator RHSIt(*RHS, /*SkipBlanks=*/false); 1029 1030 // Compare the InitialCode we prepared the edit for with the Code we received 1031 // line by line to make sure there are no differences. 1032 // FIXME: This check is too conservative now, it should be enough to only 1033 // check lines around the replacements contained inside the Edit. 
1034 while (!LHSIt.is_at_eof() && !RHSIt.is_at_eof()) { 1035 if (*LHSIt != *RHSIt) 1036 return false; 1037 ++LHSIt; 1038 ++RHSIt; 1039 } 1040 1041 // After we reach EOF for any of the files we make sure the other one doesn't 1042 // contain any additional content except empty lines, they should not 1043 // interfere with the edit we produced. 1044 while (!LHSIt.is_at_eof()) { 1045 if (!LHSIt->empty()) 1046 return false; 1047 ++LHSIt; 1048 } 1049 while (!RHSIt.is_at_eof()) { 1050 if (!RHSIt->empty()) 1051 return false; 1052 ++RHSIt; 1053 } 1054 return true; 1055 } 1056 1057 llvm::Error reformatEdit(Edit &E, const format::FormatStyle &Style) { 1058 if (auto NewEdits = cleanupAndFormat(E.InitialCode, E.Replacements, Style)) 1059 E.Replacements = std::move(*NewEdits); 1060 else 1061 return NewEdits.takeError(); 1062 return llvm::Error::success(); 1063 } 1064 1065 llvm::Error applyChange(std::string &Contents, 1066 const TextDocumentContentChangeEvent &Change) { 1067 if (!Change.range) { 1068 Contents = Change.text; 1069 return llvm::Error::success(); 1070 } 1071 1072 const Position &Start = Change.range->start; 1073 llvm::Expected<size_t> StartIndex = positionToOffset(Contents, Start, false); 1074 if (!StartIndex) 1075 return StartIndex.takeError(); 1076 1077 const Position &End = Change.range->end; 1078 llvm::Expected<size_t> EndIndex = positionToOffset(Contents, End, false); 1079 if (!EndIndex) 1080 return EndIndex.takeError(); 1081 1082 if (*EndIndex < *StartIndex) 1083 return error(llvm::errc::invalid_argument, 1084 "Range's end position ({0}) is before start position ({1})", 1085 End, Start); 1086 1087 // Since the range length between two LSP positions is dependent on the 1088 // contents of the buffer we compute the range length between the start and 1089 // end position ourselves and compare it to the range length of the LSP 1090 // message to verify the buffers of the client and server are in sync. 
1091 1092 // EndIndex and StartIndex are in bytes, but Change.rangeLength is in UTF-16 1093 // code units. 1094 ssize_t ComputedRangeLength = 1095 lspLength(Contents.substr(*StartIndex, *EndIndex - *StartIndex)); 1096 1097 if (Change.rangeLength && ComputedRangeLength != *Change.rangeLength) 1098 return error(llvm::errc::invalid_argument, 1099 "Change's rangeLength ({0}) doesn't match the " 1100 "computed range length ({1}).", 1101 *Change.rangeLength, ComputedRangeLength); 1102 1103 Contents.replace(*StartIndex, *EndIndex - *StartIndex, Change.text); 1104 1105 return llvm::Error::success(); 1106 } 1107 1108 EligibleRegion getEligiblePoints(llvm::StringRef Code, 1109 llvm::StringRef FullyQualifiedName, 1110 const LangOptions &LangOpts) { 1111 EligibleRegion ER; 1112 // Start with global namespace. 1113 std::vector<std::string> Enclosing = {""}; 1114 // FIXME: In addition to namespaces try to generate events for function 1115 // definitions as well. One might use a closing parantheses(")" followed by an 1116 // opening brace "{" to trigger the start. 1117 parseNamespaceEvents(Code, LangOpts, [&](NamespaceEvent Event) { 1118 // Using Directives only introduces declarations to current scope, they do 1119 // not change the current namespace, so skip them. 1120 if (Event.Trigger == NamespaceEvent::UsingDirective) 1121 return; 1122 // Do not qualify the global namespace. 1123 if (!Event.Payload.empty()) 1124 Event.Payload.append("::"); 1125 1126 std::string CurrentNamespace; 1127 if (Event.Trigger == NamespaceEvent::BeginNamespace) { 1128 Enclosing.emplace_back(std::move(Event.Payload)); 1129 CurrentNamespace = Enclosing.back(); 1130 // parseNameSpaceEvents reports the beginning position of a token; we want 1131 // to insert after '{', so increment by one. 1132 ++Event.Pos.character; 1133 } else { 1134 // Event.Payload points to outer namespace when exiting a scope, so use 1135 // the namespace we've last entered instead. 
1136 CurrentNamespace = std::move(Enclosing.back()); 1137 Enclosing.pop_back(); 1138 assert(Enclosing.back() == Event.Payload); 1139 } 1140 1141 // Ignore namespaces that are not a prefix of the target. 1142 if (!FullyQualifiedName.startswith(CurrentNamespace)) 1143 return; 1144 1145 // Prefer the namespace that shares the longest prefix with target. 1146 if (CurrentNamespace.size() > ER.EnclosingNamespace.size()) { 1147 ER.EligiblePoints.clear(); 1148 ER.EnclosingNamespace = CurrentNamespace; 1149 } 1150 if (CurrentNamespace.size() == ER.EnclosingNamespace.size()) 1151 ER.EligiblePoints.emplace_back(std::move(Event.Pos)); 1152 }); 1153 // If there were no shared namespaces just return EOF. 1154 if (ER.EligiblePoints.empty()) { 1155 assert(ER.EnclosingNamespace.empty()); 1156 ER.EligiblePoints.emplace_back(offsetToPosition(Code, Code.size())); 1157 } 1158 return ER; 1159 } 1160 1161 bool isHeaderFile(llvm::StringRef FileName, 1162 llvm::Optional<LangOptions> LangOpts) { 1163 // Respect the langOpts, for non-file-extension cases, e.g. standard library 1164 // files. 1165 if (LangOpts && LangOpts->IsHeaderFile) 1166 return true; 1167 namespace types = clang::driver::types; 1168 auto Lang = types::lookupTypeForExtension( 1169 llvm::sys::path::extension(FileName).substr(1)); 1170 return Lang != types::TY_INVALID && types::onlyPrecompileType(Lang); 1171 } 1172 1173 bool isProtoFile(SourceLocation Loc, const SourceManager &SM) { 1174 auto FileName = SM.getFilename(Loc); 1175 if (!FileName.endswith(".proto.h") && !FileName.endswith(".pb.h")) 1176 return false; 1177 auto FID = SM.getFileID(Loc); 1178 // All proto generated headers should start with this line. 1179 static const char *PROTO_HEADER_COMMENT = 1180 "// Generated by the protocol buffer compiler. DO NOT EDIT!"; 1181 // Double check that this is an actual protobuf header. 
1182 return SM.getBufferData(FID).startswith(PROTO_HEADER_COMMENT); 1183 } 1184 1185 namespace { 1186 1187 // Is Line an #if or #ifdef directive? 1188 // FIXME: This makes headers with #ifdef LINUX/WINDOWS/MACOS marked as non 1189 // self-contained and is probably not what we want. 1190 bool isIf(llvm::StringRef Line) { 1191 Line = Line.ltrim(); 1192 if (!Line.consume_front("#")) 1193 return false; 1194 Line = Line.ltrim(); 1195 return Line.startswith("if"); 1196 } 1197 1198 // Is Line an #error directive mentioning includes? 1199 bool isErrorAboutInclude(llvm::StringRef Line) { 1200 Line = Line.ltrim(); 1201 if (!Line.consume_front("#")) 1202 return false; 1203 Line = Line.ltrim(); 1204 if (!Line.startswith("error")) 1205 return false; 1206 return Line.contains_insensitive( 1207 "includ"); // Matches "include" or "including". 1208 } 1209 1210 // Heuristically headers that only want to be included via an umbrella. 1211 bool isDontIncludeMeHeader(llvm::StringRef Content) { 1212 llvm::StringRef Line; 1213 // Only sniff up to 100 lines or 10KB. 1214 Content = Content.take_front(100 * 100); 1215 for (unsigned I = 0; I < 100 && !Content.empty(); ++I) { 1216 std::tie(Line, Content) = Content.split('\n'); 1217 if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first)) 1218 return true; 1219 } 1220 return false; 1221 } 1222 1223 } // namespace 1224 1225 bool isSelfContainedHeader(const FileEntry *FE, FileID FID, 1226 const SourceManager &SM, HeaderSearch &HeaderInfo) { 1227 // FIXME: Should files that have been #import'd be considered 1228 // self-contained? That's really a property of the includer, 1229 // not of the file. 1230 if (!HeaderInfo.isFileMultipleIncludeGuarded(FE) && 1231 !HeaderInfo.hasFileBeenImported(FE)) 1232 return false; 1233 // This pattern indicates that a header can't be used without 1234 // particular preprocessor state, usually set up by another header. 
1235 return !isDontIncludeMeHeader(SM.getBufferData(FID)); 1236 } 1237 1238 } // namespace clangd 1239 } // namespace clang 1240