1 //===--- Replacement.cpp - Framework for clang refactoring tools ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Implements classes to support/store refactorings. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/Tooling/Core/Replacement.h" 15 16 #include "clang/Basic/Diagnostic.h" 17 #include "clang/Basic/DiagnosticIDs.h" 18 #include "clang/Basic/DiagnosticOptions.h" 19 #include "clang/Basic/FileManager.h" 20 #include "clang/Basic/SourceManager.h" 21 #include "clang/Lex/Lexer.h" 22 #include "clang/Rewrite/Core/Rewriter.h" 23 #include "llvm/Support/FileSystem.h" 24 #include "llvm/Support/raw_os_ostream.h" 25 26 namespace clang { 27 namespace tooling { 28 29 static const char * const InvalidLocation = ""; 30 31 Replacement::Replacement() 32 : FilePath(InvalidLocation) {} 33 34 Replacement::Replacement(StringRef FilePath, unsigned Offset, unsigned Length, 35 StringRef ReplacementText) 36 : FilePath(FilePath), ReplacementRange(Offset, Length), 37 ReplacementText(ReplacementText) {} 38 39 Replacement::Replacement(const SourceManager &Sources, SourceLocation Start, 40 unsigned Length, StringRef ReplacementText) { 41 setFromSourceLocation(Sources, Start, Length, ReplacementText); 42 } 43 44 Replacement::Replacement(const SourceManager &Sources, 45 const CharSourceRange &Range, 46 StringRef ReplacementText, 47 const LangOptions &LangOpts) { 48 setFromSourceRange(Sources, Range, ReplacementText, LangOpts); 49 } 50 51 bool Replacement::isApplicable() const { 52 return FilePath != InvalidLocation; 53 } 54 55 bool Replacement::apply(Rewriter &Rewrite) const { 56 SourceManager &SM = Rewrite.getSourceMgr(); 57 const FileEntry *Entry = SM.getFileManager().getFile(FilePath); 58 if (!Entry) 59 return false; 60 61 FileID ID = SM.getOrCreateFileID(Entry, SrcMgr::C_User); 62 const SourceLocation Start = 63 SM.getLocForStartOfFile(ID). 64 getLocWithOffset(ReplacementRange.getOffset()); 65 // ReplaceText returns false on success. 66 // ReplaceText only fails if the source location is not a file location, in 67 // which case we already returned false earlier. 68 bool RewriteSucceeded = !Rewrite.ReplaceText( 69 Start, ReplacementRange.getLength(), ReplacementText); 70 assert(RewriteSucceeded); 71 return RewriteSucceeded; 72 } 73 74 std::string Replacement::toString() const { 75 std::string Result; 76 llvm::raw_string_ostream Stream(Result); 77 Stream << FilePath << ": " << ReplacementRange.getOffset() << ":+" 78 << ReplacementRange.getLength() << ":\"" << ReplacementText << "\""; 79 return Stream.str(); 80 } 81 82 bool operator<(const Replacement &LHS, const Replacement &RHS) { 83 if (LHS.getOffset() != RHS.getOffset()) 84 return LHS.getOffset() < RHS.getOffset(); 85 86 if (LHS.getLength() != RHS.getLength()) 87 return LHS.getLength() < RHS.getLength(); 88 89 if (LHS.getFilePath() != RHS.getFilePath()) 90 return LHS.getFilePath() < RHS.getFilePath(); 91 return LHS.getReplacementText() < RHS.getReplacementText(); 92 } 93 94 bool operator==(const Replacement &LHS, const Replacement &RHS) { 95 return LHS.getOffset() == RHS.getOffset() && 96 LHS.getLength() == RHS.getLength() && 97 LHS.getFilePath() == RHS.getFilePath() && 98 LHS.getReplacementText() == RHS.getReplacementText(); 99 } 100 101 void Replacement::setFromSourceLocation(const SourceManager &Sources, 102 SourceLocation Start, unsigned Length, 103 StringRef ReplacementText) { 104 const std::pair<FileID, unsigned> DecomposedLocation = 105 Sources.getDecomposedLoc(Start); 106 const FileEntry *Entry = Sources.getFileEntryForID(DecomposedLocation.first); 107 this->FilePath = Entry ? Entry->getName() : InvalidLocation; 108 this->ReplacementRange = Range(DecomposedLocation.second, Length); 109 this->ReplacementText = ReplacementText; 110 } 111 112 // FIXME: This should go into the Lexer, but we need to figure out how 113 // to handle ranges for refactoring in general first - there is no obvious 114 // good way how to integrate this into the Lexer yet. 115 static int getRangeSize(const SourceManager &Sources, 116 const CharSourceRange &Range, 117 const LangOptions &LangOpts) { 118 SourceLocation SpellingBegin = Sources.getSpellingLoc(Range.getBegin()); 119 SourceLocation SpellingEnd = Sources.getSpellingLoc(Range.getEnd()); 120 std::pair<FileID, unsigned> Start = Sources.getDecomposedLoc(SpellingBegin); 121 std::pair<FileID, unsigned> End = Sources.getDecomposedLoc(SpellingEnd); 122 if (Start.first != End.first) return -1; 123 if (Range.isTokenRange()) 124 End.second += Lexer::MeasureTokenLength(SpellingEnd, Sources, LangOpts); 125 return End.second - Start.second; 126 } 127 128 void Replacement::setFromSourceRange(const SourceManager &Sources, 129 const CharSourceRange &Range, 130 StringRef ReplacementText, 131 const LangOptions &LangOpts) { 132 setFromSourceLocation(Sources, Sources.getSpellingLoc(Range.getBegin()), 133 getRangeSize(Sources, Range, LangOpts), 134 ReplacementText); 135 } 136 137 Replacement 138 Replacements::getReplacementInChangedCode(const Replacement &R) const { 139 unsigned NewStart = getShiftedCodePosition(R.getOffset()); 140 unsigned NewEnd = getShiftedCodePosition(R.getOffset() + R.getLength()); 141 return Replacement(R.getFilePath(), NewStart, NewEnd - NewStart, 142 R.getReplacementText()); 143 } 144 145 static llvm::Error makeConflictReplacementsError(const Replacement &New, 146 const Replacement &Existing) { 147 return llvm::make_error<llvm::StringError>( 148 "New replacement:\n" + New.toString() + 149 "\nconflicts with existing replacement:\n" + Existing.toString(), 150 llvm::inconvertibleErrorCode()); 151 } 152 153 Replacements Replacements::getCanonicalReplacements() const { 154 std::vector<Replacement> NewReplaces; 155 // Merge adjacent replacements. 156 for (const auto &R : Replaces) { 157 if (NewReplaces.empty()) { 158 NewReplaces.push_back(R); 159 continue; 160 } 161 auto &Prev = NewReplaces.back(); 162 unsigned PrevEnd = Prev.getOffset() + Prev.getLength(); 163 if (PrevEnd < R.getOffset()) { 164 NewReplaces.push_back(R); 165 } else { 166 assert(PrevEnd == R.getOffset() && 167 "Existing replacements must not overlap."); 168 Replacement NewR( 169 R.getFilePath(), Prev.getOffset(), Prev.getLength() + R.getLength(), 170 (Prev.getReplacementText() + R.getReplacementText()).str()); 171 Prev = NewR; 172 } 173 } 174 ReplacementsImpl NewReplacesImpl(NewReplaces.begin(), NewReplaces.end()); 175 return Replacements(NewReplacesImpl.begin(), NewReplacesImpl.end()); 176 } 177 178 // `R` and `Replaces` are order-independent if applying them in either order 179 // has the same effect, so we need to compare replacements associated to 180 // applying them in either order. 181 llvm::Expected<Replacements> 182 Replacements::mergeIfOrderIndependent(const Replacement &R) const { 183 Replacements Rs(R); 184 // A Replacements set containg a single replacement that is `R` referring to 185 // the code after the existing replacements `Replaces` are applied. 186 Replacements RsShiftedByReplaces(getReplacementInChangedCode(R)); 187 // A Replacements set that is `Replaces` referring to the code after `R` is 188 // applied. 189 Replacements ReplacesShiftedByRs; 190 for (const auto &Replace : Replaces) 191 ReplacesShiftedByRs.Replaces.insert( 192 Rs.getReplacementInChangedCode(Replace)); 193 // This is equivalent to applying `Replaces` first and then `R`. 194 auto MergeShiftedRs = merge(RsShiftedByReplaces); 195 // This is equivalent to applying `R` first and then `Replaces`. 196 auto MergeShiftedReplaces = Rs.merge(ReplacesShiftedByRs); 197 198 // Since empty or segmented replacements around existing replacements might be 199 // produced above, we need to compare replacements in canonical forms. 200 if (MergeShiftedRs.getCanonicalReplacements() == 201 MergeShiftedReplaces.getCanonicalReplacements()) 202 return MergeShiftedRs; 203 return makeConflictReplacementsError(R, *Replaces.begin()); 204 } 205 206 llvm::Error Replacements::add(const Replacement &R) { 207 // Check the file path. 208 if (!Replaces.empty() && R.getFilePath() != Replaces.begin()->getFilePath()) 209 return llvm::make_error<llvm::StringError>( 210 "All replacements must have the same file path. New replacement: " + 211 R.getFilePath() + ", existing replacements: " + 212 Replaces.begin()->getFilePath() + "\n", 213 llvm::inconvertibleErrorCode()); 214 215 // Special-case header insertions. 216 if (R.getOffset() == UINT_MAX) { 217 Replaces.insert(R); 218 return llvm::Error::success(); 219 } 220 221 // This replacement cannot conflict with replacements that end before 222 // this replacement starts or start after this replacement ends. 223 // We also know that there currently are no overlapping replacements. 224 // Thus, we know that all replacements that start after the end of the current 225 // replacement cannot overlap. 226 Replacement AtEnd(R.getFilePath(), R.getOffset() + R.getLength(), 0, ""); 227 228 // Find the first entry that starts after or at the end of R. Note that 229 // entries that start at the end can still be conflicting if R is an 230 // insertion. 231 auto I = Replaces.lower_bound(AtEnd); 232 // If `I` starts at the same offset as `R`, `R` must be an insertion. 233 if (I != Replaces.end() && R.getOffset() == I->getOffset()) { 234 assert(R.getLength() == 0); 235 // `I` is also an insertion, `R` and `I` conflict. 236 if (I->getLength() == 0) { 237 // Check if two insertions are order-indepedent: if inserting them in 238 // either order produces the same text, they are order-independent. 239 if ((R.getReplacementText() + I->getReplacementText()).str() != 240 (I->getReplacementText() + R.getReplacementText()).str()) { 241 return makeConflictReplacementsError(R, *I); 242 } 243 // If insertions are order-independent, we can merge them. 244 Replacement NewR( 245 R.getFilePath(), R.getOffset(), 0, 246 (R.getReplacementText() + I->getReplacementText()).str()); 247 Replaces.erase(I); 248 Replaces.insert(std::move(NewR)); 249 return llvm::Error::success(); 250 } 251 // Insertion `R` is adjacent to a non-insertion replacement `I`, so they 252 // are order-independent. It is safe to assume that `R` will not conflict 253 // with any replacement before `I` since all replacements before `I` must 254 // either end before `R` or end at `R` but has length > 0 (if the 255 // replacement before `I` is an insertion at `R`, it would have been `I` 256 // since it is a lower bound of `AtEnd` and ordered before the current `I` 257 // in the set). 258 Replaces.insert(R); 259 return llvm::Error::success(); 260 } 261 262 // `I` is the smallest iterator (after `R`) whose entry cannot overlap. 263 // If that is begin(), there are no overlaps. 264 if (I == Replaces.begin()) { 265 Replaces.insert(R); 266 return llvm::Error::success(); 267 } 268 --I; 269 auto Overlap = [](const Replacement &R1, const Replacement &R2) -> bool { 270 return Range(R1.getOffset(), R1.getLength()) 271 .overlapsWith(Range(R2.getOffset(), R2.getLength())); 272 }; 273 // If the previous entry does not overlap, we know that entries before it 274 // can also not overlap. 275 if (!Overlap(R, *I)) { 276 // If `R` and `I` do not have the same offset, it is safe to add `R` since 277 // it must come after `I`. Otherwise: 278 // - If `R` is an insertion, `I` must not be an insertion since it would 279 // have come after `AtEnd`. 280 // - If `R` is not an insertion, `I` must be an insertion; otherwise, `R` 281 // and `I` would have overlapped. 282 // In either case, we can safely insert `R`. 283 Replaces.insert(R); 284 } else { 285 // `I` overlaps with `R`. We need to check `R` against all overlapping 286 // replacements to see if they are order-indepedent. If they are, merge `R` 287 // with them and replace them with the merged replacements. 288 auto MergeBegin = I; 289 auto MergeEnd = std::next(I); 290 while (I != Replaces.begin()) { 291 --I; 292 // If `I` doesn't overlap with `R`, don't merge it. 293 if (!Overlap(R, *I)) 294 break; 295 MergeBegin = I; 296 } 297 Replacements OverlapReplaces(MergeBegin, MergeEnd); 298 llvm::Expected<Replacements> Merged = 299 OverlapReplaces.mergeIfOrderIndependent(R); 300 if (!Merged) 301 return Merged.takeError(); 302 Replaces.erase(MergeBegin, MergeEnd); 303 Replaces.insert(Merged->begin(), Merged->end()); 304 } 305 return llvm::Error::success(); 306 } 307 308 namespace { 309 310 // Represents a merged replacement, i.e. a replacement consisting of multiple 311 // overlapping replacements from 'First' and 'Second' in mergeReplacements. 312 // 313 // Position projection: 314 // Offsets and lengths of the replacements can generally refer to two different 315 // coordinate spaces. Replacements from 'First' refer to the original text 316 // whereas replacements from 'Second' refer to the text after applying 'First'. 317 // 318 // MergedReplacement always operates in the coordinate space of the original 319 // text, i.e. transforms elements from 'Second' to take into account what was 320 // changed based on the elements from 'First'. 321 // 322 // We can correctly calculate this projection as we look at the replacements in 323 // order of strictly increasing offsets. 324 // 325 // Invariants: 326 // * We always merge elements from 'First' into elements from 'Second' and vice 327 // versa. Within each set, the replacements are non-overlapping. 328 // * We only extend to the right, i.e. merge elements with strictly increasing 329 // offsets. 330 class MergedReplacement { 331 public: 332 MergedReplacement(const Replacement &R, bool MergeSecond, int D) 333 : MergeSecond(MergeSecond), Delta(D), FilePath(R.getFilePath()), 334 Offset(R.getOffset() + (MergeSecond ? 0 : Delta)), Length(R.getLength()), 335 Text(R.getReplacementText()) { 336 Delta += MergeSecond ? 0 : Text.size() - Length; 337 DeltaFirst = MergeSecond ? Text.size() - Length : 0; 338 } 339 340 // Merges the next element 'R' into this merged element. As we always merge 341 // from 'First' into 'Second' or vice versa, the MergedReplacement knows what 342 // set the next element is coming from. 343 void merge(const Replacement &R) { 344 if (MergeSecond) { 345 unsigned REnd = R.getOffset() + Delta + R.getLength(); 346 unsigned End = Offset + Text.size(); 347 if (REnd > End) { 348 Length += REnd - End; 349 MergeSecond = false; 350 } 351 StringRef TextRef = Text; 352 StringRef Head = TextRef.substr(0, R.getOffset() + Delta - Offset); 353 StringRef Tail = TextRef.substr(REnd - Offset); 354 Text = (Head + R.getReplacementText() + Tail).str(); 355 Delta += R.getReplacementText().size() - R.getLength(); 356 } else { 357 unsigned End = Offset + Length; 358 StringRef RText = R.getReplacementText(); 359 StringRef Tail = RText.substr(End - R.getOffset()); 360 Text = (Text + Tail).str(); 361 if (R.getOffset() + RText.size() > End) { 362 Length = R.getOffset() + R.getLength() - Offset; 363 MergeSecond = true; 364 } else { 365 Length += R.getLength() - RText.size(); 366 } 367 DeltaFirst += RText.size() - R.getLength(); 368 } 369 } 370 371 // Returns 'true' if 'R' starts strictly after the MergedReplacement and thus 372 // doesn't need to be merged. 373 bool endsBefore(const Replacement &R) const { 374 if (MergeSecond) 375 return Offset + Text.size() < R.getOffset() + Delta; 376 return Offset + Length < R.getOffset(); 377 } 378 379 // Returns 'true' if an element from the second set should be merged next. 380 bool mergeSecond() const { return MergeSecond; } 381 int deltaFirst() const { return DeltaFirst; } 382 Replacement asReplacement() const { return {FilePath, Offset, Length, Text}; } 383 384 private: 385 bool MergeSecond; 386 387 // Amount of characters that elements from 'Second' need to be shifted by in 388 // order to refer to the original text. 389 int Delta; 390 391 // Sum of all deltas (text-length - length) of elements from 'First' merged 392 // into this element. This is used to update 'Delta' once the 393 // MergedReplacement is completed. 394 int DeltaFirst; 395 396 // Data of the actually merged replacement. FilePath and Offset aren't changed 397 // as the element is only extended to the right. 398 const StringRef FilePath; 399 const unsigned Offset; 400 unsigned Length; 401 std::string Text; 402 }; 403 404 } // namespace 405 406 Replacements Replacements::merge(const Replacements &ReplacesToMerge) const { 407 if (empty() || ReplacesToMerge.empty()) 408 return empty() ? ReplacesToMerge : *this; 409 410 auto &First = Replaces; 411 auto &Second = ReplacesToMerge.Replaces; 412 // Delta is the amount of characters that replacements from 'Second' need to 413 // be shifted so that their offsets refer to the original text. 414 int Delta = 0; 415 ReplacementsImpl Result; 416 417 // Iterate over both sets and always add the next element (smallest total 418 // Offset) from either 'First' or 'Second'. Merge that element with 419 // subsequent replacements as long as they overlap. See more details in the 420 // comment on MergedReplacement. 421 for (auto FirstI = First.begin(), SecondI = Second.begin(); 422 FirstI != First.end() || SecondI != Second.end();) { 423 bool NextIsFirst = SecondI == Second.end() || 424 (FirstI != First.end() && 425 FirstI->getOffset() < SecondI->getOffset() + Delta); 426 MergedReplacement Merged(NextIsFirst ? *FirstI : *SecondI, NextIsFirst, 427 Delta); 428 ++(NextIsFirst ? FirstI : SecondI); 429 430 while ((Merged.mergeSecond() && SecondI != Second.end()) || 431 (!Merged.mergeSecond() && FirstI != First.end())) { 432 auto &I = Merged.mergeSecond() ? SecondI : FirstI; 433 if (Merged.endsBefore(*I)) 434 break; 435 Merged.merge(*I); 436 ++I; 437 } 438 Delta -= Merged.deltaFirst(); 439 Result.insert(Merged.asReplacement()); 440 } 441 return Replacements(Result.begin(), Result.end()); 442 } 443 444 // Combines overlapping ranges in \p Ranges and sorts the combined ranges. 445 // Returns a set of non-overlapping and sorted ranges that is equivalent to 446 // \p Ranges. 447 static std::vector<Range> combineAndSortRanges(std::vector<Range> Ranges) { 448 std::sort(Ranges.begin(), Ranges.end(), 449 [](const Range &LHS, const Range &RHS) { 450 if (LHS.getOffset() != RHS.getOffset()) 451 return LHS.getOffset() < RHS.getOffset(); 452 return LHS.getLength() < RHS.getLength(); 453 }); 454 std::vector<Range> Result; 455 for (const auto &R : Ranges) { 456 if (Result.empty() || 457 Result.back().getOffset() + Result.back().getLength() < R.getOffset()) { 458 Result.push_back(R); 459 } else { 460 unsigned NewEnd = 461 std::max(Result.back().getOffset() + Result.back().getLength(), 462 R.getOffset() + R.getLength()); 463 Result[Result.size() - 1] = 464 Range(Result.back().getOffset(), NewEnd - Result.back().getOffset()); 465 } 466 } 467 return Result; 468 } 469 470 std::vector<Range> 471 calculateRangesAfterReplacements(const Replacements &Replaces, 472 const std::vector<Range> &Ranges) { 473 // To calculate the new ranges, 474 // - Turn \p Ranges into Replacements at (offset, length) with an empty 475 // (unimportant) replacement text of length "length". 476 // - Merge with \p Replaces. 477 // - The new ranges will be the affected ranges of the merged replacements. 478 auto MergedRanges = combineAndSortRanges(Ranges); 479 if (Replaces.empty()) 480 return MergedRanges; 481 tooling::Replacements FakeReplaces; 482 for (const auto &R : MergedRanges) { 483 auto Err = FakeReplaces.add(Replacement(Replaces.begin()->getFilePath(), 484 R.getOffset(), R.getLength(), 485 std::string(R.getLength(), ' '))); 486 assert(!Err && 487 "Replacements must not conflict since ranges have been merged."); 488 (void)Err; 489 } 490 return FakeReplaces.merge(Replaces).getAffectedRanges(); 491 } 492 493 std::vector<Range> Replacements::getAffectedRanges() const { 494 std::vector<Range> ChangedRanges; 495 int Shift = 0; 496 for (const Replacement &R : Replaces) { 497 unsigned Offset = R.getOffset() + Shift; 498 unsigned Length = R.getReplacementText().size(); 499 Shift += Length - R.getLength(); 500 ChangedRanges.push_back(Range(Offset, Length)); 501 } 502 return combineAndSortRanges(ChangedRanges); 503 } 504 505 unsigned Replacements::getShiftedCodePosition(unsigned Position) const { 506 unsigned Offset = 0; 507 for (const auto& R : Replaces) { 508 if (R.getOffset() + R.getLength() <= Position) { 509 Offset += R.getReplacementText().size() - R.getLength(); 510 continue; 511 } 512 if (R.getOffset() < Position && 513 R.getOffset() + R.getReplacementText().size() <= Position) { 514 Position = R.getOffset() + R.getReplacementText().size(); 515 if (R.getReplacementText().size() > 0) 516 Position--; 517 } 518 break; 519 } 520 return Position + Offset; 521 } 522 523 bool applyAllReplacements(const Replacements &Replaces, Rewriter &Rewrite) { 524 bool Result = true; 525 for (auto I = Replaces.rbegin(), E = Replaces.rend(); I != E; ++I) { 526 if (I->isApplicable()) { 527 Result = I->apply(Rewrite) && Result; 528 } else { 529 Result = false; 530 } 531 } 532 return Result; 533 } 534 535 llvm::Expected<std::string> applyAllReplacements(StringRef Code, 536 const Replacements &Replaces) { 537 if (Replaces.empty()) 538 return Code.str(); 539 540 IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem( 541 new vfs::InMemoryFileSystem); 542 FileManager Files(FileSystemOptions(), InMemoryFileSystem); 543 DiagnosticsEngine Diagnostics( 544 IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), 545 new DiagnosticOptions); 546 SourceManager SourceMgr(Diagnostics, Files); 547 Rewriter Rewrite(SourceMgr, LangOptions()); 548 InMemoryFileSystem->addFile( 549 "<stdin>", 0, llvm::MemoryBuffer::getMemBuffer(Code, "<stdin>")); 550 FileID ID = SourceMgr.createFileID(Files.getFile("<stdin>"), SourceLocation(), 551 clang::SrcMgr::C_User); 552 for (auto I = Replaces.rbegin(), E = Replaces.rend(); I != E; ++I) { 553 Replacement Replace("<stdin>", I->getOffset(), I->getLength(), 554 I->getReplacementText()); 555 if (!Replace.apply(Rewrite)) 556 return llvm::make_error<llvm::StringError>( 557 "Failed to apply replacement: " + Replace.toString(), 558 llvm::inconvertibleErrorCode()); 559 } 560 std::string Result; 561 llvm::raw_string_ostream OS(Result); 562 Rewrite.getEditBuffer(ID).write(OS); 563 OS.flush(); 564 return Result; 565 } 566 567 std::map<std::string, Replacements> 568 groupReplacementsByFile(const Replacements &Replaces) { 569 std::map<std::string, Replacements> FileToReplaces; 570 for (const auto &Replace : Replaces) 571 // We can ignore the Error here since \p Replaces is already conflict-free. 572 FileToReplaces[Replace.getFilePath()].add(Replace); 573 return FileToReplaces; 574 } 575 576 } // end namespace tooling 577 } // end namespace clang 578