1 //===--- Replacement.cpp - Framework for clang refactoring tools ----------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  Implements classes to support/store refactorings.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Tooling/Core/Replacement.h"
15 
16 #include "clang/Basic/Diagnostic.h"
17 #include "clang/Basic/DiagnosticIDs.h"
18 #include "clang/Basic/DiagnosticOptions.h"
19 #include "clang/Basic/FileManager.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Lex/Lexer.h"
22 #include "clang/Rewrite/Core/Rewriter.h"
23 #include "llvm/Support/FileSystem.h"
24 #include "llvm/Support/raw_os_ostream.h"
25 
26 namespace clang {
27 namespace tooling {
28 
29 static const char * const InvalidLocation = "";
30 
31 Replacement::Replacement()
32   : FilePath(InvalidLocation) {}
33 
34 Replacement::Replacement(StringRef FilePath, unsigned Offset, unsigned Length,
35                          StringRef ReplacementText)
36     : FilePath(FilePath), ReplacementRange(Offset, Length),
37       ReplacementText(ReplacementText) {}
38 
39 Replacement::Replacement(const SourceManager &Sources, SourceLocation Start,
40                          unsigned Length, StringRef ReplacementText) {
41   setFromSourceLocation(Sources, Start, Length, ReplacementText);
42 }
43 
44 Replacement::Replacement(const SourceManager &Sources,
45                          const CharSourceRange &Range,
46                          StringRef ReplacementText,
47                          const LangOptions &LangOpts) {
48   setFromSourceRange(Sources, Range, ReplacementText, LangOpts);
49 }
50 
51 bool Replacement::isApplicable() const {
52   return FilePath != InvalidLocation;
53 }
54 
55 bool Replacement::apply(Rewriter &Rewrite) const {
56   SourceManager &SM = Rewrite.getSourceMgr();
57   const FileEntry *Entry = SM.getFileManager().getFile(FilePath);
58   if (!Entry)
59     return false;
60 
61   FileID ID = SM.getOrCreateFileID(Entry, SrcMgr::C_User);
62   const SourceLocation Start =
63     SM.getLocForStartOfFile(ID).
64     getLocWithOffset(ReplacementRange.getOffset());
65   // ReplaceText returns false on success.
66   // ReplaceText only fails if the source location is not a file location, in
67   // which case we already returned false earlier.
68   bool RewriteSucceeded = !Rewrite.ReplaceText(
69       Start, ReplacementRange.getLength(), ReplacementText);
70   assert(RewriteSucceeded);
71   return RewriteSucceeded;
72 }
73 
74 std::string Replacement::toString() const {
75   std::string Result;
76   llvm::raw_string_ostream Stream(Result);
77   Stream << FilePath << ": " << ReplacementRange.getOffset() << ":+"
78          << ReplacementRange.getLength() << ":\"" << ReplacementText << "\"";
79   return Stream.str();
80 }
81 
82 bool operator<(const Replacement &LHS, const Replacement &RHS) {
83   if (LHS.getOffset() != RHS.getOffset())
84     return LHS.getOffset() < RHS.getOffset();
85 
86   // Apply longer replacements first, specifically so that deletions are
87   // executed before insertions. It is (hopefully) never the intention to
88   // delete parts of newly inserted code.
89   if (LHS.getLength() != RHS.getLength())
90     return LHS.getLength() > RHS.getLength();
91 
92   if (LHS.getFilePath() != RHS.getFilePath())
93     return LHS.getFilePath() < RHS.getFilePath();
94   return LHS.getReplacementText() < RHS.getReplacementText();
95 }
96 
97 bool operator==(const Replacement &LHS, const Replacement &RHS) {
98   return LHS.getOffset() == RHS.getOffset() &&
99          LHS.getLength() == RHS.getLength() &&
100          LHS.getFilePath() == RHS.getFilePath() &&
101          LHS.getReplacementText() == RHS.getReplacementText();
102 }
103 
104 void Replacement::setFromSourceLocation(const SourceManager &Sources,
105                                         SourceLocation Start, unsigned Length,
106                                         StringRef ReplacementText) {
107   const std::pair<FileID, unsigned> DecomposedLocation =
108       Sources.getDecomposedLoc(Start);
109   const FileEntry *Entry = Sources.getFileEntryForID(DecomposedLocation.first);
110   this->FilePath = Entry ? Entry->getName() : InvalidLocation;
111   this->ReplacementRange = Range(DecomposedLocation.second, Length);
112   this->ReplacementText = ReplacementText;
113 }
114 
115 // FIXME: This should go into the Lexer, but we need to figure out how
116 // to handle ranges for refactoring in general first - there is no obvious
117 // good way how to integrate this into the Lexer yet.
118 static int getRangeSize(const SourceManager &Sources,
119                         const CharSourceRange &Range,
120                         const LangOptions &LangOpts) {
121   SourceLocation SpellingBegin = Sources.getSpellingLoc(Range.getBegin());
122   SourceLocation SpellingEnd = Sources.getSpellingLoc(Range.getEnd());
123   std::pair<FileID, unsigned> Start = Sources.getDecomposedLoc(SpellingBegin);
124   std::pair<FileID, unsigned> End = Sources.getDecomposedLoc(SpellingEnd);
125   if (Start.first != End.first) return -1;
126   if (Range.isTokenRange())
127     End.second += Lexer::MeasureTokenLength(SpellingEnd, Sources, LangOpts);
128   return End.second - Start.second;
129 }
130 
131 void Replacement::setFromSourceRange(const SourceManager &Sources,
132                                      const CharSourceRange &Range,
133                                      StringRef ReplacementText,
134                                      const LangOptions &LangOpts) {
135   setFromSourceLocation(Sources, Sources.getSpellingLoc(Range.getBegin()),
136                         getRangeSize(Sources, Range, LangOpts),
137                         ReplacementText);
138 }
139 
140 llvm::Error Replacements::add(const Replacement &R) {
141   // Check the file path.
142   if (!Replaces.empty() && R.getFilePath() != Replaces.begin()->getFilePath())
143     return llvm::make_error<llvm::StringError>(
144         "All replacements must have the same file path. New replacement: " +
145             R.getFilePath() + ", existing replacements: " +
146             Replaces.begin()->getFilePath() + "\n",
147         llvm::inconvertibleErrorCode());
148 
149   // Special-case header insertions.
150   if (R.getOffset() == UINT_MAX) {
151     Replaces.insert(R);
152     return llvm::Error::success();
153   }
154 
155   // This replacement cannot conflict with replacements that end before
156   // this replacement starts or start after this replacement ends.
157   // We also know that there currently are no overlapping replacements.
158   // Thus, we know that all replacements that start after the end of the current
159   // replacement cannot overlap.
160   Replacement AtEnd(R.getFilePath(), R.getOffset() + R.getLength(), 0, "");
161 
162   // Find the first entry that starts after or at the end of R. Note that
163   // entries that start at the end can still be conflicting if R is an
164   // insertion.
165   auto I = Replaces.lower_bound(AtEnd);
166   // If it starts at the same offset as R (can only happen if R is an
167   // insertion), we have a conflict.  In that case, increase I to fall through
168   // to the conflict check.
169   if (I != Replaces.end() && R.getOffset() == I->getOffset())
170     ++I;
171 
172   // I is the smallest iterator whose entry cannot overlap.
173   // If that is begin(), there are no overlaps.
174   if (I == Replaces.begin()) {
175     Replaces.insert(R);
176     return llvm::Error::success();
177   }
178   --I;
179   // If the previous entry does not overlap, we know that entries before it
180   // can also not overlap.
181   if (R.getOffset() != I->getOffset() &&
182       !Range(R.getOffset(), R.getLength())
183            .overlapsWith(Range(I->getOffset(), I->getLength()))) {
184     Replaces.insert(R);
185     return llvm::Error::success();
186   }
187   return llvm::make_error<llvm::StringError>(
188       "New replacement:\n" + R.toString() +
189           "\nconflicts with existing replacement:\n" + I->toString(),
190       llvm::inconvertibleErrorCode());
191 }
192 
193 namespace {
194 
195 // Represents a merged replacement, i.e. a replacement consisting of multiple
196 // overlapping replacements from 'First' and 'Second' in mergeReplacements.
197 //
198 // Position projection:
199 // Offsets and lengths of the replacements can generally refer to two different
200 // coordinate spaces. Replacements from 'First' refer to the original text
201 // whereas replacements from 'Second' refer to the text after applying 'First'.
202 //
203 // MergedReplacement always operates in the coordinate space of the original
204 // text, i.e. transforms elements from 'Second' to take into account what was
205 // changed based on the elements from 'First'.
206 //
207 // We can correctly calculate this projection as we look at the replacements in
208 // order of strictly increasing offsets.
209 //
210 // Invariants:
211 // * We always merge elements from 'First' into elements from 'Second' and vice
212 //   versa. Within each set, the replacements are non-overlapping.
213 // * We only extend to the right, i.e. merge elements with strictly increasing
214 //   offsets.
215 class MergedReplacement {
216 public:
217   MergedReplacement(const Replacement &R, bool MergeSecond, int D)
218       : MergeSecond(MergeSecond), Delta(D), FilePath(R.getFilePath()),
219         Offset(R.getOffset() + (MergeSecond ? 0 : Delta)), Length(R.getLength()),
220         Text(R.getReplacementText()) {
221     Delta += MergeSecond ? 0 : Text.size() - Length;
222     DeltaFirst = MergeSecond ? Text.size() - Length : 0;
223   }
224 
225   // Merges the next element 'R' into this merged element. As we always merge
226   // from 'First' into 'Second' or vice versa, the MergedReplacement knows what
227   // set the next element is coming from.
228   void merge(const Replacement &R) {
229     if (MergeSecond) {
230       unsigned REnd = R.getOffset() + Delta + R.getLength();
231       unsigned End = Offset + Text.size();
232       if (REnd > End) {
233         Length += REnd - End;
234         MergeSecond = false;
235       }
236       StringRef TextRef = Text;
237       StringRef Head = TextRef.substr(0, R.getOffset() + Delta - Offset);
238       StringRef Tail = TextRef.substr(REnd - Offset);
239       Text = (Head + R.getReplacementText() + Tail).str();
240       Delta += R.getReplacementText().size() - R.getLength();
241     } else {
242       unsigned End = Offset + Length;
243       StringRef RText = R.getReplacementText();
244       StringRef Tail = RText.substr(End - R.getOffset());
245       Text = (Text + Tail).str();
246       if (R.getOffset() + RText.size() > End) {
247         Length = R.getOffset() + R.getLength() - Offset;
248         MergeSecond = true;
249       } else {
250         Length += R.getLength() - RText.size();
251       }
252       DeltaFirst += RText.size() - R.getLength();
253     }
254   }
255 
256   // Returns 'true' if 'R' starts strictly after the MergedReplacement and thus
257   // doesn't need to be merged.
258   bool endsBefore(const Replacement &R) const {
259     if (MergeSecond)
260       return Offset + Text.size() < R.getOffset() + Delta;
261     return Offset + Length < R.getOffset();
262   }
263 
264   // Returns 'true' if an element from the second set should be merged next.
265   bool mergeSecond() const { return MergeSecond; }
266   int deltaFirst() const { return DeltaFirst; }
267   Replacement asReplacement() const { return {FilePath, Offset, Length, Text}; }
268 
269 private:
270   bool MergeSecond;
271 
272   // Amount of characters that elements from 'Second' need to be shifted by in
273   // order to refer to the original text.
274   int Delta;
275 
276   // Sum of all deltas (text-length - length) of elements from 'First' merged
277   // into this element. This is used to update 'Delta' once the
278   // MergedReplacement is completed.
279   int DeltaFirst;
280 
281   // Data of the actually merged replacement. FilePath and Offset aren't changed
282   // as the element is only extended to the right.
283   const StringRef FilePath;
284   const unsigned Offset;
285   unsigned Length;
286   std::string Text;
287 };
288 
289 } // namespace
290 
291 Replacements Replacements::merge(const Replacements &ReplacesToMerge) const {
292   if (empty() || ReplacesToMerge.empty())
293     return empty() ? ReplacesToMerge : *this;
294 
295   auto &First = Replaces;
296   auto &Second = ReplacesToMerge.Replaces;
297   // Delta is the amount of characters that replacements from 'Second' need to
298   // be shifted so that their offsets refer to the original text.
299   int Delta = 0;
300   ReplacementsImpl Result;
301 
302   // Iterate over both sets and always add the next element (smallest total
303   // Offset) from either 'First' or 'Second'. Merge that element with
304   // subsequent replacements as long as they overlap. See more details in the
305   // comment on MergedReplacement.
306   for (auto FirstI = First.begin(), SecondI = Second.begin();
307        FirstI != First.end() || SecondI != Second.end();) {
308     bool NextIsFirst = SecondI == Second.end() ||
309                        (FirstI != First.end() &&
310                         FirstI->getOffset() < SecondI->getOffset() + Delta);
311     MergedReplacement Merged(NextIsFirst ? *FirstI : *SecondI, NextIsFirst,
312                              Delta);
313     ++(NextIsFirst ? FirstI : SecondI);
314 
315     while ((Merged.mergeSecond() && SecondI != Second.end()) ||
316            (!Merged.mergeSecond() && FirstI != First.end())) {
317       auto &I = Merged.mergeSecond() ? SecondI : FirstI;
318       if (Merged.endsBefore(*I))
319         break;
320       Merged.merge(*I);
321       ++I;
322     }
323     Delta -= Merged.deltaFirst();
324     Result.insert(Merged.asReplacement());
325   }
326   return Replacements(Result.begin(), Result.end());
327 }
328 
329 // Combines overlapping ranges in \p Ranges and sorts the combined ranges.
330 // Returns a set of non-overlapping and sorted ranges that is equivalent to
331 // \p Ranges.
332 static std::vector<Range> combineAndSortRanges(std::vector<Range> Ranges) {
333   std::sort(Ranges.begin(), Ranges.end(),
334             [](const Range &LHS, const Range &RHS) {
335               if (LHS.getOffset() != RHS.getOffset())
336                 return LHS.getOffset() < RHS.getOffset();
337               return LHS.getLength() < RHS.getLength();
338             });
339   std::vector<Range> Result;
340   for (const auto &R : Ranges) {
341     if (Result.empty() ||
342         Result.back().getOffset() + Result.back().getLength() < R.getOffset()) {
343       Result.push_back(R);
344     } else {
345       unsigned NewEnd =
346           std::max(Result.back().getOffset() + Result.back().getLength(),
347                    R.getOffset() + R.getLength());
348       Result[Result.size() - 1] =
349           Range(Result.back().getOffset(), NewEnd - Result.back().getOffset());
350     }
351   }
352   return Result;
353 }
354 
355 std::vector<Range>
356 calculateRangesAfterReplacements(const Replacements &Replaces,
357                                  const std::vector<Range> &Ranges) {
358   // To calculate the new ranges,
359   //   - Turn \p Ranges into Replacements at (offset, length) with an empty
360   //     (unimportant) replacement text of length "length".
361   //   - Merge with \p Replaces.
362   //   - The new ranges will be the affected ranges of the merged replacements.
363   auto MergedRanges = combineAndSortRanges(Ranges);
364   if (Replaces.empty())
365     return MergedRanges;
366   tooling::Replacements FakeReplaces;
367   for (const auto &R : MergedRanges) {
368     auto Err = FakeReplaces.add(Replacement(Replaces.begin()->getFilePath(),
369                                             R.getOffset(), R.getLength(),
370                                             std::string(R.getLength(), ' ')));
371     assert(!Err &&
372            "Replacements must not conflict since ranges have been merged.");
373     (void)Err;
374   }
375   return FakeReplaces.merge(Replaces).getAffectedRanges();
376 }
377 
378 std::vector<Range> Replacements::getAffectedRanges() const {
379   std::vector<Range> ChangedRanges;
380   int Shift = 0;
381   for (const Replacement &R : Replaces) {
382     unsigned Offset = R.getOffset() + Shift;
383     unsigned Length = R.getReplacementText().size();
384     Shift += Length - R.getLength();
385     ChangedRanges.push_back(Range(Offset, Length));
386   }
387   return combineAndSortRanges(ChangedRanges);
388 }
389 
390 unsigned Replacements::getShiftedCodePosition(unsigned Position) const {
391   unsigned Offset = 0;
392   for (const auto& R : Replaces) {
393     if (R.getOffset() + R.getLength() <= Position) {
394       Offset += R.getReplacementText().size() - R.getLength();
395       continue;
396     }
397     if (R.getOffset() < Position &&
398         R.getOffset() + R.getReplacementText().size() <= Position) {
399       Position = R.getOffset() + R.getReplacementText().size();
400       if (R.getReplacementText().size() > 0)
401         Position--;
402     }
403     break;
404   }
405   return Position + Offset;
406 }
407 
408 bool applyAllReplacements(const Replacements &Replaces, Rewriter &Rewrite) {
409   bool Result = true;
410   for (Replacements::const_iterator I = Replaces.begin(),
411                                     E = Replaces.end();
412        I != E; ++I) {
413     if (I->isApplicable()) {
414       Result = I->apply(Rewrite) && Result;
415     } else {
416       Result = false;
417     }
418   }
419   return Result;
420 }
421 
422 llvm::Expected<std::string> applyAllReplacements(StringRef Code,
423                                                 const Replacements &Replaces) {
424   if (Replaces.empty())
425     return Code.str();
426 
427   IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
428       new vfs::InMemoryFileSystem);
429   FileManager Files(FileSystemOptions(), InMemoryFileSystem);
430   DiagnosticsEngine Diagnostics(
431       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
432       new DiagnosticOptions);
433   SourceManager SourceMgr(Diagnostics, Files);
434   Rewriter Rewrite(SourceMgr, LangOptions());
435   InMemoryFileSystem->addFile(
436       "<stdin>", 0, llvm::MemoryBuffer::getMemBuffer(Code, "<stdin>"));
437   FileID ID = SourceMgr.createFileID(Files.getFile("<stdin>"), SourceLocation(),
438                                      clang::SrcMgr::C_User);
439   for (Replacements::const_iterator I = Replaces.begin(), E = Replaces.end();
440        I != E; ++I) {
441     Replacement Replace("<stdin>", I->getOffset(), I->getLength(),
442                         I->getReplacementText());
443     if (!Replace.apply(Rewrite))
444       return llvm::make_error<llvm::StringError>(
445           "Failed to apply replacement: " + Replace.toString(),
446           llvm::inconvertibleErrorCode());
447   }
448   std::string Result;
449   llvm::raw_string_ostream OS(Result);
450   Rewrite.getEditBuffer(ID).write(OS);
451   OS.flush();
452   return Result;
453 }
454 
455 std::map<std::string, Replacements>
456 groupReplacementsByFile(const Replacements &Replaces) {
457   std::map<std::string, Replacements> FileToReplaces;
458   for (const auto &Replace : Replaces)
459     // We can ignore the Error here since \p Replaces is already conflict-free.
460     FileToReplaces[Replace.getFilePath()].add(Replace);
461   return FileToReplaces;
462 }
463 
464 } // end namespace tooling
465 } // end namespace clang
466