1 //===-- DifferenceEngine.cpp - Structural function/module comparison ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This header defines the implementation of the LLVM difference
10 // engine, which structurally compares global values within a module.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "DifferenceEngine.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringSet.h"
20 #include "llvm/IR/CFG.h"
21 #include "llvm/IR/Constants.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/Instructions.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include "llvm/Support/type_traits.h"
28 #include <utility>
29
30 using namespace llvm;
31
32 namespace {
33
34 /// A priority queue, implemented as a heap.
35 template <class T, class Sorter, unsigned InlineCapacity>
36 class PriorityQueue {
37 Sorter Precedes;
38 llvm::SmallVector<T, InlineCapacity> Storage;
39
40 public:
PriorityQueue(const Sorter & Precedes)41 PriorityQueue(const Sorter &Precedes) : Precedes(Precedes) {}
42
43 /// Checks whether the heap is empty.
empty() const44 bool empty() const { return Storage.empty(); }
45
46 /// Insert a new value on the heap.
insert(const T & V)47 void insert(const T &V) {
48 unsigned Index = Storage.size();
49 Storage.push_back(V);
50 if (Index == 0) return;
51
52 T *data = Storage.data();
53 while (true) {
54 unsigned Target = (Index + 1) / 2 - 1;
55 if (!Precedes(data[Index], data[Target])) return;
56 std::swap(data[Index], data[Target]);
57 if (Target == 0) return;
58 Index = Target;
59 }
60 }
61
62 /// Remove the minimum value in the heap. Only valid on a non-empty heap.
remove_min()63 T remove_min() {
64 assert(!empty());
65 T tmp = Storage[0];
66
67 unsigned NewSize = Storage.size() - 1;
68 if (NewSize) {
69 // Move the slot at the end to the beginning.
70 if (std::is_trivially_copyable<T>::value)
71 Storage[0] = Storage[NewSize];
72 else
73 std::swap(Storage[0], Storage[NewSize]);
74
75 // Bubble the root up as necessary.
76 unsigned Index = 0;
77 while (true) {
78 // With a 1-based index, the children would be Index*2 and Index*2+1.
79 unsigned R = (Index + 1) * 2;
80 unsigned L = R - 1;
81
82 // If R is out of bounds, we're done after this in any case.
83 if (R >= NewSize) {
84 // If L is also out of bounds, we're done immediately.
85 if (L >= NewSize) break;
86
87 // Otherwise, test whether we should swap L and Index.
88 if (Precedes(Storage[L], Storage[Index]))
89 std::swap(Storage[L], Storage[Index]);
90 break;
91 }
92
93 // Otherwise, we need to compare with the smaller of L and R.
94 // Prefer R because it's closer to the end of the array.
95 unsigned IndexToTest = (Precedes(Storage[L], Storage[R]) ? L : R);
96
97 // If Index is >= the min of L and R, then heap ordering is restored.
98 if (!Precedes(Storage[IndexToTest], Storage[Index]))
99 break;
100
101 // Otherwise, keep bubbling up.
102 std::swap(Storage[IndexToTest], Storage[Index]);
103 Index = IndexToTest;
104 }
105 }
106 Storage.pop_back();
107
108 return tmp;
109 }
110 };
111
112 /// A function-scope difference engine.
113 class FunctionDifferenceEngine {
114 DifferenceEngine &Engine;
115
116 // Some initializers may reference the variable we're currently checking. This
117 // can cause an infinite loop. The Saved[LR]HS ivars can be checked to prevent
118 // recursing.
119 const Value *SavedLHS;
120 const Value *SavedRHS;
121
122 /// The current mapping from old local values to new local values.
123 DenseMap<const Value *, const Value *> Values;
124
125 /// The current mapping from old blocks to new blocks.
126 DenseMap<const BasicBlock *, const BasicBlock *> Blocks;
127
128 DenseSet<std::pair<const Value *, const Value *>> TentativeValues;
129
getUnprocPredCount(const BasicBlock * Block) const130 unsigned getUnprocPredCount(const BasicBlock *Block) const {
131 unsigned Count = 0;
132 for (const_pred_iterator I = pred_begin(Block), E = pred_end(Block); I != E;
133 ++I)
134 if (!Blocks.count(*I)) Count++;
135 return Count;
136 }
137
138 typedef std::pair<const BasicBlock *, const BasicBlock *> BlockPair;
139
140 /// A type which sorts a priority queue by the number of unprocessed
141 /// predecessor blocks it has remaining.
142 ///
143 /// This is actually really expensive to calculate.
144 struct QueueSorter {
145 const FunctionDifferenceEngine &fde;
QueueSorter__anon246d63510111::FunctionDifferenceEngine::QueueSorter146 explicit QueueSorter(const FunctionDifferenceEngine &fde) : fde(fde) {}
147
operator ()__anon246d63510111::FunctionDifferenceEngine::QueueSorter148 bool operator()(BlockPair &Old, BlockPair &New) {
149 return fde.getUnprocPredCount(Old.first)
150 < fde.getUnprocPredCount(New.first);
151 }
152 };
153
154 /// A queue of unified blocks to process.
155 PriorityQueue<BlockPair, QueueSorter, 20> Queue;
156
157 /// Try to unify the given two blocks. Enqueues them for processing
158 /// if they haven't already been processed.
159 ///
160 /// Returns true if there was a problem unifying them.
tryUnify(const BasicBlock * L,const BasicBlock * R)161 bool tryUnify(const BasicBlock *L, const BasicBlock *R) {
162 const BasicBlock *&Ref = Blocks[L];
163
164 if (Ref) {
165 if (Ref == R) return false;
166
167 Engine.logf("successor %l cannot be equivalent to %r; "
168 "it's already equivalent to %r")
169 << L << R << Ref;
170 return true;
171 }
172
173 Ref = R;
174 Queue.insert(BlockPair(L, R));
175 return false;
176 }
177
178 /// Unifies two instructions, given that they're known not to have
179 /// structural differences.
unify(const Instruction * L,const Instruction * R)180 void unify(const Instruction *L, const Instruction *R) {
181 DifferenceEngine::Context C(Engine, L, R);
182
183 bool Result = diff(L, R, true, true);
184 assert(!Result && "structural differences second time around?");
185 (void) Result;
186 if (!L->use_empty())
187 Values[L] = R;
188 }
189
processQueue()190 void processQueue() {
191 while (!Queue.empty()) {
192 BlockPair Pair = Queue.remove_min();
193 diff(Pair.first, Pair.second);
194 }
195 }
196
diff(const BasicBlock * L,const BasicBlock * R)197 void diff(const BasicBlock *L, const BasicBlock *R) {
198 DifferenceEngine::Context C(Engine, L, R);
199
200 BasicBlock::const_iterator LI = L->begin(), LE = L->end();
201 BasicBlock::const_iterator RI = R->begin();
202
203 do {
204 assert(LI != LE && RI != R->end());
205 const Instruction *LeftI = &*LI, *RightI = &*RI;
206
207 // If the instructions differ, start the more sophisticated diff
208 // algorithm at the start of the block.
209 if (diff(LeftI, RightI, false, false)) {
210 TentativeValues.clear();
211 return runBlockDiff(L->begin(), R->begin());
212 }
213
214 // Otherwise, tentatively unify them.
215 if (!LeftI->use_empty())
216 TentativeValues.insert(std::make_pair(LeftI, RightI));
217
218 ++LI;
219 ++RI;
220 } while (LI != LE); // This is sufficient: we can't get equality of
221 // terminators if there are residual instructions.
222
223 // Unify everything in the block, non-tentatively this time.
224 TentativeValues.clear();
225 for (LI = L->begin(), RI = R->begin(); LI != LE; ++LI, ++RI)
226 unify(&*LI, &*RI);
227 }
228
229 bool matchForBlockDiff(const Instruction *L, const Instruction *R);
230 void runBlockDiff(BasicBlock::const_iterator LI,
231 BasicBlock::const_iterator RI);
232
diffCallSites(const CallBase & L,const CallBase & R,bool Complain)233 bool diffCallSites(const CallBase &L, const CallBase &R, bool Complain) {
234 // FIXME: call attributes
235 if (!equivalentAsOperands(L.getCalledOperand(), R.getCalledOperand())) {
236 if (Complain) Engine.log("called functions differ");
237 return true;
238 }
239 if (L.arg_size() != R.arg_size()) {
240 if (Complain) Engine.log("argument counts differ");
241 return true;
242 }
243 for (unsigned I = 0, E = L.arg_size(); I != E; ++I)
244 if (!equivalentAsOperands(L.getArgOperand(I), R.getArgOperand(I))) {
245 if (Complain)
246 Engine.logf("arguments %l and %r differ")
247 << L.getArgOperand(I) << R.getArgOperand(I);
248 return true;
249 }
250 return false;
251 }
252
diff(const Instruction * L,const Instruction * R,bool Complain,bool TryUnify)253 bool diff(const Instruction *L, const Instruction *R, bool Complain,
254 bool TryUnify) {
255 // FIXME: metadata (if Complain is set)
256
257 // Different opcodes always imply different operations.
258 if (L->getOpcode() != R->getOpcode()) {
259 if (Complain) Engine.log("different instruction types");
260 return true;
261 }
262
263 if (isa<CmpInst>(L)) {
264 if (cast<CmpInst>(L)->getPredicate()
265 != cast<CmpInst>(R)->getPredicate()) {
266 if (Complain) Engine.log("different predicates");
267 return true;
268 }
269 } else if (isa<CallInst>(L)) {
270 return diffCallSites(cast<CallInst>(*L), cast<CallInst>(*R), Complain);
271 } else if (isa<PHINode>(L)) {
272 const PHINode &LI = cast<PHINode>(*L);
273 const PHINode &RI = cast<PHINode>(*R);
274
275 // This is really weird; type uniquing is broken?
276 if (LI.getType() != RI.getType()) {
277 if (!LI.getType()->isPointerTy() || !RI.getType()->isPointerTy()) {
278 if (Complain) Engine.log("different phi types");
279 return true;
280 }
281 }
282
283 if (LI.getNumIncomingValues() != RI.getNumIncomingValues()) {
284 if (Complain)
285 Engine.log("PHI node # of incoming values differ");
286 return true;
287 }
288
289 for (unsigned I = 0; I < LI.getNumIncomingValues(); ++I) {
290 if (TryUnify)
291 tryUnify(LI.getIncomingBlock(I), RI.getIncomingBlock(I));
292
293 if (!equivalentAsOperands(LI.getIncomingValue(I),
294 RI.getIncomingValue(I))) {
295 if (Complain)
296 Engine.log("PHI node incoming values differ");
297 return true;
298 }
299 }
300
301 return false;
302
303 // Terminators.
304 } else if (isa<InvokeInst>(L)) {
305 const InvokeInst &LI = cast<InvokeInst>(*L);
306 const InvokeInst &RI = cast<InvokeInst>(*R);
307 if (diffCallSites(LI, RI, Complain))
308 return true;
309
310 if (TryUnify) {
311 tryUnify(LI.getNormalDest(), RI.getNormalDest());
312 tryUnify(LI.getUnwindDest(), RI.getUnwindDest());
313 }
314 return false;
315
316 } else if (isa<CallBrInst>(L)) {
317 const CallBrInst &LI = cast<CallBrInst>(*L);
318 const CallBrInst &RI = cast<CallBrInst>(*R);
319 if (LI.getNumIndirectDests() != RI.getNumIndirectDests()) {
320 if (Complain)
321 Engine.log("callbr # of indirect destinations differ");
322 return true;
323 }
324
325 // Perform the "try unify" step so that we can equate the indirect
326 // destinations before checking the call site.
327 for (unsigned I = 0; I < LI.getNumIndirectDests(); I++)
328 tryUnify(LI.getIndirectDest(I), RI.getIndirectDest(I));
329
330 if (diffCallSites(LI, RI, Complain))
331 return true;
332
333 if (TryUnify)
334 tryUnify(LI.getDefaultDest(), RI.getDefaultDest());
335 return false;
336
337 } else if (isa<BranchInst>(L)) {
338 const BranchInst *LI = cast<BranchInst>(L);
339 const BranchInst *RI = cast<BranchInst>(R);
340 if (LI->isConditional() != RI->isConditional()) {
341 if (Complain) Engine.log("branch conditionality differs");
342 return true;
343 }
344
345 if (LI->isConditional()) {
346 if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) {
347 if (Complain) Engine.log("branch conditions differ");
348 return true;
349 }
350 if (TryUnify) tryUnify(LI->getSuccessor(1), RI->getSuccessor(1));
351 }
352 if (TryUnify) tryUnify(LI->getSuccessor(0), RI->getSuccessor(0));
353 return false;
354
355 } else if (isa<IndirectBrInst>(L)) {
356 const IndirectBrInst *LI = cast<IndirectBrInst>(L);
357 const IndirectBrInst *RI = cast<IndirectBrInst>(R);
358 if (LI->getNumDestinations() != RI->getNumDestinations()) {
359 if (Complain) Engine.log("indirectbr # of destinations differ");
360 return true;
361 }
362
363 if (!equivalentAsOperands(LI->getAddress(), RI->getAddress())) {
364 if (Complain) Engine.log("indirectbr addresses differ");
365 return true;
366 }
367
368 if (TryUnify) {
369 for (unsigned i = 0; i < LI->getNumDestinations(); i++) {
370 tryUnify(LI->getDestination(i), RI->getDestination(i));
371 }
372 }
373 return false;
374
375 } else if (isa<SwitchInst>(L)) {
376 const SwitchInst *LI = cast<SwitchInst>(L);
377 const SwitchInst *RI = cast<SwitchInst>(R);
378 if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) {
379 if (Complain) Engine.log("switch conditions differ");
380 return true;
381 }
382 if (TryUnify) tryUnify(LI->getDefaultDest(), RI->getDefaultDest());
383
384 bool Difference = false;
385
386 DenseMap<const ConstantInt *, const BasicBlock *> LCases;
387 for (auto Case : LI->cases())
388 LCases[Case.getCaseValue()] = Case.getCaseSuccessor();
389
390 for (auto Case : RI->cases()) {
391 const ConstantInt *CaseValue = Case.getCaseValue();
392 const BasicBlock *LCase = LCases[CaseValue];
393 if (LCase) {
394 if (TryUnify)
395 tryUnify(LCase, Case.getCaseSuccessor());
396 LCases.erase(CaseValue);
397 } else if (Complain || !Difference) {
398 if (Complain)
399 Engine.logf("right switch has extra case %r") << CaseValue;
400 Difference = true;
401 }
402 }
403 if (!Difference)
404 for (DenseMap<const ConstantInt *, const BasicBlock *>::iterator
405 I = LCases.begin(),
406 E = LCases.end();
407 I != E; ++I) {
408 if (Complain)
409 Engine.logf("left switch has extra case %l") << I->first;
410 Difference = true;
411 }
412 return Difference;
413 } else if (isa<UnreachableInst>(L)) {
414 return false;
415 }
416
417 if (L->getNumOperands() != R->getNumOperands()) {
418 if (Complain) Engine.log("instructions have different operand counts");
419 return true;
420 }
421
422 for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) {
423 Value *LO = L->getOperand(I), *RO = R->getOperand(I);
424 if (!equivalentAsOperands(LO, RO)) {
425 if (Complain) Engine.logf("operands %l and %r differ") << LO << RO;
426 return true;
427 }
428 }
429
430 return false;
431 }
432
433 public:
equivalentAsOperands(const Constant * L,const Constant * R)434 bool equivalentAsOperands(const Constant *L, const Constant *R) {
435 // Use equality as a preliminary filter.
436 if (L == R)
437 return true;
438
439 if (L->getValueID() != R->getValueID())
440 return false;
441
442 // Ask the engine about global values.
443 if (isa<GlobalValue>(L))
444 return Engine.equivalentAsOperands(cast<GlobalValue>(L),
445 cast<GlobalValue>(R));
446
447 // Compare constant expressions structurally.
448 if (isa<ConstantExpr>(L))
449 return equivalentAsOperands(cast<ConstantExpr>(L),
450 cast<ConstantExpr>(R));
451
452 // Constants of the "same type" don't always actually have the same
453 // type; I don't know why. Just white-list them.
454 if (isa<ConstantPointerNull>(L) || isa<UndefValue>(L) || isa<ConstantAggregateZero>(L))
455 return true;
456
457 // Block addresses only match if we've already encountered the
458 // block. FIXME: tentative matches?
459 if (isa<BlockAddress>(L))
460 return Blocks[cast<BlockAddress>(L)->getBasicBlock()]
461 == cast<BlockAddress>(R)->getBasicBlock();
462
463 // If L and R are ConstantVectors, compare each element
464 if (isa<ConstantVector>(L)) {
465 const ConstantVector *CVL = cast<ConstantVector>(L);
466 const ConstantVector *CVR = cast<ConstantVector>(R);
467 if (CVL->getType()->getNumElements() != CVR->getType()->getNumElements())
468 return false;
469 for (unsigned i = 0; i < CVL->getType()->getNumElements(); i++) {
470 if (!equivalentAsOperands(CVL->getOperand(i), CVR->getOperand(i)))
471 return false;
472 }
473 return true;
474 }
475
476 // If L and R are ConstantArrays, compare the element count and types.
477 if (isa<ConstantArray>(L)) {
478 const ConstantArray *CAL = cast<ConstantArray>(L);
479 const ConstantArray *CAR = cast<ConstantArray>(R);
480 // Sometimes a type may be equivalent, but not uniquified---e.g. it may
481 // contain a GEP instruction. Do a deeper comparison of the types.
482 if (CAL->getType()->getNumElements() != CAR->getType()->getNumElements())
483 return false;
484
485 for (unsigned I = 0; I < CAL->getType()->getNumElements(); ++I) {
486 if (!equivalentAsOperands(CAL->getAggregateElement(I),
487 CAR->getAggregateElement(I)))
488 return false;
489 }
490
491 return true;
492 }
493
494 // If L and R are ConstantStructs, compare each field and type.
495 if (isa<ConstantStruct>(L)) {
496 const ConstantStruct *CSL = cast<ConstantStruct>(L);
497 const ConstantStruct *CSR = cast<ConstantStruct>(R);
498
499 const StructType *LTy = cast<StructType>(CSL->getType());
500 const StructType *RTy = cast<StructType>(CSR->getType());
501
502 // The StructTypes should have the same attributes. Don't use
503 // isLayoutIdentical(), because that just checks the element pointers,
504 // which may not work here.
505 if (LTy->getNumElements() != RTy->getNumElements() ||
506 LTy->isPacked() != RTy->isPacked())
507 return false;
508
509 for (unsigned I = 0; I < LTy->getNumElements(); I++) {
510 const Value *LAgg = CSL->getAggregateElement(I);
511 const Value *RAgg = CSR->getAggregateElement(I);
512
513 if (LAgg == SavedLHS || RAgg == SavedRHS) {
514 if (LAgg != SavedLHS || RAgg != SavedRHS)
515 // If the left and right operands aren't both re-analyzing the
516 // variable, then the initialiers don't match, so report "false".
517 // Otherwise, we skip these operands..
518 return false;
519
520 continue;
521 }
522
523 if (!equivalentAsOperands(LAgg, RAgg)) {
524 return false;
525 }
526 }
527
528 return true;
529 }
530
531 return false;
532 }
533
equivalentAsOperands(const ConstantExpr * L,const ConstantExpr * R)534 bool equivalentAsOperands(const ConstantExpr *L, const ConstantExpr *R) {
535 if (L == R)
536 return true;
537
538 if (L->getOpcode() != R->getOpcode())
539 return false;
540
541 switch (L->getOpcode()) {
542 case Instruction::ICmp:
543 case Instruction::FCmp:
544 if (L->getPredicate() != R->getPredicate())
545 return false;
546 break;
547
548 case Instruction::GetElementPtr:
549 // FIXME: inbounds?
550 break;
551
552 default:
553 break;
554 }
555
556 if (L->getNumOperands() != R->getNumOperands())
557 return false;
558
559 for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) {
560 const auto *LOp = L->getOperand(I);
561 const auto *ROp = R->getOperand(I);
562
563 if (LOp == SavedLHS || ROp == SavedRHS) {
564 if (LOp != SavedLHS || ROp != SavedRHS)
565 // If the left and right operands aren't both re-analyzing the
566 // variable, then the initialiers don't match, so report "false".
567 // Otherwise, we skip these operands..
568 return false;
569
570 continue;
571 }
572
573 if (!equivalentAsOperands(LOp, ROp))
574 return false;
575 }
576
577 return true;
578 }
579
equivalentAsOperands(const Value * L,const Value * R)580 bool equivalentAsOperands(const Value *L, const Value *R) {
581 // Fall out if the values have different kind.
582 // This possibly shouldn't take priority over oracles.
583 if (L->getValueID() != R->getValueID())
584 return false;
585
586 // Value subtypes: Argument, Constant, Instruction, BasicBlock,
587 // InlineAsm, MDNode, MDString, PseudoSourceValue
588
589 if (isa<Constant>(L))
590 return equivalentAsOperands(cast<Constant>(L), cast<Constant>(R));
591
592 if (isa<Instruction>(L))
593 return Values[L] == R || TentativeValues.count(std::make_pair(L, R));
594
595 if (isa<Argument>(L))
596 return Values[L] == R;
597
598 if (isa<BasicBlock>(L))
599 return Blocks[cast<BasicBlock>(L)] != R;
600
601 // Pretend everything else is identical.
602 return true;
603 }
604
605 // Avoid a gcc warning about accessing 'this' in an initializer.
this_()606 FunctionDifferenceEngine *this_() { return this; }
607
608 public:
FunctionDifferenceEngine(DifferenceEngine & Engine,const Value * SavedLHS=nullptr,const Value * SavedRHS=nullptr)609 FunctionDifferenceEngine(DifferenceEngine &Engine,
610 const Value *SavedLHS = nullptr,
611 const Value *SavedRHS = nullptr)
612 : Engine(Engine), SavedLHS(SavedLHS), SavedRHS(SavedRHS),
613 Queue(QueueSorter(*this_())) {}
614
diff(const Function * L,const Function * R)615 void diff(const Function *L, const Function *R) {
616 if (L->arg_size() != R->arg_size())
617 Engine.log("different argument counts");
618
619 // Map the arguments.
620 for (Function::const_arg_iterator LI = L->arg_begin(), LE = L->arg_end(),
621 RI = R->arg_begin(), RE = R->arg_end();
622 LI != LE && RI != RE; ++LI, ++RI)
623 Values[&*LI] = &*RI;
624
625 tryUnify(&*L->begin(), &*R->begin());
626 processQueue();
627 }
628 };
629
630 struct DiffEntry {
DiffEntry__anon246d63510111::DiffEntry631 DiffEntry() : Cost(0) {}
632
633 unsigned Cost;
634 llvm::SmallVector<char, 8> Path; // actually of DifferenceEngine::DiffChange
635 };
636
matchForBlockDiff(const Instruction * L,const Instruction * R)637 bool FunctionDifferenceEngine::matchForBlockDiff(const Instruction *L,
638 const Instruction *R) {
639 return !diff(L, R, false, false);
640 }
641
runBlockDiff(BasicBlock::const_iterator LStart,BasicBlock::const_iterator RStart)642 void FunctionDifferenceEngine::runBlockDiff(BasicBlock::const_iterator LStart,
643 BasicBlock::const_iterator RStart) {
644 BasicBlock::const_iterator LE = LStart->getParent()->end();
645 BasicBlock::const_iterator RE = RStart->getParent()->end();
646
647 unsigned NL = std::distance(LStart, LE);
648
649 SmallVector<DiffEntry, 20> Paths1(NL+1);
650 SmallVector<DiffEntry, 20> Paths2(NL+1);
651
652 DiffEntry *Cur = Paths1.data();
653 DiffEntry *Next = Paths2.data();
654
655 const unsigned LeftCost = 2;
656 const unsigned RightCost = 2;
657 const unsigned MatchCost = 0;
658
659 assert(TentativeValues.empty());
660
661 // Initialize the first column.
662 for (unsigned I = 0; I != NL+1; ++I) {
663 Cur[I].Cost = I * LeftCost;
664 for (unsigned J = 0; J != I; ++J)
665 Cur[I].Path.push_back(DC_left);
666 }
667
668 for (BasicBlock::const_iterator RI = RStart; RI != RE; ++RI) {
669 // Initialize the first row.
670 Next[0] = Cur[0];
671 Next[0].Cost += RightCost;
672 Next[0].Path.push_back(DC_right);
673
674 unsigned Index = 1;
675 for (BasicBlock::const_iterator LI = LStart; LI != LE; ++LI, ++Index) {
676 if (matchForBlockDiff(&*LI, &*RI)) {
677 Next[Index] = Cur[Index-1];
678 Next[Index].Cost += MatchCost;
679 Next[Index].Path.push_back(DC_match);
680 TentativeValues.insert(std::make_pair(&*LI, &*RI));
681 } else if (Next[Index-1].Cost <= Cur[Index].Cost) {
682 Next[Index] = Next[Index-1];
683 Next[Index].Cost += LeftCost;
684 Next[Index].Path.push_back(DC_left);
685 } else {
686 Next[Index] = Cur[Index];
687 Next[Index].Cost += RightCost;
688 Next[Index].Path.push_back(DC_right);
689 }
690 }
691
692 std::swap(Cur, Next);
693 }
694
695 // We don't need the tentative values anymore; everything from here
696 // on out should be non-tentative.
697 TentativeValues.clear();
698
699 SmallVectorImpl<char> &Path = Cur[NL].Path;
700 BasicBlock::const_iterator LI = LStart, RI = RStart;
701
702 DiffLogBuilder Diff(Engine.getConsumer());
703
704 // Drop trailing matches.
705 while (Path.size() && Path.back() == DC_match)
706 Path.pop_back();
707
708 // Skip leading matches.
709 SmallVectorImpl<char>::iterator
710 PI = Path.begin(), PE = Path.end();
711 while (PI != PE && *PI == DC_match) {
712 unify(&*LI, &*RI);
713 ++PI;
714 ++LI;
715 ++RI;
716 }
717
718 for (; PI != PE; ++PI) {
719 switch (static_cast<DiffChange>(*PI)) {
720 case DC_match:
721 assert(LI != LE && RI != RE);
722 {
723 const Instruction *L = &*LI, *R = &*RI;
724 unify(L, R);
725 Diff.addMatch(L, R);
726 }
727 ++LI; ++RI;
728 break;
729
730 case DC_left:
731 assert(LI != LE);
732 Diff.addLeft(&*LI);
733 ++LI;
734 break;
735
736 case DC_right:
737 assert(RI != RE);
738 Diff.addRight(&*RI);
739 ++RI;
740 break;
741 }
742 }
743
744 // Finishing unifying and complaining about the tails of the block,
745 // which should be matches all the way through.
746 while (LI != LE) {
747 assert(RI != RE);
748 unify(&*LI, &*RI);
749 ++LI;
750 ++RI;
751 }
752
753 // If the terminators have different kinds, but one is an invoke and the
754 // other is an unconditional branch immediately following a call, unify
755 // the results and the destinations.
756 const Instruction *LTerm = LStart->getParent()->getTerminator();
757 const Instruction *RTerm = RStart->getParent()->getTerminator();
758 if (isa<BranchInst>(LTerm) && isa<InvokeInst>(RTerm)) {
759 if (cast<BranchInst>(LTerm)->isConditional()) return;
760 BasicBlock::const_iterator I = LTerm->getIterator();
761 if (I == LStart->getParent()->begin()) return;
762 --I;
763 if (!isa<CallInst>(*I)) return;
764 const CallInst *LCall = cast<CallInst>(&*I);
765 const InvokeInst *RInvoke = cast<InvokeInst>(RTerm);
766 if (!equivalentAsOperands(LCall->getCalledOperand(),
767 RInvoke->getCalledOperand()))
768 return;
769 if (!LCall->use_empty())
770 Values[LCall] = RInvoke;
771 tryUnify(LTerm->getSuccessor(0), RInvoke->getNormalDest());
772 } else if (isa<InvokeInst>(LTerm) && isa<BranchInst>(RTerm)) {
773 if (cast<BranchInst>(RTerm)->isConditional()) return;
774 BasicBlock::const_iterator I = RTerm->getIterator();
775 if (I == RStart->getParent()->begin()) return;
776 --I;
777 if (!isa<CallInst>(*I)) return;
778 const CallInst *RCall = cast<CallInst>(I);
779 const InvokeInst *LInvoke = cast<InvokeInst>(LTerm);
780 if (!equivalentAsOperands(LInvoke->getCalledOperand(),
781 RCall->getCalledOperand()))
782 return;
783 if (!LInvoke->use_empty())
784 Values[LInvoke] = RCall;
785 tryUnify(LInvoke->getNormalDest(), RTerm->getSuccessor(0));
786 }
787 }
788 }
789
anchor()790 void DifferenceEngine::Oracle::anchor() { }
791
diff(const Function * L,const Function * R)792 void DifferenceEngine::diff(const Function *L, const Function *R) {
793 Context C(*this, L, R);
794
795 // FIXME: types
796 // FIXME: attributes and CC
797 // FIXME: parameter attributes
798
799 // If both are declarations, we're done.
800 if (L->empty() && R->empty())
801 return;
802 else if (L->empty())
803 log("left function is declaration, right function is definition");
804 else if (R->empty())
805 log("right function is declaration, left function is definition");
806 else
807 FunctionDifferenceEngine(*this).diff(L, R);
808 }
809
diff(const Module * L,const Module * R)810 void DifferenceEngine::diff(const Module *L, const Module *R) {
811 StringSet<> LNames;
812 SmallVector<std::pair<const Function *, const Function *>, 20> Queue;
813
814 unsigned LeftAnonCount = 0;
815 unsigned RightAnonCount = 0;
816
817 for (Module::const_iterator I = L->begin(), E = L->end(); I != E; ++I) {
818 const Function *LFn = &*I;
819 StringRef Name = LFn->getName();
820 if (Name.empty()) {
821 ++LeftAnonCount;
822 continue;
823 }
824
825 LNames.insert(Name);
826
827 if (Function *RFn = R->getFunction(LFn->getName()))
828 Queue.push_back(std::make_pair(LFn, RFn));
829 else
830 logf("function %l exists only in left module") << LFn;
831 }
832
833 for (Module::const_iterator I = R->begin(), E = R->end(); I != E; ++I) {
834 const Function *RFn = &*I;
835 StringRef Name = RFn->getName();
836 if (Name.empty()) {
837 ++RightAnonCount;
838 continue;
839 }
840
841 if (!LNames.count(Name))
842 logf("function %r exists only in right module") << RFn;
843 }
844
845 if (LeftAnonCount != 0 || RightAnonCount != 0) {
846 SmallString<32> Tmp;
847 logf(("not comparing " + Twine(LeftAnonCount) +
848 " anonymous functions in the left module and " +
849 Twine(RightAnonCount) + " in the right module")
850 .toStringRef(Tmp));
851 }
852
853 for (SmallVectorImpl<std::pair<const Function *, const Function *>>::iterator
854 I = Queue.begin(),
855 E = Queue.end();
856 I != E; ++I)
857 diff(I->first, I->second);
858 }
859
equivalentAsOperands(const GlobalValue * L,const GlobalValue * R)860 bool DifferenceEngine::equivalentAsOperands(const GlobalValue *L,
861 const GlobalValue *R) {
862 if (globalValueOracle) return (*globalValueOracle)(L, R);
863
864 if (isa<GlobalVariable>(L) && isa<GlobalVariable>(R)) {
865 const GlobalVariable *GVL = cast<GlobalVariable>(L);
866 const GlobalVariable *GVR = cast<GlobalVariable>(R);
867 if (GVL->hasLocalLinkage() && GVL->hasUniqueInitializer() &&
868 GVR->hasLocalLinkage() && GVR->hasUniqueInitializer())
869 return FunctionDifferenceEngine(*this, GVL, GVR)
870 .equivalentAsOperands(GVL->getInitializer(), GVR->getInitializer());
871 }
872
873 return L->getName() == R->getName();
874 }
875