1 //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // Implementation file for the IRSimilarityIdentifier for identifying
11 // similarities in IR including the IRInstructionMapper.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Analysis/IRSimilarityIdentifier.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/IR/Intrinsics.h"
18 #include "llvm/IR/Operator.h"
19 #include "llvm/IR/User.h"
20 #include "llvm/InitializePasses.h"
21 #include "llvm/Support/SuffixTree.h"
22 
23 using namespace llvm;
24 using namespace IRSimilarity;
25 
26 namespace llvm {
27 cl::opt<bool>
28     DisableBranches("no-ir-sim-branch-matching", cl::init(false),
29                     cl::ReallyHidden,
30                     cl::desc("disable similarity matching, and outlining, "
31                              "across branches for debugging purposes."));
32 
33 cl::opt<bool>
34     DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
35                          cl::ReallyHidden,
36                          cl::desc("disable outlining indirect calls."));
37 
38 cl::opt<bool>
39     MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
40                      cl::desc("only allow matching call instructions if the "
41                               "name and type signature match."));
42 } // namespace llvm
43 
44 IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
45                                      IRInstructionDataList &IDList)
46     : Inst(&I), Legal(Legality), IDL(&IDList) {
47   initializeInstruction();
48 }
49 
50 void IRInstructionData::initializeInstruction() {
51   // We check for whether we have a comparison instruction.  If it is, we
52   // find the "less than" version of the predicate for consistency for
53   // comparison instructions throught the program.
54   if (CmpInst *C = dyn_cast<CmpInst>(Inst)) {
55     CmpInst::Predicate Predicate = predicateForConsistency(C);
56     if (Predicate != C->getPredicate())
57       RevisedPredicate = Predicate;
58   }
59 
60   // Here we collect the operands and their types for determining whether
61   // the structure of the operand use matches between two different candidates.
62   for (Use &OI : Inst->operands()) {
63     if (isa<CmpInst>(Inst) && RevisedPredicate.hasValue()) {
64       // If we have a CmpInst where the predicate is reversed, it means the
65       // operands must be reversed as well.
66       OperVals.insert(OperVals.begin(), OI.get());
67       continue;
68     }
69 
70     OperVals.push_back(OI.get());
71   }
72 
73   // We capture the incoming BasicBlocks as values as well as the incoming
74   // Values in order to check for structural similarity.
75   if (PHINode *PN = dyn_cast<PHINode>(Inst))
76     for (BasicBlock *BB : PN->blocks())
77       OperVals.push_back(BB);
78 }
79 
80 IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
81     : IDL(&IDList) {}
82 
83 void IRInstructionData::setBranchSuccessors(
84     DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
85   assert(isa<BranchInst>(Inst) && "Instruction must be branch");
86 
87   BranchInst *BI = cast<BranchInst>(Inst);
88   DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
89 
90   BBNumIt = BasicBlockToInteger.find(BI->getParent());
91   assert(BBNumIt != BasicBlockToInteger.end() &&
92          "Could not find location for BasicBlock!");
93 
94   int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
95 
96   for (BasicBlock *Successor : BI->successors()) {
97     BBNumIt = BasicBlockToInteger.find(Successor);
98     assert(BBNumIt != BasicBlockToInteger.end() &&
99            "Could not find number for BasicBlock!");
100     int OtherBlockNumber = static_cast<int>(BBNumIt->second);
101 
102     int Relative = OtherBlockNumber - CurrentBlockNumber;
103     RelativeBlockLocations.push_back(Relative);
104   }
105 }
106 
107 void IRInstructionData::setCalleeName(bool MatchByName) {
108   CallInst *CI = dyn_cast<CallInst>(Inst);
109   assert(CI && "Instruction must be call");
110 
111   CalleeName = "";
112   if (!CI->isIndirectCall() && MatchByName)
113     CalleeName = CI->getCalledFunction()->getName().str();
114 }
115 
116 void IRInstructionData::setPHIPredecessors(
117     DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
118   assert(isa<PHINode>(Inst) && "Instruction must be phi node");
119 
120   PHINode *PN = cast<PHINode>(Inst);
121   DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
122 
123   BBNumIt = BasicBlockToInteger.find(PN->getParent());
124   assert(BBNumIt != BasicBlockToInteger.end() &&
125          "Could not find location for BasicBlock!");
126 
127   int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
128 
129   // Convert the incoming blocks of the PHINode to an integer value, based on
130   // the relative distances between the current block and the incoming block.
131   for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
132     BasicBlock *Incoming = PN->getIncomingBlock(Idx);
133     BBNumIt = BasicBlockToInteger.find(Incoming);
134     assert(BBNumIt != BasicBlockToInteger.end() &&
135            "Could not find number for BasicBlock!");
136     int OtherBlockNumber = static_cast<int>(BBNumIt->second);
137 
138     int Relative = OtherBlockNumber - CurrentBlockNumber;
139     RelativeBlockLocations.push_back(Relative);
140     RelativeBlockLocations.push_back(Relative);
141   }
142 }
143 
144 CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
145   switch (CI->getPredicate()) {
146   case CmpInst::FCMP_OGT:
147   case CmpInst::FCMP_UGT:
148   case CmpInst::FCMP_OGE:
149   case CmpInst::FCMP_UGE:
150   case CmpInst::ICMP_SGT:
151   case CmpInst::ICMP_UGT:
152   case CmpInst::ICMP_SGE:
153   case CmpInst::ICMP_UGE:
154     return CI->getSwappedPredicate();
155   default:
156     return CI->getPredicate();
157   }
158 }
159 
160 CmpInst::Predicate IRInstructionData::getPredicate() const {
161   assert(isa<CmpInst>(Inst) &&
162          "Can only get a predicate from a compare instruction");
163 
164   if (RevisedPredicate.hasValue())
165     return RevisedPredicate.getValue();
166 
167   return cast<CmpInst>(Inst)->getPredicate();
168 }
169 
170 StringRef IRInstructionData::getCalleeName() const {
171   assert(isa<CallInst>(Inst) &&
172          "Can only get a name from a call instruction");
173 
174   assert(CalleeName.hasValue() && "CalleeName has not been set");
175 
176   return *CalleeName;
177 }
178 
179 bool IRSimilarity::isClose(const IRInstructionData &A,
180                            const IRInstructionData &B) {
181 
182   if (!A.Legal || !B.Legal)
183     return false;
184 
185   // Check if we are performing the same sort of operation on the same types
186   // but not on the same values.
187   if (!A.Inst->isSameOperationAs(B.Inst)) {
188     // If there is a predicate, this means that either there is a swapped
189     // predicate, or that the types are different, we want to make sure that
190     // the predicates are equivalent via swapping.
191     if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {
192 
193       if (A.getPredicate() != B.getPredicate())
194         return false;
195 
196       // If the predicates are the same via swap, make sure that the types are
197       // still the same.
198       auto ZippedTypes = zip(A.OperVals, B.OperVals);
199 
200       return all_of(
201           ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
202             return std::get<0>(R)->getType() == std::get<1>(R)->getType();
203           });
204     }
205 
206     return false;
207   }
208 
209   // Since any GEP Instruction operands after the first operand cannot be
210   // defined by a register, we must make sure that the operands after the first
211   // are the same in the two instructions
212   if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {
213     auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);
214 
215     // If the instructions do not have the same inbounds restrictions, we do
216     // not consider them the same.
217     if (GEP->isInBounds() != OtherGEP->isInBounds())
218       return false;
219 
220     auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
221 
222     // We increment here since we do not care about the first instruction,
223     // we only care about the following operands since they must be the
224     // exact same to be considered similar.
225     return all_of(drop_begin(ZippedOperands),
226                   [](std::tuple<llvm::Use &, llvm::Use &> R) {
227                     return std::get<0>(R) == std::get<1>(R);
228                   });
229   }
230 
231   // If the instructions are functions calls, we make sure that the function
232   // name is the same.  We already know that the types are since is
233   // isSameOperationAs is true.
234   if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
235     if (A.getCalleeName().str().compare(B.getCalleeName().str()) != 0)
236       return false;
237   }
238 
239   if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) &&
240       A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
241     return false;
242 
243   return true;
244 }
245 
246 // TODO: This is the same as the MachineOutliner, and should be consolidated
247 // into the same interface.
248 void IRInstructionMapper::convertToUnsignedVec(
249     BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
250     std::vector<unsigned> &IntegerMapping) {
251   BasicBlock::iterator It = BB.begin();
252 
253   std::vector<unsigned> IntegerMappingForBB;
254   std::vector<IRInstructionData *> InstrListForBB;
255 
256   for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
257     switch (InstClassifier.visit(*It)) {
258     case InstrType::Legal:
259       mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
260       break;
261     case InstrType::Illegal:
262       mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
263       break;
264     case InstrType::Invisible:
265       AddedIllegalLastTime = false;
266       break;
267     }
268   }
269 
270   if (HaveLegalRange) {
271     if (AddedIllegalLastTime)
272       mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
273     for (IRInstructionData *ID : InstrListForBB)
274       this->IDL->push_back(*ID);
275     llvm::append_range(InstrList, InstrListForBB);
276     llvm::append_range(IntegerMapping, IntegerMappingForBB);
277   }
278 }
279 
280 // TODO: This is the same as the MachineOutliner, and should be consolidated
281 // into the same interface.
282 unsigned IRInstructionMapper::mapToLegalUnsigned(
283     BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
284     std::vector<IRInstructionData *> &InstrListForBB) {
285   // We added something legal, so we should unset the AddedLegalLastTime
286   // flag.
287   AddedIllegalLastTime = false;
288 
289   // If we have at least two adjacent legal instructions (which may have
290   // invisible instructions in between), remember that.
291   if (CanCombineWithPrevInstr)
292     HaveLegalRange = true;
293   CanCombineWithPrevInstr = true;
294 
295   // Get the integer for this instruction or give it the current
296   // LegalInstrNumber.
297   IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL);
298   InstrListForBB.push_back(ID);
299 
300   if (isa<BranchInst>(*It))
301     ID->setBranchSuccessors(BasicBlockToInteger);
302 
303   if (isa<CallInst>(*It))
304     ID->setCalleeName(EnableMatchCallsByName);
305 
306   if (isa<PHINode>(*It))
307     ID->setPHIPredecessors(BasicBlockToInteger);
308 
309   // Add to the instruction list
310   bool WasInserted;
311   DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
312       ResultIt;
313   std::tie(ResultIt, WasInserted) =
314       InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
315   unsigned INumber = ResultIt->second;
316 
317   // There was an insertion.
318   if (WasInserted)
319     LegalInstrNumber++;
320 
321   IntegerMappingForBB.push_back(INumber);
322 
323   // Make sure we don't overflow or use any integers reserved by the DenseMap.
324   assert(LegalInstrNumber < IllegalInstrNumber &&
325          "Instruction mapping overflow!");
326 
327   assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
328          "Tried to assign DenseMap tombstone or empty key to instruction.");
329   assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
330          "Tried to assign DenseMap tombstone or empty key to instruction.");
331 
332   return INumber;
333 }
334 
335 IRInstructionData *
336 IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality,
337                                                IRInstructionDataList &IDL) {
338   return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL);
339 }
340 
341 IRInstructionData *
342 IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) {
343   return new (InstDataAllocator->Allocate()) IRInstructionData(IDL);
344 }
345 
346 IRInstructionDataList *
347 IRInstructionMapper::allocateIRInstructionDataList() {
348   return new (IDLAllocator->Allocate()) IRInstructionDataList();
349 }
350 
351 // TODO: This is the same as the MachineOutliner, and should be consolidated
352 // into the same interface.
353 unsigned IRInstructionMapper::mapToIllegalUnsigned(
354     BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
355     std::vector<IRInstructionData *> &InstrListForBB, bool End) {
356   // Can't combine an illegal instruction. Set the flag.
357   CanCombineWithPrevInstr = false;
358 
359   // Only add one illegal number per range of legal numbers.
360   if (AddedIllegalLastTime)
361     return IllegalInstrNumber;
362 
363   IRInstructionData *ID = nullptr;
364   if (!End)
365     ID = allocateIRInstructionData(*It, false, *IDL);
366   else
367     ID = allocateIRInstructionData(*IDL);
368   InstrListForBB.push_back(ID);
369 
370   // Remember that we added an illegal number last time.
371   AddedIllegalLastTime = true;
372   unsigned INumber = IllegalInstrNumber;
373   IntegerMappingForBB.push_back(IllegalInstrNumber--);
374 
375   assert(LegalInstrNumber < IllegalInstrNumber &&
376          "Instruction mapping overflow!");
377 
378   assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
379          "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
380 
381   assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
382          "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
383 
384   return INumber;
385 }
386 
387 IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
388                                              IRInstructionData *FirstInstIt,
389                                              IRInstructionData *LastInstIt)
390     : StartIdx(StartIdx), Len(Len) {
391 
392   assert(FirstInstIt != nullptr && "Instruction is nullptr!");
393   assert(LastInstIt != nullptr && "Instruction is nullptr!");
394   assert(StartIdx + Len > StartIdx &&
395          "Overflow for IRSimilarityCandidate range?");
396   assert(Len - 1 == static_cast<unsigned>(std::distance(
397                         iterator(FirstInstIt), iterator(LastInstIt))) &&
398          "Length of the first and last IRInstructionData do not match the "
399          "given length");
400 
401   // We iterate over the given instructions, and map each unique value
402   // to a unique number in the IRSimilarityCandidate ValueToNumber and
403   // NumberToValue maps.  A constant get its own value globally, the individual
404   // uses of the constants are not considered to be unique.
405   //
406   // IR:                    Mapping Added:
407   // %add1 = add i32 %a, c1    %add1 -> 3, %a -> 1, c1 -> 2
408   // %add2 = add i32 %a, %1    %add2 -> 4
409   // %add3 = add i32 c2, c1    %add3 -> 6, c2 -> 5
410   //
411   // when replace with global values, starting from 1, would be
412   //
413   // 3 = add i32 1, 2
414   // 4 = add i32 1, 3
415   // 6 = add i32 5, 2
416   unsigned LocalValNumber = 1;
417   IRInstructionDataList::iterator ID = iterator(*FirstInstIt);
418   for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
419     // Map the operand values to an unsigned integer if it does not already
420     // have an unsigned integer assigned to it.
421     for (Value *Arg : ID->OperVals)
422       if (ValueToNumber.find(Arg) == ValueToNumber.end()) {
423         ValueToNumber.try_emplace(Arg, LocalValNumber);
424         NumberToValue.try_emplace(LocalValNumber, Arg);
425         LocalValNumber++;
426       }
427 
428     // Mapping the instructions to an unsigned integer if it is not already
429     // exist in the mapping.
430     if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) {
431       ValueToNumber.try_emplace(ID->Inst, LocalValNumber);
432       NumberToValue.try_emplace(LocalValNumber, ID->Inst);
433       LocalValNumber++;
434     }
435   }
436 
437   // Setting the first and last instruction data pointers for the candidate.  If
438   // we got through the entire for loop without hitting an assert, we know
439   // that both of these instructions are not nullptrs.
440   FirstInst = FirstInstIt;
441   LastInst = LastInstIt;
442 }
443 
444 bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,
445                                       const IRSimilarityCandidate &B) {
446   if (A.getLength() != B.getLength())
447     return false;
448 
449   auto InstrDataForBoth =
450       zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
451 
452   return all_of(InstrDataForBoth,
453                 [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
454                   IRInstructionData &A = std::get<0>(R);
455                   IRInstructionData &B = std::get<1>(R);
456                   if (!A.Legal || !B.Legal)
457                     return false;
458                   return isClose(A, B);
459                 });
460 }
461 
462 /// Determine if one or more of the assigned global value numbers for the
463 /// operands in \p TargetValueNumbers is in the current mapping set for operand
464 /// numbers in \p SourceOperands.  The set of possible corresponding global
465 /// value numbers are replaced with the most recent version of compatible
466 /// values.
467 ///
468 /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
469 /// value number for the source IRInstructionCandidate.
470 /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
471 /// IRSimilarityCandidate global value numbers to a set of possible numbers in
472 /// the target.
473 /// \param [in] SourceOperands - The operands in the original
474 /// IRSimilarityCandidate in the current instruction.
475 /// \param [in] TargetValueNumbers - The global value numbers of the operands in
476 /// the corresponding Instruction in the other IRSimilarityCandidate.
477 /// \returns true if there exists a possible mapping between the source
478 /// Instruction operands and the target Instruction operands, and false if not.
479 static bool checkNumberingAndReplaceCommutative(
480   const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
481   DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
482   ArrayRef<Value *> &SourceOperands,
483   DenseSet<unsigned> &TargetValueNumbers){
484 
485   DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
486 
487   unsigned ArgVal;
488   bool WasInserted;
489 
490   // Iterate over the operands in the source IRSimilarityCandidate to determine
491   // whether there exists an operand in the other IRSimilarityCandidate that
492   // creates a valid mapping of Value to Value between the
493   // IRSimilarityCaniddates.
494   for (Value *V : SourceOperands) {
495     ArgVal = SourceValueToNumberMapping.find(V)->second;
496 
497     std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
498         std::make_pair(ArgVal, TargetValueNumbers));
499 
500     // Instead of finding a current mapping, we inserted a set.  This means a
501     // mapping did not exist for the source Instruction operand, it has no
502     // current constraints we need to check.
503     if (WasInserted)
504       continue;
505 
506     // If a mapping already exists for the source operand to the values in the
507     // other IRSimilarityCandidate we need to iterate over the items in other
508     // IRSimilarityCandidate's Instruction to determine whether there is a valid
509     // mapping of Value to Value.
510     DenseSet<unsigned> NewSet;
511     for (unsigned &Curr : ValueMappingIt->second)
512       // If we can find the value in the mapping, we add it to the new set.
513       if (TargetValueNumbers.contains(Curr))
514         NewSet.insert(Curr);
515 
516     // If we could not find a Value, return 0.
517     if (NewSet.empty())
518       return false;
519 
520     // Otherwise replace the old mapping with the newly constructed one.
521     if (NewSet.size() != ValueMappingIt->second.size())
522       ValueMappingIt->second.swap(NewSet);
523 
524     // We have reached no conclusions about the mapping, and cannot remove
525     // any items from the other operands, so we move to check the next operand.
526     if (ValueMappingIt->second.size() != 1)
527       continue;
528 
529 
530     unsigned ValToRemove = *ValueMappingIt->second.begin();
531     // When there is only one item left in the mapping for and operand, remove
532     // the value from the other operands.  If it results in there being no
533     // mapping, return false, it means the mapping is wrong
534     for (Value *InnerV : SourceOperands) {
535       if (V == InnerV)
536         continue;
537 
538       unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
539       ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
540       if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
541         continue;
542 
543       ValueMappingIt->second.erase(ValToRemove);
544       if (ValueMappingIt->second.empty())
545         return false;
546     }
547   }
548 
549   return true;
550 }
551 
552 /// Determine if operand number \p TargetArgVal is in the current mapping set
553 /// for operand number \p SourceArgVal.
554 ///
555 /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
556 /// value numbers from source IRSimilarityCandidate to target
557 /// IRSimilarityCandidate.
558 /// \param [in] SourceArgVal The global value number for an operand in the
559 /// in the original candidate.
560 /// \param [in] TargetArgVal The global value number for the corresponding
561 /// operand in the other candidate.
562 /// \returns True if there exists a mapping and false if not.
563 bool checkNumberingAndReplace(
564     DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
565     unsigned SourceArgVal, unsigned TargetArgVal) {
566   // We are given two unsigned integers representing the global values of
567   // the operands in different IRSimilarityCandidates and a current mapping
568   // between the two.
569   //
570   // Source Operand GVN: 1
571   // Target Operand GVN: 2
572   // CurrentMapping: {1: {1, 2}}
573   //
574   // Since we have mapping, and the target operand is contained in the set, we
575   // update it to:
576   // CurrentMapping: {1: {2}}
577   // and can return true. But, if the mapping was
578   // CurrentMapping: {1: {3}}
579   // we would return false.
580 
581   bool WasInserted;
582   DenseMap<unsigned, DenseSet<unsigned>>::iterator Val;
583 
584   std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
585       std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
586 
587   // If we created a new mapping, then we are done.
588   if (WasInserted)
589     return true;
590 
591   // If there is more than one option in the mapping set, and the target value
592   // is included in the mapping set replace that set with one that only includes
593   // the target value, as it is the only valid mapping via the non commutative
594   // instruction.
595 
596   DenseSet<unsigned> &TargetSet = Val->second;
597   if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
598     TargetSet.clear();
599     TargetSet.insert(TargetArgVal);
600     return true;
601   }
602 
603   // Return true if we can find the value in the set.
604   return TargetSet.contains(TargetArgVal);
605 }
606 
607 bool IRSimilarityCandidate::compareNonCommutativeOperandMapping(
608     OperandMapping A, OperandMapping B) {
609   // Iterators to keep track of where we are in the operands for each
610   // Instruction.
611   ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
612   ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
613   unsigned OperandLength = A.OperVals.size();
614 
615   // For each operand, get the value numbering and ensure it is consistent.
616   for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
617     unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
618     unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
619 
620     // Attempt to add a set with only the target value.  If there is no mapping
621     // we can create it here.
622     //
623     // For an instruction like a subtraction:
624     // IRSimilarityCandidateA:  IRSimilarityCandidateB:
625     // %resultA = sub %a, %b    %resultB = sub %d, %e
626     //
627     // We map %a -> %d and %b -> %e.
628     //
629     // And check to see whether their mapping is consistent in
630     // checkNumberingAndReplace.
631 
632     if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
633       return false;
634 
635     if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
636       return false;
637   }
638   return true;
639 }
640 
641 bool IRSimilarityCandidate::compareCommutativeOperandMapping(
642     OperandMapping A, OperandMapping B) {
643   DenseSet<unsigned> ValueNumbersA;
644   DenseSet<unsigned> ValueNumbersB;
645 
646   ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
647   ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
648   unsigned OperandLength = A.OperVals.size();
649 
650   // Find the value number sets for the operands.
651   for (unsigned Idx = 0; Idx < OperandLength;
652        Idx++, VItA++, VItB++) {
653     ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
654     ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
655   }
656 
657   // Iterate over the operands in the first IRSimilarityCandidate and make sure
658   // there exists a possible mapping with the operands in the second
659   // IRSimilarityCandidate.
660   if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,
661                                            A.ValueNumberMapping, A.OperVals,
662                                            ValueNumbersB))
663     return false;
664 
665   // Iterate over the operands in the second IRSimilarityCandidate and make sure
666   // there exists a possible mapping with the operands in the first
667   // IRSimilarityCandidate.
668   if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,
669                                            B.ValueNumberMapping, B.OperVals,
670                                            ValueNumbersA))
671     return false;
672 
673   return true;
674 }
675 
676 bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A,
677                                                    RelativeLocMapping B) {
678   // Get the basic blocks the label refers to.
679   BasicBlock *ABB = static_cast<BasicBlock *>(A.OperVal);
680   BasicBlock *BBB = static_cast<BasicBlock *>(B.OperVal);
681 
682   // Get the basic blocks contained in each region.
683   DenseSet<BasicBlock *> BasicBlockA;
684   DenseSet<BasicBlock *> BasicBlockB;
685   A.IRSC.getBasicBlocks(BasicBlockA);
686   B.IRSC.getBasicBlocks(BasicBlockB);
687 
688   // Determine if the block is contained in the region.
689   bool AContained = BasicBlockA.contains(ABB);
690   bool BContained = BasicBlockB.contains(BBB);
691 
692   // Both blocks need to be contained in the region, or both need to be outside
693   // the reigon.
694   if (AContained != BContained)
695     return false;
696 
697   // If both are contained, then we need to make sure that the relative
698   // distance to the target blocks are the same.
699   if (AContained)
700     return A.RelativeLocation == B.RelativeLocation;
701   return true;
702 }
703 
704 bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
705                                              const IRSimilarityCandidate &B) {
706   DenseMap<unsigned, DenseSet<unsigned>> MappingA;
707   DenseMap<unsigned, DenseSet<unsigned>> MappingB;
708   return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB);
709 }
710 
711 typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &,
712                       SmallVector<int, 4> &, ArrayRef<Value *> &,
713                       ArrayRef<Value *> &>
714     ZippedRelativeLocationsT;
715 
716 bool IRSimilarityCandidate::compareStructure(
717     const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,
718     DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
719     DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
720   if (A.getLength() != B.getLength())
721     return false;
722 
723   if (A.ValueToNumber.size() != B.ValueToNumber.size())
724     return false;
725 
726   iterator ItA = A.begin();
727   iterator ItB = B.begin();
728 
729   // These ValueNumber Mapping sets create a create a mapping between the values
730   // in one candidate to values in the other candidate.  If we create a set with
731   // one element, and that same element maps to the original element in the
732   // candidate we have a good mapping.
733   DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
734 
735 
736   // Iterate over the instructions contained in each candidate
737   unsigned SectionLength = A.getStartIdx() + A.getLength();
738   for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
739        ItA++, ItB++, Loc++) {
740     // Make sure the instructions are similar to one another.
741     if (!isClose(*ItA, *ItB))
742       return false;
743 
744     Instruction *IA = ItA->Inst;
745     Instruction *IB = ItB->Inst;
746 
747     if (!ItA->Legal || !ItB->Legal)
748       return false;
749 
750     // Get the operand sets for the instructions.
751     ArrayRef<Value *> OperValsA = ItA->OperVals;
752     ArrayRef<Value *> OperValsB = ItB->OperVals;
753 
754     unsigned InstValA = A.ValueToNumber.find(IA)->second;
755     unsigned InstValB = B.ValueToNumber.find(IB)->second;
756 
757     bool WasInserted;
758     // Ensure that the mappings for the instructions exists.
759     std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
760         std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
761     if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
762       return false;
763 
764     std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert(
765         std::make_pair(InstValB, DenseSet<unsigned>({InstValA})));
766     if (!WasInserted && !ValueMappingIt->second.contains(InstValA))
767       return false;
768 
769     // We have different paths for commutative instructions and non-commutative
770     // instructions since commutative instructions could allow multiple mappings
771     // to certain values.
772     if (IA->isCommutative() && !isa<FPMathOperator>(IA)) {
773       if (!compareCommutativeOperandMapping(
774               {A, OperValsA, ValueNumberMappingA},
775               {B, OperValsB, ValueNumberMappingB}))
776         return false;
777       continue;
778     }
779 
780     // Handle the non-commutative cases.
781     if (!compareNonCommutativeOperandMapping(
782             {A, OperValsA, ValueNumberMappingA},
783             {B, OperValsB, ValueNumberMappingB}))
784       return false;
785 
786     // Here we check that between two corresponding instructions,
787     // when referring to a basic block in the same region, the
788     // relative locations are the same. And, that the instructions refer to
789     // basic blocks outside the region in the same corresponding locations.
790 
791     // We are able to make the assumption about blocks outside of the region
792     // since the target block labels are considered values and will follow the
793     // same number matching that we defined for the other instructions in the
794     // region.  So, at this point, in each location we target a specific block
795     // outside the region, we are targeting a corresponding block in each
796     // analagous location in the region we are comparing to.
797     if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
798         !(isa<PHINode>(IA) && isa<PHINode>(IB)))
799       continue;
800 
801     SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
802     SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
803     if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
804         OperValsA.size() != OperValsB.size())
805       return false;
806 
807     ZippedRelativeLocationsT ZippedRelativeLocations =
808         zip(RelBlockLocsA, RelBlockLocsB, OperValsA, OperValsB);
809     if (any_of(ZippedRelativeLocations,
810                [&A, &B](std::tuple<int, int, Value *, Value *> R) {
811                  return !checkRelativeLocations(
812                      {A, std::get<0>(R), std::get<2>(R)},
813                      {B, std::get<1>(R), std::get<3>(R)});
814                }))
815       return false;
816   }
817   return true;
818 }
819 
820 bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A,
821                                     const IRSimilarityCandidate &B) {
822   auto DoesOverlap = [](const IRSimilarityCandidate &X,
823                         const IRSimilarityCandidate &Y) {
824     // Check:
825     // XXXXXX        X starts before Y ends
826     //      YYYYYYY  Y starts after X starts
827     return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
828   };
829 
830   return DoesOverlap(A, B) || DoesOverlap(B, A);
831 }
832 
833 void IRSimilarityIdentifier::populateMapper(
834     Module &M, std::vector<IRInstructionData *> &InstrList,
835     std::vector<unsigned> &IntegerMapping) {
836 
837   std::vector<IRInstructionData *> InstrListForModule;
838   std::vector<unsigned> IntegerMappingForModule;
839   // Iterate over the functions in the module to map each Instruction in each
840   // BasicBlock to an unsigned integer.
841   Mapper.initializeForBBs(M);
842 
843   for (Function &F : M) {
844 
845     if (F.empty())
846       continue;
847 
848     for (BasicBlock &BB : F) {
849 
850       // BB has potential to have similarity since it has a size greater than 2
851       // and can therefore match other regions greater than 2. Map it to a list
852       // of unsigned integers.
853       Mapper.convertToUnsignedVec(BB, InstrListForModule,
854                                   IntegerMappingForModule);
855     }
856 
857     BasicBlock::iterator It = F.begin()->end();
858     Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
859                                 true);
860     if (InstrListForModule.size() > 0)
861       Mapper.IDL->push_back(*InstrListForModule.back());
862   }
863 
864   // Insert the InstrListForModule at the end of the overall InstrList so that
865   // we can have a long InstrList for the entire set of Modules being analyzed.
866   llvm::append_range(InstrList, InstrListForModule);
867   // Do the same as above, but for IntegerMapping.
868   llvm::append_range(IntegerMapping, IntegerMappingForModule);
869 }
870 
871 void IRSimilarityIdentifier::populateMapper(
872     ArrayRef<std::unique_ptr<Module>> &Modules,
873     std::vector<IRInstructionData *> &InstrList,
874     std::vector<unsigned> &IntegerMapping) {
875 
876   // Iterate over, and map the instructions in each module.
877   for (const std::unique_ptr<Module> &M : Modules)
878     populateMapper(*M, InstrList, IntegerMapping);
879 }
880 
881 /// From a repeated subsequence, find all the different instances of the
882 /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
883 /// the IRInstructionData in subsequence.
884 ///
885 /// \param [in] Mapper - The instruction mapper for basic correctness checks.
886 /// \param [in] InstrList - The vector that holds the instruction data.
887 /// \param [in] IntegerMapping - The vector that holds the mapped integers.
888 /// \param [out] CandsForRepSubstring - The vector to store the generated
889 /// IRSimilarityCandidates.
890 static void createCandidatesFromSuffixTree(
891     const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,
892     std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,
893     std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
894 
895   unsigned StringLen = RS.Length;
896   if (StringLen < 2)
897     return;
898 
899   // Create an IRSimilarityCandidate for instance of this subsequence \p RS.
900   for (const unsigned &StartIdx : RS.StartIndices) {
901     unsigned EndIdx = StartIdx + StringLen - 1;
902 
903     // Check that this subsequence does not contain an illegal instruction.
904     bool ContainsIllegal = false;
905     for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
906       unsigned Key = IntegerMapping[CurrIdx];
907       if (Key > Mapper.IllegalInstrNumber) {
908         ContainsIllegal = true;
909         break;
910       }
911     }
912 
913     // If we have an illegal instruction, we should not create an
914     // IRSimilarityCandidate for this region.
915     if (ContainsIllegal)
916       continue;
917 
918     // We are getting iterators to the instructions in this region of code
919     // by advancing the start and end indices from the start of the
920     // InstrList.
921     std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
922     std::advance(StartIt, StartIdx);
923     std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
924     std::advance(EndIt, EndIdx);
925 
926     CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
927   }
928 }
929 
930 void IRSimilarityCandidate::createCanonicalRelationFrom(
931     IRSimilarityCandidate &SourceCand,
932     DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
933     DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
934   assert(SourceCand.CanonNumToNumber.size() != 0 &&
935          "Base canonical relationship is empty!");
936   assert(SourceCand.NumberToCanonNum.size() != 0 &&
937          "Base canonical relationship is empty!");
938 
939   assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
940   assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
941 
942   DenseSet<unsigned> UsedGVNs;
943   // Iterate over the mappings provided from this candidate to SourceCand.  We
944   // are then able to map the GVN in this candidate to the same canonical number
945   // given to the corresponding GVN in SourceCand.
946   for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
947     unsigned SourceGVN = GVNMapping.first;
948 
949     assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
950 
951     unsigned ResultGVN;
952     // We need special handling if we have more than one potential value.  This
953     // means that there are at least two GVNs that could correspond to this GVN.
954     // This could lead to potential swapping later on, so we make a decision
955     // here to ensure a one-to-one mapping.
956     if (GVNMapping.second.size() > 1) {
957       bool Found = false;
958       for (unsigned Val : GVNMapping.second) {
959         // We make sure the target value number hasn't already been reserved.
960         if (UsedGVNs.contains(Val))
961           continue;
962 
963         // We make sure that the opposite mapping is still consistent.
964         DenseMap<unsigned, DenseSet<unsigned>>::iterator It =
965             FromSourceMapping.find(Val);
966 
967         if (!It->second.contains(SourceGVN))
968           continue;
969 
970         // We pick the first item that satisfies these conditions.
971         Found = true;
972         ResultGVN = Val;
973         break;
974       }
975 
976       assert(Found && "Could not find matching value for source GVN");
977       (void)Found;
978 
979     } else
980       ResultGVN = *GVNMapping.second.begin();
981 
982     // Whatever GVN is found, we mark it as used.
983     UsedGVNs.insert(ResultGVN);
984 
985     unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
986     CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
987     NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
988   }
989 }
990 
991 void IRSimilarityCandidate::createCanonicalMappingFor(
992     IRSimilarityCandidate &CurrCand) {
993   assert(CurrCand.CanonNumToNumber.size() == 0 &&
994          "Canonical Relationship is non-empty");
995   assert(CurrCand.NumberToCanonNum.size() == 0 &&
996          "Canonical Relationship is non-empty");
997 
998   unsigned CanonNum = 0;
999   // Iterate over the value numbers found, the order does not matter in this
1000   // case.
1001   for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
1002     CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
1003     CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
1004     CanonNum++;
1005   }
1006 }
1007 
1008 /// From the list of IRSimilarityCandidates, perform a comparison between each
1009 /// IRSimilarityCandidate to determine if there are overlapping
1010 /// IRInstructionData, or if they do not have the same structure.
1011 ///
1012 /// \param [in] CandsForRepSubstring - The vector containing the
1013 /// IRSimilarityCandidates.
1014 /// \param [out] StructuralGroups - the mapping of unsigned integers to vector
1015 /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
1016 /// vector are structurally similar to one another.
1017 static void findCandidateStructures(
1018     std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
1019     DenseMap<unsigned, SimilarityGroup> &StructuralGroups) {
1020   std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
1021       InnerCandEndIt;
1022 
1023   // IRSimilarityCandidates each have a structure for operand use.  It is
1024   // possible that two instances of the same subsequences have different
1025   // structure. Each type of structure found is assigned a number.  This
1026   // DenseMap maps an IRSimilarityCandidate to which type of similarity
1027   // discovered it fits within.
1028   DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
1029 
1030   // Find the compatibility from each candidate to the others to determine
1031   // which candidates overlap and which have the same structure by mapping
1032   // each structure to a different group.
1033   bool SameStructure;
1034   bool Inserted;
1035   unsigned CurrentGroupNum = 0;
1036   unsigned OuterGroupNum;
1037   DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt;
1038   DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner;
1039   DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair;
1040 
1041   // Iterate over the candidates to determine its structural and overlapping
1042   // compatibility with other instructions
1043   DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
1044   DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
1045   for (CandIt = CandsForRepSubstring.begin(),
1046       CandEndIt = CandsForRepSubstring.end();
1047        CandIt != CandEndIt; CandIt++) {
1048 
1049     // Determine if it has an assigned structural group already.
1050     CandToGroupIt = CandToGroup.find(&*CandIt);
1051     if (CandToGroupIt == CandToGroup.end()) {
1052       // If not, we assign it one, and add it to our mapping.
1053       std::tie(CandToGroupIt, Inserted) =
1054           CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++));
1055     }
1056 
1057     // Get the structural group number from the iterator.
1058     OuterGroupNum = CandToGroupIt->second;
1059 
1060     // Check if we already have a list of IRSimilarityCandidates for the current
1061     // structural group.  Create one if one does not exist.
1062     CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
1063     if (CurrentGroupPair == StructuralGroups.end()) {
1064       IRSimilarityCandidate::createCanonicalMappingFor(*CandIt);
1065       std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
1066           std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
1067     }
1068 
1069     // Iterate over the IRSimilarityCandidates following the current
1070     // IRSimilarityCandidate in the list to determine whether the two
1071     // IRSimilarityCandidates are compatible.  This is so we do not repeat pairs
1072     // of IRSimilarityCandidates.
1073     for (InnerCandIt = std::next(CandIt),
1074         InnerCandEndIt = CandsForRepSubstring.end();
1075          InnerCandIt != InnerCandEndIt; InnerCandIt++) {
1076 
1077       // We check if the inner item has a group already, if it does, we skip it.
1078       CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
1079       if (CandToGroupItInner != CandToGroup.end())
1080         continue;
1081 
1082       // Otherwise we determine if they have the same structure and add it to
1083       // vector if they match.
1084       ValueNumberMappingA.clear();
1085       ValueNumberMappingB.clear();
1086       SameStructure = IRSimilarityCandidate::compareStructure(
1087           *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
1088       if (!SameStructure)
1089         continue;
1090 
1091       InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
1092                                                ValueNumberMappingB);
1093       CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1094       CurrentGroupPair->second.push_back(*InnerCandIt);
1095     }
1096   }
1097 }
1098 
1099 void IRSimilarityIdentifier::findCandidates(
1100     std::vector<IRInstructionData *> &InstrList,
1101     std::vector<unsigned> &IntegerMapping) {
1102   SuffixTree ST(IntegerMapping);
1103 
1104   std::vector<IRSimilarityCandidate> CandsForRepSubstring;
1105   std::vector<SimilarityGroup> NewCandidateGroups;
1106 
1107   DenseMap<unsigned, SimilarityGroup> StructuralGroups;
1108 
1109   // Iterate over the subsequences found by the Suffix Tree to create
1110   // IRSimilarityCandidates for each repeated subsequence and determine which
1111   // instances are structurally similar to one another.
1112   for (SuffixTree::RepeatedSubstring &RS : ST) {
1113     createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
1114                                    CandsForRepSubstring);
1115 
1116     if (CandsForRepSubstring.size() < 2)
1117       continue;
1118 
1119     findCandidateStructures(CandsForRepSubstring, StructuralGroups);
1120     for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups)
1121       // We only add the group if it contains more than one
1122       // IRSimilarityCandidate.  If there is only one, that means there is no
1123       // other repeated subsequence with the same structure.
1124       if (Group.second.size() > 1)
1125         SimilarityCandidates->push_back(Group.second);
1126 
1127     CandsForRepSubstring.clear();
1128     StructuralGroups.clear();
1129     NewCandidateGroups.clear();
1130   }
1131 }
1132 
1133 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
1134     ArrayRef<std::unique_ptr<Module>> Modules) {
1135   resetSimilarityCandidates();
1136 
1137   std::vector<IRInstructionData *> InstrList;
1138   std::vector<unsigned> IntegerMapping;
1139   Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1140   Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1141   Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1142 
1143   populateMapper(Modules, InstrList, IntegerMapping);
1144   findCandidates(InstrList, IntegerMapping);
1145 
1146   return SimilarityCandidates.getValue();
1147 }
1148 
1149 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
1150   resetSimilarityCandidates();
1151   Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1152   Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1153   Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1154 
1155   std::vector<IRInstructionData *> InstrList;
1156   std::vector<unsigned> IntegerMapping;
1157 
1158   populateMapper(M, InstrList, IntegerMapping);
1159   findCandidates(InstrList, IntegerMapping);
1160 
1161   return SimilarityCandidates.getValue();
1162 }
1163 
1164 INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier",
1165                 "ir-similarity-identifier", false, true)
1166 
1167 IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
1168     : ModulePass(ID) {
1169   initializeIRSimilarityIdentifierWrapperPassPass(
1170       *PassRegistry::getPassRegistry());
1171 }
1172 
1173 bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
1174   IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
1175                                         MatchCallsByName));
1176   return false;
1177 }
1178 
1179 bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) {
1180   IRSI.reset();
1181   return false;
1182 }
1183 
1184 bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
1185   IRSI->findSimilarity(M);
1186   return false;
1187 }
1188 
1189 AnalysisKey IRSimilarityAnalysis::Key;
1190 IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
1191                                                  ModuleAnalysisManager &) {
1192 
1193   auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
1194                                      MatchCallsByName);
1195   IRSI.findSimilarity(M);
1196   return IRSI;
1197 }
1198 
1199 PreservedAnalyses
1200 IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) {
1201   IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M);
1202   Optional<SimilarityGroupList> &SimilarityCandidatesOpt = IRSI.getSimilarity();
1203 
1204   for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
1205     OS << CandVec.size() << " candidates of length "
1206        << CandVec.begin()->getLength() << ".  Found in: \n";
1207     for (IRSimilarityCandidate &Cand : CandVec) {
1208       OS << "  Function: " << Cand.front()->Inst->getFunction()->getName().str()
1209          << ", Basic Block: ";
1210       if (Cand.front()->Inst->getParent()->getName().str() == "")
1211         OS << "(unnamed)";
1212       else
1213         OS << Cand.front()->Inst->getParent()->getName().str();
1214       OS << "\n    Start Instruction: ";
1215       Cand.frontInstruction()->print(OS);
1216       OS << "\n      End Instruction: ";
1217       Cand.backInstruction()->print(OS);
1218       OS << "\n";
1219     }
1220   }
1221 
1222   return PreservedAnalyses::all();
1223 }
1224 
1225 char IRSimilarityIdentifierWrapperPass::ID = 0;
1226