1 //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // \file 10 // Implementation file for the IRSimilarityIdentifier for identifying 11 // similarities in IR including the IRInstructionMapper. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Analysis/IRSimilarityIdentifier.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/IR/Intrinsics.h" 18 #include "llvm/IR/Operator.h" 19 #include "llvm/IR/User.h" 20 #include "llvm/InitializePasses.h" 21 #include "llvm/Support/SuffixTree.h" 22 23 using namespace llvm; 24 using namespace IRSimilarity; 25 26 namespace llvm { 27 cl::opt<bool> 28 DisableBranches("no-ir-sim-branch-matching", cl::init(false), 29 cl::ReallyHidden, 30 cl::desc("disable similarity matching, and outlining, " 31 "across branches for debugging purposes.")); 32 33 cl::opt<bool> 34 DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), 35 cl::ReallyHidden, 36 cl::desc("disable outlining indirect calls.")); 37 38 cl::opt<bool> 39 MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden, 40 cl::desc("only allow matching call instructions if the " 41 "name and type signature match.")); 42 } // namespace llvm 43 44 IRInstructionData::IRInstructionData(Instruction &I, bool Legality, 45 IRInstructionDataList &IDList) 46 : Inst(&I), Legal(Legality), IDL(&IDList) { 47 initializeInstruction(); 48 } 49 50 void IRInstructionData::initializeInstruction() { 51 // We check for whether we have a comparison instruction. If it is, we 52 // find the "less than" version of the predicate for consistency for 53 // comparison instructions throught the program. 54 if (CmpInst *C = dyn_cast<CmpInst>(Inst)) { 55 CmpInst::Predicate Predicate = predicateForConsistency(C); 56 if (Predicate != C->getPredicate()) 57 RevisedPredicate = Predicate; 58 } 59 60 // Here we collect the operands and their types for determining whether 61 // the structure of the operand use matches between two different candidates. 62 for (Use &OI : Inst->operands()) { 63 if (isa<CmpInst>(Inst) && RevisedPredicate.hasValue()) { 64 // If we have a CmpInst where the predicate is reversed, it means the 65 // operands must be reversed as well. 66 OperVals.insert(OperVals.begin(), OI.get()); 67 continue; 68 } 69 70 OperVals.push_back(OI.get()); 71 } 72 73 // We capture the incoming BasicBlocks as values as well as the incoming 74 // Values in order to check for structural similarity. 75 if (PHINode *PN = dyn_cast<PHINode>(Inst)) 76 for (BasicBlock *BB : PN->blocks()) 77 OperVals.push_back(BB); 78 } 79 80 IRInstructionData::IRInstructionData(IRInstructionDataList &IDList) 81 : IDL(&IDList) {} 82 83 void IRInstructionData::setBranchSuccessors( 84 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { 85 assert(isa<BranchInst>(Inst) && "Instruction must be branch"); 86 87 BranchInst *BI = cast<BranchInst>(Inst); 88 DenseMap<BasicBlock *, unsigned>::iterator BBNumIt; 89 90 BBNumIt = BasicBlockToInteger.find(BI->getParent()); 91 assert(BBNumIt != BasicBlockToInteger.end() && 92 "Could not find location for BasicBlock!"); 93 94 int CurrentBlockNumber = static_cast<int>(BBNumIt->second); 95 96 for (BasicBlock *Successor : BI->successors()) { 97 BBNumIt = BasicBlockToInteger.find(Successor); 98 assert(BBNumIt != BasicBlockToInteger.end() && 99 "Could not find number for BasicBlock!"); 100 int OtherBlockNumber = static_cast<int>(BBNumIt->second); 101 102 int Relative = OtherBlockNumber - CurrentBlockNumber; 103 RelativeBlockLocations.push_back(Relative); 104 } 105 } 106 107 void IRInstructionData::setCalleeName(bool MatchByName) { 108 CallInst *CI = dyn_cast<CallInst>(Inst); 109 assert(CI && "Instruction must be call"); 110 111 CalleeName = ""; 112 if (!CI->isIndirectCall() && MatchByName) 113 CalleeName = CI->getCalledFunction()->getName().str(); 114 } 115 116 void IRInstructionData::setPHIPredecessors( 117 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { 118 assert(isa<PHINode>(Inst) && "Instruction must be phi node"); 119 120 PHINode *PN = cast<PHINode>(Inst); 121 DenseMap<BasicBlock *, unsigned>::iterator BBNumIt; 122 123 BBNumIt = BasicBlockToInteger.find(PN->getParent()); 124 assert(BBNumIt != BasicBlockToInteger.end() && 125 "Could not find location for BasicBlock!"); 126 127 int CurrentBlockNumber = static_cast<int>(BBNumIt->second); 128 129 // Convert the incoming blocks of the PHINode to an integer value, based on 130 // the relative distances between the current block and the incoming block. 131 for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) { 132 BasicBlock *Incoming = PN->getIncomingBlock(Idx); 133 BBNumIt = BasicBlockToInteger.find(Incoming); 134 assert(BBNumIt != BasicBlockToInteger.end() && 135 "Could not find number for BasicBlock!"); 136 int OtherBlockNumber = static_cast<int>(BBNumIt->second); 137 138 int Relative = OtherBlockNumber - CurrentBlockNumber; 139 RelativeBlockLocations.push_back(Relative); 140 RelativeBlockLocations.push_back(Relative); 141 } 142 } 143 144 CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { 145 switch (CI->getPredicate()) { 146 case CmpInst::FCMP_OGT: 147 case CmpInst::FCMP_UGT: 148 case CmpInst::FCMP_OGE: 149 case CmpInst::FCMP_UGE: 150 case CmpInst::ICMP_SGT: 151 case CmpInst::ICMP_UGT: 152 case CmpInst::ICMP_SGE: 153 case CmpInst::ICMP_UGE: 154 return CI->getSwappedPredicate(); 155 default: 156 return CI->getPredicate(); 157 } 158 } 159 160 CmpInst::Predicate IRInstructionData::getPredicate() const { 161 assert(isa<CmpInst>(Inst) && 162 "Can only get a predicate from a compare instruction"); 163 164 if (RevisedPredicate.hasValue()) 165 return RevisedPredicate.getValue(); 166 167 return cast<CmpInst>(Inst)->getPredicate(); 168 } 169 170 StringRef IRInstructionData::getCalleeName() const { 171 assert(isa<CallInst>(Inst) && 172 "Can only get a name from a call instruction"); 173 174 assert(CalleeName.hasValue() && "CalleeName has not been set"); 175 176 return *CalleeName; 177 } 178 179 bool IRSimilarity::isClose(const IRInstructionData &A, 180 const IRInstructionData &B) { 181 182 if (!A.Legal || !B.Legal) 183 return false; 184 185 // Check if we are performing the same sort of operation on the same types 186 // but not on the same values. 187 if (!A.Inst->isSameOperationAs(B.Inst)) { 188 // If there is a predicate, this means that either there is a swapped 189 // predicate, or that the types are different, we want to make sure that 190 // the predicates are equivalent via swapping. 191 if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) { 192 193 if (A.getPredicate() != B.getPredicate()) 194 return false; 195 196 // If the predicates are the same via swap, make sure that the types are 197 // still the same. 198 auto ZippedTypes = zip(A.OperVals, B.OperVals); 199 200 return all_of( 201 ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) { 202 return std::get<0>(R)->getType() == std::get<1>(R)->getType(); 203 }); 204 } 205 206 return false; 207 } 208 209 // Since any GEP Instruction operands after the first operand cannot be 210 // defined by a register, we must make sure that the operands after the first 211 // are the same in the two instructions 212 if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) { 213 auto *OtherGEP = cast<GetElementPtrInst>(B.Inst); 214 215 // If the instructions do not have the same inbounds restrictions, we do 216 // not consider them the same. 217 if (GEP->isInBounds() != OtherGEP->isInBounds()) 218 return false; 219 220 auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices()); 221 222 // We increment here since we do not care about the first instruction, 223 // we only care about the following operands since they must be the 224 // exact same to be considered similar. 225 return all_of(drop_begin(ZippedOperands), 226 [](std::tuple<llvm::Use &, llvm::Use &> R) { 227 return std::get<0>(R) == std::get<1>(R); 228 }); 229 } 230 231 // If the instructions are functions calls, we make sure that the function 232 // name is the same. We already know that the types are since is 233 // isSameOperationAs is true. 234 if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) { 235 if (A.getCalleeName().str().compare(B.getCalleeName().str()) != 0) 236 return false; 237 } 238 239 if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) && 240 A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size()) 241 return false; 242 243 return true; 244 } 245 246 // TODO: This is the same as the MachineOutliner, and should be consolidated 247 // into the same interface. 248 void IRInstructionMapper::convertToUnsignedVec( 249 BasicBlock &BB, std::vector<IRInstructionData *> &InstrList, 250 std::vector<unsigned> &IntegerMapping) { 251 BasicBlock::iterator It = BB.begin(); 252 253 std::vector<unsigned> IntegerMappingForBB; 254 std::vector<IRInstructionData *> InstrListForBB; 255 256 for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { 257 switch (InstClassifier.visit(*It)) { 258 case InstrType::Legal: 259 mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 260 break; 261 case InstrType::Illegal: 262 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 263 break; 264 case InstrType::Invisible: 265 AddedIllegalLastTime = false; 266 break; 267 } 268 } 269 270 if (HaveLegalRange) { 271 if (AddedIllegalLastTime) 272 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); 273 for (IRInstructionData *ID : InstrListForBB) 274 this->IDL->push_back(*ID); 275 llvm::append_range(InstrList, InstrListForBB); 276 llvm::append_range(IntegerMapping, IntegerMappingForBB); 277 } 278 } 279 280 // TODO: This is the same as the MachineOutliner, and should be consolidated 281 // into the same interface. 282 unsigned IRInstructionMapper::mapToLegalUnsigned( 283 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 284 std::vector<IRInstructionData *> &InstrListForBB) { 285 // We added something legal, so we should unset the AddedLegalLastTime 286 // flag. 287 AddedIllegalLastTime = false; 288 289 // If we have at least two adjacent legal instructions (which may have 290 // invisible instructions in between), remember that. 291 if (CanCombineWithPrevInstr) 292 HaveLegalRange = true; 293 CanCombineWithPrevInstr = true; 294 295 // Get the integer for this instruction or give it the current 296 // LegalInstrNumber. 297 IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL); 298 InstrListForBB.push_back(ID); 299 300 if (isa<BranchInst>(*It)) 301 ID->setBranchSuccessors(BasicBlockToInteger); 302 303 if (isa<CallInst>(*It)) 304 ID->setCalleeName(EnableMatchCallsByName); 305 306 if (isa<PHINode>(*It)) 307 ID->setPHIPredecessors(BasicBlockToInteger); 308 309 // Add to the instruction list 310 bool WasInserted; 311 DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator 312 ResultIt; 313 std::tie(ResultIt, WasInserted) = 314 InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber)); 315 unsigned INumber = ResultIt->second; 316 317 // There was an insertion. 318 if (WasInserted) 319 LegalInstrNumber++; 320 321 IntegerMappingForBB.push_back(INumber); 322 323 // Make sure we don't overflow or use any integers reserved by the DenseMap. 324 assert(LegalInstrNumber < IllegalInstrNumber && 325 "Instruction mapping overflow!"); 326 327 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 328 "Tried to assign DenseMap tombstone or empty key to instruction."); 329 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 330 "Tried to assign DenseMap tombstone or empty key to instruction."); 331 332 return INumber; 333 } 334 335 IRInstructionData * 336 IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality, 337 IRInstructionDataList &IDL) { 338 return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL); 339 } 340 341 IRInstructionData * 342 IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) { 343 return new (InstDataAllocator->Allocate()) IRInstructionData(IDL); 344 } 345 346 IRInstructionDataList * 347 IRInstructionMapper::allocateIRInstructionDataList() { 348 return new (IDLAllocator->Allocate()) IRInstructionDataList(); 349 } 350 351 // TODO: This is the same as the MachineOutliner, and should be consolidated 352 // into the same interface. 353 unsigned IRInstructionMapper::mapToIllegalUnsigned( 354 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 355 std::vector<IRInstructionData *> &InstrListForBB, bool End) { 356 // Can't combine an illegal instruction. Set the flag. 357 CanCombineWithPrevInstr = false; 358 359 // Only add one illegal number per range of legal numbers. 360 if (AddedIllegalLastTime) 361 return IllegalInstrNumber; 362 363 IRInstructionData *ID = nullptr; 364 if (!End) 365 ID = allocateIRInstructionData(*It, false, *IDL); 366 else 367 ID = allocateIRInstructionData(*IDL); 368 InstrListForBB.push_back(ID); 369 370 // Remember that we added an illegal number last time. 371 AddedIllegalLastTime = true; 372 unsigned INumber = IllegalInstrNumber; 373 IntegerMappingForBB.push_back(IllegalInstrNumber--); 374 375 assert(LegalInstrNumber < IllegalInstrNumber && 376 "Instruction mapping overflow!"); 377 378 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 379 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 380 381 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 382 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 383 384 return INumber; 385 } 386 387 IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, 388 IRInstructionData *FirstInstIt, 389 IRInstructionData *LastInstIt) 390 : StartIdx(StartIdx), Len(Len) { 391 392 assert(FirstInstIt != nullptr && "Instruction is nullptr!"); 393 assert(LastInstIt != nullptr && "Instruction is nullptr!"); 394 assert(StartIdx + Len > StartIdx && 395 "Overflow for IRSimilarityCandidate range?"); 396 assert(Len - 1 == static_cast<unsigned>(std::distance( 397 iterator(FirstInstIt), iterator(LastInstIt))) && 398 "Length of the first and last IRInstructionData do not match the " 399 "given length"); 400 401 // We iterate over the given instructions, and map each unique value 402 // to a unique number in the IRSimilarityCandidate ValueToNumber and 403 // NumberToValue maps. A constant get its own value globally, the individual 404 // uses of the constants are not considered to be unique. 405 // 406 // IR: Mapping Added: 407 // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 408 // %add2 = add i32 %a, %1 %add2 -> 4 409 // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 410 // 411 // when replace with global values, starting from 1, would be 412 // 413 // 3 = add i32 1, 2 414 // 4 = add i32 1, 3 415 // 6 = add i32 5, 2 416 unsigned LocalValNumber = 1; 417 IRInstructionDataList::iterator ID = iterator(*FirstInstIt); 418 for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) { 419 // Map the operand values to an unsigned integer if it does not already 420 // have an unsigned integer assigned to it. 421 for (Value *Arg : ID->OperVals) 422 if (ValueToNumber.find(Arg) == ValueToNumber.end()) { 423 ValueToNumber.try_emplace(Arg, LocalValNumber); 424 NumberToValue.try_emplace(LocalValNumber, Arg); 425 LocalValNumber++; 426 } 427 428 // Mapping the instructions to an unsigned integer if it is not already 429 // exist in the mapping. 430 if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) { 431 ValueToNumber.try_emplace(ID->Inst, LocalValNumber); 432 NumberToValue.try_emplace(LocalValNumber, ID->Inst); 433 LocalValNumber++; 434 } 435 } 436 437 // Setting the first and last instruction data pointers for the candidate. If 438 // we got through the entire for loop without hitting an assert, we know 439 // that both of these instructions are not nullptrs. 440 FirstInst = FirstInstIt; 441 LastInst = LastInstIt; 442 } 443 444 bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, 445 const IRSimilarityCandidate &B) { 446 if (A.getLength() != B.getLength()) 447 return false; 448 449 auto InstrDataForBoth = 450 zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end())); 451 452 return all_of(InstrDataForBoth, 453 [](std::tuple<IRInstructionData &, IRInstructionData &> R) { 454 IRInstructionData &A = std::get<0>(R); 455 IRInstructionData &B = std::get<1>(R); 456 if (!A.Legal || !B.Legal) 457 return false; 458 return isClose(A, B); 459 }); 460 } 461 462 /// Determine if one or more of the assigned global value numbers for the 463 /// operands in \p TargetValueNumbers is in the current mapping set for operand 464 /// numbers in \p SourceOperands. The set of possible corresponding global 465 /// value numbers are replaced with the most recent version of compatible 466 /// values. 467 /// 468 /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global 469 /// value number for the source IRInstructionCandidate. 470 /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source 471 /// IRSimilarityCandidate global value numbers to a set of possible numbers in 472 /// the target. 473 /// \param [in] SourceOperands - The operands in the original 474 /// IRSimilarityCandidate in the current instruction. 475 /// \param [in] TargetValueNumbers - The global value numbers of the operands in 476 /// the corresponding Instruction in the other IRSimilarityCandidate. 477 /// \returns true if there exists a possible mapping between the source 478 /// Instruction operands and the target Instruction operands, and false if not. 479 static bool checkNumberingAndReplaceCommutative( 480 const DenseMap<Value *, unsigned> &SourceValueToNumberMapping, 481 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 482 ArrayRef<Value *> &SourceOperands, 483 DenseSet<unsigned> &TargetValueNumbers){ 484 485 DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 486 487 unsigned ArgVal; 488 bool WasInserted; 489 490 // Iterate over the operands in the source IRSimilarityCandidate to determine 491 // whether there exists an operand in the other IRSimilarityCandidate that 492 // creates a valid mapping of Value to Value between the 493 // IRSimilarityCaniddates. 494 for (Value *V : SourceOperands) { 495 ArgVal = SourceValueToNumberMapping.find(V)->second; 496 497 std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert( 498 std::make_pair(ArgVal, TargetValueNumbers)); 499 500 // Instead of finding a current mapping, we inserted a set. This means a 501 // mapping did not exist for the source Instruction operand, it has no 502 // current constraints we need to check. 503 if (WasInserted) 504 continue; 505 506 // If a mapping already exists for the source operand to the values in the 507 // other IRSimilarityCandidate we need to iterate over the items in other 508 // IRSimilarityCandidate's Instruction to determine whether there is a valid 509 // mapping of Value to Value. 510 DenseSet<unsigned> NewSet; 511 for (unsigned &Curr : ValueMappingIt->second) 512 // If we can find the value in the mapping, we add it to the new set. 513 if (TargetValueNumbers.contains(Curr)) 514 NewSet.insert(Curr); 515 516 // If we could not find a Value, return 0. 517 if (NewSet.empty()) 518 return false; 519 520 // Otherwise replace the old mapping with the newly constructed one. 521 if (NewSet.size() != ValueMappingIt->second.size()) 522 ValueMappingIt->second.swap(NewSet); 523 524 // We have reached no conclusions about the mapping, and cannot remove 525 // any items from the other operands, so we move to check the next operand. 526 if (ValueMappingIt->second.size() != 1) 527 continue; 528 529 530 unsigned ValToRemove = *ValueMappingIt->second.begin(); 531 // When there is only one item left in the mapping for and operand, remove 532 // the value from the other operands. If it results in there being no 533 // mapping, return false, it means the mapping is wrong 534 for (Value *InnerV : SourceOperands) { 535 if (V == InnerV) 536 continue; 537 538 unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second; 539 ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal); 540 if (ValueMappingIt == CurrentSrcTgtNumberMapping.end()) 541 continue; 542 543 ValueMappingIt->second.erase(ValToRemove); 544 if (ValueMappingIt->second.empty()) 545 return false; 546 } 547 } 548 549 return true; 550 } 551 552 /// Determine if operand number \p TargetArgVal is in the current mapping set 553 /// for operand number \p SourceArgVal. 554 /// 555 /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global 556 /// value numbers from source IRSimilarityCandidate to target 557 /// IRSimilarityCandidate. 558 /// \param [in] SourceArgVal The global value number for an operand in the 559 /// in the original candidate. 560 /// \param [in] TargetArgVal The global value number for the corresponding 561 /// operand in the other candidate. 562 /// \returns True if there exists a mapping and false if not. 563 bool checkNumberingAndReplace( 564 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 565 unsigned SourceArgVal, unsigned TargetArgVal) { 566 // We are given two unsigned integers representing the global values of 567 // the operands in different IRSimilarityCandidates and a current mapping 568 // between the two. 569 // 570 // Source Operand GVN: 1 571 // Target Operand GVN: 2 572 // CurrentMapping: {1: {1, 2}} 573 // 574 // Since we have mapping, and the target operand is contained in the set, we 575 // update it to: 576 // CurrentMapping: {1: {2}} 577 // and can return true. But, if the mapping was 578 // CurrentMapping: {1: {3}} 579 // we would return false. 580 581 bool WasInserted; 582 DenseMap<unsigned, DenseSet<unsigned>>::iterator Val; 583 584 std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert( 585 std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal}))); 586 587 // If we created a new mapping, then we are done. 588 if (WasInserted) 589 return true; 590 591 // If there is more than one option in the mapping set, and the target value 592 // is included in the mapping set replace that set with one that only includes 593 // the target value, as it is the only valid mapping via the non commutative 594 // instruction. 595 596 DenseSet<unsigned> &TargetSet = Val->second; 597 if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) { 598 TargetSet.clear(); 599 TargetSet.insert(TargetArgVal); 600 return true; 601 } 602 603 // Return true if we can find the value in the set. 604 return TargetSet.contains(TargetArgVal); 605 } 606 607 bool IRSimilarityCandidate::compareNonCommutativeOperandMapping( 608 OperandMapping A, OperandMapping B) { 609 // Iterators to keep track of where we are in the operands for each 610 // Instruction. 611 ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 612 ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 613 unsigned OperandLength = A.OperVals.size(); 614 615 // For each operand, get the value numbering and ensure it is consistent. 616 for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) { 617 unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second; 618 unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second; 619 620 // Attempt to add a set with only the target value. If there is no mapping 621 // we can create it here. 622 // 623 // For an instruction like a subtraction: 624 // IRSimilarityCandidateA: IRSimilarityCandidateB: 625 // %resultA = sub %a, %b %resultB = sub %d, %e 626 // 627 // We map %a -> %d and %b -> %e. 628 // 629 // And check to see whether their mapping is consistent in 630 // checkNumberingAndReplace. 631 632 if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB)) 633 return false; 634 635 if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA)) 636 return false; 637 } 638 return true; 639 } 640 641 bool IRSimilarityCandidate::compareCommutativeOperandMapping( 642 OperandMapping A, OperandMapping B) { 643 DenseSet<unsigned> ValueNumbersA; 644 DenseSet<unsigned> ValueNumbersB; 645 646 ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 647 ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 648 unsigned OperandLength = A.OperVals.size(); 649 650 // Find the value number sets for the operands. 651 for (unsigned Idx = 0; Idx < OperandLength; 652 Idx++, VItA++, VItB++) { 653 ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second); 654 ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second); 655 } 656 657 // Iterate over the operands in the first IRSimilarityCandidate and make sure 658 // there exists a possible mapping with the operands in the second 659 // IRSimilarityCandidate. 660 if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber, 661 A.ValueNumberMapping, A.OperVals, 662 ValueNumbersB)) 663 return false; 664 665 // Iterate over the operands in the second IRSimilarityCandidate and make sure 666 // there exists a possible mapping with the operands in the first 667 // IRSimilarityCandidate. 668 if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber, 669 B.ValueNumberMapping, B.OperVals, 670 ValueNumbersA)) 671 return false; 672 673 return true; 674 } 675 676 bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A, 677 RelativeLocMapping B) { 678 // Get the basic blocks the label refers to. 679 BasicBlock *ABB = static_cast<BasicBlock *>(A.OperVal); 680 BasicBlock *BBB = static_cast<BasicBlock *>(B.OperVal); 681 682 // Get the basic blocks contained in each region. 683 DenseSet<BasicBlock *> BasicBlockA; 684 DenseSet<BasicBlock *> BasicBlockB; 685 A.IRSC.getBasicBlocks(BasicBlockA); 686 B.IRSC.getBasicBlocks(BasicBlockB); 687 688 // Determine if the block is contained in the region. 689 bool AContained = BasicBlockA.contains(ABB); 690 bool BContained = BasicBlockB.contains(BBB); 691 692 // Both blocks need to be contained in the region, or both need to be outside 693 // the reigon. 694 if (AContained != BContained) 695 return false; 696 697 // If both are contained, then we need to make sure that the relative 698 // distance to the target blocks are the same. 699 if (AContained) 700 return A.RelativeLocation == B.RelativeLocation; 701 return true; 702 } 703 704 bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, 705 const IRSimilarityCandidate &B) { 706 DenseMap<unsigned, DenseSet<unsigned>> MappingA; 707 DenseMap<unsigned, DenseSet<unsigned>> MappingB; 708 return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB); 709 } 710 711 typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &, 712 SmallVector<int, 4> &, ArrayRef<Value *> &, 713 ArrayRef<Value *> &> 714 ZippedRelativeLocationsT; 715 716 bool IRSimilarityCandidate::compareStructure( 717 const IRSimilarityCandidate &A, const IRSimilarityCandidate &B, 718 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, 719 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) { 720 if (A.getLength() != B.getLength()) 721 return false; 722 723 if (A.ValueToNumber.size() != B.ValueToNumber.size()) 724 return false; 725 726 iterator ItA = A.begin(); 727 iterator ItB = B.begin(); 728 729 // These ValueNumber Mapping sets create a create a mapping between the values 730 // in one candidate to values in the other candidate. If we create a set with 731 // one element, and that same element maps to the original element in the 732 // candidate we have a good mapping. 733 DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 734 735 736 // Iterate over the instructions contained in each candidate 737 unsigned SectionLength = A.getStartIdx() + A.getLength(); 738 for (unsigned Loc = A.getStartIdx(); Loc < SectionLength; 739 ItA++, ItB++, Loc++) { 740 // Make sure the instructions are similar to one another. 741 if (!isClose(*ItA, *ItB)) 742 return false; 743 744 Instruction *IA = ItA->Inst; 745 Instruction *IB = ItB->Inst; 746 747 if (!ItA->Legal || !ItB->Legal) 748 return false; 749 750 // Get the operand sets for the instructions. 751 ArrayRef<Value *> OperValsA = ItA->OperVals; 752 ArrayRef<Value *> OperValsB = ItB->OperVals; 753 754 unsigned InstValA = A.ValueToNumber.find(IA)->second; 755 unsigned InstValB = B.ValueToNumber.find(IB)->second; 756 757 bool WasInserted; 758 // Ensure that the mappings for the instructions exists. 759 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert( 760 std::make_pair(InstValA, DenseSet<unsigned>({InstValB}))); 761 if (!WasInserted && !ValueMappingIt->second.contains(InstValB)) 762 return false; 763 764 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert( 765 std::make_pair(InstValB, DenseSet<unsigned>({InstValA}))); 766 if (!WasInserted && !ValueMappingIt->second.contains(InstValA)) 767 return false; 768 769 // We have different paths for commutative instructions and non-commutative 770 // instructions since commutative instructions could allow multiple mappings 771 // to certain values. 772 if (IA->isCommutative() && !isa<FPMathOperator>(IA)) { 773 if (!compareCommutativeOperandMapping( 774 {A, OperValsA, ValueNumberMappingA}, 775 {B, OperValsB, ValueNumberMappingB})) 776 return false; 777 continue; 778 } 779 780 // Handle the non-commutative cases. 781 if (!compareNonCommutativeOperandMapping( 782 {A, OperValsA, ValueNumberMappingA}, 783 {B, OperValsB, ValueNumberMappingB})) 784 return false; 785 786 // Here we check that between two corresponding instructions, 787 // when referring to a basic block in the same region, the 788 // relative locations are the same. And, that the instructions refer to 789 // basic blocks outside the region in the same corresponding locations. 790 791 // We are able to make the assumption about blocks outside of the region 792 // since the target block labels are considered values and will follow the 793 // same number matching that we defined for the other instructions in the 794 // region. So, at this point, in each location we target a specific block 795 // outside the region, we are targeting a corresponding block in each 796 // analagous location in the region we are comparing to. 797 if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) && 798 !(isa<PHINode>(IA) && isa<PHINode>(IB))) 799 continue; 800 801 SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations; 802 SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations; 803 if (RelBlockLocsA.size() != RelBlockLocsB.size() && 804 OperValsA.size() != OperValsB.size()) 805 return false; 806 807 ZippedRelativeLocationsT ZippedRelativeLocations = 808 zip(RelBlockLocsA, RelBlockLocsB, OperValsA, OperValsB); 809 if (any_of(ZippedRelativeLocations, 810 [&A, &B](std::tuple<int, int, Value *, Value *> R) { 811 return !checkRelativeLocations( 812 {A, std::get<0>(R), std::get<2>(R)}, 813 {B, std::get<1>(R), std::get<3>(R)}); 814 })) 815 return false; 816 } 817 return true; 818 } 819 820 bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, 821 const IRSimilarityCandidate &B) { 822 auto DoesOverlap = [](const IRSimilarityCandidate &X, 823 const IRSimilarityCandidate &Y) { 824 // Check: 825 // XXXXXX X starts before Y ends 826 // YYYYYYY Y starts after X starts 827 return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx; 828 }; 829 830 return DoesOverlap(A, B) || DoesOverlap(B, A); 831 } 832 833 void IRSimilarityIdentifier::populateMapper( 834 Module &M, std::vector<IRInstructionData *> &InstrList, 835 std::vector<unsigned> &IntegerMapping) { 836 837 std::vector<IRInstructionData *> InstrListForModule; 838 std::vector<unsigned> IntegerMappingForModule; 839 // Iterate over the functions in the module to map each Instruction in each 840 // BasicBlock to an unsigned integer. 841 Mapper.initializeForBBs(M); 842 843 for (Function &F : M) { 844 845 if (F.empty()) 846 continue; 847 848 for (BasicBlock &BB : F) { 849 850 // BB has potential to have similarity since it has a size greater than 2 851 // and can therefore match other regions greater than 2. Map it to a list 852 // of unsigned integers. 853 Mapper.convertToUnsignedVec(BB, InstrListForModule, 854 IntegerMappingForModule); 855 } 856 857 BasicBlock::iterator It = F.begin()->end(); 858 Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule, 859 true); 860 if (InstrListForModule.size() > 0) 861 Mapper.IDL->push_back(*InstrListForModule.back()); 862 } 863 864 // Insert the InstrListForModule at the end of the overall InstrList so that 865 // we can have a long InstrList for the entire set of Modules being analyzed. 866 llvm::append_range(InstrList, InstrListForModule); 867 // Do the same as above, but for IntegerMapping. 868 llvm::append_range(IntegerMapping, IntegerMappingForModule); 869 } 870 871 void IRSimilarityIdentifier::populateMapper( 872 ArrayRef<std::unique_ptr<Module>> &Modules, 873 std::vector<IRInstructionData *> &InstrList, 874 std::vector<unsigned> &IntegerMapping) { 875 876 // Iterate over, and map the instructions in each module. 877 for (const std::unique_ptr<Module> &M : Modules) 878 populateMapper(*M, InstrList, IntegerMapping); 879 } 880 881 /// From a repeated subsequence, find all the different instances of the 882 /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from 883 /// the IRInstructionData in subsequence. 884 /// 885 /// \param [in] Mapper - The instruction mapper for basic correctness checks. 886 /// \param [in] InstrList - The vector that holds the instruction data. 887 /// \param [in] IntegerMapping - The vector that holds the mapped integers. 888 /// \param [out] CandsForRepSubstring - The vector to store the generated 889 /// IRSimilarityCandidates. 890 static void createCandidatesFromSuffixTree( 891 const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList, 892 std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS, 893 std::vector<IRSimilarityCandidate> &CandsForRepSubstring) { 894 895 unsigned StringLen = RS.Length; 896 if (StringLen < 2) 897 return; 898 899 // Create an IRSimilarityCandidate for instance of this subsequence \p RS. 900 for (const unsigned &StartIdx : RS.StartIndices) { 901 unsigned EndIdx = StartIdx + StringLen - 1; 902 903 // Check that this subsequence does not contain an illegal instruction. 904 bool ContainsIllegal = false; 905 for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) { 906 unsigned Key = IntegerMapping[CurrIdx]; 907 if (Key > Mapper.IllegalInstrNumber) { 908 ContainsIllegal = true; 909 break; 910 } 911 } 912 913 // If we have an illegal instruction, we should not create an 914 // IRSimilarityCandidate for this region. 915 if (ContainsIllegal) 916 continue; 917 918 // We are getting iterators to the instructions in this region of code 919 // by advancing the start and end indices from the start of the 920 // InstrList. 921 std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin(); 922 std::advance(StartIt, StartIdx); 923 std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin(); 924 std::advance(EndIt, EndIdx); 925 926 CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt); 927 } 928 } 929 930 void IRSimilarityCandidate::createCanonicalRelationFrom( 931 IRSimilarityCandidate &SourceCand, 932 DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping, 933 DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) { 934 assert(SourceCand.CanonNumToNumber.size() != 0 && 935 "Base canonical relationship is empty!"); 936 assert(SourceCand.NumberToCanonNum.size() != 0 && 937 "Base canonical relationship is empty!"); 938 939 assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty"); 940 assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty"); 941 942 DenseSet<unsigned> UsedGVNs; 943 // Iterate over the mappings provided from this candidate to SourceCand. We 944 // are then able to map the GVN in this candidate to the same canonical number 945 // given to the corresponding GVN in SourceCand. 946 for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) { 947 unsigned SourceGVN = GVNMapping.first; 948 949 assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!"); 950 951 unsigned ResultGVN; 952 // We need special handling if we have more than one potential value. This 953 // means that there are at least two GVNs that could correspond to this GVN. 954 // This could lead to potential swapping later on, so we make a decision 955 // here to ensure a one-to-one mapping. 956 if (GVNMapping.second.size() > 1) { 957 bool Found = false; 958 for (unsigned Val : GVNMapping.second) { 959 // We make sure the target value number hasn't already been reserved. 960 if (UsedGVNs.contains(Val)) 961 continue; 962 963 // We make sure that the opposite mapping is still consistent. 964 DenseMap<unsigned, DenseSet<unsigned>>::iterator It = 965 FromSourceMapping.find(Val); 966 967 if (!It->second.contains(SourceGVN)) 968 continue; 969 970 // We pick the first item that satisfies these conditions. 971 Found = true; 972 ResultGVN = Val; 973 break; 974 } 975 976 assert(Found && "Could not find matching value for source GVN"); 977 (void)Found; 978 979 } else 980 ResultGVN = *GVNMapping.second.begin(); 981 982 // Whatever GVN is found, we mark it as used. 983 UsedGVNs.insert(ResultGVN); 984 985 unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN); 986 CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN)); 987 NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum)); 988 } 989 } 990 991 void IRSimilarityCandidate::createCanonicalMappingFor( 992 IRSimilarityCandidate &CurrCand) { 993 assert(CurrCand.CanonNumToNumber.size() == 0 && 994 "Canonical Relationship is non-empty"); 995 assert(CurrCand.NumberToCanonNum.size() == 0 && 996 "Canonical Relationship is non-empty"); 997 998 unsigned CanonNum = 0; 999 // Iterate over the value numbers found, the order does not matter in this 1000 // case. 1001 for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) { 1002 CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum)); 1003 CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first)); 1004 CanonNum++; 1005 } 1006 } 1007 1008 /// From the list of IRSimilarityCandidates, perform a comparison between each 1009 /// IRSimilarityCandidate to determine if there are overlapping 1010 /// IRInstructionData, or if they do not have the same structure. 1011 /// 1012 /// \param [in] CandsForRepSubstring - The vector containing the 1013 /// IRSimilarityCandidates. 1014 /// \param [out] StructuralGroups - the mapping of unsigned integers to vector 1015 /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the 1016 /// vector are structurally similar to one another. 1017 static void findCandidateStructures( 1018 std::vector<IRSimilarityCandidate> &CandsForRepSubstring, 1019 DenseMap<unsigned, SimilarityGroup> &StructuralGroups) { 1020 std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt, 1021 InnerCandEndIt; 1022 1023 // IRSimilarityCandidates each have a structure for operand use. It is 1024 // possible that two instances of the same subsequences have different 1025 // structure. Each type of structure found is assigned a number. This 1026 // DenseMap maps an IRSimilarityCandidate to which type of similarity 1027 // discovered it fits within. 1028 DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; 1029 1030 // Find the compatibility from each candidate to the others to determine 1031 // which candidates overlap and which have the same structure by mapping 1032 // each structure to a different group. 1033 bool SameStructure; 1034 bool Inserted; 1035 unsigned CurrentGroupNum = 0; 1036 unsigned OuterGroupNum; 1037 DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt; 1038 DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner; 1039 DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair; 1040 1041 // Iterate over the candidates to determine its structural and overlapping 1042 // compatibility with other instructions 1043 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA; 1044 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB; 1045 for (CandIt = CandsForRepSubstring.begin(), 1046 CandEndIt = CandsForRepSubstring.end(); 1047 CandIt != CandEndIt; CandIt++) { 1048 1049 // Determine if it has an assigned structural group already. 1050 CandToGroupIt = CandToGroup.find(&*CandIt); 1051 if (CandToGroupIt == CandToGroup.end()) { 1052 // If not, we assign it one, and add it to our mapping. 1053 std::tie(CandToGroupIt, Inserted) = 1054 CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++)); 1055 } 1056 1057 // Get the structural group number from the iterator. 1058 OuterGroupNum = CandToGroupIt->second; 1059 1060 // Check if we already have a list of IRSimilarityCandidates for the current 1061 // structural group. Create one if one does not exist. 1062 CurrentGroupPair = StructuralGroups.find(OuterGroupNum); 1063 if (CurrentGroupPair == StructuralGroups.end()) { 1064 IRSimilarityCandidate::createCanonicalMappingFor(*CandIt); 1065 std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert( 1066 std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt}))); 1067 } 1068 1069 // Iterate over the IRSimilarityCandidates following the current 1070 // IRSimilarityCandidate in the list to determine whether the two 1071 // IRSimilarityCandidates are compatible. This is so we do not repeat pairs 1072 // of IRSimilarityCandidates. 1073 for (InnerCandIt = std::next(CandIt), 1074 InnerCandEndIt = CandsForRepSubstring.end(); 1075 InnerCandIt != InnerCandEndIt; InnerCandIt++) { 1076 1077 // We check if the inner item has a group already, if it does, we skip it. 1078 CandToGroupItInner = CandToGroup.find(&*InnerCandIt); 1079 if (CandToGroupItInner != CandToGroup.end()) 1080 continue; 1081 1082 // Otherwise we determine if they have the same structure and add it to 1083 // vector if they match. 1084 ValueNumberMappingA.clear(); 1085 ValueNumberMappingB.clear(); 1086 SameStructure = IRSimilarityCandidate::compareStructure( 1087 *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB); 1088 if (!SameStructure) 1089 continue; 1090 1091 InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA, 1092 ValueNumberMappingB); 1093 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum)); 1094 CurrentGroupPair->second.push_back(*InnerCandIt); 1095 } 1096 } 1097 } 1098 1099 void IRSimilarityIdentifier::findCandidates( 1100 std::vector<IRInstructionData *> &InstrList, 1101 std::vector<unsigned> &IntegerMapping) { 1102 SuffixTree ST(IntegerMapping); 1103 1104 std::vector<IRSimilarityCandidate> CandsForRepSubstring; 1105 std::vector<SimilarityGroup> NewCandidateGroups; 1106 1107 DenseMap<unsigned, SimilarityGroup> StructuralGroups; 1108 1109 // Iterate over the subsequences found by the Suffix Tree to create 1110 // IRSimilarityCandidates for each repeated subsequence and determine which 1111 // instances are structurally similar to one another. 1112 for (SuffixTree::RepeatedSubstring &RS : ST) { 1113 createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS, 1114 CandsForRepSubstring); 1115 1116 if (CandsForRepSubstring.size() < 2) 1117 continue; 1118 1119 findCandidateStructures(CandsForRepSubstring, StructuralGroups); 1120 for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) 1121 // We only add the group if it contains more than one 1122 // IRSimilarityCandidate. If there is only one, that means there is no 1123 // other repeated subsequence with the same structure. 1124 if (Group.second.size() > 1) 1125 SimilarityCandidates->push_back(Group.second); 1126 1127 CandsForRepSubstring.clear(); 1128 StructuralGroups.clear(); 1129 NewCandidateGroups.clear(); 1130 } 1131 } 1132 1133 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( 1134 ArrayRef<std::unique_ptr<Module>> Modules) { 1135 resetSimilarityCandidates(); 1136 1137 std::vector<IRInstructionData *> InstrList; 1138 std::vector<unsigned> IntegerMapping; 1139 Mapper.InstClassifier.EnableBranches = this->EnableBranches; 1140 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; 1141 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; 1142 1143 populateMapper(Modules, InstrList, IntegerMapping); 1144 findCandidates(InstrList, IntegerMapping); 1145 1146 return SimilarityCandidates.getValue(); 1147 } 1148 1149 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { 1150 resetSimilarityCandidates(); 1151 Mapper.InstClassifier.EnableBranches = this->EnableBranches; 1152 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; 1153 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; 1154 1155 std::vector<IRInstructionData *> InstrList; 1156 std::vector<unsigned> IntegerMapping; 1157 1158 populateMapper(M, InstrList, IntegerMapping); 1159 findCandidates(InstrList, IntegerMapping); 1160 1161 return SimilarityCandidates.getValue(); 1162 } 1163 1164 INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier", 1165 "ir-similarity-identifier", false, true) 1166 1167 IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() 1168 : ModulePass(ID) { 1169 initializeIRSimilarityIdentifierWrapperPassPass( 1170 *PassRegistry::getPassRegistry()); 1171 } 1172 1173 bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { 1174 IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, 1175 MatchCallsByName)); 1176 return false; 1177 } 1178 1179 bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) { 1180 IRSI.reset(); 1181 return false; 1182 } 1183 1184 bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { 1185 IRSI->findSimilarity(M); 1186 return false; 1187 } 1188 1189 AnalysisKey IRSimilarityAnalysis::Key; 1190 IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, 1191 ModuleAnalysisManager &) { 1192 1193 auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, 1194 MatchCallsByName); 1195 IRSI.findSimilarity(M); 1196 return IRSI; 1197 } 1198 1199 PreservedAnalyses 1200 IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { 1201 IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M); 1202 Optional<SimilarityGroupList> &SimilarityCandidatesOpt = IRSI.getSimilarity(); 1203 1204 for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) { 1205 OS << CandVec.size() << " candidates of length " 1206 << CandVec.begin()->getLength() << ". Found in: \n"; 1207 for (IRSimilarityCandidate &Cand : CandVec) { 1208 OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str() 1209 << ", Basic Block: "; 1210 if (Cand.front()->Inst->getParent()->getName().str() == "") 1211 OS << "(unnamed)"; 1212 else 1213 OS << Cand.front()->Inst->getParent()->getName().str(); 1214 OS << "\n Start Instruction: "; 1215 Cand.frontInstruction()->print(OS); 1216 OS << "\n End Instruction: "; 1217 Cand.backInstruction()->print(OS); 1218 OS << "\n"; 1219 } 1220 } 1221 1222 return PreservedAnalyses::all(); 1223 } 1224 1225 char IRSimilarityIdentifierWrapperPass::ID = 0; 1226