1 //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // \file 10 // Implementation file for the IRSimilarityIdentifier for identifying 11 // similarities in IR including the IRInstructionMapper. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Analysis/IRSimilarityIdentifier.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/IR/Intrinsics.h" 18 #include "llvm/IR/Operator.h" 19 #include "llvm/IR/User.h" 20 #include "llvm/InitializePasses.h" 21 #include "llvm/Support/SuffixTree.h" 22 23 using namespace llvm; 24 using namespace IRSimilarity; 25 26 namespace llvm { 27 cl::opt<bool> 28 DisableBranches("no-ir-sim-branch-matching", cl::init(false), 29 cl::ReallyHidden, 30 cl::desc("disable similarity matching, and outlining, " 31 "across branches for debugging purposes.")); 32 33 cl::opt<bool> 34 DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false), 35 cl::ReallyHidden, 36 cl::desc("disable outlining indirect calls.")); 37 38 cl::opt<bool> 39 MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden, 40 cl::desc("only allow matching call instructions if the " 41 "name and type signature match.")); 42 43 cl::opt<bool> 44 DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden, 45 cl::desc("Don't match or outline intrinsics")); 46 } // namespace llvm 47 48 IRInstructionData::IRInstructionData(Instruction &I, bool Legality, 49 IRInstructionDataList &IDList) 50 : Inst(&I), Legal(Legality), IDL(&IDList) { 51 initializeInstruction(); 52 } 53 54 void IRInstructionData::initializeInstruction() { 55 // We check for whether we have a comparison instruction. If it is, we 56 // find the "less than" version of the predicate for consistency for 57 // comparison instructions throught the program. 58 if (CmpInst *C = dyn_cast<CmpInst>(Inst)) { 59 CmpInst::Predicate Predicate = predicateForConsistency(C); 60 if (Predicate != C->getPredicate()) 61 RevisedPredicate = Predicate; 62 } 63 64 // Here we collect the operands and their types for determining whether 65 // the structure of the operand use matches between two different candidates. 66 for (Use &OI : Inst->operands()) { 67 if (isa<CmpInst>(Inst) && RevisedPredicate.hasValue()) { 68 // If we have a CmpInst where the predicate is reversed, it means the 69 // operands must be reversed as well. 70 OperVals.insert(OperVals.begin(), OI.get()); 71 continue; 72 } 73 74 OperVals.push_back(OI.get()); 75 } 76 77 // We capture the incoming BasicBlocks as values as well as the incoming 78 // Values in order to check for structural similarity. 79 if (PHINode *PN = dyn_cast<PHINode>(Inst)) 80 for (BasicBlock *BB : PN->blocks()) 81 OperVals.push_back(BB); 82 } 83 84 IRInstructionData::IRInstructionData(IRInstructionDataList &IDList) 85 : IDL(&IDList) {} 86 87 void IRInstructionData::setBranchSuccessors( 88 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { 89 assert(isa<BranchInst>(Inst) && "Instruction must be branch"); 90 91 BranchInst *BI = cast<BranchInst>(Inst); 92 DenseMap<BasicBlock *, unsigned>::iterator BBNumIt; 93 94 BBNumIt = BasicBlockToInteger.find(BI->getParent()); 95 assert(BBNumIt != BasicBlockToInteger.end() && 96 "Could not find location for BasicBlock!"); 97 98 int CurrentBlockNumber = static_cast<int>(BBNumIt->second); 99 100 for (BasicBlock *Successor : BI->successors()) { 101 BBNumIt = BasicBlockToInteger.find(Successor); 102 assert(BBNumIt != BasicBlockToInteger.end() && 103 "Could not find number for BasicBlock!"); 104 int OtherBlockNumber = static_cast<int>(BBNumIt->second); 105 106 int Relative = OtherBlockNumber - CurrentBlockNumber; 107 RelativeBlockLocations.push_back(Relative); 108 } 109 } 110 111 void IRInstructionData::setCalleeName(bool MatchByName) { 112 CallInst *CI = dyn_cast<CallInst>(Inst); 113 assert(CI && "Instruction must be call"); 114 115 CalleeName = ""; 116 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { 117 // To hash intrinsics, we use the opcode, and types like the other 118 // instructions, but also, the Intrinsic ID, and the Name of the 119 // intrinsic. 120 Intrinsic::ID IntrinsicID = II->getIntrinsicID(); 121 FunctionType *FT = II->getFunctionType(); 122 // If there is an overloaded name, we have to use the complex version 123 // of getName to get the entire string. 124 if (Intrinsic::isOverloaded(IntrinsicID)) 125 CalleeName = 126 Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT); 127 // If there is not an overloaded name, we only need to use this version. 128 else 129 CalleeName = Intrinsic::getName(IntrinsicID).str(); 130 131 return; 132 } 133 134 if (!CI->isIndirectCall() && MatchByName) 135 CalleeName = CI->getCalledFunction()->getName().str(); 136 } 137 138 void IRInstructionData::setPHIPredecessors( 139 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) { 140 assert(isa<PHINode>(Inst) && "Instruction must be phi node"); 141 142 PHINode *PN = cast<PHINode>(Inst); 143 DenseMap<BasicBlock *, unsigned>::iterator BBNumIt; 144 145 BBNumIt = BasicBlockToInteger.find(PN->getParent()); 146 assert(BBNumIt != BasicBlockToInteger.end() && 147 "Could not find location for BasicBlock!"); 148 149 int CurrentBlockNumber = static_cast<int>(BBNumIt->second); 150 151 // Convert the incoming blocks of the PHINode to an integer value, based on 152 // the relative distances between the current block and the incoming block. 153 for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) { 154 BasicBlock *Incoming = PN->getIncomingBlock(Idx); 155 BBNumIt = BasicBlockToInteger.find(Incoming); 156 assert(BBNumIt != BasicBlockToInteger.end() && 157 "Could not find number for BasicBlock!"); 158 int OtherBlockNumber = static_cast<int>(BBNumIt->second); 159 160 int Relative = OtherBlockNumber - CurrentBlockNumber; 161 RelativeBlockLocations.push_back(Relative); 162 RelativeBlockLocations.push_back(Relative); 163 } 164 } 165 166 CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { 167 switch (CI->getPredicate()) { 168 case CmpInst::FCMP_OGT: 169 case CmpInst::FCMP_UGT: 170 case CmpInst::FCMP_OGE: 171 case CmpInst::FCMP_UGE: 172 case CmpInst::ICMP_SGT: 173 case CmpInst::ICMP_UGT: 174 case CmpInst::ICMP_SGE: 175 case CmpInst::ICMP_UGE: 176 return CI->getSwappedPredicate(); 177 default: 178 return CI->getPredicate(); 179 } 180 } 181 182 CmpInst::Predicate IRInstructionData::getPredicate() const { 183 assert(isa<CmpInst>(Inst) && 184 "Can only get a predicate from a compare instruction"); 185 186 if (RevisedPredicate.hasValue()) 187 return RevisedPredicate.getValue(); 188 189 return cast<CmpInst>(Inst)->getPredicate(); 190 } 191 192 StringRef IRInstructionData::getCalleeName() const { 193 assert(isa<CallInst>(Inst) && 194 "Can only get a name from a call instruction"); 195 196 assert(CalleeName.hasValue() && "CalleeName has not been set"); 197 198 return *CalleeName; 199 } 200 201 bool IRSimilarity::isClose(const IRInstructionData &A, 202 const IRInstructionData &B) { 203 204 if (!A.Legal || !B.Legal) 205 return false; 206 207 // Check if we are performing the same sort of operation on the same types 208 // but not on the same values. 209 if (!A.Inst->isSameOperationAs(B.Inst)) { 210 // If there is a predicate, this means that either there is a swapped 211 // predicate, or that the types are different, we want to make sure that 212 // the predicates are equivalent via swapping. 213 if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) { 214 215 if (A.getPredicate() != B.getPredicate()) 216 return false; 217 218 // If the predicates are the same via swap, make sure that the types are 219 // still the same. 220 auto ZippedTypes = zip(A.OperVals, B.OperVals); 221 222 return all_of( 223 ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) { 224 return std::get<0>(R)->getType() == std::get<1>(R)->getType(); 225 }); 226 } 227 228 return false; 229 } 230 231 // Since any GEP Instruction operands after the first operand cannot be 232 // defined by a register, we must make sure that the operands after the first 233 // are the same in the two instructions 234 if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) { 235 auto *OtherGEP = cast<GetElementPtrInst>(B.Inst); 236 237 // If the instructions do not have the same inbounds restrictions, we do 238 // not consider them the same. 239 if (GEP->isInBounds() != OtherGEP->isInBounds()) 240 return false; 241 242 auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices()); 243 244 // We increment here since we do not care about the first instruction, 245 // we only care about the following operands since they must be the 246 // exact same to be considered similar. 247 return all_of(drop_begin(ZippedOperands), 248 [](std::tuple<llvm::Use &, llvm::Use &> R) { 249 return std::get<0>(R) == std::get<1>(R); 250 }); 251 } 252 253 // If the instructions are functions calls, we make sure that the function 254 // name is the same. We already know that the types are since is 255 // isSameOperationAs is true. 256 if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) { 257 if (A.getCalleeName().str() != B.getCalleeName().str()) 258 return false; 259 } 260 261 if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) && 262 A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size()) 263 return false; 264 265 return true; 266 } 267 268 // TODO: This is the same as the MachineOutliner, and should be consolidated 269 // into the same interface. 270 void IRInstructionMapper::convertToUnsignedVec( 271 BasicBlock &BB, std::vector<IRInstructionData *> &InstrList, 272 std::vector<unsigned> &IntegerMapping) { 273 BasicBlock::iterator It = BB.begin(); 274 275 std::vector<unsigned> IntegerMappingForBB; 276 std::vector<IRInstructionData *> InstrListForBB; 277 278 for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { 279 switch (InstClassifier.visit(*It)) { 280 case InstrType::Legal: 281 mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 282 break; 283 case InstrType::Illegal: 284 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 285 break; 286 case InstrType::Invisible: 287 AddedIllegalLastTime = false; 288 break; 289 } 290 } 291 292 if (AddedIllegalLastTime) 293 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); 294 for (IRInstructionData *ID : InstrListForBB) 295 this->IDL->push_back(*ID); 296 llvm::append_range(InstrList, InstrListForBB); 297 llvm::append_range(IntegerMapping, IntegerMappingForBB); 298 } 299 300 // TODO: This is the same as the MachineOutliner, and should be consolidated 301 // into the same interface. 302 unsigned IRInstructionMapper::mapToLegalUnsigned( 303 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 304 std::vector<IRInstructionData *> &InstrListForBB) { 305 // We added something legal, so we should unset the AddedLegalLastTime 306 // flag. 307 AddedIllegalLastTime = false; 308 309 // If we have at least two adjacent legal instructions (which may have 310 // invisible instructions in between), remember that. 311 if (CanCombineWithPrevInstr) 312 HaveLegalRange = true; 313 CanCombineWithPrevInstr = true; 314 315 // Get the integer for this instruction or give it the current 316 // LegalInstrNumber. 317 IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL); 318 InstrListForBB.push_back(ID); 319 320 if (isa<BranchInst>(*It)) 321 ID->setBranchSuccessors(BasicBlockToInteger); 322 323 if (isa<CallInst>(*It)) 324 ID->setCalleeName(EnableMatchCallsByName); 325 326 if (isa<PHINode>(*It)) 327 ID->setPHIPredecessors(BasicBlockToInteger); 328 329 // Add to the instruction list 330 bool WasInserted; 331 DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator 332 ResultIt; 333 std::tie(ResultIt, WasInserted) = 334 InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber)); 335 unsigned INumber = ResultIt->second; 336 337 // There was an insertion. 338 if (WasInserted) 339 LegalInstrNumber++; 340 341 IntegerMappingForBB.push_back(INumber); 342 343 // Make sure we don't overflow or use any integers reserved by the DenseMap. 344 assert(LegalInstrNumber < IllegalInstrNumber && 345 "Instruction mapping overflow!"); 346 347 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 348 "Tried to assign DenseMap tombstone or empty key to instruction."); 349 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 350 "Tried to assign DenseMap tombstone or empty key to instruction."); 351 352 return INumber; 353 } 354 355 IRInstructionData * 356 IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality, 357 IRInstructionDataList &IDL) { 358 return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL); 359 } 360 361 IRInstructionData * 362 IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) { 363 return new (InstDataAllocator->Allocate()) IRInstructionData(IDL); 364 } 365 366 IRInstructionDataList * 367 IRInstructionMapper::allocateIRInstructionDataList() { 368 return new (IDLAllocator->Allocate()) IRInstructionDataList(); 369 } 370 371 // TODO: This is the same as the MachineOutliner, and should be consolidated 372 // into the same interface. 373 unsigned IRInstructionMapper::mapToIllegalUnsigned( 374 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 375 std::vector<IRInstructionData *> &InstrListForBB, bool End) { 376 // Can't combine an illegal instruction. Set the flag. 377 CanCombineWithPrevInstr = false; 378 379 // Only add one illegal number per range of legal numbers. 380 if (AddedIllegalLastTime) 381 return IllegalInstrNumber; 382 383 IRInstructionData *ID = nullptr; 384 if (!End) 385 ID = allocateIRInstructionData(*It, false, *IDL); 386 else 387 ID = allocateIRInstructionData(*IDL); 388 InstrListForBB.push_back(ID); 389 390 // Remember that we added an illegal number last time. 391 AddedIllegalLastTime = true; 392 unsigned INumber = IllegalInstrNumber; 393 IntegerMappingForBB.push_back(IllegalInstrNumber--); 394 395 assert(LegalInstrNumber < IllegalInstrNumber && 396 "Instruction mapping overflow!"); 397 398 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 399 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 400 401 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 402 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 403 404 return INumber; 405 } 406 407 IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, 408 IRInstructionData *FirstInstIt, 409 IRInstructionData *LastInstIt) 410 : StartIdx(StartIdx), Len(Len) { 411 412 assert(FirstInstIt != nullptr && "Instruction is nullptr!"); 413 assert(LastInstIt != nullptr && "Instruction is nullptr!"); 414 assert(StartIdx + Len > StartIdx && 415 "Overflow for IRSimilarityCandidate range?"); 416 assert(Len - 1 == static_cast<unsigned>(std::distance( 417 iterator(FirstInstIt), iterator(LastInstIt))) && 418 "Length of the first and last IRInstructionData do not match the " 419 "given length"); 420 421 // We iterate over the given instructions, and map each unique value 422 // to a unique number in the IRSimilarityCandidate ValueToNumber and 423 // NumberToValue maps. A constant get its own value globally, the individual 424 // uses of the constants are not considered to be unique. 425 // 426 // IR: Mapping Added: 427 // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 428 // %add2 = add i32 %a, %1 %add2 -> 4 429 // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 430 // 431 // when replace with global values, starting from 1, would be 432 // 433 // 3 = add i32 1, 2 434 // 4 = add i32 1, 3 435 // 6 = add i32 5, 2 436 unsigned LocalValNumber = 1; 437 IRInstructionDataList::iterator ID = iterator(*FirstInstIt); 438 for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) { 439 // Map the operand values to an unsigned integer if it does not already 440 // have an unsigned integer assigned to it. 441 for (Value *Arg : ID->OperVals) 442 if (ValueToNumber.find(Arg) == ValueToNumber.end()) { 443 ValueToNumber.try_emplace(Arg, LocalValNumber); 444 NumberToValue.try_emplace(LocalValNumber, Arg); 445 LocalValNumber++; 446 } 447 448 // Mapping the instructions to an unsigned integer if it is not already 449 // exist in the mapping. 450 if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) { 451 ValueToNumber.try_emplace(ID->Inst, LocalValNumber); 452 NumberToValue.try_emplace(LocalValNumber, ID->Inst); 453 LocalValNumber++; 454 } 455 } 456 457 // Setting the first and last instruction data pointers for the candidate. If 458 // we got through the entire for loop without hitting an assert, we know 459 // that both of these instructions are not nullptrs. 460 FirstInst = FirstInstIt; 461 LastInst = LastInstIt; 462 463 // Add the basic blocks contained in the set into the global value numbering. 464 DenseSet<BasicBlock *> BBSet; 465 getBasicBlocks(BBSet); 466 for (BasicBlock *BB : BBSet) { 467 if (ValueToNumber.find(BB) != ValueToNumber.end()) 468 continue; 469 470 ValueToNumber.try_emplace(BB, LocalValNumber); 471 NumberToValue.try_emplace(LocalValNumber, BB); 472 LocalValNumber++; 473 } 474 } 475 476 bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, 477 const IRSimilarityCandidate &B) { 478 if (A.getLength() != B.getLength()) 479 return false; 480 481 auto InstrDataForBoth = 482 zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end())); 483 484 return all_of(InstrDataForBoth, 485 [](std::tuple<IRInstructionData &, IRInstructionData &> R) { 486 IRInstructionData &A = std::get<0>(R); 487 IRInstructionData &B = std::get<1>(R); 488 if (!A.Legal || !B.Legal) 489 return false; 490 return isClose(A, B); 491 }); 492 } 493 494 /// Determine if one or more of the assigned global value numbers for the 495 /// operands in \p TargetValueNumbers is in the current mapping set for operand 496 /// numbers in \p SourceOperands. The set of possible corresponding global 497 /// value numbers are replaced with the most recent version of compatible 498 /// values. 499 /// 500 /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global 501 /// value number for the source IRInstructionCandidate. 502 /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source 503 /// IRSimilarityCandidate global value numbers to a set of possible numbers in 504 /// the target. 505 /// \param [in] SourceOperands - The operands in the original 506 /// IRSimilarityCandidate in the current instruction. 507 /// \param [in] TargetValueNumbers - The global value numbers of the operands in 508 /// the corresponding Instruction in the other IRSimilarityCandidate. 509 /// \returns true if there exists a possible mapping between the source 510 /// Instruction operands and the target Instruction operands, and false if not. 511 static bool checkNumberingAndReplaceCommutative( 512 const DenseMap<Value *, unsigned> &SourceValueToNumberMapping, 513 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 514 ArrayRef<Value *> &SourceOperands, 515 DenseSet<unsigned> &TargetValueNumbers){ 516 517 DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 518 519 unsigned ArgVal; 520 bool WasInserted; 521 522 // Iterate over the operands in the source IRSimilarityCandidate to determine 523 // whether there exists an operand in the other IRSimilarityCandidate that 524 // creates a valid mapping of Value to Value between the 525 // IRSimilarityCaniddates. 526 for (Value *V : SourceOperands) { 527 ArgVal = SourceValueToNumberMapping.find(V)->second; 528 529 std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert( 530 std::make_pair(ArgVal, TargetValueNumbers)); 531 532 // Instead of finding a current mapping, we inserted a set. This means a 533 // mapping did not exist for the source Instruction operand, it has no 534 // current constraints we need to check. 535 if (WasInserted) 536 continue; 537 538 // If a mapping already exists for the source operand to the values in the 539 // other IRSimilarityCandidate we need to iterate over the items in other 540 // IRSimilarityCandidate's Instruction to determine whether there is a valid 541 // mapping of Value to Value. 542 DenseSet<unsigned> NewSet; 543 for (unsigned &Curr : ValueMappingIt->second) 544 // If we can find the value in the mapping, we add it to the new set. 545 if (TargetValueNumbers.contains(Curr)) 546 NewSet.insert(Curr); 547 548 // If we could not find a Value, return 0. 549 if (NewSet.empty()) 550 return false; 551 552 // Otherwise replace the old mapping with the newly constructed one. 553 if (NewSet.size() != ValueMappingIt->second.size()) 554 ValueMappingIt->second.swap(NewSet); 555 556 // We have reached no conclusions about the mapping, and cannot remove 557 // any items from the other operands, so we move to check the next operand. 558 if (ValueMappingIt->second.size() != 1) 559 continue; 560 561 562 unsigned ValToRemove = *ValueMappingIt->second.begin(); 563 // When there is only one item left in the mapping for and operand, remove 564 // the value from the other operands. If it results in there being no 565 // mapping, return false, it means the mapping is wrong 566 for (Value *InnerV : SourceOperands) { 567 if (V == InnerV) 568 continue; 569 570 unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second; 571 ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal); 572 if (ValueMappingIt == CurrentSrcTgtNumberMapping.end()) 573 continue; 574 575 ValueMappingIt->second.erase(ValToRemove); 576 if (ValueMappingIt->second.empty()) 577 return false; 578 } 579 } 580 581 return true; 582 } 583 584 /// Determine if operand number \p TargetArgVal is in the current mapping set 585 /// for operand number \p SourceArgVal. 586 /// 587 /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global 588 /// value numbers from source IRSimilarityCandidate to target 589 /// IRSimilarityCandidate. 590 /// \param [in] SourceArgVal The global value number for an operand in the 591 /// in the original candidate. 592 /// \param [in] TargetArgVal The global value number for the corresponding 593 /// operand in the other candidate. 594 /// \returns True if there exists a mapping and false if not. 595 bool checkNumberingAndReplace( 596 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 597 unsigned SourceArgVal, unsigned TargetArgVal) { 598 // We are given two unsigned integers representing the global values of 599 // the operands in different IRSimilarityCandidates and a current mapping 600 // between the two. 601 // 602 // Source Operand GVN: 1 603 // Target Operand GVN: 2 604 // CurrentMapping: {1: {1, 2}} 605 // 606 // Since we have mapping, and the target operand is contained in the set, we 607 // update it to: 608 // CurrentMapping: {1: {2}} 609 // and can return true. But, if the mapping was 610 // CurrentMapping: {1: {3}} 611 // we would return false. 612 613 bool WasInserted; 614 DenseMap<unsigned, DenseSet<unsigned>>::iterator Val; 615 616 std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert( 617 std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal}))); 618 619 // If we created a new mapping, then we are done. 620 if (WasInserted) 621 return true; 622 623 // If there is more than one option in the mapping set, and the target value 624 // is included in the mapping set replace that set with one that only includes 625 // the target value, as it is the only valid mapping via the non commutative 626 // instruction. 627 628 DenseSet<unsigned> &TargetSet = Val->second; 629 if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) { 630 TargetSet.clear(); 631 TargetSet.insert(TargetArgVal); 632 return true; 633 } 634 635 // Return true if we can find the value in the set. 636 return TargetSet.contains(TargetArgVal); 637 } 638 639 bool IRSimilarityCandidate::compareNonCommutativeOperandMapping( 640 OperandMapping A, OperandMapping B) { 641 // Iterators to keep track of where we are in the operands for each 642 // Instruction. 643 ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 644 ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 645 unsigned OperandLength = A.OperVals.size(); 646 647 // For each operand, get the value numbering and ensure it is consistent. 648 for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) { 649 unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second; 650 unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second; 651 652 // Attempt to add a set with only the target value. If there is no mapping 653 // we can create it here. 654 // 655 // For an instruction like a subtraction: 656 // IRSimilarityCandidateA: IRSimilarityCandidateB: 657 // %resultA = sub %a, %b %resultB = sub %d, %e 658 // 659 // We map %a -> %d and %b -> %e. 660 // 661 // And check to see whether their mapping is consistent in 662 // checkNumberingAndReplace. 663 664 if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB)) 665 return false; 666 667 if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA)) 668 return false; 669 } 670 return true; 671 } 672 673 bool IRSimilarityCandidate::compareCommutativeOperandMapping( 674 OperandMapping A, OperandMapping B) { 675 DenseSet<unsigned> ValueNumbersA; 676 DenseSet<unsigned> ValueNumbersB; 677 678 ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 679 ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 680 unsigned OperandLength = A.OperVals.size(); 681 682 // Find the value number sets for the operands. 683 for (unsigned Idx = 0; Idx < OperandLength; 684 Idx++, VItA++, VItB++) { 685 ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second); 686 ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second); 687 } 688 689 // Iterate over the operands in the first IRSimilarityCandidate and make sure 690 // there exists a possible mapping with the operands in the second 691 // IRSimilarityCandidate. 692 if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber, 693 A.ValueNumberMapping, A.OperVals, 694 ValueNumbersB)) 695 return false; 696 697 // Iterate over the operands in the second IRSimilarityCandidate and make sure 698 // there exists a possible mapping with the operands in the first 699 // IRSimilarityCandidate. 700 if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber, 701 B.ValueNumberMapping, B.OperVals, 702 ValueNumbersA)) 703 return false; 704 705 return true; 706 } 707 708 bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A, 709 RelativeLocMapping B) { 710 // Get the basic blocks the label refers to. 711 BasicBlock *ABB = static_cast<BasicBlock *>(A.OperVal); 712 BasicBlock *BBB = static_cast<BasicBlock *>(B.OperVal); 713 714 // Get the basic blocks contained in each region. 715 DenseSet<BasicBlock *> BasicBlockA; 716 DenseSet<BasicBlock *> BasicBlockB; 717 A.IRSC.getBasicBlocks(BasicBlockA); 718 B.IRSC.getBasicBlocks(BasicBlockB); 719 720 // Determine if the block is contained in the region. 721 bool AContained = BasicBlockA.contains(ABB); 722 bool BContained = BasicBlockB.contains(BBB); 723 724 // Both blocks need to be contained in the region, or both need to be outside 725 // the reigon. 726 if (AContained != BContained) 727 return false; 728 729 // If both are contained, then we need to make sure that the relative 730 // distance to the target blocks are the same. 731 if (AContained) 732 return A.RelativeLocation == B.RelativeLocation; 733 return true; 734 } 735 736 bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, 737 const IRSimilarityCandidate &B) { 738 DenseMap<unsigned, DenseSet<unsigned>> MappingA; 739 DenseMap<unsigned, DenseSet<unsigned>> MappingB; 740 return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB); 741 } 742 743 typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &, 744 SmallVector<int, 4> &, ArrayRef<Value *> &, 745 ArrayRef<Value *> &> 746 ZippedRelativeLocationsT; 747 748 bool IRSimilarityCandidate::compareStructure( 749 const IRSimilarityCandidate &A, const IRSimilarityCandidate &B, 750 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA, 751 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) { 752 if (A.getLength() != B.getLength()) 753 return false; 754 755 if (A.ValueToNumber.size() != B.ValueToNumber.size()) 756 return false; 757 758 iterator ItA = A.begin(); 759 iterator ItB = B.begin(); 760 761 // These ValueNumber Mapping sets create a create a mapping between the values 762 // in one candidate to values in the other candidate. If we create a set with 763 // one element, and that same element maps to the original element in the 764 // candidate we have a good mapping. 765 DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 766 767 768 // Iterate over the instructions contained in each candidate 769 unsigned SectionLength = A.getStartIdx() + A.getLength(); 770 for (unsigned Loc = A.getStartIdx(); Loc < SectionLength; 771 ItA++, ItB++, Loc++) { 772 // Make sure the instructions are similar to one another. 773 if (!isClose(*ItA, *ItB)) 774 return false; 775 776 Instruction *IA = ItA->Inst; 777 Instruction *IB = ItB->Inst; 778 779 if (!ItA->Legal || !ItB->Legal) 780 return false; 781 782 // Get the operand sets for the instructions. 783 ArrayRef<Value *> OperValsA = ItA->OperVals; 784 ArrayRef<Value *> OperValsB = ItB->OperVals; 785 786 unsigned InstValA = A.ValueToNumber.find(IA)->second; 787 unsigned InstValB = B.ValueToNumber.find(IB)->second; 788 789 bool WasInserted; 790 // Ensure that the mappings for the instructions exists. 791 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert( 792 std::make_pair(InstValA, DenseSet<unsigned>({InstValB}))); 793 if (!WasInserted && !ValueMappingIt->second.contains(InstValB)) 794 return false; 795 796 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert( 797 std::make_pair(InstValB, DenseSet<unsigned>({InstValA}))); 798 if (!WasInserted && !ValueMappingIt->second.contains(InstValA)) 799 return false; 800 801 // We have different paths for commutative instructions and non-commutative 802 // instructions since commutative instructions could allow multiple mappings 803 // to certain values. 804 if (IA->isCommutative() && !isa<FPMathOperator>(IA) && 805 !isa<IntrinsicInst>(IA)) { 806 if (!compareCommutativeOperandMapping( 807 {A, OperValsA, ValueNumberMappingA}, 808 {B, OperValsB, ValueNumberMappingB})) 809 return false; 810 continue; 811 } 812 813 // Handle the non-commutative cases. 814 if (!compareNonCommutativeOperandMapping( 815 {A, OperValsA, ValueNumberMappingA}, 816 {B, OperValsB, ValueNumberMappingB})) 817 return false; 818 819 // Here we check that between two corresponding instructions, 820 // when referring to a basic block in the same region, the 821 // relative locations are the same. And, that the instructions refer to 822 // basic blocks outside the region in the same corresponding locations. 823 824 // We are able to make the assumption about blocks outside of the region 825 // since the target block labels are considered values and will follow the 826 // same number matching that we defined for the other instructions in the 827 // region. So, at this point, in each location we target a specific block 828 // outside the region, we are targeting a corresponding block in each 829 // analagous location in the region we are comparing to. 830 if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) && 831 !(isa<PHINode>(IA) && isa<PHINode>(IB))) 832 continue; 833 834 SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations; 835 SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations; 836 if (RelBlockLocsA.size() != RelBlockLocsB.size() && 837 OperValsA.size() != OperValsB.size()) 838 return false; 839 840 ZippedRelativeLocationsT ZippedRelativeLocations = 841 zip(RelBlockLocsA, RelBlockLocsB, OperValsA, OperValsB); 842 if (any_of(ZippedRelativeLocations, 843 [&A, &B](std::tuple<int, int, Value *, Value *> R) { 844 return !checkRelativeLocations( 845 {A, std::get<0>(R), std::get<2>(R)}, 846 {B, std::get<1>(R), std::get<3>(R)}); 847 })) 848 return false; 849 } 850 return true; 851 } 852 853 bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, 854 const IRSimilarityCandidate &B) { 855 auto DoesOverlap = [](const IRSimilarityCandidate &X, 856 const IRSimilarityCandidate &Y) { 857 // Check: 858 // XXXXXX X starts before Y ends 859 // YYYYYYY Y starts after X starts 860 return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx; 861 }; 862 863 return DoesOverlap(A, B) || DoesOverlap(B, A); 864 } 865 866 void IRSimilarityIdentifier::populateMapper( 867 Module &M, std::vector<IRInstructionData *> &InstrList, 868 std::vector<unsigned> &IntegerMapping) { 869 870 std::vector<IRInstructionData *> InstrListForModule; 871 std::vector<unsigned> IntegerMappingForModule; 872 // Iterate over the functions in the module to map each Instruction in each 873 // BasicBlock to an unsigned integer. 874 Mapper.initializeForBBs(M); 875 876 for (Function &F : M) { 877 878 if (F.empty()) 879 continue; 880 881 for (BasicBlock &BB : F) { 882 883 // BB has potential to have similarity since it has a size greater than 2 884 // and can therefore match other regions greater than 2. Map it to a list 885 // of unsigned integers. 886 Mapper.convertToUnsignedVec(BB, InstrListForModule, 887 IntegerMappingForModule); 888 } 889 890 BasicBlock::iterator It = F.begin()->end(); 891 Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule, 892 true); 893 if (InstrListForModule.size() > 0) 894 Mapper.IDL->push_back(*InstrListForModule.back()); 895 } 896 897 // Insert the InstrListForModule at the end of the overall InstrList so that 898 // we can have a long InstrList for the entire set of Modules being analyzed. 899 llvm::append_range(InstrList, InstrListForModule); 900 // Do the same as above, but for IntegerMapping. 901 llvm::append_range(IntegerMapping, IntegerMappingForModule); 902 } 903 904 void IRSimilarityIdentifier::populateMapper( 905 ArrayRef<std::unique_ptr<Module>> &Modules, 906 std::vector<IRInstructionData *> &InstrList, 907 std::vector<unsigned> &IntegerMapping) { 908 909 // Iterate over, and map the instructions in each module. 910 for (const std::unique_ptr<Module> &M : Modules) 911 populateMapper(*M, InstrList, IntegerMapping); 912 } 913 914 /// From a repeated subsequence, find all the different instances of the 915 /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from 916 /// the IRInstructionData in subsequence. 917 /// 918 /// \param [in] Mapper - The instruction mapper for basic correctness checks. 919 /// \param [in] InstrList - The vector that holds the instruction data. 920 /// \param [in] IntegerMapping - The vector that holds the mapped integers. 921 /// \param [out] CandsForRepSubstring - The vector to store the generated 922 /// IRSimilarityCandidates. 923 static void createCandidatesFromSuffixTree( 924 const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList, 925 std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS, 926 std::vector<IRSimilarityCandidate> &CandsForRepSubstring) { 927 928 unsigned StringLen = RS.Length; 929 if (StringLen < 2) 930 return; 931 932 // Create an IRSimilarityCandidate for instance of this subsequence \p RS. 933 for (const unsigned &StartIdx : RS.StartIndices) { 934 unsigned EndIdx = StartIdx + StringLen - 1; 935 936 // Check that this subsequence does not contain an illegal instruction. 937 bool ContainsIllegal = false; 938 for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) { 939 unsigned Key = IntegerMapping[CurrIdx]; 940 if (Key > Mapper.IllegalInstrNumber) { 941 ContainsIllegal = true; 942 break; 943 } 944 } 945 946 // If we have an illegal instruction, we should not create an 947 // IRSimilarityCandidate for this region. 948 if (ContainsIllegal) 949 continue; 950 951 // We are getting iterators to the instructions in this region of code 952 // by advancing the start and end indices from the start of the 953 // InstrList. 954 std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin(); 955 std::advance(StartIt, StartIdx); 956 std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin(); 957 std::advance(EndIt, EndIdx); 958 959 CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt); 960 } 961 } 962 963 void IRSimilarityCandidate::createCanonicalRelationFrom( 964 IRSimilarityCandidate &SourceCand, 965 DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping, 966 DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) { 967 assert(SourceCand.CanonNumToNumber.size() != 0 && 968 "Base canonical relationship is empty!"); 969 assert(SourceCand.NumberToCanonNum.size() != 0 && 970 "Base canonical relationship is empty!"); 971 972 assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty"); 973 assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty"); 974 975 DenseSet<unsigned> UsedGVNs; 976 // Iterate over the mappings provided from this candidate to SourceCand. We 977 // are then able to map the GVN in this candidate to the same canonical number 978 // given to the corresponding GVN in SourceCand. 979 for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) { 980 unsigned SourceGVN = GVNMapping.first; 981 982 assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!"); 983 984 unsigned ResultGVN; 985 // We need special handling if we have more than one potential value. This 986 // means that there are at least two GVNs that could correspond to this GVN. 987 // This could lead to potential swapping later on, so we make a decision 988 // here to ensure a one-to-one mapping. 989 if (GVNMapping.second.size() > 1) { 990 bool Found = false; 991 for (unsigned Val : GVNMapping.second) { 992 // We make sure the target value number hasn't already been reserved. 993 if (UsedGVNs.contains(Val)) 994 continue; 995 996 // We make sure that the opposite mapping is still consistent. 997 DenseMap<unsigned, DenseSet<unsigned>>::iterator It = 998 FromSourceMapping.find(Val); 999 1000 if (!It->second.contains(SourceGVN)) 1001 continue; 1002 1003 // We pick the first item that satisfies these conditions. 1004 Found = true; 1005 ResultGVN = Val; 1006 break; 1007 } 1008 1009 assert(Found && "Could not find matching value for source GVN"); 1010 (void)Found; 1011 1012 } else 1013 ResultGVN = *GVNMapping.second.begin(); 1014 1015 // Whatever GVN is found, we mark it as used. 1016 UsedGVNs.insert(ResultGVN); 1017 1018 unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN); 1019 CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN)); 1020 NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum)); 1021 } 1022 1023 DenseSet<BasicBlock *> BBSet; 1024 getBasicBlocks(BBSet); 1025 // Find canonical numbers for the BasicBlocks in the current candidate. 1026 // This is done by finding the corresponding value for the first instruction 1027 // in the block in the current candidate, finding the matching value in the 1028 // source candidate. Then by finding the parent of this value, use the 1029 // canonical number of the block in the source candidate for the canonical 1030 // number in the current candidate. 1031 for (BasicBlock *BB : BBSet) { 1032 unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second; 1033 1034 // We can skip the BasicBlock if the canonical numbering has already been 1035 // found in a separate instruction. 1036 if (NumberToCanonNum.find(BBGVNForCurrCand) != NumberToCanonNum.end()) 1037 continue; 1038 1039 // If the basic block is the starting block, then the shared instruction may 1040 // not be the first instruction in the block, it will be the first 1041 // instruction in the similarity region. 1042 Value *FirstOutlineInst = BB == getStartBB() 1043 ? frontInstruction() 1044 : &*BB->instructionsWithoutDebug().begin(); 1045 1046 unsigned FirstInstGVN = *getGVN(FirstOutlineInst); 1047 unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN); 1048 unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum); 1049 Value *SourceV = *SourceCand.fromGVN(SourceGVN); 1050 BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent(); 1051 unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB); 1052 unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN); 1053 CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand)); 1054 NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN)); 1055 } 1056 } 1057 1058 void IRSimilarityCandidate::createCanonicalMappingFor( 1059 IRSimilarityCandidate &CurrCand) { 1060 assert(CurrCand.CanonNumToNumber.size() == 0 && 1061 "Canonical Relationship is non-empty"); 1062 assert(CurrCand.NumberToCanonNum.size() == 0 && 1063 "Canonical Relationship is non-empty"); 1064 1065 unsigned CanonNum = 0; 1066 // Iterate over the value numbers found, the order does not matter in this 1067 // case. 1068 for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) { 1069 CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum)); 1070 CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first)); 1071 CanonNum++; 1072 } 1073 } 1074 1075 /// From the list of IRSimilarityCandidates, perform a comparison between each 1076 /// IRSimilarityCandidate to determine if there are overlapping 1077 /// IRInstructionData, or if they do not have the same structure. 1078 /// 1079 /// \param [in] CandsForRepSubstring - The vector containing the 1080 /// IRSimilarityCandidates. 1081 /// \param [out] StructuralGroups - the mapping of unsigned integers to vector 1082 /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the 1083 /// vector are structurally similar to one another. 1084 static void findCandidateStructures( 1085 std::vector<IRSimilarityCandidate> &CandsForRepSubstring, 1086 DenseMap<unsigned, SimilarityGroup> &StructuralGroups) { 1087 std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt, 1088 InnerCandEndIt; 1089 1090 // IRSimilarityCandidates each have a structure for operand use. It is 1091 // possible that two instances of the same subsequences have different 1092 // structure. Each type of structure found is assigned a number. This 1093 // DenseMap maps an IRSimilarityCandidate to which type of similarity 1094 // discovered it fits within. 1095 DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; 1096 1097 // Find the compatibility from each candidate to the others to determine 1098 // which candidates overlap and which have the same structure by mapping 1099 // each structure to a different group. 1100 bool SameStructure; 1101 bool Inserted; 1102 unsigned CurrentGroupNum = 0; 1103 unsigned OuterGroupNum; 1104 DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt; 1105 DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner; 1106 DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair; 1107 1108 // Iterate over the candidates to determine its structural and overlapping 1109 // compatibility with other instructions 1110 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA; 1111 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB; 1112 for (CandIt = CandsForRepSubstring.begin(), 1113 CandEndIt = CandsForRepSubstring.end(); 1114 CandIt != CandEndIt; CandIt++) { 1115 1116 // Determine if it has an assigned structural group already. 1117 CandToGroupIt = CandToGroup.find(&*CandIt); 1118 if (CandToGroupIt == CandToGroup.end()) { 1119 // If not, we assign it one, and add it to our mapping. 1120 std::tie(CandToGroupIt, Inserted) = 1121 CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++)); 1122 } 1123 1124 // Get the structural group number from the iterator. 1125 OuterGroupNum = CandToGroupIt->second; 1126 1127 // Check if we already have a list of IRSimilarityCandidates for the current 1128 // structural group. Create one if one does not exist. 1129 CurrentGroupPair = StructuralGroups.find(OuterGroupNum); 1130 if (CurrentGroupPair == StructuralGroups.end()) { 1131 IRSimilarityCandidate::createCanonicalMappingFor(*CandIt); 1132 std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert( 1133 std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt}))); 1134 } 1135 1136 // Iterate over the IRSimilarityCandidates following the current 1137 // IRSimilarityCandidate in the list to determine whether the two 1138 // IRSimilarityCandidates are compatible. This is so we do not repeat pairs 1139 // of IRSimilarityCandidates. 1140 for (InnerCandIt = std::next(CandIt), 1141 InnerCandEndIt = CandsForRepSubstring.end(); 1142 InnerCandIt != InnerCandEndIt; InnerCandIt++) { 1143 1144 // We check if the inner item has a group already, if it does, we skip it. 1145 CandToGroupItInner = CandToGroup.find(&*InnerCandIt); 1146 if (CandToGroupItInner != CandToGroup.end()) 1147 continue; 1148 1149 // Otherwise we determine if they have the same structure and add it to 1150 // vector if they match. 1151 ValueNumberMappingA.clear(); 1152 ValueNumberMappingB.clear(); 1153 SameStructure = IRSimilarityCandidate::compareStructure( 1154 *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB); 1155 if (!SameStructure) 1156 continue; 1157 1158 InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA, 1159 ValueNumberMappingB); 1160 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum)); 1161 CurrentGroupPair->second.push_back(*InnerCandIt); 1162 } 1163 } 1164 } 1165 1166 void IRSimilarityIdentifier::findCandidates( 1167 std::vector<IRInstructionData *> &InstrList, 1168 std::vector<unsigned> &IntegerMapping) { 1169 SuffixTree ST(IntegerMapping); 1170 1171 std::vector<IRSimilarityCandidate> CandsForRepSubstring; 1172 std::vector<SimilarityGroup> NewCandidateGroups; 1173 1174 DenseMap<unsigned, SimilarityGroup> StructuralGroups; 1175 1176 // Iterate over the subsequences found by the Suffix Tree to create 1177 // IRSimilarityCandidates for each repeated subsequence and determine which 1178 // instances are structurally similar to one another. 1179 for (SuffixTree::RepeatedSubstring &RS : ST) { 1180 createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS, 1181 CandsForRepSubstring); 1182 1183 if (CandsForRepSubstring.size() < 2) 1184 continue; 1185 1186 findCandidateStructures(CandsForRepSubstring, StructuralGroups); 1187 for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) 1188 // We only add the group if it contains more than one 1189 // IRSimilarityCandidate. If there is only one, that means there is no 1190 // other repeated subsequence with the same structure. 1191 if (Group.second.size() > 1) 1192 SimilarityCandidates->push_back(Group.second); 1193 1194 CandsForRepSubstring.clear(); 1195 StructuralGroups.clear(); 1196 NewCandidateGroups.clear(); 1197 } 1198 } 1199 1200 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( 1201 ArrayRef<std::unique_ptr<Module>> Modules) { 1202 resetSimilarityCandidates(); 1203 1204 std::vector<IRInstructionData *> InstrList; 1205 std::vector<unsigned> IntegerMapping; 1206 Mapper.InstClassifier.EnableBranches = this->EnableBranches; 1207 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; 1208 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; 1209 Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; 1210 Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls; 1211 1212 populateMapper(Modules, InstrList, IntegerMapping); 1213 findCandidates(InstrList, IntegerMapping); 1214 1215 return SimilarityCandidates.getValue(); 1216 } 1217 1218 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { 1219 resetSimilarityCandidates(); 1220 Mapper.InstClassifier.EnableBranches = this->EnableBranches; 1221 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls; 1222 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName; 1223 Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics; 1224 Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls; 1225 1226 std::vector<IRInstructionData *> InstrList; 1227 std::vector<unsigned> IntegerMapping; 1228 1229 populateMapper(M, InstrList, IntegerMapping); 1230 findCandidates(InstrList, IntegerMapping); 1231 1232 return SimilarityCandidates.getValue(); 1233 } 1234 1235 INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier", 1236 "ir-similarity-identifier", false, true) 1237 1238 IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() 1239 : ModulePass(ID) { 1240 initializeIRSimilarityIdentifierWrapperPassPass( 1241 *PassRegistry::getPassRegistry()); 1242 } 1243 1244 bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { 1245 IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, 1246 MatchCallsByName, !DisableIntrinsics, 1247 false)); 1248 return false; 1249 } 1250 1251 bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) { 1252 IRSI.reset(); 1253 return false; 1254 } 1255 1256 bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { 1257 IRSI->findSimilarity(M); 1258 return false; 1259 } 1260 1261 AnalysisKey IRSimilarityAnalysis::Key; 1262 IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, 1263 ModuleAnalysisManager &) { 1264 auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls, 1265 MatchCallsByName, !DisableIntrinsics, 1266 false); 1267 IRSI.findSimilarity(M); 1268 return IRSI; 1269 } 1270 1271 PreservedAnalyses 1272 IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { 1273 IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M); 1274 Optional<SimilarityGroupList> &SimilarityCandidatesOpt = IRSI.getSimilarity(); 1275 1276 for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) { 1277 OS << CandVec.size() << " candidates of length " 1278 << CandVec.begin()->getLength() << ". Found in: \n"; 1279 for (IRSimilarityCandidate &Cand : CandVec) { 1280 OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str() 1281 << ", Basic Block: "; 1282 if (Cand.front()->Inst->getParent()->getName().str() == "") 1283 OS << "(unnamed)"; 1284 else 1285 OS << Cand.front()->Inst->getParent()->getName().str(); 1286 OS << "\n Start Instruction: "; 1287 Cand.frontInstruction()->print(OS); 1288 OS << "\n End Instruction: "; 1289 Cand.backInstruction()->print(OS); 1290 OS << "\n"; 1291 } 1292 } 1293 1294 return PreservedAnalyses::all(); 1295 } 1296 1297 char IRSimilarityIdentifierWrapperPass::ID = 0; 1298