1 //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // \file 10 // Implementation file for the IRSimilarityIdentifier for identifying 11 // similarities in IR including the IRInstructionMapper. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Analysis/IRSimilarityIdentifier.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/IR/Intrinsics.h" 18 #include "llvm/IR/User.h" 19 #include "llvm/InitializePasses.h" 20 #include "llvm/Support/SuffixTree.h" 21 22 using namespace llvm; 23 using namespace IRSimilarity; 24 25 IRInstructionData::IRInstructionData(Instruction &I, bool Legality, 26 IRInstructionDataList &IDList) 27 : Inst(&I), Legal(Legality), IDL(&IDList) { 28 // Here we collect the operands to be used to determine whether two 29 // instructions are similar to one another. 30 for (Use &OI : I.operands()) 31 OperVals.push_back(OI.get()); 32 } 33 34 bool IRSimilarity::isClose(const IRInstructionData &A, 35 const IRInstructionData &B) { 36 return A.Legal && A.Inst->isSameOperationAs(B.Inst); 37 } 38 39 // TODO: This is the same as the MachineOutliner, and should be consolidated 40 // into the same interface. 41 void IRInstructionMapper::convertToUnsignedVec( 42 BasicBlock &BB, std::vector<IRInstructionData *> &InstrList, 43 std::vector<unsigned> &IntegerMapping) { 44 BasicBlock::iterator It = BB.begin(); 45 46 std::vector<unsigned> IntegerMappingForBB; 47 std::vector<IRInstructionData *> InstrListForBB; 48 49 HaveLegalRange = false; 50 CanCombineWithPrevInstr = false; 51 AddedIllegalLastTime = true; 52 53 for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { 54 switch (InstClassifier.visit(*It)) { 55 case InstrType::Legal: 56 mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 57 break; 58 case InstrType::Illegal: 59 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 60 break; 61 case InstrType::Invisible: 62 AddedIllegalLastTime = false; 63 break; 64 } 65 } 66 67 if (HaveLegalRange) { 68 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); 69 for_each(InstrListForBB, 70 [this](IRInstructionData *ID) { this->IDL->push_back(*ID); }); 71 InstrList.insert(InstrList.end(), InstrListForBB.begin(), 72 InstrListForBB.end()); 73 IntegerMapping.insert(IntegerMapping.end(), IntegerMappingForBB.begin(), 74 IntegerMappingForBB.end()); 75 } 76 } 77 78 // TODO: This is the same as the MachineOutliner, and should be consolidated 79 // into the same interface. 80 unsigned IRInstructionMapper::mapToLegalUnsigned( 81 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 82 std::vector<IRInstructionData *> &InstrListForBB) { 83 // We added something legal, so we should unset the AddedLegalLastTime 84 // flag. 85 AddedIllegalLastTime = false; 86 87 // If we have at least two adjacent legal instructions (which may have 88 // invisible instructions in between), remember that. 89 if (CanCombineWithPrevInstr) 90 HaveLegalRange = true; 91 CanCombineWithPrevInstr = true; 92 93 // Get the integer for this instruction or give it the current 94 // LegalInstrNumber. 95 IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL); 96 InstrListForBB.push_back(ID); 97 98 // Add to the instruction list 99 bool WasInserted; 100 DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator 101 ResultIt; 102 std::tie(ResultIt, WasInserted) = 103 InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber)); 104 unsigned INumber = ResultIt->second; 105 106 // There was an insertion. 107 if (WasInserted) 108 LegalInstrNumber++; 109 110 IntegerMappingForBB.push_back(INumber); 111 112 // Make sure we don't overflow or use any integers reserved by the DenseMap. 113 assert(LegalInstrNumber < IllegalInstrNumber && 114 "Instruction mapping overflow!"); 115 116 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 117 "Tried to assign DenseMap tombstone or empty key to instruction."); 118 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 119 "Tried to assign DenseMap tombstone or empty key to instruction."); 120 121 return INumber; 122 } 123 124 IRInstructionData * 125 IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality, 126 IRInstructionDataList &IDL) { 127 return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL); 128 } 129 130 IRInstructionDataList * 131 IRInstructionMapper::allocateIRInstructionDataList() { 132 return new (IDLAllocator->Allocate()) IRInstructionDataList(); 133 } 134 135 // TODO: This is the same as the MachineOutliner, and should be consolidated 136 // into the same interface. 137 unsigned IRInstructionMapper::mapToIllegalUnsigned( 138 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 139 std::vector<IRInstructionData *> &InstrListForBB, bool End) { 140 // Can't combine an illegal instruction. Set the flag. 141 CanCombineWithPrevInstr = false; 142 143 // Only add one illegal number per range of legal numbers. 144 if (AddedIllegalLastTime) 145 return IllegalInstrNumber; 146 147 IRInstructionData *ID = nullptr; 148 if (!End) 149 ID = allocateIRInstructionData(*It, false, *IDL); 150 InstrListForBB.push_back(ID); 151 152 // Remember that we added an illegal number last time. 153 AddedIllegalLastTime = true; 154 unsigned INumber = IllegalInstrNumber; 155 IntegerMappingForBB.push_back(IllegalInstrNumber--); 156 157 assert(LegalInstrNumber < IllegalInstrNumber && 158 "Instruction mapping overflow!"); 159 160 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 161 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 162 163 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 164 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 165 166 return INumber; 167 } 168 169 IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, 170 IRInstructionData *FirstInstIt, 171 IRInstructionData *LastInstIt) 172 : StartIdx(StartIdx), Len(Len) { 173 174 assert(FirstInstIt != nullptr && "Instruction is nullptr!"); 175 assert(LastInstIt != nullptr && "Instruction is nullptr!"); 176 assert(StartIdx + Len > StartIdx && 177 "Overflow for IRSimilarityCandidate range?"); 178 assert(Len - 1 == static_cast<unsigned>(std::distance( 179 iterator(FirstInstIt), iterator(LastInstIt))) && 180 "Length of the first and last IRInstructionData do not match the " 181 "given length"); 182 183 // We iterate over the given instructions, and map each unique value 184 // to a unique number in the IRSimilarityCandidate ValueToNumber and 185 // NumberToValue maps. A constant get its own value globally, the individual 186 // uses of the constants are not considered to be unique. 187 // 188 // IR: Mapping Added: 189 // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 190 // %add2 = add i32 %a, %1 %add2 -> 4 191 // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 192 // 193 // when replace with global values, starting from 1, would be 194 // 195 // 3 = add i32 1, 2 196 // 4 = add i32 1, 3 197 // 6 = add i32 5, 2 198 unsigned LocalValNumber = 1; 199 IRInstructionDataList::iterator ID = iterator(*FirstInstIt); 200 for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) { 201 // Map the operand values to an unsigned integer if it does not already 202 // have an unsigned integer assigned to it. 203 for (Value *Arg : ID->OperVals) 204 if (ValueToNumber.find(Arg) == ValueToNumber.end()) { 205 ValueToNumber.try_emplace(Arg, LocalValNumber); 206 NumberToValue.try_emplace(LocalValNumber, Arg); 207 LocalValNumber++; 208 } 209 210 // Mapping the instructions to an unsigned integer if it is not already 211 // exist in the mapping. 212 if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) { 213 ValueToNumber.try_emplace(ID->Inst, LocalValNumber); 214 NumberToValue.try_emplace(LocalValNumber, ID->Inst); 215 LocalValNumber++; 216 } 217 } 218 219 // Setting the first and last instruction data pointers for the candidate. If 220 // we got through the entire for loop without hitting an assert, we know 221 // that both of these instructions are not nullptrs. 222 FirstInst = FirstInstIt; 223 LastInst = LastInstIt; 224 } 225 226 bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, 227 const IRSimilarityCandidate &B) { 228 if (A.getLength() != B.getLength()) 229 return false; 230 231 auto InstrDataForBoth = 232 zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end())); 233 234 return all_of(InstrDataForBoth, 235 [](std::tuple<IRInstructionData &, IRInstructionData &> R) { 236 IRInstructionData &A = std::get<0>(R); 237 IRInstructionData &B = std::get<1>(R); 238 if (!A.Legal || !B.Legal) 239 return false; 240 return isClose(A, B); 241 }); 242 } 243 244 /// Determine if operand number \p TargetArgVal is in the current mapping set 245 /// for operand number \p SourceArgVal. 246 /// 247 /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global 248 /// value numbers from source IRSimilarityCandidate to target 249 /// IRSimilarityCandidate. 250 /// \param [in] SourceArgVal The global value number for an operand in the 251 /// in the original candidate. 252 /// \param [in] TargetArgVal The global value number for the corresponding 253 /// operand in the other candidate. 254 /// \returns True if there exists a mapping and false if not. 255 bool checkNumberingAndReplace( 256 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 257 unsigned SourceArgVal, unsigned TargetArgVal) { 258 // We are given two unsigned integers representing the global values of 259 // the operands in different IRSimilarityCandidates and a current mapping 260 // between the two. 261 // 262 // Source Operand GVN: 1 263 // Target Operand GVN: 2 264 // CurrentMapping: {1: {1, 2}} 265 // 266 // Since we have mapping, and the target operand is contained in the set, we 267 // update it to: 268 // CurrentMapping: {1: {2}} 269 // and can return true. But, if the mapping was 270 // CurrentMapping: {1: {3}} 271 // we would return false. 272 273 bool WasInserted; 274 DenseMap<unsigned, DenseSet<unsigned>>::iterator Val; 275 276 std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert( 277 std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal}))); 278 279 // If we created a new mapping, then we are done. 280 if (WasInserted) 281 return true; 282 283 // If there is more than one option in the mapping set, and the target value 284 // is included in the mapping set replace that set with one that only includes 285 // the target value, as it is the only valid mapping via the non commutative 286 // instruction. 287 288 DenseSet<unsigned> &TargetSet = Val->second; 289 if (TargetSet.size() > 1 && TargetSet.find(TargetArgVal) != TargetSet.end()) { 290 TargetSet.clear(); 291 TargetSet.insert(TargetArgVal); 292 return true; 293 } 294 295 // Return true if we can find the value in the set. 296 return TargetSet.find(TargetArgVal) != TargetSet.end(); 297 } 298 299 bool IRSimilarityCandidate::compareOperandMapping(OperandMapping A, 300 OperandMapping B) { 301 // Iterators to keep track of where we are in the operands for each 302 // Instruction. 303 ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 304 ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 305 unsigned OperandLength = A.OperVals.size(); 306 307 // For each operand, get the value numbering and ensure it is consistent. 308 for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) { 309 unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second; 310 unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second; 311 312 // Attempt to add a set with only the target value. If there is no mapping 313 // we can create it here. 314 // 315 // For an instruction like a subtraction: 316 // IRSimilarityCandidateA: IRSimilarityCandidateB: 317 // %resultA = sub %a, %b %resultB = sub %d, %e 318 // 319 // We map %a -> %d and %b -> %e. 320 // 321 // And check to see whether their mapping is consistent in 322 // checkNumberingAndReplace. 323 324 if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB)) 325 return false; 326 327 if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA)) 328 return false; 329 } 330 return true; 331 } 332 333 bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, 334 const IRSimilarityCandidate &B) { 335 if (A.getLength() != B.getLength()) 336 return false; 337 338 if (A.ValueToNumber.size() != B.ValueToNumber.size()) 339 return false; 340 341 iterator ItA = A.begin(); 342 iterator ItB = B.begin(); 343 344 // These sets create a create a mapping between the values in one candidate 345 // to values in the other candidate. If we create a set with one element, 346 // and that same element maps to the original element in the candidate 347 // we have a good mapping. 348 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA; 349 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB; 350 DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 351 352 bool WasInserted; 353 354 // Iterate over the instructions contained in each candidate 355 unsigned SectionLength = A.getStartIdx() + A.getLength(); 356 for (unsigned Loc = A.getStartIdx(); Loc < SectionLength; 357 ItA++, ItB++, Loc++) { 358 // Make sure the instructions are similar to one another. 359 if (!isClose(*ItA, *ItB)) 360 return false; 361 362 Instruction *IA = ItA->Inst; 363 Instruction *IB = ItB->Inst; 364 365 if (!ItA->Legal || !ItB->Legal) 366 return false; 367 368 // Get the operand sets for the instructions. 369 ArrayRef<Value *> OperValsA = ItA->OperVals; 370 ArrayRef<Value *> OperValsB = ItB->OperVals; 371 372 unsigned InstValA = A.ValueToNumber.find(IA)->second; 373 unsigned InstValB = B.ValueToNumber.find(IB)->second; 374 375 // Ensure that the mappings for the instructions exists. 376 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert( 377 std::make_pair(InstValA, DenseSet<unsigned>({InstValB}))); 378 if (!WasInserted && ValueMappingIt->second.find(InstValB) == 379 ValueMappingIt->second.end()) 380 return false; 381 382 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert( 383 std::make_pair(InstValB, DenseSet<unsigned>({InstValA}))); 384 if (!WasInserted && ValueMappingIt->second.find(InstValA) == 385 ValueMappingIt->second.end()) 386 return false; 387 388 // TODO: Handle commutative instructions by mapping one operand to many 389 // operands instead only mapping a single operand to a single operand. 390 if (!compareOperandMapping({A, OperValsA, ValueNumberMappingA}, 391 {B, OperValsB, ValueNumberMappingB})) 392 return false; 393 } 394 return true; 395 } 396 397 bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, 398 const IRSimilarityCandidate &B) { 399 auto DoesOverlap = [](const IRSimilarityCandidate &X, 400 const IRSimilarityCandidate &Y) { 401 // Check: 402 // XXXXXX X starts before Y ends 403 // YYYYYYY Y starts after X starts 404 return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx; 405 }; 406 407 return DoesOverlap(A, B) || DoesOverlap(B, A); 408 } 409 410 void IRSimilarityIdentifier::populateMapper( 411 Module &M, std::vector<IRInstructionData *> &InstrList, 412 std::vector<unsigned> &IntegerMapping) { 413 414 std::vector<IRInstructionData *> InstrListForModule; 415 std::vector<unsigned> IntegerMappingForModule; 416 // Iterate over the functions in the module to map each Instruction in each 417 // BasicBlock to an unsigned integer. 418 for (Function &F : M) { 419 420 if (F.empty()) 421 continue; 422 423 for (BasicBlock &BB : F) { 424 425 if (BB.sizeWithoutDebug() < 2) 426 continue; 427 428 // BB has potential to have similarity since it has a size greater than 2 429 // and can therefore match other regions greater than 2. Map it to a list 430 // of unsigned integers. 431 Mapper.convertToUnsignedVec(BB, InstrListForModule, 432 IntegerMappingForModule); 433 } 434 } 435 436 // Insert the InstrListForModule at the end of the overall InstrList so that 437 // we can have a long InstrList for the entire set of Modules being analyzed. 438 InstrList.insert(InstrList.end(), InstrListForModule.begin(), 439 InstrListForModule.end()); 440 // Do the same as above, but for IntegerMapping. 441 IntegerMapping.insert(IntegerMapping.end(), IntegerMappingForModule.begin(), 442 IntegerMappingForModule.end()); 443 } 444 445 void IRSimilarityIdentifier::populateMapper( 446 ArrayRef<std::unique_ptr<Module>> &Modules, 447 std::vector<IRInstructionData *> &InstrList, 448 std::vector<unsigned> &IntegerMapping) { 449 450 // Iterate over, and map the instructions in each module. 451 for (const std::unique_ptr<Module> &M : Modules) 452 populateMapper(*M, InstrList, IntegerMapping); 453 } 454 455 /// From a repeated subsequence, find all the different instances of the 456 /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from 457 /// the IRInstructionData in subsequence. 458 /// 459 /// \param [in] Mapper - The instruction mapper for sanity checks. 460 /// \param [in] InstrList - The vector that holds the instruction data. 461 /// \param [in] IntegerMapping - The vector that holds the mapped integers. 462 /// \param [out] CandsForRepSubstring - The vector to store the generated 463 /// IRSimilarityCandidates. 464 static void createCandidatesFromSuffixTree( 465 IRInstructionMapper Mapper, std::vector<IRInstructionData *> &InstrList, 466 std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS, 467 std::vector<IRSimilarityCandidate> &CandsForRepSubstring) { 468 469 unsigned StringLen = RS.Length; 470 471 // Create an IRSimilarityCandidate for instance of this subsequence \p RS. 472 for (const unsigned &StartIdx : RS.StartIndices) { 473 unsigned EndIdx = StartIdx + StringLen - 1; 474 475 // Check that this subsequence does not contain an illegal instruction. 476 bool ContainsIllegal = false; 477 for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) { 478 unsigned Key = IntegerMapping[CurrIdx]; 479 if (Key > Mapper.IllegalInstrNumber) { 480 ContainsIllegal = true; 481 break; 482 } 483 } 484 485 // If we have an illegal instruction, we should not create an 486 // IRSimilarityCandidate for this region. 487 if (ContainsIllegal) 488 continue; 489 490 // We are getting iterators to the instructions in this region of code 491 // by advancing the start and end indices from the start of the 492 // InstrList. 493 std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin(); 494 std::advance(StartIt, StartIdx); 495 std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin(); 496 std::advance(EndIt, EndIdx); 497 498 CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt); 499 } 500 } 501 502 /// From the list of IRSimilarityCandidates, perform a comparison between each 503 /// IRSimilarityCandidate to determine if there are overlapping 504 /// IRInstructionData, or if they do not have the same structure. 505 /// 506 /// \param [in] CandsForRepSubstring - The vector containing the 507 /// IRSimilarityCandidates. 508 /// \param [out] StructuralGroups - the mapping of unsigned integers to vector 509 /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the 510 /// vector are structurally similar to one another. 511 static void findCandidateStructures( 512 std::vector<IRSimilarityCandidate> &CandsForRepSubstring, 513 DenseMap<unsigned, SimilarityGroup> &StructuralGroups) { 514 std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt, 515 InnerCandEndIt; 516 517 // IRSimilarityCandidates each have a structure for operand use. It is 518 // possible that two instances of the same subsequences have different 519 // structure. Each type of structure found is assigned a number. This 520 // DenseMap maps an IRSimilarityCandidate to which type of similarity 521 // discovered it fits within. 522 DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; 523 524 // Find the compatibility from each candidate to the others to determine 525 // which candidates overlap and which have the same structure by mapping 526 // each structure to a different group. 527 bool SameStructure; 528 bool Inserted; 529 unsigned CurrentGroupNum = 0; 530 unsigned OuterGroupNum; 531 DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt; 532 DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner; 533 DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair; 534 535 // Iterate over the candidates to determine its structural and overlapping 536 // compatibility with other instructions 537 for (CandIt = CandsForRepSubstring.begin(), 538 CandEndIt = CandsForRepSubstring.end(); 539 CandIt != CandEndIt; CandIt++) { 540 541 // Determine if it has an assigned structural group already. 542 CandToGroupIt = CandToGroup.find(&*CandIt); 543 if (CandToGroupIt == CandToGroup.end()) { 544 // If not, we assign it one, and add it to our mapping. 545 std::tie(CandToGroupIt, Inserted) = 546 CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++)); 547 } 548 549 // Get the structural group number from the iterator. 550 OuterGroupNum = CandToGroupIt->second; 551 552 // Check if we already have a list of IRSimilarityCandidates for the current 553 // structural group. Create one if one does not exist. 554 CurrentGroupPair = StructuralGroups.find(OuterGroupNum); 555 if (CurrentGroupPair == StructuralGroups.end()) 556 std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert( 557 std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt}))); 558 559 // Iterate over the IRSimilarityCandidates following the current 560 // IRSimilarityCandidate in the list to determine whether the two 561 // IRSimilarityCandidates are compatible. This is so we do not repeat pairs 562 // of IRSimilarityCandidates. 563 for (InnerCandIt = std::next(CandIt), 564 InnerCandEndIt = CandsForRepSubstring.end(); 565 InnerCandIt != InnerCandEndIt; InnerCandIt++) { 566 567 // We check if the inner item has a group already, if it does, we skip it. 568 CandToGroupItInner = CandToGroup.find(&*InnerCandIt); 569 if (CandToGroupItInner != CandToGroup.end()) 570 continue; 571 572 // Otherwise we determine if they have the same structure and add it to 573 // vector if they match. 574 SameStructure = 575 IRSimilarityCandidate::compareStructure(*CandIt, *InnerCandIt); 576 if (!SameStructure) 577 continue; 578 579 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum)); 580 CurrentGroupPair->second.push_back(*InnerCandIt); 581 } 582 } 583 } 584 585 void IRSimilarityIdentifier::findCandidates( 586 std::vector<IRInstructionData *> &InstrList, 587 std::vector<unsigned> &IntegerMapping) { 588 SuffixTree ST(IntegerMapping); 589 590 std::vector<IRSimilarityCandidate> CandsForRepSubstring; 591 std::vector<SimilarityGroup> NewCandidateGroups; 592 593 DenseMap<unsigned, SimilarityGroup> StructuralGroups; 594 595 // Iterate over the subsequences found by the Suffix Tree to create 596 // IRSimilarityCandidates for each repeated subsequence and determine which 597 // instances are structurally similar to one another. 598 for (auto It = ST.begin(), Et = ST.end(); It != Et; ++It) { 599 createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, *It, 600 CandsForRepSubstring); 601 602 if (CandsForRepSubstring.size() < 2) 603 continue; 604 605 findCandidateStructures(CandsForRepSubstring, StructuralGroups); 606 for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) 607 // We only add the group if it contains more than one 608 // IRSimilarityCandidate. If there is only one, that means there is no 609 // other repeated subsequence with the same structure. 610 if (Group.second.size() > 1) 611 SimilarityCandidates->push_back(Group.second); 612 613 CandsForRepSubstring.clear(); 614 StructuralGroups.clear(); 615 NewCandidateGroups.clear(); 616 } 617 } 618 619 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( 620 ArrayRef<std::unique_ptr<Module>> Modules) { 621 resetSimilarityCandidates(); 622 623 std::vector<IRInstructionData *> InstrList; 624 std::vector<unsigned> IntegerMapping; 625 626 populateMapper(Modules, InstrList, IntegerMapping); 627 findCandidates(InstrList, IntegerMapping); 628 629 return SimilarityCandidates.getValue(); 630 } 631 632 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { 633 resetSimilarityCandidates(); 634 635 std::vector<IRInstructionData *> InstrList; 636 std::vector<unsigned> IntegerMapping; 637 638 populateMapper(M, InstrList, IntegerMapping); 639 findCandidates(InstrList, IntegerMapping); 640 641 return SimilarityCandidates.getValue(); 642 } 643 644 INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier", 645 "ir-similarity-identifier", false, true) 646 647 IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() 648 : ModulePass(ID) { 649 initializeIRSimilarityIdentifierWrapperPassPass( 650 *PassRegistry::getPassRegistry()); 651 } 652 653 bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { 654 IRSI.reset(new IRSimilarityIdentifier(M)); 655 return false; 656 } 657 658 bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) { 659 IRSI.reset(); 660 return false; 661 } 662 663 bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { 664 // All the real work is done in the constructor for the pass. 665 IRSI.reset(new IRSimilarityIdentifier(M)); 666 return false; 667 } 668 669 AnalysisKey IRSimilarityAnalysis::Key; 670 IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, 671 ModuleAnalysisManager &) { 672 673 return IRSimilarityIdentifier(M); 674 } 675 676 PreservedAnalyses 677 IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { 678 IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M); 679 Optional<SimilarityGroupList> &SimilarityCandidatesOpt = IRSI.getSimilarity(); 680 681 for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) { 682 OS << CandVec.size() << " candidates of length " 683 << CandVec.begin()->getLength() << ". Found in: \n"; 684 for (IRSimilarityCandidate &Cand : CandVec) { 685 OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str() 686 << ", Basic Block: "; 687 if (Cand.front()->Inst->getParent()->getName().str() == "") 688 OS << "(unnamed)\n"; 689 else 690 OS << Cand.front()->Inst->getParent()->getName().str() << "\n"; 691 } 692 } 693 694 return PreservedAnalyses::all(); 695 } 696 697 char IRSimilarityIdentifierWrapperPass::ID = 0; 698