1 //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // \file 10 // Implementation file for the IRSimilarityIdentifier for identifying 11 // similarities in IR including the IRInstructionMapper. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Analysis/IRSimilarityIdentifier.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/IR/Intrinsics.h" 18 #include "llvm/IR/Operator.h" 19 #include "llvm/IR/User.h" 20 #include "llvm/InitializePasses.h" 21 #include "llvm/Support/SuffixTree.h" 22 23 using namespace llvm; 24 using namespace IRSimilarity; 25 26 IRInstructionData::IRInstructionData(Instruction &I, bool Legality, 27 IRInstructionDataList &IDList) 28 : Inst(&I), Legal(Legality), IDL(&IDList) { 29 // We check for whether we have a comparison instruction. If it is, we 30 // find the "less than" version of the predicate for consistency for 31 // comparison instructions throught the program. 32 if (CmpInst *C = dyn_cast<CmpInst>(&I)) { 33 CmpInst::Predicate Predicate = predicateForConsistency(C); 34 if (Predicate != C->getPredicate()) 35 RevisedPredicate = Predicate; 36 } 37 38 // Here we collect the operands and their types for determining whether 39 // the structure of the operand use matches between two different candidates. 40 for (Use &OI : I.operands()) { 41 if (isa<CmpInst>(I) && RevisedPredicate.hasValue()) { 42 // If we have a CmpInst where the predicate is reversed, it means the 43 // operands must be reversed as well. 44 OperVals.insert(OperVals.begin(), OI.get()); 45 continue; 46 } 47 48 OperVals.push_back(OI.get()); 49 } 50 } 51 52 CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { 53 switch (CI->getPredicate()) { 54 case CmpInst::FCMP_OGT: 55 case CmpInst::FCMP_UGT: 56 case CmpInst::FCMP_OGE: 57 case CmpInst::FCMP_UGE: 58 case CmpInst::ICMP_SGT: 59 case CmpInst::ICMP_UGT: 60 case CmpInst::ICMP_SGE: 61 case CmpInst::ICMP_UGE: 62 return CI->getSwappedPredicate(); 63 default: 64 return CI->getPredicate(); 65 } 66 } 67 68 CmpInst::Predicate IRInstructionData::getPredicate() const { 69 assert(isa<CmpInst>(Inst) && 70 "Can only get a predicate from a compare instruction"); 71 72 if (RevisedPredicate.hasValue()) 73 return RevisedPredicate.getValue(); 74 75 return cast<CmpInst>(Inst)->getPredicate(); 76 } 77 78 static StringRef getCalledFunctionName(CallInst &CI) { 79 assert(CI.getCalledFunction() != nullptr && "Called Function is nullptr?"); 80 81 return CI.getCalledFunction()->getName(); 82 } 83 84 bool IRSimilarity::isClose(const IRInstructionData &A, 85 const IRInstructionData &B) { 86 87 if (!A.Legal || !B.Legal) 88 return false; 89 90 // Check if we are performing the same sort of operation on the same types 91 // but not on the same values. 92 if (!A.Inst->isSameOperationAs(B.Inst)) { 93 // If there is a predicate, this means that either there is a swapped 94 // predicate, or that the types are different, we want to make sure that 95 // the predicates are equivalent via swapping. 96 if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) { 97 98 if (A.getPredicate() != B.getPredicate()) 99 return false; 100 101 // If the predicates are the same via swap, make sure that the types are 102 // still the same. 103 auto ZippedTypes = zip(A.OperVals, B.OperVals); 104 105 return all_of( 106 ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) { 107 return std::get<0>(R)->getType() == std::get<1>(R)->getType(); 108 }); 109 } 110 111 return false; 112 } 113 114 // Since any GEP Instruction operands after the first operand cannot be 115 // defined by a register, we must make sure that the operands after the first 116 // are the same in the two instructions 117 if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) { 118 auto *OtherGEP = cast<GetElementPtrInst>(B.Inst); 119 120 // If the instructions do not have the same inbounds restrictions, we do 121 // not consider them the same. 122 if (GEP->isInBounds() != OtherGEP->isInBounds()) 123 return false; 124 125 auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices()); 126 127 auto ZIt = ZippedOperands.begin(); 128 129 // We increment here since we do not care about the first instruction, 130 // we only care about the following operands since they must be the 131 // exact same to be considered similar. 132 return std::all_of(++ZIt, ZippedOperands.end(), 133 [](std::tuple<llvm::Use &, llvm::Use &> R) { 134 return std::get<0>(R) == std::get<1>(R); 135 }); 136 } 137 138 // If the instructions are functions, we make sure that the function name is 139 // the same. We already know that the types are since is isSameOperationAs is 140 // true. 141 if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) { 142 CallInst *CIA = cast<CallInst>(A.Inst); 143 CallInst *CIB = cast<CallInst>(B.Inst); 144 if (getCalledFunctionName(*CIA).compare(getCalledFunctionName(*CIB)) != 0) 145 return false; 146 } 147 148 return true; 149 } 150 151 // TODO: This is the same as the MachineOutliner, and should be consolidated 152 // into the same interface. 153 void IRInstructionMapper::convertToUnsignedVec( 154 BasicBlock &BB, std::vector<IRInstructionData *> &InstrList, 155 std::vector<unsigned> &IntegerMapping) { 156 BasicBlock::iterator It = BB.begin(); 157 158 std::vector<unsigned> IntegerMappingForBB; 159 std::vector<IRInstructionData *> InstrListForBB; 160 161 HaveLegalRange = false; 162 CanCombineWithPrevInstr = false; 163 AddedIllegalLastTime = true; 164 165 for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { 166 switch (InstClassifier.visit(*It)) { 167 case InstrType::Legal: 168 mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 169 break; 170 case InstrType::Illegal: 171 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 172 break; 173 case InstrType::Invisible: 174 AddedIllegalLastTime = false; 175 break; 176 } 177 } 178 179 if (HaveLegalRange) { 180 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); 181 for_each(InstrListForBB, 182 [this](IRInstructionData *ID) { this->IDL->push_back(*ID); }); 183 llvm::append_range(InstrList, InstrListForBB); 184 llvm::append_range(IntegerMapping, IntegerMappingForBB); 185 } 186 } 187 188 // TODO: This is the same as the MachineOutliner, and should be consolidated 189 // into the same interface. 190 unsigned IRInstructionMapper::mapToLegalUnsigned( 191 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 192 std::vector<IRInstructionData *> &InstrListForBB) { 193 // We added something legal, so we should unset the AddedLegalLastTime 194 // flag. 195 AddedIllegalLastTime = false; 196 197 // If we have at least two adjacent legal instructions (which may have 198 // invisible instructions in between), remember that. 199 if (CanCombineWithPrevInstr) 200 HaveLegalRange = true; 201 CanCombineWithPrevInstr = true; 202 203 // Get the integer for this instruction or give it the current 204 // LegalInstrNumber. 205 IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL); 206 InstrListForBB.push_back(ID); 207 208 // Add to the instruction list 209 bool WasInserted; 210 DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator 211 ResultIt; 212 std::tie(ResultIt, WasInserted) = 213 InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber)); 214 unsigned INumber = ResultIt->second; 215 216 // There was an insertion. 217 if (WasInserted) 218 LegalInstrNumber++; 219 220 IntegerMappingForBB.push_back(INumber); 221 222 // Make sure we don't overflow or use any integers reserved by the DenseMap. 223 assert(LegalInstrNumber < IllegalInstrNumber && 224 "Instruction mapping overflow!"); 225 226 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 227 "Tried to assign DenseMap tombstone or empty key to instruction."); 228 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 229 "Tried to assign DenseMap tombstone or empty key to instruction."); 230 231 return INumber; 232 } 233 234 IRInstructionData * 235 IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality, 236 IRInstructionDataList &IDL) { 237 return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL); 238 } 239 240 IRInstructionDataList * 241 IRInstructionMapper::allocateIRInstructionDataList() { 242 return new (IDLAllocator->Allocate()) IRInstructionDataList(); 243 } 244 245 // TODO: This is the same as the MachineOutliner, and should be consolidated 246 // into the same interface. 247 unsigned IRInstructionMapper::mapToIllegalUnsigned( 248 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 249 std::vector<IRInstructionData *> &InstrListForBB, bool End) { 250 // Can't combine an illegal instruction. Set the flag. 251 CanCombineWithPrevInstr = false; 252 253 // Only add one illegal number per range of legal numbers. 254 if (AddedIllegalLastTime) 255 return IllegalInstrNumber; 256 257 IRInstructionData *ID = nullptr; 258 if (!End) 259 ID = allocateIRInstructionData(*It, false, *IDL); 260 InstrListForBB.push_back(ID); 261 262 // Remember that we added an illegal number last time. 263 AddedIllegalLastTime = true; 264 unsigned INumber = IllegalInstrNumber; 265 IntegerMappingForBB.push_back(IllegalInstrNumber--); 266 267 assert(LegalInstrNumber < IllegalInstrNumber && 268 "Instruction mapping overflow!"); 269 270 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 271 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 272 273 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 274 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 275 276 return INumber; 277 } 278 279 IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, 280 IRInstructionData *FirstInstIt, 281 IRInstructionData *LastInstIt) 282 : StartIdx(StartIdx), Len(Len) { 283 284 assert(FirstInstIt != nullptr && "Instruction is nullptr!"); 285 assert(LastInstIt != nullptr && "Instruction is nullptr!"); 286 assert(StartIdx + Len > StartIdx && 287 "Overflow for IRSimilarityCandidate range?"); 288 assert(Len - 1 == static_cast<unsigned>(std::distance( 289 iterator(FirstInstIt), iterator(LastInstIt))) && 290 "Length of the first and last IRInstructionData do not match the " 291 "given length"); 292 293 // We iterate over the given instructions, and map each unique value 294 // to a unique number in the IRSimilarityCandidate ValueToNumber and 295 // NumberToValue maps. A constant get its own value globally, the individual 296 // uses of the constants are not considered to be unique. 297 // 298 // IR: Mapping Added: 299 // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 300 // %add2 = add i32 %a, %1 %add2 -> 4 301 // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 302 // 303 // when replace with global values, starting from 1, would be 304 // 305 // 3 = add i32 1, 2 306 // 4 = add i32 1, 3 307 // 6 = add i32 5, 2 308 unsigned LocalValNumber = 1; 309 IRInstructionDataList::iterator ID = iterator(*FirstInstIt); 310 for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) { 311 // Map the operand values to an unsigned integer if it does not already 312 // have an unsigned integer assigned to it. 313 for (Value *Arg : ID->OperVals) 314 if (ValueToNumber.find(Arg) == ValueToNumber.end()) { 315 ValueToNumber.try_emplace(Arg, LocalValNumber); 316 NumberToValue.try_emplace(LocalValNumber, Arg); 317 LocalValNumber++; 318 } 319 320 // Mapping the instructions to an unsigned integer if it is not already 321 // exist in the mapping. 322 if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) { 323 ValueToNumber.try_emplace(ID->Inst, LocalValNumber); 324 NumberToValue.try_emplace(LocalValNumber, ID->Inst); 325 LocalValNumber++; 326 } 327 } 328 329 // Setting the first and last instruction data pointers for the candidate. If 330 // we got through the entire for loop without hitting an assert, we know 331 // that both of these instructions are not nullptrs. 332 FirstInst = FirstInstIt; 333 LastInst = LastInstIt; 334 } 335 336 bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, 337 const IRSimilarityCandidate &B) { 338 if (A.getLength() != B.getLength()) 339 return false; 340 341 auto InstrDataForBoth = 342 zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end())); 343 344 return all_of(InstrDataForBoth, 345 [](std::tuple<IRInstructionData &, IRInstructionData &> R) { 346 IRInstructionData &A = std::get<0>(R); 347 IRInstructionData &B = std::get<1>(R); 348 if (!A.Legal || !B.Legal) 349 return false; 350 return isClose(A, B); 351 }); 352 } 353 354 /// Determine if one or more of the assigned global value numbers for the 355 /// operands in \p TargetValueNumbers is in the current mapping set for operand 356 /// numbers in \p SourceOperands. The set of possible corresponding global 357 /// value numbers are replaced with the most recent version of compatible 358 /// values. 359 /// 360 /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global 361 /// value number for the source IRInstructionCandidate. 362 /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source 363 /// IRSimilarityCandidate global value numbers to a set of possible numbers in 364 /// the target. 365 /// \param [in] SourceOperands - The operands in the original 366 /// IRSimilarityCandidate in the current instruction. 367 /// \param [in] TargetValueNumbers - The global value numbers of the operands in 368 /// the corresponding Instruction in the other IRSimilarityCandidate. 369 /// \returns true if there exists a possible mapping between the source 370 /// Instruction operands and the target Instruction operands, and false if not. 371 static bool checkNumberingAndReplaceCommutative( 372 const DenseMap<Value *, unsigned> &SourceValueToNumberMapping, 373 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 374 ArrayRef<Value *> &SourceOperands, 375 DenseSet<unsigned> &TargetValueNumbers){ 376 377 DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 378 379 unsigned ArgVal; 380 bool WasInserted; 381 382 // Iterate over the operands in the source IRSimilarityCandidate to determine 383 // whether there exists an operand in the other IRSimilarityCandidate that 384 // creates a valid mapping of Value to Value between the 385 // IRSimilarityCaniddates. 386 for (Value *V : SourceOperands) { 387 ArgVal = SourceValueToNumberMapping.find(V)->second; 388 389 std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert( 390 std::make_pair(ArgVal, TargetValueNumbers)); 391 392 // Instead of finding a current mapping, we inserted a set. This means a 393 // mapping did not exist for the source Instruction operand, it has no 394 // current constraints we need to check. 395 if (WasInserted) 396 continue; 397 398 // If a mapping already exists for the source operand to the values in the 399 // other IRSimilarityCandidate we need to iterate over the items in other 400 // IRSimilarityCandidate's Instruction to determine whether there is a valid 401 // mapping of Value to Value. 402 DenseSet<unsigned> NewSet; 403 for (unsigned &Curr : ValueMappingIt->second) 404 // If we can find the value in the mapping, we add it to the new set. 405 if (TargetValueNumbers.contains(Curr)) 406 NewSet.insert(Curr); 407 408 // If we could not find a Value, return 0. 409 if (NewSet.empty()) 410 return false; 411 412 // Otherwise replace the old mapping with the newly constructed one. 413 if (NewSet.size() != ValueMappingIt->second.size()) 414 ValueMappingIt->second.swap(NewSet); 415 416 // We have reached no conclusions about the mapping, and cannot remove 417 // any items from the other operands, so we move to check the next operand. 418 if (ValueMappingIt->second.size() != 1) 419 continue; 420 421 422 unsigned ValToRemove = *ValueMappingIt->second.begin(); 423 // When there is only one item left in the mapping for and operand, remove 424 // the value from the other operands. If it results in there being no 425 // mapping, return false, it means the mapping is wrong 426 for (Value *InnerV : SourceOperands) { 427 if (V == InnerV) 428 continue; 429 430 unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second; 431 ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal); 432 if (ValueMappingIt == CurrentSrcTgtNumberMapping.end()) 433 continue; 434 435 ValueMappingIt->second.erase(ValToRemove); 436 if (ValueMappingIt->second.empty()) 437 return false; 438 } 439 } 440 441 return true; 442 } 443 444 /// Determine if operand number \p TargetArgVal is in the current mapping set 445 /// for operand number \p SourceArgVal. 446 /// 447 /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global 448 /// value numbers from source IRSimilarityCandidate to target 449 /// IRSimilarityCandidate. 450 /// \param [in] SourceArgVal The global value number for an operand in the 451 /// in the original candidate. 452 /// \param [in] TargetArgVal The global value number for the corresponding 453 /// operand in the other candidate. 454 /// \returns True if there exists a mapping and false if not. 455 bool checkNumberingAndReplace( 456 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 457 unsigned SourceArgVal, unsigned TargetArgVal) { 458 // We are given two unsigned integers representing the global values of 459 // the operands in different IRSimilarityCandidates and a current mapping 460 // between the two. 461 // 462 // Source Operand GVN: 1 463 // Target Operand GVN: 2 464 // CurrentMapping: {1: {1, 2}} 465 // 466 // Since we have mapping, and the target operand is contained in the set, we 467 // update it to: 468 // CurrentMapping: {1: {2}} 469 // and can return true. But, if the mapping was 470 // CurrentMapping: {1: {3}} 471 // we would return false. 472 473 bool WasInserted; 474 DenseMap<unsigned, DenseSet<unsigned>>::iterator Val; 475 476 std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert( 477 std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal}))); 478 479 // If we created a new mapping, then we are done. 480 if (WasInserted) 481 return true; 482 483 // If there is more than one option in the mapping set, and the target value 484 // is included in the mapping set replace that set with one that only includes 485 // the target value, as it is the only valid mapping via the non commutative 486 // instruction. 487 488 DenseSet<unsigned> &TargetSet = Val->second; 489 if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) { 490 TargetSet.clear(); 491 TargetSet.insert(TargetArgVal); 492 return true; 493 } 494 495 // Return true if we can find the value in the set. 496 return TargetSet.contains(TargetArgVal); 497 } 498 499 bool IRSimilarityCandidate::compareNonCommutativeOperandMapping( 500 OperandMapping A, OperandMapping B) { 501 // Iterators to keep track of where we are in the operands for each 502 // Instruction. 503 ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 504 ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 505 unsigned OperandLength = A.OperVals.size(); 506 507 // For each operand, get the value numbering and ensure it is consistent. 508 for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) { 509 unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second; 510 unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second; 511 512 // Attempt to add a set with only the target value. If there is no mapping 513 // we can create it here. 514 // 515 // For an instruction like a subtraction: 516 // IRSimilarityCandidateA: IRSimilarityCandidateB: 517 // %resultA = sub %a, %b %resultB = sub %d, %e 518 // 519 // We map %a -> %d and %b -> %e. 520 // 521 // And check to see whether their mapping is consistent in 522 // checkNumberingAndReplace. 523 524 if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB)) 525 return false; 526 527 if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA)) 528 return false; 529 } 530 return true; 531 } 532 533 bool IRSimilarityCandidate::compareCommutativeOperandMapping( 534 OperandMapping A, OperandMapping B) { 535 DenseSet<unsigned> ValueNumbersA; 536 DenseSet<unsigned> ValueNumbersB; 537 538 ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 539 ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 540 unsigned OperandLength = A.OperVals.size(); 541 542 // Find the value number sets for the operands. 543 for (unsigned Idx = 0; Idx < OperandLength; 544 Idx++, VItA++, VItB++) { 545 ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second); 546 ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second); 547 } 548 549 // Iterate over the operands in the first IRSimilarityCandidate and make sure 550 // there exists a possible mapping with the operands in the second 551 // IRSimilarityCandidate. 552 if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber, 553 A.ValueNumberMapping, A.OperVals, 554 ValueNumbersB)) 555 return false; 556 557 // Iterate over the operands in the second IRSimilarityCandidate and make sure 558 // there exists a possible mapping with the operands in the first 559 // IRSimilarityCandidate. 560 if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber, 561 B.ValueNumberMapping, B.OperVals, 562 ValueNumbersA)) 563 return false; 564 565 return true; 566 } 567 568 bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, 569 const IRSimilarityCandidate &B) { 570 if (A.getLength() != B.getLength()) 571 return false; 572 573 if (A.ValueToNumber.size() != B.ValueToNumber.size()) 574 return false; 575 576 iterator ItA = A.begin(); 577 iterator ItB = B.begin(); 578 579 // These sets create a create a mapping between the values in one candidate 580 // to values in the other candidate. If we create a set with one element, 581 // and that same element maps to the original element in the candidate 582 // we have a good mapping. 583 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA; 584 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB; 585 DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 586 587 bool WasInserted; 588 589 // Iterate over the instructions contained in each candidate 590 unsigned SectionLength = A.getStartIdx() + A.getLength(); 591 for (unsigned Loc = A.getStartIdx(); Loc < SectionLength; 592 ItA++, ItB++, Loc++) { 593 // Make sure the instructions are similar to one another. 594 if (!isClose(*ItA, *ItB)) 595 return false; 596 597 Instruction *IA = ItA->Inst; 598 Instruction *IB = ItB->Inst; 599 600 if (!ItA->Legal || !ItB->Legal) 601 return false; 602 603 // Get the operand sets for the instructions. 604 ArrayRef<Value *> OperValsA = ItA->OperVals; 605 ArrayRef<Value *> OperValsB = ItB->OperVals; 606 607 unsigned InstValA = A.ValueToNumber.find(IA)->second; 608 unsigned InstValB = B.ValueToNumber.find(IB)->second; 609 610 // Ensure that the mappings for the instructions exists. 611 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert( 612 std::make_pair(InstValA, DenseSet<unsigned>({InstValB}))); 613 if (!WasInserted && !ValueMappingIt->second.contains(InstValB)) 614 return false; 615 616 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert( 617 std::make_pair(InstValB, DenseSet<unsigned>({InstValA}))); 618 if (!WasInserted && !ValueMappingIt->second.contains(InstValA)) 619 return false; 620 621 // We have different paths for commutative instructions and non-commutative 622 // instructions since commutative instructions could allow multiple mappings 623 // to certain values. 624 if (IA->isCommutative() && !isa<FPMathOperator>(IA)) { 625 if (!compareCommutativeOperandMapping( 626 {A, OperValsA, ValueNumberMappingA}, 627 {B, OperValsB, ValueNumberMappingB})) 628 return false; 629 continue; 630 } 631 632 // Handle the non-commutative cases. 633 if (!compareNonCommutativeOperandMapping( 634 {A, OperValsA, ValueNumberMappingA}, 635 {B, OperValsB, ValueNumberMappingB})) 636 return false; 637 } 638 return true; 639 } 640 641 bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, 642 const IRSimilarityCandidate &B) { 643 auto DoesOverlap = [](const IRSimilarityCandidate &X, 644 const IRSimilarityCandidate &Y) { 645 // Check: 646 // XXXXXX X starts before Y ends 647 // YYYYYYY Y starts after X starts 648 return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx; 649 }; 650 651 return DoesOverlap(A, B) || DoesOverlap(B, A); 652 } 653 654 void IRSimilarityIdentifier::populateMapper( 655 Module &M, std::vector<IRInstructionData *> &InstrList, 656 std::vector<unsigned> &IntegerMapping) { 657 658 std::vector<IRInstructionData *> InstrListForModule; 659 std::vector<unsigned> IntegerMappingForModule; 660 // Iterate over the functions in the module to map each Instruction in each 661 // BasicBlock to an unsigned integer. 662 for (Function &F : M) { 663 664 if (F.empty()) 665 continue; 666 667 for (BasicBlock &BB : F) { 668 669 if (BB.sizeWithoutDebug() < 2) 670 continue; 671 672 // BB has potential to have similarity since it has a size greater than 2 673 // and can therefore match other regions greater than 2. Map it to a list 674 // of unsigned integers. 675 Mapper.convertToUnsignedVec(BB, InstrListForModule, 676 IntegerMappingForModule); 677 } 678 } 679 680 // Insert the InstrListForModule at the end of the overall InstrList so that 681 // we can have a long InstrList for the entire set of Modules being analyzed. 682 llvm::append_range(InstrList, InstrListForModule); 683 // Do the same as above, but for IntegerMapping. 684 llvm::append_range(IntegerMapping, IntegerMappingForModule); 685 } 686 687 void IRSimilarityIdentifier::populateMapper( 688 ArrayRef<std::unique_ptr<Module>> &Modules, 689 std::vector<IRInstructionData *> &InstrList, 690 std::vector<unsigned> &IntegerMapping) { 691 692 // Iterate over, and map the instructions in each module. 693 for (const std::unique_ptr<Module> &M : Modules) 694 populateMapper(*M, InstrList, IntegerMapping); 695 } 696 697 /// From a repeated subsequence, find all the different instances of the 698 /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from 699 /// the IRInstructionData in subsequence. 700 /// 701 /// \param [in] Mapper - The instruction mapper for sanity checks. 702 /// \param [in] InstrList - The vector that holds the instruction data. 703 /// \param [in] IntegerMapping - The vector that holds the mapped integers. 704 /// \param [out] CandsForRepSubstring - The vector to store the generated 705 /// IRSimilarityCandidates. 706 static void createCandidatesFromSuffixTree( 707 IRInstructionMapper Mapper, std::vector<IRInstructionData *> &InstrList, 708 std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS, 709 std::vector<IRSimilarityCandidate> &CandsForRepSubstring) { 710 711 unsigned StringLen = RS.Length; 712 713 // Create an IRSimilarityCandidate for instance of this subsequence \p RS. 714 for (const unsigned &StartIdx : RS.StartIndices) { 715 unsigned EndIdx = StartIdx + StringLen - 1; 716 717 // Check that this subsequence does not contain an illegal instruction. 718 bool ContainsIllegal = false; 719 for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) { 720 unsigned Key = IntegerMapping[CurrIdx]; 721 if (Key > Mapper.IllegalInstrNumber) { 722 ContainsIllegal = true; 723 break; 724 } 725 } 726 727 // If we have an illegal instruction, we should not create an 728 // IRSimilarityCandidate for this region. 729 if (ContainsIllegal) 730 continue; 731 732 // We are getting iterators to the instructions in this region of code 733 // by advancing the start and end indices from the start of the 734 // InstrList. 735 std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin(); 736 std::advance(StartIt, StartIdx); 737 std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin(); 738 std::advance(EndIt, EndIdx); 739 740 CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt); 741 } 742 } 743 744 /// From the list of IRSimilarityCandidates, perform a comparison between each 745 /// IRSimilarityCandidate to determine if there are overlapping 746 /// IRInstructionData, or if they do not have the same structure. 747 /// 748 /// \param [in] CandsForRepSubstring - The vector containing the 749 /// IRSimilarityCandidates. 750 /// \param [out] StructuralGroups - the mapping of unsigned integers to vector 751 /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the 752 /// vector are structurally similar to one another. 753 static void findCandidateStructures( 754 std::vector<IRSimilarityCandidate> &CandsForRepSubstring, 755 DenseMap<unsigned, SimilarityGroup> &StructuralGroups) { 756 std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt, 757 InnerCandEndIt; 758 759 // IRSimilarityCandidates each have a structure for operand use. It is 760 // possible that two instances of the same subsequences have different 761 // structure. Each type of structure found is assigned a number. This 762 // DenseMap maps an IRSimilarityCandidate to which type of similarity 763 // discovered it fits within. 764 DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; 765 766 // Find the compatibility from each candidate to the others to determine 767 // which candidates overlap and which have the same structure by mapping 768 // each structure to a different group. 769 bool SameStructure; 770 bool Inserted; 771 unsigned CurrentGroupNum = 0; 772 unsigned OuterGroupNum; 773 DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt; 774 DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner; 775 DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair; 776 777 // Iterate over the candidates to determine its structural and overlapping 778 // compatibility with other instructions 779 for (CandIt = CandsForRepSubstring.begin(), 780 CandEndIt = CandsForRepSubstring.end(); 781 CandIt != CandEndIt; CandIt++) { 782 783 // Determine if it has an assigned structural group already. 784 CandToGroupIt = CandToGroup.find(&*CandIt); 785 if (CandToGroupIt == CandToGroup.end()) { 786 // If not, we assign it one, and add it to our mapping. 787 std::tie(CandToGroupIt, Inserted) = 788 CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++)); 789 } 790 791 // Get the structural group number from the iterator. 792 OuterGroupNum = CandToGroupIt->second; 793 794 // Check if we already have a list of IRSimilarityCandidates for the current 795 // structural group. Create one if one does not exist. 796 CurrentGroupPair = StructuralGroups.find(OuterGroupNum); 797 if (CurrentGroupPair == StructuralGroups.end()) 798 std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert( 799 std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt}))); 800 801 // Iterate over the IRSimilarityCandidates following the current 802 // IRSimilarityCandidate in the list to determine whether the two 803 // IRSimilarityCandidates are compatible. This is so we do not repeat pairs 804 // of IRSimilarityCandidates. 805 for (InnerCandIt = std::next(CandIt), 806 InnerCandEndIt = CandsForRepSubstring.end(); 807 InnerCandIt != InnerCandEndIt; InnerCandIt++) { 808 809 // We check if the inner item has a group already, if it does, we skip it. 810 CandToGroupItInner = CandToGroup.find(&*InnerCandIt); 811 if (CandToGroupItInner != CandToGroup.end()) 812 continue; 813 814 // Otherwise we determine if they have the same structure and add it to 815 // vector if they match. 816 SameStructure = 817 IRSimilarityCandidate::compareStructure(*CandIt, *InnerCandIt); 818 if (!SameStructure) 819 continue; 820 821 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum)); 822 CurrentGroupPair->second.push_back(*InnerCandIt); 823 } 824 } 825 } 826 827 void IRSimilarityIdentifier::findCandidates( 828 std::vector<IRInstructionData *> &InstrList, 829 std::vector<unsigned> &IntegerMapping) { 830 SuffixTree ST(IntegerMapping); 831 832 std::vector<IRSimilarityCandidate> CandsForRepSubstring; 833 std::vector<SimilarityGroup> NewCandidateGroups; 834 835 DenseMap<unsigned, SimilarityGroup> StructuralGroups; 836 837 // Iterate over the subsequences found by the Suffix Tree to create 838 // IRSimilarityCandidates for each repeated subsequence and determine which 839 // instances are structurally similar to one another. 840 for (auto It = ST.begin(), Et = ST.end(); It != Et; ++It) { 841 createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, *It, 842 CandsForRepSubstring); 843 844 if (CandsForRepSubstring.size() < 2) 845 continue; 846 847 findCandidateStructures(CandsForRepSubstring, StructuralGroups); 848 for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) 849 // We only add the group if it contains more than one 850 // IRSimilarityCandidate. If there is only one, that means there is no 851 // other repeated subsequence with the same structure. 852 if (Group.second.size() > 1) 853 SimilarityCandidates->push_back(Group.second); 854 855 CandsForRepSubstring.clear(); 856 StructuralGroups.clear(); 857 NewCandidateGroups.clear(); 858 } 859 } 860 861 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( 862 ArrayRef<std::unique_ptr<Module>> Modules) { 863 resetSimilarityCandidates(); 864 865 std::vector<IRInstructionData *> InstrList; 866 std::vector<unsigned> IntegerMapping; 867 868 populateMapper(Modules, InstrList, IntegerMapping); 869 findCandidates(InstrList, IntegerMapping); 870 871 return SimilarityCandidates.getValue(); 872 } 873 874 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { 875 resetSimilarityCandidates(); 876 877 std::vector<IRInstructionData *> InstrList; 878 std::vector<unsigned> IntegerMapping; 879 880 populateMapper(M, InstrList, IntegerMapping); 881 findCandidates(InstrList, IntegerMapping); 882 883 return SimilarityCandidates.getValue(); 884 } 885 886 INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier", 887 "ir-similarity-identifier", false, true) 888 889 IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() 890 : ModulePass(ID) { 891 initializeIRSimilarityIdentifierWrapperPassPass( 892 *PassRegistry::getPassRegistry()); 893 } 894 895 bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { 896 IRSI.reset(new IRSimilarityIdentifier(M)); 897 return false; 898 } 899 900 bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) { 901 IRSI.reset(); 902 return false; 903 } 904 905 bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { 906 // All the real work is done in the constructor for the pass. 907 IRSI.reset(new IRSimilarityIdentifier(M)); 908 return false; 909 } 910 911 AnalysisKey IRSimilarityAnalysis::Key; 912 IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, 913 ModuleAnalysisManager &) { 914 915 return IRSimilarityIdentifier(M); 916 } 917 918 PreservedAnalyses 919 IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { 920 IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M); 921 Optional<SimilarityGroupList> &SimilarityCandidatesOpt = IRSI.getSimilarity(); 922 923 for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) { 924 OS << CandVec.size() << " candidates of length " 925 << CandVec.begin()->getLength() << ". Found in: \n"; 926 for (IRSimilarityCandidate &Cand : CandVec) { 927 OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str() 928 << ", Basic Block: "; 929 if (Cand.front()->Inst->getParent()->getName().str() == "") 930 OS << "(unnamed)\n"; 931 else 932 OS << Cand.front()->Inst->getParent()->getName().str() << "\n"; 933 } 934 } 935 936 return PreservedAnalyses::all(); 937 } 938 939 char IRSimilarityIdentifierWrapperPass::ID = 0; 940