1 //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // \file 10 // Implementation file for the IRSimilarityIdentifier for identifying 11 // similarities in IR including the IRInstructionMapper. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Analysis/IRSimilarityIdentifier.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/IR/Intrinsics.h" 18 #include "llvm/IR/Operator.h" 19 #include "llvm/IR/User.h" 20 #include "llvm/InitializePasses.h" 21 #include "llvm/Support/SuffixTree.h" 22 23 using namespace llvm; 24 using namespace IRSimilarity; 25 26 IRInstructionData::IRInstructionData(Instruction &I, bool Legality, 27 IRInstructionDataList &IDList) 28 : Inst(&I), Legal(Legality), IDL(&IDList) { 29 // We check for whether we have a comparison instruction. If it is, we 30 // find the "less than" version of the predicate for consistency for 31 // comparison instructions throught the program. 32 if (CmpInst *C = dyn_cast<CmpInst>(&I)) { 33 CmpInst::Predicate Predicate = predicateForConsistency(C); 34 if (Predicate != C->getPredicate()) 35 RevisedPredicate = Predicate; 36 } 37 38 // Here we collect the operands and their types for determining whether 39 // the structure of the operand use matches between two different candidates. 40 for (Use &OI : I.operands()) { 41 if (isa<CmpInst>(I) && RevisedPredicate.hasValue()) { 42 // If we have a CmpInst where the predicate is reversed, it means the 43 // operands must be reversed as well. 44 OperVals.insert(OperVals.begin(), OI.get()); 45 continue; 46 } 47 48 OperVals.push_back(OI.get()); 49 } 50 } 51 52 CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) { 53 switch (CI->getPredicate()) { 54 case CmpInst::FCMP_OGT: 55 case CmpInst::FCMP_UGT: 56 case CmpInst::FCMP_OGE: 57 case CmpInst::FCMP_UGE: 58 case CmpInst::ICMP_SGT: 59 case CmpInst::ICMP_UGT: 60 case CmpInst::ICMP_SGE: 61 case CmpInst::ICMP_UGE: 62 return CI->getSwappedPredicate(); 63 default: 64 return CI->getPredicate(); 65 } 66 } 67 68 CmpInst::Predicate IRInstructionData::getPredicate() const { 69 assert(isa<CmpInst>(Inst) && 70 "Can only get a predicate from a compare instruction"); 71 72 if (RevisedPredicate.hasValue()) 73 return RevisedPredicate.getValue(); 74 75 return cast<CmpInst>(Inst)->getPredicate(); 76 } 77 78 bool IRSimilarity::isClose(const IRInstructionData &A, 79 const IRInstructionData &B) { 80 81 if (!A.Legal || !B.Legal) 82 return false; 83 84 // Check if we are performing the same sort of operation on the same types 85 // but not on the same values. 86 if (A.Inst->isSameOperationAs(B.Inst)) 87 return true; 88 89 // If there is a predicate, this means that either there is a swapped 90 // predicate, or that the types are different, we want to make sure that 91 // the predicates are equivalent via swapping. 92 if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) { 93 94 if (A.getPredicate() != B.getPredicate()) 95 return false; 96 97 // If the predicates are the same via swap, make sure that the types are 98 // still the same. 99 auto ZippedTypes = zip(A.OperVals, B.OperVals); 100 101 return all_of(ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) { 102 return std::get<0>(R)->getType() == std::get<1>(R)->getType(); 103 }); 104 } 105 106 return false; 107 } 108 109 // TODO: This is the same as the MachineOutliner, and should be consolidated 110 // into the same interface. 111 void IRInstructionMapper::convertToUnsignedVec( 112 BasicBlock &BB, std::vector<IRInstructionData *> &InstrList, 113 std::vector<unsigned> &IntegerMapping) { 114 BasicBlock::iterator It = BB.begin(); 115 116 std::vector<unsigned> IntegerMappingForBB; 117 std::vector<IRInstructionData *> InstrListForBB; 118 119 HaveLegalRange = false; 120 CanCombineWithPrevInstr = false; 121 AddedIllegalLastTime = true; 122 123 for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) { 124 switch (InstClassifier.visit(*It)) { 125 case InstrType::Legal: 126 mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 127 break; 128 case InstrType::Illegal: 129 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB); 130 break; 131 case InstrType::Invisible: 132 AddedIllegalLastTime = false; 133 break; 134 } 135 } 136 137 if (HaveLegalRange) { 138 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true); 139 for_each(InstrListForBB, 140 [this](IRInstructionData *ID) { this->IDL->push_back(*ID); }); 141 llvm::append_range(InstrList, InstrListForBB); 142 llvm::append_range(IntegerMapping, IntegerMappingForBB); 143 } 144 } 145 146 // TODO: This is the same as the MachineOutliner, and should be consolidated 147 // into the same interface. 148 unsigned IRInstructionMapper::mapToLegalUnsigned( 149 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 150 std::vector<IRInstructionData *> &InstrListForBB) { 151 // We added something legal, so we should unset the AddedLegalLastTime 152 // flag. 153 AddedIllegalLastTime = false; 154 155 // If we have at least two adjacent legal instructions (which may have 156 // invisible instructions in between), remember that. 157 if (CanCombineWithPrevInstr) 158 HaveLegalRange = true; 159 CanCombineWithPrevInstr = true; 160 161 // Get the integer for this instruction or give it the current 162 // LegalInstrNumber. 163 IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL); 164 InstrListForBB.push_back(ID); 165 166 // Add to the instruction list 167 bool WasInserted; 168 DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator 169 ResultIt; 170 std::tie(ResultIt, WasInserted) = 171 InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber)); 172 unsigned INumber = ResultIt->second; 173 174 // There was an insertion. 175 if (WasInserted) 176 LegalInstrNumber++; 177 178 IntegerMappingForBB.push_back(INumber); 179 180 // Make sure we don't overflow or use any integers reserved by the DenseMap. 181 assert(LegalInstrNumber < IllegalInstrNumber && 182 "Instruction mapping overflow!"); 183 184 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 185 "Tried to assign DenseMap tombstone or empty key to instruction."); 186 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 187 "Tried to assign DenseMap tombstone or empty key to instruction."); 188 189 return INumber; 190 } 191 192 IRInstructionData * 193 IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality, 194 IRInstructionDataList &IDL) { 195 return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL); 196 } 197 198 IRInstructionDataList * 199 IRInstructionMapper::allocateIRInstructionDataList() { 200 return new (IDLAllocator->Allocate()) IRInstructionDataList(); 201 } 202 203 // TODO: This is the same as the MachineOutliner, and should be consolidated 204 // into the same interface. 205 unsigned IRInstructionMapper::mapToIllegalUnsigned( 206 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB, 207 std::vector<IRInstructionData *> &InstrListForBB, bool End) { 208 // Can't combine an illegal instruction. Set the flag. 209 CanCombineWithPrevInstr = false; 210 211 // Only add one illegal number per range of legal numbers. 212 if (AddedIllegalLastTime) 213 return IllegalInstrNumber; 214 215 IRInstructionData *ID = nullptr; 216 if (!End) 217 ID = allocateIRInstructionData(*It, false, *IDL); 218 InstrListForBB.push_back(ID); 219 220 // Remember that we added an illegal number last time. 221 AddedIllegalLastTime = true; 222 unsigned INumber = IllegalInstrNumber; 223 IntegerMappingForBB.push_back(IllegalInstrNumber--); 224 225 assert(LegalInstrNumber < IllegalInstrNumber && 226 "Instruction mapping overflow!"); 227 228 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() && 229 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 230 231 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() && 232 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!"); 233 234 return INumber; 235 } 236 237 IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len, 238 IRInstructionData *FirstInstIt, 239 IRInstructionData *LastInstIt) 240 : StartIdx(StartIdx), Len(Len) { 241 242 assert(FirstInstIt != nullptr && "Instruction is nullptr!"); 243 assert(LastInstIt != nullptr && "Instruction is nullptr!"); 244 assert(StartIdx + Len > StartIdx && 245 "Overflow for IRSimilarityCandidate range?"); 246 assert(Len - 1 == static_cast<unsigned>(std::distance( 247 iterator(FirstInstIt), iterator(LastInstIt))) && 248 "Length of the first and last IRInstructionData do not match the " 249 "given length"); 250 251 // We iterate over the given instructions, and map each unique value 252 // to a unique number in the IRSimilarityCandidate ValueToNumber and 253 // NumberToValue maps. A constant get its own value globally, the individual 254 // uses of the constants are not considered to be unique. 255 // 256 // IR: Mapping Added: 257 // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2 258 // %add2 = add i32 %a, %1 %add2 -> 4 259 // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5 260 // 261 // when replace with global values, starting from 1, would be 262 // 263 // 3 = add i32 1, 2 264 // 4 = add i32 1, 3 265 // 6 = add i32 5, 2 266 unsigned LocalValNumber = 1; 267 IRInstructionDataList::iterator ID = iterator(*FirstInstIt); 268 for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) { 269 // Map the operand values to an unsigned integer if it does not already 270 // have an unsigned integer assigned to it. 271 for (Value *Arg : ID->OperVals) 272 if (ValueToNumber.find(Arg) == ValueToNumber.end()) { 273 ValueToNumber.try_emplace(Arg, LocalValNumber); 274 NumberToValue.try_emplace(LocalValNumber, Arg); 275 LocalValNumber++; 276 } 277 278 // Mapping the instructions to an unsigned integer if it is not already 279 // exist in the mapping. 280 if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) { 281 ValueToNumber.try_emplace(ID->Inst, LocalValNumber); 282 NumberToValue.try_emplace(LocalValNumber, ID->Inst); 283 LocalValNumber++; 284 } 285 } 286 287 // Setting the first and last instruction data pointers for the candidate. If 288 // we got through the entire for loop without hitting an assert, we know 289 // that both of these instructions are not nullptrs. 290 FirstInst = FirstInstIt; 291 LastInst = LastInstIt; 292 } 293 294 bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A, 295 const IRSimilarityCandidate &B) { 296 if (A.getLength() != B.getLength()) 297 return false; 298 299 auto InstrDataForBoth = 300 zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end())); 301 302 return all_of(InstrDataForBoth, 303 [](std::tuple<IRInstructionData &, IRInstructionData &> R) { 304 IRInstructionData &A = std::get<0>(R); 305 IRInstructionData &B = std::get<1>(R); 306 if (!A.Legal || !B.Legal) 307 return false; 308 return isClose(A, B); 309 }); 310 } 311 312 /// Determine if one or more of the assigned global value numbers for the 313 /// operands in \p TargetValueNumbers is in the current mapping set for operand 314 /// numbers in \p SourceOperands. The set of possible corresponding global 315 /// value numbers are replaced with the most recent version of compatible 316 /// values. 317 /// 318 /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global 319 /// value number for the source IRInstructionCandidate. 320 /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source 321 /// IRSimilarityCandidate global value numbers to a set of possible numbers in 322 /// the target. 323 /// \param [in] SourceOperands - The operands in the original 324 /// IRSimilarityCandidate in the current instruction. 325 /// \param [in] TargetValueNumbers - The global value numbers of the operands in 326 /// the corresponding Instruction in the other IRSimilarityCandidate. 327 /// \returns true if there exists a possible mapping between the source 328 /// Instruction operands and the target Instruction operands, and false if not. 329 static bool checkNumberingAndReplaceCommutative( 330 const DenseMap<Value *, unsigned> &SourceValueToNumberMapping, 331 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 332 ArrayRef<Value *> &SourceOperands, 333 DenseSet<unsigned> &TargetValueNumbers){ 334 335 DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 336 337 unsigned ArgVal; 338 bool WasInserted; 339 340 // Iterate over the operands in the source IRSimilarityCandidate to determine 341 // whether there exists an operand in the other IRSimilarityCandidate that 342 // creates a valid mapping of Value to Value between the 343 // IRSimilarityCaniddates. 344 for (Value *V : SourceOperands) { 345 ArgVal = SourceValueToNumberMapping.find(V)->second; 346 347 std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert( 348 std::make_pair(ArgVal, TargetValueNumbers)); 349 350 // Instead of finding a current mapping, we inserted a set. This means a 351 // mapping did not exist for the source Instruction operand, it has no 352 // current constraints we need to check. 353 if (WasInserted) 354 continue; 355 356 // If a mapping already exists for the source operand to the values in the 357 // other IRSimilarityCandidate we need to iterate over the items in other 358 // IRSimilarityCandidate's Instruction to determine whether there is a valid 359 // mapping of Value to Value. 360 DenseSet<unsigned> NewSet; 361 for (unsigned &Curr : ValueMappingIt->second) 362 // If we can find the value in the mapping, we add it to the new set. 363 if (TargetValueNumbers.find(Curr) != TargetValueNumbers.end()) 364 NewSet.insert(Curr); 365 366 // If we could not find a Value, return 0. 367 if (NewSet.empty()) 368 return false; 369 370 // Otherwise replace the old mapping with the newly constructed one. 371 if (NewSet.size() != ValueMappingIt->second.size()) 372 ValueMappingIt->second.swap(NewSet); 373 374 // We have reached no conclusions about the mapping, and cannot remove 375 // any items from the other operands, so we move to check the next operand. 376 if (ValueMappingIt->second.size() != 1) 377 continue; 378 379 380 unsigned ValToRemove = *ValueMappingIt->second.begin(); 381 // When there is only one item left in the mapping for and operand, remove 382 // the value from the other operands. If it results in there being no 383 // mapping, return false, it means the mapping is wrong 384 for (Value *InnerV : SourceOperands) { 385 if (V == InnerV) 386 continue; 387 388 unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second; 389 ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal); 390 if (ValueMappingIt == CurrentSrcTgtNumberMapping.end()) 391 continue; 392 393 ValueMappingIt->second.erase(ValToRemove); 394 if (ValueMappingIt->second.empty()) 395 return false; 396 } 397 } 398 399 return true; 400 } 401 402 /// Determine if operand number \p TargetArgVal is in the current mapping set 403 /// for operand number \p SourceArgVal. 404 /// 405 /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global 406 /// value numbers from source IRSimilarityCandidate to target 407 /// IRSimilarityCandidate. 408 /// \param [in] SourceArgVal The global value number for an operand in the 409 /// in the original candidate. 410 /// \param [in] TargetArgVal The global value number for the corresponding 411 /// operand in the other candidate. 412 /// \returns True if there exists a mapping and false if not. 413 bool checkNumberingAndReplace( 414 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping, 415 unsigned SourceArgVal, unsigned TargetArgVal) { 416 // We are given two unsigned integers representing the global values of 417 // the operands in different IRSimilarityCandidates and a current mapping 418 // between the two. 419 // 420 // Source Operand GVN: 1 421 // Target Operand GVN: 2 422 // CurrentMapping: {1: {1, 2}} 423 // 424 // Since we have mapping, and the target operand is contained in the set, we 425 // update it to: 426 // CurrentMapping: {1: {2}} 427 // and can return true. But, if the mapping was 428 // CurrentMapping: {1: {3}} 429 // we would return false. 430 431 bool WasInserted; 432 DenseMap<unsigned, DenseSet<unsigned>>::iterator Val; 433 434 std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert( 435 std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal}))); 436 437 // If we created a new mapping, then we are done. 438 if (WasInserted) 439 return true; 440 441 // If there is more than one option in the mapping set, and the target value 442 // is included in the mapping set replace that set with one that only includes 443 // the target value, as it is the only valid mapping via the non commutative 444 // instruction. 445 446 DenseSet<unsigned> &TargetSet = Val->second; 447 if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) { 448 TargetSet.clear(); 449 TargetSet.insert(TargetArgVal); 450 return true; 451 } 452 453 // Return true if we can find the value in the set. 454 return TargetSet.contains(TargetArgVal); 455 } 456 457 bool IRSimilarityCandidate::compareNonCommutativeOperandMapping( 458 OperandMapping A, OperandMapping B) { 459 // Iterators to keep track of where we are in the operands for each 460 // Instruction. 461 ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 462 ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 463 unsigned OperandLength = A.OperVals.size(); 464 465 // For each operand, get the value numbering and ensure it is consistent. 466 for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) { 467 unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second; 468 unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second; 469 470 // Attempt to add a set with only the target value. If there is no mapping 471 // we can create it here. 472 // 473 // For an instruction like a subtraction: 474 // IRSimilarityCandidateA: IRSimilarityCandidateB: 475 // %resultA = sub %a, %b %resultB = sub %d, %e 476 // 477 // We map %a -> %d and %b -> %e. 478 // 479 // And check to see whether their mapping is consistent in 480 // checkNumberingAndReplace. 481 482 if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB)) 483 return false; 484 485 if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA)) 486 return false; 487 } 488 return true; 489 } 490 491 bool IRSimilarityCandidate::compareCommutativeOperandMapping( 492 OperandMapping A, OperandMapping B) { 493 DenseSet<unsigned> ValueNumbersA; 494 DenseSet<unsigned> ValueNumbersB; 495 496 ArrayRef<Value *>::iterator VItA = A.OperVals.begin(); 497 ArrayRef<Value *>::iterator VItB = B.OperVals.begin(); 498 unsigned OperandLength = A.OperVals.size(); 499 500 // Find the value number sets for the operands. 501 for (unsigned Idx = 0; Idx < OperandLength; 502 Idx++, VItA++, VItB++) { 503 ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second); 504 ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second); 505 } 506 507 // Iterate over the operands in the first IRSimilarityCandidate and make sure 508 // there exists a possible mapping with the operands in the second 509 // IRSimilarityCandidate. 510 if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber, 511 A.ValueNumberMapping, A.OperVals, 512 ValueNumbersB)) 513 return false; 514 515 // Iterate over the operands in the second IRSimilarityCandidate and make sure 516 // there exists a possible mapping with the operands in the first 517 // IRSimilarityCandidate. 518 if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber, 519 B.ValueNumberMapping, B.OperVals, 520 ValueNumbersA)) 521 return false; 522 523 return true; 524 } 525 526 bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A, 527 const IRSimilarityCandidate &B) { 528 if (A.getLength() != B.getLength()) 529 return false; 530 531 if (A.ValueToNumber.size() != B.ValueToNumber.size()) 532 return false; 533 534 iterator ItA = A.begin(); 535 iterator ItB = B.begin(); 536 537 // These sets create a create a mapping between the values in one candidate 538 // to values in the other candidate. If we create a set with one element, 539 // and that same element maps to the original element in the candidate 540 // we have a good mapping. 541 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA; 542 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB; 543 DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt; 544 545 bool WasInserted; 546 547 // Iterate over the instructions contained in each candidate 548 unsigned SectionLength = A.getStartIdx() + A.getLength(); 549 for (unsigned Loc = A.getStartIdx(); Loc < SectionLength; 550 ItA++, ItB++, Loc++) { 551 // Make sure the instructions are similar to one another. 552 if (!isClose(*ItA, *ItB)) 553 return false; 554 555 Instruction *IA = ItA->Inst; 556 Instruction *IB = ItB->Inst; 557 558 if (!ItA->Legal || !ItB->Legal) 559 return false; 560 561 // Get the operand sets for the instructions. 562 ArrayRef<Value *> OperValsA = ItA->OperVals; 563 ArrayRef<Value *> OperValsB = ItB->OperVals; 564 565 unsigned InstValA = A.ValueToNumber.find(IA)->second; 566 unsigned InstValB = B.ValueToNumber.find(IB)->second; 567 568 // Ensure that the mappings for the instructions exists. 569 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert( 570 std::make_pair(InstValA, DenseSet<unsigned>({InstValB}))); 571 if (!WasInserted && !ValueMappingIt->second.contains(InstValB)) 572 return false; 573 574 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert( 575 std::make_pair(InstValB, DenseSet<unsigned>({InstValA}))); 576 if (!WasInserted && !ValueMappingIt->second.contains(InstValA)) 577 return false; 578 579 // We have different paths for commutative instructions and non-commutative 580 // instructions since commutative instructions could allow multiple mappings 581 // to certain values. 582 if (IA->isCommutative() && !isa<FPMathOperator>(IA)) { 583 if (!compareCommutativeOperandMapping( 584 {A, OperValsA, ValueNumberMappingA}, 585 {B, OperValsB, ValueNumberMappingB})) 586 return false; 587 continue; 588 } 589 590 // Handle the non-commutative cases. 591 if (!compareNonCommutativeOperandMapping( 592 {A, OperValsA, ValueNumberMappingA}, 593 {B, OperValsB, ValueNumberMappingB})) 594 return false; 595 } 596 return true; 597 } 598 599 bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A, 600 const IRSimilarityCandidate &B) { 601 auto DoesOverlap = [](const IRSimilarityCandidate &X, 602 const IRSimilarityCandidate &Y) { 603 // Check: 604 // XXXXXX X starts before Y ends 605 // YYYYYYY Y starts after X starts 606 return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx; 607 }; 608 609 return DoesOverlap(A, B) || DoesOverlap(B, A); 610 } 611 612 void IRSimilarityIdentifier::populateMapper( 613 Module &M, std::vector<IRInstructionData *> &InstrList, 614 std::vector<unsigned> &IntegerMapping) { 615 616 std::vector<IRInstructionData *> InstrListForModule; 617 std::vector<unsigned> IntegerMappingForModule; 618 // Iterate over the functions in the module to map each Instruction in each 619 // BasicBlock to an unsigned integer. 620 for (Function &F : M) { 621 622 if (F.empty()) 623 continue; 624 625 for (BasicBlock &BB : F) { 626 627 if (BB.sizeWithoutDebug() < 2) 628 continue; 629 630 // BB has potential to have similarity since it has a size greater than 2 631 // and can therefore match other regions greater than 2. Map it to a list 632 // of unsigned integers. 633 Mapper.convertToUnsignedVec(BB, InstrListForModule, 634 IntegerMappingForModule); 635 } 636 } 637 638 // Insert the InstrListForModule at the end of the overall InstrList so that 639 // we can have a long InstrList for the entire set of Modules being analyzed. 640 llvm::append_range(InstrList, InstrListForModule); 641 // Do the same as above, but for IntegerMapping. 642 llvm::append_range(IntegerMapping, IntegerMappingForModule); 643 } 644 645 void IRSimilarityIdentifier::populateMapper( 646 ArrayRef<std::unique_ptr<Module>> &Modules, 647 std::vector<IRInstructionData *> &InstrList, 648 std::vector<unsigned> &IntegerMapping) { 649 650 // Iterate over, and map the instructions in each module. 651 for (const std::unique_ptr<Module> &M : Modules) 652 populateMapper(*M, InstrList, IntegerMapping); 653 } 654 655 /// From a repeated subsequence, find all the different instances of the 656 /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from 657 /// the IRInstructionData in subsequence. 658 /// 659 /// \param [in] Mapper - The instruction mapper for sanity checks. 660 /// \param [in] InstrList - The vector that holds the instruction data. 661 /// \param [in] IntegerMapping - The vector that holds the mapped integers. 662 /// \param [out] CandsForRepSubstring - The vector to store the generated 663 /// IRSimilarityCandidates. 664 static void createCandidatesFromSuffixTree( 665 IRInstructionMapper Mapper, std::vector<IRInstructionData *> &InstrList, 666 std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS, 667 std::vector<IRSimilarityCandidate> &CandsForRepSubstring) { 668 669 unsigned StringLen = RS.Length; 670 671 // Create an IRSimilarityCandidate for instance of this subsequence \p RS. 672 for (const unsigned &StartIdx : RS.StartIndices) { 673 unsigned EndIdx = StartIdx + StringLen - 1; 674 675 // Check that this subsequence does not contain an illegal instruction. 676 bool ContainsIllegal = false; 677 for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) { 678 unsigned Key = IntegerMapping[CurrIdx]; 679 if (Key > Mapper.IllegalInstrNumber) { 680 ContainsIllegal = true; 681 break; 682 } 683 } 684 685 // If we have an illegal instruction, we should not create an 686 // IRSimilarityCandidate for this region. 687 if (ContainsIllegal) 688 continue; 689 690 // We are getting iterators to the instructions in this region of code 691 // by advancing the start and end indices from the start of the 692 // InstrList. 693 std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin(); 694 std::advance(StartIt, StartIdx); 695 std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin(); 696 std::advance(EndIt, EndIdx); 697 698 CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt); 699 } 700 } 701 702 /// From the list of IRSimilarityCandidates, perform a comparison between each 703 /// IRSimilarityCandidate to determine if there are overlapping 704 /// IRInstructionData, or if they do not have the same structure. 705 /// 706 /// \param [in] CandsForRepSubstring - The vector containing the 707 /// IRSimilarityCandidates. 708 /// \param [out] StructuralGroups - the mapping of unsigned integers to vector 709 /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the 710 /// vector are structurally similar to one another. 711 static void findCandidateStructures( 712 std::vector<IRSimilarityCandidate> &CandsForRepSubstring, 713 DenseMap<unsigned, SimilarityGroup> &StructuralGroups) { 714 std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt, 715 InnerCandEndIt; 716 717 // IRSimilarityCandidates each have a structure for operand use. It is 718 // possible that two instances of the same subsequences have different 719 // structure. Each type of structure found is assigned a number. This 720 // DenseMap maps an IRSimilarityCandidate to which type of similarity 721 // discovered it fits within. 722 DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup; 723 724 // Find the compatibility from each candidate to the others to determine 725 // which candidates overlap and which have the same structure by mapping 726 // each structure to a different group. 727 bool SameStructure; 728 bool Inserted; 729 unsigned CurrentGroupNum = 0; 730 unsigned OuterGroupNum; 731 DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt; 732 DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner; 733 DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair; 734 735 // Iterate over the candidates to determine its structural and overlapping 736 // compatibility with other instructions 737 for (CandIt = CandsForRepSubstring.begin(), 738 CandEndIt = CandsForRepSubstring.end(); 739 CandIt != CandEndIt; CandIt++) { 740 741 // Determine if it has an assigned structural group already. 742 CandToGroupIt = CandToGroup.find(&*CandIt); 743 if (CandToGroupIt == CandToGroup.end()) { 744 // If not, we assign it one, and add it to our mapping. 745 std::tie(CandToGroupIt, Inserted) = 746 CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++)); 747 } 748 749 // Get the structural group number from the iterator. 750 OuterGroupNum = CandToGroupIt->second; 751 752 // Check if we already have a list of IRSimilarityCandidates for the current 753 // structural group. Create one if one does not exist. 754 CurrentGroupPair = StructuralGroups.find(OuterGroupNum); 755 if (CurrentGroupPair == StructuralGroups.end()) 756 std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert( 757 std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt}))); 758 759 // Iterate over the IRSimilarityCandidates following the current 760 // IRSimilarityCandidate in the list to determine whether the two 761 // IRSimilarityCandidates are compatible. This is so we do not repeat pairs 762 // of IRSimilarityCandidates. 763 for (InnerCandIt = std::next(CandIt), 764 InnerCandEndIt = CandsForRepSubstring.end(); 765 InnerCandIt != InnerCandEndIt; InnerCandIt++) { 766 767 // We check if the inner item has a group already, if it does, we skip it. 768 CandToGroupItInner = CandToGroup.find(&*InnerCandIt); 769 if (CandToGroupItInner != CandToGroup.end()) 770 continue; 771 772 // Otherwise we determine if they have the same structure and add it to 773 // vector if they match. 774 SameStructure = 775 IRSimilarityCandidate::compareStructure(*CandIt, *InnerCandIt); 776 if (!SameStructure) 777 continue; 778 779 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum)); 780 CurrentGroupPair->second.push_back(*InnerCandIt); 781 } 782 } 783 } 784 785 void IRSimilarityIdentifier::findCandidates( 786 std::vector<IRInstructionData *> &InstrList, 787 std::vector<unsigned> &IntegerMapping) { 788 SuffixTree ST(IntegerMapping); 789 790 std::vector<IRSimilarityCandidate> CandsForRepSubstring; 791 std::vector<SimilarityGroup> NewCandidateGroups; 792 793 DenseMap<unsigned, SimilarityGroup> StructuralGroups; 794 795 // Iterate over the subsequences found by the Suffix Tree to create 796 // IRSimilarityCandidates for each repeated subsequence and determine which 797 // instances are structurally similar to one another. 798 for (auto It = ST.begin(), Et = ST.end(); It != Et; ++It) { 799 createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, *It, 800 CandsForRepSubstring); 801 802 if (CandsForRepSubstring.size() < 2) 803 continue; 804 805 findCandidateStructures(CandsForRepSubstring, StructuralGroups); 806 for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) 807 // We only add the group if it contains more than one 808 // IRSimilarityCandidate. If there is only one, that means there is no 809 // other repeated subsequence with the same structure. 810 if (Group.second.size() > 1) 811 SimilarityCandidates->push_back(Group.second); 812 813 CandsForRepSubstring.clear(); 814 StructuralGroups.clear(); 815 NewCandidateGroups.clear(); 816 } 817 } 818 819 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity( 820 ArrayRef<std::unique_ptr<Module>> Modules) { 821 resetSimilarityCandidates(); 822 823 std::vector<IRInstructionData *> InstrList; 824 std::vector<unsigned> IntegerMapping; 825 826 populateMapper(Modules, InstrList, IntegerMapping); 827 findCandidates(InstrList, IntegerMapping); 828 829 return SimilarityCandidates.getValue(); 830 } 831 832 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) { 833 resetSimilarityCandidates(); 834 835 std::vector<IRInstructionData *> InstrList; 836 std::vector<unsigned> IntegerMapping; 837 838 populateMapper(M, InstrList, IntegerMapping); 839 findCandidates(InstrList, IntegerMapping); 840 841 return SimilarityCandidates.getValue(); 842 } 843 844 INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier", 845 "ir-similarity-identifier", false, true) 846 847 IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass() 848 : ModulePass(ID) { 849 initializeIRSimilarityIdentifierWrapperPassPass( 850 *PassRegistry::getPassRegistry()); 851 } 852 853 bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) { 854 IRSI.reset(new IRSimilarityIdentifier(M)); 855 return false; 856 } 857 858 bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) { 859 IRSI.reset(); 860 return false; 861 } 862 863 bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) { 864 // All the real work is done in the constructor for the pass. 865 IRSI.reset(new IRSimilarityIdentifier(M)); 866 return false; 867 } 868 869 AnalysisKey IRSimilarityAnalysis::Key; 870 IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M, 871 ModuleAnalysisManager &) { 872 873 return IRSimilarityIdentifier(M); 874 } 875 876 PreservedAnalyses 877 IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) { 878 IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M); 879 Optional<SimilarityGroupList> &SimilarityCandidatesOpt = IRSI.getSimilarity(); 880 881 for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) { 882 OS << CandVec.size() << " candidates of length " 883 << CandVec.begin()->getLength() << ". Found in: \n"; 884 for (IRSimilarityCandidate &Cand : CandVec) { 885 OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str() 886 << ", Basic Block: "; 887 if (Cand.front()->Inst->getParent()->getName().str() == "") 888 OS << "(unnamed)\n"; 889 else 890 OS << Cand.front()->Inst->getParent()->getName().str() << "\n"; 891 } 892 } 893 894 return PreservedAnalyses::all(); 895 } 896 897 char IRSimilarityIdentifierWrapperPass::ID = 0; 898