1 //===- bolt/Passes/IdenticalCodeFolding.cpp -------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the IdenticalCodeFolding class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/IdenticalCodeFolding.h" 14 #include "bolt/Core/ParallelUtilities.h" 15 #include "llvm/Support/CommandLine.h" 16 #include "llvm/Support/ThreadPool.h" 17 #include "llvm/Support/Timer.h" 18 #include <atomic> 19 #include <map> 20 #include <set> 21 #include <unordered_map> 22 23 #define DEBUG_TYPE "bolt-icf" 24 25 using namespace llvm; 26 using namespace bolt; 27 28 namespace opts { 29 30 extern cl::OptionCategory BoltOptCategory; 31 32 static cl::opt<bool> 33 UseDFS("icf-dfs", 34 cl::desc("use DFS ordering when using -icf option"), 35 cl::ReallyHidden, 36 cl::ZeroOrMore, 37 cl::cat(BoltOptCategory)); 38 39 static cl::opt<bool> 40 TimeICF("time-icf", 41 cl::desc("time icf steps"), 42 cl::ReallyHidden, 43 cl::ZeroOrMore, 44 cl::cat(BoltOptCategory)); 45 } // namespace opts 46 47 namespace { 48 using JumpTable = bolt::JumpTable; 49 50 /// Compare two jump tables in 2 functions. The function relies on consistent 51 /// ordering of basic blocks in both binary functions (e.g. DFS). 52 bool equalJumpTables(const JumpTable &JumpTableA, const JumpTable &JumpTableB, 53 const BinaryFunction &FunctionA, 54 const BinaryFunction &FunctionB) { 55 if (JumpTableA.EntrySize != JumpTableB.EntrySize) 56 return false; 57 58 if (JumpTableA.Type != JumpTableB.Type) 59 return false; 60 61 if (JumpTableA.getSize() != JumpTableB.getSize()) 62 return false; 63 64 for (uint64_t Index = 0; Index < JumpTableA.Entries.size(); ++Index) { 65 const MCSymbol *LabelA = JumpTableA.Entries[Index]; 66 const MCSymbol *LabelB = JumpTableB.Entries[Index]; 67 68 const BinaryBasicBlock *TargetA = FunctionA.getBasicBlockForLabel(LabelA); 69 const BinaryBasicBlock *TargetB = FunctionB.getBasicBlockForLabel(LabelB); 70 71 if (!TargetA || !TargetB) { 72 assert((TargetA || LabelA == FunctionA.getFunctionEndLabel()) && 73 "no target basic block found"); 74 assert((TargetB || LabelB == FunctionB.getFunctionEndLabel()) && 75 "no target basic block found"); 76 77 if (TargetA != TargetB) 78 return false; 79 80 continue; 81 } 82 83 assert(TargetA && TargetB && "cannot locate target block(s)"); 84 85 if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex()) 86 return false; 87 } 88 89 return true; 90 } 91 92 /// Helper function that compares an instruction of this function to the 93 /// given instruction of the given function. The functions should have 94 /// identical CFG. 95 template <class Compare> 96 bool isInstrEquivalentWith(const MCInst &InstA, const BinaryBasicBlock &BBA, 97 const MCInst &InstB, const BinaryBasicBlock &BBB, 98 Compare Comp) { 99 if (InstA.getOpcode() != InstB.getOpcode()) 100 return false; 101 102 const BinaryContext &BC = BBA.getFunction()->getBinaryContext(); 103 104 // In this function we check for special conditions: 105 // 106 // * instructions with landing pads 107 // 108 // Most of the common cases should be handled by MCPlus::equals() 109 // that compares regular instruction operands. 110 // 111 // NB: there's no need to compare jump table indirect jump instructions 112 // separately as jump tables are handled by comparing corresponding 113 // symbols. 114 const Optional<MCPlus::MCLandingPad> EHInfoA = BC.MIB->getEHInfo(InstA); 115 const Optional<MCPlus::MCLandingPad> EHInfoB = BC.MIB->getEHInfo(InstB); 116 117 if (EHInfoA || EHInfoB) { 118 if (!EHInfoA && (EHInfoB->first || EHInfoB->second)) 119 return false; 120 121 if (!EHInfoB && (EHInfoA->first || EHInfoA->second)) 122 return false; 123 124 if (EHInfoA && EHInfoB) { 125 // Action indices should match. 126 if (EHInfoA->second != EHInfoB->second) 127 return false; 128 129 if (!EHInfoA->first != !EHInfoB->first) 130 return false; 131 132 if (EHInfoA->first && EHInfoB->first) { 133 const BinaryBasicBlock *LPA = BBA.getLandingPad(EHInfoA->first); 134 const BinaryBasicBlock *LPB = BBB.getLandingPad(EHInfoB->first); 135 assert(LPA && LPB && "cannot locate landing pad(s)"); 136 137 if (LPA->getLayoutIndex() != LPB->getLayoutIndex()) 138 return false; 139 } 140 } 141 } 142 143 return BC.MIB->equals(InstA, InstB, Comp); 144 } 145 146 /// Returns true if this function has identical code and CFG with 147 /// the given function \p BF. 148 /// 149 /// If \p CongruentSymbols is set to true, then symbolic operands that reference 150 /// potentially identical but different functions are ignored during the 151 /// comparison. 152 bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B, 153 bool CongruentSymbols) { 154 assert(A.hasCFG() && B.hasCFG() && "both functions should have CFG"); 155 156 // Compare the two functions, one basic block at a time. 157 // Currently we require two identical basic blocks to have identical 158 // instruction sequences and the same index in their corresponding 159 // functions. The latter is important for CFG equality. 160 161 if (A.layout_size() != B.layout_size()) 162 return false; 163 164 // Comparing multi-entry functions could be non-trivial. 165 if (A.isMultiEntry() || B.isMultiEntry()) 166 return false; 167 168 // Process both functions in either DFS or existing order. 169 const BinaryFunction::BasicBlockOrderType &OrderA = 170 opts::UseDFS ? A.dfs() : A.getLayout(); 171 const BinaryFunction::BasicBlockOrderType &OrderB = 172 opts::UseDFS ? B.dfs() : B.getLayout(); 173 174 const BinaryContext &BC = A.getBinaryContext(); 175 176 auto BBI = OrderB.begin(); 177 for (const BinaryBasicBlock *BB : OrderA) { 178 const BinaryBasicBlock *OtherBB = *BBI; 179 180 if (BB->getLayoutIndex() != OtherBB->getLayoutIndex()) 181 return false; 182 183 // Compare successor basic blocks. 184 // NOTE: the comparison for jump tables is only partially verified here. 185 if (BB->succ_size() != OtherBB->succ_size()) 186 return false; 187 188 auto SuccBBI = OtherBB->succ_begin(); 189 for (const BinaryBasicBlock *SuccBB : BB->successors()) { 190 const BinaryBasicBlock *SuccOtherBB = *SuccBBI; 191 if (SuccBB->getLayoutIndex() != SuccOtherBB->getLayoutIndex()) 192 return false; 193 ++SuccBBI; 194 } 195 196 // Compare all instructions including pseudos. 197 auto I = BB->begin(), E = BB->end(); 198 auto OtherI = OtherBB->begin(), OtherE = OtherBB->end(); 199 while (I != E && OtherI != OtherE) { 200 // Compare symbols. 201 auto AreSymbolsIdentical = [&](const MCSymbol *SymbolA, 202 const MCSymbol *SymbolB) { 203 if (SymbolA == SymbolB) 204 return true; 205 206 // All local symbols are considered identical since they affect a 207 // control flow and we check the control flow separately. 208 // If a local symbol is escaped, then the function (potentially) has 209 // multiple entry points and we exclude such functions from 210 // comparison. 211 if (SymbolA->isTemporary() && SymbolB->isTemporary()) 212 return true; 213 214 // Compare symbols as functions. 215 uint64_t EntryIDA = 0; 216 uint64_t EntryIDB = 0; 217 const BinaryFunction *FunctionA = 218 BC.getFunctionForSymbol(SymbolA, &EntryIDA); 219 const BinaryFunction *FunctionB = 220 BC.getFunctionForSymbol(SymbolB, &EntryIDB); 221 if (FunctionA && EntryIDA) 222 FunctionA = nullptr; 223 if (FunctionB && EntryIDB) 224 FunctionB = nullptr; 225 if (FunctionA && FunctionB) { 226 // Self-referencing functions and recursive calls. 227 if (FunctionA == &A && FunctionB == &B) 228 return true; 229 230 // Functions with different hash values can never become identical, 231 // hence A and B are different. 232 if (CongruentSymbols) 233 return FunctionA->getHash() == FunctionB->getHash(); 234 235 return FunctionA == FunctionB; 236 } 237 238 // One of the symbols represents a function, the other one does not. 239 if (FunctionA != FunctionB) 240 return false; 241 242 // Check if symbols are jump tables. 243 const BinaryData *SIA = BC.getBinaryDataByName(SymbolA->getName()); 244 if (!SIA) 245 return false; 246 const BinaryData *SIB = BC.getBinaryDataByName(SymbolB->getName()); 247 if (!SIB) 248 return false; 249 250 assert((SIA->getAddress() != SIB->getAddress()) && 251 "different symbols should not have the same value"); 252 253 const JumpTable *JumpTableA = 254 A.getJumpTableContainingAddress(SIA->getAddress()); 255 if (!JumpTableA) 256 return false; 257 258 const JumpTable *JumpTableB = 259 B.getJumpTableContainingAddress(SIB->getAddress()); 260 if (!JumpTableB) 261 return false; 262 263 if ((SIA->getAddress() - JumpTableA->getAddress()) != 264 (SIB->getAddress() - JumpTableB->getAddress())) 265 return false; 266 267 return equalJumpTables(*JumpTableA, *JumpTableB, A, B); 268 }; 269 270 if (!isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB, 271 AreSymbolsIdentical)) 272 return false; 273 274 ++I; 275 ++OtherI; 276 } 277 278 // One of the identical blocks may have a trailing unconditional jump that 279 // is ignored for CFG purposes. 280 const MCInst *TrailingInstr = 281 (I != E ? &(*I) : (OtherI != OtherE ? &(*OtherI) : 0)); 282 if (TrailingInstr && !BC.MIB->isUnconditionalBranch(*TrailingInstr)) 283 return false; 284 285 ++BBI; 286 } 287 288 // Compare exceptions action tables. 289 if (A.getLSDAActionTable() != B.getLSDAActionTable() || 290 A.getLSDATypeTable() != B.getLSDATypeTable() || 291 A.getLSDATypeIndexTable() != B.getLSDATypeIndexTable()) 292 return false; 293 294 return true; 295 } 296 297 // This hash table is used to identify identical functions. It maps 298 // a function to a bucket of functions identical to it. 299 struct KeyHash { 300 size_t operator()(const BinaryFunction *F) const { return F->getHash(); } 301 }; 302 303 /// Identify two congruent functions. Two functions are considered congruent, 304 /// if they are identical/equal except for some of their instruction operands 305 /// that reference potentially identical functions, i.e. functions that could 306 /// be folded later. Congruent functions are candidates for folding in our 307 /// iterative ICF algorithm. 308 /// 309 /// Congruent functions are required to have identical hash. 310 struct KeyCongruent { 311 bool operator()(const BinaryFunction *A, const BinaryFunction *B) const { 312 if (A == B) 313 return true; 314 return isIdenticalWith(*A, *B, /*CongruentSymbols=*/true); 315 } 316 }; 317 318 struct KeyEqual { 319 bool operator()(const BinaryFunction *A, const BinaryFunction *B) const { 320 if (A == B) 321 return true; 322 return isIdenticalWith(*A, *B, /*CongruentSymbols=*/false); 323 } 324 }; 325 326 typedef std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>, 327 KeyHash, KeyCongruent> 328 CongruentBucketsMap; 329 330 typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>, 331 KeyHash, KeyEqual> 332 IdenticalBucketsMap; 333 334 std::string hashInteger(uint64_t Value) { 335 std::string HashString; 336 if (Value == 0) 337 HashString.push_back(0); 338 339 while (Value) { 340 uint8_t LSB = Value & 0xff; 341 HashString.push_back(LSB); 342 Value >>= 8; 343 } 344 345 return HashString; 346 } 347 348 std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) { 349 std::string HashString; 350 351 // Ignore function references. 352 if (BC.getFunctionForSymbol(&Symbol)) 353 return HashString; 354 355 llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol); 356 if (!ErrorOrValue) 357 return HashString; 358 359 // Ignore jump table references. 360 if (BC.getJumpTableContainingAddress(*ErrorOrValue)) 361 return HashString; 362 363 return HashString.append(hashInteger(*ErrorOrValue)); 364 } 365 366 std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) { 367 switch (Expr.getKind()) { 368 case MCExpr::Constant: 369 return hashInteger(cast<MCConstantExpr>(Expr).getValue()); 370 case MCExpr::SymbolRef: 371 return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol()); 372 case MCExpr::Unary: { 373 const auto &UnaryExpr = cast<MCUnaryExpr>(Expr); 374 return hashInteger(UnaryExpr.getOpcode()) 375 .append(hashExpr(BC, *UnaryExpr.getSubExpr())); 376 } 377 case MCExpr::Binary: { 378 const auto &BinaryExpr = cast<MCBinaryExpr>(Expr); 379 return hashExpr(BC, *BinaryExpr.getLHS()) 380 .append(hashInteger(BinaryExpr.getOpcode())) 381 .append(hashExpr(BC, *BinaryExpr.getRHS())); 382 } 383 case MCExpr::Target: 384 return std::string(); 385 } 386 387 llvm_unreachable("invalid expression kind"); 388 } 389 390 std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) { 391 if (Operand.isImm()) 392 return hashInteger(Operand.getImm()); 393 if (Operand.isReg()) 394 return hashInteger(Operand.getReg()); 395 if (Operand.isExpr()) 396 return hashExpr(BC, *Operand.getExpr()); 397 398 return std::string(); 399 } 400 401 } // namespace 402 403 namespace llvm { 404 namespace bolt { 405 406 void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) { 407 const size_t OriginalFunctionCount = BC.getBinaryFunctions().size(); 408 uint64_t NumFunctionsFolded = 0; 409 std::atomic<uint64_t> NumJTFunctionsFolded{0}; 410 std::atomic<uint64_t> BytesSavedEstimate{0}; 411 std::atomic<uint64_t> CallsSavedEstimate{0}; 412 std::atomic<uint64_t> NumFoldedLastIteration{0}; 413 CongruentBucketsMap CongruentBuckets; 414 415 // Hash all the functions 416 auto hashFunctions = [&]() { 417 NamedRegionTimer HashFunctionsTimer("hashing", "hashing", "ICF breakdown", 418 "ICF breakdown", opts::TimeICF); 419 ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) { 420 // Make sure indices are in-order. 421 BF.updateLayoutIndices(); 422 423 // Pre-compute hash before pushing into hashtable. 424 // Hash instruction operands to minimize hash collisions. 425 BF.computeHash(opts::UseDFS, [&BC](const MCOperand &Op) { 426 return hashInstOperand(BC, Op); 427 }); 428 }; 429 430 ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) { 431 return !shouldOptimize(BF); 432 }; 433 434 ParallelUtilities::runOnEachFunction( 435 BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, SkipFunc, 436 "hashFunctions", /*ForceSequential*/ false, 2); 437 }; 438 439 // Creates buckets with congruent functions - functions that potentially 440 // could be folded. 441 auto createCongruentBuckets = [&]() { 442 NamedRegionTimer CongruentBucketsTimer("congruent buckets", 443 "congruent buckets", "ICF breakdown", 444 "ICF breakdown", opts::TimeICF); 445 for (auto &BFI : BC.getBinaryFunctions()) { 446 BinaryFunction &BF = BFI.second; 447 if (!this->shouldOptimize(BF)) 448 continue; 449 CongruentBuckets[&BF].emplace(&BF); 450 } 451 }; 452 453 // Partition each set of congruent functions into sets of identical functions 454 // and fold them 455 auto performFoldingPass = [&]() { 456 NamedRegionTimer FoldingPassesTimer("folding passes", "folding passes", 457 "ICF breakdown", "ICF breakdown", 458 opts::TimeICF); 459 Timer SinglePass("single fold pass", "single fold pass"); 460 LLVM_DEBUG(SinglePass.startTimer()); 461 462 ThreadPool *ThPool; 463 if (!opts::NoThreads) 464 ThPool = &ParallelUtilities::getThreadPool(); 465 466 // Fold identical functions within a single congruent bucket 467 auto processSingleBucket = [&](std::set<BinaryFunction *> &Candidates) { 468 Timer T("folding single congruent list", "folding single congruent list"); 469 LLVM_DEBUG(T.startTimer()); 470 471 // Identical functions go into the same bucket. 472 IdenticalBucketsMap IdenticalBuckets; 473 for (BinaryFunction *BF : Candidates) { 474 IdenticalBuckets[BF].emplace_back(BF); 475 } 476 477 for (auto &IBI : IdenticalBuckets) { 478 // Functions identified as identical. 479 std::vector<BinaryFunction *> &Twins = IBI.second; 480 if (Twins.size() < 2) 481 continue; 482 483 // Fold functions. Keep the order consistent across invocations with 484 // different options. 485 std::stable_sort(Twins.begin(), Twins.end(), 486 [](const BinaryFunction *A, const BinaryFunction *B) { 487 return A->getFunctionNumber() < 488 B->getFunctionNumber(); 489 }); 490 491 BinaryFunction *ParentBF = Twins[0]; 492 for (unsigned I = 1; I < Twins.size(); ++I) { 493 BinaryFunction *ChildBF = Twins[I]; 494 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF << " into " 495 << *ParentBF << '\n'); 496 497 // Remove child function from the list of candidates. 498 auto FI = Candidates.find(ChildBF); 499 assert(FI != Candidates.end() && 500 "function expected to be in the set"); 501 Candidates.erase(FI); 502 503 // Fold the function and remove from the list of processed functions. 504 BytesSavedEstimate += ChildBF->getSize(); 505 CallsSavedEstimate += std::min(ChildBF->getKnownExecutionCount(), 506 ParentBF->getKnownExecutionCount()); 507 BC.foldFunction(*ChildBF, *ParentBF); 508 509 ++NumFoldedLastIteration; 510 511 if (ParentBF->hasJumpTables()) 512 ++NumJTFunctionsFolded; 513 } 514 } 515 516 LLVM_DEBUG(T.stopTimer()); 517 }; 518 519 // Create a task for each congruent bucket 520 for (auto &Entry : CongruentBuckets) { 521 std::set<BinaryFunction *> &Bucket = Entry.second; 522 if (Bucket.size() < 2) 523 continue; 524 525 if (opts::NoThreads) 526 processSingleBucket(Bucket); 527 else 528 ThPool->async(processSingleBucket, std::ref(Bucket)); 529 } 530 531 if (!opts::NoThreads) 532 ThPool->wait(); 533 534 LLVM_DEBUG(SinglePass.stopTimer()); 535 }; 536 537 hashFunctions(); 538 createCongruentBuckets(); 539 540 unsigned Iteration = 1; 541 // We repeat the pass until no new modifications happen. 542 do { 543 NumFoldedLastIteration = 0; 544 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration << "...\n"); 545 546 performFoldingPass(); 547 548 NumFunctionsFolded += NumFoldedLastIteration; 549 ++Iteration; 550 551 } while (NumFoldedLastIteration > 0); 552 553 LLVM_DEBUG({ 554 // Print functions that are congruent but not identical. 555 for (auto &CBI : CongruentBuckets) { 556 std::set<BinaryFunction *> &Candidates = CBI.second; 557 if (Candidates.size() < 2) 558 continue; 559 dbgs() << "BOLT-DEBUG: the following " << Candidates.size() 560 << " functions (each of size " << (*Candidates.begin())->getSize() 561 << " bytes) are congruent but not identical:\n"; 562 for (BinaryFunction *BF : Candidates) { 563 dbgs() << " " << *BF; 564 if (BF->getKnownExecutionCount()) 565 dbgs() << " (executed " << BF->getKnownExecutionCount() << " times)"; 566 dbgs() << '\n'; 567 } 568 } 569 }); 570 571 if (NumFunctionsFolded) 572 outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded << " out of " 573 << OriginalFunctionCount << " functions in " << Iteration 574 << " passes. " << NumJTFunctionsFolded 575 << " functions had jump tables.\n" 576 << "BOLT-INFO: Removing all identical functions will save " 577 << format("%.2lf", (double)BytesSavedEstimate / 1024) 578 << " KB of code space. Folded functions were called " 579 << CallsSavedEstimate << " times based on profile.\n"; 580 } 581 582 } // namespace bolt 583 } // namespace llvm 584