1 //===- bolt/Passes/IdenticalCodeFolding.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the IdenticalCodeFolding class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/IdenticalCodeFolding.h"
14 #include "bolt/Core/ParallelUtilities.h"
15 #include "llvm/Support/CommandLine.h"
16 #include "llvm/Support/ThreadPool.h"
17 #include "llvm/Support/Timer.h"
18 #include <atomic>
19 #include <map>
20 #include <set>
21 #include <unordered_map>
22 
23 #define DEBUG_TYPE "bolt-icf"
24 
25 using namespace llvm;
26 using namespace bolt;
27 
28 namespace opts {
29 
30 extern cl::OptionCategory BoltOptCategory;
31 
32 static cl::opt<bool>
33 UseDFS("icf-dfs",
34   cl::desc("use DFS ordering when using -icf option"),
35   cl::ReallyHidden,
36   cl::ZeroOrMore,
37   cl::cat(BoltOptCategory));
38 
39 static cl::opt<bool>
40 TimeICF("time-icf",
41   cl::desc("time icf steps"),
42   cl::ReallyHidden,
43   cl::ZeroOrMore,
44   cl::cat(BoltOptCategory));
45 } // namespace opts
46 
47 namespace {
48 using JumpTable = bolt::JumpTable;
49 
50 /// Compare two jump tables in 2 functions. The function relies on consistent
51 /// ordering of basic blocks in both binary functions (e.g. DFS).
52 bool equalJumpTables(const JumpTable &JumpTableA, const JumpTable &JumpTableB,
53                      const BinaryFunction &FunctionA,
54                      const BinaryFunction &FunctionB) {
55   if (JumpTableA.EntrySize != JumpTableB.EntrySize)
56     return false;
57 
58   if (JumpTableA.Type != JumpTableB.Type)
59     return false;
60 
61   if (JumpTableA.getSize() != JumpTableB.getSize())
62     return false;
63 
64   for (uint64_t Index = 0; Index < JumpTableA.Entries.size(); ++Index) {
65     const MCSymbol *LabelA = JumpTableA.Entries[Index];
66     const MCSymbol *LabelB = JumpTableB.Entries[Index];
67 
68     const BinaryBasicBlock *TargetA = FunctionA.getBasicBlockForLabel(LabelA);
69     const BinaryBasicBlock *TargetB = FunctionB.getBasicBlockForLabel(LabelB);
70 
71     if (!TargetA || !TargetB) {
72       assert((TargetA || LabelA == FunctionA.getFunctionEndLabel()) &&
73              "no target basic block found");
74       assert((TargetB || LabelB == FunctionB.getFunctionEndLabel()) &&
75              "no target basic block found");
76 
77       if (TargetA != TargetB)
78         return false;
79 
80       continue;
81     }
82 
83     assert(TargetA && TargetB && "cannot locate target block(s)");
84 
85     if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex())
86       return false;
87   }
88 
89   return true;
90 }
91 
92 /// Helper function that compares an instruction of this function to the
93 /// given instruction of the given function. The functions should have
94 /// identical CFG.
95 template <class Compare>
96 bool isInstrEquivalentWith(const MCInst &InstA, const BinaryBasicBlock &BBA,
97                            const MCInst &InstB, const BinaryBasicBlock &BBB,
98                            Compare Comp) {
99   if (InstA.getOpcode() != InstB.getOpcode())
100     return false;
101 
102   const BinaryContext &BC = BBA.getFunction()->getBinaryContext();
103 
104   // In this function we check for special conditions:
105   //
106   //    * instructions with landing pads
107   //
108   // Most of the common cases should be handled by MCPlus::equals()
109   // that compares regular instruction operands.
110   //
111   // NB: there's no need to compare jump table indirect jump instructions
112   //     separately as jump tables are handled by comparing corresponding
113   //     symbols.
114   const Optional<MCPlus::MCLandingPad> EHInfoA = BC.MIB->getEHInfo(InstA);
115   const Optional<MCPlus::MCLandingPad> EHInfoB = BC.MIB->getEHInfo(InstB);
116 
117   if (EHInfoA || EHInfoB) {
118     if (!EHInfoA && (EHInfoB->first || EHInfoB->second))
119       return false;
120 
121     if (!EHInfoB && (EHInfoA->first || EHInfoA->second))
122       return false;
123 
124     if (EHInfoA && EHInfoB) {
125       // Action indices should match.
126       if (EHInfoA->second != EHInfoB->second)
127         return false;
128 
129       if (!EHInfoA->first != !EHInfoB->first)
130         return false;
131 
132       if (EHInfoA->first && EHInfoB->first) {
133         const BinaryBasicBlock *LPA = BBA.getLandingPad(EHInfoA->first);
134         const BinaryBasicBlock *LPB = BBB.getLandingPad(EHInfoB->first);
135         assert(LPA && LPB && "cannot locate landing pad(s)");
136 
137         if (LPA->getLayoutIndex() != LPB->getLayoutIndex())
138           return false;
139       }
140     }
141   }
142 
143   return BC.MIB->equals(InstA, InstB, Comp);
144 }
145 
146 /// Returns true if this function has identical code and CFG with
147 /// the given function \p BF.
148 ///
149 /// If \p CongruentSymbols is set to true, then symbolic operands that reference
150 /// potentially identical but different functions are ignored during the
151 /// comparison.
152 bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B,
153                      bool CongruentSymbols) {
154   assert(A.hasCFG() && B.hasCFG() && "both functions should have CFG");
155 
156   // Compare the two functions, one basic block at a time.
157   // Currently we require two identical basic blocks to have identical
158   // instruction sequences and the same index in their corresponding
159   // functions. The latter is important for CFG equality.
160 
161   if (A.layout_size() != B.layout_size())
162     return false;
163 
164   // Comparing multi-entry functions could be non-trivial.
165   if (A.isMultiEntry() || B.isMultiEntry())
166     return false;
167 
168   // Process both functions in either DFS or existing order.
169   const BinaryFunction::BasicBlockOrderType &OrderA =
170       opts::UseDFS ? A.dfs() : A.getLayout();
171   const BinaryFunction::BasicBlockOrderType &OrderB =
172       opts::UseDFS ? B.dfs() : B.getLayout();
173 
174   const BinaryContext &BC = A.getBinaryContext();
175 
176   auto BBI = OrderB.begin();
177   for (const BinaryBasicBlock *BB : OrderA) {
178     const BinaryBasicBlock *OtherBB = *BBI;
179 
180     if (BB->getLayoutIndex() != OtherBB->getLayoutIndex())
181       return false;
182 
183     // Compare successor basic blocks.
184     // NOTE: the comparison for jump tables is only partially verified here.
185     if (BB->succ_size() != OtherBB->succ_size())
186       return false;
187 
188     auto SuccBBI = OtherBB->succ_begin();
189     for (const BinaryBasicBlock *SuccBB : BB->successors()) {
190       const BinaryBasicBlock *SuccOtherBB = *SuccBBI;
191       if (SuccBB->getLayoutIndex() != SuccOtherBB->getLayoutIndex())
192         return false;
193       ++SuccBBI;
194     }
195 
196     // Compare all instructions including pseudos.
197     auto I = BB->begin(), E = BB->end();
198     auto OtherI = OtherBB->begin(), OtherE = OtherBB->end();
199     while (I != E && OtherI != OtherE) {
200       // Compare symbols.
201       auto AreSymbolsIdentical = [&](const MCSymbol *SymbolA,
202                                      const MCSymbol *SymbolB) {
203         if (SymbolA == SymbolB)
204           return true;
205 
206         // All local symbols are considered identical since they affect a
207         // control flow and we check the control flow separately.
208         // If a local symbol is escaped, then the function (potentially) has
209         // multiple entry points and we exclude such functions from
210         // comparison.
211         if (SymbolA->isTemporary() && SymbolB->isTemporary())
212           return true;
213 
214         // Compare symbols as functions.
215         uint64_t EntryIDA = 0;
216         uint64_t EntryIDB = 0;
217         const BinaryFunction *FunctionA =
218             BC.getFunctionForSymbol(SymbolA, &EntryIDA);
219         const BinaryFunction *FunctionB =
220             BC.getFunctionForSymbol(SymbolB, &EntryIDB);
221         if (FunctionA && EntryIDA)
222           FunctionA = nullptr;
223         if (FunctionB && EntryIDB)
224           FunctionB = nullptr;
225         if (FunctionA && FunctionB) {
226           // Self-referencing functions and recursive calls.
227           if (FunctionA == &A && FunctionB == &B)
228             return true;
229 
230           // Functions with different hash values can never become identical,
231           // hence A and B are different.
232           if (CongruentSymbols)
233             return FunctionA->getHash() == FunctionB->getHash();
234 
235           return FunctionA == FunctionB;
236         }
237 
238         // One of the symbols represents a function, the other one does not.
239         if (FunctionA != FunctionB)
240           return false;
241 
242         // Check if symbols are jump tables.
243         const BinaryData *SIA = BC.getBinaryDataByName(SymbolA->getName());
244         if (!SIA)
245           return false;
246         const BinaryData *SIB = BC.getBinaryDataByName(SymbolB->getName());
247         if (!SIB)
248           return false;
249 
250         assert((SIA->getAddress() != SIB->getAddress()) &&
251                "different symbols should not have the same value");
252 
253         const JumpTable *JumpTableA =
254             A.getJumpTableContainingAddress(SIA->getAddress());
255         if (!JumpTableA)
256           return false;
257 
258         const JumpTable *JumpTableB =
259             B.getJumpTableContainingAddress(SIB->getAddress());
260         if (!JumpTableB)
261           return false;
262 
263         if ((SIA->getAddress() - JumpTableA->getAddress()) !=
264             (SIB->getAddress() - JumpTableB->getAddress()))
265           return false;
266 
267         return equalJumpTables(*JumpTableA, *JumpTableB, A, B);
268       };
269 
270       if (!isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB,
271                                  AreSymbolsIdentical))
272         return false;
273 
274       ++I;
275       ++OtherI;
276     }
277 
278     // One of the identical blocks may have a trailing unconditional jump that
279     // is ignored for CFG purposes.
280     const MCInst *TrailingInstr =
281         (I != E ? &(*I) : (OtherI != OtherE ? &(*OtherI) : 0));
282     if (TrailingInstr && !BC.MIB->isUnconditionalBranch(*TrailingInstr))
283       return false;
284 
285     ++BBI;
286   }
287 
288   // Compare exceptions action tables.
289   if (A.getLSDAActionTable() != B.getLSDAActionTable() ||
290       A.getLSDATypeTable() != B.getLSDATypeTable() ||
291       A.getLSDATypeIndexTable() != B.getLSDATypeIndexTable())
292     return false;
293 
294   return true;
295 }
296 
297 // This hash table is used to identify identical functions. It maps
298 // a function to a bucket of functions identical to it.
299 struct KeyHash {
300   size_t operator()(const BinaryFunction *F) const { return F->getHash(); }
301 };
302 
303 /// Identify two congruent functions. Two functions are considered congruent,
304 /// if they are identical/equal except for some of their instruction operands
305 /// that reference potentially identical functions, i.e. functions that could
306 /// be folded later. Congruent functions are candidates for folding in our
307 /// iterative ICF algorithm.
308 ///
309 /// Congruent functions are required to have identical hash.
310 struct KeyCongruent {
311   bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
312     if (A == B)
313       return true;
314     return isIdenticalWith(*A, *B, /*CongruentSymbols=*/true);
315   }
316 };
317 
318 struct KeyEqual {
319   bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
320     if (A == B)
321       return true;
322     return isIdenticalWith(*A, *B, /*CongruentSymbols=*/false);
323   }
324 };
325 
326 typedef std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>,
327                            KeyHash, KeyCongruent>
328     CongruentBucketsMap;
329 
330 typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
331                            KeyHash, KeyEqual>
332     IdenticalBucketsMap;
333 
334 std::string hashInteger(uint64_t Value) {
335   std::string HashString;
336   if (Value == 0)
337     HashString.push_back(0);
338 
339   while (Value) {
340     uint8_t LSB = Value & 0xff;
341     HashString.push_back(LSB);
342     Value >>= 8;
343   }
344 
345   return HashString;
346 }
347 
348 std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
349   std::string HashString;
350 
351   // Ignore function references.
352   if (BC.getFunctionForSymbol(&Symbol))
353     return HashString;
354 
355   llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol);
356   if (!ErrorOrValue)
357     return HashString;
358 
359   // Ignore jump table references.
360   if (BC.getJumpTableContainingAddress(*ErrorOrValue))
361     return HashString;
362 
363   return HashString.append(hashInteger(*ErrorOrValue));
364 }
365 
366 std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
367   switch (Expr.getKind()) {
368   case MCExpr::Constant:
369     return hashInteger(cast<MCConstantExpr>(Expr).getValue());
370   case MCExpr::SymbolRef:
371     return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol());
372   case MCExpr::Unary: {
373     const auto &UnaryExpr = cast<MCUnaryExpr>(Expr);
374     return hashInteger(UnaryExpr.getOpcode())
375         .append(hashExpr(BC, *UnaryExpr.getSubExpr()));
376   }
377   case MCExpr::Binary: {
378     const auto &BinaryExpr = cast<MCBinaryExpr>(Expr);
379     return hashExpr(BC, *BinaryExpr.getLHS())
380         .append(hashInteger(BinaryExpr.getOpcode()))
381         .append(hashExpr(BC, *BinaryExpr.getRHS()));
382   }
383   case MCExpr::Target:
384     return std::string();
385   }
386 
387   llvm_unreachable("invalid expression kind");
388 }
389 
390 std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
391   if (Operand.isImm())
392     return hashInteger(Operand.getImm());
393   if (Operand.isReg())
394     return hashInteger(Operand.getReg());
395   if (Operand.isExpr())
396     return hashExpr(BC, *Operand.getExpr());
397 
398   return std::string();
399 }
400 
401 } // namespace
402 
403 namespace llvm {
404 namespace bolt {
405 
406 void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
407   const size_t OriginalFunctionCount = BC.getBinaryFunctions().size();
408   uint64_t NumFunctionsFolded = 0;
409   std::atomic<uint64_t> NumJTFunctionsFolded{0};
410   std::atomic<uint64_t> BytesSavedEstimate{0};
411   std::atomic<uint64_t> CallsSavedEstimate{0};
412   std::atomic<uint64_t> NumFoldedLastIteration{0};
413   CongruentBucketsMap CongruentBuckets;
414 
415   // Hash all the functions
416   auto hashFunctions = [&]() {
417     NamedRegionTimer HashFunctionsTimer("hashing", "hashing", "ICF breakdown",
418                                         "ICF breakdown", opts::TimeICF);
419     ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
420       // Make sure indices are in-order.
421       BF.updateLayoutIndices();
422 
423       // Pre-compute hash before pushing into hashtable.
424       // Hash instruction operands to minimize hash collisions.
425       BF.computeHash(opts::UseDFS, [&BC](const MCOperand &Op) {
426         return hashInstOperand(BC, Op);
427       });
428     };
429 
430     ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
431       return !shouldOptimize(BF);
432     };
433 
434     ParallelUtilities::runOnEachFunction(
435         BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, SkipFunc,
436         "hashFunctions", /*ForceSequential*/ false, 2);
437   };
438 
439   // Creates buckets with congruent functions - functions that potentially
440   // could  be folded.
441   auto createCongruentBuckets = [&]() {
442     NamedRegionTimer CongruentBucketsTimer("congruent buckets",
443                                            "congruent buckets", "ICF breakdown",
444                                            "ICF breakdown", opts::TimeICF);
445     for (auto &BFI : BC.getBinaryFunctions()) {
446       BinaryFunction &BF = BFI.second;
447       if (!this->shouldOptimize(BF))
448         continue;
449       CongruentBuckets[&BF].emplace(&BF);
450     }
451   };
452 
453   // Partition each set of congruent functions into sets of identical functions
454   // and fold them
455   auto performFoldingPass = [&]() {
456     NamedRegionTimer FoldingPassesTimer("folding passes", "folding passes",
457                                         "ICF breakdown", "ICF breakdown",
458                                         opts::TimeICF);
459     Timer SinglePass("single fold pass", "single fold pass");
460     LLVM_DEBUG(SinglePass.startTimer());
461 
462     ThreadPool *ThPool;
463     if (!opts::NoThreads)
464       ThPool = &ParallelUtilities::getThreadPool();
465 
466     // Fold identical functions within a single congruent bucket
467     auto processSingleBucket = [&](std::set<BinaryFunction *> &Candidates) {
468       Timer T("folding single congruent list", "folding single congruent list");
469       LLVM_DEBUG(T.startTimer());
470 
471       // Identical functions go into the same bucket.
472       IdenticalBucketsMap IdenticalBuckets;
473       for (BinaryFunction *BF : Candidates) {
474         IdenticalBuckets[BF].emplace_back(BF);
475       }
476 
477       for (auto &IBI : IdenticalBuckets) {
478         // Functions identified as identical.
479         std::vector<BinaryFunction *> &Twins = IBI.second;
480         if (Twins.size() < 2)
481           continue;
482 
483         // Fold functions. Keep the order consistent across invocations with
484         // different options.
485         std::stable_sort(Twins.begin(), Twins.end(),
486                          [](const BinaryFunction *A, const BinaryFunction *B) {
487                            return A->getFunctionNumber() <
488                                   B->getFunctionNumber();
489                          });
490 
491         BinaryFunction *ParentBF = Twins[0];
492         for (unsigned I = 1; I < Twins.size(); ++I) {
493           BinaryFunction *ChildBF = Twins[I];
494           LLVM_DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF << " into "
495                             << *ParentBF << '\n');
496 
497           // Remove child function from the list of candidates.
498           auto FI = Candidates.find(ChildBF);
499           assert(FI != Candidates.end() &&
500                  "function expected to be in the set");
501           Candidates.erase(FI);
502 
503           // Fold the function and remove from the list of processed functions.
504           BytesSavedEstimate += ChildBF->getSize();
505           CallsSavedEstimate += std::min(ChildBF->getKnownExecutionCount(),
506                                          ParentBF->getKnownExecutionCount());
507           BC.foldFunction(*ChildBF, *ParentBF);
508 
509           ++NumFoldedLastIteration;
510 
511           if (ParentBF->hasJumpTables())
512             ++NumJTFunctionsFolded;
513         }
514       }
515 
516       LLVM_DEBUG(T.stopTimer());
517     };
518 
519     // Create a task for each congruent bucket
520     for (auto &Entry : CongruentBuckets) {
521       std::set<BinaryFunction *> &Bucket = Entry.second;
522       if (Bucket.size() < 2)
523         continue;
524 
525       if (opts::NoThreads)
526         processSingleBucket(Bucket);
527       else
528         ThPool->async(processSingleBucket, std::ref(Bucket));
529     }
530 
531     if (!opts::NoThreads)
532       ThPool->wait();
533 
534     LLVM_DEBUG(SinglePass.stopTimer());
535   };
536 
537   hashFunctions();
538   createCongruentBuckets();
539 
540   unsigned Iteration = 1;
541   // We repeat the pass until no new modifications happen.
542   do {
543     NumFoldedLastIteration = 0;
544     LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration << "...\n");
545 
546     performFoldingPass();
547 
548     NumFunctionsFolded += NumFoldedLastIteration;
549     ++Iteration;
550 
551   } while (NumFoldedLastIteration > 0);
552 
553   LLVM_DEBUG({
554     // Print functions that are congruent but not identical.
555     for (auto &CBI : CongruentBuckets) {
556       std::set<BinaryFunction *> &Candidates = CBI.second;
557       if (Candidates.size() < 2)
558         continue;
559       dbgs() << "BOLT-DEBUG: the following " << Candidates.size()
560              << " functions (each of size " << (*Candidates.begin())->getSize()
561              << " bytes) are congruent but not identical:\n";
562       for (BinaryFunction *BF : Candidates) {
563         dbgs() << "  " << *BF;
564         if (BF->getKnownExecutionCount())
565           dbgs() << " (executed " << BF->getKnownExecutionCount() << " times)";
566         dbgs() << '\n';
567       }
568     }
569   });
570 
571   if (NumFunctionsFolded)
572     outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded << " out of "
573            << OriginalFunctionCount << " functions in " << Iteration
574            << " passes. " << NumJTFunctionsFolded
575            << " functions had jump tables.\n"
576            << "BOLT-INFO: Removing all identical functions will save "
577            << format("%.2lf", (double)BytesSavedEstimate / 1024)
578            << " KB of code space. Folded functions were called "
579            << CallsSavedEstimate << " times based on profile.\n";
580 }
581 
582 } // namespace bolt
583 } // namespace llvm
584