1 //===- bolt/Passes/IdenticalCodeFolding.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the IdenticalCodeFolding class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "bolt/Passes/IdenticalCodeFolding.h"
14 #include "bolt/Core/ParallelUtilities.h"
15 #include "llvm/Support/CommandLine.h"
16 #include "llvm/Support/ThreadPool.h"
17 #include "llvm/Support/Timer.h"
18 #include <atomic>
19 #include <map>
20 #include <set>
21 #include <unordered_map>
22
23 #define DEBUG_TYPE "bolt-icf"
24
25 using namespace llvm;
26 using namespace bolt;
27
28 namespace opts {
29
30 extern cl::OptionCategory BoltOptCategory;
31
32 static cl::opt<bool> UseDFS("icf-dfs",
33 cl::desc("use DFS ordering when using -icf option"),
34 cl::ReallyHidden, cl::cat(BoltOptCategory));
35
36 static cl::opt<bool>
37 TimeICF("time-icf",
38 cl::desc("time icf steps"),
39 cl::ReallyHidden,
40 cl::ZeroOrMore,
41 cl::cat(BoltOptCategory));
42 } // namespace opts
43
44 namespace {
45 using JumpTable = bolt::JumpTable;
46
47 /// Compare two jump tables in 2 functions. The function relies on consistent
48 /// ordering of basic blocks in both binary functions (e.g. DFS).
equalJumpTables(const JumpTable & JumpTableA,const JumpTable & JumpTableB,const BinaryFunction & FunctionA,const BinaryFunction & FunctionB)49 bool equalJumpTables(const JumpTable &JumpTableA, const JumpTable &JumpTableB,
50 const BinaryFunction &FunctionA,
51 const BinaryFunction &FunctionB) {
52 if (JumpTableA.EntrySize != JumpTableB.EntrySize)
53 return false;
54
55 if (JumpTableA.Type != JumpTableB.Type)
56 return false;
57
58 if (JumpTableA.getSize() != JumpTableB.getSize())
59 return false;
60
61 for (uint64_t Index = 0; Index < JumpTableA.Entries.size(); ++Index) {
62 const MCSymbol *LabelA = JumpTableA.Entries[Index];
63 const MCSymbol *LabelB = JumpTableB.Entries[Index];
64
65 const BinaryBasicBlock *TargetA = FunctionA.getBasicBlockForLabel(LabelA);
66 const BinaryBasicBlock *TargetB = FunctionB.getBasicBlockForLabel(LabelB);
67
68 if (!TargetA || !TargetB) {
69 assert((TargetA || LabelA == FunctionA.getFunctionEndLabel()) &&
70 "no target basic block found");
71 assert((TargetB || LabelB == FunctionB.getFunctionEndLabel()) &&
72 "no target basic block found");
73
74 if (TargetA != TargetB)
75 return false;
76
77 continue;
78 }
79
80 assert(TargetA && TargetB && "cannot locate target block(s)");
81
82 if (TargetA->getLayoutIndex() != TargetB->getLayoutIndex())
83 return false;
84 }
85
86 return true;
87 }
88
89 /// Helper function that compares an instruction of this function to the
90 /// given instruction of the given function. The functions should have
91 /// identical CFG.
92 template <class Compare>
isInstrEquivalentWith(const MCInst & InstA,const BinaryBasicBlock & BBA,const MCInst & InstB,const BinaryBasicBlock & BBB,Compare Comp)93 bool isInstrEquivalentWith(const MCInst &InstA, const BinaryBasicBlock &BBA,
94 const MCInst &InstB, const BinaryBasicBlock &BBB,
95 Compare Comp) {
96 if (InstA.getOpcode() != InstB.getOpcode())
97 return false;
98
99 const BinaryContext &BC = BBA.getFunction()->getBinaryContext();
100
101 // In this function we check for special conditions:
102 //
103 // * instructions with landing pads
104 //
105 // Most of the common cases should be handled by MCPlus::equals()
106 // that compares regular instruction operands.
107 //
108 // NB: there's no need to compare jump table indirect jump instructions
109 // separately as jump tables are handled by comparing corresponding
110 // symbols.
111 const Optional<MCPlus::MCLandingPad> EHInfoA = BC.MIB->getEHInfo(InstA);
112 const Optional<MCPlus::MCLandingPad> EHInfoB = BC.MIB->getEHInfo(InstB);
113
114 if (EHInfoA || EHInfoB) {
115 if (!EHInfoA && (EHInfoB->first || EHInfoB->second))
116 return false;
117
118 if (!EHInfoB && (EHInfoA->first || EHInfoA->second))
119 return false;
120
121 if (EHInfoA && EHInfoB) {
122 // Action indices should match.
123 if (EHInfoA->second != EHInfoB->second)
124 return false;
125
126 if (!EHInfoA->first != !EHInfoB->first)
127 return false;
128
129 if (EHInfoA->first && EHInfoB->first) {
130 const BinaryBasicBlock *LPA = BBA.getLandingPad(EHInfoA->first);
131 const BinaryBasicBlock *LPB = BBB.getLandingPad(EHInfoB->first);
132 assert(LPA && LPB && "cannot locate landing pad(s)");
133
134 if (LPA->getLayoutIndex() != LPB->getLayoutIndex())
135 return false;
136 }
137 }
138 }
139
140 return BC.MIB->equals(InstA, InstB, Comp);
141 }
142
143 /// Returns true if this function has identical code and CFG with
144 /// the given function \p BF.
145 ///
146 /// If \p CongruentSymbols is set to true, then symbolic operands that reference
147 /// potentially identical but different functions are ignored during the
148 /// comparison.
isIdenticalWith(const BinaryFunction & A,const BinaryFunction & B,bool CongruentSymbols)149 bool isIdenticalWith(const BinaryFunction &A, const BinaryFunction &B,
150 bool CongruentSymbols) {
151 assert(A.hasCFG() && B.hasCFG() && "both functions should have CFG");
152
153 // Compare the two functions, one basic block at a time.
154 // Currently we require two identical basic blocks to have identical
155 // instruction sequences and the same index in their corresponding
156 // functions. The latter is important for CFG equality.
157
158 if (A.getLayout().block_size() != B.getLayout().block_size())
159 return false;
160
161 // Comparing multi-entry functions could be non-trivial.
162 if (A.isMultiEntry() || B.isMultiEntry())
163 return false;
164
165 // Process both functions in either DFS or existing order.
166 const BinaryFunction::BasicBlockOrderType OrderA =
167 opts::UseDFS
168 ? A.dfs()
169 : BinaryFunction::BasicBlockOrderType(A.getLayout().block_begin(),
170 A.getLayout().block_end());
171 const BinaryFunction::BasicBlockOrderType OrderB =
172 opts::UseDFS
173 ? B.dfs()
174 : BinaryFunction::BasicBlockOrderType(B.getLayout().block_begin(),
175 B.getLayout().block_end());
176
177 const BinaryContext &BC = A.getBinaryContext();
178
179 auto BBI = OrderB.begin();
180 for (const BinaryBasicBlock *BB : OrderA) {
181 const BinaryBasicBlock *OtherBB = *BBI;
182
183 if (BB->getLayoutIndex() != OtherBB->getLayoutIndex())
184 return false;
185
186 // Compare successor basic blocks.
187 // NOTE: the comparison for jump tables is only partially verified here.
188 if (BB->succ_size() != OtherBB->succ_size())
189 return false;
190
191 auto SuccBBI = OtherBB->succ_begin();
192 for (const BinaryBasicBlock *SuccBB : BB->successors()) {
193 const BinaryBasicBlock *SuccOtherBB = *SuccBBI;
194 if (SuccBB->getLayoutIndex() != SuccOtherBB->getLayoutIndex())
195 return false;
196 ++SuccBBI;
197 }
198
199 // Compare all instructions including pseudos.
200 auto I = BB->begin(), E = BB->end();
201 auto OtherI = OtherBB->begin(), OtherE = OtherBB->end();
202 while (I != E && OtherI != OtherE) {
203 // Compare symbols.
204 auto AreSymbolsIdentical = [&](const MCSymbol *SymbolA,
205 const MCSymbol *SymbolB) {
206 if (SymbolA == SymbolB)
207 return true;
208
209 // All local symbols are considered identical since they affect a
210 // control flow and we check the control flow separately.
211 // If a local symbol is escaped, then the function (potentially) has
212 // multiple entry points and we exclude such functions from
213 // comparison.
214 if (SymbolA->isTemporary() && SymbolB->isTemporary())
215 return true;
216
217 // Compare symbols as functions.
218 uint64_t EntryIDA = 0;
219 uint64_t EntryIDB = 0;
220 const BinaryFunction *FunctionA =
221 BC.getFunctionForSymbol(SymbolA, &EntryIDA);
222 const BinaryFunction *FunctionB =
223 BC.getFunctionForSymbol(SymbolB, &EntryIDB);
224 if (FunctionA && EntryIDA)
225 FunctionA = nullptr;
226 if (FunctionB && EntryIDB)
227 FunctionB = nullptr;
228 if (FunctionA && FunctionB) {
229 // Self-referencing functions and recursive calls.
230 if (FunctionA == &A && FunctionB == &B)
231 return true;
232
233 // Functions with different hash values can never become identical,
234 // hence A and B are different.
235 if (CongruentSymbols)
236 return FunctionA->getHash() == FunctionB->getHash();
237
238 return FunctionA == FunctionB;
239 }
240
241 // One of the symbols represents a function, the other one does not.
242 if (FunctionA != FunctionB)
243 return false;
244
245 // Check if symbols are jump tables.
246 const BinaryData *SIA = BC.getBinaryDataByName(SymbolA->getName());
247 if (!SIA)
248 return false;
249 const BinaryData *SIB = BC.getBinaryDataByName(SymbolB->getName());
250 if (!SIB)
251 return false;
252
253 assert((SIA->getAddress() != SIB->getAddress()) &&
254 "different symbols should not have the same value");
255
256 const JumpTable *JumpTableA =
257 A.getJumpTableContainingAddress(SIA->getAddress());
258 if (!JumpTableA)
259 return false;
260
261 const JumpTable *JumpTableB =
262 B.getJumpTableContainingAddress(SIB->getAddress());
263 if (!JumpTableB)
264 return false;
265
266 if ((SIA->getAddress() - JumpTableA->getAddress()) !=
267 (SIB->getAddress() - JumpTableB->getAddress()))
268 return false;
269
270 return equalJumpTables(*JumpTableA, *JumpTableB, A, B);
271 };
272
273 if (!isInstrEquivalentWith(*I, *BB, *OtherI, *OtherBB,
274 AreSymbolsIdentical))
275 return false;
276
277 ++I;
278 ++OtherI;
279 }
280
281 // One of the identical blocks may have a trailing unconditional jump that
282 // is ignored for CFG purposes.
283 const MCInst *TrailingInstr =
284 (I != E ? &(*I) : (OtherI != OtherE ? &(*OtherI) : 0));
285 if (TrailingInstr && !BC.MIB->isUnconditionalBranch(*TrailingInstr))
286 return false;
287
288 ++BBI;
289 }
290
291 // Compare exceptions action tables.
292 if (A.getLSDAActionTable() != B.getLSDAActionTable() ||
293 A.getLSDATypeTable() != B.getLSDATypeTable() ||
294 A.getLSDATypeIndexTable() != B.getLSDATypeIndexTable())
295 return false;
296
297 return true;
298 }
299
300 // This hash table is used to identify identical functions. It maps
301 // a function to a bucket of functions identical to it.
302 struct KeyHash {
operator ()__anon2344f6bf0111::KeyHash303 size_t operator()(const BinaryFunction *F) const { return F->getHash(); }
304 };
305
306 /// Identify two congruent functions. Two functions are considered congruent,
307 /// if they are identical/equal except for some of their instruction operands
308 /// that reference potentially identical functions, i.e. functions that could
309 /// be folded later. Congruent functions are candidates for folding in our
310 /// iterative ICF algorithm.
311 ///
312 /// Congruent functions are required to have identical hash.
313 struct KeyCongruent {
operator ()__anon2344f6bf0111::KeyCongruent314 bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
315 if (A == B)
316 return true;
317 return isIdenticalWith(*A, *B, /*CongruentSymbols=*/true);
318 }
319 };
320
321 struct KeyEqual {
operator ()__anon2344f6bf0111::KeyEqual322 bool operator()(const BinaryFunction *A, const BinaryFunction *B) const {
323 if (A == B)
324 return true;
325 return isIdenticalWith(*A, *B, /*CongruentSymbols=*/false);
326 }
327 };
328
329 typedef std::unordered_map<BinaryFunction *, std::set<BinaryFunction *>,
330 KeyHash, KeyCongruent>
331 CongruentBucketsMap;
332
333 typedef std::unordered_map<BinaryFunction *, std::vector<BinaryFunction *>,
334 KeyHash, KeyEqual>
335 IdenticalBucketsMap;
336
hashInteger(uint64_t Value)337 std::string hashInteger(uint64_t Value) {
338 std::string HashString;
339 if (Value == 0)
340 HashString.push_back(0);
341
342 while (Value) {
343 uint8_t LSB = Value & 0xff;
344 HashString.push_back(LSB);
345 Value >>= 8;
346 }
347
348 return HashString;
349 }
350
hashSymbol(BinaryContext & BC,const MCSymbol & Symbol)351 std::string hashSymbol(BinaryContext &BC, const MCSymbol &Symbol) {
352 std::string HashString;
353
354 // Ignore function references.
355 if (BC.getFunctionForSymbol(&Symbol))
356 return HashString;
357
358 llvm::ErrorOr<uint64_t> ErrorOrValue = BC.getSymbolValue(Symbol);
359 if (!ErrorOrValue)
360 return HashString;
361
362 // Ignore jump table references.
363 if (BC.getJumpTableContainingAddress(*ErrorOrValue))
364 return HashString;
365
366 return HashString.append(hashInteger(*ErrorOrValue));
367 }
368
hashExpr(BinaryContext & BC,const MCExpr & Expr)369 std::string hashExpr(BinaryContext &BC, const MCExpr &Expr) {
370 switch (Expr.getKind()) {
371 case MCExpr::Constant:
372 return hashInteger(cast<MCConstantExpr>(Expr).getValue());
373 case MCExpr::SymbolRef:
374 return hashSymbol(BC, cast<MCSymbolRefExpr>(Expr).getSymbol());
375 case MCExpr::Unary: {
376 const auto &UnaryExpr = cast<MCUnaryExpr>(Expr);
377 return hashInteger(UnaryExpr.getOpcode())
378 .append(hashExpr(BC, *UnaryExpr.getSubExpr()));
379 }
380 case MCExpr::Binary: {
381 const auto &BinaryExpr = cast<MCBinaryExpr>(Expr);
382 return hashExpr(BC, *BinaryExpr.getLHS())
383 .append(hashInteger(BinaryExpr.getOpcode()))
384 .append(hashExpr(BC, *BinaryExpr.getRHS()));
385 }
386 case MCExpr::Target:
387 return std::string();
388 }
389
390 llvm_unreachable("invalid expression kind");
391 }
392
hashInstOperand(BinaryContext & BC,const MCOperand & Operand)393 std::string hashInstOperand(BinaryContext &BC, const MCOperand &Operand) {
394 if (Operand.isImm())
395 return hashInteger(Operand.getImm());
396 if (Operand.isReg())
397 return hashInteger(Operand.getReg());
398 if (Operand.isExpr())
399 return hashExpr(BC, *Operand.getExpr());
400
401 return std::string();
402 }
403
404 } // namespace
405
406 namespace llvm {
407 namespace bolt {
408
runOnFunctions(BinaryContext & BC)409 void IdenticalCodeFolding::runOnFunctions(BinaryContext &BC) {
410 const size_t OriginalFunctionCount = BC.getBinaryFunctions().size();
411 uint64_t NumFunctionsFolded = 0;
412 std::atomic<uint64_t> NumJTFunctionsFolded{0};
413 std::atomic<uint64_t> BytesSavedEstimate{0};
414 std::atomic<uint64_t> CallsSavedEstimate{0};
415 std::atomic<uint64_t> NumFoldedLastIteration{0};
416 CongruentBucketsMap CongruentBuckets;
417
418 // Hash all the functions
419 auto hashFunctions = [&]() {
420 NamedRegionTimer HashFunctionsTimer("hashing", "hashing", "ICF breakdown",
421 "ICF breakdown", opts::TimeICF);
422 ParallelUtilities::WorkFuncTy WorkFun = [&](BinaryFunction &BF) {
423 // Make sure indices are in-order.
424 BF.getLayout().updateLayoutIndices();
425
426 // Pre-compute hash before pushing into hashtable.
427 // Hash instruction operands to minimize hash collisions.
428 BF.computeHash(opts::UseDFS, [&BC](const MCOperand &Op) {
429 return hashInstOperand(BC, Op);
430 });
431 };
432
433 ParallelUtilities::PredicateTy SkipFunc = [&](const BinaryFunction &BF) {
434 return !shouldOptimize(BF);
435 };
436
437 ParallelUtilities::runOnEachFunction(
438 BC, ParallelUtilities::SchedulingPolicy::SP_TRIVIAL, WorkFun, SkipFunc,
439 "hashFunctions", /*ForceSequential*/ false, 2);
440 };
441
442 // Creates buckets with congruent functions - functions that potentially
443 // could be folded.
444 auto createCongruentBuckets = [&]() {
445 NamedRegionTimer CongruentBucketsTimer("congruent buckets",
446 "congruent buckets", "ICF breakdown",
447 "ICF breakdown", opts::TimeICF);
448 for (auto &BFI : BC.getBinaryFunctions()) {
449 BinaryFunction &BF = BFI.second;
450 if (!this->shouldOptimize(BF))
451 continue;
452 CongruentBuckets[&BF].emplace(&BF);
453 }
454 };
455
456 // Partition each set of congruent functions into sets of identical functions
457 // and fold them
458 auto performFoldingPass = [&]() {
459 NamedRegionTimer FoldingPassesTimer("folding passes", "folding passes",
460 "ICF breakdown", "ICF breakdown",
461 opts::TimeICF);
462 Timer SinglePass("single fold pass", "single fold pass");
463 LLVM_DEBUG(SinglePass.startTimer());
464
465 ThreadPool *ThPool;
466 if (!opts::NoThreads)
467 ThPool = &ParallelUtilities::getThreadPool();
468
469 // Fold identical functions within a single congruent bucket
470 auto processSingleBucket = [&](std::set<BinaryFunction *> &Candidates) {
471 Timer T("folding single congruent list", "folding single congruent list");
472 LLVM_DEBUG(T.startTimer());
473
474 // Identical functions go into the same bucket.
475 IdenticalBucketsMap IdenticalBuckets;
476 for (BinaryFunction *BF : Candidates) {
477 IdenticalBuckets[BF].emplace_back(BF);
478 }
479
480 for (auto &IBI : IdenticalBuckets) {
481 // Functions identified as identical.
482 std::vector<BinaryFunction *> &Twins = IBI.second;
483 if (Twins.size() < 2)
484 continue;
485
486 // Fold functions. Keep the order consistent across invocations with
487 // different options.
488 llvm::stable_sort(
489 Twins, [](const BinaryFunction *A, const BinaryFunction *B) {
490 return A->getFunctionNumber() < B->getFunctionNumber();
491 });
492
493 BinaryFunction *ParentBF = Twins[0];
494 for (unsigned I = 1; I < Twins.size(); ++I) {
495 BinaryFunction *ChildBF = Twins[I];
496 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: folding " << *ChildBF << " into "
497 << *ParentBF << '\n');
498
499 // Remove child function from the list of candidates.
500 auto FI = Candidates.find(ChildBF);
501 assert(FI != Candidates.end() &&
502 "function expected to be in the set");
503 Candidates.erase(FI);
504
505 // Fold the function and remove from the list of processed functions.
506 BytesSavedEstimate += ChildBF->getSize();
507 CallsSavedEstimate += std::min(ChildBF->getKnownExecutionCount(),
508 ParentBF->getKnownExecutionCount());
509 BC.foldFunction(*ChildBF, *ParentBF);
510
511 ++NumFoldedLastIteration;
512
513 if (ParentBF->hasJumpTables())
514 ++NumJTFunctionsFolded;
515 }
516 }
517
518 LLVM_DEBUG(T.stopTimer());
519 };
520
521 // Create a task for each congruent bucket
522 for (auto &Entry : CongruentBuckets) {
523 std::set<BinaryFunction *> &Bucket = Entry.second;
524 if (Bucket.size() < 2)
525 continue;
526
527 if (opts::NoThreads)
528 processSingleBucket(Bucket);
529 else
530 ThPool->async(processSingleBucket, std::ref(Bucket));
531 }
532
533 if (!opts::NoThreads)
534 ThPool->wait();
535
536 LLVM_DEBUG(SinglePass.stopTimer());
537 };
538
539 hashFunctions();
540 createCongruentBuckets();
541
542 unsigned Iteration = 1;
543 // We repeat the pass until no new modifications happen.
544 do {
545 NumFoldedLastIteration = 0;
546 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: ICF iteration " << Iteration << "...\n");
547
548 performFoldingPass();
549
550 NumFunctionsFolded += NumFoldedLastIteration;
551 ++Iteration;
552
553 } while (NumFoldedLastIteration > 0);
554
555 LLVM_DEBUG({
556 // Print functions that are congruent but not identical.
557 for (auto &CBI : CongruentBuckets) {
558 std::set<BinaryFunction *> &Candidates = CBI.second;
559 if (Candidates.size() < 2)
560 continue;
561 dbgs() << "BOLT-DEBUG: the following " << Candidates.size()
562 << " functions (each of size " << (*Candidates.begin())->getSize()
563 << " bytes) are congruent but not identical:\n";
564 for (BinaryFunction *BF : Candidates) {
565 dbgs() << " " << *BF;
566 if (BF->getKnownExecutionCount())
567 dbgs() << " (executed " << BF->getKnownExecutionCount() << " times)";
568 dbgs() << '\n';
569 }
570 }
571 });
572
573 if (NumFunctionsFolded)
574 outs() << "BOLT-INFO: ICF folded " << NumFunctionsFolded << " out of "
575 << OriginalFunctionCount << " functions in " << Iteration
576 << " passes. " << NumJTFunctionsFolded
577 << " functions had jump tables.\n"
578 << "BOLT-INFO: Removing all identical functions will save "
579 << format("%.2lf", (double)BytesSavedEstimate / 1024)
580 << " KB of code space. Folded functions were called "
581 << CallsSavedEstimate << " times based on profile.\n";
582 }
583
584 } // namespace bolt
585 } // namespace llvm
586