xref: /llvm-project-15.0.7/bolt/lib/Passes/MCF.cpp (revision d3cbcc4e)
1 //===- bolt/Passes/MCF.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements functions for solving minimum-cost flow problem.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/MCF.h"
14 #include "bolt/Core/BinaryFunction.h"
15 #include "bolt/Passes/DataflowInfoManager.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/Support/CommandLine.h"
18 #include <algorithm>
19 #include <vector>
20 
21 #undef  DEBUG_TYPE
22 #define DEBUG_TYPE "mcf"
23 
24 using namespace llvm;
25 using namespace bolt;
26 
27 namespace opts {
28 
29 extern cl::OptionCategory BoltOptCategory;
30 
31 extern cl::opt<bool> TimeOpts;
32 
33 static cl::opt<bool> IterativeGuess(
34     "iterative-guess",
35     cl::desc("in non-LBR mode, guess edge counts using iterative technique"),
36 
37     cl::Hidden, cl::cat(BoltOptCategory));
38 
39 static cl::opt<bool>
40     EqualizeBBCounts("equalize-bb-counts",
41                      cl::desc("in non-LBR mode, use same count for BBs "
42                               "that should have equivalent count"),
43                      cl::Hidden, cl::cat(BoltOptCategory));
44 
45 static cl::opt<bool> UseRArcs(
46     "mcf-use-rarcs",
47     cl::desc("in MCF, consider the possibility of cancelling flow to balance "
48              "edges"),
49     cl::Hidden, cl::cat(BoltOptCategory));
50 
51 } // namespace opts
52 
53 namespace llvm {
54 namespace bolt {
55 
56 namespace {
57 
58 // Edge Weight Inference Heuristic
59 //
60 // We start by maintaining the invariant used in LBR mode where the sum of
61 // pred edges count is equal to the block execution count. This loop will set
62 // pred edges count by balancing its own execution count in different pred
63 // edges. The weight of each edge is guessed by looking at how hot each pred
64 // block is (in terms of samples).
65 // There are two caveats in this approach. One is for critical edges and the
66 // other is for self-referencing blocks (loops of 1 BB). For critical edges,
67 // we can't infer the hotness of them based solely on pred BBs execution
68 // count. For each critical edge we look at the pred BB, then look at its
69 // succs to adjust its weight.
70 //
71 //    [ 60  ]       [ 25 ]
72 //       |      \     |
73 //    [ 10  ]       [ 75 ]
74 //
75 // The illustration above shows a critical edge \. We wish to adjust bb count
76 // 60 to 50 to properly determine the weight of the critical edge to be
77 // 50 / 75.
78 // For self-referencing edges, we attribute its weight by subtracting the
79 // current BB execution count by the sum of predecessors count if this result
80 // is non-negative.
81 using EdgeWeightMap =
82     DenseMap<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>,
83              double>;
84 
85 template <class NodeT>
86 void updateEdgeWeight(EdgeWeightMap &EdgeWeights, const BinaryBasicBlock *A,
87                       const BinaryBasicBlock *B, double Weight);
88 
89 template <>
90 void updateEdgeWeight<BinaryBasicBlock *>(EdgeWeightMap &EdgeWeights,
91                                           const BinaryBasicBlock *A,
92                                           const BinaryBasicBlock *B,
93                                           double Weight) {
94   EdgeWeights[std::make_pair(A, B)] = Weight;
95   return;
96 }
97 
98 template <>
99 void updateEdgeWeight<Inverse<BinaryBasicBlock *>>(EdgeWeightMap &EdgeWeights,
100                                                    const BinaryBasicBlock *A,
101                                                    const BinaryBasicBlock *B,
102                                                    double Weight) {
103   EdgeWeights[std::make_pair(B, A)] = Weight;
104   return;
105 }
106 
107 template <class NodeT>
108 void computeEdgeWeights(BinaryBasicBlock *BB, EdgeWeightMap &EdgeWeights) {
109   typedef GraphTraits<NodeT> GraphT;
110   typedef GraphTraits<Inverse<NodeT>> InvTraits;
111 
112   double TotalChildrenCount = 0.0;
113   SmallVector<double, 4> ChildrenExecCount;
114   // First pass computes total children execution count that directly
115   // contribute to this BB.
116   for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
117                                           E = GraphT::child_end(BB);
118        CI != E; ++CI) {
119     typename GraphT::NodeRef Child = *CI;
120     double ChildExecCount = Child->getExecutionCount();
121     // Is self-reference?
122     if (Child == BB) {
123       ChildExecCount = 0.0; // will fill this in second pass
124     } else if (GraphT::child_end(BB) - GraphT::child_begin(BB) > 1 &&
125                InvTraits::child_end(Child) - InvTraits::child_begin(Child) >
126                    1) {
127       // Handle critical edges. This will cause a skew towards crit edges, but
128       // it is a quick solution.
129       double CritWeight = 0.0;
130       uint64_t Denominator = 0;
131       for (typename InvTraits::ChildIteratorType
132                II = InvTraits::child_begin(Child),
133                IE = InvTraits::child_end(Child);
134            II != IE; ++II) {
135         typename GraphT::NodeRef N = *II;
136         Denominator += N->getExecutionCount();
137         if (N != BB)
138           continue;
139         CritWeight = N->getExecutionCount();
140       }
141       if (Denominator)
142         CritWeight /= static_cast<double>(Denominator);
143       ChildExecCount *= CritWeight;
144     }
145     ChildrenExecCount.push_back(ChildExecCount);
146     TotalChildrenCount += ChildExecCount;
147   }
148   // Second pass fixes the weight of a possible self-reference edge
149   uint32_t ChildIndex = 0;
150   for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
151                                           E = GraphT::child_end(BB);
152        CI != E; ++CI) {
153     typename GraphT::NodeRef Child = *CI;
154     if (Child != BB) {
155       ++ChildIndex;
156       continue;
157     }
158     if (static_cast<double>(BB->getExecutionCount()) > TotalChildrenCount) {
159       ChildrenExecCount[ChildIndex] =
160           BB->getExecutionCount() - TotalChildrenCount;
161       TotalChildrenCount += ChildrenExecCount[ChildIndex];
162     }
163     break;
164   }
165   // Third pass finally assigns weights to edges
166   ChildIndex = 0;
167   for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
168                                           E = GraphT::child_end(BB);
169        CI != E; ++CI) {
170     typename GraphT::NodeRef Child = *CI;
171     double Weight = 1 / (GraphT::child_end(BB) - GraphT::child_begin(BB));
172     if (TotalChildrenCount != 0.0)
173       Weight = ChildrenExecCount[ChildIndex] / TotalChildrenCount;
174     updateEdgeWeight<NodeT>(EdgeWeights, BB, Child, Weight);
175     ++ChildIndex;
176   }
177 }
178 
179 template <class NodeT>
180 void computeEdgeWeights(BinaryFunction &BF, EdgeWeightMap &EdgeWeights) {
181   for (BinaryBasicBlock &BB : BF)
182     computeEdgeWeights<NodeT>(&BB, EdgeWeights);
183 }
184 
185 /// Make BB count match the sum of all incoming edges. If AllEdges is true,
186 /// make it match max(SumPredEdges, SumSuccEdges).
187 void recalculateBBCounts(BinaryFunction &BF, bool AllEdges) {
188   for (BinaryBasicBlock &BB : BF) {
189     uint64_t TotalPredsEWeight = 0;
190     for (BinaryBasicBlock *Pred : BB.predecessors())
191       TotalPredsEWeight += Pred->getBranchInfo(BB).Count;
192 
193     if (TotalPredsEWeight > BB.getExecutionCount())
194       BB.setExecutionCount(TotalPredsEWeight);
195 
196     if (!AllEdges)
197       continue;
198 
199     uint64_t TotalSuccsEWeight = 0;
200     for (BinaryBasicBlock::BinaryBranchInfo &BI : BB.branch_info())
201       TotalSuccsEWeight += BI.Count;
202 
203     if (TotalSuccsEWeight > BB.getExecutionCount())
204       BB.setExecutionCount(TotalSuccsEWeight);
205   }
206 }
207 
208 // This is our main edge count guessing heuristic. Look at predecessors and
209 // assign a proportionally higher count to pred edges coming from blocks with
210 // a higher execution count in comparison with the other predecessor blocks,
211 // making SumPredEdges match the current BB count.
212 // If "UseSucc" is true, apply the same logic to successor edges as well. Since
213 // some successor edges may already have assigned a count, only update it if the
214 // new count is higher.
215 void guessEdgeByRelHotness(BinaryFunction &BF, bool UseSucc,
216                            EdgeWeightMap &PredEdgeWeights,
217                            EdgeWeightMap &SuccEdgeWeights) {
218   for (BinaryBasicBlock &BB : BF) {
219     for (BinaryBasicBlock *Pred : BB.predecessors()) {
220       double RelativeExec = PredEdgeWeights[std::make_pair(Pred, &BB)];
221       RelativeExec *= BB.getExecutionCount();
222       BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(BB);
223       if (static_cast<uint64_t>(RelativeExec) > BI.Count)
224         BI.Count = static_cast<uint64_t>(RelativeExec);
225     }
226 
227     if (!UseSucc)
228       continue;
229 
230     auto BI = BB.branch_info_begin();
231     for (BinaryBasicBlock *Succ : BB.successors()) {
232       double RelativeExec = SuccEdgeWeights[std::make_pair(&BB, Succ)];
233       RelativeExec *= BB.getExecutionCount();
234       if (static_cast<uint64_t>(RelativeExec) > BI->Count)
235         BI->Count = static_cast<uint64_t>(RelativeExec);
236       ++BI;
237     }
238   }
239 }
240 
241 using ArcSet =
242     DenseSet<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>>;
243 
244 /// Predecessor edges version of guessEdgeByIterativeApproach. GuessedArcs has
245 /// all edges we already established their count. Try to guess the count of
246 /// the remaining edge, if there is only one to guess, and return true if we
247 /// were able to guess.
248 bool guessPredEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
249   if (BB->pred_size() == 0)
250     return false;
251 
252   uint64_t TotalPredCount = 0;
253   unsigned NumGuessedEdges = 0;
254   for (BinaryBasicBlock *Pred : BB->predecessors()) {
255     if (GuessedArcs.count(std::make_pair(Pred, BB)))
256       ++NumGuessedEdges;
257     TotalPredCount += Pred->getBranchInfo(*BB).Count;
258   }
259 
260   if (NumGuessedEdges != BB->pred_size() - 1)
261     return false;
262 
263   int64_t Guessed =
264       static_cast<int64_t>(BB->getExecutionCount()) - TotalPredCount;
265   if (Guessed < 0)
266     Guessed = 0;
267 
268   for (BinaryBasicBlock *Pred : BB->predecessors()) {
269     if (GuessedArcs.count(std::make_pair(Pred, BB)))
270       continue;
271 
272     Pred->getBranchInfo(*BB).Count = Guessed;
273     return true;
274   }
275   llvm_unreachable("Expected unguessed arc");
276 }
277 
278 /// Successor edges version of guessEdgeByIterativeApproach. GuessedArcs has
279 /// all edges we already established their count. Try to guess the count of
280 /// the remaining edge, if there is only one to guess, and return true if we
281 /// were able to guess.
282 bool guessSuccEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
283   if (BB->succ_size() == 0)
284     return false;
285 
286   uint64_t TotalSuccCount = 0;
287   unsigned NumGuessedEdges = 0;
288   auto BI = BB->branch_info_begin();
289   for (BinaryBasicBlock *Succ : BB->successors()) {
290     if (GuessedArcs.count(std::make_pair(BB, Succ)))
291       ++NumGuessedEdges;
292     TotalSuccCount += BI->Count;
293     ++BI;
294   }
295 
296   if (NumGuessedEdges != BB->succ_size() - 1)
297     return false;
298 
299   int64_t Guessed =
300       static_cast<int64_t>(BB->getExecutionCount()) - TotalSuccCount;
301   if (Guessed < 0)
302     Guessed = 0;
303 
304   BI = BB->branch_info_begin();
305   for (BinaryBasicBlock *Succ : BB->successors()) {
306     if (GuessedArcs.count(std::make_pair(BB, Succ))) {
307       ++BI;
308       continue;
309     }
310 
311     BI->Count = Guessed;
312     GuessedArcs.insert(std::make_pair(BB, Succ));
313     return true;
314   }
315   llvm_unreachable("Expected unguessed arc");
316 }
317 
318 /// Guess edge count whenever we have only one edge (pred or succ) left
319 /// to guess. Then make its count equal to BB count minus all other edge
320 /// counts we already know their count. Repeat this until there is no
321 /// change.
322 void guessEdgeByIterativeApproach(BinaryFunction &BF) {
323   ArcSet KnownArcs;
324   bool Changed = false;
325 
326   do {
327     Changed = false;
328     for (BinaryBasicBlock &BB : BF) {
329       if (guessPredEdgeCounts(&BB, KnownArcs))
330         Changed = true;
331       if (guessSuccEdgeCounts(&BB, KnownArcs))
332         Changed = true;
333     }
334   } while (Changed);
335 
336   // Guess count for non-inferred edges
337   for (BinaryBasicBlock &BB : BF) {
338     for (BinaryBasicBlock *Pred : BB.predecessors()) {
339       if (KnownArcs.count(std::make_pair(Pred, &BB)))
340         continue;
341       BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(BB);
342       BI.Count =
343           std::min(Pred->getExecutionCount(), BB.getExecutionCount()) / 2;
344       KnownArcs.insert(std::make_pair(Pred, &BB));
345     }
346     auto BI = BB.branch_info_begin();
347     for (BinaryBasicBlock *Succ : BB.successors()) {
348       if (KnownArcs.count(std::make_pair(&BB, Succ))) {
349         ++BI;
350         continue;
351       }
352       BI->Count =
353           std::min(BB.getExecutionCount(), Succ->getExecutionCount()) / 2;
354       KnownArcs.insert(std::make_pair(&BB, Succ));
355       break;
356     }
357   }
358 }
359 
360 /// Associate each basic block with the BinaryLoop object corresponding to the
361 /// innermost loop containing this block.
362 DenseMap<const BinaryBasicBlock *, const BinaryLoop *>
363 createLoopNestLevelMap(BinaryFunction &BF) {
364   DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel;
365   const BinaryLoopInfo &BLI = BF.getLoopInfo();
366 
367   for (BinaryBasicBlock &BB : BF)
368     LoopNestLevel[&BB] = BLI[&BB];
369 
370   return LoopNestLevel;
371 }
372 
373 /// Implement the idea in "SamplePGO - The Power of Profile Guided Optimizations
374 /// without the Usability Burden" by Diego Novillo to make basic block counts
375 /// equal if we show that A dominates B, B post-dominates A and they are in the
376 /// same loop and same loop nesting level.
377 void equalizeBBCounts(BinaryFunction &BF) {
378   auto Info = DataflowInfoManager(BF, nullptr, nullptr);
379   DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
380   DominatorAnalysis<true> &PDA = Info.getPostDominatorAnalysis();
381   auto &InsnToBB = Info.getInsnToBBMap();
382   // These analyses work at the instruction granularity, but we really only need
383   // basic block granularity here. So we'll use a set of visited edges to avoid
384   // revisiting the same BBs again and again.
385   DenseMap<const BinaryBasicBlock *, std::set<const BinaryBasicBlock *>>
386       Visited;
387   // Equivalence classes mapping. Each equivalence class is defined by the set
388   // of BBs that obeys the aforementioned properties.
389   DenseMap<const BinaryBasicBlock *, signed> BBsToEC;
390   std::vector<std::vector<BinaryBasicBlock *>> Classes;
391 
392   BF.calculateLoopInfo();
393   DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel =
394       createLoopNestLevelMap(BF);
395 
396   for (BinaryBasicBlock &BB : BF)
397     BBsToEC[&BB] = -1;
398 
399   for (BinaryBasicBlock &BB : BF) {
400     auto I = BB.begin();
401     if (I == BB.end())
402       continue;
403 
404     DA.doForAllDominators(*I, [&](const MCInst &DomInst) {
405       BinaryBasicBlock *DomBB = InsnToBB[&DomInst];
406       if (Visited[DomBB].count(&BB))
407         return;
408       Visited[DomBB].insert(&BB);
409       if (!PDA.doesADominateB(*I, DomInst))
410         return;
411       if (LoopNestLevel[&BB] != LoopNestLevel[DomBB])
412         return;
413       if (BBsToEC[DomBB] == -1 && BBsToEC[&BB] == -1) {
414         BBsToEC[DomBB] = Classes.size();
415         BBsToEC[&BB] = Classes.size();
416         Classes.emplace_back();
417         Classes.back().push_back(DomBB);
418         Classes.back().push_back(&BB);
419         return;
420       }
421       if (BBsToEC[DomBB] == -1) {
422         BBsToEC[DomBB] = BBsToEC[&BB];
423         Classes[BBsToEC[&BB]].push_back(DomBB);
424         return;
425       }
426       if (BBsToEC[&BB] == -1) {
427         BBsToEC[&BB] = BBsToEC[DomBB];
428         Classes[BBsToEC[DomBB]].push_back(&BB);
429         return;
430       }
431       signed BBECNum = BBsToEC[&BB];
432       std::vector<BinaryBasicBlock *> DomEC = Classes[BBsToEC[DomBB]];
433       std::vector<BinaryBasicBlock *> BBEC = Classes[BBECNum];
434       for (BinaryBasicBlock *Block : DomEC) {
435         BBsToEC[Block] = BBECNum;
436         BBEC.push_back(Block);
437       }
438       DomEC.clear();
439     });
440   }
441 
442   for (std::vector<BinaryBasicBlock *> &Class : Classes) {
443     uint64_t Max = 0ULL;
444     for (BinaryBasicBlock *BB : Class)
445       Max = std::max(Max, BB->getExecutionCount());
446     for (BinaryBasicBlock *BB : Class)
447       BB->setExecutionCount(Max);
448   }
449 }
450 
451 } // end anonymous namespace
452 
453 void estimateEdgeCounts(BinaryFunction &BF) {
454   EdgeWeightMap PredEdgeWeights;
455   EdgeWeightMap SuccEdgeWeights;
456   if (!opts::IterativeGuess) {
457     computeEdgeWeights<Inverse<BinaryBasicBlock *>>(BF, PredEdgeWeights);
458     computeEdgeWeights<BinaryBasicBlock *>(BF, SuccEdgeWeights);
459   }
460   if (opts::EqualizeBBCounts) {
461     LLVM_DEBUG(BF.print(dbgs(), "before equalize BB counts", true));
462     equalizeBBCounts(BF);
463     LLVM_DEBUG(BF.print(dbgs(), "after equalize BB counts", true));
464   }
465   if (opts::IterativeGuess)
466     guessEdgeByIterativeApproach(BF);
467   else
468     guessEdgeByRelHotness(BF, /*UseSuccs=*/false, PredEdgeWeights,
469                           SuccEdgeWeights);
470   recalculateBBCounts(BF, /*AllEdges=*/false);
471 }
472 
473 void solveMCF(BinaryFunction &BF, MCFCostFunction CostFunction) {
474   llvm_unreachable("not implemented");
475 }
476 
477 } // namespace bolt
478 } // namespace llvm
479