xref: /llvm-project-15.0.7/bolt/lib/Passes/MCF.cpp (revision 4e00a192)
1 //===- bolt/Passes/MCF.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements functions for solving minimum-cost flow problem.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/MCF.h"
14 #include "bolt/Core/BinaryFunction.h"
15 #include "bolt/Passes/DataflowInfoManager.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/Support/CommandLine.h"
18 #include <algorithm>
19 #include <vector>
20 
21 #undef  DEBUG_TYPE
22 #define DEBUG_TYPE "mcf"
23 
24 using namespace llvm;
25 using namespace bolt;
26 
27 namespace opts {
28 
29 extern cl::OptionCategory BoltOptCategory;
30 
31 extern cl::opt<bool> TimeOpts;
32 
33 static cl::opt<bool>
34 IterativeGuess("iterative-guess",
35   cl::desc("in non-LBR mode, guess edge counts using iterative technique"),
36   cl::ZeroOrMore,
37   cl::init(false),
38   cl::Hidden,
39   cl::cat(BoltOptCategory));
40 
41 static cl::opt<bool>
42 EqualizeBBCounts("equalize-bb-counts",
43   cl::desc("in non-LBR mode, use same count for BBs "
44            "that should have equivalent count"),
45   cl::ZeroOrMore,
46   cl::init(false),
47   cl::Hidden,
48   cl::cat(BoltOptCategory));
49 
50 static cl::opt<bool>
51 UseRArcs("mcf-use-rarcs",
52   cl::desc("in MCF, consider the possibility of cancelling flow to balance "
53            "edges"),
54   cl::ZeroOrMore,
55   cl::init(false),
56   cl::Hidden,
57   cl::cat(BoltOptCategory));
58 
59 } // namespace opts
60 
61 namespace llvm {
62 namespace bolt {
63 
64 namespace {
65 
66 // Edge Weight Inference Heuristic
67 //
68 // We start by maintaining the invariant used in LBR mode where the sum of
69 // pred edges count is equal to the block execution count. This loop will set
70 // pred edges count by balancing its own execution count in different pred
71 // edges. The weight of each edge is guessed by looking at how hot each pred
72 // block is (in terms of samples).
73 // There are two caveats in this approach. One is for critical edges and the
74 // other is for self-referencing blocks (loops of 1 BB). For critical edges,
75 // we can't infer the hotness of them based solely on pred BBs execution
76 // count. For each critical edge we look at the pred BB, then look at its
77 // succs to adjust its weight.
78 //
79 //    [ 60  ]       [ 25 ]
80 //       |      \     |
81 //    [ 10  ]       [ 75 ]
82 //
83 // The illustration above shows a critical edge \. We wish to adjust bb count
84 // 60 to 50 to properly determine the weight of the critical edge to be
85 // 50 / 75.
86 // For self-referencing edges, we attribute its weight by subtracting the
87 // current BB execution count by the sum of predecessors count if this result
88 // is non-negative.
89 using EdgeWeightMap =
90     DenseMap<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>,
91              double>;
92 
93 template <class NodeT>
94 void updateEdgeWeight(EdgeWeightMap &EdgeWeights, const BinaryBasicBlock *A,
95                       const BinaryBasicBlock *B, double Weight);
96 
97 template <>
98 void updateEdgeWeight<BinaryBasicBlock *>(EdgeWeightMap &EdgeWeights,
99                                           const BinaryBasicBlock *A,
100                                           const BinaryBasicBlock *B,
101                                           double Weight) {
102   EdgeWeights[std::make_pair(A, B)] = Weight;
103   return;
104 }
105 
106 template <>
107 void updateEdgeWeight<Inverse<BinaryBasicBlock *>>(EdgeWeightMap &EdgeWeights,
108                                                    const BinaryBasicBlock *A,
109                                                    const BinaryBasicBlock *B,
110                                                    double Weight) {
111   EdgeWeights[std::make_pair(B, A)] = Weight;
112   return;
113 }
114 
115 template <class NodeT>
116 void computeEdgeWeights(BinaryBasicBlock *BB, EdgeWeightMap &EdgeWeights) {
117   typedef GraphTraits<NodeT> GraphT;
118   typedef GraphTraits<Inverse<NodeT>> InvTraits;
119 
120   double TotalChildrenCount = 0.0;
121   SmallVector<double, 4> ChildrenExecCount;
122   // First pass computes total children execution count that directly
123   // contribute to this BB.
124   for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
125                                           E = GraphT::child_end(BB);
126        CI != E; ++CI) {
127     typename GraphT::NodeRef Child = *CI;
128     double ChildExecCount = Child->getExecutionCount();
129     // Is self-reference?
130     if (Child == BB) {
131       ChildExecCount = 0.0; // will fill this in second pass
132     } else if (GraphT::child_end(BB) - GraphT::child_begin(BB) > 1 &&
133                InvTraits::child_end(Child) - InvTraits::child_begin(Child) >
134                    1) {
135       // Handle critical edges. This will cause a skew towards crit edges, but
136       // it is a quick solution.
137       double CritWeight = 0.0;
138       uint64_t Denominator = 0;
139       for (typename InvTraits::ChildIteratorType
140                II = InvTraits::child_begin(Child),
141                IE = InvTraits::child_end(Child);
142            II != IE; ++II) {
143         typename GraphT::NodeRef N = *II;
144         Denominator += N->getExecutionCount();
145         if (N != BB)
146           continue;
147         CritWeight = N->getExecutionCount();
148       }
149       if (Denominator)
150         CritWeight /= static_cast<double>(Denominator);
151       ChildExecCount *= CritWeight;
152     }
153     ChildrenExecCount.push_back(ChildExecCount);
154     TotalChildrenCount += ChildExecCount;
155   }
156   // Second pass fixes the weight of a possible self-reference edge
157   uint32_t ChildIndex = 0;
158   for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
159                                           E = GraphT::child_end(BB);
160        CI != E; ++CI) {
161     typename GraphT::NodeRef Child = *CI;
162     if (Child != BB) {
163       ++ChildIndex;
164       continue;
165     }
166     if (static_cast<double>(BB->getExecutionCount()) > TotalChildrenCount) {
167       ChildrenExecCount[ChildIndex] =
168           BB->getExecutionCount() - TotalChildrenCount;
169       TotalChildrenCount += ChildrenExecCount[ChildIndex];
170     }
171     break;
172   }
173   // Third pass finally assigns weights to edges
174   ChildIndex = 0;
175   for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
176                                           E = GraphT::child_end(BB);
177        CI != E; ++CI) {
178     typename GraphT::NodeRef Child = *CI;
179     double Weight = 1 / (GraphT::child_end(BB) - GraphT::child_begin(BB));
180     if (TotalChildrenCount != 0.0)
181       Weight = ChildrenExecCount[ChildIndex] / TotalChildrenCount;
182     updateEdgeWeight<NodeT>(EdgeWeights, BB, Child, Weight);
183     ++ChildIndex;
184   }
185 }
186 
187 template <class NodeT>
188 void computeEdgeWeights(BinaryFunction &BF, EdgeWeightMap &EdgeWeights) {
189   for (BinaryBasicBlock &BB : BF)
190     computeEdgeWeights<NodeT>(&BB, EdgeWeights);
191 }
192 
193 /// Make BB count match the sum of all incoming edges. If AllEdges is true,
194 /// make it match max(SumPredEdges, SumSuccEdges).
195 void recalculateBBCounts(BinaryFunction &BF, bool AllEdges) {
196   for (BinaryBasicBlock &BB : BF) {
197     uint64_t TotalPredsEWeight = 0;
198     for (BinaryBasicBlock *Pred : BB.predecessors())
199       TotalPredsEWeight += Pred->getBranchInfo(BB).Count;
200 
201     if (TotalPredsEWeight > BB.getExecutionCount())
202       BB.setExecutionCount(TotalPredsEWeight);
203 
204     if (!AllEdges)
205       continue;
206 
207     uint64_t TotalSuccsEWeight = 0;
208     for (BinaryBasicBlock::BinaryBranchInfo &BI : BB.branch_info())
209       TotalSuccsEWeight += BI.Count;
210 
211     if (TotalSuccsEWeight > BB.getExecutionCount())
212       BB.setExecutionCount(TotalSuccsEWeight);
213   }
214 }
215 
216 // This is our main edge count guessing heuristic. Look at predecessors and
217 // assign a proportionally higher count to pred edges coming from blocks with
218 // a higher execution count in comparison with the other predecessor blocks,
219 // making SumPredEdges match the current BB count.
220 // If "UseSucc" is true, apply the same logic to successor edges as well. Since
221 // some successor edges may already have assigned a count, only update it if the
222 // new count is higher.
223 void guessEdgeByRelHotness(BinaryFunction &BF, bool UseSucc,
224                            EdgeWeightMap &PredEdgeWeights,
225                            EdgeWeightMap &SuccEdgeWeights) {
226   for (BinaryBasicBlock &BB : BF) {
227     for (BinaryBasicBlock *Pred : BB.predecessors()) {
228       double RelativeExec = PredEdgeWeights[std::make_pair(Pred, &BB)];
229       RelativeExec *= BB.getExecutionCount();
230       BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(BB);
231       if (static_cast<uint64_t>(RelativeExec) > BI.Count)
232         BI.Count = static_cast<uint64_t>(RelativeExec);
233     }
234 
235     if (!UseSucc)
236       continue;
237 
238     auto BI = BB.branch_info_begin();
239     for (BinaryBasicBlock *Succ : BB.successors()) {
240       double RelativeExec = SuccEdgeWeights[std::make_pair(&BB, Succ)];
241       RelativeExec *= BB.getExecutionCount();
242       if (static_cast<uint64_t>(RelativeExec) > BI->Count)
243         BI->Count = static_cast<uint64_t>(RelativeExec);
244       ++BI;
245     }
246   }
247 }
248 
249 using ArcSet =
250     DenseSet<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>>;
251 
252 /// Predecessor edges version of guessEdgeByIterativeApproach. GuessedArcs has
253 /// all edges we already established their count. Try to guess the count of
254 /// the remaining edge, if there is only one to guess, and return true if we
255 /// were able to guess.
256 bool guessPredEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
257   if (BB->pred_size() == 0)
258     return false;
259 
260   uint64_t TotalPredCount = 0;
261   unsigned NumGuessedEdges = 0;
262   for (BinaryBasicBlock *Pred : BB->predecessors()) {
263     if (GuessedArcs.count(std::make_pair(Pred, BB)))
264       ++NumGuessedEdges;
265     TotalPredCount += Pred->getBranchInfo(*BB).Count;
266   }
267 
268   if (NumGuessedEdges != BB->pred_size() - 1)
269     return false;
270 
271   int64_t Guessed =
272       static_cast<int64_t>(BB->getExecutionCount()) - TotalPredCount;
273   if (Guessed < 0)
274     Guessed = 0;
275 
276   for (BinaryBasicBlock *Pred : BB->predecessors()) {
277     if (GuessedArcs.count(std::make_pair(Pred, BB)))
278       continue;
279 
280     Pred->getBranchInfo(*BB).Count = Guessed;
281     return true;
282   }
283   llvm_unreachable("Expected unguessed arc");
284 }
285 
286 /// Successor edges version of guessEdgeByIterativeApproach. GuessedArcs has
287 /// all edges we already established their count. Try to guess the count of
288 /// the remaining edge, if there is only one to guess, and return true if we
289 /// were able to guess.
290 bool guessSuccEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
291   if (BB->succ_size() == 0)
292     return false;
293 
294   uint64_t TotalSuccCount = 0;
295   unsigned NumGuessedEdges = 0;
296   auto BI = BB->branch_info_begin();
297   for (BinaryBasicBlock *Succ : BB->successors()) {
298     if (GuessedArcs.count(std::make_pair(BB, Succ)))
299       ++NumGuessedEdges;
300     TotalSuccCount += BI->Count;
301     ++BI;
302   }
303 
304   if (NumGuessedEdges != BB->succ_size() - 1)
305     return false;
306 
307   int64_t Guessed =
308       static_cast<int64_t>(BB->getExecutionCount()) - TotalSuccCount;
309   if (Guessed < 0)
310     Guessed = 0;
311 
312   BI = BB->branch_info_begin();
313   for (BinaryBasicBlock *Succ : BB->successors()) {
314     if (GuessedArcs.count(std::make_pair(BB, Succ))) {
315       ++BI;
316       continue;
317     }
318 
319     BI->Count = Guessed;
320     GuessedArcs.insert(std::make_pair(BB, Succ));
321     return true;
322   }
323   llvm_unreachable("Expected unguessed arc");
324 }
325 
326 /// Guess edge count whenever we have only one edge (pred or succ) left
327 /// to guess. Then make its count equal to BB count minus all other edge
328 /// counts we already know their count. Repeat this until there is no
329 /// change.
330 void guessEdgeByIterativeApproach(BinaryFunction &BF) {
331   ArcSet KnownArcs;
332   bool Changed = false;
333 
334   do {
335     Changed = false;
336     for (BinaryBasicBlock &BB : BF) {
337       if (guessPredEdgeCounts(&BB, KnownArcs))
338         Changed = true;
339       if (guessSuccEdgeCounts(&BB, KnownArcs))
340         Changed = true;
341     }
342   } while (Changed);
343 
344   // Guess count for non-inferred edges
345   for (BinaryBasicBlock &BB : BF) {
346     for (BinaryBasicBlock *Pred : BB.predecessors()) {
347       if (KnownArcs.count(std::make_pair(Pred, &BB)))
348         continue;
349       BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(BB);
350       BI.Count =
351           std::min(Pred->getExecutionCount(), BB.getExecutionCount()) / 2;
352       KnownArcs.insert(std::make_pair(Pred, &BB));
353     }
354     auto BI = BB.branch_info_begin();
355     for (BinaryBasicBlock *Succ : BB.successors()) {
356       if (KnownArcs.count(std::make_pair(&BB, Succ))) {
357         ++BI;
358         continue;
359       }
360       BI->Count =
361           std::min(BB.getExecutionCount(), Succ->getExecutionCount()) / 2;
362       KnownArcs.insert(std::make_pair(&BB, Succ));
363       break;
364     }
365   }
366 }
367 
368 /// Associate each basic block with the BinaryLoop object corresponding to the
369 /// innermost loop containing this block.
370 DenseMap<const BinaryBasicBlock *, const BinaryLoop *>
371 createLoopNestLevelMap(BinaryFunction &BF) {
372   DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel;
373   const BinaryLoopInfo &BLI = BF.getLoopInfo();
374 
375   for (BinaryBasicBlock &BB : BF)
376     LoopNestLevel[&BB] = BLI[&BB];
377 
378   return LoopNestLevel;
379 }
380 
381 /// Implement the idea in "SamplePGO - The Power of Profile Guided Optimizations
382 /// without the Usability Burden" by Diego Novillo to make basic block counts
383 /// equal if we show that A dominates B, B post-dominates A and they are in the
384 /// same loop and same loop nesting level.
385 void equalizeBBCounts(BinaryFunction &BF) {
386   auto Info = DataflowInfoManager(BF, nullptr, nullptr);
387   DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
388   DominatorAnalysis<true> &PDA = Info.getPostDominatorAnalysis();
389   auto &InsnToBB = Info.getInsnToBBMap();
390   // These analyses work at the instruction granularity, but we really only need
391   // basic block granularity here. So we'll use a set of visited edges to avoid
392   // revisiting the same BBs again and again.
393   DenseMap<const BinaryBasicBlock *, std::set<const BinaryBasicBlock *>>
394       Visited;
395   // Equivalence classes mapping. Each equivalence class is defined by the set
396   // of BBs that obeys the aforementioned properties.
397   DenseMap<const BinaryBasicBlock *, signed> BBsToEC;
398   std::vector<std::vector<BinaryBasicBlock *>> Classes;
399 
400   BF.calculateLoopInfo();
401   DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel =
402       createLoopNestLevelMap(BF);
403 
404   for (BinaryBasicBlock &BB : BF)
405     BBsToEC[&BB] = -1;
406 
407   for (BinaryBasicBlock &BB : BF) {
408     auto I = BB.begin();
409     if (I == BB.end())
410       continue;
411 
412     DA.doForAllDominators(*I, [&](const MCInst &DomInst) {
413       BinaryBasicBlock *DomBB = InsnToBB[&DomInst];
414       if (Visited[DomBB].count(&BB))
415         return;
416       Visited[DomBB].insert(&BB);
417       if (!PDA.doesADominateB(*I, DomInst))
418         return;
419       if (LoopNestLevel[&BB] != LoopNestLevel[DomBB])
420         return;
421       if (BBsToEC[DomBB] == -1 && BBsToEC[&BB] == -1) {
422         BBsToEC[DomBB] = Classes.size();
423         BBsToEC[&BB] = Classes.size();
424         Classes.emplace_back();
425         Classes.back().push_back(DomBB);
426         Classes.back().push_back(&BB);
427         return;
428       }
429       if (BBsToEC[DomBB] == -1) {
430         BBsToEC[DomBB] = BBsToEC[&BB];
431         Classes[BBsToEC[&BB]].push_back(DomBB);
432         return;
433       }
434       if (BBsToEC[&BB] == -1) {
435         BBsToEC[&BB] = BBsToEC[DomBB];
436         Classes[BBsToEC[DomBB]].push_back(&BB);
437         return;
438       }
439       signed BBECNum = BBsToEC[&BB];
440       std::vector<BinaryBasicBlock *> DomEC = Classes[BBsToEC[DomBB]];
441       std::vector<BinaryBasicBlock *> BBEC = Classes[BBECNum];
442       for (BinaryBasicBlock *Block : DomEC) {
443         BBsToEC[Block] = BBECNum;
444         BBEC.push_back(Block);
445       }
446       DomEC.clear();
447     });
448   }
449 
450   for (std::vector<BinaryBasicBlock *> &Class : Classes) {
451     uint64_t Max = 0ULL;
452     for (BinaryBasicBlock *BB : Class)
453       Max = std::max(Max, BB->getExecutionCount());
454     for (BinaryBasicBlock *BB : Class)
455       BB->setExecutionCount(Max);
456   }
457 }
458 
459 } // end anonymous namespace
460 
461 void estimateEdgeCounts(BinaryFunction &BF) {
462   EdgeWeightMap PredEdgeWeights;
463   EdgeWeightMap SuccEdgeWeights;
464   if (!opts::IterativeGuess) {
465     computeEdgeWeights<Inverse<BinaryBasicBlock *>>(BF, PredEdgeWeights);
466     computeEdgeWeights<BinaryBasicBlock *>(BF, SuccEdgeWeights);
467   }
468   if (opts::EqualizeBBCounts) {
469     LLVM_DEBUG(BF.print(dbgs(), "before equalize BB counts", true));
470     equalizeBBCounts(BF);
471     LLVM_DEBUG(BF.print(dbgs(), "after equalize BB counts", true));
472   }
473   if (opts::IterativeGuess)
474     guessEdgeByIterativeApproach(BF);
475   else
476     guessEdgeByRelHotness(BF, /*UseSuccs=*/false, PredEdgeWeights,
477                           SuccEdgeWeights);
478   recalculateBBCounts(BF, /*AllEdges=*/false);
479 }
480 
481 void solveMCF(BinaryFunction &BF, MCFCostFunction CostFunction) {
482   llvm_unreachable("not implemented");
483 }
484 
485 } // namespace bolt
486 } // namespace llvm
487