xref: /llvm-project-15.0.7/bolt/lib/Passes/MCF.cpp (revision 3332904a)
1 //===- bolt/Passes/MCF.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements functions for solving minimum-cost flow problem.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "bolt/Passes/MCF.h"
14 #include "bolt/Core/BinaryFunction.h"
15 #include "bolt/Passes/DataflowInfoManager.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/Support/CommandLine.h"
19 #include <algorithm>
20 #include <vector>
21 
22 #undef  DEBUG_TYPE
23 #define DEBUG_TYPE "mcf"
24 
25 using namespace llvm;
26 using namespace bolt;
27 
28 namespace opts {
29 
30 extern cl::OptionCategory BoltOptCategory;
31 
32 extern cl::opt<bool> TimeOpts;
33 
34 static cl::opt<bool> IterativeGuess(
35     "iterative-guess",
36     cl::desc("in non-LBR mode, guess edge counts using iterative technique"),
37     cl::Hidden, cl::cat(BoltOptCategory));
38 
39 static cl::opt<bool> UseRArcs(
40     "mcf-use-rarcs",
41     cl::desc("in MCF, consider the possibility of cancelling flow to balance "
42              "edges"),
43     cl::Hidden, cl::cat(BoltOptCategory));
44 
45 } // namespace opts
46 
47 namespace llvm {
48 namespace bolt {
49 
50 namespace {
51 
52 // Edge Weight Inference Heuristic
53 //
54 // We start by maintaining the invariant used in LBR mode where the sum of
55 // pred edges count is equal to the block execution count. This loop will set
56 // pred edges count by balancing its own execution count in different pred
57 // edges. The weight of each edge is guessed by looking at how hot each pred
58 // block is (in terms of samples).
59 // There are two caveats in this approach. One is for critical edges and the
60 // other is for self-referencing blocks (loops of 1 BB). For critical edges,
61 // we can't infer the hotness of them based solely on pred BBs execution
62 // count. For each critical edge we look at the pred BB, then look at its
63 // succs to adjust its weight.
64 //
65 //    [ 60  ]       [ 25 ]
66 //       |      \     |
67 //    [ 10  ]       [ 75 ]
68 //
69 // The illustration above shows a critical edge \. We wish to adjust bb count
70 // 60 to 50 to properly determine the weight of the critical edge to be
71 // 50 / 75.
72 // For self-referencing edges, we attribute its weight by subtracting the
73 // current BB execution count by the sum of predecessors count if this result
74 // is non-negative.
75 using EdgeWeightMap =
76     DenseMap<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>,
77              double>;
78 
79 template <class NodeT>
80 void updateEdgeWeight(EdgeWeightMap &EdgeWeights, const BinaryBasicBlock *A,
81                       const BinaryBasicBlock *B, double Weight);
82 
83 template <>
updateEdgeWeight(EdgeWeightMap & EdgeWeights,const BinaryBasicBlock * A,const BinaryBasicBlock * B,double Weight)84 void updateEdgeWeight<BinaryBasicBlock *>(EdgeWeightMap &EdgeWeights,
85                                           const BinaryBasicBlock *A,
86                                           const BinaryBasicBlock *B,
87                                           double Weight) {
88   EdgeWeights[std::make_pair(A, B)] = Weight;
89   return;
90 }
91 
92 template <>
updateEdgeWeight(EdgeWeightMap & EdgeWeights,const BinaryBasicBlock * A,const BinaryBasicBlock * B,double Weight)93 void updateEdgeWeight<Inverse<BinaryBasicBlock *>>(EdgeWeightMap &EdgeWeights,
94                                                    const BinaryBasicBlock *A,
95                                                    const BinaryBasicBlock *B,
96                                                    double Weight) {
97   EdgeWeights[std::make_pair(B, A)] = Weight;
98   return;
99 }
100 
101 template <class NodeT>
computeEdgeWeights(BinaryBasicBlock * BB,EdgeWeightMap & EdgeWeights)102 void computeEdgeWeights(BinaryBasicBlock *BB, EdgeWeightMap &EdgeWeights) {
103   typedef GraphTraits<NodeT> GraphT;
104   typedef GraphTraits<Inverse<NodeT>> InvTraits;
105 
106   double TotalChildrenCount = 0.0;
107   SmallVector<double, 4> ChildrenExecCount;
108   // First pass computes total children execution count that directly
109   // contribute to this BB.
110   for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
111                                           E = GraphT::child_end(BB);
112        CI != E; ++CI) {
113     typename GraphT::NodeRef Child = *CI;
114     double ChildExecCount = Child->getExecutionCount();
115     // Is self-reference?
116     if (Child == BB) {
117       ChildExecCount = 0.0; // will fill this in second pass
118     } else if (GraphT::child_end(BB) - GraphT::child_begin(BB) > 1 &&
119                InvTraits::child_end(Child) - InvTraits::child_begin(Child) >
120                    1) {
121       // Handle critical edges. This will cause a skew towards crit edges, but
122       // it is a quick solution.
123       double CritWeight = 0.0;
124       uint64_t Denominator = 0;
125       for (typename InvTraits::ChildIteratorType
126                II = InvTraits::child_begin(Child),
127                IE = InvTraits::child_end(Child);
128            II != IE; ++II) {
129         typename GraphT::NodeRef N = *II;
130         Denominator += N->getExecutionCount();
131         if (N != BB)
132           continue;
133         CritWeight = N->getExecutionCount();
134       }
135       if (Denominator)
136         CritWeight /= static_cast<double>(Denominator);
137       ChildExecCount *= CritWeight;
138     }
139     ChildrenExecCount.push_back(ChildExecCount);
140     TotalChildrenCount += ChildExecCount;
141   }
142   // Second pass fixes the weight of a possible self-reference edge
143   uint32_t ChildIndex = 0;
144   for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
145                                           E = GraphT::child_end(BB);
146        CI != E; ++CI) {
147     typename GraphT::NodeRef Child = *CI;
148     if (Child != BB) {
149       ++ChildIndex;
150       continue;
151     }
152     if (static_cast<double>(BB->getExecutionCount()) > TotalChildrenCount) {
153       ChildrenExecCount[ChildIndex] =
154           BB->getExecutionCount() - TotalChildrenCount;
155       TotalChildrenCount += ChildrenExecCount[ChildIndex];
156     }
157     break;
158   }
159   // Third pass finally assigns weights to edges
160   ChildIndex = 0;
161   for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
162                                           E = GraphT::child_end(BB);
163        CI != E; ++CI) {
164     typename GraphT::NodeRef Child = *CI;
165     double Weight = 1 / (GraphT::child_end(BB) - GraphT::child_begin(BB));
166     if (TotalChildrenCount != 0.0)
167       Weight = ChildrenExecCount[ChildIndex] / TotalChildrenCount;
168     updateEdgeWeight<NodeT>(EdgeWeights, BB, Child, Weight);
169     ++ChildIndex;
170   }
171 }
172 
173 template <class NodeT>
computeEdgeWeights(BinaryFunction & BF,EdgeWeightMap & EdgeWeights)174 void computeEdgeWeights(BinaryFunction &BF, EdgeWeightMap &EdgeWeights) {
175   for (BinaryBasicBlock &BB : BF)
176     computeEdgeWeights<NodeT>(&BB, EdgeWeights);
177 }
178 
179 /// Make BB count match the sum of all incoming edges. If AllEdges is true,
180 /// make it match max(SumPredEdges, SumSuccEdges).
recalculateBBCounts(BinaryFunction & BF,bool AllEdges)181 void recalculateBBCounts(BinaryFunction &BF, bool AllEdges) {
182   for (BinaryBasicBlock &BB : BF) {
183     uint64_t TotalPredsEWeight = 0;
184     for (BinaryBasicBlock *Pred : BB.predecessors())
185       TotalPredsEWeight += Pred->getBranchInfo(BB).Count;
186 
187     if (TotalPredsEWeight > BB.getExecutionCount())
188       BB.setExecutionCount(TotalPredsEWeight);
189 
190     if (!AllEdges)
191       continue;
192 
193     uint64_t TotalSuccsEWeight = 0;
194     for (BinaryBasicBlock::BinaryBranchInfo &BI : BB.branch_info())
195       TotalSuccsEWeight += BI.Count;
196 
197     if (TotalSuccsEWeight > BB.getExecutionCount())
198       BB.setExecutionCount(TotalSuccsEWeight);
199   }
200 }
201 
202 // This is our main edge count guessing heuristic. Look at predecessors and
203 // assign a proportionally higher count to pred edges coming from blocks with
204 // a higher execution count in comparison with the other predecessor blocks,
205 // making SumPredEdges match the current BB count.
206 // If "UseSucc" is true, apply the same logic to successor edges as well. Since
207 // some successor edges may already have assigned a count, only update it if the
208 // new count is higher.
guessEdgeByRelHotness(BinaryFunction & BF,bool UseSucc,EdgeWeightMap & PredEdgeWeights,EdgeWeightMap & SuccEdgeWeights)209 void guessEdgeByRelHotness(BinaryFunction &BF, bool UseSucc,
210                            EdgeWeightMap &PredEdgeWeights,
211                            EdgeWeightMap &SuccEdgeWeights) {
212   for (BinaryBasicBlock &BB : BF) {
213     for (BinaryBasicBlock *Pred : BB.predecessors()) {
214       double RelativeExec = PredEdgeWeights[std::make_pair(Pred, &BB)];
215       RelativeExec *= BB.getExecutionCount();
216       BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(BB);
217       if (static_cast<uint64_t>(RelativeExec) > BI.Count)
218         BI.Count = static_cast<uint64_t>(RelativeExec);
219     }
220 
221     if (!UseSucc)
222       continue;
223 
224     auto BI = BB.branch_info_begin();
225     for (BinaryBasicBlock *Succ : BB.successors()) {
226       double RelativeExec = SuccEdgeWeights[std::make_pair(&BB, Succ)];
227       RelativeExec *= BB.getExecutionCount();
228       if (static_cast<uint64_t>(RelativeExec) > BI->Count)
229         BI->Count = static_cast<uint64_t>(RelativeExec);
230       ++BI;
231     }
232   }
233 }
234 
235 using ArcSet =
236     DenseSet<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>>;
237 
238 /// Predecessor edges version of guessEdgeByIterativeApproach. GuessedArcs has
239 /// all edges we already established their count. Try to guess the count of
240 /// the remaining edge, if there is only one to guess, and return true if we
241 /// were able to guess.
guessPredEdgeCounts(BinaryBasicBlock * BB,ArcSet & GuessedArcs)242 bool guessPredEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
243   if (BB->pred_size() == 0)
244     return false;
245 
246   uint64_t TotalPredCount = 0;
247   unsigned NumGuessedEdges = 0;
248   for (BinaryBasicBlock *Pred : BB->predecessors()) {
249     if (GuessedArcs.count(std::make_pair(Pred, BB)))
250       ++NumGuessedEdges;
251     TotalPredCount += Pred->getBranchInfo(*BB).Count;
252   }
253 
254   if (NumGuessedEdges != BB->pred_size() - 1)
255     return false;
256 
257   int64_t Guessed =
258       static_cast<int64_t>(BB->getExecutionCount()) - TotalPredCount;
259   if (Guessed < 0)
260     Guessed = 0;
261 
262   for (BinaryBasicBlock *Pred : BB->predecessors()) {
263     if (GuessedArcs.count(std::make_pair(Pred, BB)))
264       continue;
265 
266     Pred->getBranchInfo(*BB).Count = Guessed;
267     return true;
268   }
269   llvm_unreachable("Expected unguessed arc");
270 }
271 
272 /// Successor edges version of guessEdgeByIterativeApproach. GuessedArcs has
273 /// all edges we already established their count. Try to guess the count of
274 /// the remaining edge, if there is only one to guess, and return true if we
275 /// were able to guess.
guessSuccEdgeCounts(BinaryBasicBlock * BB,ArcSet & GuessedArcs)276 bool guessSuccEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
277   if (BB->succ_size() == 0)
278     return false;
279 
280   uint64_t TotalSuccCount = 0;
281   unsigned NumGuessedEdges = 0;
282   auto BI = BB->branch_info_begin();
283   for (BinaryBasicBlock *Succ : BB->successors()) {
284     if (GuessedArcs.count(std::make_pair(BB, Succ)))
285       ++NumGuessedEdges;
286     TotalSuccCount += BI->Count;
287     ++BI;
288   }
289 
290   if (NumGuessedEdges != BB->succ_size() - 1)
291     return false;
292 
293   int64_t Guessed =
294       static_cast<int64_t>(BB->getExecutionCount()) - TotalSuccCount;
295   if (Guessed < 0)
296     Guessed = 0;
297 
298   BI = BB->branch_info_begin();
299   for (BinaryBasicBlock *Succ : BB->successors()) {
300     if (GuessedArcs.count(std::make_pair(BB, Succ))) {
301       ++BI;
302       continue;
303     }
304 
305     BI->Count = Guessed;
306     GuessedArcs.insert(std::make_pair(BB, Succ));
307     return true;
308   }
309   llvm_unreachable("Expected unguessed arc");
310 }
311 
312 /// Guess edge count whenever we have only one edge (pred or succ) left
313 /// to guess. Then make its count equal to BB count minus all other edge
314 /// counts we already know their count. Repeat this until there is no
315 /// change.
guessEdgeByIterativeApproach(BinaryFunction & BF)316 void guessEdgeByIterativeApproach(BinaryFunction &BF) {
317   ArcSet KnownArcs;
318   bool Changed = false;
319 
320   do {
321     Changed = false;
322     for (BinaryBasicBlock &BB : BF) {
323       if (guessPredEdgeCounts(&BB, KnownArcs))
324         Changed = true;
325       if (guessSuccEdgeCounts(&BB, KnownArcs))
326         Changed = true;
327     }
328   } while (Changed);
329 
330   // Guess count for non-inferred edges
331   for (BinaryBasicBlock &BB : BF) {
332     for (BinaryBasicBlock *Pred : BB.predecessors()) {
333       if (KnownArcs.count(std::make_pair(Pred, &BB)))
334         continue;
335       BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(BB);
336       BI.Count =
337           std::min(Pred->getExecutionCount(), BB.getExecutionCount()) / 2;
338       KnownArcs.insert(std::make_pair(Pred, &BB));
339     }
340     auto BI = BB.branch_info_begin();
341     for (BinaryBasicBlock *Succ : BB.successors()) {
342       if (KnownArcs.count(std::make_pair(&BB, Succ))) {
343         ++BI;
344         continue;
345       }
346       BI->Count =
347           std::min(BB.getExecutionCount(), Succ->getExecutionCount()) / 2;
348       KnownArcs.insert(std::make_pair(&BB, Succ));
349       break;
350     }
351   }
352 }
353 
354 /// Associate each basic block with the BinaryLoop object corresponding to the
355 /// innermost loop containing this block.
356 DenseMap<const BinaryBasicBlock *, const BinaryLoop *>
createLoopNestLevelMap(BinaryFunction & BF)357 createLoopNestLevelMap(BinaryFunction &BF) {
358   DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel;
359   const BinaryLoopInfo &BLI = BF.getLoopInfo();
360 
361   for (BinaryBasicBlock &BB : BF)
362     LoopNestLevel[&BB] = BLI[&BB];
363 
364   return LoopNestLevel;
365 }
366 
367 } // end anonymous namespace
368 
equalizeBBCounts(DataflowInfoManager & Info,BinaryFunction & BF)369 void equalizeBBCounts(DataflowInfoManager &Info, BinaryFunction &BF) {
370   if (BF.begin() == BF.end())
371     return;
372 
373   DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
374   DominatorAnalysis<true> &PDA = Info.getPostDominatorAnalysis();
375   auto &InsnToBB = Info.getInsnToBBMap();
376   // These analyses work at the instruction granularity, but we really only need
377   // basic block granularity here. So we'll use a set of visited edges to avoid
378   // revisiting the same BBs again and again.
379   DenseMap<const BinaryBasicBlock *, std::set<const BinaryBasicBlock *>>
380       Visited;
381   // Equivalence classes mapping. Each equivalence class is defined by the set
382   // of BBs that obeys the aforementioned properties.
383   DenseMap<const BinaryBasicBlock *, signed> BBsToEC;
384   std::vector<std::vector<BinaryBasicBlock *>> Classes;
385 
386   BF.calculateLoopInfo();
387   DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel =
388       createLoopNestLevelMap(BF);
389 
390   for (BinaryBasicBlock &BB : BF)
391     BBsToEC[&BB] = -1;
392 
393   for (BinaryBasicBlock &BB : BF) {
394     auto I = BB.begin();
395     if (I == BB.end())
396       continue;
397 
398     DA.doForAllDominators(*I, [&](const MCInst &DomInst) {
399       BinaryBasicBlock *DomBB = InsnToBB[&DomInst];
400       if (Visited[DomBB].count(&BB))
401         return;
402       Visited[DomBB].insert(&BB);
403       if (!PDA.doesADominateB(*I, DomInst))
404         return;
405       if (LoopNestLevel[&BB] != LoopNestLevel[DomBB])
406         return;
407       if (BBsToEC[DomBB] == -1 && BBsToEC[&BB] == -1) {
408         BBsToEC[DomBB] = Classes.size();
409         BBsToEC[&BB] = Classes.size();
410         Classes.emplace_back();
411         Classes.back().push_back(DomBB);
412         Classes.back().push_back(&BB);
413         return;
414       }
415       if (BBsToEC[DomBB] == -1) {
416         BBsToEC[DomBB] = BBsToEC[&BB];
417         Classes[BBsToEC[&BB]].push_back(DomBB);
418         return;
419       }
420       if (BBsToEC[&BB] == -1) {
421         BBsToEC[&BB] = BBsToEC[DomBB];
422         Classes[BBsToEC[DomBB]].push_back(&BB);
423         return;
424       }
425       signed BBECNum = BBsToEC[&BB];
426       std::vector<BinaryBasicBlock *> DomEC = Classes[BBsToEC[DomBB]];
427       std::vector<BinaryBasicBlock *> BBEC = Classes[BBECNum];
428       for (BinaryBasicBlock *Block : DomEC) {
429         BBsToEC[Block] = BBECNum;
430         BBEC.push_back(Block);
431       }
432       DomEC.clear();
433     });
434   }
435 
436   for (std::vector<BinaryBasicBlock *> &Class : Classes) {
437     uint64_t Max = 0ULL;
438     for (BinaryBasicBlock *BB : Class)
439       Max = std::max(Max, BB->getExecutionCount());
440     for (BinaryBasicBlock *BB : Class)
441       BB->setExecutionCount(Max);
442   }
443 }
444 
estimateEdgeCounts(BinaryFunction & BF)445 void estimateEdgeCounts(BinaryFunction &BF) {
446   EdgeWeightMap PredEdgeWeights;
447   EdgeWeightMap SuccEdgeWeights;
448   if (!opts::IterativeGuess) {
449     computeEdgeWeights<Inverse<BinaryBasicBlock *>>(BF, PredEdgeWeights);
450     computeEdgeWeights<BinaryBasicBlock *>(BF, SuccEdgeWeights);
451   }
452   if (opts::EqualizeBBCounts) {
453     LLVM_DEBUG(BF.print(dbgs(), "before equalize BB counts", true));
454     auto Info = DataflowInfoManager(BF, nullptr, nullptr);
455     equalizeBBCounts(Info, BF);
456     LLVM_DEBUG(BF.print(dbgs(), "after equalize BB counts", true));
457   }
458   if (opts::IterativeGuess)
459     guessEdgeByIterativeApproach(BF);
460   else
461     guessEdgeByRelHotness(BF, /*UseSuccs=*/false, PredEdgeWeights,
462                           SuccEdgeWeights);
463   recalculateBBCounts(BF, /*AllEdges=*/false);
464 }
465 
solveMCF(BinaryFunction & BF,MCFCostFunction CostFunction)466 void solveMCF(BinaryFunction &BF, MCFCostFunction CostFunction) {
467   llvm_unreachable("not implemented");
468 }
469 
470 } // namespace bolt
471 } // namespace llvm
472