1 //===- bolt/Passes/MCF.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements functions for solving minimum-cost flow problem.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "bolt/Passes/MCF.h"
14 #include "bolt/Core/BinaryFunction.h"
15 #include "bolt/Passes/DataflowInfoManager.h"
16 #include "bolt/Utils/CommandLineOpts.h"
17 #include "llvm/ADT/DenseMap.h"
18 #include "llvm/Support/CommandLine.h"
19 #include <algorithm>
20 #include <vector>
21
22 #undef DEBUG_TYPE
23 #define DEBUG_TYPE "mcf"
24
25 using namespace llvm;
26 using namespace bolt;
27
28 namespace opts {
29
30 extern cl::OptionCategory BoltOptCategory;
31
32 extern cl::opt<bool> TimeOpts;
33
34 static cl::opt<bool> IterativeGuess(
35 "iterative-guess",
36 cl::desc("in non-LBR mode, guess edge counts using iterative technique"),
37 cl::Hidden, cl::cat(BoltOptCategory));
38
39 static cl::opt<bool> UseRArcs(
40 "mcf-use-rarcs",
41 cl::desc("in MCF, consider the possibility of cancelling flow to balance "
42 "edges"),
43 cl::Hidden, cl::cat(BoltOptCategory));
44
45 } // namespace opts
46
47 namespace llvm {
48 namespace bolt {
49
50 namespace {
51
52 // Edge Weight Inference Heuristic
53 //
54 // We start by maintaining the invariant used in LBR mode where the sum of
55 // pred edges count is equal to the block execution count. This loop will set
56 // pred edges count by balancing its own execution count in different pred
57 // edges. The weight of each edge is guessed by looking at how hot each pred
58 // block is (in terms of samples).
59 // There are two caveats in this approach. One is for critical edges and the
60 // other is for self-referencing blocks (loops of 1 BB). For critical edges,
61 // we can't infer the hotness of them based solely on pred BBs execution
62 // count. For each critical edge we look at the pred BB, then look at its
63 // succs to adjust its weight.
64 //
65 // [ 60 ] [ 25 ]
66 // | \ |
67 // [ 10 ] [ 75 ]
68 //
69 // The illustration above shows a critical edge \. We wish to adjust bb count
70 // 60 to 50 to properly determine the weight of the critical edge to be
71 // 50 / 75.
72 // For self-referencing edges, we attribute its weight by subtracting the
73 // current BB execution count by the sum of predecessors count if this result
74 // is non-negative.
75 using EdgeWeightMap =
76 DenseMap<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>,
77 double>;
78
79 template <class NodeT>
80 void updateEdgeWeight(EdgeWeightMap &EdgeWeights, const BinaryBasicBlock *A,
81 const BinaryBasicBlock *B, double Weight);
82
83 template <>
updateEdgeWeight(EdgeWeightMap & EdgeWeights,const BinaryBasicBlock * A,const BinaryBasicBlock * B,double Weight)84 void updateEdgeWeight<BinaryBasicBlock *>(EdgeWeightMap &EdgeWeights,
85 const BinaryBasicBlock *A,
86 const BinaryBasicBlock *B,
87 double Weight) {
88 EdgeWeights[std::make_pair(A, B)] = Weight;
89 return;
90 }
91
92 template <>
updateEdgeWeight(EdgeWeightMap & EdgeWeights,const BinaryBasicBlock * A,const BinaryBasicBlock * B,double Weight)93 void updateEdgeWeight<Inverse<BinaryBasicBlock *>>(EdgeWeightMap &EdgeWeights,
94 const BinaryBasicBlock *A,
95 const BinaryBasicBlock *B,
96 double Weight) {
97 EdgeWeights[std::make_pair(B, A)] = Weight;
98 return;
99 }
100
101 template <class NodeT>
computeEdgeWeights(BinaryBasicBlock * BB,EdgeWeightMap & EdgeWeights)102 void computeEdgeWeights(BinaryBasicBlock *BB, EdgeWeightMap &EdgeWeights) {
103 typedef GraphTraits<NodeT> GraphT;
104 typedef GraphTraits<Inverse<NodeT>> InvTraits;
105
106 double TotalChildrenCount = 0.0;
107 SmallVector<double, 4> ChildrenExecCount;
108 // First pass computes total children execution count that directly
109 // contribute to this BB.
110 for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
111 E = GraphT::child_end(BB);
112 CI != E; ++CI) {
113 typename GraphT::NodeRef Child = *CI;
114 double ChildExecCount = Child->getExecutionCount();
115 // Is self-reference?
116 if (Child == BB) {
117 ChildExecCount = 0.0; // will fill this in second pass
118 } else if (GraphT::child_end(BB) - GraphT::child_begin(BB) > 1 &&
119 InvTraits::child_end(Child) - InvTraits::child_begin(Child) >
120 1) {
121 // Handle critical edges. This will cause a skew towards crit edges, but
122 // it is a quick solution.
123 double CritWeight = 0.0;
124 uint64_t Denominator = 0;
125 for (typename InvTraits::ChildIteratorType
126 II = InvTraits::child_begin(Child),
127 IE = InvTraits::child_end(Child);
128 II != IE; ++II) {
129 typename GraphT::NodeRef N = *II;
130 Denominator += N->getExecutionCount();
131 if (N != BB)
132 continue;
133 CritWeight = N->getExecutionCount();
134 }
135 if (Denominator)
136 CritWeight /= static_cast<double>(Denominator);
137 ChildExecCount *= CritWeight;
138 }
139 ChildrenExecCount.push_back(ChildExecCount);
140 TotalChildrenCount += ChildExecCount;
141 }
142 // Second pass fixes the weight of a possible self-reference edge
143 uint32_t ChildIndex = 0;
144 for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
145 E = GraphT::child_end(BB);
146 CI != E; ++CI) {
147 typename GraphT::NodeRef Child = *CI;
148 if (Child != BB) {
149 ++ChildIndex;
150 continue;
151 }
152 if (static_cast<double>(BB->getExecutionCount()) > TotalChildrenCount) {
153 ChildrenExecCount[ChildIndex] =
154 BB->getExecutionCount() - TotalChildrenCount;
155 TotalChildrenCount += ChildrenExecCount[ChildIndex];
156 }
157 break;
158 }
159 // Third pass finally assigns weights to edges
160 ChildIndex = 0;
161 for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB),
162 E = GraphT::child_end(BB);
163 CI != E; ++CI) {
164 typename GraphT::NodeRef Child = *CI;
165 double Weight = 1 / (GraphT::child_end(BB) - GraphT::child_begin(BB));
166 if (TotalChildrenCount != 0.0)
167 Weight = ChildrenExecCount[ChildIndex] / TotalChildrenCount;
168 updateEdgeWeight<NodeT>(EdgeWeights, BB, Child, Weight);
169 ++ChildIndex;
170 }
171 }
172
173 template <class NodeT>
computeEdgeWeights(BinaryFunction & BF,EdgeWeightMap & EdgeWeights)174 void computeEdgeWeights(BinaryFunction &BF, EdgeWeightMap &EdgeWeights) {
175 for (BinaryBasicBlock &BB : BF)
176 computeEdgeWeights<NodeT>(&BB, EdgeWeights);
177 }
178
179 /// Make BB count match the sum of all incoming edges. If AllEdges is true,
180 /// make it match max(SumPredEdges, SumSuccEdges).
recalculateBBCounts(BinaryFunction & BF,bool AllEdges)181 void recalculateBBCounts(BinaryFunction &BF, bool AllEdges) {
182 for (BinaryBasicBlock &BB : BF) {
183 uint64_t TotalPredsEWeight = 0;
184 for (BinaryBasicBlock *Pred : BB.predecessors())
185 TotalPredsEWeight += Pred->getBranchInfo(BB).Count;
186
187 if (TotalPredsEWeight > BB.getExecutionCount())
188 BB.setExecutionCount(TotalPredsEWeight);
189
190 if (!AllEdges)
191 continue;
192
193 uint64_t TotalSuccsEWeight = 0;
194 for (BinaryBasicBlock::BinaryBranchInfo &BI : BB.branch_info())
195 TotalSuccsEWeight += BI.Count;
196
197 if (TotalSuccsEWeight > BB.getExecutionCount())
198 BB.setExecutionCount(TotalSuccsEWeight);
199 }
200 }
201
202 // This is our main edge count guessing heuristic. Look at predecessors and
203 // assign a proportionally higher count to pred edges coming from blocks with
204 // a higher execution count in comparison with the other predecessor blocks,
205 // making SumPredEdges match the current BB count.
206 // If "UseSucc" is true, apply the same logic to successor edges as well. Since
207 // some successor edges may already have assigned a count, only update it if the
208 // new count is higher.
guessEdgeByRelHotness(BinaryFunction & BF,bool UseSucc,EdgeWeightMap & PredEdgeWeights,EdgeWeightMap & SuccEdgeWeights)209 void guessEdgeByRelHotness(BinaryFunction &BF, bool UseSucc,
210 EdgeWeightMap &PredEdgeWeights,
211 EdgeWeightMap &SuccEdgeWeights) {
212 for (BinaryBasicBlock &BB : BF) {
213 for (BinaryBasicBlock *Pred : BB.predecessors()) {
214 double RelativeExec = PredEdgeWeights[std::make_pair(Pred, &BB)];
215 RelativeExec *= BB.getExecutionCount();
216 BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(BB);
217 if (static_cast<uint64_t>(RelativeExec) > BI.Count)
218 BI.Count = static_cast<uint64_t>(RelativeExec);
219 }
220
221 if (!UseSucc)
222 continue;
223
224 auto BI = BB.branch_info_begin();
225 for (BinaryBasicBlock *Succ : BB.successors()) {
226 double RelativeExec = SuccEdgeWeights[std::make_pair(&BB, Succ)];
227 RelativeExec *= BB.getExecutionCount();
228 if (static_cast<uint64_t>(RelativeExec) > BI->Count)
229 BI->Count = static_cast<uint64_t>(RelativeExec);
230 ++BI;
231 }
232 }
233 }
234
235 using ArcSet =
236 DenseSet<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>>;
237
238 /// Predecessor edges version of guessEdgeByIterativeApproach. GuessedArcs has
239 /// all edges we already established their count. Try to guess the count of
240 /// the remaining edge, if there is only one to guess, and return true if we
241 /// were able to guess.
guessPredEdgeCounts(BinaryBasicBlock * BB,ArcSet & GuessedArcs)242 bool guessPredEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
243 if (BB->pred_size() == 0)
244 return false;
245
246 uint64_t TotalPredCount = 0;
247 unsigned NumGuessedEdges = 0;
248 for (BinaryBasicBlock *Pred : BB->predecessors()) {
249 if (GuessedArcs.count(std::make_pair(Pred, BB)))
250 ++NumGuessedEdges;
251 TotalPredCount += Pred->getBranchInfo(*BB).Count;
252 }
253
254 if (NumGuessedEdges != BB->pred_size() - 1)
255 return false;
256
257 int64_t Guessed =
258 static_cast<int64_t>(BB->getExecutionCount()) - TotalPredCount;
259 if (Guessed < 0)
260 Guessed = 0;
261
262 for (BinaryBasicBlock *Pred : BB->predecessors()) {
263 if (GuessedArcs.count(std::make_pair(Pred, BB)))
264 continue;
265
266 Pred->getBranchInfo(*BB).Count = Guessed;
267 return true;
268 }
269 llvm_unreachable("Expected unguessed arc");
270 }
271
272 /// Successor edges version of guessEdgeByIterativeApproach. GuessedArcs has
273 /// all edges we already established their count. Try to guess the count of
274 /// the remaining edge, if there is only one to guess, and return true if we
275 /// were able to guess.
guessSuccEdgeCounts(BinaryBasicBlock * BB,ArcSet & GuessedArcs)276 bool guessSuccEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) {
277 if (BB->succ_size() == 0)
278 return false;
279
280 uint64_t TotalSuccCount = 0;
281 unsigned NumGuessedEdges = 0;
282 auto BI = BB->branch_info_begin();
283 for (BinaryBasicBlock *Succ : BB->successors()) {
284 if (GuessedArcs.count(std::make_pair(BB, Succ)))
285 ++NumGuessedEdges;
286 TotalSuccCount += BI->Count;
287 ++BI;
288 }
289
290 if (NumGuessedEdges != BB->succ_size() - 1)
291 return false;
292
293 int64_t Guessed =
294 static_cast<int64_t>(BB->getExecutionCount()) - TotalSuccCount;
295 if (Guessed < 0)
296 Guessed = 0;
297
298 BI = BB->branch_info_begin();
299 for (BinaryBasicBlock *Succ : BB->successors()) {
300 if (GuessedArcs.count(std::make_pair(BB, Succ))) {
301 ++BI;
302 continue;
303 }
304
305 BI->Count = Guessed;
306 GuessedArcs.insert(std::make_pair(BB, Succ));
307 return true;
308 }
309 llvm_unreachable("Expected unguessed arc");
310 }
311
312 /// Guess edge count whenever we have only one edge (pred or succ) left
313 /// to guess. Then make its count equal to BB count minus all other edge
314 /// counts we already know their count. Repeat this until there is no
315 /// change.
guessEdgeByIterativeApproach(BinaryFunction & BF)316 void guessEdgeByIterativeApproach(BinaryFunction &BF) {
317 ArcSet KnownArcs;
318 bool Changed = false;
319
320 do {
321 Changed = false;
322 for (BinaryBasicBlock &BB : BF) {
323 if (guessPredEdgeCounts(&BB, KnownArcs))
324 Changed = true;
325 if (guessSuccEdgeCounts(&BB, KnownArcs))
326 Changed = true;
327 }
328 } while (Changed);
329
330 // Guess count for non-inferred edges
331 for (BinaryBasicBlock &BB : BF) {
332 for (BinaryBasicBlock *Pred : BB.predecessors()) {
333 if (KnownArcs.count(std::make_pair(Pred, &BB)))
334 continue;
335 BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(BB);
336 BI.Count =
337 std::min(Pred->getExecutionCount(), BB.getExecutionCount()) / 2;
338 KnownArcs.insert(std::make_pair(Pred, &BB));
339 }
340 auto BI = BB.branch_info_begin();
341 for (BinaryBasicBlock *Succ : BB.successors()) {
342 if (KnownArcs.count(std::make_pair(&BB, Succ))) {
343 ++BI;
344 continue;
345 }
346 BI->Count =
347 std::min(BB.getExecutionCount(), Succ->getExecutionCount()) / 2;
348 KnownArcs.insert(std::make_pair(&BB, Succ));
349 break;
350 }
351 }
352 }
353
354 /// Associate each basic block with the BinaryLoop object corresponding to the
355 /// innermost loop containing this block.
356 DenseMap<const BinaryBasicBlock *, const BinaryLoop *>
createLoopNestLevelMap(BinaryFunction & BF)357 createLoopNestLevelMap(BinaryFunction &BF) {
358 DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel;
359 const BinaryLoopInfo &BLI = BF.getLoopInfo();
360
361 for (BinaryBasicBlock &BB : BF)
362 LoopNestLevel[&BB] = BLI[&BB];
363
364 return LoopNestLevel;
365 }
366
367 } // end anonymous namespace
368
equalizeBBCounts(DataflowInfoManager & Info,BinaryFunction & BF)369 void equalizeBBCounts(DataflowInfoManager &Info, BinaryFunction &BF) {
370 if (BF.begin() == BF.end())
371 return;
372
373 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis();
374 DominatorAnalysis<true> &PDA = Info.getPostDominatorAnalysis();
375 auto &InsnToBB = Info.getInsnToBBMap();
376 // These analyses work at the instruction granularity, but we really only need
377 // basic block granularity here. So we'll use a set of visited edges to avoid
378 // revisiting the same BBs again and again.
379 DenseMap<const BinaryBasicBlock *, std::set<const BinaryBasicBlock *>>
380 Visited;
381 // Equivalence classes mapping. Each equivalence class is defined by the set
382 // of BBs that obeys the aforementioned properties.
383 DenseMap<const BinaryBasicBlock *, signed> BBsToEC;
384 std::vector<std::vector<BinaryBasicBlock *>> Classes;
385
386 BF.calculateLoopInfo();
387 DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel =
388 createLoopNestLevelMap(BF);
389
390 for (BinaryBasicBlock &BB : BF)
391 BBsToEC[&BB] = -1;
392
393 for (BinaryBasicBlock &BB : BF) {
394 auto I = BB.begin();
395 if (I == BB.end())
396 continue;
397
398 DA.doForAllDominators(*I, [&](const MCInst &DomInst) {
399 BinaryBasicBlock *DomBB = InsnToBB[&DomInst];
400 if (Visited[DomBB].count(&BB))
401 return;
402 Visited[DomBB].insert(&BB);
403 if (!PDA.doesADominateB(*I, DomInst))
404 return;
405 if (LoopNestLevel[&BB] != LoopNestLevel[DomBB])
406 return;
407 if (BBsToEC[DomBB] == -1 && BBsToEC[&BB] == -1) {
408 BBsToEC[DomBB] = Classes.size();
409 BBsToEC[&BB] = Classes.size();
410 Classes.emplace_back();
411 Classes.back().push_back(DomBB);
412 Classes.back().push_back(&BB);
413 return;
414 }
415 if (BBsToEC[DomBB] == -1) {
416 BBsToEC[DomBB] = BBsToEC[&BB];
417 Classes[BBsToEC[&BB]].push_back(DomBB);
418 return;
419 }
420 if (BBsToEC[&BB] == -1) {
421 BBsToEC[&BB] = BBsToEC[DomBB];
422 Classes[BBsToEC[DomBB]].push_back(&BB);
423 return;
424 }
425 signed BBECNum = BBsToEC[&BB];
426 std::vector<BinaryBasicBlock *> DomEC = Classes[BBsToEC[DomBB]];
427 std::vector<BinaryBasicBlock *> BBEC = Classes[BBECNum];
428 for (BinaryBasicBlock *Block : DomEC) {
429 BBsToEC[Block] = BBECNum;
430 BBEC.push_back(Block);
431 }
432 DomEC.clear();
433 });
434 }
435
436 for (std::vector<BinaryBasicBlock *> &Class : Classes) {
437 uint64_t Max = 0ULL;
438 for (BinaryBasicBlock *BB : Class)
439 Max = std::max(Max, BB->getExecutionCount());
440 for (BinaryBasicBlock *BB : Class)
441 BB->setExecutionCount(Max);
442 }
443 }
444
estimateEdgeCounts(BinaryFunction & BF)445 void estimateEdgeCounts(BinaryFunction &BF) {
446 EdgeWeightMap PredEdgeWeights;
447 EdgeWeightMap SuccEdgeWeights;
448 if (!opts::IterativeGuess) {
449 computeEdgeWeights<Inverse<BinaryBasicBlock *>>(BF, PredEdgeWeights);
450 computeEdgeWeights<BinaryBasicBlock *>(BF, SuccEdgeWeights);
451 }
452 if (opts::EqualizeBBCounts) {
453 LLVM_DEBUG(BF.print(dbgs(), "before equalize BB counts", true));
454 auto Info = DataflowInfoManager(BF, nullptr, nullptr);
455 equalizeBBCounts(Info, BF);
456 LLVM_DEBUG(BF.print(dbgs(), "after equalize BB counts", true));
457 }
458 if (opts::IterativeGuess)
459 guessEdgeByIterativeApproach(BF);
460 else
461 guessEdgeByRelHotness(BF, /*UseSuccs=*/false, PredEdgeWeights,
462 SuccEdgeWeights);
463 recalculateBBCounts(BF, /*AllEdges=*/false);
464 }
465
solveMCF(BinaryFunction & BF,MCFCostFunction CostFunction)466 void solveMCF(BinaryFunction &BF, MCFCostFunction CostFunction) {
467 llvm_unreachable("not implemented");
468 }
469
470 } // namespace bolt
471 } // namespace llvm
472