1 //===- bolt/Passes/MCF.cpp ------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements functions for solving minimum-cost flow problem. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "bolt/Passes/MCF.h" 14 #include "bolt/Core/BinaryFunction.h" 15 #include "bolt/Passes/DataflowInfoManager.h" 16 #include "bolt/Utils/CommandLineOpts.h" 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/Support/CommandLine.h" 19 #include <algorithm> 20 #include <vector> 21 22 #undef DEBUG_TYPE 23 #define DEBUG_TYPE "mcf" 24 25 using namespace llvm; 26 using namespace bolt; 27 28 namespace opts { 29 30 extern cl::OptionCategory BoltOptCategory; 31 32 extern cl::opt<bool> TimeOpts; 33 34 static cl::opt<bool> IterativeGuess( 35 "iterative-guess", 36 cl::desc("in non-LBR mode, guess edge counts using iterative technique"), 37 cl::Hidden, cl::cat(BoltOptCategory)); 38 39 static cl::opt<bool> UseRArcs( 40 "mcf-use-rarcs", 41 cl::desc("in MCF, consider the possibility of cancelling flow to balance " 42 "edges"), 43 cl::Hidden, cl::cat(BoltOptCategory)); 44 45 } // namespace opts 46 47 namespace llvm { 48 namespace bolt { 49 50 namespace { 51 52 // Edge Weight Inference Heuristic 53 // 54 // We start by maintaining the invariant used in LBR mode where the sum of 55 // pred edges count is equal to the block execution count. This loop will set 56 // pred edges count by balancing its own execution count in different pred 57 // edges. The weight of each edge is guessed by looking at how hot each pred 58 // block is (in terms of samples). 59 // There are two caveats in this approach. One is for critical edges and the 60 // other is for self-referencing blocks (loops of 1 BB). For critical edges, 61 // we can't infer the hotness of them based solely on pred BBs execution 62 // count. For each critical edge we look at the pred BB, then look at its 63 // succs to adjust its weight. 64 // 65 // [ 60 ] [ 25 ] 66 // | \ | 67 // [ 10 ] [ 75 ] 68 // 69 // The illustration above shows a critical edge \. We wish to adjust bb count 70 // 60 to 50 to properly determine the weight of the critical edge to be 71 // 50 / 75. 72 // For self-referencing edges, we attribute its weight by subtracting the 73 // current BB execution count by the sum of predecessors count if this result 74 // is non-negative. 75 using EdgeWeightMap = 76 DenseMap<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>, 77 double>; 78 79 template <class NodeT> 80 void updateEdgeWeight(EdgeWeightMap &EdgeWeights, const BinaryBasicBlock *A, 81 const BinaryBasicBlock *B, double Weight); 82 83 template <> 84 void updateEdgeWeight<BinaryBasicBlock *>(EdgeWeightMap &EdgeWeights, 85 const BinaryBasicBlock *A, 86 const BinaryBasicBlock *B, 87 double Weight) { 88 EdgeWeights[std::make_pair(A, B)] = Weight; 89 return; 90 } 91 92 template <> 93 void updateEdgeWeight<Inverse<BinaryBasicBlock *>>(EdgeWeightMap &EdgeWeights, 94 const BinaryBasicBlock *A, 95 const BinaryBasicBlock *B, 96 double Weight) { 97 EdgeWeights[std::make_pair(B, A)] = Weight; 98 return; 99 } 100 101 template <class NodeT> 102 void computeEdgeWeights(BinaryBasicBlock *BB, EdgeWeightMap &EdgeWeights) { 103 typedef GraphTraits<NodeT> GraphT; 104 typedef GraphTraits<Inverse<NodeT>> InvTraits; 105 106 double TotalChildrenCount = 0.0; 107 SmallVector<double, 4> ChildrenExecCount; 108 // First pass computes total children execution count that directly 109 // contribute to this BB. 110 for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB), 111 E = GraphT::child_end(BB); 112 CI != E; ++CI) { 113 typename GraphT::NodeRef Child = *CI; 114 double ChildExecCount = Child->getExecutionCount(); 115 // Is self-reference? 116 if (Child == BB) { 117 ChildExecCount = 0.0; // will fill this in second pass 118 } else if (GraphT::child_end(BB) - GraphT::child_begin(BB) > 1 && 119 InvTraits::child_end(Child) - InvTraits::child_begin(Child) > 120 1) { 121 // Handle critical edges. This will cause a skew towards crit edges, but 122 // it is a quick solution. 123 double CritWeight = 0.0; 124 uint64_t Denominator = 0; 125 for (typename InvTraits::ChildIteratorType 126 II = InvTraits::child_begin(Child), 127 IE = InvTraits::child_end(Child); 128 II != IE; ++II) { 129 typename GraphT::NodeRef N = *II; 130 Denominator += N->getExecutionCount(); 131 if (N != BB) 132 continue; 133 CritWeight = N->getExecutionCount(); 134 } 135 if (Denominator) 136 CritWeight /= static_cast<double>(Denominator); 137 ChildExecCount *= CritWeight; 138 } 139 ChildrenExecCount.push_back(ChildExecCount); 140 TotalChildrenCount += ChildExecCount; 141 } 142 // Second pass fixes the weight of a possible self-reference edge 143 uint32_t ChildIndex = 0; 144 for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB), 145 E = GraphT::child_end(BB); 146 CI != E; ++CI) { 147 typename GraphT::NodeRef Child = *CI; 148 if (Child != BB) { 149 ++ChildIndex; 150 continue; 151 } 152 if (static_cast<double>(BB->getExecutionCount()) > TotalChildrenCount) { 153 ChildrenExecCount[ChildIndex] = 154 BB->getExecutionCount() - TotalChildrenCount; 155 TotalChildrenCount += ChildrenExecCount[ChildIndex]; 156 } 157 break; 158 } 159 // Third pass finally assigns weights to edges 160 ChildIndex = 0; 161 for (typename GraphT::ChildIteratorType CI = GraphT::child_begin(BB), 162 E = GraphT::child_end(BB); 163 CI != E; ++CI) { 164 typename GraphT::NodeRef Child = *CI; 165 double Weight = 1 / (GraphT::child_end(BB) - GraphT::child_begin(BB)); 166 if (TotalChildrenCount != 0.0) 167 Weight = ChildrenExecCount[ChildIndex] / TotalChildrenCount; 168 updateEdgeWeight<NodeT>(EdgeWeights, BB, Child, Weight); 169 ++ChildIndex; 170 } 171 } 172 173 template <class NodeT> 174 void computeEdgeWeights(BinaryFunction &BF, EdgeWeightMap &EdgeWeights) { 175 for (BinaryBasicBlock &BB : BF) 176 computeEdgeWeights<NodeT>(&BB, EdgeWeights); 177 } 178 179 /// Make BB count match the sum of all incoming edges. If AllEdges is true, 180 /// make it match max(SumPredEdges, SumSuccEdges). 181 void recalculateBBCounts(BinaryFunction &BF, bool AllEdges) { 182 for (BinaryBasicBlock &BB : BF) { 183 uint64_t TotalPredsEWeight = 0; 184 for (BinaryBasicBlock *Pred : BB.predecessors()) 185 TotalPredsEWeight += Pred->getBranchInfo(BB).Count; 186 187 if (TotalPredsEWeight > BB.getExecutionCount()) 188 BB.setExecutionCount(TotalPredsEWeight); 189 190 if (!AllEdges) 191 continue; 192 193 uint64_t TotalSuccsEWeight = 0; 194 for (BinaryBasicBlock::BinaryBranchInfo &BI : BB.branch_info()) 195 TotalSuccsEWeight += BI.Count; 196 197 if (TotalSuccsEWeight > BB.getExecutionCount()) 198 BB.setExecutionCount(TotalSuccsEWeight); 199 } 200 } 201 202 // This is our main edge count guessing heuristic. Look at predecessors and 203 // assign a proportionally higher count to pred edges coming from blocks with 204 // a higher execution count in comparison with the other predecessor blocks, 205 // making SumPredEdges match the current BB count. 206 // If "UseSucc" is true, apply the same logic to successor edges as well. Since 207 // some successor edges may already have assigned a count, only update it if the 208 // new count is higher. 209 void guessEdgeByRelHotness(BinaryFunction &BF, bool UseSucc, 210 EdgeWeightMap &PredEdgeWeights, 211 EdgeWeightMap &SuccEdgeWeights) { 212 for (BinaryBasicBlock &BB : BF) { 213 for (BinaryBasicBlock *Pred : BB.predecessors()) { 214 double RelativeExec = PredEdgeWeights[std::make_pair(Pred, &BB)]; 215 RelativeExec *= BB.getExecutionCount(); 216 BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(BB); 217 if (static_cast<uint64_t>(RelativeExec) > BI.Count) 218 BI.Count = static_cast<uint64_t>(RelativeExec); 219 } 220 221 if (!UseSucc) 222 continue; 223 224 auto BI = BB.branch_info_begin(); 225 for (BinaryBasicBlock *Succ : BB.successors()) { 226 double RelativeExec = SuccEdgeWeights[std::make_pair(&BB, Succ)]; 227 RelativeExec *= BB.getExecutionCount(); 228 if (static_cast<uint64_t>(RelativeExec) > BI->Count) 229 BI->Count = static_cast<uint64_t>(RelativeExec); 230 ++BI; 231 } 232 } 233 } 234 235 using ArcSet = 236 DenseSet<std::pair<const BinaryBasicBlock *, const BinaryBasicBlock *>>; 237 238 /// Predecessor edges version of guessEdgeByIterativeApproach. GuessedArcs has 239 /// all edges we already established their count. Try to guess the count of 240 /// the remaining edge, if there is only one to guess, and return true if we 241 /// were able to guess. 242 bool guessPredEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) { 243 if (BB->pred_size() == 0) 244 return false; 245 246 uint64_t TotalPredCount = 0; 247 unsigned NumGuessedEdges = 0; 248 for (BinaryBasicBlock *Pred : BB->predecessors()) { 249 if (GuessedArcs.count(std::make_pair(Pred, BB))) 250 ++NumGuessedEdges; 251 TotalPredCount += Pred->getBranchInfo(*BB).Count; 252 } 253 254 if (NumGuessedEdges != BB->pred_size() - 1) 255 return false; 256 257 int64_t Guessed = 258 static_cast<int64_t>(BB->getExecutionCount()) - TotalPredCount; 259 if (Guessed < 0) 260 Guessed = 0; 261 262 for (BinaryBasicBlock *Pred : BB->predecessors()) { 263 if (GuessedArcs.count(std::make_pair(Pred, BB))) 264 continue; 265 266 Pred->getBranchInfo(*BB).Count = Guessed; 267 return true; 268 } 269 llvm_unreachable("Expected unguessed arc"); 270 } 271 272 /// Successor edges version of guessEdgeByIterativeApproach. GuessedArcs has 273 /// all edges we already established their count. Try to guess the count of 274 /// the remaining edge, if there is only one to guess, and return true if we 275 /// were able to guess. 276 bool guessSuccEdgeCounts(BinaryBasicBlock *BB, ArcSet &GuessedArcs) { 277 if (BB->succ_size() == 0) 278 return false; 279 280 uint64_t TotalSuccCount = 0; 281 unsigned NumGuessedEdges = 0; 282 auto BI = BB->branch_info_begin(); 283 for (BinaryBasicBlock *Succ : BB->successors()) { 284 if (GuessedArcs.count(std::make_pair(BB, Succ))) 285 ++NumGuessedEdges; 286 TotalSuccCount += BI->Count; 287 ++BI; 288 } 289 290 if (NumGuessedEdges != BB->succ_size() - 1) 291 return false; 292 293 int64_t Guessed = 294 static_cast<int64_t>(BB->getExecutionCount()) - TotalSuccCount; 295 if (Guessed < 0) 296 Guessed = 0; 297 298 BI = BB->branch_info_begin(); 299 for (BinaryBasicBlock *Succ : BB->successors()) { 300 if (GuessedArcs.count(std::make_pair(BB, Succ))) { 301 ++BI; 302 continue; 303 } 304 305 BI->Count = Guessed; 306 GuessedArcs.insert(std::make_pair(BB, Succ)); 307 return true; 308 } 309 llvm_unreachable("Expected unguessed arc"); 310 } 311 312 /// Guess edge count whenever we have only one edge (pred or succ) left 313 /// to guess. Then make its count equal to BB count minus all other edge 314 /// counts we already know their count. Repeat this until there is no 315 /// change. 316 void guessEdgeByIterativeApproach(BinaryFunction &BF) { 317 ArcSet KnownArcs; 318 bool Changed = false; 319 320 do { 321 Changed = false; 322 for (BinaryBasicBlock &BB : BF) { 323 if (guessPredEdgeCounts(&BB, KnownArcs)) 324 Changed = true; 325 if (guessSuccEdgeCounts(&BB, KnownArcs)) 326 Changed = true; 327 } 328 } while (Changed); 329 330 // Guess count for non-inferred edges 331 for (BinaryBasicBlock &BB : BF) { 332 for (BinaryBasicBlock *Pred : BB.predecessors()) { 333 if (KnownArcs.count(std::make_pair(Pred, &BB))) 334 continue; 335 BinaryBasicBlock::BinaryBranchInfo &BI = Pred->getBranchInfo(BB); 336 BI.Count = 337 std::min(Pred->getExecutionCount(), BB.getExecutionCount()) / 2; 338 KnownArcs.insert(std::make_pair(Pred, &BB)); 339 } 340 auto BI = BB.branch_info_begin(); 341 for (BinaryBasicBlock *Succ : BB.successors()) { 342 if (KnownArcs.count(std::make_pair(&BB, Succ))) { 343 ++BI; 344 continue; 345 } 346 BI->Count = 347 std::min(BB.getExecutionCount(), Succ->getExecutionCount()) / 2; 348 KnownArcs.insert(std::make_pair(&BB, Succ)); 349 break; 350 } 351 } 352 } 353 354 /// Associate each basic block with the BinaryLoop object corresponding to the 355 /// innermost loop containing this block. 356 DenseMap<const BinaryBasicBlock *, const BinaryLoop *> 357 createLoopNestLevelMap(BinaryFunction &BF) { 358 DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel; 359 const BinaryLoopInfo &BLI = BF.getLoopInfo(); 360 361 for (BinaryBasicBlock &BB : BF) 362 LoopNestLevel[&BB] = BLI[&BB]; 363 364 return LoopNestLevel; 365 } 366 367 } // end anonymous namespace 368 369 void equalizeBBCounts(DataflowInfoManager &Info, BinaryFunction &BF) { 370 if (BF.begin() == BF.end()) 371 return; 372 373 DominatorAnalysis<false> &DA = Info.getDominatorAnalysis(); 374 DominatorAnalysis<true> &PDA = Info.getPostDominatorAnalysis(); 375 auto &InsnToBB = Info.getInsnToBBMap(); 376 // These analyses work at the instruction granularity, but we really only need 377 // basic block granularity here. So we'll use a set of visited edges to avoid 378 // revisiting the same BBs again and again. 379 DenseMap<const BinaryBasicBlock *, std::set<const BinaryBasicBlock *>> 380 Visited; 381 // Equivalence classes mapping. Each equivalence class is defined by the set 382 // of BBs that obeys the aforementioned properties. 383 DenseMap<const BinaryBasicBlock *, signed> BBsToEC; 384 std::vector<std::vector<BinaryBasicBlock *>> Classes; 385 386 BF.calculateLoopInfo(); 387 DenseMap<const BinaryBasicBlock *, const BinaryLoop *> LoopNestLevel = 388 createLoopNestLevelMap(BF); 389 390 for (BinaryBasicBlock &BB : BF) 391 BBsToEC[&BB] = -1; 392 393 for (BinaryBasicBlock &BB : BF) { 394 auto I = BB.begin(); 395 if (I == BB.end()) 396 continue; 397 398 DA.doForAllDominators(*I, [&](const MCInst &DomInst) { 399 BinaryBasicBlock *DomBB = InsnToBB[&DomInst]; 400 if (Visited[DomBB].count(&BB)) 401 return; 402 Visited[DomBB].insert(&BB); 403 if (!PDA.doesADominateB(*I, DomInst)) 404 return; 405 if (LoopNestLevel[&BB] != LoopNestLevel[DomBB]) 406 return; 407 if (BBsToEC[DomBB] == -1 && BBsToEC[&BB] == -1) { 408 BBsToEC[DomBB] = Classes.size(); 409 BBsToEC[&BB] = Classes.size(); 410 Classes.emplace_back(); 411 Classes.back().push_back(DomBB); 412 Classes.back().push_back(&BB); 413 return; 414 } 415 if (BBsToEC[DomBB] == -1) { 416 BBsToEC[DomBB] = BBsToEC[&BB]; 417 Classes[BBsToEC[&BB]].push_back(DomBB); 418 return; 419 } 420 if (BBsToEC[&BB] == -1) { 421 BBsToEC[&BB] = BBsToEC[DomBB]; 422 Classes[BBsToEC[DomBB]].push_back(&BB); 423 return; 424 } 425 signed BBECNum = BBsToEC[&BB]; 426 std::vector<BinaryBasicBlock *> DomEC = Classes[BBsToEC[DomBB]]; 427 std::vector<BinaryBasicBlock *> BBEC = Classes[BBECNum]; 428 for (BinaryBasicBlock *Block : DomEC) { 429 BBsToEC[Block] = BBECNum; 430 BBEC.push_back(Block); 431 } 432 DomEC.clear(); 433 }); 434 } 435 436 for (std::vector<BinaryBasicBlock *> &Class : Classes) { 437 uint64_t Max = 0ULL; 438 for (BinaryBasicBlock *BB : Class) 439 Max = std::max(Max, BB->getExecutionCount()); 440 for (BinaryBasicBlock *BB : Class) 441 BB->setExecutionCount(Max); 442 } 443 } 444 445 void estimateEdgeCounts(BinaryFunction &BF) { 446 EdgeWeightMap PredEdgeWeights; 447 EdgeWeightMap SuccEdgeWeights; 448 if (!opts::IterativeGuess) { 449 computeEdgeWeights<Inverse<BinaryBasicBlock *>>(BF, PredEdgeWeights); 450 computeEdgeWeights<BinaryBasicBlock *>(BF, SuccEdgeWeights); 451 } 452 if (opts::EqualizeBBCounts) { 453 LLVM_DEBUG(BF.print(dbgs(), "before equalize BB counts", true)); 454 auto Info = DataflowInfoManager(BF, nullptr, nullptr); 455 equalizeBBCounts(Info, BF); 456 LLVM_DEBUG(BF.print(dbgs(), "after equalize BB counts", true)); 457 } 458 if (opts::IterativeGuess) 459 guessEdgeByIterativeApproach(BF); 460 else 461 guessEdgeByRelHotness(BF, /*UseSuccs=*/false, PredEdgeWeights, 462 SuccEdgeWeights); 463 recalculateBBCounts(BF, /*AllEdges=*/false); 464 } 465 466 void solveMCF(BinaryFunction &BF, MCFCostFunction CostFunction) { 467 llvm_unreachable("not implemented"); 468 } 469 470 } // namespace bolt 471 } // namespace llvm 472