//===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Loops should be simplified before this analysis.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
#include <numeric>

using namespace llvm;
using namespace llvm::bfi_detail;

#define DEBUG_TYPE "block-freq"

// Convert a mass to its scaled-number representation.  A full mass maps to
// exactly 1.0; any partial mass maps to (Mass + 1) * 2^-64, so that the
// conversion never rounds a non-empty mass down to zero.
ScaledNumber<uint64_t> BlockMass::toScaled() const {
  if (isFull())
    return ScaledNumber<uint64_t>(1, 0);
  return ScaledNumber<uint64_t>(getMass() + 1, -64);
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void BlockMass::dump() const { print(dbgs()); }
#endif

// Return the lowercase hexadecimal digit for N; requires 0 <= N < 16.
static char getHexDigit(int N) {
  assert(N < 16);
  if (N < 10)
    return '0' + N;
  return 'a' + N - 10;
}

// Print the 64-bit mass as a 16-digit hex fraction, most significant nibble
// first (i.e. the fixed-point value in [0, 1)).
raw_ostream &BlockMass::print(raw_ostream &OS) const {
  for (int Digits = 0; Digits < 16; ++Digits)
    OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf);
  return OS;
}

namespace {

typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
typedef BlockFrequencyInfoImplBase::Distribution Distribution;
typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList;
typedef BlockFrequencyInfoImplBase::Scaled64 Scaled64;
typedef BlockFrequencyInfoImplBase::LoopData LoopData;
typedef BlockFrequencyInfoImplBase::Weight Weight;
typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;

/// \brief Dithering mass distributer.
///
/// This class splits up a single mass into portions by weight, dithering to
/// spread out error.  No mass is lost.  The dithering precision depends on the
/// precision of the product of \a BlockMass and \a BranchProbability.
///
/// The distribution algorithm follows.
///
///  1. Initialize by saving the sum of the weights in \a RemWeight and the
///     mass to distribute in \a RemMass.
///
///  2. For each portion:
///
///      1. Construct a branch probability, P, as the portion's weight divided
///         by the current value of \a RemWeight.
///      2. Calculate the portion's mass as \a RemMass times P.
///      3. Update \a RemWeight and \a RemMass at each portion by subtracting
///         the current portion's weight and mass.
struct DitheringDistributer {
  uint32_t RemWeight;
  BlockMass RemMass;

  DitheringDistributer(Distribution &Dist, const BlockMass &Mass);

  BlockMass takeMass(uint32_t Weight);
};

} // end anonymous namespace

// Normalize the distribution (so Total fits in 32 bits) and seed the running
// totals with the full weight and the mass to be handed out.
DitheringDistributer::DitheringDistributer(Distribution &Dist,
                                           const BlockMass &Mass) {
  Dist.normalize();
  RemWeight = Dist.Total;
  RemMass = Mass;
}

// Take the portion of the remaining mass proportional to Weight, then shrink
// both running totals.  Because each portion is computed against the *current*
// remainders, rounding error is dithered across portions and the final portion
// absorbs whatever is left — no mass is ever lost.
BlockMass DitheringDistributer::takeMass(uint32_t Weight) {
  assert(Weight && "invalid weight");
  assert(Weight <= RemWeight);
  BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight);

  // Decrement totals (dither).
  RemWeight -= Weight;
  RemMass -= Mass;
  return Mass;
}

// Record an edge of the given type to Node with the given weight, tracking
// (at most one) 64-bit overflow of the running total in DidOverflow.
void Distribution::add(const BlockNode &Node, uint64_t Amount,
                       Weight::DistType Type) {
  assert(Amount && "invalid weight of 0");
  uint64_t NewTotal = Total + Amount;

  // Check for overflow.  It should be impossible to overflow twice.
  bool IsOverflow = NewTotal < Total;
  assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow");
  DidOverflow |= IsOverflow;

  // Update the total.
  Total = NewTotal;

  // Save the weight.
  Weights.push_back(Weight(Type, Node, Amount));
}

// Merge OtherW into W (same target, same type), saturating the sum at
// UINT64_MAX on overflow.  An empty W is simply replaced.
static void combineWeight(Weight &W, const Weight &OtherW) {
  assert(OtherW.TargetNode.isValid());
  if (!W.Amount) {
    W = OtherW;
    return;
  }
  assert(W.Type == OtherW.Type);
  assert(W.TargetNode == OtherW.TargetNode);
  assert(OtherW.Amount && "Expected non-zero weight");
  if (W.Amount > W.Amount + OtherW.Amount)
    // Saturate on overflow.
    W.Amount = UINT64_MAX;
  else
    W.Amount += OtherW.Amount;
}

static void combineWeightsBySorting(WeightList &Weights) {
  // Sort so edges to the same node are adjacent.
  std::sort(Weights.begin(), Weights.end(),
            [](const Weight &L,
               const Weight &R) { return L.TargetNode < R.TargetNode; });

  // Combine adjacent edges.
  WeightList::iterator O = Weights.begin();
  for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E;
       ++O, (I = L)) {
    *O = *I;

    // Find the adjacent weights to the same node.
    for (++L; L != E && I->TargetNode == L->TargetNode; ++L)
      combineWeight(*O, *L);
  }

  // Erase extra entries.
  Weights.erase(O, Weights.end());
}

static void combineWeightsByHashing(WeightList &Weights) {
  // Collect weights into a DenseMap.
  typedef DenseMap<BlockNode::IndexType, Weight> HashTable;
  HashTable Combined(NextPowerOf2(2 * Weights.size()));
  for (const Weight &W : Weights)
    combineWeight(Combined[W.TargetNode.Index], W);

  // Check whether anything changed.
  if (Weights.size() == Combined.size())
    return;

  // Fill in the new weights.
  Weights.clear();
  Weights.reserve(Combined.size());
  for (const auto &I : Combined)
    Weights.push_back(I.second);
}

static void combineWeights(WeightList &Weights) {
  // Use a hash table for many successors to keep this linear.
  if (Weights.size() > 128) {
    combineWeightsByHashing(Weights);
    return;
  }

  combineWeightsBySorting(Weights);
}

// Shift N right by Shift bits, rounding half-up: the highest bit shifted out
// is added back in, so values exactly halfway round away from zero.
static uint64_t shiftRightAndRound(uint64_t N, int Shift) {
  assert(Shift >= 0);
  assert(Shift < 64);
  if (!Shift)
    return N;
  return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1));
}

void Distribution::normalize() {
  // Early exit for termination nodes.
  if (Weights.empty())
    return;

  // Only bother if there are multiple successors.
  if (Weights.size() > 1)
    combineWeights(Weights);

  // Early exit when combined into a single successor.
  if (Weights.size() == 1) {
    Total = 1;
    Weights.front().Amount = 1;
    return;
  }

  // Determine how much to shift right so that the total fits into 32-bits.
  //
  // If we shift at all, shift by 1 extra.  Otherwise, the lower limit of 1
  // for each weight can cause a 32-bit overflow.
  int Shift = 0;
  if (DidOverflow)
    Shift = 33;
  else if (Total > UINT32_MAX)
    Shift = 33 - countLeadingZeros(Total);

  // Early exit if nothing needs to be scaled.
  if (!Shift) {
    // If we didn't overflow then combineWeights() shouldn't have changed the
    // sum of the weights, but let's double-check.
    assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0),
                                    [](uint64_t Sum, const Weight &W) {
                                      return Sum + W.Amount;
                                    }) &&
           "Expected total to be correct");
    return;
  }

  // Recompute the total through accumulation (rather than shifting it) so that
  // it's accurate after shifting and any changes combineWeights() made above.
  Total = 0;

  // Sum the weights to each node and shift right if necessary.
  for (Weight &W : Weights) {
    // Scale down below UINT32_MAX.  Since Shift is larger than necessary, we
    // can round here without concern about overflow.
    assert(W.TargetNode.isValid());
    W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift));
    assert(W.Amount <= UINT32_MAX);

    // Update the total.
    Total += W.Amount;
  }
  assert(Total <= UINT32_MAX);
}

void BlockFrequencyInfoImplBase::clear() {
  // Swap with a default-constructed std::vector, since std::vector<>::clear()
  // does not actually clear heap storage.
  std::vector<FrequencyData>().swap(Freqs);
  std::vector<WorkingData>().swap(Working);
  Loops.clear();
}

/// \brief Clear all memory not needed downstream.
///
/// Releases all memory not used downstream.  In particular, saves Freqs.
static void cleanup(BlockFrequencyInfoImplBase &BFI) {
  std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs));
  BFI.clear();
  BFI.Freqs = std::move(SavedFreqs);
}

// Classify the edge Pred->Succ relative to OuterLoop and add it to Dist as a
// backedge (Succ resolves to OuterLoop's header), an exit (Succ resolves
// outside OuterLoop), or a local edge.  Returns false on an irreducible
// backedge that cannot be handled, signalling the caller to abort and fall
// back to irreducible-control-flow handling.
bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist,
                                           const LoopData *OuterLoop,
                                           const BlockNode &Pred,
                                           const BlockNode &Succ,
                                           uint64_t Weight) {
  if (!Weight)
    Weight = 1;

  auto isLoopHeader = [&OuterLoop](const BlockNode &Node) {
    return OuterLoop && OuterLoop->isHeader(Node);
  };

  BlockNode Resolved = Working[Succ.Index].getResolvedNode();

#ifndef NDEBUG
  auto debugSuccessor = [&](const char *Type) {
    dbgs() << "  =>"
           << " [" << Type << "] weight = " << Weight;
    if (!isLoopHeader(Resolved))
      dbgs() << ", succ = " << getBlockName(Succ);
    if (Resolved != Succ)
      dbgs() << ", resolved = " << getBlockName(Resolved);
    dbgs() << "\n";
  };
  (void)debugSuccessor;
#endif

  if (isLoopHeader(Resolved)) {
    DEBUG(debugSuccessor("backedge"));
    Dist.addBackedge(Resolved, Weight);
    return true;
  }

  if (Working[Resolved.Index].getContainingLoop() != OuterLoop) {
    DEBUG(debugSuccessor("  exit  "));
    Dist.addExit(Resolved, Weight);
    return true;
  }

  if (Resolved < Pred) {
    if (!isLoopHeader(Pred)) {
      // If OuterLoop is an irreducible loop, we can't actually handle this.
      assert((!OuterLoop || !OuterLoop->isIrreducible()) &&
             "unhandled irreducible control flow");

      // Irreducible backedge.  Abort.
      DEBUG(debugSuccessor("abort!!!"));
      return false;
    }

    // If "Pred" is a loop header, then this isn't really a backedge; rather,
    // OuterLoop must be irreducible.  These false backedges can come only from
    // secondary loop headers.
    assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) &&
           "unhandled irreducible control flow");
  }

  DEBUG(debugSuccessor(" local  "));
  Dist.addLocal(Resolved, Weight);
  return true;
}

// Add every recorded exit of Loop (as seen from its header) to Dist.  Returns
// false if any exit turns out to be an irreducible backedge.
bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
    const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) {
  // Copy the exit map into Dist.
  for (const auto &I : Loop.Exits)
    if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first,
                   I.second.getMass()))
      // Irreducible backedge.
      return false;

  return true;
}

/// \brief Compute the loop scale for a loop.
void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
  // Compute loop scale.
  DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n");

  // Infinite loops need special handling.  If we give the back edge an
  // infinite mass, they may saturate all the other scales in the function down
  // to 1, making all the other region temperatures look exactly the same.
  // Choose an arbitrary scale to avoid these issues.
  //
  // FIXME: An alternate way would be to select a symbolic scale which is later
  // replaced to be the maximum of all computed scales plus 1.  This would
  // appropriately describe the loop as having a large scale, without skewing
  // the final frequency computation.
  const Scaled64 InfiniteLoopScale(1, 12);

  // LoopScale == 1 / ExitMass
  // ExitMass == HeadMass - BackedgeMass
  BlockMass TotalBackedgeMass;
  for (auto &Mass : Loop.BackedgeMass)
    TotalBackedgeMass += Mass;
  BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass;

  // Block scale stores the inverse of the scale.  If this is an infinite loop,
  // its exit mass will be zero.  In this case, use an arbitrary scale for the
  // loop scale.
  Loop.Scale =
      ExitMass.isEmpty() ? InfiniteLoopScale : ExitMass.toScaled().inverse();

  DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
               << " - " << TotalBackedgeMass << ")\n"
               << " - scale = " << Loop.Scale << "\n");
}

/// \brief Package up a loop.
void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) {
  DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n");

  // Clear the subloop exits to prevent quadratic memory usage.
  for (const BlockNode &M : Loop.Nodes) {
    if (auto *Loop = Working[M.Index].getPackagedLoop())
      Loop->Exits.clear();
    DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n");
  }
  Loop.IsPackaged = true;
}

#ifndef NDEBUG
// Debug helper: print one mass assignment made during distribution, along
// with the distributer's remaining mass.
static void debugAssign(const BlockFrequencyInfoImplBase &BFI,
                        const DitheringDistributer &D, const BlockNode &T,
                        const BlockMass &M, const char *Desc) {
  dbgs() << "  => assign " << M << " (" << D.RemMass << ")";
  if (Desc)
    dbgs() << " [" << Desc << "]";
  if (T.isValid())
    dbgs() << " to " << BFI.getBlockName(T);
  dbgs() << "\n";
}
#endif

// Hand Source's mass out to the targets recorded in Dist: local edges feed
// the successor's working mass, backedges accumulate into OuterLoop's
// per-header backedge mass, and exits are appended to OuterLoop's exit list.
void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
                                                LoopData *OuterLoop,
                                                Distribution &Dist) {
  BlockMass Mass = Working[Source.Index].getMass();
  DEBUG(dbgs() << "  => mass:  " << Mass << "\n");

  // Distribute mass to successors as laid out in Dist.
  DitheringDistributer D(Dist, Mass);

  for (const Weight &W : Dist.Weights) {
    // Check for a local edge (non-backedge and non-exit).
    BlockMass Taken = D.takeMass(W.Amount);
    if (W.Type == Weight::Local) {
      Working[W.TargetNode.Index].getMass() += Taken;
      DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
      continue;
    }

    // Backedges and exits only make sense if we're processing a loop.
    assert(OuterLoop && "backedge or exit outside of loop");

    // Check for a backedge.
    if (W.Type == Weight::Backedge) {
      OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken;
      DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back"));
      continue;
    }

    // This must be an exit.
    assert(W.Type == Weight::Exit);
    OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken));
    DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit"));
  }
}

static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
                                     const Scaled64 &Min, const Scaled64 &Max) {
  // Scale the Factor to a size that creates integers.  Ideally, integers would
  // be scaled so that Max == UINT64_MAX so that they can be best
  // differentiated.  However, in the presence of large frequency values, small
  // frequencies are scaled down to 1, making it impossible to differentiate
  // small, unequal numbers.  When the spread between Min and Max frequencies
  // fits well within MaxBits, we make the scale be at least 8.
  const unsigned MaxBits = 64;
  const unsigned SpreadBits = (Max / Min).lg();
  Scaled64 ScalingFactor;
  if (SpreadBits <= MaxBits - 3) {
    // If the values are small enough, make the scaling factor at least 8 to
    // allow distinguishing small values.
    ScalingFactor = Min.inverse();
    ScalingFactor <<= 3;
  } else {
    // If the values need more than MaxBits to be represented, saturate small
    // frequency values down to 1 by using a scaling factor that benefits large
    // frequency values.
    ScalingFactor = Scaled64(1, MaxBits) / Max;
  }

  // Translate the floats to integers.
  DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
               << ", factor = " << ScalingFactor << "\n");
  for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
    Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
    BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
    DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
                 << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
                 << ", int = " << BFI.Freqs[Index].Integer << "\n");
  }
}

/// \brief Unwrap a loop package.
///
/// Visits all the members of a loop, adjusting their BlockData according to
/// the loop's pseudo-node.
static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
  DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
               << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
               << "\n");
  Loop.Scale *= Loop.Mass.toScaled();
  Loop.IsPackaged = false;
  DEBUG(dbgs() << "  => combined-scale = " << Loop.Scale << "\n");

  // Propagate the head scale through the loop.  Since members are visited in
  // RPO, the head scale will be updated by the loop scale first, and then the
  // final head scale will be used for updated the rest of the members.
  for (const BlockNode &N : Loop.Nodes) {
    const auto &Working = BFI.Working[N.Index];
    Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
                                       : BFI.Freqs[N.Index].Scaled;
    Scaled64 New = Loop.Scale * F;
    DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
                 << "\n");
    F = New;
  }
}

void BlockFrequencyInfoImplBase::unwrapLoops() {
  // Set initial frequencies from loop-local masses.
  for (size_t Index = 0; Index < Working.size(); ++Index)
    Freqs[Index].Scaled = Working[Index].Mass.toScaled();

  for (LoopData &Loop : Loops)
    unwrapLoop(*this, Loop);
}

void BlockFrequencyInfoImplBase::finalizeMetrics() {
  // Unwrap loop packages in reverse post-order, tracking min and max
  // frequencies.
  auto Min = Scaled64::getLargest();
  auto Max = Scaled64::getZero();
  for (size_t Index = 0; Index < Working.size(); ++Index) {
    // Update min/max scale.
    Min = std::min(Min, Freqs[Index].Scaled);
    Max = std::max(Max, Freqs[Index].Scaled);
  }

  // Convert to integers.
  convertFloatingToInteger(*this, Min, Max);

  // Clean up data structures.
  cleanup(*this);

  // Print out the final stats.
  DEBUG(dump());
}

BlockFrequency
BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
  if (!Node.isValid())
    return 0;
  return Freqs[Node.Index].Integer;
}

Optional<uint64_t>
BlockFrequencyInfoImplBase::getBlockProfileCount(const Function &F,
                                                 const BlockNode &Node) const {
  return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency());
}

// Scale the function's entry count by Freq/EntryFreq to estimate a profile
// count for a block with frequency Freq.  Returns None when F has no entry
// count.
Optional<uint64_t>
BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
                                                    uint64_t Freq) const {
  auto EntryCount = F.getEntryCount();
  if (!EntryCount)
    return None;
  // Use 128 bit APInt to do the arithmetic to avoid overflow.
  APInt BlockCount(128, EntryCount.getValue());
  APInt BlockFreq(128, Freq);
  APInt EntryFreq(128, getEntryFreq());
  BlockCount *= BlockFreq;
  BlockCount = BlockCount.udiv(EntryFreq);
  return BlockCount.getLimitedValue();
}

Scaled64
BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
  if (!Node.isValid())
    return Scaled64::getZero();
  return Freqs[Node.Index].Scaled;
}

void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node,
                                              uint64_t Freq) {
  assert(Node.isValid() && "Expected valid node");
  assert(Node.Index < Freqs.size() && "Expected legal index");
  Freqs[Node.Index].Integer = Freq;
}

// Base-class stub; the templated subclass overrides this with real names.
std::string
BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {
  return std::string();
}

// Loop names are the header's name plus "*" ("**" if irreducible).
std::string
BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const {
  return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*");
}

raw_ostream &
BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
                                           const BlockNode &Node) const {
  return OS << getFloatingBlockFreq(Node);
}

raw_ostream &
BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
                                           const BlockFrequency &Freq) const {
  Scaled64 Block(Freq.getFrequency(), 0);
  Scaled64 Entry(getEntryFreq(), 0);

  return OS << Block / Entry;
}

void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) {
  Start = OuterLoop.getHeader();
  Nodes.reserve(OuterLoop.Nodes.size());
  for (auto N : OuterLoop.Nodes)
    addNode(N);
  indexNodes();
}

void IrreducibleGraph::addNodesInFunction() {
  Start = 0;
  for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index)
    if (!BFI.Working[Index].isPackaged())
      addNode(Index);
  indexNodes();
}

// Build the index -> IrrNode lookup table used when wiring up edges.
void IrreducibleGraph::indexNodes() {
  for (auto &I : Nodes)
    Lookup[I.Node.Index] = &I;
}

// Add an edge from Irr to Succ, skipping edges back to OuterLoop's header and
// edges to nodes that are not part of this graph.
void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ,
                               const BFIBase::LoopData *OuterLoop) {
  if (OuterLoop && OuterLoop->isHeader(Succ))
    return;
  auto L = Lookup.find(Succ.Index);
  if (L == Lookup.end())
    return;
  IrrNode &SuccIrr = *L->second;
  Irr.Edges.push_back(&SuccIrr);
  SuccIrr.Edges.push_front(&Irr);
  ++SuccIrr.NumIn;
}

namespace llvm {
template <> struct GraphTraits<IrreducibleGraph> {
  typedef bfi_detail::IrreducibleGraph GraphT;

  typedef const GraphT::IrrNode *NodeRef;
  typedef GraphT::IrrNode::iterator ChildIteratorType;

  static NodeRef getEntryNode(const GraphT &G) { return G.StartIrr; }
  static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
};
} // end namespace llvm

/// \brief Find extra irreducible headers.
///
/// Find entry blocks and other blocks with backedges, which exist when \c G
/// contains irreducible sub-SCCs.
static void findIrreducibleHeaders(
    const BlockFrequencyInfoImplBase &BFI,
    const IrreducibleGraph &G,
    const std::vector<const IrreducibleGraph::IrrNode *> &SCC,
    LoopData::NodeList &Headers, LoopData::NodeList &Others) {
  // Map from nodes in the SCC to whether it's an entry block.
  SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC;

  // InSCC also acts the set of nodes in the graph.  Seed it.
  for (const auto *I : SCC)
    InSCC[I] = false;

  for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) {
    auto &Irr = *I->first;
    for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
      if (InSCC.count(P))
        continue;

      // This is an entry block.
      I->second = true;
      Headers.push_back(Irr.Node);
      DEBUG(dbgs() << "  => entry = " << BFI.getBlockName(Irr.Node) << "\n");
      break;
    }
  }
  assert(Headers.size() >= 2 &&
         "Expected irreducible CFG; -loop-info is likely invalid");
  if (Headers.size() == InSCC.size()) {
    // Every block is a header.
    std::sort(Headers.begin(), Headers.end());
    return;
  }

  // Look for extra headers from irreducible sub-SCCs.
  for (const auto &I : InSCC) {
    // Entry blocks are already headers.
    if (I.second)
      continue;

    auto &Irr = *I.first;
    for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) {
      // Skip forward edges.
      if (P->Node < Irr.Node)
        continue;

      // Skip predecessors from entry blocks.  These can have inverted
      // ordering.
      if (InSCC.lookup(P))
        continue;

      // Store the extra header.
      Headers.push_back(Irr.Node);
      DEBUG(dbgs() << "  => extra = " << BFI.getBlockName(Irr.Node) << "\n");
      break;
    }
    if (Headers.back() == Irr.Node)
      // Added this as a header.
      continue;

    // This is not a header.
    Others.push_back(Irr.Node);
    DEBUG(dbgs() << "  => other = " << BFI.getBlockName(Irr.Node) << "\n");
  }
  std::sort(Headers.begin(), Headers.end());
  std::sort(Others.begin(), Others.end());
}

// Materialize one irreducible SCC as a LoopData inserted before Insert, and
// re-point the loop hierarchy of its member nodes at the new loop.
static void createIrreducibleLoop(
    BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G,
    LoopData *OuterLoop, std::list<LoopData>::iterator Insert,
    const std::vector<const IrreducibleGraph::IrrNode *> &SCC) {
  // Translate the SCC into RPO.
  DEBUG(dbgs() << " - found-scc\n");

  LoopData::NodeList Headers;
  LoopData::NodeList Others;
  findIrreducibleHeaders(BFI, G, SCC, Headers, Others);

  auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(),
                                Headers.end(), Others.begin(), Others.end());

  // Update loop hierarchy.
  for (const auto &N : Loop->Nodes)
    if (BFI.Working[N.Index].isLoopHeader())
      BFI.Working[N.Index].Loop->Parent = &*Loop;
    else
      BFI.Working[N.Index].Loop = &*Loop;
}

// Find every non-trivial SCC of G, turn each into an irreducible LoopData
// inserted before Insert, and return the range of loops created.
iterator_range<std::list<LoopData>::iterator>
BlockFrequencyInfoImplBase::analyzeIrreducible(
    const IrreducibleGraph &G, LoopData *OuterLoop,
    std::list<LoopData>::iterator Insert) {
  assert((OuterLoop == nullptr) == (Insert == Loops.begin()));
  auto Prev = OuterLoop ? std::prev(Insert) : Loops.end();

  for (auto I = scc_begin(G); !I.isAtEnd(); ++I) {
    if (I->size() < 2)
      continue;

    // Translate the SCC into RPO.
    createIrreducibleLoop(*this, G, OuterLoop, Insert, *I);
  }

  if (OuterLoop)
    return make_range(std::next(Prev), Insert);
  return make_range(Loops.begin(), Insert);
}

// Reset OuterLoop for a fresh mass propagation after irreducible sub-loops
// were packaged: drop its exits and backedge masses, and compact its node
// list down to the nodes that are not packaged inside a sub-loop.
void
BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) {
  OuterLoop.Exits.clear();
  for (auto &Mass : OuterLoop.BackedgeMass)
    Mass = BlockMass::getEmpty();
  auto O = OuterLoop.Nodes.begin() + 1;
  for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I)
    if (!Working[I->Index].isPackaged())
      *O++ = *I;
  OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end());
}

void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {
  assert(Loop.isIrreducible() && "this only makes sense on irreducible loops");

  // Since the loop has more than one header block, the mass flowing back into
  // each header will be different.  Adjust the mass in each header loop to
  // reflect the masses flowing through back edges.
  //
  // To do this, we distribute the initial mass using the backedge masses
  // as weights for the distribution.
  BlockMass LoopMass = BlockMass::getFull();
  Distribution Dist;

  DEBUG(dbgs() << "adjust-loop-header-mass:\n");
  for (uint32_t H = 0; H < Loop.NumHeaders; ++H) {
    auto &HeaderNode = Loop.Nodes[H];
    auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];
    DEBUG(dbgs() << " - Add back edge mass for node "
                 << getBlockName(HeaderNode) << ": " << BackedgeMass << "\n");
    if (BackedgeMass.getMass() > 0)
      Dist.addLocal(HeaderNode, BackedgeMass.getMass());
    else
      DEBUG(dbgs() << "   Nothing added. Back edge mass is zero\n");
  }

  DitheringDistributer D(Dist, LoopMass);

  DEBUG(dbgs() << " Distribute loop mass " << LoopMass
               << " to headers using above weights\n");
  for (const Weight &W : Dist.Weights) {
    BlockMass Taken = D.takeMass(W.Amount);
    assert(W.Type == Weight::Local && "all weights should be local");
    Working[W.TargetNode.Index].getMass() = Taken;
    DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr));
  }
}