1 //===- BlockFrequencyImplInfo.cpp - Block Frequency Info Implementation ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Loops should be simplified before this analysis. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Analysis/BlockFrequencyInfoImpl.h" 15 #include "llvm/ADT/SCCIterator.h" 16 #include "llvm/Support/raw_ostream.h" 17 #include <numeric> 18 19 using namespace llvm; 20 using namespace llvm::bfi_detail; 21 22 #define DEBUG_TYPE "block-freq" 23 24 ScaledNumber<uint64_t> BlockMass::toScaled() const { 25 if (isFull()) 26 return ScaledNumber<uint64_t>(1, 0); 27 return ScaledNumber<uint64_t>(getMass() + 1, -64); 28 } 29 30 LLVM_DUMP_METHOD void BlockMass::dump() const { print(dbgs()); } 31 32 static char getHexDigit(int N) { 33 assert(N < 16); 34 if (N < 10) 35 return '0' + N; 36 return 'a' + N - 10; 37 } 38 39 raw_ostream &BlockMass::print(raw_ostream &OS) const { 40 for (int Digits = 0; Digits < 16; ++Digits) 41 OS << getHexDigit(Mass >> (60 - Digits * 4) & 0xf); 42 return OS; 43 } 44 45 namespace { 46 47 typedef BlockFrequencyInfoImplBase::BlockNode BlockNode; 48 typedef BlockFrequencyInfoImplBase::Distribution Distribution; 49 typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList; 50 typedef BlockFrequencyInfoImplBase::Scaled64 Scaled64; 51 typedef BlockFrequencyInfoImplBase::LoopData LoopData; 52 typedef BlockFrequencyInfoImplBase::Weight Weight; 53 typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData; 54 55 /// \brief Dithering mass distributer. 56 /// 57 /// This class splits up a single mass into portions by weight, dithering to 58 /// spread out error. No mass is lost. The dithering precision depends on the 59 /// precision of the product of \a BlockMass and \a BranchProbability. 60 /// 61 /// The distribution algorithm follows. 62 /// 63 /// 1. Initialize by saving the sum of the weights in \a RemWeight and the 64 /// mass to distribute in \a RemMass. 65 /// 66 /// 2. For each portion: 67 /// 68 /// 1. Construct a branch probability, P, as the portion's weight divided 69 /// by the current value of \a RemWeight. 70 /// 2. Calculate the portion's mass as \a RemMass times P. 71 /// 3. Update \a RemWeight and \a RemMass at each portion by subtracting 72 /// the current portion's weight and mass. 73 struct DitheringDistributer { 74 uint32_t RemWeight; 75 BlockMass RemMass; 76 77 DitheringDistributer(Distribution &Dist, const BlockMass &Mass); 78 79 BlockMass takeMass(uint32_t Weight); 80 }; 81 82 } // end anonymous namespace 83 84 DitheringDistributer::DitheringDistributer(Distribution &Dist, 85 const BlockMass &Mass) { 86 Dist.normalize(); 87 RemWeight = Dist.Total; 88 RemMass = Mass; 89 } 90 91 BlockMass DitheringDistributer::takeMass(uint32_t Weight) { 92 assert(Weight && "invalid weight"); 93 assert(Weight <= RemWeight); 94 BlockMass Mass = RemMass * BranchProbability(Weight, RemWeight); 95 96 // Decrement totals (dither). 97 RemWeight -= Weight; 98 RemMass -= Mass; 99 return Mass; 100 } 101 102 void Distribution::add(const BlockNode &Node, uint64_t Amount, 103 Weight::DistType Type) { 104 assert(Amount && "invalid weight of 0"); 105 uint64_t NewTotal = Total + Amount; 106 107 // Check for overflow. It should be impossible to overflow twice. 108 bool IsOverflow = NewTotal < Total; 109 assert(!(DidOverflow && IsOverflow) && "unexpected repeated overflow"); 110 DidOverflow |= IsOverflow; 111 112 // Update the total. 113 Total = NewTotal; 114 115 // Save the weight. 116 Weights.push_back(Weight(Type, Node, Amount)); 117 } 118 119 static void combineWeight(Weight &W, const Weight &OtherW) { 120 assert(OtherW.TargetNode.isValid()); 121 if (!W.Amount) { 122 W = OtherW; 123 return; 124 } 125 assert(W.Type == OtherW.Type); 126 assert(W.TargetNode == OtherW.TargetNode); 127 assert(OtherW.Amount && "Expected non-zero weight"); 128 if (W.Amount > W.Amount + OtherW.Amount) 129 // Saturate on overflow. 130 W.Amount = UINT64_MAX; 131 else 132 W.Amount += OtherW.Amount; 133 } 134 135 static void combineWeightsBySorting(WeightList &Weights) { 136 // Sort so edges to the same node are adjacent. 137 std::sort(Weights.begin(), Weights.end(), 138 [](const Weight &L, 139 const Weight &R) { return L.TargetNode < R.TargetNode; }); 140 141 // Combine adjacent edges. 142 WeightList::iterator O = Weights.begin(); 143 for (WeightList::const_iterator I = O, L = O, E = Weights.end(); I != E; 144 ++O, (I = L)) { 145 *O = *I; 146 147 // Find the adjacent weights to the same node. 148 for (++L; L != E && I->TargetNode == L->TargetNode; ++L) 149 combineWeight(*O, *L); 150 } 151 152 // Erase extra entries. 153 Weights.erase(O, Weights.end()); 154 } 155 156 static void combineWeightsByHashing(WeightList &Weights) { 157 // Collect weights into a DenseMap. 158 typedef DenseMap<BlockNode::IndexType, Weight> HashTable; 159 HashTable Combined(NextPowerOf2(2 * Weights.size())); 160 for (const Weight &W : Weights) 161 combineWeight(Combined[W.TargetNode.Index], W); 162 163 // Check whether anything changed. 164 if (Weights.size() == Combined.size()) 165 return; 166 167 // Fill in the new weights. 168 Weights.clear(); 169 Weights.reserve(Combined.size()); 170 for (const auto &I : Combined) 171 Weights.push_back(I.second); 172 } 173 174 static void combineWeights(WeightList &Weights) { 175 // Use a hash table for many successors to keep this linear. 176 if (Weights.size() > 128) { 177 combineWeightsByHashing(Weights); 178 return; 179 } 180 181 combineWeightsBySorting(Weights); 182 } 183 184 static uint64_t shiftRightAndRound(uint64_t N, int Shift) { 185 assert(Shift >= 0); 186 assert(Shift < 64); 187 if (!Shift) 188 return N; 189 return (N >> Shift) + (UINT64_C(1) & N >> (Shift - 1)); 190 } 191 192 void Distribution::normalize() { 193 // Early exit for termination nodes. 194 if (Weights.empty()) 195 return; 196 197 // Only bother if there are multiple successors. 198 if (Weights.size() > 1) 199 combineWeights(Weights); 200 201 // Early exit when combined into a single successor. 202 if (Weights.size() == 1) { 203 Total = 1; 204 Weights.front().Amount = 1; 205 return; 206 } 207 208 // Determine how much to shift right so that the total fits into 32-bits. 209 // 210 // If we shift at all, shift by 1 extra. Otherwise, the lower limit of 1 211 // for each weight can cause a 32-bit overflow. 212 int Shift = 0; 213 if (DidOverflow) 214 Shift = 33; 215 else if (Total > UINT32_MAX) 216 Shift = 33 - countLeadingZeros(Total); 217 218 // Early exit if nothing needs to be scaled. 219 if (!Shift) { 220 // If we didn't overflow then combineWeights() shouldn't have changed the 221 // sum of the weights, but let's double-check. 222 assert(Total == std::accumulate(Weights.begin(), Weights.end(), UINT64_C(0), 223 [](uint64_t Sum, const Weight &W) { 224 return Sum + W.Amount; 225 }) && 226 "Expected total to be correct"); 227 return; 228 } 229 230 // Recompute the total through accumulation (rather than shifting it) so that 231 // it's accurate after shifting and any changes combineWeights() made above. 232 Total = 0; 233 234 // Sum the weights to each node and shift right if necessary. 235 for (Weight &W : Weights) { 236 // Scale down below UINT32_MAX. Since Shift is larger than necessary, we 237 // can round here without concern about overflow. 238 assert(W.TargetNode.isValid()); 239 W.Amount = std::max(UINT64_C(1), shiftRightAndRound(W.Amount, Shift)); 240 assert(W.Amount <= UINT32_MAX); 241 242 // Update the total. 243 Total += W.Amount; 244 } 245 assert(Total <= UINT32_MAX); 246 } 247 248 void BlockFrequencyInfoImplBase::clear() { 249 // Swap with a default-constructed std::vector, since std::vector<>::clear() 250 // does not actually clear heap storage. 251 std::vector<FrequencyData>().swap(Freqs); 252 std::vector<WorkingData>().swap(Working); 253 Loops.clear(); 254 } 255 256 /// \brief Clear all memory not needed downstream. 257 /// 258 /// Releases all memory not used downstream. In particular, saves Freqs. 259 static void cleanup(BlockFrequencyInfoImplBase &BFI) { 260 std::vector<FrequencyData> SavedFreqs(std::move(BFI.Freqs)); 261 BFI.clear(); 262 BFI.Freqs = std::move(SavedFreqs); 263 } 264 265 bool BlockFrequencyInfoImplBase::addToDist(Distribution &Dist, 266 const LoopData *OuterLoop, 267 const BlockNode &Pred, 268 const BlockNode &Succ, 269 uint64_t Weight) { 270 if (!Weight) 271 Weight = 1; 272 273 auto isLoopHeader = [&OuterLoop](const BlockNode &Node) { 274 return OuterLoop && OuterLoop->isHeader(Node); 275 }; 276 277 BlockNode Resolved = Working[Succ.Index].getResolvedNode(); 278 279 #ifndef NDEBUG 280 auto debugSuccessor = [&](const char *Type) { 281 dbgs() << " =>" 282 << " [" << Type << "] weight = " << Weight; 283 if (!isLoopHeader(Resolved)) 284 dbgs() << ", succ = " << getBlockName(Succ); 285 if (Resolved != Succ) 286 dbgs() << ", resolved = " << getBlockName(Resolved); 287 dbgs() << "\n"; 288 }; 289 (void)debugSuccessor; 290 #endif 291 292 if (isLoopHeader(Resolved)) { 293 DEBUG(debugSuccessor("backedge")); 294 Dist.addBackedge(Resolved, Weight); 295 return true; 296 } 297 298 if (Working[Resolved.Index].getContainingLoop() != OuterLoop) { 299 DEBUG(debugSuccessor(" exit ")); 300 Dist.addExit(Resolved, Weight); 301 return true; 302 } 303 304 if (Resolved < Pred) { 305 if (!isLoopHeader(Pred)) { 306 // If OuterLoop is an irreducible loop, we can't actually handle this. 307 assert((!OuterLoop || !OuterLoop->isIrreducible()) && 308 "unhandled irreducible control flow"); 309 310 // Irreducible backedge. Abort. 311 DEBUG(debugSuccessor("abort!!!")); 312 return false; 313 } 314 315 // If "Pred" is a loop header, then this isn't really a backedge; rather, 316 // OuterLoop must be irreducible. These false backedges can come only from 317 // secondary loop headers. 318 assert(OuterLoop && OuterLoop->isIrreducible() && !isLoopHeader(Resolved) && 319 "unhandled irreducible control flow"); 320 } 321 322 DEBUG(debugSuccessor(" local ")); 323 Dist.addLocal(Resolved, Weight); 324 return true; 325 } 326 327 bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist( 328 const LoopData *OuterLoop, LoopData &Loop, Distribution &Dist) { 329 // Copy the exit map into Dist. 330 for (const auto &I : Loop.Exits) 331 if (!addToDist(Dist, OuterLoop, Loop.getHeader(), I.first, 332 I.second.getMass())) 333 // Irreducible backedge. 334 return false; 335 336 return true; 337 } 338 339 /// \brief Compute the loop scale for a loop. 340 void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) { 341 // Compute loop scale. 342 DEBUG(dbgs() << "compute-loop-scale: " << getLoopName(Loop) << "\n"); 343 344 // Infinite loops need special handling. If we give the back edge an infinite 345 // mass, they may saturate all the other scales in the function down to 1, 346 // making all the other region temperatures look exactly the same. Choose an 347 // arbitrary scale to avoid these issues. 348 // 349 // FIXME: An alternate way would be to select a symbolic scale which is later 350 // replaced to be the maximum of all computed scales plus 1. This would 351 // appropriately describe the loop as having a large scale, without skewing 352 // the final frequency computation. 353 const Scaled64 InifiniteLoopScale(1, 12); 354 355 // LoopScale == 1 / ExitMass 356 // ExitMass == HeadMass - BackedgeMass 357 BlockMass TotalBackedgeMass; 358 for (auto &Mass : Loop.BackedgeMass) 359 TotalBackedgeMass += Mass; 360 BlockMass ExitMass = BlockMass::getFull() - TotalBackedgeMass; 361 362 // Block scale stores the inverse of the scale. If this is an infinite loop, 363 // its exit mass will be zero. In this case, use an arbitrary scale for the 364 // loop scale. 365 Loop.Scale = 366 ExitMass.isEmpty() ? InifiniteLoopScale : ExitMass.toScaled().inverse(); 367 368 DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull() 369 << " - " << TotalBackedgeMass << ")\n" 370 << " - scale = " << Loop.Scale << "\n"); 371 } 372 373 /// \brief Package up a loop. 374 void BlockFrequencyInfoImplBase::packageLoop(LoopData &Loop) { 375 DEBUG(dbgs() << "packaging-loop: " << getLoopName(Loop) << "\n"); 376 377 // Clear the subloop exits to prevent quadratic memory usage. 378 for (const BlockNode &M : Loop.Nodes) { 379 if (auto *Loop = Working[M.Index].getPackagedLoop()) 380 Loop->Exits.clear(); 381 DEBUG(dbgs() << " - node: " << getBlockName(M.Index) << "\n"); 382 } 383 Loop.IsPackaged = true; 384 } 385 386 #ifndef NDEBUG 387 static void debugAssign(const BlockFrequencyInfoImplBase &BFI, 388 const DitheringDistributer &D, const BlockNode &T, 389 const BlockMass &M, const char *Desc) { 390 dbgs() << " => assign " << M << " (" << D.RemMass << ")"; 391 if (Desc) 392 dbgs() << " [" << Desc << "]"; 393 if (T.isValid()) 394 dbgs() << " to " << BFI.getBlockName(T); 395 dbgs() << "\n"; 396 } 397 #endif 398 399 void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source, 400 LoopData *OuterLoop, 401 Distribution &Dist) { 402 BlockMass Mass = Working[Source.Index].getMass(); 403 DEBUG(dbgs() << " => mass: " << Mass << "\n"); 404 405 // Distribute mass to successors as laid out in Dist. 406 DitheringDistributer D(Dist, Mass); 407 408 for (const Weight &W : Dist.Weights) { 409 // Check for a local edge (non-backedge and non-exit). 410 BlockMass Taken = D.takeMass(W.Amount); 411 if (W.Type == Weight::Local) { 412 Working[W.TargetNode.Index].getMass() += Taken; 413 DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr)); 414 continue; 415 } 416 417 // Backedges and exits only make sense if we're processing a loop. 418 assert(OuterLoop && "backedge or exit outside of loop"); 419 420 // Check for a backedge. 421 if (W.Type == Weight::Backedge) { 422 OuterLoop->BackedgeMass[OuterLoop->getHeaderIndex(W.TargetNode)] += Taken; 423 DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "back")); 424 continue; 425 } 426 427 // This must be an exit. 428 assert(W.Type == Weight::Exit); 429 OuterLoop->Exits.push_back(std::make_pair(W.TargetNode, Taken)); 430 DEBUG(debugAssign(*this, D, W.TargetNode, Taken, "exit")); 431 } 432 } 433 434 static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI, 435 const Scaled64 &Min, const Scaled64 &Max) { 436 // Scale the Factor to a size that creates integers. Ideally, integers would 437 // be scaled so that Max == UINT64_MAX so that they can be best 438 // differentiated. However, in the presence of large frequency values, small 439 // frequencies are scaled down to 1, making it impossible to differentiate 440 // small, unequal numbers. When the spread between Min and Max frequencies 441 // fits well within MaxBits, we make the scale be at least 8. 442 const unsigned MaxBits = 64; 443 const unsigned SpreadBits = (Max / Min).lg(); 444 Scaled64 ScalingFactor; 445 if (SpreadBits <= MaxBits - 3) { 446 // If the values are small enough, make the scaling factor at least 8 to 447 // allow distinguishing small values. 448 ScalingFactor = Min.inverse(); 449 ScalingFactor <<= 3; 450 } else { 451 // If the values need more than MaxBits to be represented, saturate small 452 // frequency values down to 1 by using a scaling factor that benefits large 453 // frequency values. 454 ScalingFactor = Scaled64(1, MaxBits) / Max; 455 } 456 457 // Translate the floats to integers. 458 DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max 459 << ", factor = " << ScalingFactor << "\n"); 460 for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) { 461 Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor; 462 BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>()); 463 DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = " 464 << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled 465 << ", int = " << BFI.Freqs[Index].Integer << "\n"); 466 } 467 } 468 469 /// \brief Unwrap a loop package. 470 /// 471 /// Visits all the members of a loop, adjusting their BlockData according to 472 /// the loop's pseudo-node. 473 static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) { 474 DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop) 475 << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale 476 << "\n"); 477 Loop.Scale *= Loop.Mass.toScaled(); 478 Loop.IsPackaged = false; 479 DEBUG(dbgs() << " => combined-scale = " << Loop.Scale << "\n"); 480 481 // Propagate the head scale through the loop. Since members are visited in 482 // RPO, the head scale will be updated by the loop scale first, and then the 483 // final head scale will be used for updated the rest of the members. 484 for (const BlockNode &N : Loop.Nodes) { 485 const auto &Working = BFI.Working[N.Index]; 486 Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale 487 : BFI.Freqs[N.Index].Scaled; 488 Scaled64 New = Loop.Scale * F; 489 DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New 490 << "\n"); 491 F = New; 492 } 493 } 494 495 void BlockFrequencyInfoImplBase::unwrapLoops() { 496 // Set initial frequencies from loop-local masses. 497 for (size_t Index = 0; Index < Working.size(); ++Index) 498 Freqs[Index].Scaled = Working[Index].Mass.toScaled(); 499 500 for (LoopData &Loop : Loops) 501 unwrapLoop(*this, Loop); 502 } 503 504 void BlockFrequencyInfoImplBase::finalizeMetrics() { 505 // Unwrap loop packages in reverse post-order, tracking min and max 506 // frequencies. 507 auto Min = Scaled64::getLargest(); 508 auto Max = Scaled64::getZero(); 509 for (size_t Index = 0; Index < Working.size(); ++Index) { 510 // Update min/max scale. 511 Min = std::min(Min, Freqs[Index].Scaled); 512 Max = std::max(Max, Freqs[Index].Scaled); 513 } 514 515 // Convert to integers. 516 convertFloatingToInteger(*this, Min, Max); 517 518 // Clean up data structures. 519 cleanup(*this); 520 521 // Print out the final stats. 522 DEBUG(dump()); 523 } 524 525 BlockFrequency 526 BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const { 527 if (!Node.isValid()) 528 return 0; 529 return Freqs[Node.Index].Integer; 530 } 531 532 Scaled64 533 BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const { 534 if (!Node.isValid()) 535 return Scaled64::getZero(); 536 return Freqs[Node.Index].Scaled; 537 } 538 539 void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node, 540 uint64_t Freq) { 541 assert(Node.isValid() && "Expected valid node"); 542 assert(Node.Index < Freqs.size() && "Expected legal index"); 543 Freqs[Node.Index].Integer = Freq; 544 } 545 546 std::string 547 BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const { 548 return std::string(); 549 } 550 551 std::string 552 BlockFrequencyInfoImplBase::getLoopName(const LoopData &Loop) const { 553 return getBlockName(Loop.getHeader()) + (Loop.isIrreducible() ? "**" : "*"); 554 } 555 556 raw_ostream & 557 BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS, 558 const BlockNode &Node) const { 559 return OS << getFloatingBlockFreq(Node); 560 } 561 562 raw_ostream & 563 BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS, 564 const BlockFrequency &Freq) const { 565 Scaled64 Block(Freq.getFrequency(), 0); 566 Scaled64 Entry(getEntryFreq(), 0); 567 568 return OS << Block / Entry; 569 } 570 571 void IrreducibleGraph::addNodesInLoop(const BFIBase::LoopData &OuterLoop) { 572 Start = OuterLoop.getHeader(); 573 Nodes.reserve(OuterLoop.Nodes.size()); 574 for (auto N : OuterLoop.Nodes) 575 addNode(N); 576 indexNodes(); 577 } 578 579 void IrreducibleGraph::addNodesInFunction() { 580 Start = 0; 581 for (uint32_t Index = 0; Index < BFI.Working.size(); ++Index) 582 if (!BFI.Working[Index].isPackaged()) 583 addNode(Index); 584 indexNodes(); 585 } 586 587 void IrreducibleGraph::indexNodes() { 588 for (auto &I : Nodes) 589 Lookup[I.Node.Index] = &I; 590 } 591 592 void IrreducibleGraph::addEdge(IrrNode &Irr, const BlockNode &Succ, 593 const BFIBase::LoopData *OuterLoop) { 594 if (OuterLoop && OuterLoop->isHeader(Succ)) 595 return; 596 auto L = Lookup.find(Succ.Index); 597 if (L == Lookup.end()) 598 return; 599 IrrNode &SuccIrr = *L->second; 600 Irr.Edges.push_back(&SuccIrr); 601 SuccIrr.Edges.push_front(&Irr); 602 ++SuccIrr.NumIn; 603 } 604 605 namespace llvm { 606 template <> struct GraphTraits<IrreducibleGraph> { 607 typedef bfi_detail::IrreducibleGraph GraphT; 608 609 typedef const GraphT::IrrNode NodeType; 610 typedef GraphT::IrrNode::iterator ChildIteratorType; 611 612 static const NodeType *getEntryNode(const GraphT &G) { 613 return G.StartIrr; 614 } 615 static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); } 616 static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); } 617 }; 618 } // end namespace llvm 619 620 /// \brief Find extra irreducible headers. 621 /// 622 /// Find entry blocks and other blocks with backedges, which exist when \c G 623 /// contains irreducible sub-SCCs. 624 static void findIrreducibleHeaders( 625 const BlockFrequencyInfoImplBase &BFI, 626 const IrreducibleGraph &G, 627 const std::vector<const IrreducibleGraph::IrrNode *> &SCC, 628 LoopData::NodeList &Headers, LoopData::NodeList &Others) { 629 // Map from nodes in the SCC to whether it's an entry block. 630 SmallDenseMap<const IrreducibleGraph::IrrNode *, bool, 8> InSCC; 631 632 // InSCC also acts the set of nodes in the graph. Seed it. 633 for (const auto *I : SCC) 634 InSCC[I] = false; 635 636 for (auto I = InSCC.begin(), E = InSCC.end(); I != E; ++I) { 637 auto &Irr = *I->first; 638 for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) { 639 if (InSCC.count(P)) 640 continue; 641 642 // This is an entry block. 643 I->second = true; 644 Headers.push_back(Irr.Node); 645 DEBUG(dbgs() << " => entry = " << BFI.getBlockName(Irr.Node) << "\n"); 646 break; 647 } 648 } 649 assert(Headers.size() >= 2 && 650 "Expected irreducible CFG; -loop-info is likely invalid"); 651 if (Headers.size() == InSCC.size()) { 652 // Every block is a header. 653 std::sort(Headers.begin(), Headers.end()); 654 return; 655 } 656 657 // Look for extra headers from irreducible sub-SCCs. 658 for (const auto &I : InSCC) { 659 // Entry blocks are already headers. 660 if (I.second) 661 continue; 662 663 auto &Irr = *I.first; 664 for (const auto *P : make_range(Irr.pred_begin(), Irr.pred_end())) { 665 // Skip forward edges. 666 if (P->Node < Irr.Node) 667 continue; 668 669 // Skip predecessors from entry blocks. These can have inverted 670 // ordering. 671 if (InSCC.lookup(P)) 672 continue; 673 674 // Store the extra header. 675 Headers.push_back(Irr.Node); 676 DEBUG(dbgs() << " => extra = " << BFI.getBlockName(Irr.Node) << "\n"); 677 break; 678 } 679 if (Headers.back() == Irr.Node) 680 // Added this as a header. 681 continue; 682 683 // This is not a header. 684 Others.push_back(Irr.Node); 685 DEBUG(dbgs() << " => other = " << BFI.getBlockName(Irr.Node) << "\n"); 686 } 687 std::sort(Headers.begin(), Headers.end()); 688 std::sort(Others.begin(), Others.end()); 689 } 690 691 static void createIrreducibleLoop( 692 BlockFrequencyInfoImplBase &BFI, const IrreducibleGraph &G, 693 LoopData *OuterLoop, std::list<LoopData>::iterator Insert, 694 const std::vector<const IrreducibleGraph::IrrNode *> &SCC) { 695 // Translate the SCC into RPO. 696 DEBUG(dbgs() << " - found-scc\n"); 697 698 LoopData::NodeList Headers; 699 LoopData::NodeList Others; 700 findIrreducibleHeaders(BFI, G, SCC, Headers, Others); 701 702 auto Loop = BFI.Loops.emplace(Insert, OuterLoop, Headers.begin(), 703 Headers.end(), Others.begin(), Others.end()); 704 705 // Update loop hierarchy. 706 for (const auto &N : Loop->Nodes) 707 if (BFI.Working[N.Index].isLoopHeader()) 708 BFI.Working[N.Index].Loop->Parent = &*Loop; 709 else 710 BFI.Working[N.Index].Loop = &*Loop; 711 } 712 713 iterator_range<std::list<LoopData>::iterator> 714 BlockFrequencyInfoImplBase::analyzeIrreducible( 715 const IrreducibleGraph &G, LoopData *OuterLoop, 716 std::list<LoopData>::iterator Insert) { 717 assert((OuterLoop == nullptr) == (Insert == Loops.begin())); 718 auto Prev = OuterLoop ? std::prev(Insert) : Loops.end(); 719 720 for (auto I = scc_begin(G); !I.isAtEnd(); ++I) { 721 if (I->size() < 2) 722 continue; 723 724 // Translate the SCC into RPO. 725 createIrreducibleLoop(*this, G, OuterLoop, Insert, *I); 726 } 727 728 if (OuterLoop) 729 return make_range(std::next(Prev), Insert); 730 return make_range(Loops.begin(), Insert); 731 } 732 733 void 734 BlockFrequencyInfoImplBase::updateLoopWithIrreducible(LoopData &OuterLoop) { 735 OuterLoop.Exits.clear(); 736 for (auto &Mass : OuterLoop.BackedgeMass) 737 Mass = BlockMass::getEmpty(); 738 auto O = OuterLoop.Nodes.begin() + 1; 739 for (auto I = O, E = OuterLoop.Nodes.end(); I != E; ++I) 740 if (!Working[I->Index].isPackaged()) 741 *O++ = *I; 742 OuterLoop.Nodes.erase(O, OuterLoop.Nodes.end()); 743 } 744 745 void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) { 746 assert(Loop.isIrreducible() && "this only makes sense on irreducible loops"); 747 748 // Since the loop has more than one header block, the mass flowing back into 749 // each header will be different. Adjust the mass in each header loop to 750 // reflect the masses flowing through back edges. 751 // 752 // To do this, we distribute the initial mass using the backedge masses 753 // as weights for the distribution. 754 BlockMass LoopMass = BlockMass::getFull(); 755 Distribution Dist; 756 757 DEBUG(dbgs() << "adjust-loop-header-mass:\n"); 758 for (uint32_t H = 0; H < Loop.NumHeaders; ++H) { 759 auto &HeaderNode = Loop.Nodes[H]; 760 auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)]; 761 DEBUG(dbgs() << " - Add back edge mass for node " 762 << getBlockName(HeaderNode) << ": " << BackedgeMass << "\n"); 763 if (BackedgeMass.getMass() > 0) 764 Dist.addLocal(HeaderNode, BackedgeMass.getMass()); 765 else 766 DEBUG(dbgs() << " Nothing added. Back edge mass is zero\n"); 767 } 768 769 DitheringDistributer D(Dist, LoopMass); 770 771 DEBUG(dbgs() << " Distribute loop mass " << LoopMass 772 << " to headers using above weights\n"); 773 for (const Weight &W : Dist.Weights) { 774 BlockMass Taken = D.takeMass(W.Amount); 775 assert(W.Type == Weight::Local && "all weights should be local"); 776 Working[W.TargetNode.Index].getMass() = Taken; 777 DEBUG(debugAssign(*this, D, W.TargetNode, Taken, nullptr)); 778 } 779 } 780