17cc2493dSspupyrev //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===// 27cc2493dSspupyrev // 37cc2493dSspupyrev // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 47cc2493dSspupyrev // See https://llvm.org/LICENSE.txt for license information. 57cc2493dSspupyrev // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 67cc2493dSspupyrev // 77cc2493dSspupyrev //===----------------------------------------------------------------------===// 87cc2493dSspupyrev // 97cc2493dSspupyrev // This file implements a profile inference algorithm. Given an incomplete and 107cc2493dSspupyrev // possibly imprecise block counts, the algorithm reconstructs realistic block 117cc2493dSspupyrev // and edge counts that satisfy flow conservation rules, while minimally modify 127cc2493dSspupyrev // input block counts. 137cc2493dSspupyrev // 147cc2493dSspupyrev //===----------------------------------------------------------------------===// 157cc2493dSspupyrev 167cc2493dSspupyrev #include "llvm/Transforms/Utils/SampleProfileInference.h" 175f4ae564SJan Svoboda #include "llvm/ADT/BitVector.h" 18f2ade65fSspupyrev #include "llvm/Support/CommandLine.h" 197cc2493dSspupyrev #include "llvm/Support/Debug.h" 207cc2493dSspupyrev #include <queue> 217cc2493dSspupyrev #include <set> 22f2ade65fSspupyrev #include <stack> 237cc2493dSspupyrev 247cc2493dSspupyrev using namespace llvm; 257cc2493dSspupyrev #define DEBUG_TYPE "sample-profile-inference" 267cc2493dSspupyrev 277cc2493dSspupyrev namespace { 287cc2493dSspupyrev 29f2ade65fSspupyrev static cl::opt<bool> SampleProfileEvenCountDistribution( 30f2ade65fSspupyrev "sample-profile-even-count-distribution", cl::init(true), cl::Hidden, 31f2ade65fSspupyrev cl::ZeroOrMore, 32f2ade65fSspupyrev cl::desc("Try to evenly distribute counts when there are multiple equally " 33f2ade65fSspupyrev "likely options.")); 34f2ade65fSspupyrev 35f2ade65fSspupyrev static cl::opt<unsigned> SampleProfileMaxDfsCalls( 36f2ade65fSspupyrev "sample-profile-max-dfs-calls", cl::init(10), cl::Hidden, cl::ZeroOrMore, 37f2ade65fSspupyrev cl::desc("Maximum number of dfs iterations for even count distribution.")); 38f2ade65fSspupyrev 39*81aedab7Sspupyrev static cl::opt<unsigned> SampleProfileProfiCostInc( 40*81aedab7Sspupyrev "sample-profile-profi-cost-inc", cl::init(10), cl::Hidden, cl::ZeroOrMore, 41*81aedab7Sspupyrev cl::desc("A cost of increasing a block's count by one.")); 42*81aedab7Sspupyrev 43*81aedab7Sspupyrev static cl::opt<unsigned> SampleProfileProfiCostDec( 44*81aedab7Sspupyrev "sample-profile-profi-cost-dec", cl::init(20), cl::Hidden, cl::ZeroOrMore, 45*81aedab7Sspupyrev cl::desc("A cost of decreasing a block's count by one.")); 46*81aedab7Sspupyrev 47*81aedab7Sspupyrev static cl::opt<unsigned> SampleProfileProfiCostIncZero( 48*81aedab7Sspupyrev "sample-profile-profi-cost-inc-zero", cl::init(11), cl::Hidden, 49*81aedab7Sspupyrev cl::ZeroOrMore, 50*81aedab7Sspupyrev cl::desc("A cost of increasing a count of zero-weight block by one.")); 51*81aedab7Sspupyrev 52*81aedab7Sspupyrev static cl::opt<unsigned> SampleProfileProfiCostIncEntry( 53*81aedab7Sspupyrev "sample-profile-profi-cost-inc-entry", cl::init(40), cl::Hidden, 54*81aedab7Sspupyrev cl::ZeroOrMore, 55*81aedab7Sspupyrev cl::desc("A cost of increasing the entry block's count by one.")); 56*81aedab7Sspupyrev 57*81aedab7Sspupyrev static cl::opt<unsigned> SampleProfileProfiCostDecEntry( 58*81aedab7Sspupyrev "sample-profile-profi-cost-dec-entry", cl::init(10), cl::Hidden, 59*81aedab7Sspupyrev cl::ZeroOrMore, 60*81aedab7Sspupyrev cl::desc("A cost of decreasing the entry block's count by one.")); 61*81aedab7Sspupyrev 627cc2493dSspupyrev /// A value indicating an infinite flow/capacity/weight of a block/edge. 637cc2493dSspupyrev /// Not using numeric_limits<int64_t>::max(), as the values can be summed up 647cc2493dSspupyrev /// during the execution. 657cc2493dSspupyrev static constexpr int64_t INF = ((int64_t)1) << 50; 667cc2493dSspupyrev 677cc2493dSspupyrev /// The minimum-cost maximum flow algorithm. 687cc2493dSspupyrev /// 697cc2493dSspupyrev /// The algorithm finds the maximum flow of minimum cost on a given (directed) 707cc2493dSspupyrev /// network using a modified version of the classical Moore-Bellman-Ford 717cc2493dSspupyrev /// approach. The algorithm applies a number of augmentation iterations in which 727cc2493dSspupyrev /// flow is sent along paths of positive capacity from the source to the sink. 737cc2493dSspupyrev /// The worst-case time complexity of the implementation is O(v(f)*m*n), where 747cc2493dSspupyrev /// where m is the number of edges, n is the number of vertices, and v(f) is the 757cc2493dSspupyrev /// value of the maximum flow. However, the observed running time on typical 767cc2493dSspupyrev /// instances is sub-quadratic, that is, o(n^2). 777cc2493dSspupyrev /// 787cc2493dSspupyrev /// The input is a set of edges with specified costs and capacities, and a pair 797cc2493dSspupyrev /// of nodes (source and sink). The output is the flow along each edge of the 807cc2493dSspupyrev /// minimum total cost respecting the given edge capacities. 817cc2493dSspupyrev class MinCostMaxFlow { 827cc2493dSspupyrev public: 837cc2493dSspupyrev // Initialize algorithm's data structures for a network of a given size. 847cc2493dSspupyrev void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) { 857cc2493dSspupyrev Source = SourceNode; 867cc2493dSspupyrev Target = SinkNode; 877cc2493dSspupyrev 887cc2493dSspupyrev Nodes = std::vector<Node>(NodeCount); 897cc2493dSspupyrev Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>()); 90f2ade65fSspupyrev if (SampleProfileEvenCountDistribution) 91f2ade65fSspupyrev AugmentingEdges = 92f2ade65fSspupyrev std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>()); 937cc2493dSspupyrev } 947cc2493dSspupyrev 957cc2493dSspupyrev // Run the algorithm. 967cc2493dSspupyrev int64_t run() { 97f2ade65fSspupyrev // Iteratively find an augmentation path/dag in the network and send the 98f2ade65fSspupyrev // flow along its edges 99f2ade65fSspupyrev size_t AugmentationIters = applyFlowAugmentation(); 1007cc2493dSspupyrev 1017cc2493dSspupyrev // Compute the total flow and its cost 1027cc2493dSspupyrev int64_t TotalCost = 0; 1037cc2493dSspupyrev int64_t TotalFlow = 0; 1047cc2493dSspupyrev for (uint64_t Src = 0; Src < Nodes.size(); Src++) { 1057cc2493dSspupyrev for (auto &Edge : Edges[Src]) { 1067cc2493dSspupyrev if (Edge.Flow > 0) { 1077cc2493dSspupyrev TotalCost += Edge.Cost * Edge.Flow; 1087cc2493dSspupyrev if (Src == Source) 1097cc2493dSspupyrev TotalFlow += Edge.Flow; 1107cc2493dSspupyrev } 1117cc2493dSspupyrev } 1127cc2493dSspupyrev } 1137cc2493dSspupyrev LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters 1147cc2493dSspupyrev << " iterations with " << TotalFlow << " total flow" 1157cc2493dSspupyrev << " of " << TotalCost << " cost\n"); 11622d82949SKazu Hirata (void)TotalFlow; 117f2ade65fSspupyrev (void)AugmentationIters; 1187cc2493dSspupyrev return TotalCost; 1197cc2493dSspupyrev } 1207cc2493dSspupyrev 1217cc2493dSspupyrev /// Adding an edge to the network with a specified capacity and a cost. 1227cc2493dSspupyrev /// Multiple edges between a pair of nodes are allowed but self-edges 1237cc2493dSspupyrev /// are not supported. 1247cc2493dSspupyrev void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) { 1257cc2493dSspupyrev assert(Capacity > 0 && "adding an edge of zero capacity"); 1267cc2493dSspupyrev assert(Src != Dst && "loop edge are not supported"); 1277cc2493dSspupyrev 1287cc2493dSspupyrev Edge SrcEdge; 1297cc2493dSspupyrev SrcEdge.Dst = Dst; 1307cc2493dSspupyrev SrcEdge.Cost = Cost; 1317cc2493dSspupyrev SrcEdge.Capacity = Capacity; 1327cc2493dSspupyrev SrcEdge.Flow = 0; 1337cc2493dSspupyrev SrcEdge.RevEdgeIndex = Edges[Dst].size(); 1347cc2493dSspupyrev 1357cc2493dSspupyrev Edge DstEdge; 1367cc2493dSspupyrev DstEdge.Dst = Src; 1377cc2493dSspupyrev DstEdge.Cost = -Cost; 1387cc2493dSspupyrev DstEdge.Capacity = 0; 1397cc2493dSspupyrev DstEdge.Flow = 0; 1407cc2493dSspupyrev DstEdge.RevEdgeIndex = Edges[Src].size(); 1417cc2493dSspupyrev 1427cc2493dSspupyrev Edges[Src].push_back(SrcEdge); 1437cc2493dSspupyrev Edges[Dst].push_back(DstEdge); 1447cc2493dSspupyrev } 1457cc2493dSspupyrev 1467cc2493dSspupyrev /// Adding an edge to the network of infinite capacity and a given cost. 1477cc2493dSspupyrev void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) { 1487cc2493dSspupyrev addEdge(Src, Dst, INF, Cost); 1497cc2493dSspupyrev } 1507cc2493dSspupyrev 1517cc2493dSspupyrev /// Get the total flow from a given source node. 1527cc2493dSspupyrev /// Returns a list of pairs (target node, amount of flow to the target). 1537cc2493dSspupyrev const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const { 1547cc2493dSspupyrev std::vector<std::pair<uint64_t, int64_t>> Flow; 1557cc2493dSspupyrev for (auto &Edge : Edges[Src]) { 1567cc2493dSspupyrev if (Edge.Flow > 0) 1577cc2493dSspupyrev Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow)); 1587cc2493dSspupyrev } 1597cc2493dSspupyrev return Flow; 1607cc2493dSspupyrev } 1617cc2493dSspupyrev 1627cc2493dSspupyrev /// Get the total flow between a pair of nodes. 1637cc2493dSspupyrev int64_t getFlow(uint64_t Src, uint64_t Dst) const { 1647cc2493dSspupyrev int64_t Flow = 0; 1657cc2493dSspupyrev for (auto &Edge : Edges[Src]) { 1667cc2493dSspupyrev if (Edge.Dst == Dst) { 1677cc2493dSspupyrev Flow += Edge.Flow; 1687cc2493dSspupyrev } 1697cc2493dSspupyrev } 1707cc2493dSspupyrev return Flow; 1717cc2493dSspupyrev } 1727cc2493dSspupyrev 1737cc2493dSspupyrev /// A cost of taking an unlikely jump. 17413d1364aSspupyrev static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30; 175*81aedab7Sspupyrev /// Minimum BaseDistance for the jump distance values in island joining. 176*81aedab7Sspupyrev static constexpr uint64_t MinBaseDistance = 10000; 1777cc2493dSspupyrev 1787cc2493dSspupyrev private: 179f2ade65fSspupyrev /// Iteratively find an augmentation path/dag in the network and send the 180f2ade65fSspupyrev /// flow along its edges. The method returns the number of applied iterations. 181f2ade65fSspupyrev size_t applyFlowAugmentation() { 182f2ade65fSspupyrev size_t AugmentationIters = 0; 183f2ade65fSspupyrev while (findAugmentingPath()) { 184f2ade65fSspupyrev uint64_t PathCapacity = computeAugmentingPathCapacity(); 185f2ade65fSspupyrev while (PathCapacity > 0) { 186f2ade65fSspupyrev bool Progress = false; 187f2ade65fSspupyrev if (SampleProfileEvenCountDistribution) { 188f2ade65fSspupyrev // Identify node/edge candidates for augmentation 189f2ade65fSspupyrev identifyShortestEdges(PathCapacity); 190f2ade65fSspupyrev 191f2ade65fSspupyrev // Find an augmenting DAG 192f2ade65fSspupyrev auto AugmentingOrder = findAugmentingDAG(); 193f2ade65fSspupyrev 194f2ade65fSspupyrev // Apply the DAG augmentation 195f2ade65fSspupyrev Progress = augmentFlowAlongDAG(AugmentingOrder); 196f2ade65fSspupyrev PathCapacity = computeAugmentingPathCapacity(); 197f2ade65fSspupyrev } 198f2ade65fSspupyrev 199f2ade65fSspupyrev if (!Progress) { 200f2ade65fSspupyrev augmentFlowAlongPath(PathCapacity); 201f2ade65fSspupyrev PathCapacity = 0; 202f2ade65fSspupyrev } 203f2ade65fSspupyrev 204f2ade65fSspupyrev AugmentationIters++; 205f2ade65fSspupyrev } 206f2ade65fSspupyrev } 207f2ade65fSspupyrev return AugmentationIters; 208f2ade65fSspupyrev } 209f2ade65fSspupyrev 210f2ade65fSspupyrev /// Compute the capacity of the cannonical augmenting path. If the path is 211f2ade65fSspupyrev /// saturated (that is, no flow can be sent along the path), then return 0. 212f2ade65fSspupyrev uint64_t computeAugmentingPathCapacity() { 213f2ade65fSspupyrev uint64_t PathCapacity = INF; 214f2ade65fSspupyrev uint64_t Now = Target; 215f2ade65fSspupyrev while (Now != Source) { 216f2ade65fSspupyrev uint64_t Pred = Nodes[Now].ParentNode; 217f2ade65fSspupyrev auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 218f2ade65fSspupyrev 219f2ade65fSspupyrev assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow"); 220f2ade65fSspupyrev uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow); 221f2ade65fSspupyrev PathCapacity = std::min(PathCapacity, EdgeCapacity); 222f2ade65fSspupyrev 223f2ade65fSspupyrev Now = Pred; 224f2ade65fSspupyrev } 225f2ade65fSspupyrev return PathCapacity; 226f2ade65fSspupyrev } 227f2ade65fSspupyrev 2287cc2493dSspupyrev /// Check for existence of an augmenting path with a positive capacity. 2297cc2493dSspupyrev bool findAugmentingPath() { 2307cc2493dSspupyrev // Initialize data structures 2317cc2493dSspupyrev for (auto &Node : Nodes) { 2327cc2493dSspupyrev Node.Distance = INF; 2337cc2493dSspupyrev Node.ParentNode = uint64_t(-1); 2347cc2493dSspupyrev Node.ParentEdgeIndex = uint64_t(-1); 2357cc2493dSspupyrev Node.Taken = false; 2367cc2493dSspupyrev } 2377cc2493dSspupyrev 2387cc2493dSspupyrev std::queue<uint64_t> Queue; 2397cc2493dSspupyrev Queue.push(Source); 2407cc2493dSspupyrev Nodes[Source].Distance = 0; 2417cc2493dSspupyrev Nodes[Source].Taken = true; 2427cc2493dSspupyrev while (!Queue.empty()) { 2437cc2493dSspupyrev uint64_t Src = Queue.front(); 2447cc2493dSspupyrev Queue.pop(); 2457cc2493dSspupyrev Nodes[Src].Taken = false; 2467cc2493dSspupyrev // Although the residual network contains edges with negative costs 2477cc2493dSspupyrev // (in particular, backward edges), it can be shown that there are no 2487cc2493dSspupyrev // negative-weight cycles and the following two invariants are maintained: 2497cc2493dSspupyrev // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V, 2507cc2493dSspupyrev // where Dist is the length of the shortest path between two nodes. This 2517cc2493dSspupyrev // allows to prune the search-space of the path-finding algorithm using 2527cc2493dSspupyrev // the following early-stop criteria: 2537cc2493dSspupyrev // -- If we find a path with zero-distance from Source to Target, stop the 2547cc2493dSspupyrev // search, as the path is the shortest since Dist[Source, Target] >= 0; 2557cc2493dSspupyrev // -- If we have Dist[Source, V] > Dist[Source, Target], then do not 2567cc2493dSspupyrev // process node V, as it is guaranteed _not_ to be on a shortest path 2577cc2493dSspupyrev // from Source to Target; it follows from inequalities 2587cc2493dSspupyrev // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target] 2597cc2493dSspupyrev // >= Dist[Source, V] 260f2ade65fSspupyrev if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0) 2617cc2493dSspupyrev break; 2627cc2493dSspupyrev if (Nodes[Src].Distance > Nodes[Target].Distance) 2637cc2493dSspupyrev continue; 2647cc2493dSspupyrev 2657cc2493dSspupyrev // Process adjacent edges 2667cc2493dSspupyrev for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) { 2677cc2493dSspupyrev auto &Edge = Edges[Src][EdgeIdx]; 2687cc2493dSspupyrev if (Edge.Flow < Edge.Capacity) { 2697cc2493dSspupyrev uint64_t Dst = Edge.Dst; 2707cc2493dSspupyrev int64_t NewDistance = Nodes[Src].Distance + Edge.Cost; 2717cc2493dSspupyrev if (Nodes[Dst].Distance > NewDistance) { 2727cc2493dSspupyrev // Update the distance and the parent node/edge 2737cc2493dSspupyrev Nodes[Dst].Distance = NewDistance; 2747cc2493dSspupyrev Nodes[Dst].ParentNode = Src; 2757cc2493dSspupyrev Nodes[Dst].ParentEdgeIndex = EdgeIdx; 2767cc2493dSspupyrev // Add the node to the queue, if it is not there yet 2777cc2493dSspupyrev if (!Nodes[Dst].Taken) { 2787cc2493dSspupyrev Queue.push(Dst); 2797cc2493dSspupyrev Nodes[Dst].Taken = true; 2807cc2493dSspupyrev } 2817cc2493dSspupyrev } 2827cc2493dSspupyrev } 2837cc2493dSspupyrev } 2847cc2493dSspupyrev } 2857cc2493dSspupyrev 2867cc2493dSspupyrev return Nodes[Target].Distance != INF; 2877cc2493dSspupyrev } 2887cc2493dSspupyrev 2897cc2493dSspupyrev /// Update the current flow along the augmenting path. 290f2ade65fSspupyrev void augmentFlowAlongPath(uint64_t PathCapacity) { 29193a2c291Sspupyrev assert(PathCapacity > 0 && "found an incorrect augmenting path"); 292f2ade65fSspupyrev uint64_t Now = Target; 2937cc2493dSspupyrev while (Now != Source) { 2947cc2493dSspupyrev uint64_t Pred = Nodes[Now].ParentNode; 2957cc2493dSspupyrev auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 2967cc2493dSspupyrev auto &RevEdge = Edges[Now][Edge.RevEdgeIndex]; 2977cc2493dSspupyrev 2987cc2493dSspupyrev Edge.Flow += PathCapacity; 2997cc2493dSspupyrev RevEdge.Flow -= PathCapacity; 3007cc2493dSspupyrev 3017cc2493dSspupyrev Now = Pred; 3027cc2493dSspupyrev } 3037cc2493dSspupyrev } 3047cc2493dSspupyrev 305f2ade65fSspupyrev /// Find an Augmenting DAG order using a modified version of DFS in which we 306f2ade65fSspupyrev /// can visit a node multiple times. In the DFS search, when scanning each 307f2ade65fSspupyrev /// edge out of a node, continue search at Edge.Dst endpoint if it has not 308f2ade65fSspupyrev /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm 309f2ade65fSspupyrev /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time. 310f2ade65fSspupyrev /// It returns an Augmenting Order (Taken nodes in decreasing Finish time) 311f2ade65fSspupyrev /// that starts with Source and ends with Target. 312f2ade65fSspupyrev std::vector<uint64_t> findAugmentingDAG() { 313f2ade65fSspupyrev // We use a stack based implemenation of DFS to avoid recursion. 314f2ade65fSspupyrev // Defining DFS data structures: 315f2ade65fSspupyrev // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that 316f2ade65fSspupyrev // - we are currently visiting Nodes[NodeIdx] and 317f2ade65fSspupyrev // - the next edge to scan is Edges[NodeIdx][EdgeIdx] 318f2ade65fSspupyrev typedef std::pair<uint64_t, uint64_t> StackItemType; 319f2ade65fSspupyrev std::stack<StackItemType> Stack; 320f2ade65fSspupyrev std::vector<uint64_t> AugmentingOrder; 321f2ade65fSspupyrev 322f2ade65fSspupyrev // Phase 0: Initialize Node attributes and Time for DFS run 323f2ade65fSspupyrev for (auto &Node : Nodes) { 324f2ade65fSspupyrev Node.Discovery = 0; 325f2ade65fSspupyrev Node.Finish = 0; 326f2ade65fSspupyrev Node.NumCalls = 0; 327f2ade65fSspupyrev Node.Taken = false; 328f2ade65fSspupyrev } 329f2ade65fSspupyrev uint64_t Time = 0; 330f2ade65fSspupyrev // Mark Target as Taken 331f2ade65fSspupyrev // Taken attribute will be propagated backwards from Target towards Source 332f2ade65fSspupyrev Nodes[Target].Taken = true; 333f2ade65fSspupyrev 334f2ade65fSspupyrev // Phase 1: Start DFS traversal from Source 335f2ade65fSspupyrev Stack.emplace(Source, 0); 336f2ade65fSspupyrev Nodes[Source].Discovery = ++Time; 337f2ade65fSspupyrev while (!Stack.empty()) { 338f2ade65fSspupyrev auto NodeIdx = Stack.top().first; 339f2ade65fSspupyrev auto EdgeIdx = Stack.top().second; 340f2ade65fSspupyrev 341f2ade65fSspupyrev // If we haven't scanned all edges out of NodeIdx, continue scanning 342f2ade65fSspupyrev if (EdgeIdx < Edges[NodeIdx].size()) { 343f2ade65fSspupyrev auto &Edge = Edges[NodeIdx][EdgeIdx]; 344f2ade65fSspupyrev auto &Dst = Nodes[Edge.Dst]; 345f2ade65fSspupyrev Stack.top().second++; 346f2ade65fSspupyrev 347f2ade65fSspupyrev if (Edge.OnShortestPath) { 348f2ade65fSspupyrev // If we haven't seen Edge.Dst so far, continue DFS search there 349f2ade65fSspupyrev if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) { 350f2ade65fSspupyrev Dst.Discovery = ++Time; 351f2ade65fSspupyrev Stack.emplace(Edge.Dst, 0); 352f2ade65fSspupyrev Dst.NumCalls++; 353f2ade65fSspupyrev } else if (Dst.Taken && Dst.Finish != 0) { 354f2ade65fSspupyrev // Else, if Edge.Dst already have a path to Target, so that NodeIdx 355f2ade65fSspupyrev Nodes[NodeIdx].Taken = true; 356f2ade65fSspupyrev } 357f2ade65fSspupyrev } 358f2ade65fSspupyrev } else { 359f2ade65fSspupyrev // If we are done scanning all edge out of NodeIdx 360f2ade65fSspupyrev Stack.pop(); 361f2ade65fSspupyrev // If we haven't found a path from NodeIdx to Target, forget about it 362f2ade65fSspupyrev if (!Nodes[NodeIdx].Taken) { 363f2ade65fSspupyrev Nodes[NodeIdx].Discovery = 0; 364f2ade65fSspupyrev } else { 365f2ade65fSspupyrev // If we have found a path from NodeIdx to Target, then finish NodeIdx 366f2ade65fSspupyrev // and propagate Taken flag to DFS parent unless at the Source 367f2ade65fSspupyrev Nodes[NodeIdx].Finish = ++Time; 368f2ade65fSspupyrev // NodeIdx == Source if and only if the stack is empty 369f2ade65fSspupyrev if (NodeIdx != Source) { 370f2ade65fSspupyrev assert(!Stack.empty() && "empty stack while running dfs"); 371f2ade65fSspupyrev Nodes[Stack.top().first].Taken = true; 372f2ade65fSspupyrev } 373f2ade65fSspupyrev AugmentingOrder.push_back(NodeIdx); 374f2ade65fSspupyrev } 375f2ade65fSspupyrev } 376f2ade65fSspupyrev } 377f2ade65fSspupyrev // Nodes are collected decreasing Finish time, so the order is reversed 378f2ade65fSspupyrev std::reverse(AugmentingOrder.begin(), AugmentingOrder.end()); 379f2ade65fSspupyrev 380f2ade65fSspupyrev // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges 381f2ade65fSspupyrev for (size_t Src : AugmentingOrder) { 382f2ade65fSspupyrev AugmentingEdges[Src].clear(); 383f2ade65fSspupyrev for (auto &Edge : Edges[Src]) { 384f2ade65fSspupyrev uint64_t Dst = Edge.Dst; 385f2ade65fSspupyrev if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken && 386f2ade65fSspupyrev Nodes[Dst].Finish < Nodes[Src].Finish) { 387f2ade65fSspupyrev AugmentingEdges[Src].push_back(&Edge); 388f2ade65fSspupyrev } 389f2ade65fSspupyrev } 390f2ade65fSspupyrev assert((Src == Target || !AugmentingEdges[Src].empty()) && 391f2ade65fSspupyrev "incorrectly constructed augmenting edges"); 392f2ade65fSspupyrev } 393f2ade65fSspupyrev 394f2ade65fSspupyrev return AugmentingOrder; 395f2ade65fSspupyrev } 396f2ade65fSspupyrev 397f2ade65fSspupyrev /// Update the current flow along the given (acyclic) subgraph specified by 398f2ade65fSspupyrev /// the vertex order, AugmentingOrder. The objective is to send as much flow 399f2ade65fSspupyrev /// as possible while evenly distributing flow among successors of each node. 400f2ade65fSspupyrev /// After the update at least one edge is saturated. 401f2ade65fSspupyrev bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) { 402f2ade65fSspupyrev // Phase 0: Initialization 403f2ade65fSspupyrev for (uint64_t Src : AugmentingOrder) { 404f2ade65fSspupyrev Nodes[Src].FracFlow = 0; 405f2ade65fSspupyrev Nodes[Src].IntFlow = 0; 406f2ade65fSspupyrev for (auto &Edge : AugmentingEdges[Src]) { 407f2ade65fSspupyrev Edge->AugmentedFlow = 0; 408f2ade65fSspupyrev } 409f2ade65fSspupyrev } 410f2ade65fSspupyrev 411f2ade65fSspupyrev // Phase 1: Send a unit of fractional flow along the DAG 412f2ade65fSspupyrev uint64_t MaxFlowAmount = INF; 413f2ade65fSspupyrev Nodes[Source].FracFlow = 1.0; 414f2ade65fSspupyrev for (uint64_t Src : AugmentingOrder) { 415f2ade65fSspupyrev assert((Src == Target || Nodes[Src].FracFlow > 0.0) && 416f2ade65fSspupyrev "incorrectly computed fractional flow"); 417f2ade65fSspupyrev // Distribute flow evenly among successors of Src 418f2ade65fSspupyrev uint64_t Degree = AugmentingEdges[Src].size(); 419f2ade65fSspupyrev for (auto &Edge : AugmentingEdges[Src]) { 420f2ade65fSspupyrev double EdgeFlow = Nodes[Src].FracFlow / Degree; 421f2ade65fSspupyrev Nodes[Edge->Dst].FracFlow += EdgeFlow; 422f2ade65fSspupyrev if (Edge->Capacity == INF) 423f2ade65fSspupyrev continue; 424f2ade65fSspupyrev uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow; 425f2ade65fSspupyrev MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow); 426f2ade65fSspupyrev } 427f2ade65fSspupyrev } 428f2ade65fSspupyrev // Stop early if we cannot send any (integral) flow from Source to Target 429f2ade65fSspupyrev if (MaxFlowAmount == 0) 430f2ade65fSspupyrev return false; 431f2ade65fSspupyrev 432f2ade65fSspupyrev // Phase 2: Send an integral flow of MaxFlowAmount 433f2ade65fSspupyrev Nodes[Source].IntFlow = MaxFlowAmount; 434f2ade65fSspupyrev for (uint64_t Src : AugmentingOrder) { 435f2ade65fSspupyrev if (Src == Target) 436f2ade65fSspupyrev break; 437f2ade65fSspupyrev // Distribute flow evenly among successors of Src, rounding up to make 438f2ade65fSspupyrev // sure all flow is sent 439f2ade65fSspupyrev uint64_t Degree = AugmentingEdges[Src].size(); 440f2ade65fSspupyrev // We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree 441f2ade65fSspupyrev uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree; 442f2ade65fSspupyrev for (auto &Edge : AugmentingEdges[Src]) { 443f2ade65fSspupyrev uint64_t Dst = Edge->Dst; 444f2ade65fSspupyrev uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow); 445f2ade65fSspupyrev EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow)); 446f2ade65fSspupyrev Nodes[Dst].IntFlow += EdgeFlow; 447f2ade65fSspupyrev Nodes[Src].IntFlow -= EdgeFlow; 448f2ade65fSspupyrev Edge->AugmentedFlow += EdgeFlow; 449f2ade65fSspupyrev } 450f2ade65fSspupyrev } 451f2ade65fSspupyrev assert(Nodes[Target].IntFlow <= MaxFlowAmount); 452f2ade65fSspupyrev Nodes[Target].IntFlow = 0; 453f2ade65fSspupyrev 454f2ade65fSspupyrev // Phase 3: Send excess flow back traversing the nodes backwards. 455f2ade65fSspupyrev // Because of rounding, not all flow can be sent along the edges of Src. 456f2ade65fSspupyrev // Hence, sending the remaining flow back to maintain flow conservation 457f2ade65fSspupyrev for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) { 458f2ade65fSspupyrev uint64_t Src = AugmentingOrder[Idx - 1]; 459f2ade65fSspupyrev // Try to send excess flow back along each edge. 460f2ade65fSspupyrev // Make sure we only send back flow we just augmented (AugmentedFlow). 461f2ade65fSspupyrev for (auto &Edge : AugmentingEdges[Src]) { 462f2ade65fSspupyrev uint64_t Dst = Edge->Dst; 463f2ade65fSspupyrev if (Nodes[Dst].IntFlow == 0) 464f2ade65fSspupyrev continue; 465f2ade65fSspupyrev uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow); 466f2ade65fSspupyrev Nodes[Dst].IntFlow -= EdgeFlow; 467f2ade65fSspupyrev Nodes[Src].IntFlow += EdgeFlow; 468f2ade65fSspupyrev Edge->AugmentedFlow -= EdgeFlow; 469f2ade65fSspupyrev } 470f2ade65fSspupyrev } 471f2ade65fSspupyrev 472f2ade65fSspupyrev // Phase 4: Update flow values along all edges 473f2ade65fSspupyrev bool HasSaturatedEdges = false; 474f2ade65fSspupyrev for (uint64_t Src : AugmentingOrder) { 475f2ade65fSspupyrev // Verify that we have sent all the excess flow from the node 476f2ade65fSspupyrev assert(Src == Source || Nodes[Src].IntFlow == 0); 477f2ade65fSspupyrev for (auto &Edge : AugmentingEdges[Src]) { 478f2ade65fSspupyrev assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow); 479f2ade65fSspupyrev // Update flow values along the edge and its reverse copy 480f2ade65fSspupyrev auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex]; 481f2ade65fSspupyrev Edge->Flow += Edge->AugmentedFlow; 482f2ade65fSspupyrev RevEdge.Flow -= Edge->AugmentedFlow; 483f2ade65fSspupyrev if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0) 484f2ade65fSspupyrev HasSaturatedEdges = true; 485f2ade65fSspupyrev } 486f2ade65fSspupyrev } 487f2ade65fSspupyrev 488f2ade65fSspupyrev // The augmentation is successful iff at least one edge becomes saturated 489f2ade65fSspupyrev return HasSaturatedEdges; 490f2ade65fSspupyrev } 491f2ade65fSspupyrev 492f2ade65fSspupyrev /// Identify candidate (shortest) edges for augmentation. 493f2ade65fSspupyrev void identifyShortestEdges(uint64_t PathCapacity) { 494f2ade65fSspupyrev assert(PathCapacity > 0 && "found an incorrect augmenting DAG"); 495f2ade65fSspupyrev // To make sure the augmentation DAG contains only edges with large residual 496f2ade65fSspupyrev // capacity, we prune all edges whose capacity is below a fraction of 497f2ade65fSspupyrev // the capacity of the augmented path. 498f2ade65fSspupyrev // (All edges of the path itself are always in the DAG) 499f2ade65fSspupyrev uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1)); 500f2ade65fSspupyrev 501f2ade65fSspupyrev // Decide which edges are on a shortest path from Source to Target 502f2ade65fSspupyrev for (size_t Src = 0; Src < Nodes.size(); Src++) { 503f2ade65fSspupyrev // An edge cannot be augmenting if the endpoint has large distance 504f2ade65fSspupyrev if (Nodes[Src].Distance > Nodes[Target].Distance) 505f2ade65fSspupyrev continue; 506f2ade65fSspupyrev 507f2ade65fSspupyrev for (auto &Edge : Edges[Src]) { 508f2ade65fSspupyrev uint64_t Dst = Edge.Dst; 509f2ade65fSspupyrev Edge.OnShortestPath = 510f2ade65fSspupyrev Src != Target && Dst != Source && 511f2ade65fSspupyrev Nodes[Dst].Distance <= Nodes[Target].Distance && 512f2ade65fSspupyrev Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost && 513f2ade65fSspupyrev Edge.Capacity > Edge.Flow && 514f2ade65fSspupyrev uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity; 515f2ade65fSspupyrev } 516f2ade65fSspupyrev } 517f2ade65fSspupyrev } 518f2ade65fSspupyrev 51913d1364aSspupyrev /// A node in a flow network. 5207cc2493dSspupyrev struct Node { 5217cc2493dSspupyrev /// The cost of the cheapest path from the source to the current node. 5227cc2493dSspupyrev int64_t Distance; 5237cc2493dSspupyrev /// The node preceding the current one in the path. 5247cc2493dSspupyrev uint64_t ParentNode; 5257cc2493dSspupyrev /// The index of the edge between ParentNode and the current node. 5267cc2493dSspupyrev uint64_t ParentEdgeIndex; 5277cc2493dSspupyrev /// An indicator of whether the current node is in a queue. 5287cc2493dSspupyrev bool Taken; 529f2ade65fSspupyrev 530f2ade65fSspupyrev /// Data fields utilized in DAG-augmentation: 531f2ade65fSspupyrev /// Fractional flow. 532f2ade65fSspupyrev double FracFlow; 533f2ade65fSspupyrev /// Integral flow. 534f2ade65fSspupyrev uint64_t IntFlow; 535f2ade65fSspupyrev /// Discovery time. 536f2ade65fSspupyrev uint64_t Discovery; 537f2ade65fSspupyrev /// Finish time. 538f2ade65fSspupyrev uint64_t Finish; 539f2ade65fSspupyrev /// NumCalls. 540f2ade65fSspupyrev uint64_t NumCalls; 5417cc2493dSspupyrev }; 542f2ade65fSspupyrev 5437cc2493dSspupyrev /// An edge in a flow network. 5447cc2493dSspupyrev struct Edge { 5457cc2493dSspupyrev /// The cost of the edge. 5467cc2493dSspupyrev int64_t Cost; 5477cc2493dSspupyrev /// The capacity of the edge. 5487cc2493dSspupyrev int64_t Capacity; 5497cc2493dSspupyrev /// The current flow on the edge. 5507cc2493dSspupyrev int64_t Flow; 5517cc2493dSspupyrev /// The destination node of the edge. 5527cc2493dSspupyrev uint64_t Dst; 5537cc2493dSspupyrev /// The index of the reverse edge between Dst and the current node. 5547cc2493dSspupyrev uint64_t RevEdgeIndex; 555f2ade65fSspupyrev 556f2ade65fSspupyrev /// Data fields utilized in DAG-augmentation: 557f2ade65fSspupyrev /// Whether the edge is currently on a shortest path from Source to Target. 558f2ade65fSspupyrev bool OnShortestPath; 559f2ade65fSspupyrev /// Extra flow along the edge. 560f2ade65fSspupyrev uint64_t AugmentedFlow; 5617cc2493dSspupyrev }; 5627cc2493dSspupyrev 5637cc2493dSspupyrev /// The set of network nodes. 5647cc2493dSspupyrev std::vector<Node> Nodes; 5657cc2493dSspupyrev /// The set of network edges. 5667cc2493dSspupyrev std::vector<std::vector<Edge>> Edges; 5677cc2493dSspupyrev /// Source node of the flow. 5687cc2493dSspupyrev uint64_t Source; 5697cc2493dSspupyrev /// Target (sink) node of the flow. 5707cc2493dSspupyrev uint64_t Target; 571f2ade65fSspupyrev /// Augmenting edges. 572f2ade65fSspupyrev std::vector<std::vector<Edge *>> AugmentingEdges; 5737cc2493dSspupyrev }; 5747cc2493dSspupyrev 57593a2c291Sspupyrev /// A post-processing adjustment of control flow. It applies two steps by 57693a2c291Sspupyrev /// rerouting some flow and making it more realistic: 57793a2c291Sspupyrev /// 57893a2c291Sspupyrev /// - First, it removes all isolated components ("islands") with a positive flow 57993a2c291Sspupyrev /// that are unreachable from the entry block. For every such component, we 58093a2c291Sspupyrev /// find the shortest from the entry to an exit passing through the component, 58193a2c291Sspupyrev /// and increase the flow by one unit along the path. 58293a2c291Sspupyrev /// 58393a2c291Sspupyrev /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks 58493a2c291Sspupyrev /// with no sampled counts. Then it rebalnces the flow that goes through such 58593a2c291Sspupyrev /// a subgraph so that each branch is taken with probability 50%. 58693a2c291Sspupyrev /// An unknown subgraph is such that for every two nodes u and v: 58793a2c291Sspupyrev /// - u dominates v and u is not unknown; 58893a2c291Sspupyrev /// - v post-dominates u; and 58993a2c291Sspupyrev /// - all inner-nodes of all (u,v)-paths are unknown. 59093a2c291Sspupyrev /// 59198dd2f9eSspupyrev class FlowAdjuster { 59298dd2f9eSspupyrev public: 59398dd2f9eSspupyrev FlowAdjuster(FlowFunction &Func) : Func(Func) { 59498dd2f9eSspupyrev assert(Func.Blocks[Func.Entry].isEntry() && 59598dd2f9eSspupyrev "incorrect index of the entry block"); 59698dd2f9eSspupyrev } 59798dd2f9eSspupyrev 59898dd2f9eSspupyrev // Run the post-processing 59998dd2f9eSspupyrev void run() { 60093a2c291Sspupyrev /// Adjust the flow to get rid of isolated components. 60198dd2f9eSspupyrev joinIsolatedComponents(); 60293a2c291Sspupyrev 60393a2c291Sspupyrev /// Rebalance the flow inside unknown subgraphs. 60493a2c291Sspupyrev rebalanceUnknownSubgraphs(); 60598dd2f9eSspupyrev } 60698dd2f9eSspupyrev 60798dd2f9eSspupyrev private: 60898dd2f9eSspupyrev void joinIsolatedComponents() { 60998dd2f9eSspupyrev // Find blocks that are reachable from the source 6105f4ae564SJan Svoboda auto Visited = BitVector(NumBlocks(), false); 61198dd2f9eSspupyrev findReachable(Func.Entry, Visited); 61298dd2f9eSspupyrev 61398dd2f9eSspupyrev // Iterate over all non-reachable blocks and adjust their weights 61498dd2f9eSspupyrev for (uint64_t I = 0; I < NumBlocks(); I++) { 61598dd2f9eSspupyrev auto &Block = Func.Blocks[I]; 61698dd2f9eSspupyrev if (Block.Flow > 0 && !Visited[I]) { 61798dd2f9eSspupyrev // Find a path from the entry to an exit passing through the block I 61898dd2f9eSspupyrev auto Path = findShortestPath(I); 61998dd2f9eSspupyrev // Increase the flow along the path 62098dd2f9eSspupyrev assert(Path.size() > 0 && Path[0]->Source == Func.Entry && 62198dd2f9eSspupyrev "incorrectly computed path adjusting control flow"); 62298dd2f9eSspupyrev Func.Blocks[Func.Entry].Flow += 1; 62398dd2f9eSspupyrev for (auto &Jump : Path) { 62498dd2f9eSspupyrev Jump->Flow += 1; 62598dd2f9eSspupyrev Func.Blocks[Jump->Target].Flow += 1; 62698dd2f9eSspupyrev // Update reachability 62798dd2f9eSspupyrev findReachable(Jump->Target, Visited); 62898dd2f9eSspupyrev } 62998dd2f9eSspupyrev } 63098dd2f9eSspupyrev } 63198dd2f9eSspupyrev } 63298dd2f9eSspupyrev 63393a2c291Sspupyrev /// Run BFS from a given block along the jumps with a positive flow and mark 63498dd2f9eSspupyrev /// all reachable blocks. 6355f4ae564SJan Svoboda void findReachable(uint64_t Src, BitVector &Visited) { 63698dd2f9eSspupyrev if (Visited[Src]) 63798dd2f9eSspupyrev return; 63898dd2f9eSspupyrev std::queue<uint64_t> Queue; 63998dd2f9eSspupyrev Queue.push(Src); 64098dd2f9eSspupyrev Visited[Src] = true; 64198dd2f9eSspupyrev while (!Queue.empty()) { 64298dd2f9eSspupyrev Src = Queue.front(); 64398dd2f9eSspupyrev Queue.pop(); 64498dd2f9eSspupyrev for (auto Jump : Func.Blocks[Src].SuccJumps) { 64598dd2f9eSspupyrev uint64_t Dst = Jump->Target; 64698dd2f9eSspupyrev if (Jump->Flow > 0 && !Visited[Dst]) { 64798dd2f9eSspupyrev Queue.push(Dst); 64898dd2f9eSspupyrev Visited[Dst] = true; 64998dd2f9eSspupyrev } 65098dd2f9eSspupyrev } 65198dd2f9eSspupyrev } 65298dd2f9eSspupyrev } 65398dd2f9eSspupyrev 65498dd2f9eSspupyrev /// Find the shortest path from the entry block to an exit block passing 65598dd2f9eSspupyrev /// through a given block. 65698dd2f9eSspupyrev std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) { 65798dd2f9eSspupyrev // A path from the entry block to BlockIdx 65898dd2f9eSspupyrev auto ForwardPath = findShortestPath(Func.Entry, BlockIdx); 65998dd2f9eSspupyrev // A path from BlockIdx to an exit block 66098dd2f9eSspupyrev auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock); 66198dd2f9eSspupyrev 66298dd2f9eSspupyrev // Concatenate the two paths 66398dd2f9eSspupyrev std::vector<FlowJump *> Result; 66498dd2f9eSspupyrev Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end()); 66598dd2f9eSspupyrev Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end()); 66698dd2f9eSspupyrev return Result; 66798dd2f9eSspupyrev } 66898dd2f9eSspupyrev 66998dd2f9eSspupyrev /// Apply the Dijkstra algorithm to find the shortest path from a given 67098dd2f9eSspupyrev /// Source to a given Target block. 67198dd2f9eSspupyrev /// If Target == -1, then the path ends at an exit block. 67298dd2f9eSspupyrev std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) { 67398dd2f9eSspupyrev // Quit early, if possible 67498dd2f9eSspupyrev if (Source == Target) 67598dd2f9eSspupyrev return std::vector<FlowJump *>(); 67698dd2f9eSspupyrev if (Func.Blocks[Source].isExit() && Target == AnyExitBlock) 67798dd2f9eSspupyrev return std::vector<FlowJump *>(); 67898dd2f9eSspupyrev 67998dd2f9eSspupyrev // Initialize data structures 68098dd2f9eSspupyrev auto Distance = std::vector<int64_t>(NumBlocks(), INF); 68198dd2f9eSspupyrev auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr); 68298dd2f9eSspupyrev Distance[Source] = 0; 68398dd2f9eSspupyrev std::set<std::pair<uint64_t, uint64_t>> Queue; 68498dd2f9eSspupyrev Queue.insert(std::make_pair(Distance[Source], Source)); 68598dd2f9eSspupyrev 68698dd2f9eSspupyrev // Run the Dijkstra algorithm 68798dd2f9eSspupyrev while (!Queue.empty()) { 68898dd2f9eSspupyrev uint64_t Src = Queue.begin()->second; 68998dd2f9eSspupyrev Queue.erase(Queue.begin()); 69098dd2f9eSspupyrev // If we found a solution, quit early 69198dd2f9eSspupyrev if (Src == Target || 69298dd2f9eSspupyrev (Func.Blocks[Src].isExit() && Target == AnyExitBlock)) 69398dd2f9eSspupyrev break; 69498dd2f9eSspupyrev 69598dd2f9eSspupyrev for (auto Jump : Func.Blocks[Src].SuccJumps) { 69698dd2f9eSspupyrev uint64_t Dst = Jump->Target; 69798dd2f9eSspupyrev int64_t JumpDist = jumpDistance(Jump); 69898dd2f9eSspupyrev if (Distance[Dst] > Distance[Src] + JumpDist) { 69998dd2f9eSspupyrev Queue.erase(std::make_pair(Distance[Dst], Dst)); 70098dd2f9eSspupyrev 70198dd2f9eSspupyrev Distance[Dst] = Distance[Src] + JumpDist; 70298dd2f9eSspupyrev Parent[Dst] = Jump; 70398dd2f9eSspupyrev 70498dd2f9eSspupyrev Queue.insert(std::make_pair(Distance[Dst], Dst)); 70598dd2f9eSspupyrev } 70698dd2f9eSspupyrev } 70798dd2f9eSspupyrev } 70898dd2f9eSspupyrev // If Target is not provided, find the closest exit block 70998dd2f9eSspupyrev if (Target == AnyExitBlock) { 71098dd2f9eSspupyrev for (uint64_t I = 0; I < NumBlocks(); I++) { 71198dd2f9eSspupyrev if (Func.Blocks[I].isExit() && Parent[I] != nullptr) { 71298dd2f9eSspupyrev if (Target == AnyExitBlock || Distance[Target] > Distance[I]) { 71398dd2f9eSspupyrev Target = I; 71498dd2f9eSspupyrev } 71598dd2f9eSspupyrev } 71698dd2f9eSspupyrev } 71798dd2f9eSspupyrev } 71898dd2f9eSspupyrev assert(Parent[Target] != nullptr && "a path does not exist"); 71998dd2f9eSspupyrev 72098dd2f9eSspupyrev // Extract the constructed path 72198dd2f9eSspupyrev std::vector<FlowJump *> Result; 72298dd2f9eSspupyrev uint64_t Now = Target; 72398dd2f9eSspupyrev while (Now != Source) { 72498dd2f9eSspupyrev assert(Now == Parent[Now]->Target && "incorrect parent jump"); 72598dd2f9eSspupyrev Result.push_back(Parent[Now]); 72698dd2f9eSspupyrev Now = Parent[Now]->Source; 72798dd2f9eSspupyrev } 72898dd2f9eSspupyrev // Reverse the path, since it is extracted from Target to Source 72998dd2f9eSspupyrev std::reverse(Result.begin(), Result.end()); 73098dd2f9eSspupyrev return Result; 73198dd2f9eSspupyrev } 73298dd2f9eSspupyrev 73398dd2f9eSspupyrev /// A distance of a path for a given jump. 73498dd2f9eSspupyrev /// In order to incite the path to use blocks/jumps with large positive flow, 73598dd2f9eSspupyrev /// and avoid changing branch probability of outgoing edges drastically, 736*81aedab7Sspupyrev /// set the jump distance so as: 737*81aedab7Sspupyrev /// - to minimize the number of unlikely jumps used and subject to that, 738*81aedab7Sspupyrev /// - to minimize the number of Flow == 0 jumps used and subject to that, 739*81aedab7Sspupyrev /// - minimizes total multiplicative Flow increase for the remaining edges. 740*81aedab7Sspupyrev /// To capture this objective with integer distances, we round off fractional 741*81aedab7Sspupyrev /// parts to a multiple of 1 / BaseDistance. 74298dd2f9eSspupyrev int64_t jumpDistance(FlowJump *Jump) const { 743*81aedab7Sspupyrev uint64_t BaseDistance = 744*81aedab7Sspupyrev std::max(MinCostMaxFlow::MinBaseDistance, 745*81aedab7Sspupyrev std::min(Func.Blocks[Func.Entry].Flow, 746*81aedab7Sspupyrev MinCostMaxFlow::AuxCostUnlikely / NumBlocks())); 74798dd2f9eSspupyrev if (Jump->IsUnlikely) 74898dd2f9eSspupyrev return MinCostMaxFlow::AuxCostUnlikely; 74998dd2f9eSspupyrev if (Jump->Flow > 0) 750*81aedab7Sspupyrev return BaseDistance + BaseDistance / Jump->Flow; 751*81aedab7Sspupyrev return BaseDistance * NumBlocks(); 75298dd2f9eSspupyrev }; 75398dd2f9eSspupyrev 75498dd2f9eSspupyrev uint64_t NumBlocks() const { return Func.Blocks.size(); } 75598dd2f9eSspupyrev 75613d1364aSspupyrev /// Rebalance unknown subgraphs so that the flow is split evenly across the 75713d1364aSspupyrev /// outgoing branches of every block of the subgraph. The method iterates over 75813d1364aSspupyrev /// blocks with known weight and identifies unknown subgraphs rooted at the 75913d1364aSspupyrev /// blocks. Then it verifies if flow rebalancing is feasible and applies it. 76093a2c291Sspupyrev void rebalanceUnknownSubgraphs() { 76113d1364aSspupyrev // Try to find unknown subgraphs from each block 76293a2c291Sspupyrev for (uint64_t I = 0; I < Func.Blocks.size(); I++) { 76393a2c291Sspupyrev auto SrcBlock = &Func.Blocks[I]; 76413d1364aSspupyrev // Verify if rebalancing rooted at SrcBlock is feasible 76513d1364aSspupyrev if (!canRebalanceAtRoot(SrcBlock)) 76693a2c291Sspupyrev continue; 76793a2c291Sspupyrev 76813d1364aSspupyrev // Find an unknown subgraphs starting at SrcBlock. Along the way, 76913d1364aSspupyrev // fill in known destinations and intermediate unknown blocks. 77013d1364aSspupyrev std::vector<FlowBlock *> UnknownBlocks; 77113d1364aSspupyrev std::vector<FlowBlock *> KnownDstBlocks; 77213d1364aSspupyrev findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks); 77313d1364aSspupyrev 77413d1364aSspupyrev // Verify if rebalancing of the subgraph is feasible. If the search is 77513d1364aSspupyrev // successful, find the unique destination block (which can be null) 77693a2c291Sspupyrev FlowBlock *DstBlock = nullptr; 77713d1364aSspupyrev if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks, 77813d1364aSspupyrev DstBlock)) 77993a2c291Sspupyrev continue; 78013d1364aSspupyrev 78113d1364aSspupyrev // We cannot rebalance subgraphs containing cycles among unknown blocks 78213d1364aSspupyrev if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks)) 78393a2c291Sspupyrev continue; 78493a2c291Sspupyrev 78593a2c291Sspupyrev // Rebalance the flow 78613d1364aSspupyrev rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks); 78793a2c291Sspupyrev } 78893a2c291Sspupyrev } 78993a2c291Sspupyrev 79013d1364aSspupyrev /// Verify if rebalancing rooted at a given block is possible. 79113d1364aSspupyrev bool canRebalanceAtRoot(const FlowBlock *SrcBlock) { 79213d1364aSspupyrev // Do not attempt to find unknown subgraphs from an unknown or a 79313d1364aSspupyrev // zero-flow block 79413d1364aSspupyrev if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0) 79513d1364aSspupyrev return false; 79613d1364aSspupyrev 79713d1364aSspupyrev // Do not attempt to process subgraphs from a block w/o unknown sucessors 79813d1364aSspupyrev bool HasUnknownSuccs = false; 79913d1364aSspupyrev for (auto Jump : SrcBlock->SuccJumps) { 80013d1364aSspupyrev if (Func.Blocks[Jump->Target].UnknownWeight) { 80113d1364aSspupyrev HasUnknownSuccs = true; 80213d1364aSspupyrev break; 80313d1364aSspupyrev } 80413d1364aSspupyrev } 80513d1364aSspupyrev if (!HasUnknownSuccs) 80613d1364aSspupyrev return false; 80713d1364aSspupyrev 80813d1364aSspupyrev return true; 80913d1364aSspupyrev } 81013d1364aSspupyrev 81113d1364aSspupyrev /// Find an unknown subgraph starting at block SrcBlock. The method sets 81213d1364aSspupyrev /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks. 81313d1364aSspupyrev void findUnknownSubgraph(const FlowBlock *SrcBlock, 81413d1364aSspupyrev std::vector<FlowBlock *> &KnownDstBlocks, 81513d1364aSspupyrev std::vector<FlowBlock *> &UnknownBlocks) { 81693a2c291Sspupyrev // Run BFS from SrcBlock and make sure all paths are going through unknown 817f2ade65fSspupyrev // blocks and end at a known DstBlock 8185f4ae564SJan Svoboda auto Visited = BitVector(NumBlocks(), false); 81993a2c291Sspupyrev std::queue<uint64_t> Queue; 82093a2c291Sspupyrev 82193a2c291Sspupyrev Queue.push(SrcBlock->Index); 82293a2c291Sspupyrev Visited[SrcBlock->Index] = true; 82393a2c291Sspupyrev while (!Queue.empty()) { 82493a2c291Sspupyrev auto &Block = Func.Blocks[Queue.front()]; 82593a2c291Sspupyrev Queue.pop(); 82693a2c291Sspupyrev // Process blocks reachable from Block 82793a2c291Sspupyrev for (auto Jump : Block.SuccJumps) { 82813d1364aSspupyrev // If Jump can be ignored, skip it 82913d1364aSspupyrev if (ignoreJump(SrcBlock, nullptr, Jump)) 83013d1364aSspupyrev continue; 83113d1364aSspupyrev 83293a2c291Sspupyrev uint64_t Dst = Jump->Target; 83313d1364aSspupyrev // If Dst has been visited, skip Jump 83493a2c291Sspupyrev if (Visited[Dst]) 83593a2c291Sspupyrev continue; 83613d1364aSspupyrev // Process block Dst 83793a2c291Sspupyrev Visited[Dst] = true; 83893a2c291Sspupyrev if (!Func.Blocks[Dst].UnknownWeight) { 83913d1364aSspupyrev KnownDstBlocks.push_back(&Func.Blocks[Dst]); 84093a2c291Sspupyrev } else { 84193a2c291Sspupyrev Queue.push(Dst); 84213d1364aSspupyrev UnknownBlocks.push_back(&Func.Blocks[Dst]); 84313d1364aSspupyrev } 84493a2c291Sspupyrev } 84593a2c291Sspupyrev } 84693a2c291Sspupyrev } 84793a2c291Sspupyrev 84813d1364aSspupyrev /// Verify if rebalancing of the subgraph is feasible. If the checks are 84913d1364aSspupyrev /// successful, set the unique destination block, DstBlock (can be null). 85013d1364aSspupyrev bool canRebalanceSubgraph(const FlowBlock *SrcBlock, 85113d1364aSspupyrev const std::vector<FlowBlock *> &KnownDstBlocks, 85213d1364aSspupyrev const std::vector<FlowBlock *> &UnknownBlocks, 85313d1364aSspupyrev FlowBlock *&DstBlock) { 85493a2c291Sspupyrev // If the list of unknown blocks is empty, we don't need rebalancing 85513d1364aSspupyrev if (UnknownBlocks.empty()) 85693a2c291Sspupyrev return false; 85713d1364aSspupyrev 85813d1364aSspupyrev // If there are multiple known sinks, we can't rebalance 85913d1364aSspupyrev if (KnownDstBlocks.size() > 1) 86093a2c291Sspupyrev return false; 86113d1364aSspupyrev DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front(); 86213d1364aSspupyrev 86313d1364aSspupyrev // Verify sinks of the subgraph 86413d1364aSspupyrev for (auto Block : UnknownBlocks) { 86513d1364aSspupyrev if (Block->SuccJumps.empty()) { 86613d1364aSspupyrev // If there are multiple (known and unknown) sinks, we can't rebalance 86713d1364aSspupyrev if (DstBlock != nullptr) 86813d1364aSspupyrev return false; 86913d1364aSspupyrev continue; 87013d1364aSspupyrev } 87113d1364aSspupyrev size_t NumIgnoredJumps = 0; 87213d1364aSspupyrev for (auto Jump : Block->SuccJumps) { 87313d1364aSspupyrev if (ignoreJump(SrcBlock, DstBlock, Jump)) 87413d1364aSspupyrev NumIgnoredJumps++; 87513d1364aSspupyrev } 87613d1364aSspupyrev // If there is a non-sink block in UnknownBlocks with all jumps ignored, 87713d1364aSspupyrev // then we can't rebalance 87813d1364aSspupyrev if (NumIgnoredJumps == Block->SuccJumps.size()) 87993a2c291Sspupyrev return false; 88093a2c291Sspupyrev } 88193a2c291Sspupyrev 88293a2c291Sspupyrev return true; 88393a2c291Sspupyrev } 88493a2c291Sspupyrev 88513d1364aSspupyrev /// Decide whether the Jump is ignored while processing an unknown subgraphs 88613d1364aSspupyrev /// rooted at basic block SrcBlock with the destination block, DstBlock. 88713d1364aSspupyrev bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, 88813d1364aSspupyrev const FlowJump *Jump) { 88913d1364aSspupyrev // Ignore unlikely jumps with zero flow 89013d1364aSspupyrev if (Jump->IsUnlikely && Jump->Flow == 0) 89113d1364aSspupyrev return true; 89213d1364aSspupyrev 89313d1364aSspupyrev auto JumpSource = &Func.Blocks[Jump->Source]; 89413d1364aSspupyrev auto JumpTarget = &Func.Blocks[Jump->Target]; 89513d1364aSspupyrev 89613d1364aSspupyrev // Do not ignore jumps coming into DstBlock 89713d1364aSspupyrev if (DstBlock != nullptr && JumpTarget == DstBlock) 89813d1364aSspupyrev return false; 89913d1364aSspupyrev 90013d1364aSspupyrev // Ignore jumps out of SrcBlock to known blocks 90113d1364aSspupyrev if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock) 90213d1364aSspupyrev return true; 90313d1364aSspupyrev 90413d1364aSspupyrev // Ignore jumps to known blocks with zero flow 90513d1364aSspupyrev if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0) 90613d1364aSspupyrev return true; 90713d1364aSspupyrev 90813d1364aSspupyrev return false; 90913d1364aSspupyrev } 91013d1364aSspupyrev 91193a2c291Sspupyrev /// Verify if the given unknown subgraph is acyclic, and if yes, reorder 91213d1364aSspupyrev /// UnknownBlocks in the topological order (so that all jumps are "forward"). 91313d1364aSspupyrev bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, 91413d1364aSspupyrev std::vector<FlowBlock *> &UnknownBlocks) { 91593a2c291Sspupyrev // Extract local in-degrees in the considered subgraph 91693a2c291Sspupyrev auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0); 91713d1364aSspupyrev auto fillInDegree = [&](const FlowBlock *Block) { 91813d1364aSspupyrev for (auto Jump : Block->SuccJumps) { 91913d1364aSspupyrev if (ignoreJump(SrcBlock, DstBlock, Jump)) 92013d1364aSspupyrev continue; 92193a2c291Sspupyrev LocalInDegree[Jump->Target]++; 92293a2c291Sspupyrev } 92313d1364aSspupyrev }; 92413d1364aSspupyrev fillInDegree(SrcBlock); 92513d1364aSspupyrev for (auto Block : UnknownBlocks) { 92613d1364aSspupyrev fillInDegree(Block); 92793a2c291Sspupyrev } 92893a2c291Sspupyrev // A loop containing SrcBlock 92993a2c291Sspupyrev if (LocalInDegree[SrcBlock->Index] > 0) 93093a2c291Sspupyrev return false; 93193a2c291Sspupyrev 93293a2c291Sspupyrev std::vector<FlowBlock *> AcyclicOrder; 93393a2c291Sspupyrev std::queue<uint64_t> Queue; 93493a2c291Sspupyrev Queue.push(SrcBlock->Index); 93593a2c291Sspupyrev while (!Queue.empty()) { 93613d1364aSspupyrev FlowBlock *Block = &Func.Blocks[Queue.front()]; 93793a2c291Sspupyrev Queue.pop(); 93813d1364aSspupyrev // Stop propagation once we reach DstBlock, if any 93913d1364aSspupyrev if (DstBlock != nullptr && Block == DstBlock) 94093a2c291Sspupyrev break; 94193a2c291Sspupyrev 94213d1364aSspupyrev // Keep an acyclic order of unknown blocks 94313d1364aSspupyrev if (Block->UnknownWeight && Block != SrcBlock) 94413d1364aSspupyrev AcyclicOrder.push_back(Block); 94513d1364aSspupyrev 94693a2c291Sspupyrev // Add to the queue all successors with zero local in-degree 94713d1364aSspupyrev for (auto Jump : Block->SuccJumps) { 94813d1364aSspupyrev if (ignoreJump(SrcBlock, DstBlock, Jump)) 94913d1364aSspupyrev continue; 95093a2c291Sspupyrev uint64_t Dst = Jump->Target; 95193a2c291Sspupyrev LocalInDegree[Dst]--; 95293a2c291Sspupyrev if (LocalInDegree[Dst] == 0) { 95393a2c291Sspupyrev Queue.push(Dst); 95493a2c291Sspupyrev } 95593a2c291Sspupyrev } 95693a2c291Sspupyrev } 95793a2c291Sspupyrev 95893a2c291Sspupyrev // If there is a cycle in the subgraph, AcyclicOrder contains only a subset 95993a2c291Sspupyrev // of all blocks 96013d1364aSspupyrev if (UnknownBlocks.size() != AcyclicOrder.size()) 96193a2c291Sspupyrev return false; 96213d1364aSspupyrev UnknownBlocks = AcyclicOrder; 96393a2c291Sspupyrev return true; 96493a2c291Sspupyrev } 96593a2c291Sspupyrev 96613d1364aSspupyrev /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and 96713d1364aSspupyrev /// having UnknownBlocks intermediate blocks. 96813d1364aSspupyrev void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock, 96913d1364aSspupyrev const FlowBlock *DstBlock, 97013d1364aSspupyrev const std::vector<FlowBlock *> &UnknownBlocks) { 97193a2c291Sspupyrev assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph"); 97293a2c291Sspupyrev 97313d1364aSspupyrev // Ditribute flow from the source block 97413d1364aSspupyrev uint64_t BlockFlow = 0; 97513d1364aSspupyrev // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps 97613d1364aSspupyrev for (auto Jump : SrcBlock->SuccJumps) { 97713d1364aSspupyrev if (ignoreJump(SrcBlock, DstBlock, Jump)) 97893a2c291Sspupyrev continue; 97913d1364aSspupyrev BlockFlow += Jump->Flow; 98093a2c291Sspupyrev } 98113d1364aSspupyrev rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow); 98213d1364aSspupyrev 98313d1364aSspupyrev // Ditribute flow from the remaining blocks 98413d1364aSspupyrev for (auto Block : UnknownBlocks) { 98513d1364aSspupyrev assert(Block->UnknownWeight && "incorrect unknown subgraph"); 98613d1364aSspupyrev uint64_t BlockFlow = 0; 98713d1364aSspupyrev // Block's flow is the sum of incoming flows 98813d1364aSspupyrev for (auto Jump : Block->PredJumps) { 98913d1364aSspupyrev BlockFlow += Jump->Flow; 99013d1364aSspupyrev } 99113d1364aSspupyrev Block->Flow = BlockFlow; 99213d1364aSspupyrev rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow); 99313d1364aSspupyrev } 99413d1364aSspupyrev } 99513d1364aSspupyrev 99613d1364aSspupyrev /// Redistribute flow for a block in a subgraph rooted at SrcBlock, 99713d1364aSspupyrev /// and ending at DstBlock. 99813d1364aSspupyrev void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock, 99913d1364aSspupyrev const FlowBlock *Block, uint64_t BlockFlow) { 100013d1364aSspupyrev // Process all successor jumps and update corresponding flow values 100113d1364aSspupyrev size_t BlockDegree = 0; 100213d1364aSspupyrev for (auto Jump : Block->SuccJumps) { 100313d1364aSspupyrev if (ignoreJump(SrcBlock, DstBlock, Jump)) 100413d1364aSspupyrev continue; 100513d1364aSspupyrev BlockDegree++; 100613d1364aSspupyrev } 100713d1364aSspupyrev // If all successor jumps of the block are ignored, skip it 100813d1364aSspupyrev if (DstBlock == nullptr && BlockDegree == 0) 100913d1364aSspupyrev return; 101013d1364aSspupyrev assert(BlockDegree > 0 && "all outgoing jumps are ignored"); 101113d1364aSspupyrev 101213d1364aSspupyrev // Each of the Block's successors gets the following amount of flow. 101313d1364aSspupyrev // Rounding the value up so that all flow is propagated 101413d1364aSspupyrev uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree; 101513d1364aSspupyrev for (auto Jump : Block->SuccJumps) { 101613d1364aSspupyrev if (ignoreJump(SrcBlock, DstBlock, Jump)) 101713d1364aSspupyrev continue; 101813d1364aSspupyrev uint64_t Flow = std::min(SuccFlow, BlockFlow); 101993a2c291Sspupyrev Jump->Flow = Flow; 102013d1364aSspupyrev BlockFlow -= Flow; 102193a2c291Sspupyrev } 102213d1364aSspupyrev assert(BlockFlow == 0 && "not all flow is propagated"); 102393a2c291Sspupyrev } 102493a2c291Sspupyrev 102598dd2f9eSspupyrev /// A constant indicating an arbitrary exit block of a function. 102698dd2f9eSspupyrev static constexpr uint64_t AnyExitBlock = uint64_t(-1); 102798dd2f9eSspupyrev 102898dd2f9eSspupyrev /// The function. 102998dd2f9eSspupyrev FlowFunction &Func; 103098dd2f9eSspupyrev }; 103198dd2f9eSspupyrev 10327cc2493dSspupyrev /// Initializing flow network for a given function. 10337cc2493dSspupyrev /// 10347cc2493dSspupyrev /// Every block is split into three nodes that are responsible for (i) an 10357cc2493dSspupyrev /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or 10367cc2493dSspupyrev /// reduction of the block weight. 10377cc2493dSspupyrev void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) { 10387cc2493dSspupyrev uint64_t NumBlocks = Func.Blocks.size(); 10397cc2493dSspupyrev assert(NumBlocks > 1 && "Too few blocks in a function"); 10407cc2493dSspupyrev LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n"); 10417cc2493dSspupyrev 10427cc2493dSspupyrev // Pre-process data: make sure the entry weight is at least 1 10437cc2493dSspupyrev if (Func.Blocks[Func.Entry].Weight == 0) { 10447cc2493dSspupyrev Func.Blocks[Func.Entry].Weight = 1; 10457cc2493dSspupyrev } 10467cc2493dSspupyrev // Introducing dummy source/sink pairs to allow flow circulation. 10477cc2493dSspupyrev // The nodes corresponding to blocks of Func have indicies in the range 10487cc2493dSspupyrev // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values. 10497cc2493dSspupyrev uint64_t S = 3 * NumBlocks; 10507cc2493dSspupyrev uint64_t T = S + 1; 10517cc2493dSspupyrev uint64_t S1 = S + 2; 10527cc2493dSspupyrev uint64_t T1 = S + 3; 10537cc2493dSspupyrev 10547cc2493dSspupyrev Network.initialize(3 * NumBlocks + 4, S1, T1); 10557cc2493dSspupyrev 10567cc2493dSspupyrev // Create three nodes for every block of the function 10577cc2493dSspupyrev for (uint64_t B = 0; B < NumBlocks; B++) { 10587cc2493dSspupyrev auto &Block = Func.Blocks[B]; 10597cc2493dSspupyrev assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) && 10607cc2493dSspupyrev "non-zero weight of a block w/o weight except for an entry"); 10617cc2493dSspupyrev 10627cc2493dSspupyrev // Split every block into two nodes 10637cc2493dSspupyrev uint64_t Bin = 3 * B; 10647cc2493dSspupyrev uint64_t Bout = 3 * B + 1; 10657cc2493dSspupyrev uint64_t Baux = 3 * B + 2; 10667cc2493dSspupyrev if (Block.Weight > 0) { 10677cc2493dSspupyrev Network.addEdge(S1, Bout, Block.Weight, 0); 10687cc2493dSspupyrev Network.addEdge(Bin, T1, Block.Weight, 0); 10697cc2493dSspupyrev } 10707cc2493dSspupyrev 10717cc2493dSspupyrev // Edges from S and to T 10727cc2493dSspupyrev assert((!Block.isEntry() || !Block.isExit()) && 10737cc2493dSspupyrev "a block cannot be an entry and an exit"); 10747cc2493dSspupyrev if (Block.isEntry()) { 10757cc2493dSspupyrev Network.addEdge(S, Bin, 0); 10767cc2493dSspupyrev } else if (Block.isExit()) { 10777cc2493dSspupyrev Network.addEdge(Bout, T, 0); 10787cc2493dSspupyrev } 10797cc2493dSspupyrev 10807cc2493dSspupyrev // An auxiliary node to allow increase/reduction of block counts: 10817cc2493dSspupyrev // We assume that decreasing block counts is more expensive than increasing, 10827cc2493dSspupyrev // and thus, setting separate costs here. In the future we may want to tune 10837cc2493dSspupyrev // the relative costs so as to maximize the quality of generated profiles. 1084*81aedab7Sspupyrev int64_t AuxCostInc = SampleProfileProfiCostInc; 1085*81aedab7Sspupyrev int64_t AuxCostDec = SampleProfileProfiCostDec; 10867cc2493dSspupyrev if (Block.UnknownWeight) { 10877cc2493dSspupyrev // Do not penalize changing weights of blocks w/o known profile count 10887cc2493dSspupyrev AuxCostInc = 0; 10897cc2493dSspupyrev AuxCostDec = 0; 10907cc2493dSspupyrev } else { 10917cc2493dSspupyrev // Increasing the count for "cold" blocks with zero initial count is more 10927cc2493dSspupyrev // expensive than for "hot" ones 10937cc2493dSspupyrev if (Block.Weight == 0) { 1094*81aedab7Sspupyrev AuxCostInc = SampleProfileProfiCostIncZero; 10957cc2493dSspupyrev } 10967cc2493dSspupyrev // Modifying the count of the entry block is expensive 10977cc2493dSspupyrev if (Block.isEntry()) { 1098*81aedab7Sspupyrev AuxCostInc = SampleProfileProfiCostIncEntry; 1099*81aedab7Sspupyrev AuxCostDec = SampleProfileProfiCostDecEntry; 11007cc2493dSspupyrev } 11017cc2493dSspupyrev } 11027cc2493dSspupyrev // For blocks with self-edges, do not penalize a reduction of the count, 11037cc2493dSspupyrev // as all of the increase can be attributed to the self-edge 11047cc2493dSspupyrev if (Block.HasSelfEdge) { 11057cc2493dSspupyrev AuxCostDec = 0; 11067cc2493dSspupyrev } 11077cc2493dSspupyrev 11087cc2493dSspupyrev Network.addEdge(Bin, Baux, AuxCostInc); 11097cc2493dSspupyrev Network.addEdge(Baux, Bout, AuxCostInc); 11107cc2493dSspupyrev if (Block.Weight > 0) { 11117cc2493dSspupyrev Network.addEdge(Bout, Baux, AuxCostDec); 11127cc2493dSspupyrev Network.addEdge(Baux, Bin, AuxCostDec); 11137cc2493dSspupyrev } 11147cc2493dSspupyrev } 11157cc2493dSspupyrev 11167cc2493dSspupyrev // Creating edges for every jump 11177cc2493dSspupyrev for (auto &Jump : Func.Jumps) { 11187cc2493dSspupyrev uint64_t Src = Jump.Source; 11197cc2493dSspupyrev uint64_t Dst = Jump.Target; 11207cc2493dSspupyrev if (Src != Dst) { 11217cc2493dSspupyrev uint64_t SrcOut = 3 * Src + 1; 11227cc2493dSspupyrev uint64_t DstIn = 3 * Dst; 11237cc2493dSspupyrev uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0; 11247cc2493dSspupyrev Network.addEdge(SrcOut, DstIn, Cost); 11257cc2493dSspupyrev } 11267cc2493dSspupyrev } 11277cc2493dSspupyrev 11287cc2493dSspupyrev // Make sure we have a valid flow circulation 11297cc2493dSspupyrev Network.addEdge(T, S, 0); 11307cc2493dSspupyrev } 11317cc2493dSspupyrev 11327cc2493dSspupyrev /// Extract resulting block and edge counts from the flow network. 11337cc2493dSspupyrev void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) { 11347cc2493dSspupyrev uint64_t NumBlocks = Func.Blocks.size(); 11357cc2493dSspupyrev 11367cc2493dSspupyrev // Extract resulting block counts 11377cc2493dSspupyrev for (uint64_t Src = 0; Src < NumBlocks; Src++) { 11387cc2493dSspupyrev auto &Block = Func.Blocks[Src]; 11397cc2493dSspupyrev uint64_t SrcOut = 3 * Src + 1; 11407cc2493dSspupyrev int64_t Flow = 0; 11417cc2493dSspupyrev for (auto &Adj : Network.getFlow(SrcOut)) { 11427cc2493dSspupyrev uint64_t DstIn = Adj.first; 11437cc2493dSspupyrev int64_t DstFlow = Adj.second; 11447cc2493dSspupyrev bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2); 11457cc2493dSspupyrev if (!IsAuxNode || Block.HasSelfEdge) { 11467cc2493dSspupyrev Flow += DstFlow; 11477cc2493dSspupyrev } 11487cc2493dSspupyrev } 11497cc2493dSspupyrev Block.Flow = Flow; 11507cc2493dSspupyrev assert(Flow >= 0 && "negative block flow"); 11517cc2493dSspupyrev } 11527cc2493dSspupyrev 11537cc2493dSspupyrev // Extract resulting jump counts 11547cc2493dSspupyrev for (auto &Jump : Func.Jumps) { 11557cc2493dSspupyrev uint64_t Src = Jump.Source; 11567cc2493dSspupyrev uint64_t Dst = Jump.Target; 11577cc2493dSspupyrev int64_t Flow = 0; 11587cc2493dSspupyrev if (Src != Dst) { 11597cc2493dSspupyrev uint64_t SrcOut = 3 * Src + 1; 11607cc2493dSspupyrev uint64_t DstIn = 3 * Dst; 11617cc2493dSspupyrev Flow = Network.getFlow(SrcOut, DstIn); 11627cc2493dSspupyrev } else { 11637cc2493dSspupyrev uint64_t SrcOut = 3 * Src + 1; 11647cc2493dSspupyrev uint64_t SrcAux = 3 * Src + 2; 11657cc2493dSspupyrev int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux); 11667cc2493dSspupyrev if (AuxFlow > 0) 11677cc2493dSspupyrev Flow = AuxFlow; 11687cc2493dSspupyrev } 11697cc2493dSspupyrev Jump.Flow = Flow; 11707cc2493dSspupyrev assert(Flow >= 0 && "negative jump flow"); 11717cc2493dSspupyrev } 11727cc2493dSspupyrev } 11737cc2493dSspupyrev 11747cc2493dSspupyrev #ifndef NDEBUG 11757cc2493dSspupyrev /// Verify that the computed flow values satisfy flow conservation rules 11767cc2493dSspupyrev void verifyWeights(const FlowFunction &Func) { 11777cc2493dSspupyrev const uint64_t NumBlocks = Func.Blocks.size(); 11787cc2493dSspupyrev auto InFlow = std::vector<uint64_t>(NumBlocks, 0); 11797cc2493dSspupyrev auto OutFlow = std::vector<uint64_t>(NumBlocks, 0); 11807cc2493dSspupyrev for (auto &Jump : Func.Jumps) { 11817cc2493dSspupyrev InFlow[Jump.Target] += Jump.Flow; 11827cc2493dSspupyrev OutFlow[Jump.Source] += Jump.Flow; 11837cc2493dSspupyrev } 11847cc2493dSspupyrev 11857cc2493dSspupyrev uint64_t TotalInFlow = 0; 11867cc2493dSspupyrev uint64_t TotalOutFlow = 0; 11877cc2493dSspupyrev for (uint64_t I = 0; I < NumBlocks; I++) { 11887cc2493dSspupyrev auto &Block = Func.Blocks[I]; 11897cc2493dSspupyrev if (Block.isEntry()) { 11907cc2493dSspupyrev TotalInFlow += Block.Flow; 11917cc2493dSspupyrev assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 11927cc2493dSspupyrev } else if (Block.isExit()) { 11937cc2493dSspupyrev TotalOutFlow += Block.Flow; 11947cc2493dSspupyrev assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 11957cc2493dSspupyrev } else { 11967cc2493dSspupyrev assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 11977cc2493dSspupyrev assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 11987cc2493dSspupyrev } 11997cc2493dSspupyrev } 12007cc2493dSspupyrev assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow"); 120198dd2f9eSspupyrev 120298dd2f9eSspupyrev // Verify that there are no isolated flow components 120398dd2f9eSspupyrev // One could modify FlowFunction to hold edges indexed by the sources, which 120498dd2f9eSspupyrev // will avoid a creation of the object 120598dd2f9eSspupyrev auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks); 120698dd2f9eSspupyrev for (auto &Jump : Func.Jumps) { 120798dd2f9eSspupyrev if (Jump.Flow > 0) { 120898dd2f9eSspupyrev PositiveFlowEdges[Jump.Source].push_back(Jump.Target); 120998dd2f9eSspupyrev } 121098dd2f9eSspupyrev } 121198dd2f9eSspupyrev 121293a2c291Sspupyrev // Run BFS from the source along edges with positive flow 121398dd2f9eSspupyrev std::queue<uint64_t> Queue; 12145f4ae564SJan Svoboda auto Visited = BitVector(NumBlocks, false); 121598dd2f9eSspupyrev Queue.push(Func.Entry); 121698dd2f9eSspupyrev Visited[Func.Entry] = true; 121798dd2f9eSspupyrev while (!Queue.empty()) { 121898dd2f9eSspupyrev uint64_t Src = Queue.front(); 121998dd2f9eSspupyrev Queue.pop(); 122098dd2f9eSspupyrev for (uint64_t Dst : PositiveFlowEdges[Src]) { 122198dd2f9eSspupyrev if (!Visited[Dst]) { 122298dd2f9eSspupyrev Queue.push(Dst); 122398dd2f9eSspupyrev Visited[Dst] = true; 122498dd2f9eSspupyrev } 122598dd2f9eSspupyrev } 122698dd2f9eSspupyrev } 122798dd2f9eSspupyrev 122898dd2f9eSspupyrev // Verify that every block that has a positive flow is reached from the source 122998dd2f9eSspupyrev // along edges with a positive flow 123098dd2f9eSspupyrev for (uint64_t I = 0; I < NumBlocks; I++) { 123198dd2f9eSspupyrev auto &Block = Func.Blocks[I]; 123298dd2f9eSspupyrev assert((Visited[I] || Block.Flow == 0) && "an isolated flow component"); 123398dd2f9eSspupyrev } 12347cc2493dSspupyrev } 12357cc2493dSspupyrev #endif 12367cc2493dSspupyrev 12377cc2493dSspupyrev } // end of anonymous namespace 12387cc2493dSspupyrev 12397cc2493dSspupyrev /// Apply the profile inference algorithm for a given flow function 12407cc2493dSspupyrev void llvm::applyFlowInference(FlowFunction &Func) { 12417cc2493dSspupyrev // Create and apply an inference network model 12427cc2493dSspupyrev auto InferenceNetwork = MinCostMaxFlow(); 12437cc2493dSspupyrev initializeNetwork(InferenceNetwork, Func); 12447cc2493dSspupyrev InferenceNetwork.run(); 12457cc2493dSspupyrev 12467cc2493dSspupyrev // Extract flow values for every block and every edge 12477cc2493dSspupyrev extractWeights(InferenceNetwork, Func); 12487cc2493dSspupyrev 124998dd2f9eSspupyrev // Post-processing adjustments to the flow 125098dd2f9eSspupyrev auto Adjuster = FlowAdjuster(Func); 125198dd2f9eSspupyrev Adjuster.run(); 125298dd2f9eSspupyrev 12537cc2493dSspupyrev #ifndef NDEBUG 12547cc2493dSspupyrev // Verify the result 12557cc2493dSspupyrev verifyWeights(Func); 12567cc2493dSspupyrev #endif 12577cc2493dSspupyrev } 1258