17cc2493dSspupyrev //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
27cc2493dSspupyrev //
37cc2493dSspupyrev // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
47cc2493dSspupyrev // See https://llvm.org/LICENSE.txt for license information.
57cc2493dSspupyrev // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67cc2493dSspupyrev //
77cc2493dSspupyrev //===----------------------------------------------------------------------===//
87cc2493dSspupyrev //
97cc2493dSspupyrev // This file implements a profile inference algorithm. Given an incomplete and
107cc2493dSspupyrev // possibly imprecise block counts, the algorithm reconstructs realistic block
117cc2493dSspupyrev // and edge counts that satisfy flow conservation rules, while minimally modify
127cc2493dSspupyrev // input block counts.
137cc2493dSspupyrev //
147cc2493dSspupyrev //===----------------------------------------------------------------------===//
157cc2493dSspupyrev 
167cc2493dSspupyrev #include "llvm/Transforms/Utils/SampleProfileInference.h"
177cc2493dSspupyrev #include "llvm/Support/Debug.h"
187cc2493dSspupyrev #include <queue>
197cc2493dSspupyrev #include <set>
207cc2493dSspupyrev 
217cc2493dSspupyrev using namespace llvm;
227cc2493dSspupyrev #define DEBUG_TYPE "sample-profile-inference"
237cc2493dSspupyrev 
247cc2493dSspupyrev namespace {
257cc2493dSspupyrev 
267cc2493dSspupyrev /// A value indicating an infinite flow/capacity/weight of a block/edge.
277cc2493dSspupyrev /// Not using numeric_limits<int64_t>::max(), as the values can be summed up
287cc2493dSspupyrev /// during the execution.
297cc2493dSspupyrev static constexpr int64_t INF = ((int64_t)1) << 50;
307cc2493dSspupyrev 
317cc2493dSspupyrev /// The minimum-cost maximum flow algorithm.
327cc2493dSspupyrev ///
337cc2493dSspupyrev /// The algorithm finds the maximum flow of minimum cost on a given (directed)
347cc2493dSspupyrev /// network using a modified version of the classical Moore-Bellman-Ford
357cc2493dSspupyrev /// approach. The algorithm applies a number of augmentation iterations in which
367cc2493dSspupyrev /// flow is sent along paths of positive capacity from the source to the sink.
377cc2493dSspupyrev /// The worst-case time complexity of the implementation is O(v(f)*m*n), where
387cc2493dSspupyrev /// where m is the number of edges, n is the number of vertices, and v(f) is the
397cc2493dSspupyrev /// value of the maximum flow. However, the observed running time on typical
407cc2493dSspupyrev /// instances is sub-quadratic, that is, o(n^2).
417cc2493dSspupyrev ///
427cc2493dSspupyrev /// The input is a set of edges with specified costs and capacities, and a pair
437cc2493dSspupyrev /// of nodes (source and sink). The output is the flow along each edge of the
447cc2493dSspupyrev /// minimum total cost respecting the given edge capacities.
457cc2493dSspupyrev class MinCostMaxFlow {
467cc2493dSspupyrev public:
477cc2493dSspupyrev   // Initialize algorithm's data structures for a network of a given size.
487cc2493dSspupyrev   void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
497cc2493dSspupyrev     Source = SourceNode;
507cc2493dSspupyrev     Target = SinkNode;
517cc2493dSspupyrev 
527cc2493dSspupyrev     Nodes = std::vector<Node>(NodeCount);
537cc2493dSspupyrev     Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
547cc2493dSspupyrev   }
557cc2493dSspupyrev 
567cc2493dSspupyrev   // Run the algorithm.
577cc2493dSspupyrev   int64_t run() {
587cc2493dSspupyrev     // Find an augmenting path and update the flow along the path
597cc2493dSspupyrev     size_t AugmentationIters = 0;
607cc2493dSspupyrev     while (findAugmentingPath()) {
617cc2493dSspupyrev       augmentFlowAlongPath();
627cc2493dSspupyrev       AugmentationIters++;
637cc2493dSspupyrev     }
647cc2493dSspupyrev 
657cc2493dSspupyrev     // Compute the total flow and its cost
667cc2493dSspupyrev     int64_t TotalCost = 0;
677cc2493dSspupyrev     int64_t TotalFlow = 0;
687cc2493dSspupyrev     for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
697cc2493dSspupyrev       for (auto &Edge : Edges[Src]) {
707cc2493dSspupyrev         if (Edge.Flow > 0) {
717cc2493dSspupyrev           TotalCost += Edge.Cost * Edge.Flow;
727cc2493dSspupyrev           if (Src == Source)
737cc2493dSspupyrev             TotalFlow += Edge.Flow;
747cc2493dSspupyrev         }
757cc2493dSspupyrev       }
767cc2493dSspupyrev     }
777cc2493dSspupyrev     LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
787cc2493dSspupyrev                       << " iterations with " << TotalFlow << " total flow"
797cc2493dSspupyrev                       << " of " << TotalCost << " cost\n");
8022d82949SKazu Hirata     (void)TotalFlow;
817cc2493dSspupyrev     return TotalCost;
827cc2493dSspupyrev   }
837cc2493dSspupyrev 
847cc2493dSspupyrev   /// Adding an edge to the network with a specified capacity and a cost.
857cc2493dSspupyrev   /// Multiple edges between a pair of nodes are allowed but self-edges
867cc2493dSspupyrev   /// are not supported.
877cc2493dSspupyrev   void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
887cc2493dSspupyrev     assert(Capacity > 0 && "adding an edge of zero capacity");
897cc2493dSspupyrev     assert(Src != Dst && "loop edge are not supported");
907cc2493dSspupyrev 
917cc2493dSspupyrev     Edge SrcEdge;
927cc2493dSspupyrev     SrcEdge.Dst = Dst;
937cc2493dSspupyrev     SrcEdge.Cost = Cost;
947cc2493dSspupyrev     SrcEdge.Capacity = Capacity;
957cc2493dSspupyrev     SrcEdge.Flow = 0;
967cc2493dSspupyrev     SrcEdge.RevEdgeIndex = Edges[Dst].size();
977cc2493dSspupyrev 
987cc2493dSspupyrev     Edge DstEdge;
997cc2493dSspupyrev     DstEdge.Dst = Src;
1007cc2493dSspupyrev     DstEdge.Cost = -Cost;
1017cc2493dSspupyrev     DstEdge.Capacity = 0;
1027cc2493dSspupyrev     DstEdge.Flow = 0;
1037cc2493dSspupyrev     DstEdge.RevEdgeIndex = Edges[Src].size();
1047cc2493dSspupyrev 
1057cc2493dSspupyrev     Edges[Src].push_back(SrcEdge);
1067cc2493dSspupyrev     Edges[Dst].push_back(DstEdge);
1077cc2493dSspupyrev   }
1087cc2493dSspupyrev 
1097cc2493dSspupyrev   /// Adding an edge to the network of infinite capacity and a given cost.
1107cc2493dSspupyrev   void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
1117cc2493dSspupyrev     addEdge(Src, Dst, INF, Cost);
1127cc2493dSspupyrev   }
1137cc2493dSspupyrev 
1147cc2493dSspupyrev   /// Get the total flow from a given source node.
1157cc2493dSspupyrev   /// Returns a list of pairs (target node, amount of flow to the target).
1167cc2493dSspupyrev   const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
1177cc2493dSspupyrev     std::vector<std::pair<uint64_t, int64_t>> Flow;
1187cc2493dSspupyrev     for (auto &Edge : Edges[Src]) {
1197cc2493dSspupyrev       if (Edge.Flow > 0)
1207cc2493dSspupyrev         Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
1217cc2493dSspupyrev     }
1227cc2493dSspupyrev     return Flow;
1237cc2493dSspupyrev   }
1247cc2493dSspupyrev 
1257cc2493dSspupyrev   /// Get the total flow between a pair of nodes.
1267cc2493dSspupyrev   int64_t getFlow(uint64_t Src, uint64_t Dst) const {
1277cc2493dSspupyrev     int64_t Flow = 0;
1287cc2493dSspupyrev     for (auto &Edge : Edges[Src]) {
1297cc2493dSspupyrev       if (Edge.Dst == Dst) {
1307cc2493dSspupyrev         Flow += Edge.Flow;
1317cc2493dSspupyrev       }
1327cc2493dSspupyrev     }
1337cc2493dSspupyrev     return Flow;
1347cc2493dSspupyrev   }
1357cc2493dSspupyrev 
1367cc2493dSspupyrev   /// A cost of increasing a block's count by one.
1377cc2493dSspupyrev   static constexpr int64_t AuxCostInc = 10;
1387cc2493dSspupyrev   /// A cost of decreasing a block's count by one.
1397cc2493dSspupyrev   static constexpr int64_t AuxCostDec = 20;
1407cc2493dSspupyrev   /// A cost of increasing a count of zero-weight block by one.
1417cc2493dSspupyrev   static constexpr int64_t AuxCostIncZero = 11;
1427cc2493dSspupyrev   /// A cost of increasing the entry block's count by one.
1437cc2493dSspupyrev   static constexpr int64_t AuxCostIncEntry = 40;
1447cc2493dSspupyrev   /// A cost of decreasing the entry block's count by one.
1457cc2493dSspupyrev   static constexpr int64_t AuxCostDecEntry = 10;
1467cc2493dSspupyrev   /// A cost of taking an unlikely jump.
1477cc2493dSspupyrev   static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20;
1487cc2493dSspupyrev 
1497cc2493dSspupyrev private:
1507cc2493dSspupyrev   /// Check for existence of an augmenting path with a positive capacity.
1517cc2493dSspupyrev   bool findAugmentingPath() {
1527cc2493dSspupyrev     // Initialize data structures
1537cc2493dSspupyrev     for (auto &Node : Nodes) {
1547cc2493dSspupyrev       Node.Distance = INF;
1557cc2493dSspupyrev       Node.ParentNode = uint64_t(-1);
1567cc2493dSspupyrev       Node.ParentEdgeIndex = uint64_t(-1);
1577cc2493dSspupyrev       Node.Taken = false;
1587cc2493dSspupyrev     }
1597cc2493dSspupyrev 
1607cc2493dSspupyrev     std::queue<uint64_t> Queue;
1617cc2493dSspupyrev     Queue.push(Source);
1627cc2493dSspupyrev     Nodes[Source].Distance = 0;
1637cc2493dSspupyrev     Nodes[Source].Taken = true;
1647cc2493dSspupyrev     while (!Queue.empty()) {
1657cc2493dSspupyrev       uint64_t Src = Queue.front();
1667cc2493dSspupyrev       Queue.pop();
1677cc2493dSspupyrev       Nodes[Src].Taken = false;
1687cc2493dSspupyrev       // Although the residual network contains edges with negative costs
1697cc2493dSspupyrev       // (in particular, backward edges), it can be shown that there are no
1707cc2493dSspupyrev       // negative-weight cycles and the following two invariants are maintained:
1717cc2493dSspupyrev       // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
1727cc2493dSspupyrev       // where Dist is the length of the shortest path between two nodes. This
1737cc2493dSspupyrev       // allows to prune the search-space of the path-finding algorithm using
1747cc2493dSspupyrev       // the following early-stop criteria:
1757cc2493dSspupyrev       // -- If we find a path with zero-distance from Source to Target, stop the
1767cc2493dSspupyrev       //    search, as the path is the shortest since Dist[Source, Target] >= 0;
1777cc2493dSspupyrev       // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
1787cc2493dSspupyrev       //    process node V, as it is guaranteed _not_ to be on a shortest path
1797cc2493dSspupyrev       //    from Source to Target; it follows from inequalities
1807cc2493dSspupyrev       //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
1817cc2493dSspupyrev       //                         >= Dist[Source, V]
1827cc2493dSspupyrev       if (Nodes[Target].Distance == 0)
1837cc2493dSspupyrev         break;
1847cc2493dSspupyrev       if (Nodes[Src].Distance > Nodes[Target].Distance)
1857cc2493dSspupyrev         continue;
1867cc2493dSspupyrev 
1877cc2493dSspupyrev       // Process adjacent edges
1887cc2493dSspupyrev       for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
1897cc2493dSspupyrev         auto &Edge = Edges[Src][EdgeIdx];
1907cc2493dSspupyrev         if (Edge.Flow < Edge.Capacity) {
1917cc2493dSspupyrev           uint64_t Dst = Edge.Dst;
1927cc2493dSspupyrev           int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
1937cc2493dSspupyrev           if (Nodes[Dst].Distance > NewDistance) {
1947cc2493dSspupyrev             // Update the distance and the parent node/edge
1957cc2493dSspupyrev             Nodes[Dst].Distance = NewDistance;
1967cc2493dSspupyrev             Nodes[Dst].ParentNode = Src;
1977cc2493dSspupyrev             Nodes[Dst].ParentEdgeIndex = EdgeIdx;
1987cc2493dSspupyrev             // Add the node to the queue, if it is not there yet
1997cc2493dSspupyrev             if (!Nodes[Dst].Taken) {
2007cc2493dSspupyrev               Queue.push(Dst);
2017cc2493dSspupyrev               Nodes[Dst].Taken = true;
2027cc2493dSspupyrev             }
2037cc2493dSspupyrev           }
2047cc2493dSspupyrev         }
2057cc2493dSspupyrev       }
2067cc2493dSspupyrev     }
2077cc2493dSspupyrev 
2087cc2493dSspupyrev     return Nodes[Target].Distance != INF;
2097cc2493dSspupyrev   }
2107cc2493dSspupyrev 
2117cc2493dSspupyrev   /// Update the current flow along the augmenting path.
2127cc2493dSspupyrev   void augmentFlowAlongPath() {
2137cc2493dSspupyrev     // Find path capacity
2147cc2493dSspupyrev     int64_t PathCapacity = INF;
2157cc2493dSspupyrev     uint64_t Now = Target;
2167cc2493dSspupyrev     while (Now != Source) {
2177cc2493dSspupyrev       uint64_t Pred = Nodes[Now].ParentNode;
2187cc2493dSspupyrev       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
2197cc2493dSspupyrev       PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow);
2207cc2493dSspupyrev       Now = Pred;
2217cc2493dSspupyrev     }
2227cc2493dSspupyrev 
223*93a2c291Sspupyrev     assert(PathCapacity > 0 && "found an incorrect augmenting path");
2247cc2493dSspupyrev 
2257cc2493dSspupyrev     // Update the flow along the path
2267cc2493dSspupyrev     Now = Target;
2277cc2493dSspupyrev     while (Now != Source) {
2287cc2493dSspupyrev       uint64_t Pred = Nodes[Now].ParentNode;
2297cc2493dSspupyrev       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
2307cc2493dSspupyrev       auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
2317cc2493dSspupyrev 
2327cc2493dSspupyrev       Edge.Flow += PathCapacity;
2337cc2493dSspupyrev       RevEdge.Flow -= PathCapacity;
2347cc2493dSspupyrev 
2357cc2493dSspupyrev       Now = Pred;
2367cc2493dSspupyrev     }
2377cc2493dSspupyrev   }
2387cc2493dSspupyrev 
2397cc2493dSspupyrev   /// An node in a flow network.
2407cc2493dSspupyrev   struct Node {
2417cc2493dSspupyrev     /// The cost of the cheapest path from the source to the current node.
2427cc2493dSspupyrev     int64_t Distance;
2437cc2493dSspupyrev     /// The node preceding the current one in the path.
2447cc2493dSspupyrev     uint64_t ParentNode;
2457cc2493dSspupyrev     /// The index of the edge between ParentNode and the current node.
2467cc2493dSspupyrev     uint64_t ParentEdgeIndex;
2477cc2493dSspupyrev     /// An indicator of whether the current node is in a queue.
2487cc2493dSspupyrev     bool Taken;
2497cc2493dSspupyrev   };
2507cc2493dSspupyrev   /// An edge in a flow network.
2517cc2493dSspupyrev   struct Edge {
2527cc2493dSspupyrev     /// The cost of the edge.
2537cc2493dSspupyrev     int64_t Cost;
2547cc2493dSspupyrev     /// The capacity of the edge.
2557cc2493dSspupyrev     int64_t Capacity;
2567cc2493dSspupyrev     /// The current flow on the edge.
2577cc2493dSspupyrev     int64_t Flow;
2587cc2493dSspupyrev     /// The destination node of the edge.
2597cc2493dSspupyrev     uint64_t Dst;
2607cc2493dSspupyrev     /// The index of the reverse edge between Dst and the current node.
2617cc2493dSspupyrev     uint64_t RevEdgeIndex;
2627cc2493dSspupyrev   };
2637cc2493dSspupyrev 
2647cc2493dSspupyrev   /// The set of network nodes.
2657cc2493dSspupyrev   std::vector<Node> Nodes;
2667cc2493dSspupyrev   /// The set of network edges.
2677cc2493dSspupyrev   std::vector<std::vector<Edge>> Edges;
2687cc2493dSspupyrev   /// Source node of the flow.
2697cc2493dSspupyrev   uint64_t Source;
2707cc2493dSspupyrev   /// Target (sink) node of the flow.
2717cc2493dSspupyrev   uint64_t Target;
2727cc2493dSspupyrev };
2737cc2493dSspupyrev 
274*93a2c291Sspupyrev /// A post-processing adjustment of control flow. It applies two steps by
275*93a2c291Sspupyrev /// rerouting some flow and making it more realistic:
276*93a2c291Sspupyrev ///
277*93a2c291Sspupyrev /// - First, it removes all isolated components ("islands") with a positive flow
278*93a2c291Sspupyrev ///   that are unreachable from the entry block. For every such component, we
279*93a2c291Sspupyrev ///   find the shortest from the entry to an exit passing through the component,
280*93a2c291Sspupyrev ///   and increase the flow by one unit along the path.
281*93a2c291Sspupyrev ///
282*93a2c291Sspupyrev /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
283*93a2c291Sspupyrev ///   with no sampled counts. Then it rebalnces the flow that goes through such
284*93a2c291Sspupyrev ///   a subgraph so that each branch is taken with probability 50%.
285*93a2c291Sspupyrev ///   An unknown subgraph is such that for every two nodes u and v:
286*93a2c291Sspupyrev ///     - u dominates v and u is not unknown;
287*93a2c291Sspupyrev ///     - v post-dominates u; and
288*93a2c291Sspupyrev ///     - all inner-nodes of all (u,v)-paths are unknown.
289*93a2c291Sspupyrev ///
29098dd2f9eSspupyrev class FlowAdjuster {
29198dd2f9eSspupyrev public:
29298dd2f9eSspupyrev   FlowAdjuster(FlowFunction &Func) : Func(Func) {
29398dd2f9eSspupyrev     assert(Func.Blocks[Func.Entry].isEntry() &&
29498dd2f9eSspupyrev            "incorrect index of the entry block");
29598dd2f9eSspupyrev   }
29698dd2f9eSspupyrev 
29798dd2f9eSspupyrev   // Run the post-processing
29898dd2f9eSspupyrev   void run() {
299*93a2c291Sspupyrev     /// Adjust the flow to get rid of isolated components.
30098dd2f9eSspupyrev     joinIsolatedComponents();
301*93a2c291Sspupyrev 
302*93a2c291Sspupyrev     /// Rebalance the flow inside unknown subgraphs.
303*93a2c291Sspupyrev     rebalanceUnknownSubgraphs();
30498dd2f9eSspupyrev   }
30598dd2f9eSspupyrev 
306*93a2c291Sspupyrev   /// The probability for the first successor of a unknown subgraph
307*93a2c291Sspupyrev   static constexpr double UnknownFirstSuccProbability = 0.5;
308*93a2c291Sspupyrev 
30998dd2f9eSspupyrev private:
31098dd2f9eSspupyrev   void joinIsolatedComponents() {
31198dd2f9eSspupyrev     // Find blocks that are reachable from the source
31298dd2f9eSspupyrev     auto Visited = std::vector<bool>(NumBlocks(), false);
31398dd2f9eSspupyrev     findReachable(Func.Entry, Visited);
31498dd2f9eSspupyrev 
31598dd2f9eSspupyrev     // Iterate over all non-reachable blocks and adjust their weights
31698dd2f9eSspupyrev     for (uint64_t I = 0; I < NumBlocks(); I++) {
31798dd2f9eSspupyrev       auto &Block = Func.Blocks[I];
31898dd2f9eSspupyrev       if (Block.Flow > 0 && !Visited[I]) {
31998dd2f9eSspupyrev         // Find a path from the entry to an exit passing through the block I
32098dd2f9eSspupyrev         auto Path = findShortestPath(I);
32198dd2f9eSspupyrev         // Increase the flow along the path
32298dd2f9eSspupyrev         assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
32398dd2f9eSspupyrev                "incorrectly computed path adjusting control flow");
32498dd2f9eSspupyrev         Func.Blocks[Func.Entry].Flow += 1;
32598dd2f9eSspupyrev         for (auto &Jump : Path) {
32698dd2f9eSspupyrev           Jump->Flow += 1;
32798dd2f9eSspupyrev           Func.Blocks[Jump->Target].Flow += 1;
32898dd2f9eSspupyrev           // Update reachability
32998dd2f9eSspupyrev           findReachable(Jump->Target, Visited);
33098dd2f9eSspupyrev         }
33198dd2f9eSspupyrev       }
33298dd2f9eSspupyrev     }
33398dd2f9eSspupyrev   }
33498dd2f9eSspupyrev 
335*93a2c291Sspupyrev   /// Run BFS from a given block along the jumps with a positive flow and mark
33698dd2f9eSspupyrev   /// all reachable blocks.
33798dd2f9eSspupyrev   void findReachable(uint64_t Src, std::vector<bool> &Visited) {
33898dd2f9eSspupyrev     if (Visited[Src])
33998dd2f9eSspupyrev       return;
34098dd2f9eSspupyrev     std::queue<uint64_t> Queue;
34198dd2f9eSspupyrev     Queue.push(Src);
34298dd2f9eSspupyrev     Visited[Src] = true;
34398dd2f9eSspupyrev     while (!Queue.empty()) {
34498dd2f9eSspupyrev       Src = Queue.front();
34598dd2f9eSspupyrev       Queue.pop();
34698dd2f9eSspupyrev       for (auto Jump : Func.Blocks[Src].SuccJumps) {
34798dd2f9eSspupyrev         uint64_t Dst = Jump->Target;
34898dd2f9eSspupyrev         if (Jump->Flow > 0 && !Visited[Dst]) {
34998dd2f9eSspupyrev           Queue.push(Dst);
35098dd2f9eSspupyrev           Visited[Dst] = true;
35198dd2f9eSspupyrev         }
35298dd2f9eSspupyrev       }
35398dd2f9eSspupyrev     }
35498dd2f9eSspupyrev   }
35598dd2f9eSspupyrev 
35698dd2f9eSspupyrev   /// Find the shortest path from the entry block to an exit block passing
35798dd2f9eSspupyrev   /// through a given block.
35898dd2f9eSspupyrev   std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
35998dd2f9eSspupyrev     // A path from the entry block to BlockIdx
36098dd2f9eSspupyrev     auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
36198dd2f9eSspupyrev     // A path from BlockIdx to an exit block
36298dd2f9eSspupyrev     auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
36398dd2f9eSspupyrev 
36498dd2f9eSspupyrev     // Concatenate the two paths
36598dd2f9eSspupyrev     std::vector<FlowJump *> Result;
36698dd2f9eSspupyrev     Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
36798dd2f9eSspupyrev     Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
36898dd2f9eSspupyrev     return Result;
36998dd2f9eSspupyrev   }
37098dd2f9eSspupyrev 
37198dd2f9eSspupyrev   /// Apply the Dijkstra algorithm to find the shortest path from a given
37298dd2f9eSspupyrev   /// Source to a given Target block.
37398dd2f9eSspupyrev   /// If Target == -1, then the path ends at an exit block.
37498dd2f9eSspupyrev   std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
37598dd2f9eSspupyrev     // Quit early, if possible
37698dd2f9eSspupyrev     if (Source == Target)
37798dd2f9eSspupyrev       return std::vector<FlowJump *>();
37898dd2f9eSspupyrev     if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
37998dd2f9eSspupyrev       return std::vector<FlowJump *>();
38098dd2f9eSspupyrev 
38198dd2f9eSspupyrev     // Initialize data structures
38298dd2f9eSspupyrev     auto Distance = std::vector<int64_t>(NumBlocks(), INF);
38398dd2f9eSspupyrev     auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
38498dd2f9eSspupyrev     Distance[Source] = 0;
38598dd2f9eSspupyrev     std::set<std::pair<uint64_t, uint64_t>> Queue;
38698dd2f9eSspupyrev     Queue.insert(std::make_pair(Distance[Source], Source));
38798dd2f9eSspupyrev 
38898dd2f9eSspupyrev     // Run the Dijkstra algorithm
38998dd2f9eSspupyrev     while (!Queue.empty()) {
39098dd2f9eSspupyrev       uint64_t Src = Queue.begin()->second;
39198dd2f9eSspupyrev       Queue.erase(Queue.begin());
39298dd2f9eSspupyrev       // If we found a solution, quit early
39398dd2f9eSspupyrev       if (Src == Target ||
39498dd2f9eSspupyrev           (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
39598dd2f9eSspupyrev         break;
39698dd2f9eSspupyrev 
39798dd2f9eSspupyrev       for (auto Jump : Func.Blocks[Src].SuccJumps) {
39898dd2f9eSspupyrev         uint64_t Dst = Jump->Target;
39998dd2f9eSspupyrev         int64_t JumpDist = jumpDistance(Jump);
40098dd2f9eSspupyrev         if (Distance[Dst] > Distance[Src] + JumpDist) {
40198dd2f9eSspupyrev           Queue.erase(std::make_pair(Distance[Dst], Dst));
40298dd2f9eSspupyrev 
40398dd2f9eSspupyrev           Distance[Dst] = Distance[Src] + JumpDist;
40498dd2f9eSspupyrev           Parent[Dst] = Jump;
40598dd2f9eSspupyrev 
40698dd2f9eSspupyrev           Queue.insert(std::make_pair(Distance[Dst], Dst));
40798dd2f9eSspupyrev         }
40898dd2f9eSspupyrev       }
40998dd2f9eSspupyrev     }
41098dd2f9eSspupyrev     // If Target is not provided, find the closest exit block
41198dd2f9eSspupyrev     if (Target == AnyExitBlock) {
41298dd2f9eSspupyrev       for (uint64_t I = 0; I < NumBlocks(); I++) {
41398dd2f9eSspupyrev         if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
41498dd2f9eSspupyrev           if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
41598dd2f9eSspupyrev             Target = I;
41698dd2f9eSspupyrev           }
41798dd2f9eSspupyrev         }
41898dd2f9eSspupyrev       }
41998dd2f9eSspupyrev     }
42098dd2f9eSspupyrev     assert(Parent[Target] != nullptr && "a path does not exist");
42198dd2f9eSspupyrev 
42298dd2f9eSspupyrev     // Extract the constructed path
42398dd2f9eSspupyrev     std::vector<FlowJump *> Result;
42498dd2f9eSspupyrev     uint64_t Now = Target;
42598dd2f9eSspupyrev     while (Now != Source) {
42698dd2f9eSspupyrev       assert(Now == Parent[Now]->Target && "incorrect parent jump");
42798dd2f9eSspupyrev       Result.push_back(Parent[Now]);
42898dd2f9eSspupyrev       Now = Parent[Now]->Source;
42998dd2f9eSspupyrev     }
43098dd2f9eSspupyrev     // Reverse the path, since it is extracted from Target to Source
43198dd2f9eSspupyrev     std::reverse(Result.begin(), Result.end());
43298dd2f9eSspupyrev     return Result;
43398dd2f9eSspupyrev   }
43498dd2f9eSspupyrev 
43598dd2f9eSspupyrev   /// A distance of a path for a given jump.
43698dd2f9eSspupyrev   /// In order to incite the path to use blocks/jumps with large positive flow,
43798dd2f9eSspupyrev   /// and avoid changing branch probability of outgoing edges drastically,
43898dd2f9eSspupyrev   /// set the distance as follows:
43998dd2f9eSspupyrev   ///   if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0)
44098dd2f9eSspupyrev   ///   if Block.Weight > 0, then distance = 1
44198dd2f9eSspupyrev   ///   otherwise distance >> 1
44298dd2f9eSspupyrev   int64_t jumpDistance(FlowJump *Jump) const {
44398dd2f9eSspupyrev     int64_t BaseDistance = 100;
44498dd2f9eSspupyrev     if (Jump->IsUnlikely)
44598dd2f9eSspupyrev       return MinCostMaxFlow::AuxCostUnlikely;
44698dd2f9eSspupyrev     if (Jump->Flow > 0)
44798dd2f9eSspupyrev       return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0);
44898dd2f9eSspupyrev     if (Func.Blocks[Jump->Target].Weight > 0)
44998dd2f9eSspupyrev       return BaseDistance;
45098dd2f9eSspupyrev     return BaseDistance * (NumBlocks() + 1);
45198dd2f9eSspupyrev   };
45298dd2f9eSspupyrev 
45398dd2f9eSspupyrev   uint64_t NumBlocks() const { return Func.Blocks.size(); }
45498dd2f9eSspupyrev 
455*93a2c291Sspupyrev   /// Rebalance unknown subgraphs so as each branch splits with probabilities
456*93a2c291Sspupyrev   /// UnknownFirstSuccProbability and 1 - UnknownFirstSuccProbability
457*93a2c291Sspupyrev   void rebalanceUnknownSubgraphs() {
458*93a2c291Sspupyrev     assert(UnknownFirstSuccProbability >= 0.0 &&
459*93a2c291Sspupyrev            UnknownFirstSuccProbability <= 1.0 &&
460*93a2c291Sspupyrev            "the share of the unknown successor should be between 0 and 1");
461*93a2c291Sspupyrev     // Try to find unknown subgraphs from each non-unknown block
462*93a2c291Sspupyrev     for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
463*93a2c291Sspupyrev       auto SrcBlock = &Func.Blocks[I];
464*93a2c291Sspupyrev       // Do not attempt to find unknown successors from a unknown or a
465*93a2c291Sspupyrev       // zero-flow block
466*93a2c291Sspupyrev       if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
467*93a2c291Sspupyrev         continue;
468*93a2c291Sspupyrev 
469*93a2c291Sspupyrev       std::vector<FlowBlock *> UnknownSuccs;
470*93a2c291Sspupyrev       FlowBlock *DstBlock = nullptr;
471*93a2c291Sspupyrev       // Find a unknown subgraphs starting at block SrcBlock
472*93a2c291Sspupyrev       if (!findUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs))
473*93a2c291Sspupyrev         continue;
474*93a2c291Sspupyrev       // At the moment, we do not rebalance subgraphs containing cycles among
475*93a2c291Sspupyrev       // unknown blocks
476*93a2c291Sspupyrev       if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownSuccs))
477*93a2c291Sspupyrev         continue;
478*93a2c291Sspupyrev 
479*93a2c291Sspupyrev       // Rebalance the flow
480*93a2c291Sspupyrev       rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs);
481*93a2c291Sspupyrev     }
482*93a2c291Sspupyrev   }
483*93a2c291Sspupyrev 
484*93a2c291Sspupyrev   /// Find a unknown subgraph starting at block SrcBlock.
485*93a2c291Sspupyrev   /// If the search is successful, the method sets DstBlock and UnknownSuccs.
486*93a2c291Sspupyrev   bool findUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *&DstBlock,
487*93a2c291Sspupyrev                            std::vector<FlowBlock *> &UnknownSuccs) {
488*93a2c291Sspupyrev     // Run BFS from SrcBlock and make sure all paths are going through unknown
489*93a2c291Sspupyrev     // blocks and end at a non-unknown DstBlock
490*93a2c291Sspupyrev     auto Visited = std::vector<bool>(NumBlocks(), false);
491*93a2c291Sspupyrev     std::queue<uint64_t> Queue;
492*93a2c291Sspupyrev     DstBlock = nullptr;
493*93a2c291Sspupyrev 
494*93a2c291Sspupyrev     Queue.push(SrcBlock->Index);
495*93a2c291Sspupyrev     Visited[SrcBlock->Index] = true;
496*93a2c291Sspupyrev     while (!Queue.empty()) {
497*93a2c291Sspupyrev       auto &Block = Func.Blocks[Queue.front()];
498*93a2c291Sspupyrev       Queue.pop();
499*93a2c291Sspupyrev       // Process blocks reachable from Block
500*93a2c291Sspupyrev       for (auto Jump : Block.SuccJumps) {
501*93a2c291Sspupyrev         uint64_t Dst = Jump->Target;
502*93a2c291Sspupyrev         if (Visited[Dst])
503*93a2c291Sspupyrev           continue;
504*93a2c291Sspupyrev         Visited[Dst] = true;
505*93a2c291Sspupyrev         if (!Func.Blocks[Dst].UnknownWeight) {
506*93a2c291Sspupyrev           // If we see non-unique non-unknown block reachable from SrcBlock,
507*93a2c291Sspupyrev           // stop processing and skip rebalancing
508*93a2c291Sspupyrev           FlowBlock *CandidateDstBlock = &Func.Blocks[Dst];
509*93a2c291Sspupyrev           if (DstBlock != nullptr && DstBlock != CandidateDstBlock)
510*93a2c291Sspupyrev             return false;
511*93a2c291Sspupyrev           DstBlock = CandidateDstBlock;
512*93a2c291Sspupyrev         } else {
513*93a2c291Sspupyrev           Queue.push(Dst);
514*93a2c291Sspupyrev           UnknownSuccs.push_back(&Func.Blocks[Dst]);
515*93a2c291Sspupyrev         }
516*93a2c291Sspupyrev       }
517*93a2c291Sspupyrev     }
518*93a2c291Sspupyrev 
519*93a2c291Sspupyrev     // If the list of unknown blocks is empty, we don't need rebalancing
520*93a2c291Sspupyrev     if (UnknownSuccs.empty())
521*93a2c291Sspupyrev       return false;
522*93a2c291Sspupyrev     // If all reachable nodes from SrcBlock are unknown, skip rebalancing
523*93a2c291Sspupyrev     if (DstBlock == nullptr)
524*93a2c291Sspupyrev       return false;
525*93a2c291Sspupyrev     // If any of the unknown blocks is an exit block, skip rebalancing
526*93a2c291Sspupyrev     for (auto Block : UnknownSuccs) {
527*93a2c291Sspupyrev       if (Block->isExit())
528*93a2c291Sspupyrev         return false;
529*93a2c291Sspupyrev     }
530*93a2c291Sspupyrev 
531*93a2c291Sspupyrev     return true;
532*93a2c291Sspupyrev   }
533*93a2c291Sspupyrev 
534*93a2c291Sspupyrev   /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
535*93a2c291Sspupyrev   /// UnknownSuccs in the topological order (so that all jumps are "forward").
536*93a2c291Sspupyrev   bool isAcyclicSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock,
537*93a2c291Sspupyrev                          std::vector<FlowBlock *> &UnknownSuccs) {
538*93a2c291Sspupyrev     // Extract local in-degrees in the considered subgraph
539*93a2c291Sspupyrev     auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
540*93a2c291Sspupyrev     for (auto Jump : SrcBlock->SuccJumps) {
541*93a2c291Sspupyrev       LocalInDegree[Jump->Target]++;
542*93a2c291Sspupyrev     }
543*93a2c291Sspupyrev     for (uint64_t I = 0; I < UnknownSuccs.size(); I++) {
544*93a2c291Sspupyrev       for (auto Jump : UnknownSuccs[I]->SuccJumps) {
545*93a2c291Sspupyrev         LocalInDegree[Jump->Target]++;
546*93a2c291Sspupyrev       }
547*93a2c291Sspupyrev     }
548*93a2c291Sspupyrev     // A loop containing SrcBlock
549*93a2c291Sspupyrev     if (LocalInDegree[SrcBlock->Index] > 0)
550*93a2c291Sspupyrev       return false;
551*93a2c291Sspupyrev 
552*93a2c291Sspupyrev     std::vector<FlowBlock *> AcyclicOrder;
553*93a2c291Sspupyrev     std::queue<uint64_t> Queue;
554*93a2c291Sspupyrev     Queue.push(SrcBlock->Index);
555*93a2c291Sspupyrev     while (!Queue.empty()) {
556*93a2c291Sspupyrev       auto &Block = Func.Blocks[Queue.front()];
557*93a2c291Sspupyrev       Queue.pop();
558*93a2c291Sspupyrev       // Stop propagation once we reach DstBlock
559*93a2c291Sspupyrev       if (Block.Index == DstBlock->Index)
560*93a2c291Sspupyrev         break;
561*93a2c291Sspupyrev 
562*93a2c291Sspupyrev       AcyclicOrder.push_back(&Block);
563*93a2c291Sspupyrev       // Add to the queue all successors with zero local in-degree
564*93a2c291Sspupyrev       for (auto Jump : Block.SuccJumps) {
565*93a2c291Sspupyrev         uint64_t Dst = Jump->Target;
566*93a2c291Sspupyrev         LocalInDegree[Dst]--;
567*93a2c291Sspupyrev         if (LocalInDegree[Dst] == 0) {
568*93a2c291Sspupyrev           Queue.push(Dst);
569*93a2c291Sspupyrev         }
570*93a2c291Sspupyrev       }
571*93a2c291Sspupyrev     }
572*93a2c291Sspupyrev 
573*93a2c291Sspupyrev     // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
574*93a2c291Sspupyrev     // of all blocks
575*93a2c291Sspupyrev     if (UnknownSuccs.size() + 1 != AcyclicOrder.size())
576*93a2c291Sspupyrev       return false;
577*93a2c291Sspupyrev     UnknownSuccs = AcyclicOrder;
578*93a2c291Sspupyrev     return true;
579*93a2c291Sspupyrev   }
580*93a2c291Sspupyrev 
581*93a2c291Sspupyrev   /// Rebalance a given subgraph.
582*93a2c291Sspupyrev   void rebalanceUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock,
583*93a2c291Sspupyrev                                 std::vector<FlowBlock *> &UnknownSuccs) {
584*93a2c291Sspupyrev     assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
585*93a2c291Sspupyrev     assert(UnknownSuccs.front() == SrcBlock && "incorrect order of unknowns");
586*93a2c291Sspupyrev 
587*93a2c291Sspupyrev     for (auto Block : UnknownSuccs) {
588*93a2c291Sspupyrev       // Block's flow is the sum of incoming flows
589*93a2c291Sspupyrev       uint64_t TotalFlow = 0;
590*93a2c291Sspupyrev       if (Block == SrcBlock) {
591*93a2c291Sspupyrev         TotalFlow = Block->Flow;
592*93a2c291Sspupyrev       } else {
593*93a2c291Sspupyrev         for (auto Jump : Block->PredJumps) {
594*93a2c291Sspupyrev           TotalFlow += Jump->Flow;
595*93a2c291Sspupyrev         }
596*93a2c291Sspupyrev         Block->Flow = TotalFlow;
597*93a2c291Sspupyrev       }
598*93a2c291Sspupyrev 
599*93a2c291Sspupyrev       // Process all successor jumps and update corresponding flow values
600*93a2c291Sspupyrev       for (uint64_t I = 0; I < Block->SuccJumps.size(); I++) {
601*93a2c291Sspupyrev         auto Jump = Block->SuccJumps[I];
602*93a2c291Sspupyrev         if (I + 1 == Block->SuccJumps.size()) {
603*93a2c291Sspupyrev           Jump->Flow = TotalFlow;
604*93a2c291Sspupyrev           continue;
605*93a2c291Sspupyrev         }
606*93a2c291Sspupyrev         uint64_t Flow = uint64_t(TotalFlow * UnknownFirstSuccProbability);
607*93a2c291Sspupyrev         Jump->Flow = Flow;
608*93a2c291Sspupyrev         TotalFlow -= Flow;
609*93a2c291Sspupyrev       }
610*93a2c291Sspupyrev     }
611*93a2c291Sspupyrev   }
612*93a2c291Sspupyrev 
61398dd2f9eSspupyrev   /// A constant indicating an arbitrary exit block of a function.
61498dd2f9eSspupyrev   static constexpr uint64_t AnyExitBlock = uint64_t(-1);
61598dd2f9eSspupyrev 
61698dd2f9eSspupyrev   /// The function.
61798dd2f9eSspupyrev   FlowFunction &Func;
61898dd2f9eSspupyrev };
61998dd2f9eSspupyrev 
6207cc2493dSspupyrev /// Initializing flow network for a given function.
6217cc2493dSspupyrev ///
6227cc2493dSspupyrev /// Every block is split into three nodes that are responsible for (i) an
6237cc2493dSspupyrev /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
6247cc2493dSspupyrev /// reduction of the block weight.
6257cc2493dSspupyrev void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
6267cc2493dSspupyrev   uint64_t NumBlocks = Func.Blocks.size();
6277cc2493dSspupyrev   assert(NumBlocks > 1 && "Too few blocks in a function");
6287cc2493dSspupyrev   LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
6297cc2493dSspupyrev 
6307cc2493dSspupyrev   // Pre-process data: make sure the entry weight is at least 1
6317cc2493dSspupyrev   if (Func.Blocks[Func.Entry].Weight == 0) {
6327cc2493dSspupyrev     Func.Blocks[Func.Entry].Weight = 1;
6337cc2493dSspupyrev   }
6347cc2493dSspupyrev   // Introducing dummy source/sink pairs to allow flow circulation.
6357cc2493dSspupyrev   // The nodes corresponding to blocks of Func have indicies in the range
6367cc2493dSspupyrev   // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
6377cc2493dSspupyrev   uint64_t S = 3 * NumBlocks;
6387cc2493dSspupyrev   uint64_t T = S + 1;
6397cc2493dSspupyrev   uint64_t S1 = S + 2;
6407cc2493dSspupyrev   uint64_t T1 = S + 3;
6417cc2493dSspupyrev 
6427cc2493dSspupyrev   Network.initialize(3 * NumBlocks + 4, S1, T1);
6437cc2493dSspupyrev 
6447cc2493dSspupyrev   // Create three nodes for every block of the function
6457cc2493dSspupyrev   for (uint64_t B = 0; B < NumBlocks; B++) {
6467cc2493dSspupyrev     auto &Block = Func.Blocks[B];
6477cc2493dSspupyrev     assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
6487cc2493dSspupyrev            "non-zero weight of a block w/o weight except for an entry");
6497cc2493dSspupyrev 
6507cc2493dSspupyrev     // Split every block into two nodes
6517cc2493dSspupyrev     uint64_t Bin = 3 * B;
6527cc2493dSspupyrev     uint64_t Bout = 3 * B + 1;
6537cc2493dSspupyrev     uint64_t Baux = 3 * B + 2;
6547cc2493dSspupyrev     if (Block.Weight > 0) {
6557cc2493dSspupyrev       Network.addEdge(S1, Bout, Block.Weight, 0);
6567cc2493dSspupyrev       Network.addEdge(Bin, T1, Block.Weight, 0);
6577cc2493dSspupyrev     }
6587cc2493dSspupyrev 
6597cc2493dSspupyrev     // Edges from S and to T
6607cc2493dSspupyrev     assert((!Block.isEntry() || !Block.isExit()) &&
6617cc2493dSspupyrev            "a block cannot be an entry and an exit");
6627cc2493dSspupyrev     if (Block.isEntry()) {
6637cc2493dSspupyrev       Network.addEdge(S, Bin, 0);
6647cc2493dSspupyrev     } else if (Block.isExit()) {
6657cc2493dSspupyrev       Network.addEdge(Bout, T, 0);
6667cc2493dSspupyrev     }
6677cc2493dSspupyrev 
6687cc2493dSspupyrev     // An auxiliary node to allow increase/reduction of block counts:
6697cc2493dSspupyrev     // We assume that decreasing block counts is more expensive than increasing,
6707cc2493dSspupyrev     // and thus, setting separate costs here. In the future we may want to tune
6717cc2493dSspupyrev     // the relative costs so as to maximize the quality of generated profiles.
6727cc2493dSspupyrev     int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc;
6737cc2493dSspupyrev     int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec;
6747cc2493dSspupyrev     if (Block.UnknownWeight) {
6757cc2493dSspupyrev       // Do not penalize changing weights of blocks w/o known profile count
6767cc2493dSspupyrev       AuxCostInc = 0;
6777cc2493dSspupyrev       AuxCostDec = 0;
6787cc2493dSspupyrev     } else {
6797cc2493dSspupyrev       // Increasing the count for "cold" blocks with zero initial count is more
6807cc2493dSspupyrev       // expensive than for "hot" ones
6817cc2493dSspupyrev       if (Block.Weight == 0) {
6827cc2493dSspupyrev         AuxCostInc = MinCostMaxFlow::AuxCostIncZero;
6837cc2493dSspupyrev       }
6847cc2493dSspupyrev       // Modifying the count of the entry block is expensive
6857cc2493dSspupyrev       if (Block.isEntry()) {
6867cc2493dSspupyrev         AuxCostInc = MinCostMaxFlow::AuxCostIncEntry;
6877cc2493dSspupyrev         AuxCostDec = MinCostMaxFlow::AuxCostDecEntry;
6887cc2493dSspupyrev       }
6897cc2493dSspupyrev     }
6907cc2493dSspupyrev     // For blocks with self-edges, do not penalize a reduction of the count,
6917cc2493dSspupyrev     // as all of the increase can be attributed to the self-edge
6927cc2493dSspupyrev     if (Block.HasSelfEdge) {
6937cc2493dSspupyrev       AuxCostDec = 0;
6947cc2493dSspupyrev     }
6957cc2493dSspupyrev 
6967cc2493dSspupyrev     Network.addEdge(Bin, Baux, AuxCostInc);
6977cc2493dSspupyrev     Network.addEdge(Baux, Bout, AuxCostInc);
6987cc2493dSspupyrev     if (Block.Weight > 0) {
6997cc2493dSspupyrev       Network.addEdge(Bout, Baux, AuxCostDec);
7007cc2493dSspupyrev       Network.addEdge(Baux, Bin, AuxCostDec);
7017cc2493dSspupyrev     }
7027cc2493dSspupyrev   }
7037cc2493dSspupyrev 
7047cc2493dSspupyrev   // Creating edges for every jump
7057cc2493dSspupyrev   for (auto &Jump : Func.Jumps) {
7067cc2493dSspupyrev     uint64_t Src = Jump.Source;
7077cc2493dSspupyrev     uint64_t Dst = Jump.Target;
7087cc2493dSspupyrev     if (Src != Dst) {
7097cc2493dSspupyrev       uint64_t SrcOut = 3 * Src + 1;
7107cc2493dSspupyrev       uint64_t DstIn = 3 * Dst;
7117cc2493dSspupyrev       uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
7127cc2493dSspupyrev       Network.addEdge(SrcOut, DstIn, Cost);
7137cc2493dSspupyrev     }
7147cc2493dSspupyrev   }
7157cc2493dSspupyrev 
7167cc2493dSspupyrev   // Make sure we have a valid flow circulation
7177cc2493dSspupyrev   Network.addEdge(T, S, 0);
7187cc2493dSspupyrev }
7197cc2493dSspupyrev 
7207cc2493dSspupyrev /// Extract resulting block and edge counts from the flow network.
7217cc2493dSspupyrev void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
7227cc2493dSspupyrev   uint64_t NumBlocks = Func.Blocks.size();
7237cc2493dSspupyrev 
7247cc2493dSspupyrev   // Extract resulting block counts
7257cc2493dSspupyrev   for (uint64_t Src = 0; Src < NumBlocks; Src++) {
7267cc2493dSspupyrev     auto &Block = Func.Blocks[Src];
7277cc2493dSspupyrev     uint64_t SrcOut = 3 * Src + 1;
7287cc2493dSspupyrev     int64_t Flow = 0;
7297cc2493dSspupyrev     for (auto &Adj : Network.getFlow(SrcOut)) {
7307cc2493dSspupyrev       uint64_t DstIn = Adj.first;
7317cc2493dSspupyrev       int64_t DstFlow = Adj.second;
7327cc2493dSspupyrev       bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
7337cc2493dSspupyrev       if (!IsAuxNode || Block.HasSelfEdge) {
7347cc2493dSspupyrev         Flow += DstFlow;
7357cc2493dSspupyrev       }
7367cc2493dSspupyrev     }
7377cc2493dSspupyrev     Block.Flow = Flow;
7387cc2493dSspupyrev     assert(Flow >= 0 && "negative block flow");
7397cc2493dSspupyrev   }
7407cc2493dSspupyrev 
7417cc2493dSspupyrev   // Extract resulting jump counts
7427cc2493dSspupyrev   for (auto &Jump : Func.Jumps) {
7437cc2493dSspupyrev     uint64_t Src = Jump.Source;
7447cc2493dSspupyrev     uint64_t Dst = Jump.Target;
7457cc2493dSspupyrev     int64_t Flow = 0;
7467cc2493dSspupyrev     if (Src != Dst) {
7477cc2493dSspupyrev       uint64_t SrcOut = 3 * Src + 1;
7487cc2493dSspupyrev       uint64_t DstIn = 3 * Dst;
7497cc2493dSspupyrev       Flow = Network.getFlow(SrcOut, DstIn);
7507cc2493dSspupyrev     } else {
7517cc2493dSspupyrev       uint64_t SrcOut = 3 * Src + 1;
7527cc2493dSspupyrev       uint64_t SrcAux = 3 * Src + 2;
7537cc2493dSspupyrev       int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
7547cc2493dSspupyrev       if (AuxFlow > 0)
7557cc2493dSspupyrev         Flow = AuxFlow;
7567cc2493dSspupyrev     }
7577cc2493dSspupyrev     Jump.Flow = Flow;
7587cc2493dSspupyrev     assert(Flow >= 0 && "negative jump flow");
7597cc2493dSspupyrev   }
7607cc2493dSspupyrev }
7617cc2493dSspupyrev 
7627cc2493dSspupyrev #ifndef NDEBUG
7637cc2493dSspupyrev /// Verify that the computed flow values satisfy flow conservation rules
7647cc2493dSspupyrev void verifyWeights(const FlowFunction &Func) {
7657cc2493dSspupyrev   const uint64_t NumBlocks = Func.Blocks.size();
7667cc2493dSspupyrev   auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
7677cc2493dSspupyrev   auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
7687cc2493dSspupyrev   for (auto &Jump : Func.Jumps) {
7697cc2493dSspupyrev     InFlow[Jump.Target] += Jump.Flow;
7707cc2493dSspupyrev     OutFlow[Jump.Source] += Jump.Flow;
7717cc2493dSspupyrev   }
7727cc2493dSspupyrev 
7737cc2493dSspupyrev   uint64_t TotalInFlow = 0;
7747cc2493dSspupyrev   uint64_t TotalOutFlow = 0;
7757cc2493dSspupyrev   for (uint64_t I = 0; I < NumBlocks; I++) {
7767cc2493dSspupyrev     auto &Block = Func.Blocks[I];
7777cc2493dSspupyrev     if (Block.isEntry()) {
7787cc2493dSspupyrev       TotalInFlow += Block.Flow;
7797cc2493dSspupyrev       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
7807cc2493dSspupyrev     } else if (Block.isExit()) {
7817cc2493dSspupyrev       TotalOutFlow += Block.Flow;
7827cc2493dSspupyrev       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
7837cc2493dSspupyrev     } else {
7847cc2493dSspupyrev       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
7857cc2493dSspupyrev       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
7867cc2493dSspupyrev     }
7877cc2493dSspupyrev   }
7887cc2493dSspupyrev   assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
78998dd2f9eSspupyrev 
79098dd2f9eSspupyrev   // Verify that there are no isolated flow components
79198dd2f9eSspupyrev   // One could modify FlowFunction to hold edges indexed by the sources, which
79298dd2f9eSspupyrev   // will avoid a creation of the object
79398dd2f9eSspupyrev   auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
79498dd2f9eSspupyrev   for (auto &Jump : Func.Jumps) {
79598dd2f9eSspupyrev     if (Jump.Flow > 0) {
79698dd2f9eSspupyrev       PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
79798dd2f9eSspupyrev     }
79898dd2f9eSspupyrev   }
79998dd2f9eSspupyrev 
800*93a2c291Sspupyrev   // Run BFS from the source along edges with positive flow
80198dd2f9eSspupyrev   std::queue<uint64_t> Queue;
80298dd2f9eSspupyrev   auto Visited = std::vector<bool>(NumBlocks, false);
80398dd2f9eSspupyrev   Queue.push(Func.Entry);
80498dd2f9eSspupyrev   Visited[Func.Entry] = true;
80598dd2f9eSspupyrev   while (!Queue.empty()) {
80698dd2f9eSspupyrev     uint64_t Src = Queue.front();
80798dd2f9eSspupyrev     Queue.pop();
80898dd2f9eSspupyrev     for (uint64_t Dst : PositiveFlowEdges[Src]) {
80998dd2f9eSspupyrev       if (!Visited[Dst]) {
81098dd2f9eSspupyrev         Queue.push(Dst);
81198dd2f9eSspupyrev         Visited[Dst] = true;
81298dd2f9eSspupyrev       }
81398dd2f9eSspupyrev     }
81498dd2f9eSspupyrev   }
81598dd2f9eSspupyrev 
81698dd2f9eSspupyrev   // Verify that every block that has a positive flow is reached from the source
81798dd2f9eSspupyrev   // along edges with a positive flow
81898dd2f9eSspupyrev   for (uint64_t I = 0; I < NumBlocks; I++) {
81998dd2f9eSspupyrev     auto &Block = Func.Blocks[I];
82098dd2f9eSspupyrev     assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
82198dd2f9eSspupyrev   }
8227cc2493dSspupyrev }
8237cc2493dSspupyrev #endif
8247cc2493dSspupyrev 
8257cc2493dSspupyrev } // end of anonymous namespace
8267cc2493dSspupyrev 
8277cc2493dSspupyrev /// Apply the profile inference algorithm for a given flow function
8287cc2493dSspupyrev void llvm::applyFlowInference(FlowFunction &Func) {
8297cc2493dSspupyrev   // Create and apply an inference network model
8307cc2493dSspupyrev   auto InferenceNetwork = MinCostMaxFlow();
8317cc2493dSspupyrev   initializeNetwork(InferenceNetwork, Func);
8327cc2493dSspupyrev   InferenceNetwork.run();
8337cc2493dSspupyrev 
8347cc2493dSspupyrev   // Extract flow values for every block and every edge
8357cc2493dSspupyrev   extractWeights(InferenceNetwork, Func);
8367cc2493dSspupyrev 
83798dd2f9eSspupyrev   // Post-processing adjustments to the flow
83898dd2f9eSspupyrev   auto Adjuster = FlowAdjuster(Func);
83998dd2f9eSspupyrev   Adjuster.run();
84098dd2f9eSspupyrev 
8417cc2493dSspupyrev #ifndef NDEBUG
8427cc2493dSspupyrev   // Verify the result
8437cc2493dSspupyrev   verifyWeights(Func);
8447cc2493dSspupyrev #endif
8457cc2493dSspupyrev }
846