17cc2493dSspupyrev //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
27cc2493dSspupyrev //
37cc2493dSspupyrev // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
47cc2493dSspupyrev // See https://llvm.org/LICENSE.txt for license information.
57cc2493dSspupyrev // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67cc2493dSspupyrev //
77cc2493dSspupyrev //===----------------------------------------------------------------------===//
87cc2493dSspupyrev //
97cc2493dSspupyrev // This file implements a profile inference algorithm. Given an incomplete and
107cc2493dSspupyrev // possibly imprecise block counts, the algorithm reconstructs realistic block
117cc2493dSspupyrev // and edge counts that satisfy flow conservation rules, while minimally modify
127cc2493dSspupyrev // input block counts.
137cc2493dSspupyrev //
147cc2493dSspupyrev //===----------------------------------------------------------------------===//
157cc2493dSspupyrev 
167cc2493dSspupyrev #include "llvm/Transforms/Utils/SampleProfileInference.h"
177cc2493dSspupyrev #include "llvm/Support/Debug.h"
187cc2493dSspupyrev #include <queue>
197cc2493dSspupyrev #include <set>
207cc2493dSspupyrev 
217cc2493dSspupyrev using namespace llvm;
227cc2493dSspupyrev #define DEBUG_TYPE "sample-profile-inference"
237cc2493dSspupyrev 
247cc2493dSspupyrev namespace {
257cc2493dSspupyrev 
267cc2493dSspupyrev /// A value indicating an infinite flow/capacity/weight of a block/edge.
277cc2493dSspupyrev /// Not using numeric_limits<int64_t>::max(), as the values can be summed up
287cc2493dSspupyrev /// during the execution.
297cc2493dSspupyrev static constexpr int64_t INF = ((int64_t)1) << 50;
307cc2493dSspupyrev 
317cc2493dSspupyrev /// The minimum-cost maximum flow algorithm.
327cc2493dSspupyrev ///
337cc2493dSspupyrev /// The algorithm finds the maximum flow of minimum cost on a given (directed)
347cc2493dSspupyrev /// network using a modified version of the classical Moore-Bellman-Ford
357cc2493dSspupyrev /// approach. The algorithm applies a number of augmentation iterations in which
367cc2493dSspupyrev /// flow is sent along paths of positive capacity from the source to the sink.
377cc2493dSspupyrev /// The worst-case time complexity of the implementation is O(v(f)*m*n), where
387cc2493dSspupyrev /// where m is the number of edges, n is the number of vertices, and v(f) is the
397cc2493dSspupyrev /// value of the maximum flow. However, the observed running time on typical
407cc2493dSspupyrev /// instances is sub-quadratic, that is, o(n^2).
417cc2493dSspupyrev ///
427cc2493dSspupyrev /// The input is a set of edges with specified costs and capacities, and a pair
437cc2493dSspupyrev /// of nodes (source and sink). The output is the flow along each edge of the
447cc2493dSspupyrev /// minimum total cost respecting the given edge capacities.
457cc2493dSspupyrev class MinCostMaxFlow {
467cc2493dSspupyrev public:
477cc2493dSspupyrev   // Initialize algorithm's data structures for a network of a given size.
487cc2493dSspupyrev   void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
497cc2493dSspupyrev     Source = SourceNode;
507cc2493dSspupyrev     Target = SinkNode;
517cc2493dSspupyrev 
527cc2493dSspupyrev     Nodes = std::vector<Node>(NodeCount);
537cc2493dSspupyrev     Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
547cc2493dSspupyrev   }
557cc2493dSspupyrev 
567cc2493dSspupyrev   // Run the algorithm.
577cc2493dSspupyrev   int64_t run() {
587cc2493dSspupyrev     // Find an augmenting path and update the flow along the path
597cc2493dSspupyrev     size_t AugmentationIters = 0;
607cc2493dSspupyrev     while (findAugmentingPath()) {
617cc2493dSspupyrev       augmentFlowAlongPath();
627cc2493dSspupyrev       AugmentationIters++;
637cc2493dSspupyrev     }
647cc2493dSspupyrev 
657cc2493dSspupyrev     // Compute the total flow and its cost
667cc2493dSspupyrev     int64_t TotalCost = 0;
677cc2493dSspupyrev     int64_t TotalFlow = 0;
687cc2493dSspupyrev     for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
697cc2493dSspupyrev       for (auto &Edge : Edges[Src]) {
707cc2493dSspupyrev         if (Edge.Flow > 0) {
717cc2493dSspupyrev           TotalCost += Edge.Cost * Edge.Flow;
727cc2493dSspupyrev           if (Src == Source)
737cc2493dSspupyrev             TotalFlow += Edge.Flow;
747cc2493dSspupyrev         }
757cc2493dSspupyrev       }
767cc2493dSspupyrev     }
777cc2493dSspupyrev     LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
787cc2493dSspupyrev                       << " iterations with " << TotalFlow << " total flow"
797cc2493dSspupyrev                       << " of " << TotalCost << " cost\n");
8022d82949SKazu Hirata     (void)TotalFlow;
817cc2493dSspupyrev     return TotalCost;
827cc2493dSspupyrev   }
837cc2493dSspupyrev 
847cc2493dSspupyrev   /// Adding an edge to the network with a specified capacity and a cost.
857cc2493dSspupyrev   /// Multiple edges between a pair of nodes are allowed but self-edges
867cc2493dSspupyrev   /// are not supported.
877cc2493dSspupyrev   void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
887cc2493dSspupyrev     assert(Capacity > 0 && "adding an edge of zero capacity");
897cc2493dSspupyrev     assert(Src != Dst && "loop edge are not supported");
907cc2493dSspupyrev 
917cc2493dSspupyrev     Edge SrcEdge;
927cc2493dSspupyrev     SrcEdge.Dst = Dst;
937cc2493dSspupyrev     SrcEdge.Cost = Cost;
947cc2493dSspupyrev     SrcEdge.Capacity = Capacity;
957cc2493dSspupyrev     SrcEdge.Flow = 0;
967cc2493dSspupyrev     SrcEdge.RevEdgeIndex = Edges[Dst].size();
977cc2493dSspupyrev 
987cc2493dSspupyrev     Edge DstEdge;
997cc2493dSspupyrev     DstEdge.Dst = Src;
1007cc2493dSspupyrev     DstEdge.Cost = -Cost;
1017cc2493dSspupyrev     DstEdge.Capacity = 0;
1027cc2493dSspupyrev     DstEdge.Flow = 0;
1037cc2493dSspupyrev     DstEdge.RevEdgeIndex = Edges[Src].size();
1047cc2493dSspupyrev 
1057cc2493dSspupyrev     Edges[Src].push_back(SrcEdge);
1067cc2493dSspupyrev     Edges[Dst].push_back(DstEdge);
1077cc2493dSspupyrev   }
1087cc2493dSspupyrev 
1097cc2493dSspupyrev   /// Adding an edge to the network of infinite capacity and a given cost.
1107cc2493dSspupyrev   void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
1117cc2493dSspupyrev     addEdge(Src, Dst, INF, Cost);
1127cc2493dSspupyrev   }
1137cc2493dSspupyrev 
1147cc2493dSspupyrev   /// Get the total flow from a given source node.
1157cc2493dSspupyrev   /// Returns a list of pairs (target node, amount of flow to the target).
1167cc2493dSspupyrev   const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
1177cc2493dSspupyrev     std::vector<std::pair<uint64_t, int64_t>> Flow;
1187cc2493dSspupyrev     for (auto &Edge : Edges[Src]) {
1197cc2493dSspupyrev       if (Edge.Flow > 0)
1207cc2493dSspupyrev         Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
1217cc2493dSspupyrev     }
1227cc2493dSspupyrev     return Flow;
1237cc2493dSspupyrev   }
1247cc2493dSspupyrev 
1257cc2493dSspupyrev   /// Get the total flow between a pair of nodes.
1267cc2493dSspupyrev   int64_t getFlow(uint64_t Src, uint64_t Dst) const {
1277cc2493dSspupyrev     int64_t Flow = 0;
1287cc2493dSspupyrev     for (auto &Edge : Edges[Src]) {
1297cc2493dSspupyrev       if (Edge.Dst == Dst) {
1307cc2493dSspupyrev         Flow += Edge.Flow;
1317cc2493dSspupyrev       }
1327cc2493dSspupyrev     }
1337cc2493dSspupyrev     return Flow;
1347cc2493dSspupyrev   }
1357cc2493dSspupyrev 
1367cc2493dSspupyrev   /// A cost of increasing a block's count by one.
1377cc2493dSspupyrev   static constexpr int64_t AuxCostInc = 10;
1387cc2493dSspupyrev   /// A cost of decreasing a block's count by one.
1397cc2493dSspupyrev   static constexpr int64_t AuxCostDec = 20;
1407cc2493dSspupyrev   /// A cost of increasing a count of zero-weight block by one.
1417cc2493dSspupyrev   static constexpr int64_t AuxCostIncZero = 11;
1427cc2493dSspupyrev   /// A cost of increasing the entry block's count by one.
1437cc2493dSspupyrev   static constexpr int64_t AuxCostIncEntry = 40;
1447cc2493dSspupyrev   /// A cost of decreasing the entry block's count by one.
1457cc2493dSspupyrev   static constexpr int64_t AuxCostDecEntry = 10;
1467cc2493dSspupyrev   /// A cost of taking an unlikely jump.
1477cc2493dSspupyrev   static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20;
1487cc2493dSspupyrev 
1497cc2493dSspupyrev private:
1507cc2493dSspupyrev   /// Check for existence of an augmenting path with a positive capacity.
1517cc2493dSspupyrev   bool findAugmentingPath() {
1527cc2493dSspupyrev     // Initialize data structures
1537cc2493dSspupyrev     for (auto &Node : Nodes) {
1547cc2493dSspupyrev       Node.Distance = INF;
1557cc2493dSspupyrev       Node.ParentNode = uint64_t(-1);
1567cc2493dSspupyrev       Node.ParentEdgeIndex = uint64_t(-1);
1577cc2493dSspupyrev       Node.Taken = false;
1587cc2493dSspupyrev     }
1597cc2493dSspupyrev 
1607cc2493dSspupyrev     std::queue<uint64_t> Queue;
1617cc2493dSspupyrev     Queue.push(Source);
1627cc2493dSspupyrev     Nodes[Source].Distance = 0;
1637cc2493dSspupyrev     Nodes[Source].Taken = true;
1647cc2493dSspupyrev     while (!Queue.empty()) {
1657cc2493dSspupyrev       uint64_t Src = Queue.front();
1667cc2493dSspupyrev       Queue.pop();
1677cc2493dSspupyrev       Nodes[Src].Taken = false;
1687cc2493dSspupyrev       // Although the residual network contains edges with negative costs
1697cc2493dSspupyrev       // (in particular, backward edges), it can be shown that there are no
1707cc2493dSspupyrev       // negative-weight cycles and the following two invariants are maintained:
1717cc2493dSspupyrev       // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
1727cc2493dSspupyrev       // where Dist is the length of the shortest path between two nodes. This
1737cc2493dSspupyrev       // allows to prune the search-space of the path-finding algorithm using
1747cc2493dSspupyrev       // the following early-stop criteria:
1757cc2493dSspupyrev       // -- If we find a path with zero-distance from Source to Target, stop the
1767cc2493dSspupyrev       //    search, as the path is the shortest since Dist[Source, Target] >= 0;
1777cc2493dSspupyrev       // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
1787cc2493dSspupyrev       //    process node V, as it is guaranteed _not_ to be on a shortest path
1797cc2493dSspupyrev       //    from Source to Target; it follows from inequalities
1807cc2493dSspupyrev       //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
1817cc2493dSspupyrev       //                         >= Dist[Source, V]
1827cc2493dSspupyrev       if (Nodes[Target].Distance == 0)
1837cc2493dSspupyrev         break;
1847cc2493dSspupyrev       if (Nodes[Src].Distance > Nodes[Target].Distance)
1857cc2493dSspupyrev         continue;
1867cc2493dSspupyrev 
1877cc2493dSspupyrev       // Process adjacent edges
1887cc2493dSspupyrev       for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
1897cc2493dSspupyrev         auto &Edge = Edges[Src][EdgeIdx];
1907cc2493dSspupyrev         if (Edge.Flow < Edge.Capacity) {
1917cc2493dSspupyrev           uint64_t Dst = Edge.Dst;
1927cc2493dSspupyrev           int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
1937cc2493dSspupyrev           if (Nodes[Dst].Distance > NewDistance) {
1947cc2493dSspupyrev             // Update the distance and the parent node/edge
1957cc2493dSspupyrev             Nodes[Dst].Distance = NewDistance;
1967cc2493dSspupyrev             Nodes[Dst].ParentNode = Src;
1977cc2493dSspupyrev             Nodes[Dst].ParentEdgeIndex = EdgeIdx;
1987cc2493dSspupyrev             // Add the node to the queue, if it is not there yet
1997cc2493dSspupyrev             if (!Nodes[Dst].Taken) {
2007cc2493dSspupyrev               Queue.push(Dst);
2017cc2493dSspupyrev               Nodes[Dst].Taken = true;
2027cc2493dSspupyrev             }
2037cc2493dSspupyrev           }
2047cc2493dSspupyrev         }
2057cc2493dSspupyrev       }
2067cc2493dSspupyrev     }
2077cc2493dSspupyrev 
2087cc2493dSspupyrev     return Nodes[Target].Distance != INF;
2097cc2493dSspupyrev   }
2107cc2493dSspupyrev 
2117cc2493dSspupyrev   /// Update the current flow along the augmenting path.
2127cc2493dSspupyrev   void augmentFlowAlongPath() {
2137cc2493dSspupyrev     // Find path capacity
2147cc2493dSspupyrev     int64_t PathCapacity = INF;
2157cc2493dSspupyrev     uint64_t Now = Target;
2167cc2493dSspupyrev     while (Now != Source) {
2177cc2493dSspupyrev       uint64_t Pred = Nodes[Now].ParentNode;
2187cc2493dSspupyrev       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
2197cc2493dSspupyrev       PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow);
2207cc2493dSspupyrev       Now = Pred;
2217cc2493dSspupyrev     }
2227cc2493dSspupyrev 
2237cc2493dSspupyrev     assert(PathCapacity > 0 && "found incorrect augmenting path");
2247cc2493dSspupyrev 
2257cc2493dSspupyrev     // Update the flow along the path
2267cc2493dSspupyrev     Now = Target;
2277cc2493dSspupyrev     while (Now != Source) {
2287cc2493dSspupyrev       uint64_t Pred = Nodes[Now].ParentNode;
2297cc2493dSspupyrev       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
2307cc2493dSspupyrev       auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
2317cc2493dSspupyrev 
2327cc2493dSspupyrev       Edge.Flow += PathCapacity;
2337cc2493dSspupyrev       RevEdge.Flow -= PathCapacity;
2347cc2493dSspupyrev 
2357cc2493dSspupyrev       Now = Pred;
2367cc2493dSspupyrev     }
2377cc2493dSspupyrev   }
2387cc2493dSspupyrev 
2397cc2493dSspupyrev   /// An node in a flow network.
2407cc2493dSspupyrev   struct Node {
2417cc2493dSspupyrev     /// The cost of the cheapest path from the source to the current node.
2427cc2493dSspupyrev     int64_t Distance;
2437cc2493dSspupyrev     /// The node preceding the current one in the path.
2447cc2493dSspupyrev     uint64_t ParentNode;
2457cc2493dSspupyrev     /// The index of the edge between ParentNode and the current node.
2467cc2493dSspupyrev     uint64_t ParentEdgeIndex;
2477cc2493dSspupyrev     /// An indicator of whether the current node is in a queue.
2487cc2493dSspupyrev     bool Taken;
2497cc2493dSspupyrev   };
2507cc2493dSspupyrev   /// An edge in a flow network.
2517cc2493dSspupyrev   struct Edge {
2527cc2493dSspupyrev     /// The cost of the edge.
2537cc2493dSspupyrev     int64_t Cost;
2547cc2493dSspupyrev     /// The capacity of the edge.
2557cc2493dSspupyrev     int64_t Capacity;
2567cc2493dSspupyrev     /// The current flow on the edge.
2577cc2493dSspupyrev     int64_t Flow;
2587cc2493dSspupyrev     /// The destination node of the edge.
2597cc2493dSspupyrev     uint64_t Dst;
2607cc2493dSspupyrev     /// The index of the reverse edge between Dst and the current node.
2617cc2493dSspupyrev     uint64_t RevEdgeIndex;
2627cc2493dSspupyrev   };
2637cc2493dSspupyrev 
2647cc2493dSspupyrev   /// The set of network nodes.
2657cc2493dSspupyrev   std::vector<Node> Nodes;
2667cc2493dSspupyrev   /// The set of network edges.
2677cc2493dSspupyrev   std::vector<std::vector<Edge>> Edges;
2687cc2493dSspupyrev   /// Source node of the flow.
2697cc2493dSspupyrev   uint64_t Source;
2707cc2493dSspupyrev   /// Target (sink) node of the flow.
2717cc2493dSspupyrev   uint64_t Target;
2727cc2493dSspupyrev };
2737cc2493dSspupyrev 
274*98dd2f9eSspupyrev /// Post-processing adjustment of the control flow.
275*98dd2f9eSspupyrev class FlowAdjuster {
276*98dd2f9eSspupyrev public:
277*98dd2f9eSspupyrev   FlowAdjuster(FlowFunction &Func) : Func(Func) {
278*98dd2f9eSspupyrev     assert(Func.Blocks[Func.Entry].isEntry() &&
279*98dd2f9eSspupyrev            "incorrect index of the entry block");
280*98dd2f9eSspupyrev   }
281*98dd2f9eSspupyrev 
282*98dd2f9eSspupyrev   // Run the post-processing
283*98dd2f9eSspupyrev   void run() {
284*98dd2f9eSspupyrev     /// We adjust the control flow in a function so as to remove all
285*98dd2f9eSspupyrev     /// "isolated" components with positive flow that are unreachable
286*98dd2f9eSspupyrev     /// from the entry block. For every such component, we find the shortest
287*98dd2f9eSspupyrev     /// path from the entry to an exit passing through the component, and
288*98dd2f9eSspupyrev     /// increase the flow by one unit along the path.
289*98dd2f9eSspupyrev     joinIsolatedComponents();
290*98dd2f9eSspupyrev   }
291*98dd2f9eSspupyrev 
292*98dd2f9eSspupyrev private:
293*98dd2f9eSspupyrev   void joinIsolatedComponents() {
294*98dd2f9eSspupyrev     // Find blocks that are reachable from the source
295*98dd2f9eSspupyrev     auto Visited = std::vector<bool>(NumBlocks(), false);
296*98dd2f9eSspupyrev     findReachable(Func.Entry, Visited);
297*98dd2f9eSspupyrev 
298*98dd2f9eSspupyrev     // Iterate over all non-reachable blocks and adjust their weights
299*98dd2f9eSspupyrev     for (uint64_t I = 0; I < NumBlocks(); I++) {
300*98dd2f9eSspupyrev       auto &Block = Func.Blocks[I];
301*98dd2f9eSspupyrev       if (Block.Flow > 0 && !Visited[I]) {
302*98dd2f9eSspupyrev         // Find a path from the entry to an exit passing through the block I
303*98dd2f9eSspupyrev         auto Path = findShortestPath(I);
304*98dd2f9eSspupyrev         // Increase the flow along the path
305*98dd2f9eSspupyrev         assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
306*98dd2f9eSspupyrev                "incorrectly computed path adjusting control flow");
307*98dd2f9eSspupyrev         Func.Blocks[Func.Entry].Flow += 1;
308*98dd2f9eSspupyrev         for (auto &Jump : Path) {
309*98dd2f9eSspupyrev           Jump->Flow += 1;
310*98dd2f9eSspupyrev           Func.Blocks[Jump->Target].Flow += 1;
311*98dd2f9eSspupyrev           // Update reachability
312*98dd2f9eSspupyrev           findReachable(Jump->Target, Visited);
313*98dd2f9eSspupyrev         }
314*98dd2f9eSspupyrev       }
315*98dd2f9eSspupyrev     }
316*98dd2f9eSspupyrev   }
317*98dd2f9eSspupyrev 
318*98dd2f9eSspupyrev   /// Run bfs from a given block along the jumps with a positive flow and mark
319*98dd2f9eSspupyrev   /// all reachable blocks.
320*98dd2f9eSspupyrev   void findReachable(uint64_t Src, std::vector<bool> &Visited) {
321*98dd2f9eSspupyrev     if (Visited[Src])
322*98dd2f9eSspupyrev       return;
323*98dd2f9eSspupyrev     std::queue<uint64_t> Queue;
324*98dd2f9eSspupyrev     Queue.push(Src);
325*98dd2f9eSspupyrev     Visited[Src] = true;
326*98dd2f9eSspupyrev     while (!Queue.empty()) {
327*98dd2f9eSspupyrev       Src = Queue.front();
328*98dd2f9eSspupyrev       Queue.pop();
329*98dd2f9eSspupyrev       for (auto Jump : Func.Blocks[Src].SuccJumps) {
330*98dd2f9eSspupyrev         uint64_t Dst = Jump->Target;
331*98dd2f9eSspupyrev         if (Jump->Flow > 0 && !Visited[Dst]) {
332*98dd2f9eSspupyrev           Queue.push(Dst);
333*98dd2f9eSspupyrev           Visited[Dst] = true;
334*98dd2f9eSspupyrev         }
335*98dd2f9eSspupyrev       }
336*98dd2f9eSspupyrev     }
337*98dd2f9eSspupyrev   }
338*98dd2f9eSspupyrev 
339*98dd2f9eSspupyrev   /// Find the shortest path from the entry block to an exit block passing
340*98dd2f9eSspupyrev   /// through a given block.
341*98dd2f9eSspupyrev   std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
342*98dd2f9eSspupyrev     // A path from the entry block to BlockIdx
343*98dd2f9eSspupyrev     auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
344*98dd2f9eSspupyrev     // A path from BlockIdx to an exit block
345*98dd2f9eSspupyrev     auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
346*98dd2f9eSspupyrev 
347*98dd2f9eSspupyrev     // Concatenate the two paths
348*98dd2f9eSspupyrev     std::vector<FlowJump *> Result;
349*98dd2f9eSspupyrev     Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
350*98dd2f9eSspupyrev     Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
351*98dd2f9eSspupyrev     return Result;
352*98dd2f9eSspupyrev   }
353*98dd2f9eSspupyrev 
354*98dd2f9eSspupyrev   /// Apply the Dijkstra algorithm to find the shortest path from a given
355*98dd2f9eSspupyrev   /// Source to a given Target block.
356*98dd2f9eSspupyrev   /// If Target == -1, then the path ends at an exit block.
357*98dd2f9eSspupyrev   std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
358*98dd2f9eSspupyrev     // Quit early, if possible
359*98dd2f9eSspupyrev     if (Source == Target)
360*98dd2f9eSspupyrev       return std::vector<FlowJump *>();
361*98dd2f9eSspupyrev     if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
362*98dd2f9eSspupyrev       return std::vector<FlowJump *>();
363*98dd2f9eSspupyrev 
364*98dd2f9eSspupyrev     // Initialize data structures
365*98dd2f9eSspupyrev     auto Distance = std::vector<int64_t>(NumBlocks(), INF);
366*98dd2f9eSspupyrev     auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
367*98dd2f9eSspupyrev     Distance[Source] = 0;
368*98dd2f9eSspupyrev     std::set<std::pair<uint64_t, uint64_t>> Queue;
369*98dd2f9eSspupyrev     Queue.insert(std::make_pair(Distance[Source], Source));
370*98dd2f9eSspupyrev 
371*98dd2f9eSspupyrev     // Run the Dijkstra algorithm
372*98dd2f9eSspupyrev     while (!Queue.empty()) {
373*98dd2f9eSspupyrev       uint64_t Src = Queue.begin()->second;
374*98dd2f9eSspupyrev       Queue.erase(Queue.begin());
375*98dd2f9eSspupyrev       // If we found a solution, quit early
376*98dd2f9eSspupyrev       if (Src == Target ||
377*98dd2f9eSspupyrev           (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
378*98dd2f9eSspupyrev         break;
379*98dd2f9eSspupyrev 
380*98dd2f9eSspupyrev       for (auto Jump : Func.Blocks[Src].SuccJumps) {
381*98dd2f9eSspupyrev         uint64_t Dst = Jump->Target;
382*98dd2f9eSspupyrev         int64_t JumpDist = jumpDistance(Jump);
383*98dd2f9eSspupyrev         if (Distance[Dst] > Distance[Src] + JumpDist) {
384*98dd2f9eSspupyrev           Queue.erase(std::make_pair(Distance[Dst], Dst));
385*98dd2f9eSspupyrev 
386*98dd2f9eSspupyrev           Distance[Dst] = Distance[Src] + JumpDist;
387*98dd2f9eSspupyrev           Parent[Dst] = Jump;
388*98dd2f9eSspupyrev 
389*98dd2f9eSspupyrev           Queue.insert(std::make_pair(Distance[Dst], Dst));
390*98dd2f9eSspupyrev         }
391*98dd2f9eSspupyrev       }
392*98dd2f9eSspupyrev     }
393*98dd2f9eSspupyrev     // If Target is not provided, find the closest exit block
394*98dd2f9eSspupyrev     if (Target == AnyExitBlock) {
395*98dd2f9eSspupyrev       for (uint64_t I = 0; I < NumBlocks(); I++) {
396*98dd2f9eSspupyrev         if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
397*98dd2f9eSspupyrev           if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
398*98dd2f9eSspupyrev             Target = I;
399*98dd2f9eSspupyrev           }
400*98dd2f9eSspupyrev         }
401*98dd2f9eSspupyrev       }
402*98dd2f9eSspupyrev     }
403*98dd2f9eSspupyrev     assert(Parent[Target] != nullptr && "a path does not exist");
404*98dd2f9eSspupyrev 
405*98dd2f9eSspupyrev     // Extract the constructed path
406*98dd2f9eSspupyrev     std::vector<FlowJump *> Result;
407*98dd2f9eSspupyrev     uint64_t Now = Target;
408*98dd2f9eSspupyrev     while (Now != Source) {
409*98dd2f9eSspupyrev       assert(Now == Parent[Now]->Target && "incorrect parent jump");
410*98dd2f9eSspupyrev       Result.push_back(Parent[Now]);
411*98dd2f9eSspupyrev       Now = Parent[Now]->Source;
412*98dd2f9eSspupyrev     }
413*98dd2f9eSspupyrev     // Reverse the path, since it is extracted from Target to Source
414*98dd2f9eSspupyrev     std::reverse(Result.begin(), Result.end());
415*98dd2f9eSspupyrev     return Result;
416*98dd2f9eSspupyrev   }
417*98dd2f9eSspupyrev 
418*98dd2f9eSspupyrev   /// A distance of a path for a given jump.
419*98dd2f9eSspupyrev   /// In order to incite the path to use blocks/jumps with large positive flow,
420*98dd2f9eSspupyrev   /// and avoid changing branch probability of outgoing edges drastically,
421*98dd2f9eSspupyrev   /// set the distance as follows:
422*98dd2f9eSspupyrev   ///   if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0)
423*98dd2f9eSspupyrev   ///   if Block.Weight > 0, then distance = 1
424*98dd2f9eSspupyrev   ///   otherwise distance >> 1
425*98dd2f9eSspupyrev   int64_t jumpDistance(FlowJump *Jump) const {
426*98dd2f9eSspupyrev     int64_t BaseDistance = 100;
427*98dd2f9eSspupyrev     if (Jump->IsUnlikely)
428*98dd2f9eSspupyrev       return MinCostMaxFlow::AuxCostUnlikely;
429*98dd2f9eSspupyrev     if (Jump->Flow > 0)
430*98dd2f9eSspupyrev       return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0);
431*98dd2f9eSspupyrev     if (Func.Blocks[Jump->Target].Weight > 0)
432*98dd2f9eSspupyrev       return BaseDistance;
433*98dd2f9eSspupyrev     return BaseDistance * (NumBlocks() + 1);
434*98dd2f9eSspupyrev   };
435*98dd2f9eSspupyrev 
436*98dd2f9eSspupyrev   uint64_t NumBlocks() const { return Func.Blocks.size(); }
437*98dd2f9eSspupyrev 
438*98dd2f9eSspupyrev   /// A constant indicating an arbitrary exit block of a function.
439*98dd2f9eSspupyrev   static constexpr uint64_t AnyExitBlock = uint64_t(-1);
440*98dd2f9eSspupyrev 
441*98dd2f9eSspupyrev   /// The function.
442*98dd2f9eSspupyrev   FlowFunction &Func;
443*98dd2f9eSspupyrev };
444*98dd2f9eSspupyrev 
4457cc2493dSspupyrev /// Initializing flow network for a given function.
4467cc2493dSspupyrev ///
4477cc2493dSspupyrev /// Every block is split into three nodes that are responsible for (i) an
4487cc2493dSspupyrev /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
4497cc2493dSspupyrev /// reduction of the block weight.
4507cc2493dSspupyrev void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
4517cc2493dSspupyrev   uint64_t NumBlocks = Func.Blocks.size();
4527cc2493dSspupyrev   assert(NumBlocks > 1 && "Too few blocks in a function");
4537cc2493dSspupyrev   LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
4547cc2493dSspupyrev 
4557cc2493dSspupyrev   // Pre-process data: make sure the entry weight is at least 1
4567cc2493dSspupyrev   if (Func.Blocks[Func.Entry].Weight == 0) {
4577cc2493dSspupyrev     Func.Blocks[Func.Entry].Weight = 1;
4587cc2493dSspupyrev   }
4597cc2493dSspupyrev   // Introducing dummy source/sink pairs to allow flow circulation.
4607cc2493dSspupyrev   // The nodes corresponding to blocks of Func have indicies in the range
4617cc2493dSspupyrev   // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
4627cc2493dSspupyrev   uint64_t S = 3 * NumBlocks;
4637cc2493dSspupyrev   uint64_t T = S + 1;
4647cc2493dSspupyrev   uint64_t S1 = S + 2;
4657cc2493dSspupyrev   uint64_t T1 = S + 3;
4667cc2493dSspupyrev 
4677cc2493dSspupyrev   Network.initialize(3 * NumBlocks + 4, S1, T1);
4687cc2493dSspupyrev 
4697cc2493dSspupyrev   // Create three nodes for every block of the function
4707cc2493dSspupyrev   for (uint64_t B = 0; B < NumBlocks; B++) {
4717cc2493dSspupyrev     auto &Block = Func.Blocks[B];
4727cc2493dSspupyrev     assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
4737cc2493dSspupyrev            "non-zero weight of a block w/o weight except for an entry");
4747cc2493dSspupyrev 
4757cc2493dSspupyrev     // Split every block into two nodes
4767cc2493dSspupyrev     uint64_t Bin = 3 * B;
4777cc2493dSspupyrev     uint64_t Bout = 3 * B + 1;
4787cc2493dSspupyrev     uint64_t Baux = 3 * B + 2;
4797cc2493dSspupyrev     if (Block.Weight > 0) {
4807cc2493dSspupyrev       Network.addEdge(S1, Bout, Block.Weight, 0);
4817cc2493dSspupyrev       Network.addEdge(Bin, T1, Block.Weight, 0);
4827cc2493dSspupyrev     }
4837cc2493dSspupyrev 
4847cc2493dSspupyrev     // Edges from S and to T
4857cc2493dSspupyrev     assert((!Block.isEntry() || !Block.isExit()) &&
4867cc2493dSspupyrev            "a block cannot be an entry and an exit");
4877cc2493dSspupyrev     if (Block.isEntry()) {
4887cc2493dSspupyrev       Network.addEdge(S, Bin, 0);
4897cc2493dSspupyrev     } else if (Block.isExit()) {
4907cc2493dSspupyrev       Network.addEdge(Bout, T, 0);
4917cc2493dSspupyrev     }
4927cc2493dSspupyrev 
4937cc2493dSspupyrev     // An auxiliary node to allow increase/reduction of block counts:
4947cc2493dSspupyrev     // We assume that decreasing block counts is more expensive than increasing,
4957cc2493dSspupyrev     // and thus, setting separate costs here. In the future we may want to tune
4967cc2493dSspupyrev     // the relative costs so as to maximize the quality of generated profiles.
4977cc2493dSspupyrev     int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc;
4987cc2493dSspupyrev     int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec;
4997cc2493dSspupyrev     if (Block.UnknownWeight) {
5007cc2493dSspupyrev       // Do not penalize changing weights of blocks w/o known profile count
5017cc2493dSspupyrev       AuxCostInc = 0;
5027cc2493dSspupyrev       AuxCostDec = 0;
5037cc2493dSspupyrev     } else {
5047cc2493dSspupyrev       // Increasing the count for "cold" blocks with zero initial count is more
5057cc2493dSspupyrev       // expensive than for "hot" ones
5067cc2493dSspupyrev       if (Block.Weight == 0) {
5077cc2493dSspupyrev         AuxCostInc = MinCostMaxFlow::AuxCostIncZero;
5087cc2493dSspupyrev       }
5097cc2493dSspupyrev       // Modifying the count of the entry block is expensive
5107cc2493dSspupyrev       if (Block.isEntry()) {
5117cc2493dSspupyrev         AuxCostInc = MinCostMaxFlow::AuxCostIncEntry;
5127cc2493dSspupyrev         AuxCostDec = MinCostMaxFlow::AuxCostDecEntry;
5137cc2493dSspupyrev       }
5147cc2493dSspupyrev     }
5157cc2493dSspupyrev     // For blocks with self-edges, do not penalize a reduction of the count,
5167cc2493dSspupyrev     // as all of the increase can be attributed to the self-edge
5177cc2493dSspupyrev     if (Block.HasSelfEdge) {
5187cc2493dSspupyrev       AuxCostDec = 0;
5197cc2493dSspupyrev     }
5207cc2493dSspupyrev 
5217cc2493dSspupyrev     Network.addEdge(Bin, Baux, AuxCostInc);
5227cc2493dSspupyrev     Network.addEdge(Baux, Bout, AuxCostInc);
5237cc2493dSspupyrev     if (Block.Weight > 0) {
5247cc2493dSspupyrev       Network.addEdge(Bout, Baux, AuxCostDec);
5257cc2493dSspupyrev       Network.addEdge(Baux, Bin, AuxCostDec);
5267cc2493dSspupyrev     }
5277cc2493dSspupyrev   }
5287cc2493dSspupyrev 
5297cc2493dSspupyrev   // Creating edges for every jump
5307cc2493dSspupyrev   for (auto &Jump : Func.Jumps) {
5317cc2493dSspupyrev     uint64_t Src = Jump.Source;
5327cc2493dSspupyrev     uint64_t Dst = Jump.Target;
5337cc2493dSspupyrev     if (Src != Dst) {
5347cc2493dSspupyrev       uint64_t SrcOut = 3 * Src + 1;
5357cc2493dSspupyrev       uint64_t DstIn = 3 * Dst;
5367cc2493dSspupyrev       uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
5377cc2493dSspupyrev       Network.addEdge(SrcOut, DstIn, Cost);
5387cc2493dSspupyrev     }
5397cc2493dSspupyrev   }
5407cc2493dSspupyrev 
5417cc2493dSspupyrev   // Make sure we have a valid flow circulation
5427cc2493dSspupyrev   Network.addEdge(T, S, 0);
5437cc2493dSspupyrev }
5447cc2493dSspupyrev 
5457cc2493dSspupyrev /// Extract resulting block and edge counts from the flow network.
5467cc2493dSspupyrev void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
5477cc2493dSspupyrev   uint64_t NumBlocks = Func.Blocks.size();
5487cc2493dSspupyrev 
5497cc2493dSspupyrev   // Extract resulting block counts
5507cc2493dSspupyrev   for (uint64_t Src = 0; Src < NumBlocks; Src++) {
5517cc2493dSspupyrev     auto &Block = Func.Blocks[Src];
5527cc2493dSspupyrev     uint64_t SrcOut = 3 * Src + 1;
5537cc2493dSspupyrev     int64_t Flow = 0;
5547cc2493dSspupyrev     for (auto &Adj : Network.getFlow(SrcOut)) {
5557cc2493dSspupyrev       uint64_t DstIn = Adj.first;
5567cc2493dSspupyrev       int64_t DstFlow = Adj.second;
5577cc2493dSspupyrev       bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
5587cc2493dSspupyrev       if (!IsAuxNode || Block.HasSelfEdge) {
5597cc2493dSspupyrev         Flow += DstFlow;
5607cc2493dSspupyrev       }
5617cc2493dSspupyrev     }
5627cc2493dSspupyrev     Block.Flow = Flow;
5637cc2493dSspupyrev     assert(Flow >= 0 && "negative block flow");
5647cc2493dSspupyrev   }
5657cc2493dSspupyrev 
5667cc2493dSspupyrev   // Extract resulting jump counts
5677cc2493dSspupyrev   for (auto &Jump : Func.Jumps) {
5687cc2493dSspupyrev     uint64_t Src = Jump.Source;
5697cc2493dSspupyrev     uint64_t Dst = Jump.Target;
5707cc2493dSspupyrev     int64_t Flow = 0;
5717cc2493dSspupyrev     if (Src != Dst) {
5727cc2493dSspupyrev       uint64_t SrcOut = 3 * Src + 1;
5737cc2493dSspupyrev       uint64_t DstIn = 3 * Dst;
5747cc2493dSspupyrev       Flow = Network.getFlow(SrcOut, DstIn);
5757cc2493dSspupyrev     } else {
5767cc2493dSspupyrev       uint64_t SrcOut = 3 * Src + 1;
5777cc2493dSspupyrev       uint64_t SrcAux = 3 * Src + 2;
5787cc2493dSspupyrev       int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
5797cc2493dSspupyrev       if (AuxFlow > 0)
5807cc2493dSspupyrev         Flow = AuxFlow;
5817cc2493dSspupyrev     }
5827cc2493dSspupyrev     Jump.Flow = Flow;
5837cc2493dSspupyrev     assert(Flow >= 0 && "negative jump flow");
5847cc2493dSspupyrev   }
5857cc2493dSspupyrev }
5867cc2493dSspupyrev 
5877cc2493dSspupyrev #ifndef NDEBUG
5887cc2493dSspupyrev /// Verify that the computed flow values satisfy flow conservation rules
5897cc2493dSspupyrev void verifyWeights(const FlowFunction &Func) {
5907cc2493dSspupyrev   const uint64_t NumBlocks = Func.Blocks.size();
5917cc2493dSspupyrev   auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
5927cc2493dSspupyrev   auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
5937cc2493dSspupyrev   for (auto &Jump : Func.Jumps) {
5947cc2493dSspupyrev     InFlow[Jump.Target] += Jump.Flow;
5957cc2493dSspupyrev     OutFlow[Jump.Source] += Jump.Flow;
5967cc2493dSspupyrev   }
5977cc2493dSspupyrev 
5987cc2493dSspupyrev   uint64_t TotalInFlow = 0;
5997cc2493dSspupyrev   uint64_t TotalOutFlow = 0;
6007cc2493dSspupyrev   for (uint64_t I = 0; I < NumBlocks; I++) {
6017cc2493dSspupyrev     auto &Block = Func.Blocks[I];
6027cc2493dSspupyrev     if (Block.isEntry()) {
6037cc2493dSspupyrev       TotalInFlow += Block.Flow;
6047cc2493dSspupyrev       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
6057cc2493dSspupyrev     } else if (Block.isExit()) {
6067cc2493dSspupyrev       TotalOutFlow += Block.Flow;
6077cc2493dSspupyrev       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
6087cc2493dSspupyrev     } else {
6097cc2493dSspupyrev       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
6107cc2493dSspupyrev       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
6117cc2493dSspupyrev     }
6127cc2493dSspupyrev   }
6137cc2493dSspupyrev   assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
614*98dd2f9eSspupyrev 
615*98dd2f9eSspupyrev   // Verify that there are no isolated flow components
616*98dd2f9eSspupyrev   // One could modify FlowFunction to hold edges indexed by the sources, which
617*98dd2f9eSspupyrev   // will avoid a creation of the object
618*98dd2f9eSspupyrev   auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
619*98dd2f9eSspupyrev   for (auto &Jump : Func.Jumps) {
620*98dd2f9eSspupyrev     if (Jump.Flow > 0) {
621*98dd2f9eSspupyrev       PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
622*98dd2f9eSspupyrev     }
623*98dd2f9eSspupyrev   }
624*98dd2f9eSspupyrev 
625*98dd2f9eSspupyrev   // Run bfs from the source along edges with positive flow
626*98dd2f9eSspupyrev   std::queue<uint64_t> Queue;
627*98dd2f9eSspupyrev   auto Visited = std::vector<bool>(NumBlocks, false);
628*98dd2f9eSspupyrev   Queue.push(Func.Entry);
629*98dd2f9eSspupyrev   Visited[Func.Entry] = true;
630*98dd2f9eSspupyrev   while (!Queue.empty()) {
631*98dd2f9eSspupyrev     uint64_t Src = Queue.front();
632*98dd2f9eSspupyrev     Queue.pop();
633*98dd2f9eSspupyrev     for (uint64_t Dst : PositiveFlowEdges[Src]) {
634*98dd2f9eSspupyrev       if (!Visited[Dst]) {
635*98dd2f9eSspupyrev         Queue.push(Dst);
636*98dd2f9eSspupyrev         Visited[Dst] = true;
637*98dd2f9eSspupyrev       }
638*98dd2f9eSspupyrev     }
639*98dd2f9eSspupyrev   }
640*98dd2f9eSspupyrev 
641*98dd2f9eSspupyrev   // Verify that every block that has a positive flow is reached from the source
642*98dd2f9eSspupyrev   // along edges with a positive flow
643*98dd2f9eSspupyrev   for (uint64_t I = 0; I < NumBlocks; I++) {
644*98dd2f9eSspupyrev     auto &Block = Func.Blocks[I];
645*98dd2f9eSspupyrev     assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
646*98dd2f9eSspupyrev   }
6477cc2493dSspupyrev }
6487cc2493dSspupyrev #endif
6497cc2493dSspupyrev 
6507cc2493dSspupyrev } // end of anonymous namespace
6517cc2493dSspupyrev 
6527cc2493dSspupyrev /// Apply the profile inference algorithm for a given flow function
6537cc2493dSspupyrev void llvm::applyFlowInference(FlowFunction &Func) {
6547cc2493dSspupyrev   // Create and apply an inference network model
6557cc2493dSspupyrev   auto InferenceNetwork = MinCostMaxFlow();
6567cc2493dSspupyrev   initializeNetwork(InferenceNetwork, Func);
6577cc2493dSspupyrev   InferenceNetwork.run();
6587cc2493dSspupyrev 
6597cc2493dSspupyrev   // Extract flow values for every block and every edge
6607cc2493dSspupyrev   extractWeights(InferenceNetwork, Func);
6617cc2493dSspupyrev 
662*98dd2f9eSspupyrev   // Post-processing adjustments to the flow
663*98dd2f9eSspupyrev   auto Adjuster = FlowAdjuster(Func);
664*98dd2f9eSspupyrev   Adjuster.run();
665*98dd2f9eSspupyrev 
6667cc2493dSspupyrev #ifndef NDEBUG
6677cc2493dSspupyrev   // Verify the result
6687cc2493dSspupyrev   verifyWeights(Func);
6697cc2493dSspupyrev #endif
6707cc2493dSspupyrev }
671