17cc2493dSspupyrev //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
27cc2493dSspupyrev //
37cc2493dSspupyrev // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
47cc2493dSspupyrev // See https://llvm.org/LICENSE.txt for license information.
57cc2493dSspupyrev // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67cc2493dSspupyrev //
77cc2493dSspupyrev //===----------------------------------------------------------------------===//
87cc2493dSspupyrev //
97cc2493dSspupyrev // This file implements a profile inference algorithm. Given an incomplete and
107cc2493dSspupyrev // possibly imprecise block counts, the algorithm reconstructs realistic block
117cc2493dSspupyrev // and edge counts that satisfy flow conservation rules, while minimally modify
127cc2493dSspupyrev // input block counts.
137cc2493dSspupyrev //
147cc2493dSspupyrev //===----------------------------------------------------------------------===//
157cc2493dSspupyrev 
167cc2493dSspupyrev #include "llvm/Transforms/Utils/SampleProfileInference.h"
175f4ae564SJan Svoboda #include "llvm/ADT/BitVector.h"
18*f2ade65fSspupyrev #include "llvm/Support/CommandLine.h"
197cc2493dSspupyrev #include "llvm/Support/Debug.h"
207cc2493dSspupyrev #include <queue>
217cc2493dSspupyrev #include <set>
22*f2ade65fSspupyrev #include <stack>
237cc2493dSspupyrev 
247cc2493dSspupyrev using namespace llvm;
257cc2493dSspupyrev #define DEBUG_TYPE "sample-profile-inference"
267cc2493dSspupyrev 
277cc2493dSspupyrev namespace {
287cc2493dSspupyrev 
29*f2ade65fSspupyrev static cl::opt<bool> SampleProfileEvenCountDistribution(
30*f2ade65fSspupyrev     "sample-profile-even-count-distribution", cl::init(true), cl::Hidden,
31*f2ade65fSspupyrev     cl::ZeroOrMore,
32*f2ade65fSspupyrev     cl::desc("Try to evenly distribute counts when there are multiple equally "
33*f2ade65fSspupyrev              "likely options."));
34*f2ade65fSspupyrev 
35*f2ade65fSspupyrev static cl::opt<unsigned> SampleProfileMaxDfsCalls(
36*f2ade65fSspupyrev     "sample-profile-max-dfs-calls", cl::init(10), cl::Hidden, cl::ZeroOrMore,
37*f2ade65fSspupyrev     cl::desc("Maximum number of dfs iterations for even count distribution."));
38*f2ade65fSspupyrev 
397cc2493dSspupyrev /// A value indicating an infinite flow/capacity/weight of a block/edge.
407cc2493dSspupyrev /// Not using numeric_limits<int64_t>::max(), as the values can be summed up
417cc2493dSspupyrev /// during the execution.
427cc2493dSspupyrev static constexpr int64_t INF = ((int64_t)1) << 50;
437cc2493dSspupyrev 
447cc2493dSspupyrev /// The minimum-cost maximum flow algorithm.
457cc2493dSspupyrev ///
467cc2493dSspupyrev /// The algorithm finds the maximum flow of minimum cost on a given (directed)
477cc2493dSspupyrev /// network using a modified version of the classical Moore-Bellman-Ford
487cc2493dSspupyrev /// approach. The algorithm applies a number of augmentation iterations in which
497cc2493dSspupyrev /// flow is sent along paths of positive capacity from the source to the sink.
507cc2493dSspupyrev /// The worst-case time complexity of the implementation is O(v(f)*m*n), where
517cc2493dSspupyrev /// where m is the number of edges, n is the number of vertices, and v(f) is the
527cc2493dSspupyrev /// value of the maximum flow. However, the observed running time on typical
537cc2493dSspupyrev /// instances is sub-quadratic, that is, o(n^2).
547cc2493dSspupyrev ///
557cc2493dSspupyrev /// The input is a set of edges with specified costs and capacities, and a pair
567cc2493dSspupyrev /// of nodes (source and sink). The output is the flow along each edge of the
577cc2493dSspupyrev /// minimum total cost respecting the given edge capacities.
587cc2493dSspupyrev class MinCostMaxFlow {
597cc2493dSspupyrev public:
607cc2493dSspupyrev   // Initialize algorithm's data structures for a network of a given size.
617cc2493dSspupyrev   void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
627cc2493dSspupyrev     Source = SourceNode;
637cc2493dSspupyrev     Target = SinkNode;
647cc2493dSspupyrev 
657cc2493dSspupyrev     Nodes = std::vector<Node>(NodeCount);
667cc2493dSspupyrev     Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
67*f2ade65fSspupyrev     if (SampleProfileEvenCountDistribution)
68*f2ade65fSspupyrev       AugmentingEdges =
69*f2ade65fSspupyrev           std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
707cc2493dSspupyrev   }
717cc2493dSspupyrev 
727cc2493dSspupyrev   // Run the algorithm.
737cc2493dSspupyrev   int64_t run() {
74*f2ade65fSspupyrev     // Iteratively find an augmentation path/dag in the network and send the
75*f2ade65fSspupyrev     // flow along its edges
76*f2ade65fSspupyrev     size_t AugmentationIters = applyFlowAugmentation();
777cc2493dSspupyrev 
787cc2493dSspupyrev     // Compute the total flow and its cost
797cc2493dSspupyrev     int64_t TotalCost = 0;
807cc2493dSspupyrev     int64_t TotalFlow = 0;
817cc2493dSspupyrev     for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
827cc2493dSspupyrev       for (auto &Edge : Edges[Src]) {
837cc2493dSspupyrev         if (Edge.Flow > 0) {
847cc2493dSspupyrev           TotalCost += Edge.Cost * Edge.Flow;
857cc2493dSspupyrev           if (Src == Source)
867cc2493dSspupyrev             TotalFlow += Edge.Flow;
877cc2493dSspupyrev         }
887cc2493dSspupyrev       }
897cc2493dSspupyrev     }
907cc2493dSspupyrev     LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
917cc2493dSspupyrev                       << " iterations with " << TotalFlow << " total flow"
927cc2493dSspupyrev                       << " of " << TotalCost << " cost\n");
9322d82949SKazu Hirata     (void)TotalFlow;
94*f2ade65fSspupyrev     (void)AugmentationIters;
957cc2493dSspupyrev     return TotalCost;
967cc2493dSspupyrev   }
977cc2493dSspupyrev 
987cc2493dSspupyrev   /// Adding an edge to the network with a specified capacity and a cost.
997cc2493dSspupyrev   /// Multiple edges between a pair of nodes are allowed but self-edges
1007cc2493dSspupyrev   /// are not supported.
1017cc2493dSspupyrev   void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
1027cc2493dSspupyrev     assert(Capacity > 0 && "adding an edge of zero capacity");
1037cc2493dSspupyrev     assert(Src != Dst && "loop edge are not supported");
1047cc2493dSspupyrev 
1057cc2493dSspupyrev     Edge SrcEdge;
1067cc2493dSspupyrev     SrcEdge.Dst = Dst;
1077cc2493dSspupyrev     SrcEdge.Cost = Cost;
1087cc2493dSspupyrev     SrcEdge.Capacity = Capacity;
1097cc2493dSspupyrev     SrcEdge.Flow = 0;
1107cc2493dSspupyrev     SrcEdge.RevEdgeIndex = Edges[Dst].size();
1117cc2493dSspupyrev 
1127cc2493dSspupyrev     Edge DstEdge;
1137cc2493dSspupyrev     DstEdge.Dst = Src;
1147cc2493dSspupyrev     DstEdge.Cost = -Cost;
1157cc2493dSspupyrev     DstEdge.Capacity = 0;
1167cc2493dSspupyrev     DstEdge.Flow = 0;
1177cc2493dSspupyrev     DstEdge.RevEdgeIndex = Edges[Src].size();
1187cc2493dSspupyrev 
1197cc2493dSspupyrev     Edges[Src].push_back(SrcEdge);
1207cc2493dSspupyrev     Edges[Dst].push_back(DstEdge);
1217cc2493dSspupyrev   }
1227cc2493dSspupyrev 
1237cc2493dSspupyrev   /// Adding an edge to the network of infinite capacity and a given cost.
1247cc2493dSspupyrev   void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
1257cc2493dSspupyrev     addEdge(Src, Dst, INF, Cost);
1267cc2493dSspupyrev   }
1277cc2493dSspupyrev 
1287cc2493dSspupyrev   /// Get the total flow from a given source node.
1297cc2493dSspupyrev   /// Returns a list of pairs (target node, amount of flow to the target).
1307cc2493dSspupyrev   const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
1317cc2493dSspupyrev     std::vector<std::pair<uint64_t, int64_t>> Flow;
1327cc2493dSspupyrev     for (auto &Edge : Edges[Src]) {
1337cc2493dSspupyrev       if (Edge.Flow > 0)
1347cc2493dSspupyrev         Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
1357cc2493dSspupyrev     }
1367cc2493dSspupyrev     return Flow;
1377cc2493dSspupyrev   }
1387cc2493dSspupyrev 
1397cc2493dSspupyrev   /// Get the total flow between a pair of nodes.
1407cc2493dSspupyrev   int64_t getFlow(uint64_t Src, uint64_t Dst) const {
1417cc2493dSspupyrev     int64_t Flow = 0;
1427cc2493dSspupyrev     for (auto &Edge : Edges[Src]) {
1437cc2493dSspupyrev       if (Edge.Dst == Dst) {
1447cc2493dSspupyrev         Flow += Edge.Flow;
1457cc2493dSspupyrev       }
1467cc2493dSspupyrev     }
1477cc2493dSspupyrev     return Flow;
1487cc2493dSspupyrev   }
1497cc2493dSspupyrev 
1507cc2493dSspupyrev   /// A cost of increasing a block's count by one.
1517cc2493dSspupyrev   static constexpr int64_t AuxCostInc = 10;
1527cc2493dSspupyrev   /// A cost of decreasing a block's count by one.
1537cc2493dSspupyrev   static constexpr int64_t AuxCostDec = 20;
1547cc2493dSspupyrev   /// A cost of increasing a count of zero-weight block by one.
1557cc2493dSspupyrev   static constexpr int64_t AuxCostIncZero = 11;
1567cc2493dSspupyrev   /// A cost of increasing the entry block's count by one.
1577cc2493dSspupyrev   static constexpr int64_t AuxCostIncEntry = 40;
1587cc2493dSspupyrev   /// A cost of decreasing the entry block's count by one.
1597cc2493dSspupyrev   static constexpr int64_t AuxCostDecEntry = 10;
1607cc2493dSspupyrev   /// A cost of taking an unlikely jump.
16113d1364aSspupyrev   static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30;
1627cc2493dSspupyrev 
1637cc2493dSspupyrev private:
164*f2ade65fSspupyrev   /// Iteratively find an augmentation path/dag in the network and send the
165*f2ade65fSspupyrev   /// flow along its edges. The method returns the number of applied iterations.
166*f2ade65fSspupyrev   size_t applyFlowAugmentation() {
167*f2ade65fSspupyrev     size_t AugmentationIters = 0;
168*f2ade65fSspupyrev     while (findAugmentingPath()) {
169*f2ade65fSspupyrev       uint64_t PathCapacity = computeAugmentingPathCapacity();
170*f2ade65fSspupyrev       while (PathCapacity > 0) {
171*f2ade65fSspupyrev         bool Progress = false;
172*f2ade65fSspupyrev         if (SampleProfileEvenCountDistribution) {
173*f2ade65fSspupyrev           // Identify node/edge candidates for augmentation
174*f2ade65fSspupyrev           identifyShortestEdges(PathCapacity);
175*f2ade65fSspupyrev 
176*f2ade65fSspupyrev           // Find an augmenting DAG
177*f2ade65fSspupyrev           auto AugmentingOrder = findAugmentingDAG();
178*f2ade65fSspupyrev 
179*f2ade65fSspupyrev           // Apply the DAG augmentation
180*f2ade65fSspupyrev           Progress = augmentFlowAlongDAG(AugmentingOrder);
181*f2ade65fSspupyrev           PathCapacity = computeAugmentingPathCapacity();
182*f2ade65fSspupyrev         }
183*f2ade65fSspupyrev 
184*f2ade65fSspupyrev         if (!Progress) {
185*f2ade65fSspupyrev           augmentFlowAlongPath(PathCapacity);
186*f2ade65fSspupyrev           PathCapacity = 0;
187*f2ade65fSspupyrev         }
188*f2ade65fSspupyrev 
189*f2ade65fSspupyrev         AugmentationIters++;
190*f2ade65fSspupyrev       }
191*f2ade65fSspupyrev     }
192*f2ade65fSspupyrev     return AugmentationIters;
193*f2ade65fSspupyrev   }
194*f2ade65fSspupyrev 
195*f2ade65fSspupyrev   /// Compute the capacity of the cannonical augmenting path. If the path is
196*f2ade65fSspupyrev   /// saturated (that is, no flow can be sent along the path), then return 0.
197*f2ade65fSspupyrev   uint64_t computeAugmentingPathCapacity() {
198*f2ade65fSspupyrev     uint64_t PathCapacity = INF;
199*f2ade65fSspupyrev     uint64_t Now = Target;
200*f2ade65fSspupyrev     while (Now != Source) {
201*f2ade65fSspupyrev       uint64_t Pred = Nodes[Now].ParentNode;
202*f2ade65fSspupyrev       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
203*f2ade65fSspupyrev 
204*f2ade65fSspupyrev       assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
205*f2ade65fSspupyrev       uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
206*f2ade65fSspupyrev       PathCapacity = std::min(PathCapacity, EdgeCapacity);
207*f2ade65fSspupyrev 
208*f2ade65fSspupyrev       Now = Pred;
209*f2ade65fSspupyrev     }
210*f2ade65fSspupyrev     return PathCapacity;
211*f2ade65fSspupyrev   }
212*f2ade65fSspupyrev 
2137cc2493dSspupyrev   /// Check for existence of an augmenting path with a positive capacity.
2147cc2493dSspupyrev   bool findAugmentingPath() {
2157cc2493dSspupyrev     // Initialize data structures
2167cc2493dSspupyrev     for (auto &Node : Nodes) {
2177cc2493dSspupyrev       Node.Distance = INF;
2187cc2493dSspupyrev       Node.ParentNode = uint64_t(-1);
2197cc2493dSspupyrev       Node.ParentEdgeIndex = uint64_t(-1);
2207cc2493dSspupyrev       Node.Taken = false;
2217cc2493dSspupyrev     }
2227cc2493dSspupyrev 
2237cc2493dSspupyrev     std::queue<uint64_t> Queue;
2247cc2493dSspupyrev     Queue.push(Source);
2257cc2493dSspupyrev     Nodes[Source].Distance = 0;
2267cc2493dSspupyrev     Nodes[Source].Taken = true;
2277cc2493dSspupyrev     while (!Queue.empty()) {
2287cc2493dSspupyrev       uint64_t Src = Queue.front();
2297cc2493dSspupyrev       Queue.pop();
2307cc2493dSspupyrev       Nodes[Src].Taken = false;
2317cc2493dSspupyrev       // Although the residual network contains edges with negative costs
2327cc2493dSspupyrev       // (in particular, backward edges), it can be shown that there are no
2337cc2493dSspupyrev       // negative-weight cycles and the following two invariants are maintained:
2347cc2493dSspupyrev       // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
2357cc2493dSspupyrev       // where Dist is the length of the shortest path between two nodes. This
2367cc2493dSspupyrev       // allows to prune the search-space of the path-finding algorithm using
2377cc2493dSspupyrev       // the following early-stop criteria:
2387cc2493dSspupyrev       // -- If we find a path with zero-distance from Source to Target, stop the
2397cc2493dSspupyrev       //    search, as the path is the shortest since Dist[Source, Target] >= 0;
2407cc2493dSspupyrev       // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
2417cc2493dSspupyrev       //    process node V, as it is guaranteed _not_ to be on a shortest path
2427cc2493dSspupyrev       //    from Source to Target; it follows from inequalities
2437cc2493dSspupyrev       //    Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
2447cc2493dSspupyrev       //                         >= Dist[Source, V]
245*f2ade65fSspupyrev       if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0)
2467cc2493dSspupyrev         break;
2477cc2493dSspupyrev       if (Nodes[Src].Distance > Nodes[Target].Distance)
2487cc2493dSspupyrev         continue;
2497cc2493dSspupyrev 
2507cc2493dSspupyrev       // Process adjacent edges
2517cc2493dSspupyrev       for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
2527cc2493dSspupyrev         auto &Edge = Edges[Src][EdgeIdx];
2537cc2493dSspupyrev         if (Edge.Flow < Edge.Capacity) {
2547cc2493dSspupyrev           uint64_t Dst = Edge.Dst;
2557cc2493dSspupyrev           int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
2567cc2493dSspupyrev           if (Nodes[Dst].Distance > NewDistance) {
2577cc2493dSspupyrev             // Update the distance and the parent node/edge
2587cc2493dSspupyrev             Nodes[Dst].Distance = NewDistance;
2597cc2493dSspupyrev             Nodes[Dst].ParentNode = Src;
2607cc2493dSspupyrev             Nodes[Dst].ParentEdgeIndex = EdgeIdx;
2617cc2493dSspupyrev             // Add the node to the queue, if it is not there yet
2627cc2493dSspupyrev             if (!Nodes[Dst].Taken) {
2637cc2493dSspupyrev               Queue.push(Dst);
2647cc2493dSspupyrev               Nodes[Dst].Taken = true;
2657cc2493dSspupyrev             }
2667cc2493dSspupyrev           }
2677cc2493dSspupyrev         }
2687cc2493dSspupyrev       }
2697cc2493dSspupyrev     }
2707cc2493dSspupyrev 
2717cc2493dSspupyrev     return Nodes[Target].Distance != INF;
2727cc2493dSspupyrev   }
2737cc2493dSspupyrev 
2747cc2493dSspupyrev   /// Update the current flow along the augmenting path.
275*f2ade65fSspupyrev   void augmentFlowAlongPath(uint64_t PathCapacity) {
27693a2c291Sspupyrev     assert(PathCapacity > 0 && "found an incorrect augmenting path");
277*f2ade65fSspupyrev     uint64_t Now = Target;
2787cc2493dSspupyrev     while (Now != Source) {
2797cc2493dSspupyrev       uint64_t Pred = Nodes[Now].ParentNode;
2807cc2493dSspupyrev       auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
2817cc2493dSspupyrev       auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
2827cc2493dSspupyrev 
2837cc2493dSspupyrev       Edge.Flow += PathCapacity;
2847cc2493dSspupyrev       RevEdge.Flow -= PathCapacity;
2857cc2493dSspupyrev 
2867cc2493dSspupyrev       Now = Pred;
2877cc2493dSspupyrev     }
2887cc2493dSspupyrev   }
2897cc2493dSspupyrev 
290*f2ade65fSspupyrev   /// Find an Augmenting DAG order using a modified version of DFS in which we
291*f2ade65fSspupyrev   /// can visit a node multiple times. In the DFS search, when scanning each
292*f2ade65fSspupyrev   /// edge out of a node, continue search at Edge.Dst endpoint if it has not
293*f2ade65fSspupyrev   /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
294*f2ade65fSspupyrev   /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
295*f2ade65fSspupyrev   /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
296*f2ade65fSspupyrev   /// that starts with Source and ends with Target.
297*f2ade65fSspupyrev   std::vector<uint64_t> findAugmentingDAG() {
298*f2ade65fSspupyrev     // We use a stack based implemenation of DFS to avoid recursion.
299*f2ade65fSspupyrev     // Defining DFS data structures:
300*f2ade65fSspupyrev     // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
301*f2ade65fSspupyrev     //  - we are currently visiting Nodes[NodeIdx] and
302*f2ade65fSspupyrev     //  - the next edge to scan is Edges[NodeIdx][EdgeIdx]
303*f2ade65fSspupyrev     typedef std::pair<uint64_t, uint64_t> StackItemType;
304*f2ade65fSspupyrev     std::stack<StackItemType> Stack;
305*f2ade65fSspupyrev     std::vector<uint64_t> AugmentingOrder;
306*f2ade65fSspupyrev 
307*f2ade65fSspupyrev     // Phase 0: Initialize Node attributes and Time for DFS run
308*f2ade65fSspupyrev     for (auto &Node : Nodes) {
309*f2ade65fSspupyrev       Node.Discovery = 0;
310*f2ade65fSspupyrev       Node.Finish = 0;
311*f2ade65fSspupyrev       Node.NumCalls = 0;
312*f2ade65fSspupyrev       Node.Taken = false;
313*f2ade65fSspupyrev     }
314*f2ade65fSspupyrev     uint64_t Time = 0;
315*f2ade65fSspupyrev     // Mark Target as Taken
316*f2ade65fSspupyrev     // Taken attribute will be propagated backwards from Target towards Source
317*f2ade65fSspupyrev     Nodes[Target].Taken = true;
318*f2ade65fSspupyrev 
319*f2ade65fSspupyrev     // Phase 1: Start DFS traversal from Source
320*f2ade65fSspupyrev     Stack.emplace(Source, 0);
321*f2ade65fSspupyrev     Nodes[Source].Discovery = ++Time;
322*f2ade65fSspupyrev     while (!Stack.empty()) {
323*f2ade65fSspupyrev       auto NodeIdx = Stack.top().first;
324*f2ade65fSspupyrev       auto EdgeIdx = Stack.top().second;
325*f2ade65fSspupyrev 
326*f2ade65fSspupyrev       // If we haven't scanned all edges out of NodeIdx, continue scanning
327*f2ade65fSspupyrev       if (EdgeIdx < Edges[NodeIdx].size()) {
328*f2ade65fSspupyrev         auto &Edge = Edges[NodeIdx][EdgeIdx];
329*f2ade65fSspupyrev         auto &Dst = Nodes[Edge.Dst];
330*f2ade65fSspupyrev         Stack.top().second++;
331*f2ade65fSspupyrev 
332*f2ade65fSspupyrev         if (Edge.OnShortestPath) {
333*f2ade65fSspupyrev           // If we haven't seen Edge.Dst so far, continue DFS search there
334*f2ade65fSspupyrev           if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) {
335*f2ade65fSspupyrev             Dst.Discovery = ++Time;
336*f2ade65fSspupyrev             Stack.emplace(Edge.Dst, 0);
337*f2ade65fSspupyrev             Dst.NumCalls++;
338*f2ade65fSspupyrev           } else if (Dst.Taken && Dst.Finish != 0) {
339*f2ade65fSspupyrev             // Else, if Edge.Dst already have a path to Target, so that NodeIdx
340*f2ade65fSspupyrev             Nodes[NodeIdx].Taken = true;
341*f2ade65fSspupyrev           }
342*f2ade65fSspupyrev         }
343*f2ade65fSspupyrev       } else {
344*f2ade65fSspupyrev         // If we are done scanning all edge out of NodeIdx
345*f2ade65fSspupyrev         Stack.pop();
346*f2ade65fSspupyrev         // If we haven't found a path from NodeIdx to Target, forget about it
347*f2ade65fSspupyrev         if (!Nodes[NodeIdx].Taken) {
348*f2ade65fSspupyrev           Nodes[NodeIdx].Discovery = 0;
349*f2ade65fSspupyrev         } else {
350*f2ade65fSspupyrev           // If we have found a path from NodeIdx to Target, then finish NodeIdx
351*f2ade65fSspupyrev           // and propagate Taken flag to DFS parent unless at the Source
352*f2ade65fSspupyrev           Nodes[NodeIdx].Finish = ++Time;
353*f2ade65fSspupyrev           // NodeIdx == Source if and only if the stack is empty
354*f2ade65fSspupyrev           if (NodeIdx != Source) {
355*f2ade65fSspupyrev             assert(!Stack.empty() && "empty stack while running dfs");
356*f2ade65fSspupyrev             Nodes[Stack.top().first].Taken = true;
357*f2ade65fSspupyrev           }
358*f2ade65fSspupyrev           AugmentingOrder.push_back(NodeIdx);
359*f2ade65fSspupyrev         }
360*f2ade65fSspupyrev       }
361*f2ade65fSspupyrev     }
362*f2ade65fSspupyrev     // Nodes are collected decreasing Finish time, so the order is reversed
363*f2ade65fSspupyrev     std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
364*f2ade65fSspupyrev 
365*f2ade65fSspupyrev     // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
366*f2ade65fSspupyrev     for (size_t Src : AugmentingOrder) {
367*f2ade65fSspupyrev       AugmentingEdges[Src].clear();
368*f2ade65fSspupyrev       for (auto &Edge : Edges[Src]) {
369*f2ade65fSspupyrev         uint64_t Dst = Edge.Dst;
370*f2ade65fSspupyrev         if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
371*f2ade65fSspupyrev             Nodes[Dst].Finish < Nodes[Src].Finish) {
372*f2ade65fSspupyrev           AugmentingEdges[Src].push_back(&Edge);
373*f2ade65fSspupyrev         }
374*f2ade65fSspupyrev       }
375*f2ade65fSspupyrev       assert((Src == Target || !AugmentingEdges[Src].empty()) &&
376*f2ade65fSspupyrev              "incorrectly constructed augmenting edges");
377*f2ade65fSspupyrev     }
378*f2ade65fSspupyrev 
379*f2ade65fSspupyrev     return AugmentingOrder;
380*f2ade65fSspupyrev   }
381*f2ade65fSspupyrev 
382*f2ade65fSspupyrev   /// Update the current flow along the given (acyclic) subgraph specified by
383*f2ade65fSspupyrev   /// the vertex order, AugmentingOrder. The objective is to send as much flow
384*f2ade65fSspupyrev   /// as possible while evenly distributing flow among successors of each node.
385*f2ade65fSspupyrev   /// After the update at least one edge is saturated.
386*f2ade65fSspupyrev   bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
387*f2ade65fSspupyrev     // Phase 0: Initialization
388*f2ade65fSspupyrev     for (uint64_t Src : AugmentingOrder) {
389*f2ade65fSspupyrev       Nodes[Src].FracFlow = 0;
390*f2ade65fSspupyrev       Nodes[Src].IntFlow = 0;
391*f2ade65fSspupyrev       for (auto &Edge : AugmentingEdges[Src]) {
392*f2ade65fSspupyrev         Edge->AugmentedFlow = 0;
393*f2ade65fSspupyrev       }
394*f2ade65fSspupyrev     }
395*f2ade65fSspupyrev 
396*f2ade65fSspupyrev     // Phase 1: Send a unit of fractional flow along the DAG
397*f2ade65fSspupyrev     uint64_t MaxFlowAmount = INF;
398*f2ade65fSspupyrev     Nodes[Source].FracFlow = 1.0;
399*f2ade65fSspupyrev     for (uint64_t Src : AugmentingOrder) {
400*f2ade65fSspupyrev       assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
401*f2ade65fSspupyrev              "incorrectly computed fractional flow");
402*f2ade65fSspupyrev       // Distribute flow evenly among successors of Src
403*f2ade65fSspupyrev       uint64_t Degree = AugmentingEdges[Src].size();
404*f2ade65fSspupyrev       for (auto &Edge : AugmentingEdges[Src]) {
405*f2ade65fSspupyrev         double EdgeFlow = Nodes[Src].FracFlow / Degree;
406*f2ade65fSspupyrev         Nodes[Edge->Dst].FracFlow += EdgeFlow;
407*f2ade65fSspupyrev         if (Edge->Capacity == INF)
408*f2ade65fSspupyrev           continue;
409*f2ade65fSspupyrev         uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
410*f2ade65fSspupyrev         MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
411*f2ade65fSspupyrev       }
412*f2ade65fSspupyrev     }
413*f2ade65fSspupyrev     // Stop early if we cannot send any (integral) flow from Source to Target
414*f2ade65fSspupyrev     if (MaxFlowAmount == 0)
415*f2ade65fSspupyrev       return false;
416*f2ade65fSspupyrev 
417*f2ade65fSspupyrev     // Phase 2: Send an integral flow of MaxFlowAmount
418*f2ade65fSspupyrev     Nodes[Source].IntFlow = MaxFlowAmount;
419*f2ade65fSspupyrev     for (uint64_t Src : AugmentingOrder) {
420*f2ade65fSspupyrev       if (Src == Target)
421*f2ade65fSspupyrev         break;
422*f2ade65fSspupyrev       // Distribute flow evenly among successors of Src, rounding up to make
423*f2ade65fSspupyrev       // sure all flow is sent
424*f2ade65fSspupyrev       uint64_t Degree = AugmentingEdges[Src].size();
425*f2ade65fSspupyrev       // We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree
426*f2ade65fSspupyrev       uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
427*f2ade65fSspupyrev       for (auto &Edge : AugmentingEdges[Src]) {
428*f2ade65fSspupyrev         uint64_t Dst = Edge->Dst;
429*f2ade65fSspupyrev         uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
430*f2ade65fSspupyrev         EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
431*f2ade65fSspupyrev         Nodes[Dst].IntFlow += EdgeFlow;
432*f2ade65fSspupyrev         Nodes[Src].IntFlow -= EdgeFlow;
433*f2ade65fSspupyrev         Edge->AugmentedFlow += EdgeFlow;
434*f2ade65fSspupyrev       }
435*f2ade65fSspupyrev     }
436*f2ade65fSspupyrev     assert(Nodes[Target].IntFlow <= MaxFlowAmount);
437*f2ade65fSspupyrev     Nodes[Target].IntFlow = 0;
438*f2ade65fSspupyrev 
439*f2ade65fSspupyrev     // Phase 3: Send excess flow back traversing the nodes backwards.
440*f2ade65fSspupyrev     // Because of rounding, not all flow can be sent along the edges of Src.
441*f2ade65fSspupyrev     // Hence, sending the remaining flow back to maintain flow conservation
442*f2ade65fSspupyrev     for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
443*f2ade65fSspupyrev       uint64_t Src = AugmentingOrder[Idx - 1];
444*f2ade65fSspupyrev       // Try to send excess flow back along each edge.
445*f2ade65fSspupyrev       // Make sure we only send back flow we just augmented (AugmentedFlow).
446*f2ade65fSspupyrev       for (auto &Edge : AugmentingEdges[Src]) {
447*f2ade65fSspupyrev         uint64_t Dst = Edge->Dst;
448*f2ade65fSspupyrev         if (Nodes[Dst].IntFlow == 0)
449*f2ade65fSspupyrev           continue;
450*f2ade65fSspupyrev         uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
451*f2ade65fSspupyrev         Nodes[Dst].IntFlow -= EdgeFlow;
452*f2ade65fSspupyrev         Nodes[Src].IntFlow += EdgeFlow;
453*f2ade65fSspupyrev         Edge->AugmentedFlow -= EdgeFlow;
454*f2ade65fSspupyrev       }
455*f2ade65fSspupyrev     }
456*f2ade65fSspupyrev 
457*f2ade65fSspupyrev     // Phase 4: Update flow values along all edges
458*f2ade65fSspupyrev     bool HasSaturatedEdges = false;
459*f2ade65fSspupyrev     for (uint64_t Src : AugmentingOrder) {
460*f2ade65fSspupyrev       // Verify that we have sent all the excess flow from the node
461*f2ade65fSspupyrev       assert(Src == Source || Nodes[Src].IntFlow == 0);
462*f2ade65fSspupyrev       for (auto &Edge : AugmentingEdges[Src]) {
463*f2ade65fSspupyrev         assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
464*f2ade65fSspupyrev         // Update flow values along the edge and its reverse copy
465*f2ade65fSspupyrev         auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
466*f2ade65fSspupyrev         Edge->Flow += Edge->AugmentedFlow;
467*f2ade65fSspupyrev         RevEdge.Flow -= Edge->AugmentedFlow;
468*f2ade65fSspupyrev         if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
469*f2ade65fSspupyrev           HasSaturatedEdges = true;
470*f2ade65fSspupyrev       }
471*f2ade65fSspupyrev     }
472*f2ade65fSspupyrev 
473*f2ade65fSspupyrev     // The augmentation is successful iff at least one edge becomes saturated
474*f2ade65fSspupyrev     return HasSaturatedEdges;
475*f2ade65fSspupyrev   }
476*f2ade65fSspupyrev 
477*f2ade65fSspupyrev   /// Identify candidate (shortest) edges for augmentation.
478*f2ade65fSspupyrev   void identifyShortestEdges(uint64_t PathCapacity) {
479*f2ade65fSspupyrev     assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
480*f2ade65fSspupyrev     // To make sure the augmentation DAG contains only edges with large residual
481*f2ade65fSspupyrev     // capacity, we prune all edges whose capacity is below a fraction of
482*f2ade65fSspupyrev     // the capacity of the augmented path.
483*f2ade65fSspupyrev     // (All edges of the path itself are always in the DAG)
484*f2ade65fSspupyrev     uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));
485*f2ade65fSspupyrev 
486*f2ade65fSspupyrev     // Decide which edges are on a shortest path from Source to Target
487*f2ade65fSspupyrev     for (size_t Src = 0; Src < Nodes.size(); Src++) {
488*f2ade65fSspupyrev       // An edge cannot be augmenting if the endpoint has large distance
489*f2ade65fSspupyrev       if (Nodes[Src].Distance > Nodes[Target].Distance)
490*f2ade65fSspupyrev         continue;
491*f2ade65fSspupyrev 
492*f2ade65fSspupyrev       for (auto &Edge : Edges[Src]) {
493*f2ade65fSspupyrev         uint64_t Dst = Edge.Dst;
494*f2ade65fSspupyrev         Edge.OnShortestPath =
495*f2ade65fSspupyrev             Src != Target && Dst != Source &&
496*f2ade65fSspupyrev             Nodes[Dst].Distance <= Nodes[Target].Distance &&
497*f2ade65fSspupyrev             Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
498*f2ade65fSspupyrev             Edge.Capacity > Edge.Flow &&
499*f2ade65fSspupyrev             uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
500*f2ade65fSspupyrev       }
501*f2ade65fSspupyrev     }
502*f2ade65fSspupyrev   }
503*f2ade65fSspupyrev 
50413d1364aSspupyrev   /// A node in a flow network.
5057cc2493dSspupyrev   struct Node {
5067cc2493dSspupyrev     /// The cost of the cheapest path from the source to the current node.
5077cc2493dSspupyrev     int64_t Distance;
5087cc2493dSspupyrev     /// The node preceding the current one in the path.
5097cc2493dSspupyrev     uint64_t ParentNode;
5107cc2493dSspupyrev     /// The index of the edge between ParentNode and the current node.
5117cc2493dSspupyrev     uint64_t ParentEdgeIndex;
5127cc2493dSspupyrev     /// An indicator of whether the current node is in a queue.
5137cc2493dSspupyrev     bool Taken;
514*f2ade65fSspupyrev 
515*f2ade65fSspupyrev     /// Data fields utilized in DAG-augmentation:
516*f2ade65fSspupyrev     /// Fractional flow.
517*f2ade65fSspupyrev     double FracFlow;
518*f2ade65fSspupyrev     /// Integral flow.
519*f2ade65fSspupyrev     uint64_t IntFlow;
520*f2ade65fSspupyrev     /// Discovery time.
521*f2ade65fSspupyrev     uint64_t Discovery;
522*f2ade65fSspupyrev     /// Finish time.
523*f2ade65fSspupyrev     uint64_t Finish;
524*f2ade65fSspupyrev     /// NumCalls.
525*f2ade65fSspupyrev     uint64_t NumCalls;
5267cc2493dSspupyrev   };
527*f2ade65fSspupyrev 
5287cc2493dSspupyrev   /// An edge in a flow network.
5297cc2493dSspupyrev   struct Edge {
5307cc2493dSspupyrev     /// The cost of the edge.
5317cc2493dSspupyrev     int64_t Cost;
5327cc2493dSspupyrev     /// The capacity of the edge.
5337cc2493dSspupyrev     int64_t Capacity;
5347cc2493dSspupyrev     /// The current flow on the edge.
5357cc2493dSspupyrev     int64_t Flow;
5367cc2493dSspupyrev     /// The destination node of the edge.
5377cc2493dSspupyrev     uint64_t Dst;
5387cc2493dSspupyrev     /// The index of the reverse edge between Dst and the current node.
5397cc2493dSspupyrev     uint64_t RevEdgeIndex;
540*f2ade65fSspupyrev 
541*f2ade65fSspupyrev     /// Data fields utilized in DAG-augmentation:
542*f2ade65fSspupyrev     /// Whether the edge is currently on a shortest path from Source to Target.
543*f2ade65fSspupyrev     bool OnShortestPath;
544*f2ade65fSspupyrev     /// Extra flow along the edge.
545*f2ade65fSspupyrev     uint64_t AugmentedFlow;
5467cc2493dSspupyrev   };
5477cc2493dSspupyrev 
5487cc2493dSspupyrev   /// The set of network nodes.
5497cc2493dSspupyrev   std::vector<Node> Nodes;
5507cc2493dSspupyrev   /// The set of network edges.
5517cc2493dSspupyrev   std::vector<std::vector<Edge>> Edges;
5527cc2493dSspupyrev   /// Source node of the flow.
5537cc2493dSspupyrev   uint64_t Source;
5547cc2493dSspupyrev   /// Target (sink) node of the flow.
5557cc2493dSspupyrev   uint64_t Target;
556*f2ade65fSspupyrev   /// Augmenting edges.
557*f2ade65fSspupyrev   std::vector<std::vector<Edge *>> AugmentingEdges;
5587cc2493dSspupyrev };
5597cc2493dSspupyrev 
56093a2c291Sspupyrev /// A post-processing adjustment of control flow. It applies two steps by
56193a2c291Sspupyrev /// rerouting some flow and making it more realistic:
56293a2c291Sspupyrev ///
56393a2c291Sspupyrev /// - First, it removes all isolated components ("islands") with a positive flow
56493a2c291Sspupyrev ///   that are unreachable from the entry block. For every such component, we
56593a2c291Sspupyrev ///   find the shortest from the entry to an exit passing through the component,
56693a2c291Sspupyrev ///   and increase the flow by one unit along the path.
56793a2c291Sspupyrev ///
56893a2c291Sspupyrev /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
56993a2c291Sspupyrev ///   with no sampled counts. Then it rebalnces the flow that goes through such
57093a2c291Sspupyrev ///   a subgraph so that each branch is taken with probability 50%.
57193a2c291Sspupyrev ///   An unknown subgraph is such that for every two nodes u and v:
57293a2c291Sspupyrev ///     - u dominates v and u is not unknown;
57393a2c291Sspupyrev ///     - v post-dominates u; and
57493a2c291Sspupyrev ///     - all inner-nodes of all (u,v)-paths are unknown.
57593a2c291Sspupyrev ///
57698dd2f9eSspupyrev class FlowAdjuster {
57798dd2f9eSspupyrev public:
57898dd2f9eSspupyrev   FlowAdjuster(FlowFunction &Func) : Func(Func) {
57998dd2f9eSspupyrev     assert(Func.Blocks[Func.Entry].isEntry() &&
58098dd2f9eSspupyrev            "incorrect index of the entry block");
58198dd2f9eSspupyrev   }
58298dd2f9eSspupyrev 
58398dd2f9eSspupyrev   // Run the post-processing
58498dd2f9eSspupyrev   void run() {
58593a2c291Sspupyrev     /// Adjust the flow to get rid of isolated components.
58698dd2f9eSspupyrev     joinIsolatedComponents();
58793a2c291Sspupyrev 
58893a2c291Sspupyrev     /// Rebalance the flow inside unknown subgraphs.
58993a2c291Sspupyrev     rebalanceUnknownSubgraphs();
59098dd2f9eSspupyrev   }
59198dd2f9eSspupyrev 
59298dd2f9eSspupyrev private:
59398dd2f9eSspupyrev   void joinIsolatedComponents() {
59498dd2f9eSspupyrev     // Find blocks that are reachable from the source
5955f4ae564SJan Svoboda     auto Visited = BitVector(NumBlocks(), false);
59698dd2f9eSspupyrev     findReachable(Func.Entry, Visited);
59798dd2f9eSspupyrev 
59898dd2f9eSspupyrev     // Iterate over all non-reachable blocks and adjust their weights
59998dd2f9eSspupyrev     for (uint64_t I = 0; I < NumBlocks(); I++) {
60098dd2f9eSspupyrev       auto &Block = Func.Blocks[I];
60198dd2f9eSspupyrev       if (Block.Flow > 0 && !Visited[I]) {
60298dd2f9eSspupyrev         // Find a path from the entry to an exit passing through the block I
60398dd2f9eSspupyrev         auto Path = findShortestPath(I);
60498dd2f9eSspupyrev         // Increase the flow along the path
60598dd2f9eSspupyrev         assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
60698dd2f9eSspupyrev                "incorrectly computed path adjusting control flow");
60798dd2f9eSspupyrev         Func.Blocks[Func.Entry].Flow += 1;
60898dd2f9eSspupyrev         for (auto &Jump : Path) {
60998dd2f9eSspupyrev           Jump->Flow += 1;
61098dd2f9eSspupyrev           Func.Blocks[Jump->Target].Flow += 1;
61198dd2f9eSspupyrev           // Update reachability
61298dd2f9eSspupyrev           findReachable(Jump->Target, Visited);
61398dd2f9eSspupyrev         }
61498dd2f9eSspupyrev       }
61598dd2f9eSspupyrev     }
61698dd2f9eSspupyrev   }
61798dd2f9eSspupyrev 
61893a2c291Sspupyrev   /// Run BFS from a given block along the jumps with a positive flow and mark
61998dd2f9eSspupyrev   /// all reachable blocks.
6205f4ae564SJan Svoboda   void findReachable(uint64_t Src, BitVector &Visited) {
62198dd2f9eSspupyrev     if (Visited[Src])
62298dd2f9eSspupyrev       return;
62398dd2f9eSspupyrev     std::queue<uint64_t> Queue;
62498dd2f9eSspupyrev     Queue.push(Src);
62598dd2f9eSspupyrev     Visited[Src] = true;
62698dd2f9eSspupyrev     while (!Queue.empty()) {
62798dd2f9eSspupyrev       Src = Queue.front();
62898dd2f9eSspupyrev       Queue.pop();
62998dd2f9eSspupyrev       for (auto Jump : Func.Blocks[Src].SuccJumps) {
63098dd2f9eSspupyrev         uint64_t Dst = Jump->Target;
63198dd2f9eSspupyrev         if (Jump->Flow > 0 && !Visited[Dst]) {
63298dd2f9eSspupyrev           Queue.push(Dst);
63398dd2f9eSspupyrev           Visited[Dst] = true;
63498dd2f9eSspupyrev         }
63598dd2f9eSspupyrev       }
63698dd2f9eSspupyrev     }
63798dd2f9eSspupyrev   }
63898dd2f9eSspupyrev 
63998dd2f9eSspupyrev   /// Find the shortest path from the entry block to an exit block passing
64098dd2f9eSspupyrev   /// through a given block.
64198dd2f9eSspupyrev   std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
64298dd2f9eSspupyrev     // A path from the entry block to BlockIdx
64398dd2f9eSspupyrev     auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
64498dd2f9eSspupyrev     // A path from BlockIdx to an exit block
64598dd2f9eSspupyrev     auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
64698dd2f9eSspupyrev 
64798dd2f9eSspupyrev     // Concatenate the two paths
64898dd2f9eSspupyrev     std::vector<FlowJump *> Result;
64998dd2f9eSspupyrev     Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
65098dd2f9eSspupyrev     Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
65198dd2f9eSspupyrev     return Result;
65298dd2f9eSspupyrev   }
65398dd2f9eSspupyrev 
65498dd2f9eSspupyrev   /// Apply the Dijkstra algorithm to find the shortest path from a given
65598dd2f9eSspupyrev   /// Source to a given Target block.
65698dd2f9eSspupyrev   /// If Target == -1, then the path ends at an exit block.
65798dd2f9eSspupyrev   std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
65898dd2f9eSspupyrev     // Quit early, if possible
65998dd2f9eSspupyrev     if (Source == Target)
66098dd2f9eSspupyrev       return std::vector<FlowJump *>();
66198dd2f9eSspupyrev     if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
66298dd2f9eSspupyrev       return std::vector<FlowJump *>();
66398dd2f9eSspupyrev 
66498dd2f9eSspupyrev     // Initialize data structures
66598dd2f9eSspupyrev     auto Distance = std::vector<int64_t>(NumBlocks(), INF);
66698dd2f9eSspupyrev     auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
66798dd2f9eSspupyrev     Distance[Source] = 0;
66898dd2f9eSspupyrev     std::set<std::pair<uint64_t, uint64_t>> Queue;
66998dd2f9eSspupyrev     Queue.insert(std::make_pair(Distance[Source], Source));
67098dd2f9eSspupyrev 
67198dd2f9eSspupyrev     // Run the Dijkstra algorithm
67298dd2f9eSspupyrev     while (!Queue.empty()) {
67398dd2f9eSspupyrev       uint64_t Src = Queue.begin()->second;
67498dd2f9eSspupyrev       Queue.erase(Queue.begin());
67598dd2f9eSspupyrev       // If we found a solution, quit early
67698dd2f9eSspupyrev       if (Src == Target ||
67798dd2f9eSspupyrev           (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
67898dd2f9eSspupyrev         break;
67998dd2f9eSspupyrev 
68098dd2f9eSspupyrev       for (auto Jump : Func.Blocks[Src].SuccJumps) {
68198dd2f9eSspupyrev         uint64_t Dst = Jump->Target;
68298dd2f9eSspupyrev         int64_t JumpDist = jumpDistance(Jump);
68398dd2f9eSspupyrev         if (Distance[Dst] > Distance[Src] + JumpDist) {
68498dd2f9eSspupyrev           Queue.erase(std::make_pair(Distance[Dst], Dst));
68598dd2f9eSspupyrev 
68698dd2f9eSspupyrev           Distance[Dst] = Distance[Src] + JumpDist;
68798dd2f9eSspupyrev           Parent[Dst] = Jump;
68898dd2f9eSspupyrev 
68998dd2f9eSspupyrev           Queue.insert(std::make_pair(Distance[Dst], Dst));
69098dd2f9eSspupyrev         }
69198dd2f9eSspupyrev       }
69298dd2f9eSspupyrev     }
69398dd2f9eSspupyrev     // If Target is not provided, find the closest exit block
69498dd2f9eSspupyrev     if (Target == AnyExitBlock) {
69598dd2f9eSspupyrev       for (uint64_t I = 0; I < NumBlocks(); I++) {
69698dd2f9eSspupyrev         if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
69798dd2f9eSspupyrev           if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
69898dd2f9eSspupyrev             Target = I;
69998dd2f9eSspupyrev           }
70098dd2f9eSspupyrev         }
70198dd2f9eSspupyrev       }
70298dd2f9eSspupyrev     }
70398dd2f9eSspupyrev     assert(Parent[Target] != nullptr && "a path does not exist");
70498dd2f9eSspupyrev 
70598dd2f9eSspupyrev     // Extract the constructed path
70698dd2f9eSspupyrev     std::vector<FlowJump *> Result;
70798dd2f9eSspupyrev     uint64_t Now = Target;
70898dd2f9eSspupyrev     while (Now != Source) {
70998dd2f9eSspupyrev       assert(Now == Parent[Now]->Target && "incorrect parent jump");
71098dd2f9eSspupyrev       Result.push_back(Parent[Now]);
71198dd2f9eSspupyrev       Now = Parent[Now]->Source;
71298dd2f9eSspupyrev     }
71398dd2f9eSspupyrev     // Reverse the path, since it is extracted from Target to Source
71498dd2f9eSspupyrev     std::reverse(Result.begin(), Result.end());
71598dd2f9eSspupyrev     return Result;
71698dd2f9eSspupyrev   }
71798dd2f9eSspupyrev 
71898dd2f9eSspupyrev   /// A distance of a path for a given jump.
71998dd2f9eSspupyrev   /// In order to incite the path to use blocks/jumps with large positive flow,
72098dd2f9eSspupyrev   /// and avoid changing branch probability of outgoing edges drastically,
72198dd2f9eSspupyrev   /// set the distance as follows:
72298dd2f9eSspupyrev   ///   if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0)
72398dd2f9eSspupyrev   ///   if Block.Weight > 0, then distance = 1
72498dd2f9eSspupyrev   ///   otherwise distance >> 1
72598dd2f9eSspupyrev   int64_t jumpDistance(FlowJump *Jump) const {
72698dd2f9eSspupyrev     int64_t BaseDistance = 100;
72798dd2f9eSspupyrev     if (Jump->IsUnlikely)
72898dd2f9eSspupyrev       return MinCostMaxFlow::AuxCostUnlikely;
72998dd2f9eSspupyrev     if (Jump->Flow > 0)
73098dd2f9eSspupyrev       return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0);
73198dd2f9eSspupyrev     if (Func.Blocks[Jump->Target].Weight > 0)
73298dd2f9eSspupyrev       return BaseDistance;
73398dd2f9eSspupyrev     return BaseDistance * (NumBlocks() + 1);
73498dd2f9eSspupyrev   };
73598dd2f9eSspupyrev 
73698dd2f9eSspupyrev   uint64_t NumBlocks() const { return Func.Blocks.size(); }
73798dd2f9eSspupyrev 
73813d1364aSspupyrev   /// Rebalance unknown subgraphs so that the flow is split evenly across the
73913d1364aSspupyrev   /// outgoing branches of every block of the subgraph. The method iterates over
74013d1364aSspupyrev   /// blocks with known weight and identifies unknown subgraphs rooted at the
74113d1364aSspupyrev   /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
74293a2c291Sspupyrev   void rebalanceUnknownSubgraphs() {
74313d1364aSspupyrev     // Try to find unknown subgraphs from each block
74493a2c291Sspupyrev     for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
74593a2c291Sspupyrev       auto SrcBlock = &Func.Blocks[I];
74613d1364aSspupyrev       // Verify if rebalancing rooted at SrcBlock is feasible
74713d1364aSspupyrev       if (!canRebalanceAtRoot(SrcBlock))
74893a2c291Sspupyrev         continue;
74993a2c291Sspupyrev 
75013d1364aSspupyrev       // Find an unknown subgraphs starting at SrcBlock. Along the way,
75113d1364aSspupyrev       // fill in known destinations and intermediate unknown blocks.
75213d1364aSspupyrev       std::vector<FlowBlock *> UnknownBlocks;
75313d1364aSspupyrev       std::vector<FlowBlock *> KnownDstBlocks;
75413d1364aSspupyrev       findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks);
75513d1364aSspupyrev 
75613d1364aSspupyrev       // Verify if rebalancing of the subgraph is feasible. If the search is
75713d1364aSspupyrev       // successful, find the unique destination block (which can be null)
75893a2c291Sspupyrev       FlowBlock *DstBlock = nullptr;
75913d1364aSspupyrev       if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks,
76013d1364aSspupyrev                                 DstBlock))
76193a2c291Sspupyrev         continue;
76213d1364aSspupyrev 
76313d1364aSspupyrev       // We cannot rebalance subgraphs containing cycles among unknown blocks
76413d1364aSspupyrev       if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks))
76593a2c291Sspupyrev         continue;
76693a2c291Sspupyrev 
76793a2c291Sspupyrev       // Rebalance the flow
76813d1364aSspupyrev       rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks);
76993a2c291Sspupyrev     }
77093a2c291Sspupyrev   }
77193a2c291Sspupyrev 
77213d1364aSspupyrev   /// Verify if rebalancing rooted at a given block is possible.
77313d1364aSspupyrev   bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
77413d1364aSspupyrev     // Do not attempt to find unknown subgraphs from an unknown or a
77513d1364aSspupyrev     // zero-flow block
77613d1364aSspupyrev     if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
77713d1364aSspupyrev       return false;
77813d1364aSspupyrev 
77913d1364aSspupyrev     // Do not attempt to process subgraphs from a block w/o unknown sucessors
78013d1364aSspupyrev     bool HasUnknownSuccs = false;
78113d1364aSspupyrev     for (auto Jump : SrcBlock->SuccJumps) {
78213d1364aSspupyrev       if (Func.Blocks[Jump->Target].UnknownWeight) {
78313d1364aSspupyrev         HasUnknownSuccs = true;
78413d1364aSspupyrev         break;
78513d1364aSspupyrev       }
78613d1364aSspupyrev     }
78713d1364aSspupyrev     if (!HasUnknownSuccs)
78813d1364aSspupyrev       return false;
78913d1364aSspupyrev 
79013d1364aSspupyrev     return true;
79113d1364aSspupyrev   }
79213d1364aSspupyrev 
79313d1364aSspupyrev   /// Find an unknown subgraph starting at block SrcBlock. The method sets
79413d1364aSspupyrev   /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
79513d1364aSspupyrev   void findUnknownSubgraph(const FlowBlock *SrcBlock,
79613d1364aSspupyrev                            std::vector<FlowBlock *> &KnownDstBlocks,
79713d1364aSspupyrev                            std::vector<FlowBlock *> &UnknownBlocks) {
79893a2c291Sspupyrev     // Run BFS from SrcBlock and make sure all paths are going through unknown
799*f2ade65fSspupyrev     // blocks and end at a known DstBlock
8005f4ae564SJan Svoboda     auto Visited = BitVector(NumBlocks(), false);
80193a2c291Sspupyrev     std::queue<uint64_t> Queue;
80293a2c291Sspupyrev 
80393a2c291Sspupyrev     Queue.push(SrcBlock->Index);
80493a2c291Sspupyrev     Visited[SrcBlock->Index] = true;
80593a2c291Sspupyrev     while (!Queue.empty()) {
80693a2c291Sspupyrev       auto &Block = Func.Blocks[Queue.front()];
80793a2c291Sspupyrev       Queue.pop();
80893a2c291Sspupyrev       // Process blocks reachable from Block
80993a2c291Sspupyrev       for (auto Jump : Block.SuccJumps) {
81013d1364aSspupyrev         // If Jump can be ignored, skip it
81113d1364aSspupyrev         if (ignoreJump(SrcBlock, nullptr, Jump))
81213d1364aSspupyrev           continue;
81313d1364aSspupyrev 
81493a2c291Sspupyrev         uint64_t Dst = Jump->Target;
81513d1364aSspupyrev         // If Dst has been visited, skip Jump
81693a2c291Sspupyrev         if (Visited[Dst])
81793a2c291Sspupyrev           continue;
81813d1364aSspupyrev         // Process block Dst
81993a2c291Sspupyrev         Visited[Dst] = true;
82093a2c291Sspupyrev         if (!Func.Blocks[Dst].UnknownWeight) {
82113d1364aSspupyrev           KnownDstBlocks.push_back(&Func.Blocks[Dst]);
82293a2c291Sspupyrev         } else {
82393a2c291Sspupyrev           Queue.push(Dst);
82413d1364aSspupyrev           UnknownBlocks.push_back(&Func.Blocks[Dst]);
82513d1364aSspupyrev         }
82693a2c291Sspupyrev       }
82793a2c291Sspupyrev     }
82893a2c291Sspupyrev   }
82993a2c291Sspupyrev 
83013d1364aSspupyrev   /// Verify if rebalancing of the subgraph is feasible. If the checks are
83113d1364aSspupyrev   /// successful, set the unique destination block, DstBlock (can be null).
83213d1364aSspupyrev   bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
83313d1364aSspupyrev                             const std::vector<FlowBlock *> &KnownDstBlocks,
83413d1364aSspupyrev                             const std::vector<FlowBlock *> &UnknownBlocks,
83513d1364aSspupyrev                             FlowBlock *&DstBlock) {
83693a2c291Sspupyrev     // If the list of unknown blocks is empty, we don't need rebalancing
83713d1364aSspupyrev     if (UnknownBlocks.empty())
83893a2c291Sspupyrev       return false;
83913d1364aSspupyrev 
84013d1364aSspupyrev     // If there are multiple known sinks, we can't rebalance
84113d1364aSspupyrev     if (KnownDstBlocks.size() > 1)
84293a2c291Sspupyrev       return false;
84313d1364aSspupyrev     DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
84413d1364aSspupyrev 
84513d1364aSspupyrev     // Verify sinks of the subgraph
84613d1364aSspupyrev     for (auto Block : UnknownBlocks) {
84713d1364aSspupyrev       if (Block->SuccJumps.empty()) {
84813d1364aSspupyrev         // If there are multiple (known and unknown) sinks, we can't rebalance
84913d1364aSspupyrev         if (DstBlock != nullptr)
85013d1364aSspupyrev           return false;
85113d1364aSspupyrev         continue;
85213d1364aSspupyrev       }
85313d1364aSspupyrev       size_t NumIgnoredJumps = 0;
85413d1364aSspupyrev       for (auto Jump : Block->SuccJumps) {
85513d1364aSspupyrev         if (ignoreJump(SrcBlock, DstBlock, Jump))
85613d1364aSspupyrev           NumIgnoredJumps++;
85713d1364aSspupyrev       }
85813d1364aSspupyrev       // If there is a non-sink block in UnknownBlocks with all jumps ignored,
85913d1364aSspupyrev       // then we can't rebalance
86013d1364aSspupyrev       if (NumIgnoredJumps == Block->SuccJumps.size())
86193a2c291Sspupyrev         return false;
86293a2c291Sspupyrev     }
86393a2c291Sspupyrev 
86493a2c291Sspupyrev     return true;
86593a2c291Sspupyrev   }
86693a2c291Sspupyrev 
86713d1364aSspupyrev   /// Decide whether the Jump is ignored while processing an unknown subgraphs
86813d1364aSspupyrev   /// rooted at basic block SrcBlock with the destination block, DstBlock.
86913d1364aSspupyrev   bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
87013d1364aSspupyrev                   const FlowJump *Jump) {
87113d1364aSspupyrev     // Ignore unlikely jumps with zero flow
87213d1364aSspupyrev     if (Jump->IsUnlikely && Jump->Flow == 0)
87313d1364aSspupyrev       return true;
87413d1364aSspupyrev 
87513d1364aSspupyrev     auto JumpSource = &Func.Blocks[Jump->Source];
87613d1364aSspupyrev     auto JumpTarget = &Func.Blocks[Jump->Target];
87713d1364aSspupyrev 
87813d1364aSspupyrev     // Do not ignore jumps coming into DstBlock
87913d1364aSspupyrev     if (DstBlock != nullptr && JumpTarget == DstBlock)
88013d1364aSspupyrev       return false;
88113d1364aSspupyrev 
88213d1364aSspupyrev     // Ignore jumps out of SrcBlock to known blocks
88313d1364aSspupyrev     if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock)
88413d1364aSspupyrev       return true;
88513d1364aSspupyrev 
88613d1364aSspupyrev     // Ignore jumps to known blocks with zero flow
88713d1364aSspupyrev     if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0)
88813d1364aSspupyrev       return true;
88913d1364aSspupyrev 
89013d1364aSspupyrev     return false;
89113d1364aSspupyrev   }
89213d1364aSspupyrev 
89393a2c291Sspupyrev   /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
89413d1364aSspupyrev   /// UnknownBlocks in the topological order (so that all jumps are "forward").
89513d1364aSspupyrev   bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
89613d1364aSspupyrev                          std::vector<FlowBlock *> &UnknownBlocks) {
89793a2c291Sspupyrev     // Extract local in-degrees in the considered subgraph
89893a2c291Sspupyrev     auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
89913d1364aSspupyrev     auto fillInDegree = [&](const FlowBlock *Block) {
90013d1364aSspupyrev       for (auto Jump : Block->SuccJumps) {
90113d1364aSspupyrev         if (ignoreJump(SrcBlock, DstBlock, Jump))
90213d1364aSspupyrev           continue;
90393a2c291Sspupyrev         LocalInDegree[Jump->Target]++;
90493a2c291Sspupyrev       }
90513d1364aSspupyrev     };
90613d1364aSspupyrev     fillInDegree(SrcBlock);
90713d1364aSspupyrev     for (auto Block : UnknownBlocks) {
90813d1364aSspupyrev       fillInDegree(Block);
90993a2c291Sspupyrev     }
91093a2c291Sspupyrev     // A loop containing SrcBlock
91193a2c291Sspupyrev     if (LocalInDegree[SrcBlock->Index] > 0)
91293a2c291Sspupyrev       return false;
91393a2c291Sspupyrev 
91493a2c291Sspupyrev     std::vector<FlowBlock *> AcyclicOrder;
91593a2c291Sspupyrev     std::queue<uint64_t> Queue;
91693a2c291Sspupyrev     Queue.push(SrcBlock->Index);
91793a2c291Sspupyrev     while (!Queue.empty()) {
91813d1364aSspupyrev       FlowBlock *Block = &Func.Blocks[Queue.front()];
91993a2c291Sspupyrev       Queue.pop();
92013d1364aSspupyrev       // Stop propagation once we reach DstBlock, if any
92113d1364aSspupyrev       if (DstBlock != nullptr && Block == DstBlock)
92293a2c291Sspupyrev         break;
92393a2c291Sspupyrev 
92413d1364aSspupyrev       // Keep an acyclic order of unknown blocks
92513d1364aSspupyrev       if (Block->UnknownWeight && Block != SrcBlock)
92613d1364aSspupyrev         AcyclicOrder.push_back(Block);
92713d1364aSspupyrev 
92893a2c291Sspupyrev       // Add to the queue all successors with zero local in-degree
92913d1364aSspupyrev       for (auto Jump : Block->SuccJumps) {
93013d1364aSspupyrev         if (ignoreJump(SrcBlock, DstBlock, Jump))
93113d1364aSspupyrev           continue;
93293a2c291Sspupyrev         uint64_t Dst = Jump->Target;
93393a2c291Sspupyrev         LocalInDegree[Dst]--;
93493a2c291Sspupyrev         if (LocalInDegree[Dst] == 0) {
93593a2c291Sspupyrev           Queue.push(Dst);
93693a2c291Sspupyrev         }
93793a2c291Sspupyrev       }
93893a2c291Sspupyrev     }
93993a2c291Sspupyrev 
94093a2c291Sspupyrev     // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
94193a2c291Sspupyrev     // of all blocks
94213d1364aSspupyrev     if (UnknownBlocks.size() != AcyclicOrder.size())
94393a2c291Sspupyrev       return false;
94413d1364aSspupyrev     UnknownBlocks = AcyclicOrder;
94593a2c291Sspupyrev     return true;
94693a2c291Sspupyrev   }
94793a2c291Sspupyrev 
94813d1364aSspupyrev   /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
94913d1364aSspupyrev   /// having UnknownBlocks intermediate blocks.
95013d1364aSspupyrev   void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
95113d1364aSspupyrev                                 const FlowBlock *DstBlock,
95213d1364aSspupyrev                                 const std::vector<FlowBlock *> &UnknownBlocks) {
95393a2c291Sspupyrev     assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
95493a2c291Sspupyrev 
95513d1364aSspupyrev     // Ditribute flow from the source block
95613d1364aSspupyrev     uint64_t BlockFlow = 0;
95713d1364aSspupyrev     // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
95813d1364aSspupyrev     for (auto Jump : SrcBlock->SuccJumps) {
95913d1364aSspupyrev       if (ignoreJump(SrcBlock, DstBlock, Jump))
96093a2c291Sspupyrev         continue;
96113d1364aSspupyrev       BlockFlow += Jump->Flow;
96293a2c291Sspupyrev     }
96313d1364aSspupyrev     rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
96413d1364aSspupyrev 
96513d1364aSspupyrev     // Ditribute flow from the remaining blocks
96613d1364aSspupyrev     for (auto Block : UnknownBlocks) {
96713d1364aSspupyrev       assert(Block->UnknownWeight && "incorrect unknown subgraph");
96813d1364aSspupyrev       uint64_t BlockFlow = 0;
96913d1364aSspupyrev       // Block's flow is the sum of incoming flows
97013d1364aSspupyrev       for (auto Jump : Block->PredJumps) {
97113d1364aSspupyrev         BlockFlow += Jump->Flow;
97213d1364aSspupyrev       }
97313d1364aSspupyrev       Block->Flow = BlockFlow;
97413d1364aSspupyrev       rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
97513d1364aSspupyrev     }
97613d1364aSspupyrev   }
97713d1364aSspupyrev 
97813d1364aSspupyrev   /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
97913d1364aSspupyrev   /// and ending at DstBlock.
98013d1364aSspupyrev   void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
98113d1364aSspupyrev                       const FlowBlock *Block, uint64_t BlockFlow) {
98213d1364aSspupyrev     // Process all successor jumps and update corresponding flow values
98313d1364aSspupyrev     size_t BlockDegree = 0;
98413d1364aSspupyrev     for (auto Jump : Block->SuccJumps) {
98513d1364aSspupyrev       if (ignoreJump(SrcBlock, DstBlock, Jump))
98613d1364aSspupyrev         continue;
98713d1364aSspupyrev       BlockDegree++;
98813d1364aSspupyrev     }
98913d1364aSspupyrev     // If all successor jumps of the block are ignored, skip it
99013d1364aSspupyrev     if (DstBlock == nullptr && BlockDegree == 0)
99113d1364aSspupyrev       return;
99213d1364aSspupyrev     assert(BlockDegree > 0 && "all outgoing jumps are ignored");
99313d1364aSspupyrev 
99413d1364aSspupyrev     // Each of the Block's successors gets the following amount of flow.
99513d1364aSspupyrev     // Rounding the value up so that all flow is propagated
99613d1364aSspupyrev     uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
99713d1364aSspupyrev     for (auto Jump : Block->SuccJumps) {
99813d1364aSspupyrev       if (ignoreJump(SrcBlock, DstBlock, Jump))
99913d1364aSspupyrev         continue;
100013d1364aSspupyrev       uint64_t Flow = std::min(SuccFlow, BlockFlow);
100193a2c291Sspupyrev       Jump->Flow = Flow;
100213d1364aSspupyrev       BlockFlow -= Flow;
100393a2c291Sspupyrev     }
100413d1364aSspupyrev     assert(BlockFlow == 0 && "not all flow is propagated");
100593a2c291Sspupyrev   }
100693a2c291Sspupyrev 
100798dd2f9eSspupyrev   /// A constant indicating an arbitrary exit block of a function.
100898dd2f9eSspupyrev   static constexpr uint64_t AnyExitBlock = uint64_t(-1);
100998dd2f9eSspupyrev 
101098dd2f9eSspupyrev   /// The function.
101198dd2f9eSspupyrev   FlowFunction &Func;
101298dd2f9eSspupyrev };
101398dd2f9eSspupyrev 
10147cc2493dSspupyrev /// Initializing flow network for a given function.
10157cc2493dSspupyrev ///
10167cc2493dSspupyrev /// Every block is split into three nodes that are responsible for (i) an
10177cc2493dSspupyrev /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
10187cc2493dSspupyrev /// reduction of the block weight.
10197cc2493dSspupyrev void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
10207cc2493dSspupyrev   uint64_t NumBlocks = Func.Blocks.size();
10217cc2493dSspupyrev   assert(NumBlocks > 1 && "Too few blocks in a function");
10227cc2493dSspupyrev   LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
10237cc2493dSspupyrev 
10247cc2493dSspupyrev   // Pre-process data: make sure the entry weight is at least 1
10257cc2493dSspupyrev   if (Func.Blocks[Func.Entry].Weight == 0) {
10267cc2493dSspupyrev     Func.Blocks[Func.Entry].Weight = 1;
10277cc2493dSspupyrev   }
10287cc2493dSspupyrev   // Introducing dummy source/sink pairs to allow flow circulation.
10297cc2493dSspupyrev   // The nodes corresponding to blocks of Func have indicies in the range
10307cc2493dSspupyrev   // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
10317cc2493dSspupyrev   uint64_t S = 3 * NumBlocks;
10327cc2493dSspupyrev   uint64_t T = S + 1;
10337cc2493dSspupyrev   uint64_t S1 = S + 2;
10347cc2493dSspupyrev   uint64_t T1 = S + 3;
10357cc2493dSspupyrev 
10367cc2493dSspupyrev   Network.initialize(3 * NumBlocks + 4, S1, T1);
10377cc2493dSspupyrev 
10387cc2493dSspupyrev   // Create three nodes for every block of the function
10397cc2493dSspupyrev   for (uint64_t B = 0; B < NumBlocks; B++) {
10407cc2493dSspupyrev     auto &Block = Func.Blocks[B];
10417cc2493dSspupyrev     assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
10427cc2493dSspupyrev            "non-zero weight of a block w/o weight except for an entry");
10437cc2493dSspupyrev 
10447cc2493dSspupyrev     // Split every block into two nodes
10457cc2493dSspupyrev     uint64_t Bin = 3 * B;
10467cc2493dSspupyrev     uint64_t Bout = 3 * B + 1;
10477cc2493dSspupyrev     uint64_t Baux = 3 * B + 2;
10487cc2493dSspupyrev     if (Block.Weight > 0) {
10497cc2493dSspupyrev       Network.addEdge(S1, Bout, Block.Weight, 0);
10507cc2493dSspupyrev       Network.addEdge(Bin, T1, Block.Weight, 0);
10517cc2493dSspupyrev     }
10527cc2493dSspupyrev 
10537cc2493dSspupyrev     // Edges from S and to T
10547cc2493dSspupyrev     assert((!Block.isEntry() || !Block.isExit()) &&
10557cc2493dSspupyrev            "a block cannot be an entry and an exit");
10567cc2493dSspupyrev     if (Block.isEntry()) {
10577cc2493dSspupyrev       Network.addEdge(S, Bin, 0);
10587cc2493dSspupyrev     } else if (Block.isExit()) {
10597cc2493dSspupyrev       Network.addEdge(Bout, T, 0);
10607cc2493dSspupyrev     }
10617cc2493dSspupyrev 
10627cc2493dSspupyrev     // An auxiliary node to allow increase/reduction of block counts:
10637cc2493dSspupyrev     // We assume that decreasing block counts is more expensive than increasing,
10647cc2493dSspupyrev     // and thus, setting separate costs here. In the future we may want to tune
10657cc2493dSspupyrev     // the relative costs so as to maximize the quality of generated profiles.
10667cc2493dSspupyrev     int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc;
10677cc2493dSspupyrev     int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec;
10687cc2493dSspupyrev     if (Block.UnknownWeight) {
10697cc2493dSspupyrev       // Do not penalize changing weights of blocks w/o known profile count
10707cc2493dSspupyrev       AuxCostInc = 0;
10717cc2493dSspupyrev       AuxCostDec = 0;
10727cc2493dSspupyrev     } else {
10737cc2493dSspupyrev       // Increasing the count for "cold" blocks with zero initial count is more
10747cc2493dSspupyrev       // expensive than for "hot" ones
10757cc2493dSspupyrev       if (Block.Weight == 0) {
10767cc2493dSspupyrev         AuxCostInc = MinCostMaxFlow::AuxCostIncZero;
10777cc2493dSspupyrev       }
10787cc2493dSspupyrev       // Modifying the count of the entry block is expensive
10797cc2493dSspupyrev       if (Block.isEntry()) {
10807cc2493dSspupyrev         AuxCostInc = MinCostMaxFlow::AuxCostIncEntry;
10817cc2493dSspupyrev         AuxCostDec = MinCostMaxFlow::AuxCostDecEntry;
10827cc2493dSspupyrev       }
10837cc2493dSspupyrev     }
10847cc2493dSspupyrev     // For blocks with self-edges, do not penalize a reduction of the count,
10857cc2493dSspupyrev     // as all of the increase can be attributed to the self-edge
10867cc2493dSspupyrev     if (Block.HasSelfEdge) {
10877cc2493dSspupyrev       AuxCostDec = 0;
10887cc2493dSspupyrev     }
10897cc2493dSspupyrev 
10907cc2493dSspupyrev     Network.addEdge(Bin, Baux, AuxCostInc);
10917cc2493dSspupyrev     Network.addEdge(Baux, Bout, AuxCostInc);
10927cc2493dSspupyrev     if (Block.Weight > 0) {
10937cc2493dSspupyrev       Network.addEdge(Bout, Baux, AuxCostDec);
10947cc2493dSspupyrev       Network.addEdge(Baux, Bin, AuxCostDec);
10957cc2493dSspupyrev     }
10967cc2493dSspupyrev   }
10977cc2493dSspupyrev 
10987cc2493dSspupyrev   // Creating edges for every jump
10997cc2493dSspupyrev   for (auto &Jump : Func.Jumps) {
11007cc2493dSspupyrev     uint64_t Src = Jump.Source;
11017cc2493dSspupyrev     uint64_t Dst = Jump.Target;
11027cc2493dSspupyrev     if (Src != Dst) {
11037cc2493dSspupyrev       uint64_t SrcOut = 3 * Src + 1;
11047cc2493dSspupyrev       uint64_t DstIn = 3 * Dst;
11057cc2493dSspupyrev       uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
11067cc2493dSspupyrev       Network.addEdge(SrcOut, DstIn, Cost);
11077cc2493dSspupyrev     }
11087cc2493dSspupyrev   }
11097cc2493dSspupyrev 
11107cc2493dSspupyrev   // Make sure we have a valid flow circulation
11117cc2493dSspupyrev   Network.addEdge(T, S, 0);
11127cc2493dSspupyrev }
11137cc2493dSspupyrev 
11147cc2493dSspupyrev /// Extract resulting block and edge counts from the flow network.
11157cc2493dSspupyrev void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
11167cc2493dSspupyrev   uint64_t NumBlocks = Func.Blocks.size();
11177cc2493dSspupyrev 
11187cc2493dSspupyrev   // Extract resulting block counts
11197cc2493dSspupyrev   for (uint64_t Src = 0; Src < NumBlocks; Src++) {
11207cc2493dSspupyrev     auto &Block = Func.Blocks[Src];
11217cc2493dSspupyrev     uint64_t SrcOut = 3 * Src + 1;
11227cc2493dSspupyrev     int64_t Flow = 0;
11237cc2493dSspupyrev     for (auto &Adj : Network.getFlow(SrcOut)) {
11247cc2493dSspupyrev       uint64_t DstIn = Adj.first;
11257cc2493dSspupyrev       int64_t DstFlow = Adj.second;
11267cc2493dSspupyrev       bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
11277cc2493dSspupyrev       if (!IsAuxNode || Block.HasSelfEdge) {
11287cc2493dSspupyrev         Flow += DstFlow;
11297cc2493dSspupyrev       }
11307cc2493dSspupyrev     }
11317cc2493dSspupyrev     Block.Flow = Flow;
11327cc2493dSspupyrev     assert(Flow >= 0 && "negative block flow");
11337cc2493dSspupyrev   }
11347cc2493dSspupyrev 
11357cc2493dSspupyrev   // Extract resulting jump counts
11367cc2493dSspupyrev   for (auto &Jump : Func.Jumps) {
11377cc2493dSspupyrev     uint64_t Src = Jump.Source;
11387cc2493dSspupyrev     uint64_t Dst = Jump.Target;
11397cc2493dSspupyrev     int64_t Flow = 0;
11407cc2493dSspupyrev     if (Src != Dst) {
11417cc2493dSspupyrev       uint64_t SrcOut = 3 * Src + 1;
11427cc2493dSspupyrev       uint64_t DstIn = 3 * Dst;
11437cc2493dSspupyrev       Flow = Network.getFlow(SrcOut, DstIn);
11447cc2493dSspupyrev     } else {
11457cc2493dSspupyrev       uint64_t SrcOut = 3 * Src + 1;
11467cc2493dSspupyrev       uint64_t SrcAux = 3 * Src + 2;
11477cc2493dSspupyrev       int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
11487cc2493dSspupyrev       if (AuxFlow > 0)
11497cc2493dSspupyrev         Flow = AuxFlow;
11507cc2493dSspupyrev     }
11517cc2493dSspupyrev     Jump.Flow = Flow;
11527cc2493dSspupyrev     assert(Flow >= 0 && "negative jump flow");
11537cc2493dSspupyrev   }
11547cc2493dSspupyrev }
11557cc2493dSspupyrev 
11567cc2493dSspupyrev #ifndef NDEBUG
11577cc2493dSspupyrev /// Verify that the computed flow values satisfy flow conservation rules
11587cc2493dSspupyrev void verifyWeights(const FlowFunction &Func) {
11597cc2493dSspupyrev   const uint64_t NumBlocks = Func.Blocks.size();
11607cc2493dSspupyrev   auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
11617cc2493dSspupyrev   auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
11627cc2493dSspupyrev   for (auto &Jump : Func.Jumps) {
11637cc2493dSspupyrev     InFlow[Jump.Target] += Jump.Flow;
11647cc2493dSspupyrev     OutFlow[Jump.Source] += Jump.Flow;
11657cc2493dSspupyrev   }
11667cc2493dSspupyrev 
11677cc2493dSspupyrev   uint64_t TotalInFlow = 0;
11687cc2493dSspupyrev   uint64_t TotalOutFlow = 0;
11697cc2493dSspupyrev   for (uint64_t I = 0; I < NumBlocks; I++) {
11707cc2493dSspupyrev     auto &Block = Func.Blocks[I];
11717cc2493dSspupyrev     if (Block.isEntry()) {
11727cc2493dSspupyrev       TotalInFlow += Block.Flow;
11737cc2493dSspupyrev       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
11747cc2493dSspupyrev     } else if (Block.isExit()) {
11757cc2493dSspupyrev       TotalOutFlow += Block.Flow;
11767cc2493dSspupyrev       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
11777cc2493dSspupyrev     } else {
11787cc2493dSspupyrev       assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
11797cc2493dSspupyrev       assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
11807cc2493dSspupyrev     }
11817cc2493dSspupyrev   }
11827cc2493dSspupyrev   assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
118398dd2f9eSspupyrev 
118498dd2f9eSspupyrev   // Verify that there are no isolated flow components
118598dd2f9eSspupyrev   // One could modify FlowFunction to hold edges indexed by the sources, which
118698dd2f9eSspupyrev   // will avoid a creation of the object
118798dd2f9eSspupyrev   auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
118898dd2f9eSspupyrev   for (auto &Jump : Func.Jumps) {
118998dd2f9eSspupyrev     if (Jump.Flow > 0) {
119098dd2f9eSspupyrev       PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
119198dd2f9eSspupyrev     }
119298dd2f9eSspupyrev   }
119398dd2f9eSspupyrev 
119493a2c291Sspupyrev   // Run BFS from the source along edges with positive flow
119598dd2f9eSspupyrev   std::queue<uint64_t> Queue;
11965f4ae564SJan Svoboda   auto Visited = BitVector(NumBlocks, false);
119798dd2f9eSspupyrev   Queue.push(Func.Entry);
119898dd2f9eSspupyrev   Visited[Func.Entry] = true;
119998dd2f9eSspupyrev   while (!Queue.empty()) {
120098dd2f9eSspupyrev     uint64_t Src = Queue.front();
120198dd2f9eSspupyrev     Queue.pop();
120298dd2f9eSspupyrev     for (uint64_t Dst : PositiveFlowEdges[Src]) {
120398dd2f9eSspupyrev       if (!Visited[Dst]) {
120498dd2f9eSspupyrev         Queue.push(Dst);
120598dd2f9eSspupyrev         Visited[Dst] = true;
120698dd2f9eSspupyrev       }
120798dd2f9eSspupyrev     }
120898dd2f9eSspupyrev   }
120998dd2f9eSspupyrev 
121098dd2f9eSspupyrev   // Verify that every block that has a positive flow is reached from the source
121198dd2f9eSspupyrev   // along edges with a positive flow
121298dd2f9eSspupyrev   for (uint64_t I = 0; I < NumBlocks; I++) {
121398dd2f9eSspupyrev     auto &Block = Func.Blocks[I];
121498dd2f9eSspupyrev     assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
121598dd2f9eSspupyrev   }
12167cc2493dSspupyrev }
12177cc2493dSspupyrev #endif
12187cc2493dSspupyrev 
12197cc2493dSspupyrev } // end of anonymous namespace
12207cc2493dSspupyrev 
12217cc2493dSspupyrev /// Apply the profile inference algorithm for a given flow function
12227cc2493dSspupyrev void llvm::applyFlowInference(FlowFunction &Func) {
12237cc2493dSspupyrev   // Create and apply an inference network model
12247cc2493dSspupyrev   auto InferenceNetwork = MinCostMaxFlow();
12257cc2493dSspupyrev   initializeNetwork(InferenceNetwork, Func);
12267cc2493dSspupyrev   InferenceNetwork.run();
12277cc2493dSspupyrev 
12287cc2493dSspupyrev   // Extract flow values for every block and every edge
12297cc2493dSspupyrev   extractWeights(InferenceNetwork, Func);
12307cc2493dSspupyrev 
123198dd2f9eSspupyrev   // Post-processing adjustments to the flow
123298dd2f9eSspupyrev   auto Adjuster = FlowAdjuster(Func);
123398dd2f9eSspupyrev   Adjuster.run();
123498dd2f9eSspupyrev 
12357cc2493dSspupyrev #ifndef NDEBUG
12367cc2493dSspupyrev   // Verify the result
12377cc2493dSspupyrev   verifyWeights(Func);
12387cc2493dSspupyrev #endif
12397cc2493dSspupyrev }
1240