17cc2493dSspupyrev //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===//
27cc2493dSspupyrev //
37cc2493dSspupyrev // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
47cc2493dSspupyrev // See https://llvm.org/LICENSE.txt for license information.
57cc2493dSspupyrev // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
67cc2493dSspupyrev //
77cc2493dSspupyrev //===----------------------------------------------------------------------===//
87cc2493dSspupyrev //
97cc2493dSspupyrev // This file implements a profile inference algorithm. Given an incomplete and
107cc2493dSspupyrev // possibly imprecise block counts, the algorithm reconstructs realistic block
117cc2493dSspupyrev // and edge counts that satisfy flow conservation rules, while minimally modify
127cc2493dSspupyrev // input block counts.
137cc2493dSspupyrev //
147cc2493dSspupyrev //===----------------------------------------------------------------------===//
157cc2493dSspupyrev
167cc2493dSspupyrev #include "llvm/Transforms/Utils/SampleProfileInference.h"
175f4ae564SJan Svoboda #include "llvm/ADT/BitVector.h"
18f2ade65fSspupyrev #include "llvm/Support/CommandLine.h"
197cc2493dSspupyrev #include "llvm/Support/Debug.h"
207cc2493dSspupyrev #include <queue>
217cc2493dSspupyrev #include <set>
22f2ade65fSspupyrev #include <stack>
237cc2493dSspupyrev
247cc2493dSspupyrev using namespace llvm;
257cc2493dSspupyrev #define DEBUG_TYPE "sample-profile-inference"
267cc2493dSspupyrev
277cc2493dSspupyrev namespace {
287cc2493dSspupyrev
29f2ade65fSspupyrev static cl::opt<bool> SampleProfileEvenCountDistribution(
30f2ade65fSspupyrev "sample-profile-even-count-distribution", cl::init(true), cl::Hidden,
31f2ade65fSspupyrev cl::desc("Try to evenly distribute counts when there are multiple equally "
32f2ade65fSspupyrev "likely options."));
33f2ade65fSspupyrev
34f2ade65fSspupyrev static cl::opt<unsigned> SampleProfileMaxDfsCalls(
35*557efc9aSFangrui Song "sample-profile-max-dfs-calls", cl::init(10), cl::Hidden,
36f2ade65fSspupyrev cl::desc("Maximum number of dfs iterations for even count distribution."));
37f2ade65fSspupyrev
3881aedab7Sspupyrev static cl::opt<unsigned> SampleProfileProfiCostInc(
39*557efc9aSFangrui Song "sample-profile-profi-cost-inc", cl::init(10), cl::Hidden,
4081aedab7Sspupyrev cl::desc("A cost of increasing a block's count by one."));
4181aedab7Sspupyrev
4281aedab7Sspupyrev static cl::opt<unsigned> SampleProfileProfiCostDec(
43*557efc9aSFangrui Song "sample-profile-profi-cost-dec", cl::init(20), cl::Hidden,
4481aedab7Sspupyrev cl::desc("A cost of decreasing a block's count by one."));
4581aedab7Sspupyrev
4681aedab7Sspupyrev static cl::opt<unsigned> SampleProfileProfiCostIncZero(
4781aedab7Sspupyrev "sample-profile-profi-cost-inc-zero", cl::init(11), cl::Hidden,
4881aedab7Sspupyrev cl::desc("A cost of increasing a count of zero-weight block by one."));
4981aedab7Sspupyrev
5081aedab7Sspupyrev static cl::opt<unsigned> SampleProfileProfiCostIncEntry(
5181aedab7Sspupyrev "sample-profile-profi-cost-inc-entry", cl::init(40), cl::Hidden,
5281aedab7Sspupyrev cl::desc("A cost of increasing the entry block's count by one."));
5381aedab7Sspupyrev
5481aedab7Sspupyrev static cl::opt<unsigned> SampleProfileProfiCostDecEntry(
5581aedab7Sspupyrev "sample-profile-profi-cost-dec-entry", cl::init(10), cl::Hidden,
5681aedab7Sspupyrev cl::desc("A cost of decreasing the entry block's count by one."));
5781aedab7Sspupyrev
587cc2493dSspupyrev /// A value indicating an infinite flow/capacity/weight of a block/edge.
597cc2493dSspupyrev /// Not using numeric_limits<int64_t>::max(), as the values can be summed up
607cc2493dSspupyrev /// during the execution.
617cc2493dSspupyrev static constexpr int64_t INF = ((int64_t)1) << 50;
627cc2493dSspupyrev
637cc2493dSspupyrev /// The minimum-cost maximum flow algorithm.
647cc2493dSspupyrev ///
657cc2493dSspupyrev /// The algorithm finds the maximum flow of minimum cost on a given (directed)
667cc2493dSspupyrev /// network using a modified version of the classical Moore-Bellman-Ford
677cc2493dSspupyrev /// approach. The algorithm applies a number of augmentation iterations in which
687cc2493dSspupyrev /// flow is sent along paths of positive capacity from the source to the sink.
697cc2493dSspupyrev /// The worst-case time complexity of the implementation is O(v(f)*m*n), where
707cc2493dSspupyrev /// where m is the number of edges, n is the number of vertices, and v(f) is the
717cc2493dSspupyrev /// value of the maximum flow. However, the observed running time on typical
727cc2493dSspupyrev /// instances is sub-quadratic, that is, o(n^2).
737cc2493dSspupyrev ///
747cc2493dSspupyrev /// The input is a set of edges with specified costs and capacities, and a pair
757cc2493dSspupyrev /// of nodes (source and sink). The output is the flow along each edge of the
767cc2493dSspupyrev /// minimum total cost respecting the given edge capacities.
777cc2493dSspupyrev class MinCostMaxFlow {
787cc2493dSspupyrev public:
797cc2493dSspupyrev // Initialize algorithm's data structures for a network of a given size.
initialize(uint64_t NodeCount,uint64_t SourceNode,uint64_t SinkNode)807cc2493dSspupyrev void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) {
817cc2493dSspupyrev Source = SourceNode;
827cc2493dSspupyrev Target = SinkNode;
837cc2493dSspupyrev
847cc2493dSspupyrev Nodes = std::vector<Node>(NodeCount);
857cc2493dSspupyrev Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>());
86f2ade65fSspupyrev if (SampleProfileEvenCountDistribution)
87f2ade65fSspupyrev AugmentingEdges =
88f2ade65fSspupyrev std::vector<std::vector<Edge *>>(NodeCount, std::vector<Edge *>());
897cc2493dSspupyrev }
907cc2493dSspupyrev
917cc2493dSspupyrev // Run the algorithm.
run()927cc2493dSspupyrev int64_t run() {
93f2ade65fSspupyrev // Iteratively find an augmentation path/dag in the network and send the
94f2ade65fSspupyrev // flow along its edges
95f2ade65fSspupyrev size_t AugmentationIters = applyFlowAugmentation();
967cc2493dSspupyrev
977cc2493dSspupyrev // Compute the total flow and its cost
987cc2493dSspupyrev int64_t TotalCost = 0;
997cc2493dSspupyrev int64_t TotalFlow = 0;
1007cc2493dSspupyrev for (uint64_t Src = 0; Src < Nodes.size(); Src++) {
1017cc2493dSspupyrev for (auto &Edge : Edges[Src]) {
1027cc2493dSspupyrev if (Edge.Flow > 0) {
1037cc2493dSspupyrev TotalCost += Edge.Cost * Edge.Flow;
1047cc2493dSspupyrev if (Src == Source)
1057cc2493dSspupyrev TotalFlow += Edge.Flow;
1067cc2493dSspupyrev }
1077cc2493dSspupyrev }
1087cc2493dSspupyrev }
1097cc2493dSspupyrev LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters
1107cc2493dSspupyrev << " iterations with " << TotalFlow << " total flow"
1117cc2493dSspupyrev << " of " << TotalCost << " cost\n");
11222d82949SKazu Hirata (void)TotalFlow;
113f2ade65fSspupyrev (void)AugmentationIters;
1147cc2493dSspupyrev return TotalCost;
1157cc2493dSspupyrev }
1167cc2493dSspupyrev
1177cc2493dSspupyrev /// Adding an edge to the network with a specified capacity and a cost.
1187cc2493dSspupyrev /// Multiple edges between a pair of nodes are allowed but self-edges
1197cc2493dSspupyrev /// are not supported.
addEdge(uint64_t Src,uint64_t Dst,int64_t Capacity,int64_t Cost)1207cc2493dSspupyrev void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) {
1217cc2493dSspupyrev assert(Capacity > 0 && "adding an edge of zero capacity");
1227cc2493dSspupyrev assert(Src != Dst && "loop edge are not supported");
1237cc2493dSspupyrev
1247cc2493dSspupyrev Edge SrcEdge;
1257cc2493dSspupyrev SrcEdge.Dst = Dst;
1267cc2493dSspupyrev SrcEdge.Cost = Cost;
1277cc2493dSspupyrev SrcEdge.Capacity = Capacity;
1287cc2493dSspupyrev SrcEdge.Flow = 0;
1297cc2493dSspupyrev SrcEdge.RevEdgeIndex = Edges[Dst].size();
1307cc2493dSspupyrev
1317cc2493dSspupyrev Edge DstEdge;
1327cc2493dSspupyrev DstEdge.Dst = Src;
1337cc2493dSspupyrev DstEdge.Cost = -Cost;
1347cc2493dSspupyrev DstEdge.Capacity = 0;
1357cc2493dSspupyrev DstEdge.Flow = 0;
1367cc2493dSspupyrev DstEdge.RevEdgeIndex = Edges[Src].size();
1377cc2493dSspupyrev
1387cc2493dSspupyrev Edges[Src].push_back(SrcEdge);
1397cc2493dSspupyrev Edges[Dst].push_back(DstEdge);
1407cc2493dSspupyrev }
1417cc2493dSspupyrev
1427cc2493dSspupyrev /// Adding an edge to the network of infinite capacity and a given cost.
addEdge(uint64_t Src,uint64_t Dst,int64_t Cost)1437cc2493dSspupyrev void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) {
1447cc2493dSspupyrev addEdge(Src, Dst, INF, Cost);
1457cc2493dSspupyrev }
1467cc2493dSspupyrev
1477cc2493dSspupyrev /// Get the total flow from a given source node.
1487cc2493dSspupyrev /// Returns a list of pairs (target node, amount of flow to the target).
getFlow(uint64_t Src) const1497cc2493dSspupyrev const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const {
1507cc2493dSspupyrev std::vector<std::pair<uint64_t, int64_t>> Flow;
1517cc2493dSspupyrev for (auto &Edge : Edges[Src]) {
1527cc2493dSspupyrev if (Edge.Flow > 0)
1537cc2493dSspupyrev Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow));
1547cc2493dSspupyrev }
1557cc2493dSspupyrev return Flow;
1567cc2493dSspupyrev }
1577cc2493dSspupyrev
1587cc2493dSspupyrev /// Get the total flow between a pair of nodes.
getFlow(uint64_t Src,uint64_t Dst) const1597cc2493dSspupyrev int64_t getFlow(uint64_t Src, uint64_t Dst) const {
1607cc2493dSspupyrev int64_t Flow = 0;
1617cc2493dSspupyrev for (auto &Edge : Edges[Src]) {
1627cc2493dSspupyrev if (Edge.Dst == Dst) {
1637cc2493dSspupyrev Flow += Edge.Flow;
1647cc2493dSspupyrev }
1657cc2493dSspupyrev }
1667cc2493dSspupyrev return Flow;
1677cc2493dSspupyrev }
1687cc2493dSspupyrev
1697cc2493dSspupyrev /// A cost of taking an unlikely jump.
17013d1364aSspupyrev static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 30;
17181aedab7Sspupyrev /// Minimum BaseDistance for the jump distance values in island joining.
172ce29a042SVitaly Buka static constexpr uint64_t MinBaseDistance = 10000;
1737cc2493dSspupyrev
1747cc2493dSspupyrev private:
175f2ade65fSspupyrev /// Iteratively find an augmentation path/dag in the network and send the
176f2ade65fSspupyrev /// flow along its edges. The method returns the number of applied iterations.
applyFlowAugmentation()177f2ade65fSspupyrev size_t applyFlowAugmentation() {
178f2ade65fSspupyrev size_t AugmentationIters = 0;
179f2ade65fSspupyrev while (findAugmentingPath()) {
180f2ade65fSspupyrev uint64_t PathCapacity = computeAugmentingPathCapacity();
181f2ade65fSspupyrev while (PathCapacity > 0) {
182f2ade65fSspupyrev bool Progress = false;
183f2ade65fSspupyrev if (SampleProfileEvenCountDistribution) {
184f2ade65fSspupyrev // Identify node/edge candidates for augmentation
185f2ade65fSspupyrev identifyShortestEdges(PathCapacity);
186f2ade65fSspupyrev
187f2ade65fSspupyrev // Find an augmenting DAG
188f2ade65fSspupyrev auto AugmentingOrder = findAugmentingDAG();
189f2ade65fSspupyrev
190f2ade65fSspupyrev // Apply the DAG augmentation
191f2ade65fSspupyrev Progress = augmentFlowAlongDAG(AugmentingOrder);
192f2ade65fSspupyrev PathCapacity = computeAugmentingPathCapacity();
193f2ade65fSspupyrev }
194f2ade65fSspupyrev
195f2ade65fSspupyrev if (!Progress) {
196f2ade65fSspupyrev augmentFlowAlongPath(PathCapacity);
197f2ade65fSspupyrev PathCapacity = 0;
198f2ade65fSspupyrev }
199f2ade65fSspupyrev
200f2ade65fSspupyrev AugmentationIters++;
201f2ade65fSspupyrev }
202f2ade65fSspupyrev }
203f2ade65fSspupyrev return AugmentationIters;
204f2ade65fSspupyrev }
205f2ade65fSspupyrev
206f2ade65fSspupyrev /// Compute the capacity of the cannonical augmenting path. If the path is
207f2ade65fSspupyrev /// saturated (that is, no flow can be sent along the path), then return 0.
computeAugmentingPathCapacity()208f2ade65fSspupyrev uint64_t computeAugmentingPathCapacity() {
209f2ade65fSspupyrev uint64_t PathCapacity = INF;
210f2ade65fSspupyrev uint64_t Now = Target;
211f2ade65fSspupyrev while (Now != Source) {
212f2ade65fSspupyrev uint64_t Pred = Nodes[Now].ParentNode;
213f2ade65fSspupyrev auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
214f2ade65fSspupyrev
215f2ade65fSspupyrev assert(Edge.Capacity >= Edge.Flow && "incorrect edge flow");
216f2ade65fSspupyrev uint64_t EdgeCapacity = uint64_t(Edge.Capacity - Edge.Flow);
217f2ade65fSspupyrev PathCapacity = std::min(PathCapacity, EdgeCapacity);
218f2ade65fSspupyrev
219f2ade65fSspupyrev Now = Pred;
220f2ade65fSspupyrev }
221f2ade65fSspupyrev return PathCapacity;
222f2ade65fSspupyrev }
223f2ade65fSspupyrev
2247cc2493dSspupyrev /// Check for existence of an augmenting path with a positive capacity.
findAugmentingPath()2257cc2493dSspupyrev bool findAugmentingPath() {
2267cc2493dSspupyrev // Initialize data structures
2277cc2493dSspupyrev for (auto &Node : Nodes) {
2287cc2493dSspupyrev Node.Distance = INF;
2297cc2493dSspupyrev Node.ParentNode = uint64_t(-1);
2307cc2493dSspupyrev Node.ParentEdgeIndex = uint64_t(-1);
2317cc2493dSspupyrev Node.Taken = false;
2327cc2493dSspupyrev }
2337cc2493dSspupyrev
2347cc2493dSspupyrev std::queue<uint64_t> Queue;
2357cc2493dSspupyrev Queue.push(Source);
2367cc2493dSspupyrev Nodes[Source].Distance = 0;
2377cc2493dSspupyrev Nodes[Source].Taken = true;
2387cc2493dSspupyrev while (!Queue.empty()) {
2397cc2493dSspupyrev uint64_t Src = Queue.front();
2407cc2493dSspupyrev Queue.pop();
2417cc2493dSspupyrev Nodes[Src].Taken = false;
2427cc2493dSspupyrev // Although the residual network contains edges with negative costs
2437cc2493dSspupyrev // (in particular, backward edges), it can be shown that there are no
2447cc2493dSspupyrev // negative-weight cycles and the following two invariants are maintained:
2457cc2493dSspupyrev // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V,
2467cc2493dSspupyrev // where Dist is the length of the shortest path between two nodes. This
2477cc2493dSspupyrev // allows to prune the search-space of the path-finding algorithm using
2487cc2493dSspupyrev // the following early-stop criteria:
2497cc2493dSspupyrev // -- If we find a path with zero-distance from Source to Target, stop the
2507cc2493dSspupyrev // search, as the path is the shortest since Dist[Source, Target] >= 0;
2517cc2493dSspupyrev // -- If we have Dist[Source, V] > Dist[Source, Target], then do not
2527cc2493dSspupyrev // process node V, as it is guaranteed _not_ to be on a shortest path
2537cc2493dSspupyrev // from Source to Target; it follows from inequalities
2547cc2493dSspupyrev // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target]
2557cc2493dSspupyrev // >= Dist[Source, V]
256f2ade65fSspupyrev if (!SampleProfileEvenCountDistribution && Nodes[Target].Distance == 0)
2577cc2493dSspupyrev break;
2587cc2493dSspupyrev if (Nodes[Src].Distance > Nodes[Target].Distance)
2597cc2493dSspupyrev continue;
2607cc2493dSspupyrev
2617cc2493dSspupyrev // Process adjacent edges
2627cc2493dSspupyrev for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) {
2637cc2493dSspupyrev auto &Edge = Edges[Src][EdgeIdx];
2647cc2493dSspupyrev if (Edge.Flow < Edge.Capacity) {
2657cc2493dSspupyrev uint64_t Dst = Edge.Dst;
2667cc2493dSspupyrev int64_t NewDistance = Nodes[Src].Distance + Edge.Cost;
2677cc2493dSspupyrev if (Nodes[Dst].Distance > NewDistance) {
2687cc2493dSspupyrev // Update the distance and the parent node/edge
2697cc2493dSspupyrev Nodes[Dst].Distance = NewDistance;
2707cc2493dSspupyrev Nodes[Dst].ParentNode = Src;
2717cc2493dSspupyrev Nodes[Dst].ParentEdgeIndex = EdgeIdx;
2727cc2493dSspupyrev // Add the node to the queue, if it is not there yet
2737cc2493dSspupyrev if (!Nodes[Dst].Taken) {
2747cc2493dSspupyrev Queue.push(Dst);
2757cc2493dSspupyrev Nodes[Dst].Taken = true;
2767cc2493dSspupyrev }
2777cc2493dSspupyrev }
2787cc2493dSspupyrev }
2797cc2493dSspupyrev }
2807cc2493dSspupyrev }
2817cc2493dSspupyrev
2827cc2493dSspupyrev return Nodes[Target].Distance != INF;
2837cc2493dSspupyrev }
2847cc2493dSspupyrev
2857cc2493dSspupyrev /// Update the current flow along the augmenting path.
augmentFlowAlongPath(uint64_t PathCapacity)286f2ade65fSspupyrev void augmentFlowAlongPath(uint64_t PathCapacity) {
28793a2c291Sspupyrev assert(PathCapacity > 0 && "found an incorrect augmenting path");
288f2ade65fSspupyrev uint64_t Now = Target;
2897cc2493dSspupyrev while (Now != Source) {
2907cc2493dSspupyrev uint64_t Pred = Nodes[Now].ParentNode;
2917cc2493dSspupyrev auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex];
2927cc2493dSspupyrev auto &RevEdge = Edges[Now][Edge.RevEdgeIndex];
2937cc2493dSspupyrev
2947cc2493dSspupyrev Edge.Flow += PathCapacity;
2957cc2493dSspupyrev RevEdge.Flow -= PathCapacity;
2967cc2493dSspupyrev
2977cc2493dSspupyrev Now = Pred;
2987cc2493dSspupyrev }
2997cc2493dSspupyrev }
3007cc2493dSspupyrev
301f2ade65fSspupyrev /// Find an Augmenting DAG order using a modified version of DFS in which we
302f2ade65fSspupyrev /// can visit a node multiple times. In the DFS search, when scanning each
303f2ade65fSspupyrev /// edge out of a node, continue search at Edge.Dst endpoint if it has not
304f2ade65fSspupyrev /// been discovered yet and its NumCalls < MaxDfsCalls. The algorithm
305f2ade65fSspupyrev /// runs in O(MaxDfsCalls * |Edges| + |Nodes|) time.
306f2ade65fSspupyrev /// It returns an Augmenting Order (Taken nodes in decreasing Finish time)
307f2ade65fSspupyrev /// that starts with Source and ends with Target.
findAugmentingDAG()308f2ade65fSspupyrev std::vector<uint64_t> findAugmentingDAG() {
309f2ade65fSspupyrev // We use a stack based implemenation of DFS to avoid recursion.
310f2ade65fSspupyrev // Defining DFS data structures:
311f2ade65fSspupyrev // A pair (NodeIdx, EdgeIdx) at the top of the Stack denotes that
312f2ade65fSspupyrev // - we are currently visiting Nodes[NodeIdx] and
313f2ade65fSspupyrev // - the next edge to scan is Edges[NodeIdx][EdgeIdx]
314f2ade65fSspupyrev typedef std::pair<uint64_t, uint64_t> StackItemType;
315f2ade65fSspupyrev std::stack<StackItemType> Stack;
316f2ade65fSspupyrev std::vector<uint64_t> AugmentingOrder;
317f2ade65fSspupyrev
318f2ade65fSspupyrev // Phase 0: Initialize Node attributes and Time for DFS run
319f2ade65fSspupyrev for (auto &Node : Nodes) {
320f2ade65fSspupyrev Node.Discovery = 0;
321f2ade65fSspupyrev Node.Finish = 0;
322f2ade65fSspupyrev Node.NumCalls = 0;
323f2ade65fSspupyrev Node.Taken = false;
324f2ade65fSspupyrev }
325f2ade65fSspupyrev uint64_t Time = 0;
326f2ade65fSspupyrev // Mark Target as Taken
327f2ade65fSspupyrev // Taken attribute will be propagated backwards from Target towards Source
328f2ade65fSspupyrev Nodes[Target].Taken = true;
329f2ade65fSspupyrev
330f2ade65fSspupyrev // Phase 1: Start DFS traversal from Source
331f2ade65fSspupyrev Stack.emplace(Source, 0);
332f2ade65fSspupyrev Nodes[Source].Discovery = ++Time;
333f2ade65fSspupyrev while (!Stack.empty()) {
334f2ade65fSspupyrev auto NodeIdx = Stack.top().first;
335f2ade65fSspupyrev auto EdgeIdx = Stack.top().second;
336f2ade65fSspupyrev
337f2ade65fSspupyrev // If we haven't scanned all edges out of NodeIdx, continue scanning
338f2ade65fSspupyrev if (EdgeIdx < Edges[NodeIdx].size()) {
339f2ade65fSspupyrev auto &Edge = Edges[NodeIdx][EdgeIdx];
340f2ade65fSspupyrev auto &Dst = Nodes[Edge.Dst];
341f2ade65fSspupyrev Stack.top().second++;
342f2ade65fSspupyrev
343f2ade65fSspupyrev if (Edge.OnShortestPath) {
344f2ade65fSspupyrev // If we haven't seen Edge.Dst so far, continue DFS search there
345f2ade65fSspupyrev if (Dst.Discovery == 0 && Dst.NumCalls < SampleProfileMaxDfsCalls) {
346f2ade65fSspupyrev Dst.Discovery = ++Time;
347f2ade65fSspupyrev Stack.emplace(Edge.Dst, 0);
348f2ade65fSspupyrev Dst.NumCalls++;
349f2ade65fSspupyrev } else if (Dst.Taken && Dst.Finish != 0) {
350f2ade65fSspupyrev // Else, if Edge.Dst already have a path to Target, so that NodeIdx
351f2ade65fSspupyrev Nodes[NodeIdx].Taken = true;
352f2ade65fSspupyrev }
353f2ade65fSspupyrev }
354f2ade65fSspupyrev } else {
355f2ade65fSspupyrev // If we are done scanning all edge out of NodeIdx
356f2ade65fSspupyrev Stack.pop();
357f2ade65fSspupyrev // If we haven't found a path from NodeIdx to Target, forget about it
358f2ade65fSspupyrev if (!Nodes[NodeIdx].Taken) {
359f2ade65fSspupyrev Nodes[NodeIdx].Discovery = 0;
360f2ade65fSspupyrev } else {
361f2ade65fSspupyrev // If we have found a path from NodeIdx to Target, then finish NodeIdx
362f2ade65fSspupyrev // and propagate Taken flag to DFS parent unless at the Source
363f2ade65fSspupyrev Nodes[NodeIdx].Finish = ++Time;
364f2ade65fSspupyrev // NodeIdx == Source if and only if the stack is empty
365f2ade65fSspupyrev if (NodeIdx != Source) {
366f2ade65fSspupyrev assert(!Stack.empty() && "empty stack while running dfs");
367f2ade65fSspupyrev Nodes[Stack.top().first].Taken = true;
368f2ade65fSspupyrev }
369f2ade65fSspupyrev AugmentingOrder.push_back(NodeIdx);
370f2ade65fSspupyrev }
371f2ade65fSspupyrev }
372f2ade65fSspupyrev }
373f2ade65fSspupyrev // Nodes are collected decreasing Finish time, so the order is reversed
374f2ade65fSspupyrev std::reverse(AugmentingOrder.begin(), AugmentingOrder.end());
375f2ade65fSspupyrev
376f2ade65fSspupyrev // Phase 2: Extract all forward (DAG) edges and fill in AugmentingEdges
377f2ade65fSspupyrev for (size_t Src : AugmentingOrder) {
378f2ade65fSspupyrev AugmentingEdges[Src].clear();
379f2ade65fSspupyrev for (auto &Edge : Edges[Src]) {
380f2ade65fSspupyrev uint64_t Dst = Edge.Dst;
381f2ade65fSspupyrev if (Edge.OnShortestPath && Nodes[Src].Taken && Nodes[Dst].Taken &&
382f2ade65fSspupyrev Nodes[Dst].Finish < Nodes[Src].Finish) {
383f2ade65fSspupyrev AugmentingEdges[Src].push_back(&Edge);
384f2ade65fSspupyrev }
385f2ade65fSspupyrev }
386f2ade65fSspupyrev assert((Src == Target || !AugmentingEdges[Src].empty()) &&
387f2ade65fSspupyrev "incorrectly constructed augmenting edges");
388f2ade65fSspupyrev }
389f2ade65fSspupyrev
390f2ade65fSspupyrev return AugmentingOrder;
391f2ade65fSspupyrev }
392f2ade65fSspupyrev
393f2ade65fSspupyrev /// Update the current flow along the given (acyclic) subgraph specified by
394f2ade65fSspupyrev /// the vertex order, AugmentingOrder. The objective is to send as much flow
395f2ade65fSspupyrev /// as possible while evenly distributing flow among successors of each node.
396f2ade65fSspupyrev /// After the update at least one edge is saturated.
augmentFlowAlongDAG(const std::vector<uint64_t> & AugmentingOrder)397f2ade65fSspupyrev bool augmentFlowAlongDAG(const std::vector<uint64_t> &AugmentingOrder) {
398f2ade65fSspupyrev // Phase 0: Initialization
399f2ade65fSspupyrev for (uint64_t Src : AugmentingOrder) {
400f2ade65fSspupyrev Nodes[Src].FracFlow = 0;
401f2ade65fSspupyrev Nodes[Src].IntFlow = 0;
402f2ade65fSspupyrev for (auto &Edge : AugmentingEdges[Src]) {
403f2ade65fSspupyrev Edge->AugmentedFlow = 0;
404f2ade65fSspupyrev }
405f2ade65fSspupyrev }
406f2ade65fSspupyrev
407f2ade65fSspupyrev // Phase 1: Send a unit of fractional flow along the DAG
408f2ade65fSspupyrev uint64_t MaxFlowAmount = INF;
409f2ade65fSspupyrev Nodes[Source].FracFlow = 1.0;
410f2ade65fSspupyrev for (uint64_t Src : AugmentingOrder) {
411f2ade65fSspupyrev assert((Src == Target || Nodes[Src].FracFlow > 0.0) &&
412f2ade65fSspupyrev "incorrectly computed fractional flow");
413f2ade65fSspupyrev // Distribute flow evenly among successors of Src
414f2ade65fSspupyrev uint64_t Degree = AugmentingEdges[Src].size();
415f2ade65fSspupyrev for (auto &Edge : AugmentingEdges[Src]) {
416f2ade65fSspupyrev double EdgeFlow = Nodes[Src].FracFlow / Degree;
417f2ade65fSspupyrev Nodes[Edge->Dst].FracFlow += EdgeFlow;
418f2ade65fSspupyrev if (Edge->Capacity == INF)
419f2ade65fSspupyrev continue;
420f2ade65fSspupyrev uint64_t MaxIntFlow = double(Edge->Capacity - Edge->Flow) / EdgeFlow;
421f2ade65fSspupyrev MaxFlowAmount = std::min(MaxFlowAmount, MaxIntFlow);
422f2ade65fSspupyrev }
423f2ade65fSspupyrev }
424f2ade65fSspupyrev // Stop early if we cannot send any (integral) flow from Source to Target
425f2ade65fSspupyrev if (MaxFlowAmount == 0)
426f2ade65fSspupyrev return false;
427f2ade65fSspupyrev
428f2ade65fSspupyrev // Phase 2: Send an integral flow of MaxFlowAmount
429f2ade65fSspupyrev Nodes[Source].IntFlow = MaxFlowAmount;
430f2ade65fSspupyrev for (uint64_t Src : AugmentingOrder) {
431f2ade65fSspupyrev if (Src == Target)
432f2ade65fSspupyrev break;
433f2ade65fSspupyrev // Distribute flow evenly among successors of Src, rounding up to make
434f2ade65fSspupyrev // sure all flow is sent
435f2ade65fSspupyrev uint64_t Degree = AugmentingEdges[Src].size();
436f2ade65fSspupyrev // We are guaranteeed that Node[Src].IntFlow <= SuccFlow * Degree
437f2ade65fSspupyrev uint64_t SuccFlow = (Nodes[Src].IntFlow + Degree - 1) / Degree;
438f2ade65fSspupyrev for (auto &Edge : AugmentingEdges[Src]) {
439f2ade65fSspupyrev uint64_t Dst = Edge->Dst;
440f2ade65fSspupyrev uint64_t EdgeFlow = std::min(Nodes[Src].IntFlow, SuccFlow);
441f2ade65fSspupyrev EdgeFlow = std::min(EdgeFlow, uint64_t(Edge->Capacity - Edge->Flow));
442f2ade65fSspupyrev Nodes[Dst].IntFlow += EdgeFlow;
443f2ade65fSspupyrev Nodes[Src].IntFlow -= EdgeFlow;
444f2ade65fSspupyrev Edge->AugmentedFlow += EdgeFlow;
445f2ade65fSspupyrev }
446f2ade65fSspupyrev }
447f2ade65fSspupyrev assert(Nodes[Target].IntFlow <= MaxFlowAmount);
448f2ade65fSspupyrev Nodes[Target].IntFlow = 0;
449f2ade65fSspupyrev
450f2ade65fSspupyrev // Phase 3: Send excess flow back traversing the nodes backwards.
451f2ade65fSspupyrev // Because of rounding, not all flow can be sent along the edges of Src.
452f2ade65fSspupyrev // Hence, sending the remaining flow back to maintain flow conservation
453f2ade65fSspupyrev for (size_t Idx = AugmentingOrder.size() - 1; Idx > 0; Idx--) {
454f2ade65fSspupyrev uint64_t Src = AugmentingOrder[Idx - 1];
455f2ade65fSspupyrev // Try to send excess flow back along each edge.
456f2ade65fSspupyrev // Make sure we only send back flow we just augmented (AugmentedFlow).
457f2ade65fSspupyrev for (auto &Edge : AugmentingEdges[Src]) {
458f2ade65fSspupyrev uint64_t Dst = Edge->Dst;
459f2ade65fSspupyrev if (Nodes[Dst].IntFlow == 0)
460f2ade65fSspupyrev continue;
461f2ade65fSspupyrev uint64_t EdgeFlow = std::min(Nodes[Dst].IntFlow, Edge->AugmentedFlow);
462f2ade65fSspupyrev Nodes[Dst].IntFlow -= EdgeFlow;
463f2ade65fSspupyrev Nodes[Src].IntFlow += EdgeFlow;
464f2ade65fSspupyrev Edge->AugmentedFlow -= EdgeFlow;
465f2ade65fSspupyrev }
466f2ade65fSspupyrev }
467f2ade65fSspupyrev
468f2ade65fSspupyrev // Phase 4: Update flow values along all edges
469f2ade65fSspupyrev bool HasSaturatedEdges = false;
470f2ade65fSspupyrev for (uint64_t Src : AugmentingOrder) {
471f2ade65fSspupyrev // Verify that we have sent all the excess flow from the node
472f2ade65fSspupyrev assert(Src == Source || Nodes[Src].IntFlow == 0);
473f2ade65fSspupyrev for (auto &Edge : AugmentingEdges[Src]) {
474f2ade65fSspupyrev assert(uint64_t(Edge->Capacity - Edge->Flow) >= Edge->AugmentedFlow);
475f2ade65fSspupyrev // Update flow values along the edge and its reverse copy
476f2ade65fSspupyrev auto &RevEdge = Edges[Edge->Dst][Edge->RevEdgeIndex];
477f2ade65fSspupyrev Edge->Flow += Edge->AugmentedFlow;
478f2ade65fSspupyrev RevEdge.Flow -= Edge->AugmentedFlow;
479f2ade65fSspupyrev if (Edge->Capacity == Edge->Flow && Edge->AugmentedFlow > 0)
480f2ade65fSspupyrev HasSaturatedEdges = true;
481f2ade65fSspupyrev }
482f2ade65fSspupyrev }
483f2ade65fSspupyrev
484f2ade65fSspupyrev // The augmentation is successful iff at least one edge becomes saturated
485f2ade65fSspupyrev return HasSaturatedEdges;
486f2ade65fSspupyrev }
487f2ade65fSspupyrev
488f2ade65fSspupyrev /// Identify candidate (shortest) edges for augmentation.
identifyShortestEdges(uint64_t PathCapacity)489f2ade65fSspupyrev void identifyShortestEdges(uint64_t PathCapacity) {
490f2ade65fSspupyrev assert(PathCapacity > 0 && "found an incorrect augmenting DAG");
491f2ade65fSspupyrev // To make sure the augmentation DAG contains only edges with large residual
492f2ade65fSspupyrev // capacity, we prune all edges whose capacity is below a fraction of
493f2ade65fSspupyrev // the capacity of the augmented path.
494f2ade65fSspupyrev // (All edges of the path itself are always in the DAG)
495f2ade65fSspupyrev uint64_t MinCapacity = std::max(PathCapacity / 2, uint64_t(1));
496f2ade65fSspupyrev
497f2ade65fSspupyrev // Decide which edges are on a shortest path from Source to Target
498f2ade65fSspupyrev for (size_t Src = 0; Src < Nodes.size(); Src++) {
499f2ade65fSspupyrev // An edge cannot be augmenting if the endpoint has large distance
500f2ade65fSspupyrev if (Nodes[Src].Distance > Nodes[Target].Distance)
501f2ade65fSspupyrev continue;
502f2ade65fSspupyrev
503f2ade65fSspupyrev for (auto &Edge : Edges[Src]) {
504f2ade65fSspupyrev uint64_t Dst = Edge.Dst;
505f2ade65fSspupyrev Edge.OnShortestPath =
506f2ade65fSspupyrev Src != Target && Dst != Source &&
507f2ade65fSspupyrev Nodes[Dst].Distance <= Nodes[Target].Distance &&
508f2ade65fSspupyrev Nodes[Dst].Distance == Nodes[Src].Distance + Edge.Cost &&
509f2ade65fSspupyrev Edge.Capacity > Edge.Flow &&
510f2ade65fSspupyrev uint64_t(Edge.Capacity - Edge.Flow) >= MinCapacity;
511f2ade65fSspupyrev }
512f2ade65fSspupyrev }
513f2ade65fSspupyrev }
514f2ade65fSspupyrev
51513d1364aSspupyrev /// A node in a flow network.
5167cc2493dSspupyrev struct Node {
5177cc2493dSspupyrev /// The cost of the cheapest path from the source to the current node.
5187cc2493dSspupyrev int64_t Distance;
5197cc2493dSspupyrev /// The node preceding the current one in the path.
5207cc2493dSspupyrev uint64_t ParentNode;
5217cc2493dSspupyrev /// The index of the edge between ParentNode and the current node.
5227cc2493dSspupyrev uint64_t ParentEdgeIndex;
5237cc2493dSspupyrev /// An indicator of whether the current node is in a queue.
5247cc2493dSspupyrev bool Taken;
525f2ade65fSspupyrev
526f2ade65fSspupyrev /// Data fields utilized in DAG-augmentation:
527f2ade65fSspupyrev /// Fractional flow.
528f2ade65fSspupyrev double FracFlow;
529f2ade65fSspupyrev /// Integral flow.
530f2ade65fSspupyrev uint64_t IntFlow;
531f2ade65fSspupyrev /// Discovery time.
532f2ade65fSspupyrev uint64_t Discovery;
533f2ade65fSspupyrev /// Finish time.
534f2ade65fSspupyrev uint64_t Finish;
535f2ade65fSspupyrev /// NumCalls.
536f2ade65fSspupyrev uint64_t NumCalls;
5377cc2493dSspupyrev };
538f2ade65fSspupyrev
5397cc2493dSspupyrev /// An edge in a flow network.
5407cc2493dSspupyrev struct Edge {
5417cc2493dSspupyrev /// The cost of the edge.
5427cc2493dSspupyrev int64_t Cost;
5437cc2493dSspupyrev /// The capacity of the edge.
5447cc2493dSspupyrev int64_t Capacity;
5457cc2493dSspupyrev /// The current flow on the edge.
5467cc2493dSspupyrev int64_t Flow;
5477cc2493dSspupyrev /// The destination node of the edge.
5487cc2493dSspupyrev uint64_t Dst;
5497cc2493dSspupyrev /// The index of the reverse edge between Dst and the current node.
5507cc2493dSspupyrev uint64_t RevEdgeIndex;
551f2ade65fSspupyrev
552f2ade65fSspupyrev /// Data fields utilized in DAG-augmentation:
553f2ade65fSspupyrev /// Whether the edge is currently on a shortest path from Source to Target.
554f2ade65fSspupyrev bool OnShortestPath;
555f2ade65fSspupyrev /// Extra flow along the edge.
556f2ade65fSspupyrev uint64_t AugmentedFlow;
5577cc2493dSspupyrev };
5587cc2493dSspupyrev
5597cc2493dSspupyrev /// The set of network nodes.
5607cc2493dSspupyrev std::vector<Node> Nodes;
5617cc2493dSspupyrev /// The set of network edges.
5627cc2493dSspupyrev std::vector<std::vector<Edge>> Edges;
5637cc2493dSspupyrev /// Source node of the flow.
5647cc2493dSspupyrev uint64_t Source;
5657cc2493dSspupyrev /// Target (sink) node of the flow.
5667cc2493dSspupyrev uint64_t Target;
567f2ade65fSspupyrev /// Augmenting edges.
568f2ade65fSspupyrev std::vector<std::vector<Edge *>> AugmentingEdges;
5697cc2493dSspupyrev };
5707cc2493dSspupyrev
571851332a1SBenoit Jacob constexpr int64_t MinCostMaxFlow::AuxCostUnlikely;
572851332a1SBenoit Jacob constexpr uint64_t MinCostMaxFlow::MinBaseDistance;
573851332a1SBenoit Jacob
57493a2c291Sspupyrev /// A post-processing adjustment of control flow. It applies two steps by
57593a2c291Sspupyrev /// rerouting some flow and making it more realistic:
57693a2c291Sspupyrev ///
57793a2c291Sspupyrev /// - First, it removes all isolated components ("islands") with a positive flow
57893a2c291Sspupyrev /// that are unreachable from the entry block. For every such component, we
57993a2c291Sspupyrev /// find the shortest from the entry to an exit passing through the component,
58093a2c291Sspupyrev /// and increase the flow by one unit along the path.
58193a2c291Sspupyrev ///
58293a2c291Sspupyrev /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks
58393a2c291Sspupyrev /// with no sampled counts. Then it rebalnces the flow that goes through such
58493a2c291Sspupyrev /// a subgraph so that each branch is taken with probability 50%.
58593a2c291Sspupyrev /// An unknown subgraph is such that for every two nodes u and v:
58693a2c291Sspupyrev /// - u dominates v and u is not unknown;
58793a2c291Sspupyrev /// - v post-dominates u; and
58893a2c291Sspupyrev /// - all inner-nodes of all (u,v)-paths are unknown.
58993a2c291Sspupyrev ///
59098dd2f9eSspupyrev class FlowAdjuster {
59198dd2f9eSspupyrev public:
FlowAdjuster(FlowFunction & Func)59298dd2f9eSspupyrev FlowAdjuster(FlowFunction &Func) : Func(Func) {
59398dd2f9eSspupyrev assert(Func.Blocks[Func.Entry].isEntry() &&
59498dd2f9eSspupyrev "incorrect index of the entry block");
59598dd2f9eSspupyrev }
59698dd2f9eSspupyrev
59798dd2f9eSspupyrev // Run the post-processing
run()59898dd2f9eSspupyrev void run() {
59993a2c291Sspupyrev /// Adjust the flow to get rid of isolated components.
60098dd2f9eSspupyrev joinIsolatedComponents();
60193a2c291Sspupyrev
60293a2c291Sspupyrev /// Rebalance the flow inside unknown subgraphs.
60393a2c291Sspupyrev rebalanceUnknownSubgraphs();
60498dd2f9eSspupyrev }
60598dd2f9eSspupyrev
60698dd2f9eSspupyrev private:
joinIsolatedComponents()60798dd2f9eSspupyrev void joinIsolatedComponents() {
60898dd2f9eSspupyrev // Find blocks that are reachable from the source
6095f4ae564SJan Svoboda auto Visited = BitVector(NumBlocks(), false);
61098dd2f9eSspupyrev findReachable(Func.Entry, Visited);
61198dd2f9eSspupyrev
61298dd2f9eSspupyrev // Iterate over all non-reachable blocks and adjust their weights
61398dd2f9eSspupyrev for (uint64_t I = 0; I < NumBlocks(); I++) {
61498dd2f9eSspupyrev auto &Block = Func.Blocks[I];
61598dd2f9eSspupyrev if (Block.Flow > 0 && !Visited[I]) {
61698dd2f9eSspupyrev // Find a path from the entry to an exit passing through the block I
61798dd2f9eSspupyrev auto Path = findShortestPath(I);
61898dd2f9eSspupyrev // Increase the flow along the path
61998dd2f9eSspupyrev assert(Path.size() > 0 && Path[0]->Source == Func.Entry &&
62098dd2f9eSspupyrev "incorrectly computed path adjusting control flow");
62198dd2f9eSspupyrev Func.Blocks[Func.Entry].Flow += 1;
62298dd2f9eSspupyrev for (auto &Jump : Path) {
62398dd2f9eSspupyrev Jump->Flow += 1;
62498dd2f9eSspupyrev Func.Blocks[Jump->Target].Flow += 1;
62598dd2f9eSspupyrev // Update reachability
62698dd2f9eSspupyrev findReachable(Jump->Target, Visited);
62798dd2f9eSspupyrev }
62898dd2f9eSspupyrev }
62998dd2f9eSspupyrev }
63098dd2f9eSspupyrev }
63198dd2f9eSspupyrev
63293a2c291Sspupyrev /// Run BFS from a given block along the jumps with a positive flow and mark
63398dd2f9eSspupyrev /// all reachable blocks.
findReachable(uint64_t Src,BitVector & Visited)6345f4ae564SJan Svoboda void findReachable(uint64_t Src, BitVector &Visited) {
63598dd2f9eSspupyrev if (Visited[Src])
63698dd2f9eSspupyrev return;
63798dd2f9eSspupyrev std::queue<uint64_t> Queue;
63898dd2f9eSspupyrev Queue.push(Src);
63998dd2f9eSspupyrev Visited[Src] = true;
64098dd2f9eSspupyrev while (!Queue.empty()) {
64198dd2f9eSspupyrev Src = Queue.front();
64298dd2f9eSspupyrev Queue.pop();
64398dd2f9eSspupyrev for (auto Jump : Func.Blocks[Src].SuccJumps) {
64498dd2f9eSspupyrev uint64_t Dst = Jump->Target;
64598dd2f9eSspupyrev if (Jump->Flow > 0 && !Visited[Dst]) {
64698dd2f9eSspupyrev Queue.push(Dst);
64798dd2f9eSspupyrev Visited[Dst] = true;
64898dd2f9eSspupyrev }
64998dd2f9eSspupyrev }
65098dd2f9eSspupyrev }
65198dd2f9eSspupyrev }
65298dd2f9eSspupyrev
65398dd2f9eSspupyrev /// Find the shortest path from the entry block to an exit block passing
65498dd2f9eSspupyrev /// through a given block.
findShortestPath(uint64_t BlockIdx)65598dd2f9eSspupyrev std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) {
65698dd2f9eSspupyrev // A path from the entry block to BlockIdx
65798dd2f9eSspupyrev auto ForwardPath = findShortestPath(Func.Entry, BlockIdx);
65898dd2f9eSspupyrev // A path from BlockIdx to an exit block
65998dd2f9eSspupyrev auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock);
66098dd2f9eSspupyrev
66198dd2f9eSspupyrev // Concatenate the two paths
66298dd2f9eSspupyrev std::vector<FlowJump *> Result;
66398dd2f9eSspupyrev Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end());
66498dd2f9eSspupyrev Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end());
66598dd2f9eSspupyrev return Result;
66698dd2f9eSspupyrev }
66798dd2f9eSspupyrev
66898dd2f9eSspupyrev /// Apply the Dijkstra algorithm to find the shortest path from a given
66998dd2f9eSspupyrev /// Source to a given Target block.
67098dd2f9eSspupyrev /// If Target == -1, then the path ends at an exit block.
findShortestPath(uint64_t Source,uint64_t Target)67198dd2f9eSspupyrev std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) {
67298dd2f9eSspupyrev // Quit early, if possible
67398dd2f9eSspupyrev if (Source == Target)
67498dd2f9eSspupyrev return std::vector<FlowJump *>();
67598dd2f9eSspupyrev if (Func.Blocks[Source].isExit() && Target == AnyExitBlock)
67698dd2f9eSspupyrev return std::vector<FlowJump *>();
67798dd2f9eSspupyrev
67898dd2f9eSspupyrev // Initialize data structures
67998dd2f9eSspupyrev auto Distance = std::vector<int64_t>(NumBlocks(), INF);
68098dd2f9eSspupyrev auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr);
68198dd2f9eSspupyrev Distance[Source] = 0;
68298dd2f9eSspupyrev std::set<std::pair<uint64_t, uint64_t>> Queue;
68398dd2f9eSspupyrev Queue.insert(std::make_pair(Distance[Source], Source));
68498dd2f9eSspupyrev
68598dd2f9eSspupyrev // Run the Dijkstra algorithm
68698dd2f9eSspupyrev while (!Queue.empty()) {
68798dd2f9eSspupyrev uint64_t Src = Queue.begin()->second;
68898dd2f9eSspupyrev Queue.erase(Queue.begin());
68998dd2f9eSspupyrev // If we found a solution, quit early
69098dd2f9eSspupyrev if (Src == Target ||
69198dd2f9eSspupyrev (Func.Blocks[Src].isExit() && Target == AnyExitBlock))
69298dd2f9eSspupyrev break;
69398dd2f9eSspupyrev
69498dd2f9eSspupyrev for (auto Jump : Func.Blocks[Src].SuccJumps) {
69598dd2f9eSspupyrev uint64_t Dst = Jump->Target;
69698dd2f9eSspupyrev int64_t JumpDist = jumpDistance(Jump);
69798dd2f9eSspupyrev if (Distance[Dst] > Distance[Src] + JumpDist) {
69898dd2f9eSspupyrev Queue.erase(std::make_pair(Distance[Dst], Dst));
69998dd2f9eSspupyrev
70098dd2f9eSspupyrev Distance[Dst] = Distance[Src] + JumpDist;
70198dd2f9eSspupyrev Parent[Dst] = Jump;
70298dd2f9eSspupyrev
70398dd2f9eSspupyrev Queue.insert(std::make_pair(Distance[Dst], Dst));
70498dd2f9eSspupyrev }
70598dd2f9eSspupyrev }
70698dd2f9eSspupyrev }
70798dd2f9eSspupyrev // If Target is not provided, find the closest exit block
70898dd2f9eSspupyrev if (Target == AnyExitBlock) {
70998dd2f9eSspupyrev for (uint64_t I = 0; I < NumBlocks(); I++) {
71098dd2f9eSspupyrev if (Func.Blocks[I].isExit() && Parent[I] != nullptr) {
71198dd2f9eSspupyrev if (Target == AnyExitBlock || Distance[Target] > Distance[I]) {
71298dd2f9eSspupyrev Target = I;
71398dd2f9eSspupyrev }
71498dd2f9eSspupyrev }
71598dd2f9eSspupyrev }
71698dd2f9eSspupyrev }
71798dd2f9eSspupyrev assert(Parent[Target] != nullptr && "a path does not exist");
71898dd2f9eSspupyrev
71998dd2f9eSspupyrev // Extract the constructed path
72098dd2f9eSspupyrev std::vector<FlowJump *> Result;
72198dd2f9eSspupyrev uint64_t Now = Target;
72298dd2f9eSspupyrev while (Now != Source) {
72398dd2f9eSspupyrev assert(Now == Parent[Now]->Target && "incorrect parent jump");
72498dd2f9eSspupyrev Result.push_back(Parent[Now]);
72598dd2f9eSspupyrev Now = Parent[Now]->Source;
72698dd2f9eSspupyrev }
72798dd2f9eSspupyrev // Reverse the path, since it is extracted from Target to Source
72898dd2f9eSspupyrev std::reverse(Result.begin(), Result.end());
72998dd2f9eSspupyrev return Result;
73098dd2f9eSspupyrev }
73198dd2f9eSspupyrev
73298dd2f9eSspupyrev /// A distance of a path for a given jump.
73398dd2f9eSspupyrev /// In order to incite the path to use blocks/jumps with large positive flow,
73498dd2f9eSspupyrev /// and avoid changing branch probability of outgoing edges drastically,
73581aedab7Sspupyrev /// set the jump distance so as:
73681aedab7Sspupyrev /// - to minimize the number of unlikely jumps used and subject to that,
73781aedab7Sspupyrev /// - to minimize the number of Flow == 0 jumps used and subject to that,
73881aedab7Sspupyrev /// - minimizes total multiplicative Flow increase for the remaining edges.
73981aedab7Sspupyrev /// To capture this objective with integer distances, we round off fractional
74081aedab7Sspupyrev /// parts to a multiple of 1 / BaseDistance.
jumpDistance(FlowJump * Jump) const74198dd2f9eSspupyrev int64_t jumpDistance(FlowJump *Jump) const {
74281aedab7Sspupyrev uint64_t BaseDistance =
743ce29a042SVitaly Buka std::max(static_cast<uint64_t>(MinCostMaxFlow::MinBaseDistance),
74481aedab7Sspupyrev std::min(Func.Blocks[Func.Entry].Flow,
74581aedab7Sspupyrev MinCostMaxFlow::AuxCostUnlikely / NumBlocks()));
74698dd2f9eSspupyrev if (Jump->IsUnlikely)
74798dd2f9eSspupyrev return MinCostMaxFlow::AuxCostUnlikely;
74898dd2f9eSspupyrev if (Jump->Flow > 0)
74981aedab7Sspupyrev return BaseDistance + BaseDistance / Jump->Flow;
75081aedab7Sspupyrev return BaseDistance * NumBlocks();
75198dd2f9eSspupyrev };
75298dd2f9eSspupyrev
NumBlocks() const75398dd2f9eSspupyrev uint64_t NumBlocks() const { return Func.Blocks.size(); }
75498dd2f9eSspupyrev
75513d1364aSspupyrev /// Rebalance unknown subgraphs so that the flow is split evenly across the
75613d1364aSspupyrev /// outgoing branches of every block of the subgraph. The method iterates over
75713d1364aSspupyrev /// blocks with known weight and identifies unknown subgraphs rooted at the
75813d1364aSspupyrev /// blocks. Then it verifies if flow rebalancing is feasible and applies it.
rebalanceUnknownSubgraphs()75993a2c291Sspupyrev void rebalanceUnknownSubgraphs() {
76013d1364aSspupyrev // Try to find unknown subgraphs from each block
76193a2c291Sspupyrev for (uint64_t I = 0; I < Func.Blocks.size(); I++) {
76293a2c291Sspupyrev auto SrcBlock = &Func.Blocks[I];
76313d1364aSspupyrev // Verify if rebalancing rooted at SrcBlock is feasible
76413d1364aSspupyrev if (!canRebalanceAtRoot(SrcBlock))
76593a2c291Sspupyrev continue;
76693a2c291Sspupyrev
76713d1364aSspupyrev // Find an unknown subgraphs starting at SrcBlock. Along the way,
76813d1364aSspupyrev // fill in known destinations and intermediate unknown blocks.
76913d1364aSspupyrev std::vector<FlowBlock *> UnknownBlocks;
77013d1364aSspupyrev std::vector<FlowBlock *> KnownDstBlocks;
77113d1364aSspupyrev findUnknownSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks);
77213d1364aSspupyrev
77313d1364aSspupyrev // Verify if rebalancing of the subgraph is feasible. If the search is
77413d1364aSspupyrev // successful, find the unique destination block (which can be null)
77593a2c291Sspupyrev FlowBlock *DstBlock = nullptr;
77613d1364aSspupyrev if (!canRebalanceSubgraph(SrcBlock, KnownDstBlocks, UnknownBlocks,
77713d1364aSspupyrev DstBlock))
77893a2c291Sspupyrev continue;
77913d1364aSspupyrev
78013d1364aSspupyrev // We cannot rebalance subgraphs containing cycles among unknown blocks
78113d1364aSspupyrev if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownBlocks))
78293a2c291Sspupyrev continue;
78393a2c291Sspupyrev
78493a2c291Sspupyrev // Rebalance the flow
78513d1364aSspupyrev rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownBlocks);
78693a2c291Sspupyrev }
78793a2c291Sspupyrev }
78893a2c291Sspupyrev
78913d1364aSspupyrev /// Verify if rebalancing rooted at a given block is possible.
canRebalanceAtRoot(const FlowBlock * SrcBlock)79013d1364aSspupyrev bool canRebalanceAtRoot(const FlowBlock *SrcBlock) {
79113d1364aSspupyrev // Do not attempt to find unknown subgraphs from an unknown or a
79213d1364aSspupyrev // zero-flow block
79313d1364aSspupyrev if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0)
79413d1364aSspupyrev return false;
79513d1364aSspupyrev
79613d1364aSspupyrev // Do not attempt to process subgraphs from a block w/o unknown sucessors
79713d1364aSspupyrev bool HasUnknownSuccs = false;
79813d1364aSspupyrev for (auto Jump : SrcBlock->SuccJumps) {
79913d1364aSspupyrev if (Func.Blocks[Jump->Target].UnknownWeight) {
80013d1364aSspupyrev HasUnknownSuccs = true;
80113d1364aSspupyrev break;
80213d1364aSspupyrev }
80313d1364aSspupyrev }
80413d1364aSspupyrev if (!HasUnknownSuccs)
80513d1364aSspupyrev return false;
80613d1364aSspupyrev
80713d1364aSspupyrev return true;
80813d1364aSspupyrev }
80913d1364aSspupyrev
81013d1364aSspupyrev /// Find an unknown subgraph starting at block SrcBlock. The method sets
81113d1364aSspupyrev /// identified destinations, KnownDstBlocks, and intermediate UnknownBlocks.
findUnknownSubgraph(const FlowBlock * SrcBlock,std::vector<FlowBlock * > & KnownDstBlocks,std::vector<FlowBlock * > & UnknownBlocks)81213d1364aSspupyrev void findUnknownSubgraph(const FlowBlock *SrcBlock,
81313d1364aSspupyrev std::vector<FlowBlock *> &KnownDstBlocks,
81413d1364aSspupyrev std::vector<FlowBlock *> &UnknownBlocks) {
81593a2c291Sspupyrev // Run BFS from SrcBlock and make sure all paths are going through unknown
816f2ade65fSspupyrev // blocks and end at a known DstBlock
8175f4ae564SJan Svoboda auto Visited = BitVector(NumBlocks(), false);
81893a2c291Sspupyrev std::queue<uint64_t> Queue;
81993a2c291Sspupyrev
82093a2c291Sspupyrev Queue.push(SrcBlock->Index);
82193a2c291Sspupyrev Visited[SrcBlock->Index] = true;
82293a2c291Sspupyrev while (!Queue.empty()) {
82393a2c291Sspupyrev auto &Block = Func.Blocks[Queue.front()];
82493a2c291Sspupyrev Queue.pop();
82593a2c291Sspupyrev // Process blocks reachable from Block
82693a2c291Sspupyrev for (auto Jump : Block.SuccJumps) {
82713d1364aSspupyrev // If Jump can be ignored, skip it
82813d1364aSspupyrev if (ignoreJump(SrcBlock, nullptr, Jump))
82913d1364aSspupyrev continue;
83013d1364aSspupyrev
83193a2c291Sspupyrev uint64_t Dst = Jump->Target;
83213d1364aSspupyrev // If Dst has been visited, skip Jump
83393a2c291Sspupyrev if (Visited[Dst])
83493a2c291Sspupyrev continue;
83513d1364aSspupyrev // Process block Dst
83693a2c291Sspupyrev Visited[Dst] = true;
83793a2c291Sspupyrev if (!Func.Blocks[Dst].UnknownWeight) {
83813d1364aSspupyrev KnownDstBlocks.push_back(&Func.Blocks[Dst]);
83993a2c291Sspupyrev } else {
84093a2c291Sspupyrev Queue.push(Dst);
84113d1364aSspupyrev UnknownBlocks.push_back(&Func.Blocks[Dst]);
84213d1364aSspupyrev }
84393a2c291Sspupyrev }
84493a2c291Sspupyrev }
84593a2c291Sspupyrev }
84693a2c291Sspupyrev
84713d1364aSspupyrev /// Verify if rebalancing of the subgraph is feasible. If the checks are
84813d1364aSspupyrev /// successful, set the unique destination block, DstBlock (can be null).
canRebalanceSubgraph(const FlowBlock * SrcBlock,const std::vector<FlowBlock * > & KnownDstBlocks,const std::vector<FlowBlock * > & UnknownBlocks,FlowBlock * & DstBlock)84913d1364aSspupyrev bool canRebalanceSubgraph(const FlowBlock *SrcBlock,
85013d1364aSspupyrev const std::vector<FlowBlock *> &KnownDstBlocks,
85113d1364aSspupyrev const std::vector<FlowBlock *> &UnknownBlocks,
85213d1364aSspupyrev FlowBlock *&DstBlock) {
85393a2c291Sspupyrev // If the list of unknown blocks is empty, we don't need rebalancing
85413d1364aSspupyrev if (UnknownBlocks.empty())
85593a2c291Sspupyrev return false;
85613d1364aSspupyrev
85713d1364aSspupyrev // If there are multiple known sinks, we can't rebalance
85813d1364aSspupyrev if (KnownDstBlocks.size() > 1)
85993a2c291Sspupyrev return false;
86013d1364aSspupyrev DstBlock = KnownDstBlocks.empty() ? nullptr : KnownDstBlocks.front();
86113d1364aSspupyrev
86213d1364aSspupyrev // Verify sinks of the subgraph
86313d1364aSspupyrev for (auto Block : UnknownBlocks) {
86413d1364aSspupyrev if (Block->SuccJumps.empty()) {
86513d1364aSspupyrev // If there are multiple (known and unknown) sinks, we can't rebalance
86613d1364aSspupyrev if (DstBlock != nullptr)
86713d1364aSspupyrev return false;
86813d1364aSspupyrev continue;
86913d1364aSspupyrev }
87013d1364aSspupyrev size_t NumIgnoredJumps = 0;
87113d1364aSspupyrev for (auto Jump : Block->SuccJumps) {
87213d1364aSspupyrev if (ignoreJump(SrcBlock, DstBlock, Jump))
87313d1364aSspupyrev NumIgnoredJumps++;
87413d1364aSspupyrev }
87513d1364aSspupyrev // If there is a non-sink block in UnknownBlocks with all jumps ignored,
87613d1364aSspupyrev // then we can't rebalance
87713d1364aSspupyrev if (NumIgnoredJumps == Block->SuccJumps.size())
87893a2c291Sspupyrev return false;
87993a2c291Sspupyrev }
88093a2c291Sspupyrev
88193a2c291Sspupyrev return true;
88293a2c291Sspupyrev }
88393a2c291Sspupyrev
88413d1364aSspupyrev /// Decide whether the Jump is ignored while processing an unknown subgraphs
88513d1364aSspupyrev /// rooted at basic block SrcBlock with the destination block, DstBlock.
ignoreJump(const FlowBlock * SrcBlock,const FlowBlock * DstBlock,const FlowJump * Jump)88613d1364aSspupyrev bool ignoreJump(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
88713d1364aSspupyrev const FlowJump *Jump) {
88813d1364aSspupyrev // Ignore unlikely jumps with zero flow
88913d1364aSspupyrev if (Jump->IsUnlikely && Jump->Flow == 0)
89013d1364aSspupyrev return true;
89113d1364aSspupyrev
89213d1364aSspupyrev auto JumpSource = &Func.Blocks[Jump->Source];
89313d1364aSspupyrev auto JumpTarget = &Func.Blocks[Jump->Target];
89413d1364aSspupyrev
89513d1364aSspupyrev // Do not ignore jumps coming into DstBlock
89613d1364aSspupyrev if (DstBlock != nullptr && JumpTarget == DstBlock)
89713d1364aSspupyrev return false;
89813d1364aSspupyrev
89913d1364aSspupyrev // Ignore jumps out of SrcBlock to known blocks
90013d1364aSspupyrev if (!JumpTarget->UnknownWeight && JumpSource == SrcBlock)
90113d1364aSspupyrev return true;
90213d1364aSspupyrev
90313d1364aSspupyrev // Ignore jumps to known blocks with zero flow
90413d1364aSspupyrev if (!JumpTarget->UnknownWeight && JumpTarget->Flow == 0)
90513d1364aSspupyrev return true;
90613d1364aSspupyrev
90713d1364aSspupyrev return false;
90813d1364aSspupyrev }
90913d1364aSspupyrev
91093a2c291Sspupyrev /// Verify if the given unknown subgraph is acyclic, and if yes, reorder
91113d1364aSspupyrev /// UnknownBlocks in the topological order (so that all jumps are "forward").
isAcyclicSubgraph(const FlowBlock * SrcBlock,const FlowBlock * DstBlock,std::vector<FlowBlock * > & UnknownBlocks)91213d1364aSspupyrev bool isAcyclicSubgraph(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
91313d1364aSspupyrev std::vector<FlowBlock *> &UnknownBlocks) {
91493a2c291Sspupyrev // Extract local in-degrees in the considered subgraph
91593a2c291Sspupyrev auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0);
91613d1364aSspupyrev auto fillInDegree = [&](const FlowBlock *Block) {
91713d1364aSspupyrev for (auto Jump : Block->SuccJumps) {
91813d1364aSspupyrev if (ignoreJump(SrcBlock, DstBlock, Jump))
91913d1364aSspupyrev continue;
92093a2c291Sspupyrev LocalInDegree[Jump->Target]++;
92193a2c291Sspupyrev }
92213d1364aSspupyrev };
92313d1364aSspupyrev fillInDegree(SrcBlock);
92413d1364aSspupyrev for (auto Block : UnknownBlocks) {
92513d1364aSspupyrev fillInDegree(Block);
92693a2c291Sspupyrev }
92793a2c291Sspupyrev // A loop containing SrcBlock
92893a2c291Sspupyrev if (LocalInDegree[SrcBlock->Index] > 0)
92993a2c291Sspupyrev return false;
93093a2c291Sspupyrev
93193a2c291Sspupyrev std::vector<FlowBlock *> AcyclicOrder;
93293a2c291Sspupyrev std::queue<uint64_t> Queue;
93393a2c291Sspupyrev Queue.push(SrcBlock->Index);
93493a2c291Sspupyrev while (!Queue.empty()) {
93513d1364aSspupyrev FlowBlock *Block = &Func.Blocks[Queue.front()];
93693a2c291Sspupyrev Queue.pop();
93713d1364aSspupyrev // Stop propagation once we reach DstBlock, if any
93813d1364aSspupyrev if (DstBlock != nullptr && Block == DstBlock)
93993a2c291Sspupyrev break;
94093a2c291Sspupyrev
94113d1364aSspupyrev // Keep an acyclic order of unknown blocks
94213d1364aSspupyrev if (Block->UnknownWeight && Block != SrcBlock)
94313d1364aSspupyrev AcyclicOrder.push_back(Block);
94413d1364aSspupyrev
94593a2c291Sspupyrev // Add to the queue all successors with zero local in-degree
94613d1364aSspupyrev for (auto Jump : Block->SuccJumps) {
94713d1364aSspupyrev if (ignoreJump(SrcBlock, DstBlock, Jump))
94813d1364aSspupyrev continue;
94993a2c291Sspupyrev uint64_t Dst = Jump->Target;
95093a2c291Sspupyrev LocalInDegree[Dst]--;
95193a2c291Sspupyrev if (LocalInDegree[Dst] == 0) {
95293a2c291Sspupyrev Queue.push(Dst);
95393a2c291Sspupyrev }
95493a2c291Sspupyrev }
95593a2c291Sspupyrev }
95693a2c291Sspupyrev
95793a2c291Sspupyrev // If there is a cycle in the subgraph, AcyclicOrder contains only a subset
95893a2c291Sspupyrev // of all blocks
95913d1364aSspupyrev if (UnknownBlocks.size() != AcyclicOrder.size())
96093a2c291Sspupyrev return false;
96113d1364aSspupyrev UnknownBlocks = AcyclicOrder;
96293a2c291Sspupyrev return true;
96393a2c291Sspupyrev }
96493a2c291Sspupyrev
96513d1364aSspupyrev /// Rebalance a given subgraph rooted at SrcBlock, ending at DstBlock and
96613d1364aSspupyrev /// having UnknownBlocks intermediate blocks.
rebalanceUnknownSubgraph(const FlowBlock * SrcBlock,const FlowBlock * DstBlock,const std::vector<FlowBlock * > & UnknownBlocks)96713d1364aSspupyrev void rebalanceUnknownSubgraph(const FlowBlock *SrcBlock,
96813d1364aSspupyrev const FlowBlock *DstBlock,
96913d1364aSspupyrev const std::vector<FlowBlock *> &UnknownBlocks) {
97093a2c291Sspupyrev assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph");
97193a2c291Sspupyrev
97213d1364aSspupyrev // Ditribute flow from the source block
97313d1364aSspupyrev uint64_t BlockFlow = 0;
97413d1364aSspupyrev // SrcBlock's flow is the sum of outgoing flows along non-ignored jumps
97513d1364aSspupyrev for (auto Jump : SrcBlock->SuccJumps) {
97613d1364aSspupyrev if (ignoreJump(SrcBlock, DstBlock, Jump))
97793a2c291Sspupyrev continue;
97813d1364aSspupyrev BlockFlow += Jump->Flow;
97993a2c291Sspupyrev }
98013d1364aSspupyrev rebalanceBlock(SrcBlock, DstBlock, SrcBlock, BlockFlow);
98113d1364aSspupyrev
98213d1364aSspupyrev // Ditribute flow from the remaining blocks
98313d1364aSspupyrev for (auto Block : UnknownBlocks) {
98413d1364aSspupyrev assert(Block->UnknownWeight && "incorrect unknown subgraph");
98513d1364aSspupyrev uint64_t BlockFlow = 0;
98613d1364aSspupyrev // Block's flow is the sum of incoming flows
98713d1364aSspupyrev for (auto Jump : Block->PredJumps) {
98813d1364aSspupyrev BlockFlow += Jump->Flow;
98913d1364aSspupyrev }
99013d1364aSspupyrev Block->Flow = BlockFlow;
99113d1364aSspupyrev rebalanceBlock(SrcBlock, DstBlock, Block, BlockFlow);
99213d1364aSspupyrev }
99313d1364aSspupyrev }
99413d1364aSspupyrev
99513d1364aSspupyrev /// Redistribute flow for a block in a subgraph rooted at SrcBlock,
99613d1364aSspupyrev /// and ending at DstBlock.
rebalanceBlock(const FlowBlock * SrcBlock,const FlowBlock * DstBlock,const FlowBlock * Block,uint64_t BlockFlow)99713d1364aSspupyrev void rebalanceBlock(const FlowBlock *SrcBlock, const FlowBlock *DstBlock,
99813d1364aSspupyrev const FlowBlock *Block, uint64_t BlockFlow) {
99913d1364aSspupyrev // Process all successor jumps and update corresponding flow values
100013d1364aSspupyrev size_t BlockDegree = 0;
100113d1364aSspupyrev for (auto Jump : Block->SuccJumps) {
100213d1364aSspupyrev if (ignoreJump(SrcBlock, DstBlock, Jump))
100313d1364aSspupyrev continue;
100413d1364aSspupyrev BlockDegree++;
100513d1364aSspupyrev }
100613d1364aSspupyrev // If all successor jumps of the block are ignored, skip it
100713d1364aSspupyrev if (DstBlock == nullptr && BlockDegree == 0)
100813d1364aSspupyrev return;
100913d1364aSspupyrev assert(BlockDegree > 0 && "all outgoing jumps are ignored");
101013d1364aSspupyrev
101113d1364aSspupyrev // Each of the Block's successors gets the following amount of flow.
101213d1364aSspupyrev // Rounding the value up so that all flow is propagated
101313d1364aSspupyrev uint64_t SuccFlow = (BlockFlow + BlockDegree - 1) / BlockDegree;
101413d1364aSspupyrev for (auto Jump : Block->SuccJumps) {
101513d1364aSspupyrev if (ignoreJump(SrcBlock, DstBlock, Jump))
101613d1364aSspupyrev continue;
101713d1364aSspupyrev uint64_t Flow = std::min(SuccFlow, BlockFlow);
101893a2c291Sspupyrev Jump->Flow = Flow;
101913d1364aSspupyrev BlockFlow -= Flow;
102093a2c291Sspupyrev }
102113d1364aSspupyrev assert(BlockFlow == 0 && "not all flow is propagated");
102293a2c291Sspupyrev }
102393a2c291Sspupyrev
102498dd2f9eSspupyrev /// A constant indicating an arbitrary exit block of a function.
102598dd2f9eSspupyrev static constexpr uint64_t AnyExitBlock = uint64_t(-1);
102698dd2f9eSspupyrev
102798dd2f9eSspupyrev /// The function.
102898dd2f9eSspupyrev FlowFunction &Func;
102998dd2f9eSspupyrev };
103098dd2f9eSspupyrev
10317cc2493dSspupyrev /// Initializing flow network for a given function.
10327cc2493dSspupyrev ///
10337cc2493dSspupyrev /// Every block is split into three nodes that are responsible for (i) an
10347cc2493dSspupyrev /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or
10357cc2493dSspupyrev /// reduction of the block weight.
initializeNetwork(MinCostMaxFlow & Network,FlowFunction & Func)10367cc2493dSspupyrev void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) {
10377cc2493dSspupyrev uint64_t NumBlocks = Func.Blocks.size();
10387cc2493dSspupyrev assert(NumBlocks > 1 && "Too few blocks in a function");
10397cc2493dSspupyrev LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n");
10407cc2493dSspupyrev
10417cc2493dSspupyrev // Pre-process data: make sure the entry weight is at least 1
10427cc2493dSspupyrev if (Func.Blocks[Func.Entry].Weight == 0) {
10437cc2493dSspupyrev Func.Blocks[Func.Entry].Weight = 1;
10447cc2493dSspupyrev }
10457cc2493dSspupyrev // Introducing dummy source/sink pairs to allow flow circulation.
10467cc2493dSspupyrev // The nodes corresponding to blocks of Func have indicies in the range
10477cc2493dSspupyrev // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values.
10487cc2493dSspupyrev uint64_t S = 3 * NumBlocks;
10497cc2493dSspupyrev uint64_t T = S + 1;
10507cc2493dSspupyrev uint64_t S1 = S + 2;
10517cc2493dSspupyrev uint64_t T1 = S + 3;
10527cc2493dSspupyrev
10537cc2493dSspupyrev Network.initialize(3 * NumBlocks + 4, S1, T1);
10547cc2493dSspupyrev
10557cc2493dSspupyrev // Create three nodes for every block of the function
10567cc2493dSspupyrev for (uint64_t B = 0; B < NumBlocks; B++) {
10577cc2493dSspupyrev auto &Block = Func.Blocks[B];
10587cc2493dSspupyrev assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) &&
10597cc2493dSspupyrev "non-zero weight of a block w/o weight except for an entry");
10607cc2493dSspupyrev
10617cc2493dSspupyrev // Split every block into two nodes
10627cc2493dSspupyrev uint64_t Bin = 3 * B;
10637cc2493dSspupyrev uint64_t Bout = 3 * B + 1;
10647cc2493dSspupyrev uint64_t Baux = 3 * B + 2;
10657cc2493dSspupyrev if (Block.Weight > 0) {
10667cc2493dSspupyrev Network.addEdge(S1, Bout, Block.Weight, 0);
10677cc2493dSspupyrev Network.addEdge(Bin, T1, Block.Weight, 0);
10687cc2493dSspupyrev }
10697cc2493dSspupyrev
10707cc2493dSspupyrev // Edges from S and to T
10717cc2493dSspupyrev assert((!Block.isEntry() || !Block.isExit()) &&
10727cc2493dSspupyrev "a block cannot be an entry and an exit");
10737cc2493dSspupyrev if (Block.isEntry()) {
10747cc2493dSspupyrev Network.addEdge(S, Bin, 0);
10757cc2493dSspupyrev } else if (Block.isExit()) {
10767cc2493dSspupyrev Network.addEdge(Bout, T, 0);
10777cc2493dSspupyrev }
10787cc2493dSspupyrev
10797cc2493dSspupyrev // An auxiliary node to allow increase/reduction of block counts:
10807cc2493dSspupyrev // We assume that decreasing block counts is more expensive than increasing,
10817cc2493dSspupyrev // and thus, setting separate costs here. In the future we may want to tune
10827cc2493dSspupyrev // the relative costs so as to maximize the quality of generated profiles.
108381aedab7Sspupyrev int64_t AuxCostInc = SampleProfileProfiCostInc;
108481aedab7Sspupyrev int64_t AuxCostDec = SampleProfileProfiCostDec;
10857cc2493dSspupyrev if (Block.UnknownWeight) {
10867cc2493dSspupyrev // Do not penalize changing weights of blocks w/o known profile count
10877cc2493dSspupyrev AuxCostInc = 0;
10887cc2493dSspupyrev AuxCostDec = 0;
10897cc2493dSspupyrev } else {
10907cc2493dSspupyrev // Increasing the count for "cold" blocks with zero initial count is more
10917cc2493dSspupyrev // expensive than for "hot" ones
10927cc2493dSspupyrev if (Block.Weight == 0) {
109381aedab7Sspupyrev AuxCostInc = SampleProfileProfiCostIncZero;
10947cc2493dSspupyrev }
10957cc2493dSspupyrev // Modifying the count of the entry block is expensive
10967cc2493dSspupyrev if (Block.isEntry()) {
109781aedab7Sspupyrev AuxCostInc = SampleProfileProfiCostIncEntry;
109881aedab7Sspupyrev AuxCostDec = SampleProfileProfiCostDecEntry;
10997cc2493dSspupyrev }
11007cc2493dSspupyrev }
11017cc2493dSspupyrev // For blocks with self-edges, do not penalize a reduction of the count,
11027cc2493dSspupyrev // as all of the increase can be attributed to the self-edge
11037cc2493dSspupyrev if (Block.HasSelfEdge) {
11047cc2493dSspupyrev AuxCostDec = 0;
11057cc2493dSspupyrev }
11067cc2493dSspupyrev
11077cc2493dSspupyrev Network.addEdge(Bin, Baux, AuxCostInc);
11087cc2493dSspupyrev Network.addEdge(Baux, Bout, AuxCostInc);
11097cc2493dSspupyrev if (Block.Weight > 0) {
11107cc2493dSspupyrev Network.addEdge(Bout, Baux, AuxCostDec);
11117cc2493dSspupyrev Network.addEdge(Baux, Bin, AuxCostDec);
11127cc2493dSspupyrev }
11137cc2493dSspupyrev }
11147cc2493dSspupyrev
11157cc2493dSspupyrev // Creating edges for every jump
11167cc2493dSspupyrev for (auto &Jump : Func.Jumps) {
11177cc2493dSspupyrev uint64_t Src = Jump.Source;
11187cc2493dSspupyrev uint64_t Dst = Jump.Target;
11197cc2493dSspupyrev if (Src != Dst) {
11207cc2493dSspupyrev uint64_t SrcOut = 3 * Src + 1;
11217cc2493dSspupyrev uint64_t DstIn = 3 * Dst;
11227cc2493dSspupyrev uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0;
11237cc2493dSspupyrev Network.addEdge(SrcOut, DstIn, Cost);
11247cc2493dSspupyrev }
11257cc2493dSspupyrev }
11267cc2493dSspupyrev
11277cc2493dSspupyrev // Make sure we have a valid flow circulation
11287cc2493dSspupyrev Network.addEdge(T, S, 0);
11297cc2493dSspupyrev }
11307cc2493dSspupyrev
11317cc2493dSspupyrev /// Extract resulting block and edge counts from the flow network.
extractWeights(MinCostMaxFlow & Network,FlowFunction & Func)11327cc2493dSspupyrev void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) {
11337cc2493dSspupyrev uint64_t NumBlocks = Func.Blocks.size();
11347cc2493dSspupyrev
11357cc2493dSspupyrev // Extract resulting block counts
11367cc2493dSspupyrev for (uint64_t Src = 0; Src < NumBlocks; Src++) {
11377cc2493dSspupyrev auto &Block = Func.Blocks[Src];
11387cc2493dSspupyrev uint64_t SrcOut = 3 * Src + 1;
11397cc2493dSspupyrev int64_t Flow = 0;
11407cc2493dSspupyrev for (auto &Adj : Network.getFlow(SrcOut)) {
11417cc2493dSspupyrev uint64_t DstIn = Adj.first;
11427cc2493dSspupyrev int64_t DstFlow = Adj.second;
11437cc2493dSspupyrev bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2);
11447cc2493dSspupyrev if (!IsAuxNode || Block.HasSelfEdge) {
11457cc2493dSspupyrev Flow += DstFlow;
11467cc2493dSspupyrev }
11477cc2493dSspupyrev }
11487cc2493dSspupyrev Block.Flow = Flow;
11497cc2493dSspupyrev assert(Flow >= 0 && "negative block flow");
11507cc2493dSspupyrev }
11517cc2493dSspupyrev
11527cc2493dSspupyrev // Extract resulting jump counts
11537cc2493dSspupyrev for (auto &Jump : Func.Jumps) {
11547cc2493dSspupyrev uint64_t Src = Jump.Source;
11557cc2493dSspupyrev uint64_t Dst = Jump.Target;
11567cc2493dSspupyrev int64_t Flow = 0;
11577cc2493dSspupyrev if (Src != Dst) {
11587cc2493dSspupyrev uint64_t SrcOut = 3 * Src + 1;
11597cc2493dSspupyrev uint64_t DstIn = 3 * Dst;
11607cc2493dSspupyrev Flow = Network.getFlow(SrcOut, DstIn);
11617cc2493dSspupyrev } else {
11627cc2493dSspupyrev uint64_t SrcOut = 3 * Src + 1;
11637cc2493dSspupyrev uint64_t SrcAux = 3 * Src + 2;
11647cc2493dSspupyrev int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux);
11657cc2493dSspupyrev if (AuxFlow > 0)
11667cc2493dSspupyrev Flow = AuxFlow;
11677cc2493dSspupyrev }
11687cc2493dSspupyrev Jump.Flow = Flow;
11697cc2493dSspupyrev assert(Flow >= 0 && "negative jump flow");
11707cc2493dSspupyrev }
11717cc2493dSspupyrev }
11727cc2493dSspupyrev
11737cc2493dSspupyrev #ifndef NDEBUG
11747cc2493dSspupyrev /// Verify that the computed flow values satisfy flow conservation rules
verifyWeights(const FlowFunction & Func)11757cc2493dSspupyrev void verifyWeights(const FlowFunction &Func) {
11767cc2493dSspupyrev const uint64_t NumBlocks = Func.Blocks.size();
11777cc2493dSspupyrev auto InFlow = std::vector<uint64_t>(NumBlocks, 0);
11787cc2493dSspupyrev auto OutFlow = std::vector<uint64_t>(NumBlocks, 0);
11797cc2493dSspupyrev for (auto &Jump : Func.Jumps) {
11807cc2493dSspupyrev InFlow[Jump.Target] += Jump.Flow;
11817cc2493dSspupyrev OutFlow[Jump.Source] += Jump.Flow;
11827cc2493dSspupyrev }
11837cc2493dSspupyrev
11847cc2493dSspupyrev uint64_t TotalInFlow = 0;
11857cc2493dSspupyrev uint64_t TotalOutFlow = 0;
11867cc2493dSspupyrev for (uint64_t I = 0; I < NumBlocks; I++) {
11877cc2493dSspupyrev auto &Block = Func.Blocks[I];
11887cc2493dSspupyrev if (Block.isEntry()) {
11897cc2493dSspupyrev TotalInFlow += Block.Flow;
11907cc2493dSspupyrev assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
11917cc2493dSspupyrev } else if (Block.isExit()) {
11927cc2493dSspupyrev TotalOutFlow += Block.Flow;
11937cc2493dSspupyrev assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
11947cc2493dSspupyrev } else {
11957cc2493dSspupyrev assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow");
11967cc2493dSspupyrev assert(Block.Flow == InFlow[I] && "incorrectly computed control flow");
11977cc2493dSspupyrev }
11987cc2493dSspupyrev }
11997cc2493dSspupyrev assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow");
120098dd2f9eSspupyrev
120198dd2f9eSspupyrev // Verify that there are no isolated flow components
120298dd2f9eSspupyrev // One could modify FlowFunction to hold edges indexed by the sources, which
120398dd2f9eSspupyrev // will avoid a creation of the object
120498dd2f9eSspupyrev auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks);
120598dd2f9eSspupyrev for (auto &Jump : Func.Jumps) {
120698dd2f9eSspupyrev if (Jump.Flow > 0) {
120798dd2f9eSspupyrev PositiveFlowEdges[Jump.Source].push_back(Jump.Target);
120898dd2f9eSspupyrev }
120998dd2f9eSspupyrev }
121098dd2f9eSspupyrev
121193a2c291Sspupyrev // Run BFS from the source along edges with positive flow
121298dd2f9eSspupyrev std::queue<uint64_t> Queue;
12135f4ae564SJan Svoboda auto Visited = BitVector(NumBlocks, false);
121498dd2f9eSspupyrev Queue.push(Func.Entry);
121598dd2f9eSspupyrev Visited[Func.Entry] = true;
121698dd2f9eSspupyrev while (!Queue.empty()) {
121798dd2f9eSspupyrev uint64_t Src = Queue.front();
121898dd2f9eSspupyrev Queue.pop();
121998dd2f9eSspupyrev for (uint64_t Dst : PositiveFlowEdges[Src]) {
122098dd2f9eSspupyrev if (!Visited[Dst]) {
122198dd2f9eSspupyrev Queue.push(Dst);
122298dd2f9eSspupyrev Visited[Dst] = true;
122398dd2f9eSspupyrev }
122498dd2f9eSspupyrev }
122598dd2f9eSspupyrev }
122698dd2f9eSspupyrev
122798dd2f9eSspupyrev // Verify that every block that has a positive flow is reached from the source
122898dd2f9eSspupyrev // along edges with a positive flow
122998dd2f9eSspupyrev for (uint64_t I = 0; I < NumBlocks; I++) {
123098dd2f9eSspupyrev auto &Block = Func.Blocks[I];
123198dd2f9eSspupyrev assert((Visited[I] || Block.Flow == 0) && "an isolated flow component");
123298dd2f9eSspupyrev }
12337cc2493dSspupyrev }
12347cc2493dSspupyrev #endif
12357cc2493dSspupyrev
12367cc2493dSspupyrev } // end of anonymous namespace
12377cc2493dSspupyrev
12387cc2493dSspupyrev /// Apply the profile inference algorithm for a given flow function
applyFlowInference(FlowFunction & Func)12397cc2493dSspupyrev void llvm::applyFlowInference(FlowFunction &Func) {
12407cc2493dSspupyrev // Create and apply an inference network model
12417cc2493dSspupyrev auto InferenceNetwork = MinCostMaxFlow();
12427cc2493dSspupyrev initializeNetwork(InferenceNetwork, Func);
12437cc2493dSspupyrev InferenceNetwork.run();
12447cc2493dSspupyrev
12457cc2493dSspupyrev // Extract flow values for every block and every edge
12467cc2493dSspupyrev extractWeights(InferenceNetwork, Func);
12477cc2493dSspupyrev
124898dd2f9eSspupyrev // Post-processing adjustments to the flow
124998dd2f9eSspupyrev auto Adjuster = FlowAdjuster(Func);
125098dd2f9eSspupyrev Adjuster.run();
125198dd2f9eSspupyrev
12527cc2493dSspupyrev #ifndef NDEBUG
12537cc2493dSspupyrev // Verify the result
12547cc2493dSspupyrev verifyWeights(Func);
12557cc2493dSspupyrev #endif
12567cc2493dSspupyrev }
1257