17cc2493dSspupyrev //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===// 27cc2493dSspupyrev // 37cc2493dSspupyrev // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 47cc2493dSspupyrev // See https://llvm.org/LICENSE.txt for license information. 57cc2493dSspupyrev // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 67cc2493dSspupyrev // 77cc2493dSspupyrev //===----------------------------------------------------------------------===// 87cc2493dSspupyrev // 97cc2493dSspupyrev // This file implements a profile inference algorithm. Given an incomplete and 107cc2493dSspupyrev // possibly imprecise block counts, the algorithm reconstructs realistic block 117cc2493dSspupyrev // and edge counts that satisfy flow conservation rules, while minimally modify 127cc2493dSspupyrev // input block counts. 137cc2493dSspupyrev // 147cc2493dSspupyrev //===----------------------------------------------------------------------===// 157cc2493dSspupyrev 167cc2493dSspupyrev #include "llvm/Transforms/Utils/SampleProfileInference.h" 177cc2493dSspupyrev #include "llvm/Support/Debug.h" 187cc2493dSspupyrev #include <queue> 197cc2493dSspupyrev #include <set> 207cc2493dSspupyrev 217cc2493dSspupyrev using namespace llvm; 227cc2493dSspupyrev #define DEBUG_TYPE "sample-profile-inference" 237cc2493dSspupyrev 247cc2493dSspupyrev namespace { 257cc2493dSspupyrev 267cc2493dSspupyrev /// A value indicating an infinite flow/capacity/weight of a block/edge. 277cc2493dSspupyrev /// Not using numeric_limits<int64_t>::max(), as the values can be summed up 287cc2493dSspupyrev /// during the execution. 297cc2493dSspupyrev static constexpr int64_t INF = ((int64_t)1) << 50; 307cc2493dSspupyrev 317cc2493dSspupyrev /// The minimum-cost maximum flow algorithm. 327cc2493dSspupyrev /// 337cc2493dSspupyrev /// The algorithm finds the maximum flow of minimum cost on a given (directed) 347cc2493dSspupyrev /// network using a modified version of the classical Moore-Bellman-Ford 357cc2493dSspupyrev /// approach. The algorithm applies a number of augmentation iterations in which 367cc2493dSspupyrev /// flow is sent along paths of positive capacity from the source to the sink. 377cc2493dSspupyrev /// The worst-case time complexity of the implementation is O(v(f)*m*n), where 387cc2493dSspupyrev /// where m is the number of edges, n is the number of vertices, and v(f) is the 397cc2493dSspupyrev /// value of the maximum flow. However, the observed running time on typical 407cc2493dSspupyrev /// instances is sub-quadratic, that is, o(n^2). 417cc2493dSspupyrev /// 427cc2493dSspupyrev /// The input is a set of edges with specified costs and capacities, and a pair 437cc2493dSspupyrev /// of nodes (source and sink). The output is the flow along each edge of the 447cc2493dSspupyrev /// minimum total cost respecting the given edge capacities. 457cc2493dSspupyrev class MinCostMaxFlow { 467cc2493dSspupyrev public: 477cc2493dSspupyrev // Initialize algorithm's data structures for a network of a given size. 487cc2493dSspupyrev void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) { 497cc2493dSspupyrev Source = SourceNode; 507cc2493dSspupyrev Target = SinkNode; 517cc2493dSspupyrev 527cc2493dSspupyrev Nodes = std::vector<Node>(NodeCount); 537cc2493dSspupyrev Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>()); 547cc2493dSspupyrev } 557cc2493dSspupyrev 567cc2493dSspupyrev // Run the algorithm. 577cc2493dSspupyrev int64_t run() { 587cc2493dSspupyrev // Find an augmenting path and update the flow along the path 597cc2493dSspupyrev size_t AugmentationIters = 0; 607cc2493dSspupyrev while (findAugmentingPath()) { 617cc2493dSspupyrev augmentFlowAlongPath(); 627cc2493dSspupyrev AugmentationIters++; 637cc2493dSspupyrev } 647cc2493dSspupyrev 657cc2493dSspupyrev // Compute the total flow and its cost 667cc2493dSspupyrev int64_t TotalCost = 0; 677cc2493dSspupyrev int64_t TotalFlow = 0; 687cc2493dSspupyrev for (uint64_t Src = 0; Src < Nodes.size(); Src++) { 697cc2493dSspupyrev for (auto &Edge : Edges[Src]) { 707cc2493dSspupyrev if (Edge.Flow > 0) { 717cc2493dSspupyrev TotalCost += Edge.Cost * Edge.Flow; 727cc2493dSspupyrev if (Src == Source) 737cc2493dSspupyrev TotalFlow += Edge.Flow; 747cc2493dSspupyrev } 757cc2493dSspupyrev } 767cc2493dSspupyrev } 777cc2493dSspupyrev LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters 787cc2493dSspupyrev << " iterations with " << TotalFlow << " total flow" 797cc2493dSspupyrev << " of " << TotalCost << " cost\n"); 8022d82949SKazu Hirata (void)TotalFlow; 817cc2493dSspupyrev return TotalCost; 827cc2493dSspupyrev } 837cc2493dSspupyrev 847cc2493dSspupyrev /// Adding an edge to the network with a specified capacity and a cost. 857cc2493dSspupyrev /// Multiple edges between a pair of nodes are allowed but self-edges 867cc2493dSspupyrev /// are not supported. 877cc2493dSspupyrev void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) { 887cc2493dSspupyrev assert(Capacity > 0 && "adding an edge of zero capacity"); 897cc2493dSspupyrev assert(Src != Dst && "loop edge are not supported"); 907cc2493dSspupyrev 917cc2493dSspupyrev Edge SrcEdge; 927cc2493dSspupyrev SrcEdge.Dst = Dst; 937cc2493dSspupyrev SrcEdge.Cost = Cost; 947cc2493dSspupyrev SrcEdge.Capacity = Capacity; 957cc2493dSspupyrev SrcEdge.Flow = 0; 967cc2493dSspupyrev SrcEdge.RevEdgeIndex = Edges[Dst].size(); 977cc2493dSspupyrev 987cc2493dSspupyrev Edge DstEdge; 997cc2493dSspupyrev DstEdge.Dst = Src; 1007cc2493dSspupyrev DstEdge.Cost = -Cost; 1017cc2493dSspupyrev DstEdge.Capacity = 0; 1027cc2493dSspupyrev DstEdge.Flow = 0; 1037cc2493dSspupyrev DstEdge.RevEdgeIndex = Edges[Src].size(); 1047cc2493dSspupyrev 1057cc2493dSspupyrev Edges[Src].push_back(SrcEdge); 1067cc2493dSspupyrev Edges[Dst].push_back(DstEdge); 1077cc2493dSspupyrev } 1087cc2493dSspupyrev 1097cc2493dSspupyrev /// Adding an edge to the network of infinite capacity and a given cost. 1107cc2493dSspupyrev void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) { 1117cc2493dSspupyrev addEdge(Src, Dst, INF, Cost); 1127cc2493dSspupyrev } 1137cc2493dSspupyrev 1147cc2493dSspupyrev /// Get the total flow from a given source node. 1157cc2493dSspupyrev /// Returns a list of pairs (target node, amount of flow to the target). 1167cc2493dSspupyrev const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const { 1177cc2493dSspupyrev std::vector<std::pair<uint64_t, int64_t>> Flow; 1187cc2493dSspupyrev for (auto &Edge : Edges[Src]) { 1197cc2493dSspupyrev if (Edge.Flow > 0) 1207cc2493dSspupyrev Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow)); 1217cc2493dSspupyrev } 1227cc2493dSspupyrev return Flow; 1237cc2493dSspupyrev } 1247cc2493dSspupyrev 1257cc2493dSspupyrev /// Get the total flow between a pair of nodes. 1267cc2493dSspupyrev int64_t getFlow(uint64_t Src, uint64_t Dst) const { 1277cc2493dSspupyrev int64_t Flow = 0; 1287cc2493dSspupyrev for (auto &Edge : Edges[Src]) { 1297cc2493dSspupyrev if (Edge.Dst == Dst) { 1307cc2493dSspupyrev Flow += Edge.Flow; 1317cc2493dSspupyrev } 1327cc2493dSspupyrev } 1337cc2493dSspupyrev return Flow; 1347cc2493dSspupyrev } 1357cc2493dSspupyrev 1367cc2493dSspupyrev /// A cost of increasing a block's count by one. 1377cc2493dSspupyrev static constexpr int64_t AuxCostInc = 10; 1387cc2493dSspupyrev /// A cost of decreasing a block's count by one. 1397cc2493dSspupyrev static constexpr int64_t AuxCostDec = 20; 1407cc2493dSspupyrev /// A cost of increasing a count of zero-weight block by one. 1417cc2493dSspupyrev static constexpr int64_t AuxCostIncZero = 11; 1427cc2493dSspupyrev /// A cost of increasing the entry block's count by one. 1437cc2493dSspupyrev static constexpr int64_t AuxCostIncEntry = 40; 1447cc2493dSspupyrev /// A cost of decreasing the entry block's count by one. 1457cc2493dSspupyrev static constexpr int64_t AuxCostDecEntry = 10; 1467cc2493dSspupyrev /// A cost of taking an unlikely jump. 1477cc2493dSspupyrev static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20; 1487cc2493dSspupyrev 1497cc2493dSspupyrev private: 1507cc2493dSspupyrev /// Check for existence of an augmenting path with a positive capacity. 1517cc2493dSspupyrev bool findAugmentingPath() { 1527cc2493dSspupyrev // Initialize data structures 1537cc2493dSspupyrev for (auto &Node : Nodes) { 1547cc2493dSspupyrev Node.Distance = INF; 1557cc2493dSspupyrev Node.ParentNode = uint64_t(-1); 1567cc2493dSspupyrev Node.ParentEdgeIndex = uint64_t(-1); 1577cc2493dSspupyrev Node.Taken = false; 1587cc2493dSspupyrev } 1597cc2493dSspupyrev 1607cc2493dSspupyrev std::queue<uint64_t> Queue; 1617cc2493dSspupyrev Queue.push(Source); 1627cc2493dSspupyrev Nodes[Source].Distance = 0; 1637cc2493dSspupyrev Nodes[Source].Taken = true; 1647cc2493dSspupyrev while (!Queue.empty()) { 1657cc2493dSspupyrev uint64_t Src = Queue.front(); 1667cc2493dSspupyrev Queue.pop(); 1677cc2493dSspupyrev Nodes[Src].Taken = false; 1687cc2493dSspupyrev // Although the residual network contains edges with negative costs 1697cc2493dSspupyrev // (in particular, backward edges), it can be shown that there are no 1707cc2493dSspupyrev // negative-weight cycles and the following two invariants are maintained: 1717cc2493dSspupyrev // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V, 1727cc2493dSspupyrev // where Dist is the length of the shortest path between two nodes. This 1737cc2493dSspupyrev // allows to prune the search-space of the path-finding algorithm using 1747cc2493dSspupyrev // the following early-stop criteria: 1757cc2493dSspupyrev // -- If we find a path with zero-distance from Source to Target, stop the 1767cc2493dSspupyrev // search, as the path is the shortest since Dist[Source, Target] >= 0; 1777cc2493dSspupyrev // -- If we have Dist[Source, V] > Dist[Source, Target], then do not 1787cc2493dSspupyrev // process node V, as it is guaranteed _not_ to be on a shortest path 1797cc2493dSspupyrev // from Source to Target; it follows from inequalities 1807cc2493dSspupyrev // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target] 1817cc2493dSspupyrev // >= Dist[Source, V] 1827cc2493dSspupyrev if (Nodes[Target].Distance == 0) 1837cc2493dSspupyrev break; 1847cc2493dSspupyrev if (Nodes[Src].Distance > Nodes[Target].Distance) 1857cc2493dSspupyrev continue; 1867cc2493dSspupyrev 1877cc2493dSspupyrev // Process adjacent edges 1887cc2493dSspupyrev for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) { 1897cc2493dSspupyrev auto &Edge = Edges[Src][EdgeIdx]; 1907cc2493dSspupyrev if (Edge.Flow < Edge.Capacity) { 1917cc2493dSspupyrev uint64_t Dst = Edge.Dst; 1927cc2493dSspupyrev int64_t NewDistance = Nodes[Src].Distance + Edge.Cost; 1937cc2493dSspupyrev if (Nodes[Dst].Distance > NewDistance) { 1947cc2493dSspupyrev // Update the distance and the parent node/edge 1957cc2493dSspupyrev Nodes[Dst].Distance = NewDistance; 1967cc2493dSspupyrev Nodes[Dst].ParentNode = Src; 1977cc2493dSspupyrev Nodes[Dst].ParentEdgeIndex = EdgeIdx; 1987cc2493dSspupyrev // Add the node to the queue, if it is not there yet 1997cc2493dSspupyrev if (!Nodes[Dst].Taken) { 2007cc2493dSspupyrev Queue.push(Dst); 2017cc2493dSspupyrev Nodes[Dst].Taken = true; 2027cc2493dSspupyrev } 2037cc2493dSspupyrev } 2047cc2493dSspupyrev } 2057cc2493dSspupyrev } 2067cc2493dSspupyrev } 2077cc2493dSspupyrev 2087cc2493dSspupyrev return Nodes[Target].Distance != INF; 2097cc2493dSspupyrev } 2107cc2493dSspupyrev 2117cc2493dSspupyrev /// Update the current flow along the augmenting path. 2127cc2493dSspupyrev void augmentFlowAlongPath() { 2137cc2493dSspupyrev // Find path capacity 2147cc2493dSspupyrev int64_t PathCapacity = INF; 2157cc2493dSspupyrev uint64_t Now = Target; 2167cc2493dSspupyrev while (Now != Source) { 2177cc2493dSspupyrev uint64_t Pred = Nodes[Now].ParentNode; 2187cc2493dSspupyrev auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 2197cc2493dSspupyrev PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow); 2207cc2493dSspupyrev Now = Pred; 2217cc2493dSspupyrev } 2227cc2493dSspupyrev 223*93a2c291Sspupyrev assert(PathCapacity > 0 && "found an incorrect augmenting path"); 2247cc2493dSspupyrev 2257cc2493dSspupyrev // Update the flow along the path 2267cc2493dSspupyrev Now = Target; 2277cc2493dSspupyrev while (Now != Source) { 2287cc2493dSspupyrev uint64_t Pred = Nodes[Now].ParentNode; 2297cc2493dSspupyrev auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 2307cc2493dSspupyrev auto &RevEdge = Edges[Now][Edge.RevEdgeIndex]; 2317cc2493dSspupyrev 2327cc2493dSspupyrev Edge.Flow += PathCapacity; 2337cc2493dSspupyrev RevEdge.Flow -= PathCapacity; 2347cc2493dSspupyrev 2357cc2493dSspupyrev Now = Pred; 2367cc2493dSspupyrev } 2377cc2493dSspupyrev } 2387cc2493dSspupyrev 2397cc2493dSspupyrev /// An node in a flow network. 2407cc2493dSspupyrev struct Node { 2417cc2493dSspupyrev /// The cost of the cheapest path from the source to the current node. 2427cc2493dSspupyrev int64_t Distance; 2437cc2493dSspupyrev /// The node preceding the current one in the path. 2447cc2493dSspupyrev uint64_t ParentNode; 2457cc2493dSspupyrev /// The index of the edge between ParentNode and the current node. 2467cc2493dSspupyrev uint64_t ParentEdgeIndex; 2477cc2493dSspupyrev /// An indicator of whether the current node is in a queue. 2487cc2493dSspupyrev bool Taken; 2497cc2493dSspupyrev }; 2507cc2493dSspupyrev /// An edge in a flow network. 2517cc2493dSspupyrev struct Edge { 2527cc2493dSspupyrev /// The cost of the edge. 2537cc2493dSspupyrev int64_t Cost; 2547cc2493dSspupyrev /// The capacity of the edge. 2557cc2493dSspupyrev int64_t Capacity; 2567cc2493dSspupyrev /// The current flow on the edge. 2577cc2493dSspupyrev int64_t Flow; 2587cc2493dSspupyrev /// The destination node of the edge. 2597cc2493dSspupyrev uint64_t Dst; 2607cc2493dSspupyrev /// The index of the reverse edge between Dst and the current node. 2617cc2493dSspupyrev uint64_t RevEdgeIndex; 2627cc2493dSspupyrev }; 2637cc2493dSspupyrev 2647cc2493dSspupyrev /// The set of network nodes. 2657cc2493dSspupyrev std::vector<Node> Nodes; 2667cc2493dSspupyrev /// The set of network edges. 2677cc2493dSspupyrev std::vector<std::vector<Edge>> Edges; 2687cc2493dSspupyrev /// Source node of the flow. 2697cc2493dSspupyrev uint64_t Source; 2707cc2493dSspupyrev /// Target (sink) node of the flow. 2717cc2493dSspupyrev uint64_t Target; 2727cc2493dSspupyrev }; 2737cc2493dSspupyrev 274*93a2c291Sspupyrev /// A post-processing adjustment of control flow. It applies two steps by 275*93a2c291Sspupyrev /// rerouting some flow and making it more realistic: 276*93a2c291Sspupyrev /// 277*93a2c291Sspupyrev /// - First, it removes all isolated components ("islands") with a positive flow 278*93a2c291Sspupyrev /// that are unreachable from the entry block. For every such component, we 279*93a2c291Sspupyrev /// find the shortest from the entry to an exit passing through the component, 280*93a2c291Sspupyrev /// and increase the flow by one unit along the path. 281*93a2c291Sspupyrev /// 282*93a2c291Sspupyrev /// - Second, it identifies all "unknown subgraphs" consisting of basic blocks 283*93a2c291Sspupyrev /// with no sampled counts. Then it rebalnces the flow that goes through such 284*93a2c291Sspupyrev /// a subgraph so that each branch is taken with probability 50%. 285*93a2c291Sspupyrev /// An unknown subgraph is such that for every two nodes u and v: 286*93a2c291Sspupyrev /// - u dominates v and u is not unknown; 287*93a2c291Sspupyrev /// - v post-dominates u; and 288*93a2c291Sspupyrev /// - all inner-nodes of all (u,v)-paths are unknown. 289*93a2c291Sspupyrev /// 29098dd2f9eSspupyrev class FlowAdjuster { 29198dd2f9eSspupyrev public: 29298dd2f9eSspupyrev FlowAdjuster(FlowFunction &Func) : Func(Func) { 29398dd2f9eSspupyrev assert(Func.Blocks[Func.Entry].isEntry() && 29498dd2f9eSspupyrev "incorrect index of the entry block"); 29598dd2f9eSspupyrev } 29698dd2f9eSspupyrev 29798dd2f9eSspupyrev // Run the post-processing 29898dd2f9eSspupyrev void run() { 299*93a2c291Sspupyrev /// Adjust the flow to get rid of isolated components. 30098dd2f9eSspupyrev joinIsolatedComponents(); 301*93a2c291Sspupyrev 302*93a2c291Sspupyrev /// Rebalance the flow inside unknown subgraphs. 303*93a2c291Sspupyrev rebalanceUnknownSubgraphs(); 30498dd2f9eSspupyrev } 30598dd2f9eSspupyrev 306*93a2c291Sspupyrev /// The probability for the first successor of a unknown subgraph 307*93a2c291Sspupyrev static constexpr double UnknownFirstSuccProbability = 0.5; 308*93a2c291Sspupyrev 30998dd2f9eSspupyrev private: 31098dd2f9eSspupyrev void joinIsolatedComponents() { 31198dd2f9eSspupyrev // Find blocks that are reachable from the source 31298dd2f9eSspupyrev auto Visited = std::vector<bool>(NumBlocks(), false); 31398dd2f9eSspupyrev findReachable(Func.Entry, Visited); 31498dd2f9eSspupyrev 31598dd2f9eSspupyrev // Iterate over all non-reachable blocks and adjust their weights 31698dd2f9eSspupyrev for (uint64_t I = 0; I < NumBlocks(); I++) { 31798dd2f9eSspupyrev auto &Block = Func.Blocks[I]; 31898dd2f9eSspupyrev if (Block.Flow > 0 && !Visited[I]) { 31998dd2f9eSspupyrev // Find a path from the entry to an exit passing through the block I 32098dd2f9eSspupyrev auto Path = findShortestPath(I); 32198dd2f9eSspupyrev // Increase the flow along the path 32298dd2f9eSspupyrev assert(Path.size() > 0 && Path[0]->Source == Func.Entry && 32398dd2f9eSspupyrev "incorrectly computed path adjusting control flow"); 32498dd2f9eSspupyrev Func.Blocks[Func.Entry].Flow += 1; 32598dd2f9eSspupyrev for (auto &Jump : Path) { 32698dd2f9eSspupyrev Jump->Flow += 1; 32798dd2f9eSspupyrev Func.Blocks[Jump->Target].Flow += 1; 32898dd2f9eSspupyrev // Update reachability 32998dd2f9eSspupyrev findReachable(Jump->Target, Visited); 33098dd2f9eSspupyrev } 33198dd2f9eSspupyrev } 33298dd2f9eSspupyrev } 33398dd2f9eSspupyrev } 33498dd2f9eSspupyrev 335*93a2c291Sspupyrev /// Run BFS from a given block along the jumps with a positive flow and mark 33698dd2f9eSspupyrev /// all reachable blocks. 33798dd2f9eSspupyrev void findReachable(uint64_t Src, std::vector<bool> &Visited) { 33898dd2f9eSspupyrev if (Visited[Src]) 33998dd2f9eSspupyrev return; 34098dd2f9eSspupyrev std::queue<uint64_t> Queue; 34198dd2f9eSspupyrev Queue.push(Src); 34298dd2f9eSspupyrev Visited[Src] = true; 34398dd2f9eSspupyrev while (!Queue.empty()) { 34498dd2f9eSspupyrev Src = Queue.front(); 34598dd2f9eSspupyrev Queue.pop(); 34698dd2f9eSspupyrev for (auto Jump : Func.Blocks[Src].SuccJumps) { 34798dd2f9eSspupyrev uint64_t Dst = Jump->Target; 34898dd2f9eSspupyrev if (Jump->Flow > 0 && !Visited[Dst]) { 34998dd2f9eSspupyrev Queue.push(Dst); 35098dd2f9eSspupyrev Visited[Dst] = true; 35198dd2f9eSspupyrev } 35298dd2f9eSspupyrev } 35398dd2f9eSspupyrev } 35498dd2f9eSspupyrev } 35598dd2f9eSspupyrev 35698dd2f9eSspupyrev /// Find the shortest path from the entry block to an exit block passing 35798dd2f9eSspupyrev /// through a given block. 35898dd2f9eSspupyrev std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) { 35998dd2f9eSspupyrev // A path from the entry block to BlockIdx 36098dd2f9eSspupyrev auto ForwardPath = findShortestPath(Func.Entry, BlockIdx); 36198dd2f9eSspupyrev // A path from BlockIdx to an exit block 36298dd2f9eSspupyrev auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock); 36398dd2f9eSspupyrev 36498dd2f9eSspupyrev // Concatenate the two paths 36598dd2f9eSspupyrev std::vector<FlowJump *> Result; 36698dd2f9eSspupyrev Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end()); 36798dd2f9eSspupyrev Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end()); 36898dd2f9eSspupyrev return Result; 36998dd2f9eSspupyrev } 37098dd2f9eSspupyrev 37198dd2f9eSspupyrev /// Apply the Dijkstra algorithm to find the shortest path from a given 37298dd2f9eSspupyrev /// Source to a given Target block. 37398dd2f9eSspupyrev /// If Target == -1, then the path ends at an exit block. 37498dd2f9eSspupyrev std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) { 37598dd2f9eSspupyrev // Quit early, if possible 37698dd2f9eSspupyrev if (Source == Target) 37798dd2f9eSspupyrev return std::vector<FlowJump *>(); 37898dd2f9eSspupyrev if (Func.Blocks[Source].isExit() && Target == AnyExitBlock) 37998dd2f9eSspupyrev return std::vector<FlowJump *>(); 38098dd2f9eSspupyrev 38198dd2f9eSspupyrev // Initialize data structures 38298dd2f9eSspupyrev auto Distance = std::vector<int64_t>(NumBlocks(), INF); 38398dd2f9eSspupyrev auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr); 38498dd2f9eSspupyrev Distance[Source] = 0; 38598dd2f9eSspupyrev std::set<std::pair<uint64_t, uint64_t>> Queue; 38698dd2f9eSspupyrev Queue.insert(std::make_pair(Distance[Source], Source)); 38798dd2f9eSspupyrev 38898dd2f9eSspupyrev // Run the Dijkstra algorithm 38998dd2f9eSspupyrev while (!Queue.empty()) { 39098dd2f9eSspupyrev uint64_t Src = Queue.begin()->second; 39198dd2f9eSspupyrev Queue.erase(Queue.begin()); 39298dd2f9eSspupyrev // If we found a solution, quit early 39398dd2f9eSspupyrev if (Src == Target || 39498dd2f9eSspupyrev (Func.Blocks[Src].isExit() && Target == AnyExitBlock)) 39598dd2f9eSspupyrev break; 39698dd2f9eSspupyrev 39798dd2f9eSspupyrev for (auto Jump : Func.Blocks[Src].SuccJumps) { 39898dd2f9eSspupyrev uint64_t Dst = Jump->Target; 39998dd2f9eSspupyrev int64_t JumpDist = jumpDistance(Jump); 40098dd2f9eSspupyrev if (Distance[Dst] > Distance[Src] + JumpDist) { 40198dd2f9eSspupyrev Queue.erase(std::make_pair(Distance[Dst], Dst)); 40298dd2f9eSspupyrev 40398dd2f9eSspupyrev Distance[Dst] = Distance[Src] + JumpDist; 40498dd2f9eSspupyrev Parent[Dst] = Jump; 40598dd2f9eSspupyrev 40698dd2f9eSspupyrev Queue.insert(std::make_pair(Distance[Dst], Dst)); 40798dd2f9eSspupyrev } 40898dd2f9eSspupyrev } 40998dd2f9eSspupyrev } 41098dd2f9eSspupyrev // If Target is not provided, find the closest exit block 41198dd2f9eSspupyrev if (Target == AnyExitBlock) { 41298dd2f9eSspupyrev for (uint64_t I = 0; I < NumBlocks(); I++) { 41398dd2f9eSspupyrev if (Func.Blocks[I].isExit() && Parent[I] != nullptr) { 41498dd2f9eSspupyrev if (Target == AnyExitBlock || Distance[Target] > Distance[I]) { 41598dd2f9eSspupyrev Target = I; 41698dd2f9eSspupyrev } 41798dd2f9eSspupyrev } 41898dd2f9eSspupyrev } 41998dd2f9eSspupyrev } 42098dd2f9eSspupyrev assert(Parent[Target] != nullptr && "a path does not exist"); 42198dd2f9eSspupyrev 42298dd2f9eSspupyrev // Extract the constructed path 42398dd2f9eSspupyrev std::vector<FlowJump *> Result; 42498dd2f9eSspupyrev uint64_t Now = Target; 42598dd2f9eSspupyrev while (Now != Source) { 42698dd2f9eSspupyrev assert(Now == Parent[Now]->Target && "incorrect parent jump"); 42798dd2f9eSspupyrev Result.push_back(Parent[Now]); 42898dd2f9eSspupyrev Now = Parent[Now]->Source; 42998dd2f9eSspupyrev } 43098dd2f9eSspupyrev // Reverse the path, since it is extracted from Target to Source 43198dd2f9eSspupyrev std::reverse(Result.begin(), Result.end()); 43298dd2f9eSspupyrev return Result; 43398dd2f9eSspupyrev } 43498dd2f9eSspupyrev 43598dd2f9eSspupyrev /// A distance of a path for a given jump. 43698dd2f9eSspupyrev /// In order to incite the path to use blocks/jumps with large positive flow, 43798dd2f9eSspupyrev /// and avoid changing branch probability of outgoing edges drastically, 43898dd2f9eSspupyrev /// set the distance as follows: 43998dd2f9eSspupyrev /// if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0) 44098dd2f9eSspupyrev /// if Block.Weight > 0, then distance = 1 44198dd2f9eSspupyrev /// otherwise distance >> 1 44298dd2f9eSspupyrev int64_t jumpDistance(FlowJump *Jump) const { 44398dd2f9eSspupyrev int64_t BaseDistance = 100; 44498dd2f9eSspupyrev if (Jump->IsUnlikely) 44598dd2f9eSspupyrev return MinCostMaxFlow::AuxCostUnlikely; 44698dd2f9eSspupyrev if (Jump->Flow > 0) 44798dd2f9eSspupyrev return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0); 44898dd2f9eSspupyrev if (Func.Blocks[Jump->Target].Weight > 0) 44998dd2f9eSspupyrev return BaseDistance; 45098dd2f9eSspupyrev return BaseDistance * (NumBlocks() + 1); 45198dd2f9eSspupyrev }; 45298dd2f9eSspupyrev 45398dd2f9eSspupyrev uint64_t NumBlocks() const { return Func.Blocks.size(); } 45498dd2f9eSspupyrev 455*93a2c291Sspupyrev /// Rebalance unknown subgraphs so as each branch splits with probabilities 456*93a2c291Sspupyrev /// UnknownFirstSuccProbability and 1 - UnknownFirstSuccProbability 457*93a2c291Sspupyrev void rebalanceUnknownSubgraphs() { 458*93a2c291Sspupyrev assert(UnknownFirstSuccProbability >= 0.0 && 459*93a2c291Sspupyrev UnknownFirstSuccProbability <= 1.0 && 460*93a2c291Sspupyrev "the share of the unknown successor should be between 0 and 1"); 461*93a2c291Sspupyrev // Try to find unknown subgraphs from each non-unknown block 462*93a2c291Sspupyrev for (uint64_t I = 0; I < Func.Blocks.size(); I++) { 463*93a2c291Sspupyrev auto SrcBlock = &Func.Blocks[I]; 464*93a2c291Sspupyrev // Do not attempt to find unknown successors from a unknown or a 465*93a2c291Sspupyrev // zero-flow block 466*93a2c291Sspupyrev if (SrcBlock->UnknownWeight || SrcBlock->Flow == 0) 467*93a2c291Sspupyrev continue; 468*93a2c291Sspupyrev 469*93a2c291Sspupyrev std::vector<FlowBlock *> UnknownSuccs; 470*93a2c291Sspupyrev FlowBlock *DstBlock = nullptr; 471*93a2c291Sspupyrev // Find a unknown subgraphs starting at block SrcBlock 472*93a2c291Sspupyrev if (!findUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs)) 473*93a2c291Sspupyrev continue; 474*93a2c291Sspupyrev // At the moment, we do not rebalance subgraphs containing cycles among 475*93a2c291Sspupyrev // unknown blocks 476*93a2c291Sspupyrev if (!isAcyclicSubgraph(SrcBlock, DstBlock, UnknownSuccs)) 477*93a2c291Sspupyrev continue; 478*93a2c291Sspupyrev 479*93a2c291Sspupyrev // Rebalance the flow 480*93a2c291Sspupyrev rebalanceUnknownSubgraph(SrcBlock, DstBlock, UnknownSuccs); 481*93a2c291Sspupyrev } 482*93a2c291Sspupyrev } 483*93a2c291Sspupyrev 484*93a2c291Sspupyrev /// Find a unknown subgraph starting at block SrcBlock. 485*93a2c291Sspupyrev /// If the search is successful, the method sets DstBlock and UnknownSuccs. 486*93a2c291Sspupyrev bool findUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *&DstBlock, 487*93a2c291Sspupyrev std::vector<FlowBlock *> &UnknownSuccs) { 488*93a2c291Sspupyrev // Run BFS from SrcBlock and make sure all paths are going through unknown 489*93a2c291Sspupyrev // blocks and end at a non-unknown DstBlock 490*93a2c291Sspupyrev auto Visited = std::vector<bool>(NumBlocks(), false); 491*93a2c291Sspupyrev std::queue<uint64_t> Queue; 492*93a2c291Sspupyrev DstBlock = nullptr; 493*93a2c291Sspupyrev 494*93a2c291Sspupyrev Queue.push(SrcBlock->Index); 495*93a2c291Sspupyrev Visited[SrcBlock->Index] = true; 496*93a2c291Sspupyrev while (!Queue.empty()) { 497*93a2c291Sspupyrev auto &Block = Func.Blocks[Queue.front()]; 498*93a2c291Sspupyrev Queue.pop(); 499*93a2c291Sspupyrev // Process blocks reachable from Block 500*93a2c291Sspupyrev for (auto Jump : Block.SuccJumps) { 501*93a2c291Sspupyrev uint64_t Dst = Jump->Target; 502*93a2c291Sspupyrev if (Visited[Dst]) 503*93a2c291Sspupyrev continue; 504*93a2c291Sspupyrev Visited[Dst] = true; 505*93a2c291Sspupyrev if (!Func.Blocks[Dst].UnknownWeight) { 506*93a2c291Sspupyrev // If we see non-unique non-unknown block reachable from SrcBlock, 507*93a2c291Sspupyrev // stop processing and skip rebalancing 508*93a2c291Sspupyrev FlowBlock *CandidateDstBlock = &Func.Blocks[Dst]; 509*93a2c291Sspupyrev if (DstBlock != nullptr && DstBlock != CandidateDstBlock) 510*93a2c291Sspupyrev return false; 511*93a2c291Sspupyrev DstBlock = CandidateDstBlock; 512*93a2c291Sspupyrev } else { 513*93a2c291Sspupyrev Queue.push(Dst); 514*93a2c291Sspupyrev UnknownSuccs.push_back(&Func.Blocks[Dst]); 515*93a2c291Sspupyrev } 516*93a2c291Sspupyrev } 517*93a2c291Sspupyrev } 518*93a2c291Sspupyrev 519*93a2c291Sspupyrev // If the list of unknown blocks is empty, we don't need rebalancing 520*93a2c291Sspupyrev if (UnknownSuccs.empty()) 521*93a2c291Sspupyrev return false; 522*93a2c291Sspupyrev // If all reachable nodes from SrcBlock are unknown, skip rebalancing 523*93a2c291Sspupyrev if (DstBlock == nullptr) 524*93a2c291Sspupyrev return false; 525*93a2c291Sspupyrev // If any of the unknown blocks is an exit block, skip rebalancing 526*93a2c291Sspupyrev for (auto Block : UnknownSuccs) { 527*93a2c291Sspupyrev if (Block->isExit()) 528*93a2c291Sspupyrev return false; 529*93a2c291Sspupyrev } 530*93a2c291Sspupyrev 531*93a2c291Sspupyrev return true; 532*93a2c291Sspupyrev } 533*93a2c291Sspupyrev 534*93a2c291Sspupyrev /// Verify if the given unknown subgraph is acyclic, and if yes, reorder 535*93a2c291Sspupyrev /// UnknownSuccs in the topological order (so that all jumps are "forward"). 536*93a2c291Sspupyrev bool isAcyclicSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock, 537*93a2c291Sspupyrev std::vector<FlowBlock *> &UnknownSuccs) { 538*93a2c291Sspupyrev // Extract local in-degrees in the considered subgraph 539*93a2c291Sspupyrev auto LocalInDegree = std::vector<uint64_t>(NumBlocks(), 0); 540*93a2c291Sspupyrev for (auto Jump : SrcBlock->SuccJumps) { 541*93a2c291Sspupyrev LocalInDegree[Jump->Target]++; 542*93a2c291Sspupyrev } 543*93a2c291Sspupyrev for (uint64_t I = 0; I < UnknownSuccs.size(); I++) { 544*93a2c291Sspupyrev for (auto Jump : UnknownSuccs[I]->SuccJumps) { 545*93a2c291Sspupyrev LocalInDegree[Jump->Target]++; 546*93a2c291Sspupyrev } 547*93a2c291Sspupyrev } 548*93a2c291Sspupyrev // A loop containing SrcBlock 549*93a2c291Sspupyrev if (LocalInDegree[SrcBlock->Index] > 0) 550*93a2c291Sspupyrev return false; 551*93a2c291Sspupyrev 552*93a2c291Sspupyrev std::vector<FlowBlock *> AcyclicOrder; 553*93a2c291Sspupyrev std::queue<uint64_t> Queue; 554*93a2c291Sspupyrev Queue.push(SrcBlock->Index); 555*93a2c291Sspupyrev while (!Queue.empty()) { 556*93a2c291Sspupyrev auto &Block = Func.Blocks[Queue.front()]; 557*93a2c291Sspupyrev Queue.pop(); 558*93a2c291Sspupyrev // Stop propagation once we reach DstBlock 559*93a2c291Sspupyrev if (Block.Index == DstBlock->Index) 560*93a2c291Sspupyrev break; 561*93a2c291Sspupyrev 562*93a2c291Sspupyrev AcyclicOrder.push_back(&Block); 563*93a2c291Sspupyrev // Add to the queue all successors with zero local in-degree 564*93a2c291Sspupyrev for (auto Jump : Block.SuccJumps) { 565*93a2c291Sspupyrev uint64_t Dst = Jump->Target; 566*93a2c291Sspupyrev LocalInDegree[Dst]--; 567*93a2c291Sspupyrev if (LocalInDegree[Dst] == 0) { 568*93a2c291Sspupyrev Queue.push(Dst); 569*93a2c291Sspupyrev } 570*93a2c291Sspupyrev } 571*93a2c291Sspupyrev } 572*93a2c291Sspupyrev 573*93a2c291Sspupyrev // If there is a cycle in the subgraph, AcyclicOrder contains only a subset 574*93a2c291Sspupyrev // of all blocks 575*93a2c291Sspupyrev if (UnknownSuccs.size() + 1 != AcyclicOrder.size()) 576*93a2c291Sspupyrev return false; 577*93a2c291Sspupyrev UnknownSuccs = AcyclicOrder; 578*93a2c291Sspupyrev return true; 579*93a2c291Sspupyrev } 580*93a2c291Sspupyrev 581*93a2c291Sspupyrev /// Rebalance a given subgraph. 582*93a2c291Sspupyrev void rebalanceUnknownSubgraph(FlowBlock *SrcBlock, FlowBlock *DstBlock, 583*93a2c291Sspupyrev std::vector<FlowBlock *> &UnknownSuccs) { 584*93a2c291Sspupyrev assert(SrcBlock->Flow > 0 && "zero-flow block in unknown subgraph"); 585*93a2c291Sspupyrev assert(UnknownSuccs.front() == SrcBlock && "incorrect order of unknowns"); 586*93a2c291Sspupyrev 587*93a2c291Sspupyrev for (auto Block : UnknownSuccs) { 588*93a2c291Sspupyrev // Block's flow is the sum of incoming flows 589*93a2c291Sspupyrev uint64_t TotalFlow = 0; 590*93a2c291Sspupyrev if (Block == SrcBlock) { 591*93a2c291Sspupyrev TotalFlow = Block->Flow; 592*93a2c291Sspupyrev } else { 593*93a2c291Sspupyrev for (auto Jump : Block->PredJumps) { 594*93a2c291Sspupyrev TotalFlow += Jump->Flow; 595*93a2c291Sspupyrev } 596*93a2c291Sspupyrev Block->Flow = TotalFlow; 597*93a2c291Sspupyrev } 598*93a2c291Sspupyrev 599*93a2c291Sspupyrev // Process all successor jumps and update corresponding flow values 600*93a2c291Sspupyrev for (uint64_t I = 0; I < Block->SuccJumps.size(); I++) { 601*93a2c291Sspupyrev auto Jump = Block->SuccJumps[I]; 602*93a2c291Sspupyrev if (I + 1 == Block->SuccJumps.size()) { 603*93a2c291Sspupyrev Jump->Flow = TotalFlow; 604*93a2c291Sspupyrev continue; 605*93a2c291Sspupyrev } 606*93a2c291Sspupyrev uint64_t Flow = uint64_t(TotalFlow * UnknownFirstSuccProbability); 607*93a2c291Sspupyrev Jump->Flow = Flow; 608*93a2c291Sspupyrev TotalFlow -= Flow; 609*93a2c291Sspupyrev } 610*93a2c291Sspupyrev } 611*93a2c291Sspupyrev } 612*93a2c291Sspupyrev 61398dd2f9eSspupyrev /// A constant indicating an arbitrary exit block of a function. 61498dd2f9eSspupyrev static constexpr uint64_t AnyExitBlock = uint64_t(-1); 61598dd2f9eSspupyrev 61698dd2f9eSspupyrev /// The function. 61798dd2f9eSspupyrev FlowFunction &Func; 61898dd2f9eSspupyrev }; 61998dd2f9eSspupyrev 6207cc2493dSspupyrev /// Initializing flow network for a given function. 6217cc2493dSspupyrev /// 6227cc2493dSspupyrev /// Every block is split into three nodes that are responsible for (i) an 6237cc2493dSspupyrev /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or 6247cc2493dSspupyrev /// reduction of the block weight. 6257cc2493dSspupyrev void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) { 6267cc2493dSspupyrev uint64_t NumBlocks = Func.Blocks.size(); 6277cc2493dSspupyrev assert(NumBlocks > 1 && "Too few blocks in a function"); 6287cc2493dSspupyrev LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n"); 6297cc2493dSspupyrev 6307cc2493dSspupyrev // Pre-process data: make sure the entry weight is at least 1 6317cc2493dSspupyrev if (Func.Blocks[Func.Entry].Weight == 0) { 6327cc2493dSspupyrev Func.Blocks[Func.Entry].Weight = 1; 6337cc2493dSspupyrev } 6347cc2493dSspupyrev // Introducing dummy source/sink pairs to allow flow circulation. 6357cc2493dSspupyrev // The nodes corresponding to blocks of Func have indicies in the range 6367cc2493dSspupyrev // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values. 6377cc2493dSspupyrev uint64_t S = 3 * NumBlocks; 6387cc2493dSspupyrev uint64_t T = S + 1; 6397cc2493dSspupyrev uint64_t S1 = S + 2; 6407cc2493dSspupyrev uint64_t T1 = S + 3; 6417cc2493dSspupyrev 6427cc2493dSspupyrev Network.initialize(3 * NumBlocks + 4, S1, T1); 6437cc2493dSspupyrev 6447cc2493dSspupyrev // Create three nodes for every block of the function 6457cc2493dSspupyrev for (uint64_t B = 0; B < NumBlocks; B++) { 6467cc2493dSspupyrev auto &Block = Func.Blocks[B]; 6477cc2493dSspupyrev assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) && 6487cc2493dSspupyrev "non-zero weight of a block w/o weight except for an entry"); 6497cc2493dSspupyrev 6507cc2493dSspupyrev // Split every block into two nodes 6517cc2493dSspupyrev uint64_t Bin = 3 * B; 6527cc2493dSspupyrev uint64_t Bout = 3 * B + 1; 6537cc2493dSspupyrev uint64_t Baux = 3 * B + 2; 6547cc2493dSspupyrev if (Block.Weight > 0) { 6557cc2493dSspupyrev Network.addEdge(S1, Bout, Block.Weight, 0); 6567cc2493dSspupyrev Network.addEdge(Bin, T1, Block.Weight, 0); 6577cc2493dSspupyrev } 6587cc2493dSspupyrev 6597cc2493dSspupyrev // Edges from S and to T 6607cc2493dSspupyrev assert((!Block.isEntry() || !Block.isExit()) && 6617cc2493dSspupyrev "a block cannot be an entry and an exit"); 6627cc2493dSspupyrev if (Block.isEntry()) { 6637cc2493dSspupyrev Network.addEdge(S, Bin, 0); 6647cc2493dSspupyrev } else if (Block.isExit()) { 6657cc2493dSspupyrev Network.addEdge(Bout, T, 0); 6667cc2493dSspupyrev } 6677cc2493dSspupyrev 6687cc2493dSspupyrev // An auxiliary node to allow increase/reduction of block counts: 6697cc2493dSspupyrev // We assume that decreasing block counts is more expensive than increasing, 6707cc2493dSspupyrev // and thus, setting separate costs here. In the future we may want to tune 6717cc2493dSspupyrev // the relative costs so as to maximize the quality of generated profiles. 6727cc2493dSspupyrev int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc; 6737cc2493dSspupyrev int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec; 6747cc2493dSspupyrev if (Block.UnknownWeight) { 6757cc2493dSspupyrev // Do not penalize changing weights of blocks w/o known profile count 6767cc2493dSspupyrev AuxCostInc = 0; 6777cc2493dSspupyrev AuxCostDec = 0; 6787cc2493dSspupyrev } else { 6797cc2493dSspupyrev // Increasing the count for "cold" blocks with zero initial count is more 6807cc2493dSspupyrev // expensive than for "hot" ones 6817cc2493dSspupyrev if (Block.Weight == 0) { 6827cc2493dSspupyrev AuxCostInc = MinCostMaxFlow::AuxCostIncZero; 6837cc2493dSspupyrev } 6847cc2493dSspupyrev // Modifying the count of the entry block is expensive 6857cc2493dSspupyrev if (Block.isEntry()) { 6867cc2493dSspupyrev AuxCostInc = MinCostMaxFlow::AuxCostIncEntry; 6877cc2493dSspupyrev AuxCostDec = MinCostMaxFlow::AuxCostDecEntry; 6887cc2493dSspupyrev } 6897cc2493dSspupyrev } 6907cc2493dSspupyrev // For blocks with self-edges, do not penalize a reduction of the count, 6917cc2493dSspupyrev // as all of the increase can be attributed to the self-edge 6927cc2493dSspupyrev if (Block.HasSelfEdge) { 6937cc2493dSspupyrev AuxCostDec = 0; 6947cc2493dSspupyrev } 6957cc2493dSspupyrev 6967cc2493dSspupyrev Network.addEdge(Bin, Baux, AuxCostInc); 6977cc2493dSspupyrev Network.addEdge(Baux, Bout, AuxCostInc); 6987cc2493dSspupyrev if (Block.Weight > 0) { 6997cc2493dSspupyrev Network.addEdge(Bout, Baux, AuxCostDec); 7007cc2493dSspupyrev Network.addEdge(Baux, Bin, AuxCostDec); 7017cc2493dSspupyrev } 7027cc2493dSspupyrev } 7037cc2493dSspupyrev 7047cc2493dSspupyrev // Creating edges for every jump 7057cc2493dSspupyrev for (auto &Jump : Func.Jumps) { 7067cc2493dSspupyrev uint64_t Src = Jump.Source; 7077cc2493dSspupyrev uint64_t Dst = Jump.Target; 7087cc2493dSspupyrev if (Src != Dst) { 7097cc2493dSspupyrev uint64_t SrcOut = 3 * Src + 1; 7107cc2493dSspupyrev uint64_t DstIn = 3 * Dst; 7117cc2493dSspupyrev uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0; 7127cc2493dSspupyrev Network.addEdge(SrcOut, DstIn, Cost); 7137cc2493dSspupyrev } 7147cc2493dSspupyrev } 7157cc2493dSspupyrev 7167cc2493dSspupyrev // Make sure we have a valid flow circulation 7177cc2493dSspupyrev Network.addEdge(T, S, 0); 7187cc2493dSspupyrev } 7197cc2493dSspupyrev 7207cc2493dSspupyrev /// Extract resulting block and edge counts from the flow network. 7217cc2493dSspupyrev void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) { 7227cc2493dSspupyrev uint64_t NumBlocks = Func.Blocks.size(); 7237cc2493dSspupyrev 7247cc2493dSspupyrev // Extract resulting block counts 7257cc2493dSspupyrev for (uint64_t Src = 0; Src < NumBlocks; Src++) { 7267cc2493dSspupyrev auto &Block = Func.Blocks[Src]; 7277cc2493dSspupyrev uint64_t SrcOut = 3 * Src + 1; 7287cc2493dSspupyrev int64_t Flow = 0; 7297cc2493dSspupyrev for (auto &Adj : Network.getFlow(SrcOut)) { 7307cc2493dSspupyrev uint64_t DstIn = Adj.first; 7317cc2493dSspupyrev int64_t DstFlow = Adj.second; 7327cc2493dSspupyrev bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2); 7337cc2493dSspupyrev if (!IsAuxNode || Block.HasSelfEdge) { 7347cc2493dSspupyrev Flow += DstFlow; 7357cc2493dSspupyrev } 7367cc2493dSspupyrev } 7377cc2493dSspupyrev Block.Flow = Flow; 7387cc2493dSspupyrev assert(Flow >= 0 && "negative block flow"); 7397cc2493dSspupyrev } 7407cc2493dSspupyrev 7417cc2493dSspupyrev // Extract resulting jump counts 7427cc2493dSspupyrev for (auto &Jump : Func.Jumps) { 7437cc2493dSspupyrev uint64_t Src = Jump.Source; 7447cc2493dSspupyrev uint64_t Dst = Jump.Target; 7457cc2493dSspupyrev int64_t Flow = 0; 7467cc2493dSspupyrev if (Src != Dst) { 7477cc2493dSspupyrev uint64_t SrcOut = 3 * Src + 1; 7487cc2493dSspupyrev uint64_t DstIn = 3 * Dst; 7497cc2493dSspupyrev Flow = Network.getFlow(SrcOut, DstIn); 7507cc2493dSspupyrev } else { 7517cc2493dSspupyrev uint64_t SrcOut = 3 * Src + 1; 7527cc2493dSspupyrev uint64_t SrcAux = 3 * Src + 2; 7537cc2493dSspupyrev int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux); 7547cc2493dSspupyrev if (AuxFlow > 0) 7557cc2493dSspupyrev Flow = AuxFlow; 7567cc2493dSspupyrev } 7577cc2493dSspupyrev Jump.Flow = Flow; 7587cc2493dSspupyrev assert(Flow >= 0 && "negative jump flow"); 7597cc2493dSspupyrev } 7607cc2493dSspupyrev } 7617cc2493dSspupyrev 7627cc2493dSspupyrev #ifndef NDEBUG 7637cc2493dSspupyrev /// Verify that the computed flow values satisfy flow conservation rules 7647cc2493dSspupyrev void verifyWeights(const FlowFunction &Func) { 7657cc2493dSspupyrev const uint64_t NumBlocks = Func.Blocks.size(); 7667cc2493dSspupyrev auto InFlow = std::vector<uint64_t>(NumBlocks, 0); 7677cc2493dSspupyrev auto OutFlow = std::vector<uint64_t>(NumBlocks, 0); 7687cc2493dSspupyrev for (auto &Jump : Func.Jumps) { 7697cc2493dSspupyrev InFlow[Jump.Target] += Jump.Flow; 7707cc2493dSspupyrev OutFlow[Jump.Source] += Jump.Flow; 7717cc2493dSspupyrev } 7727cc2493dSspupyrev 7737cc2493dSspupyrev uint64_t TotalInFlow = 0; 7747cc2493dSspupyrev uint64_t TotalOutFlow = 0; 7757cc2493dSspupyrev for (uint64_t I = 0; I < NumBlocks; I++) { 7767cc2493dSspupyrev auto &Block = Func.Blocks[I]; 7777cc2493dSspupyrev if (Block.isEntry()) { 7787cc2493dSspupyrev TotalInFlow += Block.Flow; 7797cc2493dSspupyrev assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 7807cc2493dSspupyrev } else if (Block.isExit()) { 7817cc2493dSspupyrev TotalOutFlow += Block.Flow; 7827cc2493dSspupyrev assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 7837cc2493dSspupyrev } else { 7847cc2493dSspupyrev assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 7857cc2493dSspupyrev assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 7867cc2493dSspupyrev } 7877cc2493dSspupyrev } 7887cc2493dSspupyrev assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow"); 78998dd2f9eSspupyrev 79098dd2f9eSspupyrev // Verify that there are no isolated flow components 79198dd2f9eSspupyrev // One could modify FlowFunction to hold edges indexed by the sources, which 79298dd2f9eSspupyrev // will avoid a creation of the object 79398dd2f9eSspupyrev auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks); 79498dd2f9eSspupyrev for (auto &Jump : Func.Jumps) { 79598dd2f9eSspupyrev if (Jump.Flow > 0) { 79698dd2f9eSspupyrev PositiveFlowEdges[Jump.Source].push_back(Jump.Target); 79798dd2f9eSspupyrev } 79898dd2f9eSspupyrev } 79998dd2f9eSspupyrev 800*93a2c291Sspupyrev // Run BFS from the source along edges with positive flow 80198dd2f9eSspupyrev std::queue<uint64_t> Queue; 80298dd2f9eSspupyrev auto Visited = std::vector<bool>(NumBlocks, false); 80398dd2f9eSspupyrev Queue.push(Func.Entry); 80498dd2f9eSspupyrev Visited[Func.Entry] = true; 80598dd2f9eSspupyrev while (!Queue.empty()) { 80698dd2f9eSspupyrev uint64_t Src = Queue.front(); 80798dd2f9eSspupyrev Queue.pop(); 80898dd2f9eSspupyrev for (uint64_t Dst : PositiveFlowEdges[Src]) { 80998dd2f9eSspupyrev if (!Visited[Dst]) { 81098dd2f9eSspupyrev Queue.push(Dst); 81198dd2f9eSspupyrev Visited[Dst] = true; 81298dd2f9eSspupyrev } 81398dd2f9eSspupyrev } 81498dd2f9eSspupyrev } 81598dd2f9eSspupyrev 81698dd2f9eSspupyrev // Verify that every block that has a positive flow is reached from the source 81798dd2f9eSspupyrev // along edges with a positive flow 81898dd2f9eSspupyrev for (uint64_t I = 0; I < NumBlocks; I++) { 81998dd2f9eSspupyrev auto &Block = Func.Blocks[I]; 82098dd2f9eSspupyrev assert((Visited[I] || Block.Flow == 0) && "an isolated flow component"); 82198dd2f9eSspupyrev } 8227cc2493dSspupyrev } 8237cc2493dSspupyrev #endif 8247cc2493dSspupyrev 8257cc2493dSspupyrev } // end of anonymous namespace 8267cc2493dSspupyrev 8277cc2493dSspupyrev /// Apply the profile inference algorithm for a given flow function 8287cc2493dSspupyrev void llvm::applyFlowInference(FlowFunction &Func) { 8297cc2493dSspupyrev // Create and apply an inference network model 8307cc2493dSspupyrev auto InferenceNetwork = MinCostMaxFlow(); 8317cc2493dSspupyrev initializeNetwork(InferenceNetwork, Func); 8327cc2493dSspupyrev InferenceNetwork.run(); 8337cc2493dSspupyrev 8347cc2493dSspupyrev // Extract flow values for every block and every edge 8357cc2493dSspupyrev extractWeights(InferenceNetwork, Func); 8367cc2493dSspupyrev 83798dd2f9eSspupyrev // Post-processing adjustments to the flow 83898dd2f9eSspupyrev auto Adjuster = FlowAdjuster(Func); 83998dd2f9eSspupyrev Adjuster.run(); 84098dd2f9eSspupyrev 8417cc2493dSspupyrev #ifndef NDEBUG 8427cc2493dSspupyrev // Verify the result 8437cc2493dSspupyrev verifyWeights(Func); 8447cc2493dSspupyrev #endif 8457cc2493dSspupyrev } 846