17cc2493dSspupyrev //===- SampleProfileInference.cpp - Adjust sample profiles in the IR ------===// 27cc2493dSspupyrev // 37cc2493dSspupyrev // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 47cc2493dSspupyrev // See https://llvm.org/LICENSE.txt for license information. 57cc2493dSspupyrev // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 67cc2493dSspupyrev // 77cc2493dSspupyrev //===----------------------------------------------------------------------===// 87cc2493dSspupyrev // 97cc2493dSspupyrev // This file implements a profile inference algorithm. Given an incomplete and 107cc2493dSspupyrev // possibly imprecise block counts, the algorithm reconstructs realistic block 117cc2493dSspupyrev // and edge counts that satisfy flow conservation rules, while minimally modify 127cc2493dSspupyrev // input block counts. 137cc2493dSspupyrev // 147cc2493dSspupyrev //===----------------------------------------------------------------------===// 157cc2493dSspupyrev 167cc2493dSspupyrev #include "llvm/Transforms/Utils/SampleProfileInference.h" 177cc2493dSspupyrev #include "llvm/Support/Debug.h" 187cc2493dSspupyrev #include <queue> 197cc2493dSspupyrev #include <set> 207cc2493dSspupyrev 217cc2493dSspupyrev using namespace llvm; 227cc2493dSspupyrev #define DEBUG_TYPE "sample-profile-inference" 237cc2493dSspupyrev 247cc2493dSspupyrev namespace { 257cc2493dSspupyrev 267cc2493dSspupyrev /// A value indicating an infinite flow/capacity/weight of a block/edge. 277cc2493dSspupyrev /// Not using numeric_limits<int64_t>::max(), as the values can be summed up 287cc2493dSspupyrev /// during the execution. 297cc2493dSspupyrev static constexpr int64_t INF = ((int64_t)1) << 50; 307cc2493dSspupyrev 317cc2493dSspupyrev /// The minimum-cost maximum flow algorithm. 327cc2493dSspupyrev /// 337cc2493dSspupyrev /// The algorithm finds the maximum flow of minimum cost on a given (directed) 347cc2493dSspupyrev /// network using a modified version of the classical Moore-Bellman-Ford 357cc2493dSspupyrev /// approach. The algorithm applies a number of augmentation iterations in which 367cc2493dSspupyrev /// flow is sent along paths of positive capacity from the source to the sink. 377cc2493dSspupyrev /// The worst-case time complexity of the implementation is O(v(f)*m*n), where 387cc2493dSspupyrev /// where m is the number of edges, n is the number of vertices, and v(f) is the 397cc2493dSspupyrev /// value of the maximum flow. However, the observed running time on typical 407cc2493dSspupyrev /// instances is sub-quadratic, that is, o(n^2). 417cc2493dSspupyrev /// 427cc2493dSspupyrev /// The input is a set of edges with specified costs and capacities, and a pair 437cc2493dSspupyrev /// of nodes (source and sink). The output is the flow along each edge of the 447cc2493dSspupyrev /// minimum total cost respecting the given edge capacities. 457cc2493dSspupyrev class MinCostMaxFlow { 467cc2493dSspupyrev public: 477cc2493dSspupyrev // Initialize algorithm's data structures for a network of a given size. 487cc2493dSspupyrev void initialize(uint64_t NodeCount, uint64_t SourceNode, uint64_t SinkNode) { 497cc2493dSspupyrev Source = SourceNode; 507cc2493dSspupyrev Target = SinkNode; 517cc2493dSspupyrev 527cc2493dSspupyrev Nodes = std::vector<Node>(NodeCount); 537cc2493dSspupyrev Edges = std::vector<std::vector<Edge>>(NodeCount, std::vector<Edge>()); 547cc2493dSspupyrev } 557cc2493dSspupyrev 567cc2493dSspupyrev // Run the algorithm. 577cc2493dSspupyrev int64_t run() { 587cc2493dSspupyrev // Find an augmenting path and update the flow along the path 597cc2493dSspupyrev size_t AugmentationIters = 0; 607cc2493dSspupyrev while (findAugmentingPath()) { 617cc2493dSspupyrev augmentFlowAlongPath(); 627cc2493dSspupyrev AugmentationIters++; 637cc2493dSspupyrev } 647cc2493dSspupyrev 657cc2493dSspupyrev // Compute the total flow and its cost 667cc2493dSspupyrev int64_t TotalCost = 0; 677cc2493dSspupyrev int64_t TotalFlow = 0; 687cc2493dSspupyrev for (uint64_t Src = 0; Src < Nodes.size(); Src++) { 697cc2493dSspupyrev for (auto &Edge : Edges[Src]) { 707cc2493dSspupyrev if (Edge.Flow > 0) { 717cc2493dSspupyrev TotalCost += Edge.Cost * Edge.Flow; 727cc2493dSspupyrev if (Src == Source) 737cc2493dSspupyrev TotalFlow += Edge.Flow; 747cc2493dSspupyrev } 757cc2493dSspupyrev } 767cc2493dSspupyrev } 777cc2493dSspupyrev LLVM_DEBUG(dbgs() << "Completed profi after " << AugmentationIters 787cc2493dSspupyrev << " iterations with " << TotalFlow << " total flow" 797cc2493dSspupyrev << " of " << TotalCost << " cost\n"); 8022d82949SKazu Hirata (void)TotalFlow; 817cc2493dSspupyrev return TotalCost; 827cc2493dSspupyrev } 837cc2493dSspupyrev 847cc2493dSspupyrev /// Adding an edge to the network with a specified capacity and a cost. 857cc2493dSspupyrev /// Multiple edges between a pair of nodes are allowed but self-edges 867cc2493dSspupyrev /// are not supported. 877cc2493dSspupyrev void addEdge(uint64_t Src, uint64_t Dst, int64_t Capacity, int64_t Cost) { 887cc2493dSspupyrev assert(Capacity > 0 && "adding an edge of zero capacity"); 897cc2493dSspupyrev assert(Src != Dst && "loop edge are not supported"); 907cc2493dSspupyrev 917cc2493dSspupyrev Edge SrcEdge; 927cc2493dSspupyrev SrcEdge.Dst = Dst; 937cc2493dSspupyrev SrcEdge.Cost = Cost; 947cc2493dSspupyrev SrcEdge.Capacity = Capacity; 957cc2493dSspupyrev SrcEdge.Flow = 0; 967cc2493dSspupyrev SrcEdge.RevEdgeIndex = Edges[Dst].size(); 977cc2493dSspupyrev 987cc2493dSspupyrev Edge DstEdge; 997cc2493dSspupyrev DstEdge.Dst = Src; 1007cc2493dSspupyrev DstEdge.Cost = -Cost; 1017cc2493dSspupyrev DstEdge.Capacity = 0; 1027cc2493dSspupyrev DstEdge.Flow = 0; 1037cc2493dSspupyrev DstEdge.RevEdgeIndex = Edges[Src].size(); 1047cc2493dSspupyrev 1057cc2493dSspupyrev Edges[Src].push_back(SrcEdge); 1067cc2493dSspupyrev Edges[Dst].push_back(DstEdge); 1077cc2493dSspupyrev } 1087cc2493dSspupyrev 1097cc2493dSspupyrev /// Adding an edge to the network of infinite capacity and a given cost. 1107cc2493dSspupyrev void addEdge(uint64_t Src, uint64_t Dst, int64_t Cost) { 1117cc2493dSspupyrev addEdge(Src, Dst, INF, Cost); 1127cc2493dSspupyrev } 1137cc2493dSspupyrev 1147cc2493dSspupyrev /// Get the total flow from a given source node. 1157cc2493dSspupyrev /// Returns a list of pairs (target node, amount of flow to the target). 1167cc2493dSspupyrev const std::vector<std::pair<uint64_t, int64_t>> getFlow(uint64_t Src) const { 1177cc2493dSspupyrev std::vector<std::pair<uint64_t, int64_t>> Flow; 1187cc2493dSspupyrev for (auto &Edge : Edges[Src]) { 1197cc2493dSspupyrev if (Edge.Flow > 0) 1207cc2493dSspupyrev Flow.push_back(std::make_pair(Edge.Dst, Edge.Flow)); 1217cc2493dSspupyrev } 1227cc2493dSspupyrev return Flow; 1237cc2493dSspupyrev } 1247cc2493dSspupyrev 1257cc2493dSspupyrev /// Get the total flow between a pair of nodes. 1267cc2493dSspupyrev int64_t getFlow(uint64_t Src, uint64_t Dst) const { 1277cc2493dSspupyrev int64_t Flow = 0; 1287cc2493dSspupyrev for (auto &Edge : Edges[Src]) { 1297cc2493dSspupyrev if (Edge.Dst == Dst) { 1307cc2493dSspupyrev Flow += Edge.Flow; 1317cc2493dSspupyrev } 1327cc2493dSspupyrev } 1337cc2493dSspupyrev return Flow; 1347cc2493dSspupyrev } 1357cc2493dSspupyrev 1367cc2493dSspupyrev /// A cost of increasing a block's count by one. 1377cc2493dSspupyrev static constexpr int64_t AuxCostInc = 10; 1387cc2493dSspupyrev /// A cost of decreasing a block's count by one. 1397cc2493dSspupyrev static constexpr int64_t AuxCostDec = 20; 1407cc2493dSspupyrev /// A cost of increasing a count of zero-weight block by one. 1417cc2493dSspupyrev static constexpr int64_t AuxCostIncZero = 11; 1427cc2493dSspupyrev /// A cost of increasing the entry block's count by one. 1437cc2493dSspupyrev static constexpr int64_t AuxCostIncEntry = 40; 1447cc2493dSspupyrev /// A cost of decreasing the entry block's count by one. 1457cc2493dSspupyrev static constexpr int64_t AuxCostDecEntry = 10; 1467cc2493dSspupyrev /// A cost of taking an unlikely jump. 1477cc2493dSspupyrev static constexpr int64_t AuxCostUnlikely = ((int64_t)1) << 20; 1487cc2493dSspupyrev 1497cc2493dSspupyrev private: 1507cc2493dSspupyrev /// Check for existence of an augmenting path with a positive capacity. 1517cc2493dSspupyrev bool findAugmentingPath() { 1527cc2493dSspupyrev // Initialize data structures 1537cc2493dSspupyrev for (auto &Node : Nodes) { 1547cc2493dSspupyrev Node.Distance = INF; 1557cc2493dSspupyrev Node.ParentNode = uint64_t(-1); 1567cc2493dSspupyrev Node.ParentEdgeIndex = uint64_t(-1); 1577cc2493dSspupyrev Node.Taken = false; 1587cc2493dSspupyrev } 1597cc2493dSspupyrev 1607cc2493dSspupyrev std::queue<uint64_t> Queue; 1617cc2493dSspupyrev Queue.push(Source); 1627cc2493dSspupyrev Nodes[Source].Distance = 0; 1637cc2493dSspupyrev Nodes[Source].Taken = true; 1647cc2493dSspupyrev while (!Queue.empty()) { 1657cc2493dSspupyrev uint64_t Src = Queue.front(); 1667cc2493dSspupyrev Queue.pop(); 1677cc2493dSspupyrev Nodes[Src].Taken = false; 1687cc2493dSspupyrev // Although the residual network contains edges with negative costs 1697cc2493dSspupyrev // (in particular, backward edges), it can be shown that there are no 1707cc2493dSspupyrev // negative-weight cycles and the following two invariants are maintained: 1717cc2493dSspupyrev // (i) Dist[Source, V] >= 0 and (ii) Dist[V, Target] >= 0 for all nodes V, 1727cc2493dSspupyrev // where Dist is the length of the shortest path between two nodes. This 1737cc2493dSspupyrev // allows to prune the search-space of the path-finding algorithm using 1747cc2493dSspupyrev // the following early-stop criteria: 1757cc2493dSspupyrev // -- If we find a path with zero-distance from Source to Target, stop the 1767cc2493dSspupyrev // search, as the path is the shortest since Dist[Source, Target] >= 0; 1777cc2493dSspupyrev // -- If we have Dist[Source, V] > Dist[Source, Target], then do not 1787cc2493dSspupyrev // process node V, as it is guaranteed _not_ to be on a shortest path 1797cc2493dSspupyrev // from Source to Target; it follows from inequalities 1807cc2493dSspupyrev // Dist[Source, Target] >= Dist[Source, V] + Dist[V, Target] 1817cc2493dSspupyrev // >= Dist[Source, V] 1827cc2493dSspupyrev if (Nodes[Target].Distance == 0) 1837cc2493dSspupyrev break; 1847cc2493dSspupyrev if (Nodes[Src].Distance > Nodes[Target].Distance) 1857cc2493dSspupyrev continue; 1867cc2493dSspupyrev 1877cc2493dSspupyrev // Process adjacent edges 1887cc2493dSspupyrev for (uint64_t EdgeIdx = 0; EdgeIdx < Edges[Src].size(); EdgeIdx++) { 1897cc2493dSspupyrev auto &Edge = Edges[Src][EdgeIdx]; 1907cc2493dSspupyrev if (Edge.Flow < Edge.Capacity) { 1917cc2493dSspupyrev uint64_t Dst = Edge.Dst; 1927cc2493dSspupyrev int64_t NewDistance = Nodes[Src].Distance + Edge.Cost; 1937cc2493dSspupyrev if (Nodes[Dst].Distance > NewDistance) { 1947cc2493dSspupyrev // Update the distance and the parent node/edge 1957cc2493dSspupyrev Nodes[Dst].Distance = NewDistance; 1967cc2493dSspupyrev Nodes[Dst].ParentNode = Src; 1977cc2493dSspupyrev Nodes[Dst].ParentEdgeIndex = EdgeIdx; 1987cc2493dSspupyrev // Add the node to the queue, if it is not there yet 1997cc2493dSspupyrev if (!Nodes[Dst].Taken) { 2007cc2493dSspupyrev Queue.push(Dst); 2017cc2493dSspupyrev Nodes[Dst].Taken = true; 2027cc2493dSspupyrev } 2037cc2493dSspupyrev } 2047cc2493dSspupyrev } 2057cc2493dSspupyrev } 2067cc2493dSspupyrev } 2077cc2493dSspupyrev 2087cc2493dSspupyrev return Nodes[Target].Distance != INF; 2097cc2493dSspupyrev } 2107cc2493dSspupyrev 2117cc2493dSspupyrev /// Update the current flow along the augmenting path. 2127cc2493dSspupyrev void augmentFlowAlongPath() { 2137cc2493dSspupyrev // Find path capacity 2147cc2493dSspupyrev int64_t PathCapacity = INF; 2157cc2493dSspupyrev uint64_t Now = Target; 2167cc2493dSspupyrev while (Now != Source) { 2177cc2493dSspupyrev uint64_t Pred = Nodes[Now].ParentNode; 2187cc2493dSspupyrev auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 2197cc2493dSspupyrev PathCapacity = std::min(PathCapacity, Edge.Capacity - Edge.Flow); 2207cc2493dSspupyrev Now = Pred; 2217cc2493dSspupyrev } 2227cc2493dSspupyrev 2237cc2493dSspupyrev assert(PathCapacity > 0 && "found incorrect augmenting path"); 2247cc2493dSspupyrev 2257cc2493dSspupyrev // Update the flow along the path 2267cc2493dSspupyrev Now = Target; 2277cc2493dSspupyrev while (Now != Source) { 2287cc2493dSspupyrev uint64_t Pred = Nodes[Now].ParentNode; 2297cc2493dSspupyrev auto &Edge = Edges[Pred][Nodes[Now].ParentEdgeIndex]; 2307cc2493dSspupyrev auto &RevEdge = Edges[Now][Edge.RevEdgeIndex]; 2317cc2493dSspupyrev 2327cc2493dSspupyrev Edge.Flow += PathCapacity; 2337cc2493dSspupyrev RevEdge.Flow -= PathCapacity; 2347cc2493dSspupyrev 2357cc2493dSspupyrev Now = Pred; 2367cc2493dSspupyrev } 2377cc2493dSspupyrev } 2387cc2493dSspupyrev 2397cc2493dSspupyrev /// An node in a flow network. 2407cc2493dSspupyrev struct Node { 2417cc2493dSspupyrev /// The cost of the cheapest path from the source to the current node. 2427cc2493dSspupyrev int64_t Distance; 2437cc2493dSspupyrev /// The node preceding the current one in the path. 2447cc2493dSspupyrev uint64_t ParentNode; 2457cc2493dSspupyrev /// The index of the edge between ParentNode and the current node. 2467cc2493dSspupyrev uint64_t ParentEdgeIndex; 2477cc2493dSspupyrev /// An indicator of whether the current node is in a queue. 2487cc2493dSspupyrev bool Taken; 2497cc2493dSspupyrev }; 2507cc2493dSspupyrev /// An edge in a flow network. 2517cc2493dSspupyrev struct Edge { 2527cc2493dSspupyrev /// The cost of the edge. 2537cc2493dSspupyrev int64_t Cost; 2547cc2493dSspupyrev /// The capacity of the edge. 2557cc2493dSspupyrev int64_t Capacity; 2567cc2493dSspupyrev /// The current flow on the edge. 2577cc2493dSspupyrev int64_t Flow; 2587cc2493dSspupyrev /// The destination node of the edge. 2597cc2493dSspupyrev uint64_t Dst; 2607cc2493dSspupyrev /// The index of the reverse edge between Dst and the current node. 2617cc2493dSspupyrev uint64_t RevEdgeIndex; 2627cc2493dSspupyrev }; 2637cc2493dSspupyrev 2647cc2493dSspupyrev /// The set of network nodes. 2657cc2493dSspupyrev std::vector<Node> Nodes; 2667cc2493dSspupyrev /// The set of network edges. 2677cc2493dSspupyrev std::vector<std::vector<Edge>> Edges; 2687cc2493dSspupyrev /// Source node of the flow. 2697cc2493dSspupyrev uint64_t Source; 2707cc2493dSspupyrev /// Target (sink) node of the flow. 2717cc2493dSspupyrev uint64_t Target; 2727cc2493dSspupyrev }; 2737cc2493dSspupyrev 274*98dd2f9eSspupyrev /// Post-processing adjustment of the control flow. 275*98dd2f9eSspupyrev class FlowAdjuster { 276*98dd2f9eSspupyrev public: 277*98dd2f9eSspupyrev FlowAdjuster(FlowFunction &Func) : Func(Func) { 278*98dd2f9eSspupyrev assert(Func.Blocks[Func.Entry].isEntry() && 279*98dd2f9eSspupyrev "incorrect index of the entry block"); 280*98dd2f9eSspupyrev } 281*98dd2f9eSspupyrev 282*98dd2f9eSspupyrev // Run the post-processing 283*98dd2f9eSspupyrev void run() { 284*98dd2f9eSspupyrev /// We adjust the control flow in a function so as to remove all 285*98dd2f9eSspupyrev /// "isolated" components with positive flow that are unreachable 286*98dd2f9eSspupyrev /// from the entry block. For every such component, we find the shortest 287*98dd2f9eSspupyrev /// path from the entry to an exit passing through the component, and 288*98dd2f9eSspupyrev /// increase the flow by one unit along the path. 289*98dd2f9eSspupyrev joinIsolatedComponents(); 290*98dd2f9eSspupyrev } 291*98dd2f9eSspupyrev 292*98dd2f9eSspupyrev private: 293*98dd2f9eSspupyrev void joinIsolatedComponents() { 294*98dd2f9eSspupyrev // Find blocks that are reachable from the source 295*98dd2f9eSspupyrev auto Visited = std::vector<bool>(NumBlocks(), false); 296*98dd2f9eSspupyrev findReachable(Func.Entry, Visited); 297*98dd2f9eSspupyrev 298*98dd2f9eSspupyrev // Iterate over all non-reachable blocks and adjust their weights 299*98dd2f9eSspupyrev for (uint64_t I = 0; I < NumBlocks(); I++) { 300*98dd2f9eSspupyrev auto &Block = Func.Blocks[I]; 301*98dd2f9eSspupyrev if (Block.Flow > 0 && !Visited[I]) { 302*98dd2f9eSspupyrev // Find a path from the entry to an exit passing through the block I 303*98dd2f9eSspupyrev auto Path = findShortestPath(I); 304*98dd2f9eSspupyrev // Increase the flow along the path 305*98dd2f9eSspupyrev assert(Path.size() > 0 && Path[0]->Source == Func.Entry && 306*98dd2f9eSspupyrev "incorrectly computed path adjusting control flow"); 307*98dd2f9eSspupyrev Func.Blocks[Func.Entry].Flow += 1; 308*98dd2f9eSspupyrev for (auto &Jump : Path) { 309*98dd2f9eSspupyrev Jump->Flow += 1; 310*98dd2f9eSspupyrev Func.Blocks[Jump->Target].Flow += 1; 311*98dd2f9eSspupyrev // Update reachability 312*98dd2f9eSspupyrev findReachable(Jump->Target, Visited); 313*98dd2f9eSspupyrev } 314*98dd2f9eSspupyrev } 315*98dd2f9eSspupyrev } 316*98dd2f9eSspupyrev } 317*98dd2f9eSspupyrev 318*98dd2f9eSspupyrev /// Run bfs from a given block along the jumps with a positive flow and mark 319*98dd2f9eSspupyrev /// all reachable blocks. 320*98dd2f9eSspupyrev void findReachable(uint64_t Src, std::vector<bool> &Visited) { 321*98dd2f9eSspupyrev if (Visited[Src]) 322*98dd2f9eSspupyrev return; 323*98dd2f9eSspupyrev std::queue<uint64_t> Queue; 324*98dd2f9eSspupyrev Queue.push(Src); 325*98dd2f9eSspupyrev Visited[Src] = true; 326*98dd2f9eSspupyrev while (!Queue.empty()) { 327*98dd2f9eSspupyrev Src = Queue.front(); 328*98dd2f9eSspupyrev Queue.pop(); 329*98dd2f9eSspupyrev for (auto Jump : Func.Blocks[Src].SuccJumps) { 330*98dd2f9eSspupyrev uint64_t Dst = Jump->Target; 331*98dd2f9eSspupyrev if (Jump->Flow > 0 && !Visited[Dst]) { 332*98dd2f9eSspupyrev Queue.push(Dst); 333*98dd2f9eSspupyrev Visited[Dst] = true; 334*98dd2f9eSspupyrev } 335*98dd2f9eSspupyrev } 336*98dd2f9eSspupyrev } 337*98dd2f9eSspupyrev } 338*98dd2f9eSspupyrev 339*98dd2f9eSspupyrev /// Find the shortest path from the entry block to an exit block passing 340*98dd2f9eSspupyrev /// through a given block. 341*98dd2f9eSspupyrev std::vector<FlowJump *> findShortestPath(uint64_t BlockIdx) { 342*98dd2f9eSspupyrev // A path from the entry block to BlockIdx 343*98dd2f9eSspupyrev auto ForwardPath = findShortestPath(Func.Entry, BlockIdx); 344*98dd2f9eSspupyrev // A path from BlockIdx to an exit block 345*98dd2f9eSspupyrev auto BackwardPath = findShortestPath(BlockIdx, AnyExitBlock); 346*98dd2f9eSspupyrev 347*98dd2f9eSspupyrev // Concatenate the two paths 348*98dd2f9eSspupyrev std::vector<FlowJump *> Result; 349*98dd2f9eSspupyrev Result.insert(Result.end(), ForwardPath.begin(), ForwardPath.end()); 350*98dd2f9eSspupyrev Result.insert(Result.end(), BackwardPath.begin(), BackwardPath.end()); 351*98dd2f9eSspupyrev return Result; 352*98dd2f9eSspupyrev } 353*98dd2f9eSspupyrev 354*98dd2f9eSspupyrev /// Apply the Dijkstra algorithm to find the shortest path from a given 355*98dd2f9eSspupyrev /// Source to a given Target block. 356*98dd2f9eSspupyrev /// If Target == -1, then the path ends at an exit block. 357*98dd2f9eSspupyrev std::vector<FlowJump *> findShortestPath(uint64_t Source, uint64_t Target) { 358*98dd2f9eSspupyrev // Quit early, if possible 359*98dd2f9eSspupyrev if (Source == Target) 360*98dd2f9eSspupyrev return std::vector<FlowJump *>(); 361*98dd2f9eSspupyrev if (Func.Blocks[Source].isExit() && Target == AnyExitBlock) 362*98dd2f9eSspupyrev return std::vector<FlowJump *>(); 363*98dd2f9eSspupyrev 364*98dd2f9eSspupyrev // Initialize data structures 365*98dd2f9eSspupyrev auto Distance = std::vector<int64_t>(NumBlocks(), INF); 366*98dd2f9eSspupyrev auto Parent = std::vector<FlowJump *>(NumBlocks(), nullptr); 367*98dd2f9eSspupyrev Distance[Source] = 0; 368*98dd2f9eSspupyrev std::set<std::pair<uint64_t, uint64_t>> Queue; 369*98dd2f9eSspupyrev Queue.insert(std::make_pair(Distance[Source], Source)); 370*98dd2f9eSspupyrev 371*98dd2f9eSspupyrev // Run the Dijkstra algorithm 372*98dd2f9eSspupyrev while (!Queue.empty()) { 373*98dd2f9eSspupyrev uint64_t Src = Queue.begin()->second; 374*98dd2f9eSspupyrev Queue.erase(Queue.begin()); 375*98dd2f9eSspupyrev // If we found a solution, quit early 376*98dd2f9eSspupyrev if (Src == Target || 377*98dd2f9eSspupyrev (Func.Blocks[Src].isExit() && Target == AnyExitBlock)) 378*98dd2f9eSspupyrev break; 379*98dd2f9eSspupyrev 380*98dd2f9eSspupyrev for (auto Jump : Func.Blocks[Src].SuccJumps) { 381*98dd2f9eSspupyrev uint64_t Dst = Jump->Target; 382*98dd2f9eSspupyrev int64_t JumpDist = jumpDistance(Jump); 383*98dd2f9eSspupyrev if (Distance[Dst] > Distance[Src] + JumpDist) { 384*98dd2f9eSspupyrev Queue.erase(std::make_pair(Distance[Dst], Dst)); 385*98dd2f9eSspupyrev 386*98dd2f9eSspupyrev Distance[Dst] = Distance[Src] + JumpDist; 387*98dd2f9eSspupyrev Parent[Dst] = Jump; 388*98dd2f9eSspupyrev 389*98dd2f9eSspupyrev Queue.insert(std::make_pair(Distance[Dst], Dst)); 390*98dd2f9eSspupyrev } 391*98dd2f9eSspupyrev } 392*98dd2f9eSspupyrev } 393*98dd2f9eSspupyrev // If Target is not provided, find the closest exit block 394*98dd2f9eSspupyrev if (Target == AnyExitBlock) { 395*98dd2f9eSspupyrev for (uint64_t I = 0; I < NumBlocks(); I++) { 396*98dd2f9eSspupyrev if (Func.Blocks[I].isExit() && Parent[I] != nullptr) { 397*98dd2f9eSspupyrev if (Target == AnyExitBlock || Distance[Target] > Distance[I]) { 398*98dd2f9eSspupyrev Target = I; 399*98dd2f9eSspupyrev } 400*98dd2f9eSspupyrev } 401*98dd2f9eSspupyrev } 402*98dd2f9eSspupyrev } 403*98dd2f9eSspupyrev assert(Parent[Target] != nullptr && "a path does not exist"); 404*98dd2f9eSspupyrev 405*98dd2f9eSspupyrev // Extract the constructed path 406*98dd2f9eSspupyrev std::vector<FlowJump *> Result; 407*98dd2f9eSspupyrev uint64_t Now = Target; 408*98dd2f9eSspupyrev while (Now != Source) { 409*98dd2f9eSspupyrev assert(Now == Parent[Now]->Target && "incorrect parent jump"); 410*98dd2f9eSspupyrev Result.push_back(Parent[Now]); 411*98dd2f9eSspupyrev Now = Parent[Now]->Source; 412*98dd2f9eSspupyrev } 413*98dd2f9eSspupyrev // Reverse the path, since it is extracted from Target to Source 414*98dd2f9eSspupyrev std::reverse(Result.begin(), Result.end()); 415*98dd2f9eSspupyrev return Result; 416*98dd2f9eSspupyrev } 417*98dd2f9eSspupyrev 418*98dd2f9eSspupyrev /// A distance of a path for a given jump. 419*98dd2f9eSspupyrev /// In order to incite the path to use blocks/jumps with large positive flow, 420*98dd2f9eSspupyrev /// and avoid changing branch probability of outgoing edges drastically, 421*98dd2f9eSspupyrev /// set the distance as follows: 422*98dd2f9eSspupyrev /// if Jump.Flow > 0, then distance = max(100 - Jump->Flow, 0) 423*98dd2f9eSspupyrev /// if Block.Weight > 0, then distance = 1 424*98dd2f9eSspupyrev /// otherwise distance >> 1 425*98dd2f9eSspupyrev int64_t jumpDistance(FlowJump *Jump) const { 426*98dd2f9eSspupyrev int64_t BaseDistance = 100; 427*98dd2f9eSspupyrev if (Jump->IsUnlikely) 428*98dd2f9eSspupyrev return MinCostMaxFlow::AuxCostUnlikely; 429*98dd2f9eSspupyrev if (Jump->Flow > 0) 430*98dd2f9eSspupyrev return std::max(BaseDistance - (int64_t)Jump->Flow, (int64_t)0); 431*98dd2f9eSspupyrev if (Func.Blocks[Jump->Target].Weight > 0) 432*98dd2f9eSspupyrev return BaseDistance; 433*98dd2f9eSspupyrev return BaseDistance * (NumBlocks() + 1); 434*98dd2f9eSspupyrev }; 435*98dd2f9eSspupyrev 436*98dd2f9eSspupyrev uint64_t NumBlocks() const { return Func.Blocks.size(); } 437*98dd2f9eSspupyrev 438*98dd2f9eSspupyrev /// A constant indicating an arbitrary exit block of a function. 439*98dd2f9eSspupyrev static constexpr uint64_t AnyExitBlock = uint64_t(-1); 440*98dd2f9eSspupyrev 441*98dd2f9eSspupyrev /// The function. 442*98dd2f9eSspupyrev FlowFunction &Func; 443*98dd2f9eSspupyrev }; 444*98dd2f9eSspupyrev 4457cc2493dSspupyrev /// Initializing flow network for a given function. 4467cc2493dSspupyrev /// 4477cc2493dSspupyrev /// Every block is split into three nodes that are responsible for (i) an 4487cc2493dSspupyrev /// incoming flow, (ii) an outgoing flow, and (iii) penalizing an increase or 4497cc2493dSspupyrev /// reduction of the block weight. 4507cc2493dSspupyrev void initializeNetwork(MinCostMaxFlow &Network, FlowFunction &Func) { 4517cc2493dSspupyrev uint64_t NumBlocks = Func.Blocks.size(); 4527cc2493dSspupyrev assert(NumBlocks > 1 && "Too few blocks in a function"); 4537cc2493dSspupyrev LLVM_DEBUG(dbgs() << "Initializing profi for " << NumBlocks << " blocks\n"); 4547cc2493dSspupyrev 4557cc2493dSspupyrev // Pre-process data: make sure the entry weight is at least 1 4567cc2493dSspupyrev if (Func.Blocks[Func.Entry].Weight == 0) { 4577cc2493dSspupyrev Func.Blocks[Func.Entry].Weight = 1; 4587cc2493dSspupyrev } 4597cc2493dSspupyrev // Introducing dummy source/sink pairs to allow flow circulation. 4607cc2493dSspupyrev // The nodes corresponding to blocks of Func have indicies in the range 4617cc2493dSspupyrev // [0..3 * NumBlocks); the dummy nodes are indexed by the next four values. 4627cc2493dSspupyrev uint64_t S = 3 * NumBlocks; 4637cc2493dSspupyrev uint64_t T = S + 1; 4647cc2493dSspupyrev uint64_t S1 = S + 2; 4657cc2493dSspupyrev uint64_t T1 = S + 3; 4667cc2493dSspupyrev 4677cc2493dSspupyrev Network.initialize(3 * NumBlocks + 4, S1, T1); 4687cc2493dSspupyrev 4697cc2493dSspupyrev // Create three nodes for every block of the function 4707cc2493dSspupyrev for (uint64_t B = 0; B < NumBlocks; B++) { 4717cc2493dSspupyrev auto &Block = Func.Blocks[B]; 4727cc2493dSspupyrev assert((!Block.UnknownWeight || Block.Weight == 0 || Block.isEntry()) && 4737cc2493dSspupyrev "non-zero weight of a block w/o weight except for an entry"); 4747cc2493dSspupyrev 4757cc2493dSspupyrev // Split every block into two nodes 4767cc2493dSspupyrev uint64_t Bin = 3 * B; 4777cc2493dSspupyrev uint64_t Bout = 3 * B + 1; 4787cc2493dSspupyrev uint64_t Baux = 3 * B + 2; 4797cc2493dSspupyrev if (Block.Weight > 0) { 4807cc2493dSspupyrev Network.addEdge(S1, Bout, Block.Weight, 0); 4817cc2493dSspupyrev Network.addEdge(Bin, T1, Block.Weight, 0); 4827cc2493dSspupyrev } 4837cc2493dSspupyrev 4847cc2493dSspupyrev // Edges from S and to T 4857cc2493dSspupyrev assert((!Block.isEntry() || !Block.isExit()) && 4867cc2493dSspupyrev "a block cannot be an entry and an exit"); 4877cc2493dSspupyrev if (Block.isEntry()) { 4887cc2493dSspupyrev Network.addEdge(S, Bin, 0); 4897cc2493dSspupyrev } else if (Block.isExit()) { 4907cc2493dSspupyrev Network.addEdge(Bout, T, 0); 4917cc2493dSspupyrev } 4927cc2493dSspupyrev 4937cc2493dSspupyrev // An auxiliary node to allow increase/reduction of block counts: 4947cc2493dSspupyrev // We assume that decreasing block counts is more expensive than increasing, 4957cc2493dSspupyrev // and thus, setting separate costs here. In the future we may want to tune 4967cc2493dSspupyrev // the relative costs so as to maximize the quality of generated profiles. 4977cc2493dSspupyrev int64_t AuxCostInc = MinCostMaxFlow::AuxCostInc; 4987cc2493dSspupyrev int64_t AuxCostDec = MinCostMaxFlow::AuxCostDec; 4997cc2493dSspupyrev if (Block.UnknownWeight) { 5007cc2493dSspupyrev // Do not penalize changing weights of blocks w/o known profile count 5017cc2493dSspupyrev AuxCostInc = 0; 5027cc2493dSspupyrev AuxCostDec = 0; 5037cc2493dSspupyrev } else { 5047cc2493dSspupyrev // Increasing the count for "cold" blocks with zero initial count is more 5057cc2493dSspupyrev // expensive than for "hot" ones 5067cc2493dSspupyrev if (Block.Weight == 0) { 5077cc2493dSspupyrev AuxCostInc = MinCostMaxFlow::AuxCostIncZero; 5087cc2493dSspupyrev } 5097cc2493dSspupyrev // Modifying the count of the entry block is expensive 5107cc2493dSspupyrev if (Block.isEntry()) { 5117cc2493dSspupyrev AuxCostInc = MinCostMaxFlow::AuxCostIncEntry; 5127cc2493dSspupyrev AuxCostDec = MinCostMaxFlow::AuxCostDecEntry; 5137cc2493dSspupyrev } 5147cc2493dSspupyrev } 5157cc2493dSspupyrev // For blocks with self-edges, do not penalize a reduction of the count, 5167cc2493dSspupyrev // as all of the increase can be attributed to the self-edge 5177cc2493dSspupyrev if (Block.HasSelfEdge) { 5187cc2493dSspupyrev AuxCostDec = 0; 5197cc2493dSspupyrev } 5207cc2493dSspupyrev 5217cc2493dSspupyrev Network.addEdge(Bin, Baux, AuxCostInc); 5227cc2493dSspupyrev Network.addEdge(Baux, Bout, AuxCostInc); 5237cc2493dSspupyrev if (Block.Weight > 0) { 5247cc2493dSspupyrev Network.addEdge(Bout, Baux, AuxCostDec); 5257cc2493dSspupyrev Network.addEdge(Baux, Bin, AuxCostDec); 5267cc2493dSspupyrev } 5277cc2493dSspupyrev } 5287cc2493dSspupyrev 5297cc2493dSspupyrev // Creating edges for every jump 5307cc2493dSspupyrev for (auto &Jump : Func.Jumps) { 5317cc2493dSspupyrev uint64_t Src = Jump.Source; 5327cc2493dSspupyrev uint64_t Dst = Jump.Target; 5337cc2493dSspupyrev if (Src != Dst) { 5347cc2493dSspupyrev uint64_t SrcOut = 3 * Src + 1; 5357cc2493dSspupyrev uint64_t DstIn = 3 * Dst; 5367cc2493dSspupyrev uint64_t Cost = Jump.IsUnlikely ? MinCostMaxFlow::AuxCostUnlikely : 0; 5377cc2493dSspupyrev Network.addEdge(SrcOut, DstIn, Cost); 5387cc2493dSspupyrev } 5397cc2493dSspupyrev } 5407cc2493dSspupyrev 5417cc2493dSspupyrev // Make sure we have a valid flow circulation 5427cc2493dSspupyrev Network.addEdge(T, S, 0); 5437cc2493dSspupyrev } 5447cc2493dSspupyrev 5457cc2493dSspupyrev /// Extract resulting block and edge counts from the flow network. 5467cc2493dSspupyrev void extractWeights(MinCostMaxFlow &Network, FlowFunction &Func) { 5477cc2493dSspupyrev uint64_t NumBlocks = Func.Blocks.size(); 5487cc2493dSspupyrev 5497cc2493dSspupyrev // Extract resulting block counts 5507cc2493dSspupyrev for (uint64_t Src = 0; Src < NumBlocks; Src++) { 5517cc2493dSspupyrev auto &Block = Func.Blocks[Src]; 5527cc2493dSspupyrev uint64_t SrcOut = 3 * Src + 1; 5537cc2493dSspupyrev int64_t Flow = 0; 5547cc2493dSspupyrev for (auto &Adj : Network.getFlow(SrcOut)) { 5557cc2493dSspupyrev uint64_t DstIn = Adj.first; 5567cc2493dSspupyrev int64_t DstFlow = Adj.second; 5577cc2493dSspupyrev bool IsAuxNode = (DstIn < 3 * NumBlocks && DstIn % 3 == 2); 5587cc2493dSspupyrev if (!IsAuxNode || Block.HasSelfEdge) { 5597cc2493dSspupyrev Flow += DstFlow; 5607cc2493dSspupyrev } 5617cc2493dSspupyrev } 5627cc2493dSspupyrev Block.Flow = Flow; 5637cc2493dSspupyrev assert(Flow >= 0 && "negative block flow"); 5647cc2493dSspupyrev } 5657cc2493dSspupyrev 5667cc2493dSspupyrev // Extract resulting jump counts 5677cc2493dSspupyrev for (auto &Jump : Func.Jumps) { 5687cc2493dSspupyrev uint64_t Src = Jump.Source; 5697cc2493dSspupyrev uint64_t Dst = Jump.Target; 5707cc2493dSspupyrev int64_t Flow = 0; 5717cc2493dSspupyrev if (Src != Dst) { 5727cc2493dSspupyrev uint64_t SrcOut = 3 * Src + 1; 5737cc2493dSspupyrev uint64_t DstIn = 3 * Dst; 5747cc2493dSspupyrev Flow = Network.getFlow(SrcOut, DstIn); 5757cc2493dSspupyrev } else { 5767cc2493dSspupyrev uint64_t SrcOut = 3 * Src + 1; 5777cc2493dSspupyrev uint64_t SrcAux = 3 * Src + 2; 5787cc2493dSspupyrev int64_t AuxFlow = Network.getFlow(SrcOut, SrcAux); 5797cc2493dSspupyrev if (AuxFlow > 0) 5807cc2493dSspupyrev Flow = AuxFlow; 5817cc2493dSspupyrev } 5827cc2493dSspupyrev Jump.Flow = Flow; 5837cc2493dSspupyrev assert(Flow >= 0 && "negative jump flow"); 5847cc2493dSspupyrev } 5857cc2493dSspupyrev } 5867cc2493dSspupyrev 5877cc2493dSspupyrev #ifndef NDEBUG 5887cc2493dSspupyrev /// Verify that the computed flow values satisfy flow conservation rules 5897cc2493dSspupyrev void verifyWeights(const FlowFunction &Func) { 5907cc2493dSspupyrev const uint64_t NumBlocks = Func.Blocks.size(); 5917cc2493dSspupyrev auto InFlow = std::vector<uint64_t>(NumBlocks, 0); 5927cc2493dSspupyrev auto OutFlow = std::vector<uint64_t>(NumBlocks, 0); 5937cc2493dSspupyrev for (auto &Jump : Func.Jumps) { 5947cc2493dSspupyrev InFlow[Jump.Target] += Jump.Flow; 5957cc2493dSspupyrev OutFlow[Jump.Source] += Jump.Flow; 5967cc2493dSspupyrev } 5977cc2493dSspupyrev 5987cc2493dSspupyrev uint64_t TotalInFlow = 0; 5997cc2493dSspupyrev uint64_t TotalOutFlow = 0; 6007cc2493dSspupyrev for (uint64_t I = 0; I < NumBlocks; I++) { 6017cc2493dSspupyrev auto &Block = Func.Blocks[I]; 6027cc2493dSspupyrev if (Block.isEntry()) { 6037cc2493dSspupyrev TotalInFlow += Block.Flow; 6047cc2493dSspupyrev assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 6057cc2493dSspupyrev } else if (Block.isExit()) { 6067cc2493dSspupyrev TotalOutFlow += Block.Flow; 6077cc2493dSspupyrev assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 6087cc2493dSspupyrev } else { 6097cc2493dSspupyrev assert(Block.Flow == OutFlow[I] && "incorrectly computed control flow"); 6107cc2493dSspupyrev assert(Block.Flow == InFlow[I] && "incorrectly computed control flow"); 6117cc2493dSspupyrev } 6127cc2493dSspupyrev } 6137cc2493dSspupyrev assert(TotalInFlow == TotalOutFlow && "incorrectly computed control flow"); 614*98dd2f9eSspupyrev 615*98dd2f9eSspupyrev // Verify that there are no isolated flow components 616*98dd2f9eSspupyrev // One could modify FlowFunction to hold edges indexed by the sources, which 617*98dd2f9eSspupyrev // will avoid a creation of the object 618*98dd2f9eSspupyrev auto PositiveFlowEdges = std::vector<std::vector<uint64_t>>(NumBlocks); 619*98dd2f9eSspupyrev for (auto &Jump : Func.Jumps) { 620*98dd2f9eSspupyrev if (Jump.Flow > 0) { 621*98dd2f9eSspupyrev PositiveFlowEdges[Jump.Source].push_back(Jump.Target); 622*98dd2f9eSspupyrev } 623*98dd2f9eSspupyrev } 624*98dd2f9eSspupyrev 625*98dd2f9eSspupyrev // Run bfs from the source along edges with positive flow 626*98dd2f9eSspupyrev std::queue<uint64_t> Queue; 627*98dd2f9eSspupyrev auto Visited = std::vector<bool>(NumBlocks, false); 628*98dd2f9eSspupyrev Queue.push(Func.Entry); 629*98dd2f9eSspupyrev Visited[Func.Entry] = true; 630*98dd2f9eSspupyrev while (!Queue.empty()) { 631*98dd2f9eSspupyrev uint64_t Src = Queue.front(); 632*98dd2f9eSspupyrev Queue.pop(); 633*98dd2f9eSspupyrev for (uint64_t Dst : PositiveFlowEdges[Src]) { 634*98dd2f9eSspupyrev if (!Visited[Dst]) { 635*98dd2f9eSspupyrev Queue.push(Dst); 636*98dd2f9eSspupyrev Visited[Dst] = true; 637*98dd2f9eSspupyrev } 638*98dd2f9eSspupyrev } 639*98dd2f9eSspupyrev } 640*98dd2f9eSspupyrev 641*98dd2f9eSspupyrev // Verify that every block that has a positive flow is reached from the source 642*98dd2f9eSspupyrev // along edges with a positive flow 643*98dd2f9eSspupyrev for (uint64_t I = 0; I < NumBlocks; I++) { 644*98dd2f9eSspupyrev auto &Block = Func.Blocks[I]; 645*98dd2f9eSspupyrev assert((Visited[I] || Block.Flow == 0) && "an isolated flow component"); 646*98dd2f9eSspupyrev } 6477cc2493dSspupyrev } 6487cc2493dSspupyrev #endif 6497cc2493dSspupyrev 6507cc2493dSspupyrev } // end of anonymous namespace 6517cc2493dSspupyrev 6527cc2493dSspupyrev /// Apply the profile inference algorithm for a given flow function 6537cc2493dSspupyrev void llvm::applyFlowInference(FlowFunction &Func) { 6547cc2493dSspupyrev // Create and apply an inference network model 6557cc2493dSspupyrev auto InferenceNetwork = MinCostMaxFlow(); 6567cc2493dSspupyrev initializeNetwork(InferenceNetwork, Func); 6577cc2493dSspupyrev InferenceNetwork.run(); 6587cc2493dSspupyrev 6597cc2493dSspupyrev // Extract flow values for every block and every edge 6607cc2493dSspupyrev extractWeights(InferenceNetwork, Func); 6617cc2493dSspupyrev 662*98dd2f9eSspupyrev // Post-processing adjustments to the flow 663*98dd2f9eSspupyrev auto Adjuster = FlowAdjuster(Func); 664*98dd2f9eSspupyrev Adjuster.run(); 665*98dd2f9eSspupyrev 6667cc2493dSspupyrev #ifndef NDEBUG 6677cc2493dSspupyrev // Verify the result 6687cc2493dSspupyrev verifyWeights(Func); 6697cc2493dSspupyrev #endif 6707cc2493dSspupyrev } 671