//===- bolt/Passes/ReorderAlgorithm.cpp - Basic block reordering ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements classes used by several basic block reordering
// algorithms.
//
//===----------------------------------------------------------------------===//
13a34c753fSRafael Auler 
14a34c753fSRafael Auler #include "bolt/Passes/ReorderAlgorithm.h"
15a34c753fSRafael Auler #include "bolt/Core/BinaryBasicBlock.h"
16a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h"
17a34c753fSRafael Auler #include "llvm/Support/CommandLine.h"
18a34c753fSRafael Auler #include <queue>
19a34c753fSRafael Auler #include <random>
20a34c753fSRafael Auler #include <stack>
21a34c753fSRafael Auler 
22a34c753fSRafael Auler #undef  DEBUG_TYPE
23a34c753fSRafael Auler #define DEBUG_TYPE "bolt"
24a34c753fSRafael Auler 
25a34c753fSRafael Auler using namespace llvm;
26a34c753fSRafael Auler using namespace bolt;
27a34c753fSRafael Auler 
28a34c753fSRafael Auler namespace opts {
29a34c753fSRafael Auler 
30a34c753fSRafael Auler extern cl::OptionCategory BoltOptCategory;
31a34c753fSRafael Auler extern cl::opt<bool> NoThreads;
32a34c753fSRafael Auler 
33a34c753fSRafael Auler static cl::opt<unsigned> ColdThreshold(
34a34c753fSRafael Auler     "cold-threshold",
35a34c753fSRafael Auler     cl::desc("tenths of percents of main entry frequency to use as a "
36a34c753fSRafael Auler              "threshold when evaluating whether a basic block is cold "
37a34c753fSRafael Auler              "(0 means it is only considered cold if the block has zero "
38a34c753fSRafael Auler              "samples). Default: 0 "),
39a34c753fSRafael Auler     cl::init(0), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));
40a34c753fSRafael Auler 
41b92436efSFangrui Song static cl::opt<bool> PrintClusters("print-clusters", cl::desc("print clusters"),
42b92436efSFangrui Song                                    cl::Hidden, cl::cat(BoltOptCategory));
43a34c753fSRafael Auler 
44b92436efSFangrui Song cl::opt<uint32_t> RandomSeed("bolt-seed", cl::desc("seed for randomization"),
45b92436efSFangrui Song                              cl::init(42), cl::Hidden,
46a34c753fSRafael Auler                              cl::cat(BoltOptCategory));
47a34c753fSRafael Auler 
48a34c753fSRafael Auler } // namespace opts
49a34c753fSRafael Auler 
namespace {

/// Fold \p Val into the running hash \p Seed using the classic
/// boost::hash_combine mixing formula.
template <class T> inline void hashCombine(size_t &Seed, const T &Val) {
  std::hash<T> Hash;
  Seed ^= Hash(Val) + 0x9e3779b9 + (Seed << 6) + (Seed >> 2);
}

/// Hash functor for an std::pair, combining the hashes of both members.
template <typename A, typename B> struct HashPair {
  size_t operator()(const std::pair<A, B> &Val) const {
    std::hash<A> Hash;
    size_t Result = Hash(Val.first);
    hashCombine(Result, Val.second);
    return Result;
  }
};

} // namespace
67a34c753fSRafael Auler 
computeClusterAverageFrequency(const BinaryContext & BC)68a34c753fSRafael Auler void ClusterAlgorithm::computeClusterAverageFrequency(const BinaryContext &BC) {
69a34c753fSRafael Auler   // Create a separate MCCodeEmitter to allow lock-free execution
70a34c753fSRafael Auler   BinaryContext::IndependentCodeEmitter Emitter;
71f92ab6afSAmir Ayupov   if (!opts::NoThreads)
72a34c753fSRafael Auler     Emitter = BC.createIndependentMCCodeEmitter();
73a34c753fSRafael Auler 
74a34c753fSRafael Auler   AvgFreq.resize(Clusters.size(), 0.0);
75a34c753fSRafael Auler   for (uint32_t I = 0, E = Clusters.size(); I < E; ++I) {
76a34c753fSRafael Auler     double Freq = 0.0;
77a34c753fSRafael Auler     uint64_t ClusterSize = 0;
78a34c753fSRafael Auler     for (BinaryBasicBlock *BB : Clusters[I]) {
79a34c753fSRafael Auler       if (BB->getNumNonPseudos() > 0) {
80a34c753fSRafael Auler         Freq += BB->getExecutionCount();
81a34c753fSRafael Auler         // Estimate the size of a block in bytes at run time
82a34c753fSRafael Auler         // NOTE: This might be inaccurate
83a34c753fSRafael Auler         ClusterSize += BB->estimateSize(Emitter.MCE.get());
84a34c753fSRafael Auler       }
85a34c753fSRafael Auler     }
86a34c753fSRafael Auler     AvgFreq[I] = ClusterSize == 0 ? 0 : Freq / ClusterSize;
87a34c753fSRafael Auler   }
88a34c753fSRafael Auler }
89a34c753fSRafael Auler 
printClusters() const90a34c753fSRafael Auler void ClusterAlgorithm::printClusters() const {
91a34c753fSRafael Auler   for (uint32_t I = 0, E = Clusters.size(); I < E; ++I) {
92a34c753fSRafael Auler     errs() << "Cluster number " << I;
93a34c753fSRafael Auler     if (AvgFreq.size() == Clusters.size())
94a34c753fSRafael Auler       errs() << " (frequency: " << AvgFreq[I] << ")";
95a34c753fSRafael Auler     errs() << " : ";
96a34c753fSRafael Auler     const char *Sep = "";
97a34c753fSRafael Auler     for (BinaryBasicBlock *BB : Clusters[I]) {
98a34c753fSRafael Auler       errs() << Sep << BB->getName();
99a34c753fSRafael Auler       Sep = ", ";
100a34c753fSRafael Auler     }
101a34c753fSRafael Auler     errs() << "\n";
102a34c753fSRafael Auler   }
103a34c753fSRafael Auler }
104a34c753fSRafael Auler 
reset()105a34c753fSRafael Auler void ClusterAlgorithm::reset() {
106a34c753fSRafael Auler   Clusters.clear();
107a34c753fSRafael Auler   ClusterEdges.clear();
108a34c753fSRafael Auler   AvgFreq.clear();
109a34c753fSRafael Auler }
110a34c753fSRafael Auler 
print(raw_ostream & OS) const111a34c753fSRafael Auler void GreedyClusterAlgorithm::EdgeTy::print(raw_ostream &OS) const {
112a34c753fSRafael Auler   OS << Src->getName() << " -> " << Dst->getName() << ", count: " << Count;
113a34c753fSRafael Auler }
114a34c753fSRafael Auler 
operator ()(const EdgeTy & E) const115a34c753fSRafael Auler size_t GreedyClusterAlgorithm::EdgeHash::operator()(const EdgeTy &E) const {
116a34c753fSRafael Auler   HashPair<const BinaryBasicBlock *, const BinaryBasicBlock *> Hasher;
117a34c753fSRafael Auler   return Hasher(std::make_pair(E.Src, E.Dst));
118a34c753fSRafael Auler }
119a34c753fSRafael Auler 
operator ()(const EdgeTy & A,const EdgeTy & B) const12040c2e0faSMaksim Panchenko bool GreedyClusterAlgorithm::EdgeEqual::operator()(const EdgeTy &A,
12140c2e0faSMaksim Panchenko                                                    const EdgeTy &B) const {
122a34c753fSRafael Auler   return A.Src == B.Src && A.Dst == B.Dst;
123a34c753fSRafael Auler }
124a34c753fSRafael Auler 
clusterBasicBlocks(const BinaryFunction & BF,bool ComputeEdges)125a34c753fSRafael Auler void GreedyClusterAlgorithm::clusterBasicBlocks(const BinaryFunction &BF,
126a34c753fSRafael Auler                                                 bool ComputeEdges) {
127a34c753fSRafael Auler   reset();
128a34c753fSRafael Auler 
129a34c753fSRafael Auler   // Greedy heuristic implementation for the TSP, applied to BB layout. Try to
130a34c753fSRafael Auler   // maximize weight during a path traversing all BBs. In this way, we will
131a34c753fSRafael Auler   // convert the hottest branches into fall-throughs.
132a34c753fSRafael Auler 
133a34c753fSRafael Auler   // This is the queue of edges from which we will pop edges and use them to
134a34c753fSRafael Auler   // cluster basic blocks in a greedy fashion.
135a34c753fSRafael Auler   std::vector<EdgeTy> Queue;
136a34c753fSRafael Auler 
137a34c753fSRafael Auler   // Initialize inter-cluster weights.
138a34c753fSRafael Auler   if (ComputeEdges)
139*8477bc67SFabian Parzefall     ClusterEdges.resize(BF.getLayout().block_size());
140a34c753fSRafael Auler 
141a34c753fSRafael Auler   // Initialize clusters and edge queue.
142*8477bc67SFabian Parzefall   for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
143a34c753fSRafael Auler     // Create a cluster for this BB.
144a34c753fSRafael Auler     uint32_t I = Clusters.size();
145a34c753fSRafael Auler     Clusters.emplace_back();
146a34c753fSRafael Auler     std::vector<BinaryBasicBlock *> &Cluster = Clusters.back();
147a34c753fSRafael Auler     Cluster.push_back(BB);
148a34c753fSRafael Auler     BBToClusterMap[BB] = I;
149a34c753fSRafael Auler     // Populate priority queue with edges.
150a34c753fSRafael Auler     auto BI = BB->branch_info_begin();
151a34c753fSRafael Auler     for (BinaryBasicBlock *&I : BB->successors()) {
152a34c753fSRafael Auler       assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
153a34c753fSRafael Auler              "attempted reordering blocks of function with no profile data");
154a34c753fSRafael Auler       Queue.emplace_back(EdgeTy(BB, I, BI->Count));
155a34c753fSRafael Auler       ++BI;
156a34c753fSRafael Auler     }
157a34c753fSRafael Auler   }
158a34c753fSRafael Auler   // Sort and adjust the edge queue.
159a34c753fSRafael Auler   initQueue(Queue, BF);
160a34c753fSRafael Auler 
161a34c753fSRafael Auler   // Grow clusters in a greedy fashion.
162a34c753fSRafael Auler   while (!Queue.empty()) {
163a34c753fSRafael Auler     EdgeTy E = Queue.back();
164a34c753fSRafael Auler     Queue.pop_back();
165a34c753fSRafael Auler 
166a34c753fSRafael Auler     const BinaryBasicBlock *SrcBB = E.Src;
167a34c753fSRafael Auler     const BinaryBasicBlock *DstBB = E.Dst;
168a34c753fSRafael Auler 
169a34c753fSRafael Auler     LLVM_DEBUG(dbgs() << "Popped edge "; E.print(dbgs()); dbgs() << "\n");
170a34c753fSRafael Auler 
171a34c753fSRafael Auler     // Case 1: BBSrc and BBDst are the same. Ignore this edge
172*8477bc67SFabian Parzefall     if (SrcBB == DstBB || DstBB == *BF.getLayout().block_begin()) {
173a34c753fSRafael Auler       LLVM_DEBUG(dbgs() << "\tIgnored (same src, dst)\n");
174a34c753fSRafael Auler       continue;
175a34c753fSRafael Auler     }
176a34c753fSRafael Auler 
177a34c753fSRafael Auler     int I = BBToClusterMap[SrcBB];
178a34c753fSRafael Auler     int J = BBToClusterMap[DstBB];
179a34c753fSRafael Auler 
180a34c753fSRafael Auler     // Case 2: If they are already allocated at the same cluster, just increase
181a34c753fSRafael Auler     // the weight of this cluster
182a34c753fSRafael Auler     if (I == J) {
183a34c753fSRafael Auler       if (ComputeEdges)
184a34c753fSRafael Auler         ClusterEdges[I][I] += E.Count;
185a34c753fSRafael Auler       LLVM_DEBUG(dbgs() << "\tIgnored (src, dst belong to the same cluster)\n");
186a34c753fSRafael Auler       continue;
187a34c753fSRafael Auler     }
188a34c753fSRafael Auler 
189a34c753fSRafael Auler     std::vector<BinaryBasicBlock *> &ClusterA = Clusters[I];
190a34c753fSRafael Auler     std::vector<BinaryBasicBlock *> &ClusterB = Clusters[J];
191a34c753fSRafael Auler     if (areClustersCompatible(ClusterA, ClusterB, E)) {
192a34c753fSRafael Auler       // Case 3: SrcBB is at the end of a cluster and DstBB is at the start,
193a34c753fSRafael Auler       // allowing us to merge two clusters.
194a34c753fSRafael Auler       for (BinaryBasicBlock *BB : ClusterB)
195a34c753fSRafael Auler         BBToClusterMap[BB] = I;
196a34c753fSRafael Auler       ClusterA.insert(ClusterA.end(), ClusterB.begin(), ClusterB.end());
197a34c753fSRafael Auler       ClusterB.clear();
198a34c753fSRafael Auler       if (ComputeEdges) {
199a34c753fSRafael Auler         // Increase the intra-cluster edge count of cluster A with the count of
200a34c753fSRafael Auler         // this edge as well as with the total count of previously visited edges
201a34c753fSRafael Auler         // from cluster B cluster A.
202a34c753fSRafael Auler         ClusterEdges[I][I] += E.Count;
203a34c753fSRafael Auler         ClusterEdges[I][I] += ClusterEdges[J][I];
204a34c753fSRafael Auler         // Iterate through all inter-cluster edges and transfer edges targeting
205a34c753fSRafael Auler         // cluster B to cluster A.
206a34c753fSRafael Auler         for (uint32_t K = 0, E = ClusterEdges.size(); K != E; ++K)
207a34c753fSRafael Auler           ClusterEdges[K][I] += ClusterEdges[K][J];
208a34c753fSRafael Auler       }
209a34c753fSRafael Auler       // Adjust the weights of the remaining edges and re-sort the queue.
210a34c753fSRafael Auler       adjustQueue(Queue, BF);
211a34c753fSRafael Auler       LLVM_DEBUG(dbgs() << "\tMerged clusters of src, dst\n");
212a34c753fSRafael Auler     } else {
213a34c753fSRafael Auler       // Case 4: Both SrcBB and DstBB are allocated in positions we cannot
214a34c753fSRafael Auler       // merge them. Add the count of this edge to the inter-cluster edge count
215a34c753fSRafael Auler       // between clusters A and B to help us decide ordering between these
216a34c753fSRafael Auler       // clusters.
217a34c753fSRafael Auler       if (ComputeEdges)
218a34c753fSRafael Auler         ClusterEdges[I][J] += E.Count;
219a34c753fSRafael Auler       LLVM_DEBUG(
220a34c753fSRafael Auler           dbgs() << "\tIgnored (src, dst belong to incompatible clusters)\n");
221a34c753fSRafael Auler     }
222a34c753fSRafael Auler   }
223a34c753fSRafael Auler }
224a34c753fSRafael Auler 
reset()225a34c753fSRafael Auler void GreedyClusterAlgorithm::reset() {
226a34c753fSRafael Auler   ClusterAlgorithm::reset();
227a34c753fSRafael Auler   BBToClusterMap.clear();
228a34c753fSRafael Auler }
229a34c753fSRafael Auler 
initQueue(std::vector<EdgeTy> & Queue,const BinaryFunction & BF)23040c2e0faSMaksim Panchenko void PHGreedyClusterAlgorithm::initQueue(std::vector<EdgeTy> &Queue,
23140c2e0faSMaksim Panchenko                                          const BinaryFunction &BF) {
232a34c753fSRafael Auler   // Define a comparison function to establish SWO between edges.
233a34c753fSRafael Auler   auto Comp = [&BF](const EdgeTy &A, const EdgeTy &B) {
234a34c753fSRafael Auler     // With equal weights, prioritize branches with lower index
235a34c753fSRafael Auler     // source/destination. This helps to keep original block order for blocks
236a34c753fSRafael Auler     // when optimal order cannot be deducted from a profile.
237a34c753fSRafael Auler     if (A.Count == B.Count) {
238a34c753fSRafael Auler       const signed SrcOrder = BF.getOriginalLayoutRelativeOrder(A.Src, B.Src);
239a34c753fSRafael Auler       return (SrcOrder != 0)
240a34c753fSRafael Auler                  ? SrcOrder > 0
241a34c753fSRafael Auler                  : BF.getOriginalLayoutRelativeOrder(A.Dst, B.Dst) > 0;
242a34c753fSRafael Auler     }
243a34c753fSRafael Auler     return A.Count < B.Count;
244a34c753fSRafael Auler   };
245a34c753fSRafael Auler 
246a34c753fSRafael Auler   // Sort edges in increasing profile count order.
247d2c87699SAmir Ayupov   llvm::sort(Queue, Comp);
248a34c753fSRafael Auler }
249a34c753fSRafael Auler 
adjustQueue(std::vector<EdgeTy> & Queue,const BinaryFunction & BF)25040c2e0faSMaksim Panchenko void PHGreedyClusterAlgorithm::adjustQueue(std::vector<EdgeTy> &Queue,
25140c2e0faSMaksim Panchenko                                            const BinaryFunction &BF) {
252a34c753fSRafael Auler   // Nothing to do.
253a34c753fSRafael Auler   return;
254a34c753fSRafael Auler }
255a34c753fSRafael Auler 
areClustersCompatible(const ClusterTy & Front,const ClusterTy & Back,const EdgeTy & E) const25640c2e0faSMaksim Panchenko bool PHGreedyClusterAlgorithm::areClustersCompatible(const ClusterTy &Front,
25740c2e0faSMaksim Panchenko                                                      const ClusterTy &Back,
25840c2e0faSMaksim Panchenko                                                      const EdgeTy &E) const {
259a34c753fSRafael Auler   return Front.back() == E.Src && Back.front() == E.Dst;
260a34c753fSRafael Auler }
261a34c753fSRafael Auler 
calculateWeight(const EdgeTy & E,const BinaryFunction & BF) const262a34c753fSRafael Auler int64_t MinBranchGreedyClusterAlgorithm::calculateWeight(
263a34c753fSRafael Auler     const EdgeTy &E, const BinaryFunction &BF) const {
264a34c753fSRafael Auler   const BinaryBasicBlock *SrcBB = E.Src;
265a34c753fSRafael Auler   const BinaryBasicBlock *DstBB = E.Dst;
266a34c753fSRafael Auler 
267a34c753fSRafael Auler   // Initial weight value.
268a34c753fSRafael Auler   int64_t W = (int64_t)E.Count;
269a34c753fSRafael Auler 
270a34c753fSRafael Auler   // Adjust the weight by taking into account other edges with the same source.
271a34c753fSRafael Auler   auto BI = SrcBB->branch_info_begin();
272a34c753fSRafael Auler   for (const BinaryBasicBlock *SuccBB : SrcBB->successors()) {
273a34c753fSRafael Auler     assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
274a34c753fSRafael Auler            "attempted reordering blocks of function with no profile data");
275a34c753fSRafael Auler     assert(BI->Count <= std::numeric_limits<int64_t>::max() &&
276a34c753fSRafael Auler            "overflow detected");
277a34c753fSRafael Auler     // Ignore edges with same source and destination, edges that target the
278a34c753fSRafael Auler     // entry block as well as the edge E itself.
279*8477bc67SFabian Parzefall     if (SuccBB != SrcBB && SuccBB != *BF.getLayout().block_begin() &&
280*8477bc67SFabian Parzefall         SuccBB != DstBB)
281a34c753fSRafael Auler       W -= (int64_t)BI->Count;
282a34c753fSRafael Auler     ++BI;
283a34c753fSRafael Auler   }
284a34c753fSRafael Auler 
285a34c753fSRafael Auler   // Adjust the weight by taking into account other edges with the same
286a34c753fSRafael Auler   // destination.
287a34c753fSRafael Auler   for (const BinaryBasicBlock *PredBB : DstBB->predecessors()) {
288a34c753fSRafael Auler     // Ignore edges with same source and destination as well as the edge E
289a34c753fSRafael Auler     // itself.
290a34c753fSRafael Auler     if (PredBB == DstBB || PredBB == SrcBB)
291a34c753fSRafael Auler       continue;
292a34c753fSRafael Auler     auto BI = PredBB->branch_info_begin();
293a34c753fSRafael Auler     for (const BinaryBasicBlock *SuccBB : PredBB->successors()) {
294a34c753fSRafael Auler       if (SuccBB == DstBB)
295a34c753fSRafael Auler         break;
296a34c753fSRafael Auler       ++BI;
297a34c753fSRafael Auler     }
298a34c753fSRafael Auler     assert(BI != PredBB->branch_info_end() && "invalid control flow graph");
299a34c753fSRafael Auler     assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
300a34c753fSRafael Auler            "attempted reordering blocks of function with no profile data");
301a34c753fSRafael Auler     assert(BI->Count <= std::numeric_limits<int64_t>::max() &&
302a34c753fSRafael Auler            "overflow detected");
303a34c753fSRafael Auler     W -= (int64_t)BI->Count;
304a34c753fSRafael Auler   }
305a34c753fSRafael Auler 
306a34c753fSRafael Auler   return W;
307a34c753fSRafael Auler }
308a34c753fSRafael Auler 
initQueue(std::vector<EdgeTy> & Queue,const BinaryFunction & BF)30940c2e0faSMaksim Panchenko void MinBranchGreedyClusterAlgorithm::initQueue(std::vector<EdgeTy> &Queue,
31040c2e0faSMaksim Panchenko                                                 const BinaryFunction &BF) {
311a34c753fSRafael Auler   // Initialize edge weights.
312a34c753fSRafael Auler   for (const EdgeTy &E : Queue)
313a34c753fSRafael Auler     Weight.emplace(std::make_pair(E, calculateWeight(E, BF)));
314a34c753fSRafael Auler 
315a34c753fSRafael Auler   // Sort edges in increasing weight order.
316a34c753fSRafael Auler   adjustQueue(Queue, BF);
317a34c753fSRafael Auler }
318a34c753fSRafael Auler 
adjustQueue(std::vector<EdgeTy> & Queue,const BinaryFunction & BF)31940c2e0faSMaksim Panchenko void MinBranchGreedyClusterAlgorithm::adjustQueue(std::vector<EdgeTy> &Queue,
32040c2e0faSMaksim Panchenko                                                   const BinaryFunction &BF) {
321a34c753fSRafael Auler   // Define a comparison function to establish SWO between edges.
322a34c753fSRafael Auler   auto Comp = [&](const EdgeTy &A, const EdgeTy &B) {
323a34c753fSRafael Auler     // With equal weights, prioritize branches with lower index
324a34c753fSRafael Auler     // source/destination. This helps to keep original block order for blocks
325a34c753fSRafael Auler     // when optimal order cannot be deduced from a profile.
326a34c753fSRafael Auler     if (Weight[A] == Weight[B]) {
327a34c753fSRafael Auler       const signed SrcOrder = BF.getOriginalLayoutRelativeOrder(A.Src, B.Src);
328a34c753fSRafael Auler       return (SrcOrder != 0)
329a34c753fSRafael Auler                  ? SrcOrder > 0
330a34c753fSRafael Auler                  : BF.getOriginalLayoutRelativeOrder(A.Dst, B.Dst) > 0;
331a34c753fSRafael Auler     }
332a34c753fSRafael Auler     return Weight[A] < Weight[B];
333a34c753fSRafael Auler   };
334a34c753fSRafael Auler 
335a34c753fSRafael Auler   // Iterate through all remaining edges to find edges that have their
336a34c753fSRafael Auler   // source and destination in the same cluster.
337a34c753fSRafael Auler   std::vector<EdgeTy> NewQueue;
338a34c753fSRafael Auler   for (const EdgeTy &E : Queue) {
339a34c753fSRafael Auler     const BinaryBasicBlock *SrcBB = E.Src;
340a34c753fSRafael Auler     const BinaryBasicBlock *DstBB = E.Dst;
341a34c753fSRafael Auler 
342a34c753fSRafael Auler     // Case 1: SrcBB and DstBB are the same or DstBB is the entry block. Ignore
343a34c753fSRafael Auler     // this edge.
344*8477bc67SFabian Parzefall     if (SrcBB == DstBB || DstBB == *BF.getLayout().block_begin()) {
345a34c753fSRafael Auler       LLVM_DEBUG(dbgs() << "\tAdjustment: Ignored edge "; E.print(dbgs());
346a34c753fSRafael Auler                  dbgs() << " (same src, dst)\n");
347a34c753fSRafael Auler       continue;
348a34c753fSRafael Auler     }
349a34c753fSRafael Auler 
350a34c753fSRafael Auler     int I = BBToClusterMap[SrcBB];
351a34c753fSRafael Auler     int J = BBToClusterMap[DstBB];
352a34c753fSRafael Auler     std::vector<BinaryBasicBlock *> &ClusterA = Clusters[I];
353a34c753fSRafael Auler     std::vector<BinaryBasicBlock *> &ClusterB = Clusters[J];
354a34c753fSRafael Auler 
355a34c753fSRafael Auler     // Case 2: They are already allocated at the same cluster or incompatible
356a34c753fSRafael Auler     // clusters. Adjust the weights of edges with the same source or
357a34c753fSRafael Auler     // destination, so that this edge has no effect on them any more, and ignore
358a34c753fSRafael Auler     // this edge. Also increase the intra- (or inter-) cluster edge count.
359a34c753fSRafael Auler     if (I == J || !areClustersCompatible(ClusterA, ClusterB, E)) {
360a34c753fSRafael Auler       if (!ClusterEdges.empty())
361a34c753fSRafael Auler         ClusterEdges[I][J] += E.Count;
362a34c753fSRafael Auler       LLVM_DEBUG(dbgs() << "\tAdjustment: Ignored edge "; E.print(dbgs());
363a34c753fSRafael Auler                  dbgs() << " (src, dst belong to same cluster or incompatible "
364a34c753fSRafael Auler                            "clusters)\n");
365a34c753fSRafael Auler       for (const BinaryBasicBlock *SuccBB : SrcBB->successors()) {
366a34c753fSRafael Auler         if (SuccBB == DstBB)
367a34c753fSRafael Auler           continue;
368a34c753fSRafael Auler         auto WI = Weight.find(EdgeTy(SrcBB, SuccBB, 0));
369a34c753fSRafael Auler         assert(WI != Weight.end() && "CFG edge not found in Weight map");
370a34c753fSRafael Auler         WI->second += (int64_t)E.Count;
371a34c753fSRafael Auler       }
372a34c753fSRafael Auler       for (const BinaryBasicBlock *PredBB : DstBB->predecessors()) {
373a34c753fSRafael Auler         if (PredBB == SrcBB)
374a34c753fSRafael Auler           continue;
375a34c753fSRafael Auler         auto WI = Weight.find(EdgeTy(PredBB, DstBB, 0));
376a34c753fSRafael Auler         assert(WI != Weight.end() && "CFG edge not found in Weight map");
377a34c753fSRafael Auler         WI->second += (int64_t)E.Count;
378a34c753fSRafael Auler       }
379a34c753fSRafael Auler       continue;
380a34c753fSRafael Auler     }
381a34c753fSRafael Auler 
382a34c753fSRafael Auler     // Case 3: None of the previous cases is true, so just keep this edge in
383a34c753fSRafael Auler     // the queue.
384a34c753fSRafael Auler     NewQueue.emplace_back(E);
385a34c753fSRafael Auler   }
386a34c753fSRafael Auler 
387a34c753fSRafael Auler   // Sort remaining edges in increasing weight order.
388a34c753fSRafael Auler   Queue.swap(NewQueue);
389d2c87699SAmir Ayupov   llvm::sort(Queue, Comp);
390a34c753fSRafael Auler }
391a34c753fSRafael Auler 
areClustersCompatible(const ClusterTy & Front,const ClusterTy & Back,const EdgeTy & E) const392a34c753fSRafael Auler bool MinBranchGreedyClusterAlgorithm::areClustersCompatible(
393a34c753fSRafael Auler     const ClusterTy &Front, const ClusterTy &Back, const EdgeTy &E) const {
394a34c753fSRafael Auler   return Front.back() == E.Src && Back.front() == E.Dst;
395a34c753fSRafael Auler }
396a34c753fSRafael Auler 
reset()397a34c753fSRafael Auler void MinBranchGreedyClusterAlgorithm::reset() {
398a34c753fSRafael Auler   GreedyClusterAlgorithm::reset();
399a34c753fSRafael Auler   Weight.clear();
400a34c753fSRafael Auler }
401a34c753fSRafael Auler 
reorderBasicBlocks(const BinaryFunction & BF,BasicBlockOrder & Order) const40240c2e0faSMaksim Panchenko void TSPReorderAlgorithm::reorderBasicBlocks(const BinaryFunction &BF,
40340c2e0faSMaksim Panchenko                                              BasicBlockOrder &Order) const {
404a34c753fSRafael Auler   std::vector<std::vector<uint64_t>> Weight;
405a34c753fSRafael Auler   std::vector<BinaryBasicBlock *> IndexToBB;
406a34c753fSRafael Auler 
407*8477bc67SFabian Parzefall   const size_t N = BF.getLayout().block_size();
408a34c753fSRafael Auler   assert(N <= std::numeric_limits<uint64_t>::digits &&
409a34c753fSRafael Auler          "cannot use TSP solution for sizes larger than bits in uint64_t");
410a34c753fSRafael Auler 
411a34c753fSRafael Auler   // Populating weight map and index map
412*8477bc67SFabian Parzefall   for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
413a34c753fSRafael Auler     BB->setLayoutIndex(IndexToBB.size());
414a34c753fSRafael Auler     IndexToBB.push_back(BB);
415a34c753fSRafael Auler   }
416a34c753fSRafael Auler   Weight.resize(N);
417*8477bc67SFabian Parzefall   for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
418a34c753fSRafael Auler     auto BI = BB->branch_info_begin();
419a34c753fSRafael Auler     Weight[BB->getLayoutIndex()].resize(N);
420a34c753fSRafael Auler     for (BinaryBasicBlock *SuccBB : BB->successors()) {
421a34c753fSRafael Auler       if (BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE)
422a34c753fSRafael Auler         Weight[BB->getLayoutIndex()][SuccBB->getLayoutIndex()] = BI->Count;
423a34c753fSRafael Auler       ++BI;
424a34c753fSRafael Auler     }
425a34c753fSRafael Auler   }
426a34c753fSRafael Auler 
427a34c753fSRafael Auler   std::vector<std::vector<int64_t>> DP;
428a34c753fSRafael Auler   DP.resize(1 << N);
429f92ab6afSAmir Ayupov   for (std::vector<int64_t> &Elmt : DP)
430a34c753fSRafael Auler     Elmt.resize(N, -1);
431f92ab6afSAmir Ayupov 
432a34c753fSRafael Auler   // Start with the entry basic block being allocated with cost zero
433a34c753fSRafael Auler   DP[1][0] = 0;
434a34c753fSRafael Auler   // Walk through TSP solutions using a bitmask to represent state (current set
435a34c753fSRafael Auler   // of BBs in the layout)
436a34c753fSRafael Auler   uint64_t BestSet = 1;
437a34c753fSRafael Auler   uint64_t BestLast = 0;
438a34c753fSRafael Auler   int64_t BestWeight = 0;
439a34c753fSRafael Auler   for (uint64_t Set = 1; Set < (1ULL << N); ++Set) {
440a34c753fSRafael Auler     // Traverse each possibility of Last BB visited in this layout
441a34c753fSRafael Auler     for (uint64_t Last = 0; Last < N; ++Last) {
442a34c753fSRafael Auler       // Case 1: There is no possible layout with this BB as Last
443a34c753fSRafael Auler       if (DP[Set][Last] == -1)
444a34c753fSRafael Auler         continue;
445a34c753fSRafael Auler 
446a34c753fSRafael Auler       // Case 2: There is a layout with this Set and this Last, and we try
447a34c753fSRafael Auler       // to expand this set with New
448a34c753fSRafael Auler       for (uint64_t New = 1; New < N; ++New) {
449a34c753fSRafael Auler         // Case 2a: BB "New" is already in this Set
450a34c753fSRafael Auler         if ((Set & (1ULL << New)) != 0)
451a34c753fSRafael Auler           continue;
452a34c753fSRafael Auler 
453a34c753fSRafael Auler         // Case 2b: BB "New" is not in this set and we add it to this Set and
454a34c753fSRafael Auler         // record total weight of this layout with "New" as the last BB.
455a34c753fSRafael Auler         uint64_t NewSet = (Set | (1ULL << New));
456a34c753fSRafael Auler         if (DP[NewSet][New] == -1)
457a34c753fSRafael Auler           DP[NewSet][New] = DP[Set][Last] + (int64_t)Weight[Last][New];
458a34c753fSRafael Auler         DP[NewSet][New] = std::max(DP[NewSet][New],
459a34c753fSRafael Auler                                    DP[Set][Last] + (int64_t)Weight[Last][New]);
460a34c753fSRafael Auler 
461a34c753fSRafael Auler         if (DP[NewSet][New] > BestWeight) {
462a34c753fSRafael Auler           BestWeight = DP[NewSet][New];
463a34c753fSRafael Auler           BestSet = NewSet;
464a34c753fSRafael Auler           BestLast = New;
465a34c753fSRafael Auler         }
466a34c753fSRafael Auler       }
467a34c753fSRafael Auler     }
468a34c753fSRafael Auler   }
469a34c753fSRafael Auler 
470a34c753fSRafael Auler   // Define final function layout based on layout that maximizes weight
471a34c753fSRafael Auler   uint64_t Last = BestLast;
472a34c753fSRafael Auler   uint64_t Set = BestSet;
47318bc405aSAmir Ayupov   BitVector Visited;
474a34c753fSRafael Auler   Visited.resize(N);
475a34c753fSRafael Auler   Visited[Last] = true;
476a34c753fSRafael Auler   Order.push_back(IndexToBB[Last]);
477a34c753fSRafael Auler   Set = Set & ~(1ULL << Last);
478a34c753fSRafael Auler   while (Set != 0) {
479a34c753fSRafael Auler     int64_t Best = -1;
480a34c753fSRafael Auler     uint64_t NewLast;
481a34c753fSRafael Auler     for (uint64_t I = 0; I < N; ++I) {
482a34c753fSRafael Auler       if (DP[Set][I] == -1)
483a34c753fSRafael Auler         continue;
484a34c753fSRafael Auler       int64_t AdjWeight = Weight[I][Last] > 0 ? Weight[I][Last] : 0;
485a34c753fSRafael Auler       if (DP[Set][I] + AdjWeight > Best) {
486a34c753fSRafael Auler         NewLast = I;
487a34c753fSRafael Auler         Best = DP[Set][I] + AdjWeight;
488a34c753fSRafael Auler       }
489a34c753fSRafael Auler     }
490a34c753fSRafael Auler     Last = NewLast;
491a34c753fSRafael Auler     Visited[Last] = true;
492a34c753fSRafael Auler     Order.push_back(IndexToBB[Last]);
493a34c753fSRafael Auler     Set = Set & ~(1ULL << Last);
494a34c753fSRafael Auler   }
495a34c753fSRafael Auler   std::reverse(Order.begin(), Order.end());
496a34c753fSRafael Auler 
497a34c753fSRafael Auler   // Finalize layout with BBs that weren't assigned to the layout using the
498a34c753fSRafael Auler   // input layout.
499*8477bc67SFabian Parzefall   for (BinaryBasicBlock *BB : BF.getLayout().blocks())
500a34c753fSRafael Auler     if (Visited[BB->getLayoutIndex()] == false)
501a34c753fSRafael Auler       Order.push_back(BB);
502a34c753fSRafael Auler }
503a34c753fSRafael Auler 
reorderBasicBlocks(const BinaryFunction & BF,BasicBlockOrder & Order) const504a34c753fSRafael Auler void OptimizeReorderAlgorithm::reorderBasicBlocks(
505a34c753fSRafael Auler     const BinaryFunction &BF, BasicBlockOrder &Order) const {
506*8477bc67SFabian Parzefall   if (BF.getLayout().block_empty())
507a34c753fSRafael Auler     return;
508a34c753fSRafael Auler 
509a34c753fSRafael Auler   // Cluster basic blocks.
510a34c753fSRafael Auler   CAlgo->clusterBasicBlocks(BF);
511a34c753fSRafael Auler 
512a34c753fSRafael Auler   if (opts::PrintClusters)
513a34c753fSRafael Auler     CAlgo->printClusters();
514a34c753fSRafael Auler 
515a34c753fSRafael Auler   // Arrange basic blocks according to clusters.
516a34c753fSRafael Auler   for (ClusterAlgorithm::ClusterTy &Cluster : CAlgo->Clusters)
517a34c753fSRafael Auler     Order.insert(Order.end(), Cluster.begin(), Cluster.end());
518a34c753fSRafael Auler }
519a34c753fSRafael Auler 
/// Reorder blocks by clustering them (with inter-cluster edge weights),
/// laying the clusters out in a topological order that prefers hot
/// successors first, and finally stable-sorting clusters of equal
/// precedence by average execution frequency.
void OptimizeBranchReorderAlgorithm::reorderBasicBlocks(
    const BinaryFunction &BF, BasicBlockOrder &Order) const {
  if (BF.getLayout().block_empty())
    return;

  // Cluster basic blocks. ComputeEdges is requested so ClusterEdges below
  // is populated with inter-cluster branch weights.
  CAlgo->clusterBasicBlocks(BF, /* ComputeEdges = */ true);
  std::vector<ClusterAlgorithm::ClusterTy> &Clusters = CAlgo->Clusters;
  // ClusterEdges[I][J] is the weight of the edge from cluster I to cluster J.
  std::vector<std::unordered_map<uint32_t, uint64_t>> &ClusterEdges =
      CAlgo->ClusterEdges;

  // Compute clusters' average frequencies.
  CAlgo->computeClusterAverageFrequency(BF.getBinaryContext());
  std::vector<double> &AvgFreq = CAlgo->AvgFreq;

  if (opts::PrintClusters)
    CAlgo->printClusters();

  // Cluster layout order
  std::vector<uint32_t> ClusterOrder;

  // Do a topological sort for clusters, prioritizing frequently-executed BBs
  // during the traversal.
  // Iterative DFS: a node is pushed once (STACKED), and appended to
  // ClusterOrder only on its second visit (VISITED), i.e. after all of its
  // successors were emitted; reversing at the end yields topological order.
  std::stack<uint32_t> Stack;
  std::vector<uint32_t> Status;
  // Parent[I] records the DFS parent of cluster I; cluster 0 (the entry
  // cluster) and unreached clusters keep the default parent 0.
  std::vector<uint32_t> Parent;
  Status.resize(Clusters.size(), 0);
  Parent.resize(Clusters.size(), 0);
  constexpr uint32_t STACKED = 1;
  constexpr uint32_t VISITED = 2;
  Status[0] = STACKED;
  Stack.push(0);
  while (!Stack.empty()) {
    uint32_t I = Stack.top();
    if (!(Status[I] & VISITED)) {
      Status[I] |= VISITED;
      // Order successors by weight
      // NOTE: operator[] on the unordered_map default-inserts a zero weight
      // for absent keys, so the comparator may grow ClusterEdges[I].
      auto ClusterComp = [&ClusterEdges, I](uint32_t A, uint32_t B) {
        return ClusterEdges[I][A] > ClusterEdges[I][B];
      };
      // Max-queue on edge weight: the heaviest successor is pushed last and
      // therefore explored first by the DFS below.
      std::priority_queue<uint32_t, std::vector<uint32_t>,
                          decltype(ClusterComp)>
          SuccQueue(ClusterComp);
      for (std::pair<const uint32_t, uint64_t> &Target : ClusterEdges[I]) {
        // Only follow positive-weight edges to clusters not yet discovered;
        // empty clusters are skipped entirely.
        if (Target.second > 0 && !(Status[Target.first] & STACKED) &&
            !Clusters[Target.first].empty()) {
          Parent[Target.first] = I;
          Status[Target.first] = STACKED;
          SuccQueue.push(Target.first);
        }
      }
      while (!SuccQueue.empty()) {
        Stack.push(SuccQueue.top());
        SuccQueue.pop();
      }
      continue;
    }
    // Already visited this node
    Stack.pop();
    ClusterOrder.push_back(I);
  }
  std::reverse(ClusterOrder.begin(), ClusterOrder.end());
  // Put unreachable clusters at the end
  for (uint32_t I = 0, E = Clusters.size(); I < E; ++I)
    if (!(Status[I] & VISITED) && !Clusters[I].empty())
      ClusterOrder.push_back(I);

  // Sort nodes with equal precedence
  auto Beg = ClusterOrder.begin();
  // Don't reorder the first cluster, which contains the function entry point
  ++Beg;
  // A cluster is kept before any of its DFS descendants (walking the Parent
  // chains); otherwise higher average frequency wins.
  // NOTE(review): this comparator mixes ancestry with frequency and may not
  // form a strict weak ordering for all inputs — confirm against
  // std::stable_sort's requirements before altering it.
  std::stable_sort(Beg, ClusterOrder.end(),
                   [&AvgFreq, &Parent](uint32_t A, uint32_t B) {
                     uint32_t P = Parent[A];
                     while (Parent[P] != 0) {
                       if (Parent[P] == B)
                         return false;
                       P = Parent[P];
                     }
                     P = Parent[B];
                     while (Parent[P] != 0) {
                       if (Parent[P] == A)
                         return true;
                       P = Parent[P];
                     }
                     return AvgFreq[A] > AvgFreq[B];
                   });

  if (opts::PrintClusters) {
    errs() << "New cluster order: ";
    const char *Sep = "";
    for (uint32_t O : ClusterOrder) {
      errs() << Sep << O;
      Sep = ", ";
    }
    errs() << '\n';
  }

  // Arrange basic blocks according to cluster order.
  for (uint32_t ClusterIndex : ClusterOrder) {
    ClusterAlgorithm::ClusterTy &Cluster = Clusters[ClusterIndex];
    Order.insert(Order.end(), Cluster.begin(), Cluster.end());
  }
}
624a34c753fSRafael Auler 
reorderBasicBlocks(const BinaryFunction & BF,BasicBlockOrder & Order) const625a34c753fSRafael Auler void OptimizeCacheReorderAlgorithm::reorderBasicBlocks(
626a34c753fSRafael Auler     const BinaryFunction &BF, BasicBlockOrder &Order) const {
627*8477bc67SFabian Parzefall   if (BF.getLayout().block_empty())
628a34c753fSRafael Auler     return;
629a34c753fSRafael Auler 
630a34c753fSRafael Auler   const uint64_t ColdThreshold =
631*8477bc67SFabian Parzefall       opts::ColdThreshold *
632*8477bc67SFabian Parzefall       (*BF.getLayout().block_begin())->getExecutionCount() / 1000;
633a34c753fSRafael Auler 
634a34c753fSRafael Auler   // Cluster basic blocks.
635a34c753fSRafael Auler   CAlgo->clusterBasicBlocks(BF);
636a34c753fSRafael Auler   std::vector<ClusterAlgorithm::ClusterTy> &Clusters = CAlgo->Clusters;
637a34c753fSRafael Auler 
638a34c753fSRafael Auler   // Compute clusters' average frequencies.
639a34c753fSRafael Auler   CAlgo->computeClusterAverageFrequency(BF.getBinaryContext());
640a34c753fSRafael Auler   std::vector<double> &AvgFreq = CAlgo->AvgFreq;
641a34c753fSRafael Auler 
642a34c753fSRafael Auler   if (opts::PrintClusters)
643a34c753fSRafael Auler     CAlgo->printClusters();
644a34c753fSRafael Auler 
645a34c753fSRafael Auler   // Cluster layout order
646a34c753fSRafael Auler   std::vector<uint32_t> ClusterOrder;
647a34c753fSRafael Auler 
648a34c753fSRafael Auler   // Order clusters based on average instruction execution frequency
649a34c753fSRafael Auler   for (uint32_t I = 0, E = Clusters.size(); I < E; ++I)
650a34c753fSRafael Auler     if (!Clusters[I].empty())
651a34c753fSRafael Auler       ClusterOrder.push_back(I);
652a34c753fSRafael Auler   // Don't reorder the first cluster, which contains the function entry point
65340c2e0faSMaksim Panchenko   std::stable_sort(
65440c2e0faSMaksim Panchenko       std::next(ClusterOrder.begin()), ClusterOrder.end(),
65540c2e0faSMaksim Panchenko       [&AvgFreq](uint32_t A, uint32_t B) { return AvgFreq[A] > AvgFreq[B]; });
656a34c753fSRafael Auler 
657a34c753fSRafael Auler   if (opts::PrintClusters) {
658a34c753fSRafael Auler     errs() << "New cluster order: ";
659a34c753fSRafael Auler     const char *Sep = "";
660a34c753fSRafael Auler     for (uint32_t O : ClusterOrder) {
661a34c753fSRafael Auler       errs() << Sep << O;
662a34c753fSRafael Auler       Sep = ", ";
663a34c753fSRafael Auler     }
664a34c753fSRafael Auler     errs() << '\n';
665a34c753fSRafael Auler   }
666a34c753fSRafael Auler 
667a34c753fSRafael Auler   // Arrange basic blocks according to cluster order.
668a34c753fSRafael Auler   for (uint32_t ClusterIndex : ClusterOrder) {
669a34c753fSRafael Auler     ClusterAlgorithm::ClusterTy &Cluster = Clusters[ClusterIndex];
670a34c753fSRafael Auler     Order.insert(Order.end(), Cluster.begin(), Cluster.end());
671a34c753fSRafael Auler     // Force zero execution count on clusters that do not meet the cut off
672a34c753fSRafael Auler     // specified by --cold-threshold.
673f92ab6afSAmir Ayupov     if (AvgFreq[ClusterIndex] < static_cast<double>(ColdThreshold))
674f92ab6afSAmir Ayupov       for (BinaryBasicBlock *BBPtr : Cluster)
675a34c753fSRafael Auler         BBPtr->setExecutionCount(0);
676a34c753fSRafael Auler   }
677a34c753fSRafael Auler }
678a34c753fSRafael Auler 
reorderBasicBlocks(const BinaryFunction & BF,BasicBlockOrder & Order) const67940c2e0faSMaksim Panchenko void ReverseReorderAlgorithm::reorderBasicBlocks(const BinaryFunction &BF,
68040c2e0faSMaksim Panchenko                                                  BasicBlockOrder &Order) const {
681*8477bc67SFabian Parzefall   if (BF.getLayout().block_empty())
682a34c753fSRafael Auler     return;
683a34c753fSRafael Auler 
684*8477bc67SFabian Parzefall   BinaryBasicBlock *FirstBB = *BF.getLayout().block_begin();
685a34c753fSRafael Auler   Order.push_back(FirstBB);
686*8477bc67SFabian Parzefall   for (auto RLI = BF.getLayout().block_rbegin(); *RLI != FirstBB; ++RLI)
687a34c753fSRafael Auler     Order.push_back(*RLI);
688a34c753fSRafael Auler }
689a34c753fSRafael Auler 
reorderBasicBlocks(const BinaryFunction & BF,BasicBlockOrder & Order) const690a34c753fSRafael Auler void RandomClusterReorderAlgorithm::reorderBasicBlocks(
691a34c753fSRafael Auler     const BinaryFunction &BF, BasicBlockOrder &Order) const {
692*8477bc67SFabian Parzefall   if (BF.getLayout().block_empty())
693a34c753fSRafael Auler     return;
694a34c753fSRafael Auler 
695a34c753fSRafael Auler   // Cluster basic blocks.
696a34c753fSRafael Auler   CAlgo->clusterBasicBlocks(BF);
697a34c753fSRafael Auler   std::vector<ClusterAlgorithm::ClusterTy> &Clusters = CAlgo->Clusters;
698a34c753fSRafael Auler 
699a34c753fSRafael Auler   if (opts::PrintClusters)
700a34c753fSRafael Auler     CAlgo->printClusters();
701a34c753fSRafael Auler 
702a34c753fSRafael Auler   // Cluster layout order
703a34c753fSRafael Auler   std::vector<uint32_t> ClusterOrder;
704a34c753fSRafael Auler 
705a34c753fSRafael Auler   // Order clusters based on average instruction execution frequency
706a34c753fSRafael Auler   for (uint32_t I = 0, E = Clusters.size(); I < E; ++I)
707a34c753fSRafael Auler     if (!Clusters[I].empty())
708a34c753fSRafael Auler       ClusterOrder.push_back(I);
709a34c753fSRafael Auler 
710a34c753fSRafael Auler   std::shuffle(std::next(ClusterOrder.begin()), ClusterOrder.end(),
711a34c753fSRafael Auler                std::default_random_engine(opts::RandomSeed.getValue()));
712a34c753fSRafael Auler 
713a34c753fSRafael Auler   if (opts::PrintClusters) {
714a34c753fSRafael Auler     errs() << "New cluster order: ";
715a34c753fSRafael Auler     const char *Sep = "";
716a34c753fSRafael Auler     for (uint32_t O : ClusterOrder) {
717a34c753fSRafael Auler       errs() << Sep << O;
718a34c753fSRafael Auler       Sep = ", ";
719a34c753fSRafael Auler     }
720a34c753fSRafael Auler     errs() << '\n';
721a34c753fSRafael Auler   }
722a34c753fSRafael Auler 
723a34c753fSRafael Auler   // Arrange basic blocks according to cluster order.
724a34c753fSRafael Auler   for (uint32_t ClusterIndex : ClusterOrder) {
725a34c753fSRafael Auler     ClusterAlgorithm::ClusterTy &Cluster = Clusters[ClusterIndex];
726a34c753fSRafael Auler     Order.insert(Order.end(), Cluster.begin(), Cluster.end());
727a34c753fSRafael Auler   }
728a34c753fSRafael Auler }
729