12f09f445SMaksim Panchenko //===- bolt/Passes/ReorderAlgorithm.cpp - Basic block reordering ----------===//
2a34c753fSRafael Auler //
3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information.
5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a34c753fSRafael Auler //
7a34c753fSRafael Auler //===----------------------------------------------------------------------===//
8a34c753fSRafael Auler //
92f09f445SMaksim Panchenko // This file implements classes used by several basic block reordering
102f09f445SMaksim Panchenko // algorithms.
11a34c753fSRafael Auler //
12a34c753fSRafael Auler //===----------------------------------------------------------------------===//
13a34c753fSRafael Auler
14a34c753fSRafael Auler #include "bolt/Passes/ReorderAlgorithm.h"
15a34c753fSRafael Auler #include "bolt/Core/BinaryBasicBlock.h"
16a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h"
17a34c753fSRafael Auler #include "llvm/Support/CommandLine.h"
18a34c753fSRafael Auler #include <queue>
19a34c753fSRafael Auler #include <random>
20a34c753fSRafael Auler #include <stack>
21a34c753fSRafael Auler
22a34c753fSRafael Auler #undef DEBUG_TYPE
23a34c753fSRafael Auler #define DEBUG_TYPE "bolt"
24a34c753fSRafael Auler
25a34c753fSRafael Auler using namespace llvm;
26a34c753fSRafael Auler using namespace bolt;
27a34c753fSRafael Auler
namespace opts {

extern cl::OptionCategory BoltOptCategory;
extern cl::opt<bool> NoThreads;

// Threshold (in tenths of a percent of the main entry frequency) under which
// a basic block is considered cold; 0 keeps only zero-sample blocks cold.
static cl::opt<unsigned> ColdThreshold(
    "cold-threshold",
    cl::desc("tenths of percents of main entry frequency to use as a "
             "threshold when evaluating whether a basic block is cold "
             "(0 means it is only considered cold if the block has zero "
             "samples). Default: 0 "),
    cl::init(0), cl::ZeroOrMore, cl::Hidden, cl::cat(BoltOptCategory));

// Dump the computed clusters to stderr for debugging.
static cl::opt<bool> PrintClusters("print-clusters", cl::desc("print clusters"),
                                   cl::Hidden, cl::cat(BoltOptCategory));

// Seed shared by the randomized reordering algorithms (non-static: referenced
// from other translation units).
cl::opt<uint32_t> RandomSeed("bolt-seed", cl::desc("seed for randomization"),
                             cl::init(42), cl::Hidden,
                             cl::cat(BoltOptCategory));

} // namespace opts
49a34c753fSRafael Auler
50a34c753fSRafael Auler namespace {
51a34c753fSRafael Auler
/// Boost-style hash_combine: fold the hash of \p Val into \p Seed using the
/// golden-ratio constant and two shifted copies of the current seed.
template <class T> inline void hashCombine(size_t &Seed, const T &Val) {
  const size_t ValHash = std::hash<T>{}(Val);
  Seed ^= ValHash + 0x9e3779b9 + (Seed << 6) + (Seed >> 2);
}
56a34c753fSRafael Auler
/// Hash functor for std::pair: hash the first element, then mix in the
/// second with the same combine formula as hashCombine() above.
template <typename A, typename B> struct HashPair {
  size_t operator()(const std::pair<A, B> &Val) const {
    size_t Seed = std::hash<A>{}(Val.first);
    Seed ^= std::hash<B>{}(Val.second) + 0x9e3779b9 + (Seed << 6) +
            (Seed >> 2);
    return Seed;
  }
};
65a34c753fSRafael Auler
6640c2e0faSMaksim Panchenko } // namespace
67a34c753fSRafael Auler
computeClusterAverageFrequency(const BinaryContext & BC)68a34c753fSRafael Auler void ClusterAlgorithm::computeClusterAverageFrequency(const BinaryContext &BC) {
69a34c753fSRafael Auler // Create a separate MCCodeEmitter to allow lock-free execution
70a34c753fSRafael Auler BinaryContext::IndependentCodeEmitter Emitter;
71f92ab6afSAmir Ayupov if (!opts::NoThreads)
72a34c753fSRafael Auler Emitter = BC.createIndependentMCCodeEmitter();
73a34c753fSRafael Auler
74a34c753fSRafael Auler AvgFreq.resize(Clusters.size(), 0.0);
75a34c753fSRafael Auler for (uint32_t I = 0, E = Clusters.size(); I < E; ++I) {
76a34c753fSRafael Auler double Freq = 0.0;
77a34c753fSRafael Auler uint64_t ClusterSize = 0;
78a34c753fSRafael Auler for (BinaryBasicBlock *BB : Clusters[I]) {
79a34c753fSRafael Auler if (BB->getNumNonPseudos() > 0) {
80a34c753fSRafael Auler Freq += BB->getExecutionCount();
81a34c753fSRafael Auler // Estimate the size of a block in bytes at run time
82a34c753fSRafael Auler // NOTE: This might be inaccurate
83a34c753fSRafael Auler ClusterSize += BB->estimateSize(Emitter.MCE.get());
84a34c753fSRafael Auler }
85a34c753fSRafael Auler }
86a34c753fSRafael Auler AvgFreq[I] = ClusterSize == 0 ? 0 : Freq / ClusterSize;
87a34c753fSRafael Auler }
88a34c753fSRafael Auler }
89a34c753fSRafael Auler
printClusters() const90a34c753fSRafael Auler void ClusterAlgorithm::printClusters() const {
91a34c753fSRafael Auler for (uint32_t I = 0, E = Clusters.size(); I < E; ++I) {
92a34c753fSRafael Auler errs() << "Cluster number " << I;
93a34c753fSRafael Auler if (AvgFreq.size() == Clusters.size())
94a34c753fSRafael Auler errs() << " (frequency: " << AvgFreq[I] << ")";
95a34c753fSRafael Auler errs() << " : ";
96a34c753fSRafael Auler const char *Sep = "";
97a34c753fSRafael Auler for (BinaryBasicBlock *BB : Clusters[I]) {
98a34c753fSRafael Auler errs() << Sep << BB->getName();
99a34c753fSRafael Auler Sep = ", ";
100a34c753fSRafael Auler }
101a34c753fSRafael Auler errs() << "\n";
102a34c753fSRafael Auler }
103a34c753fSRafael Auler }
104a34c753fSRafael Auler
// Discard all state from a previous clustering run so the algorithm object
// can be reused for another function.
void ClusterAlgorithm::reset() {
  Clusters.clear();
  ClusterEdges.clear();
  AvgFreq.clear();
}
110a34c753fSRafael Auler
print(raw_ostream & OS) const111a34c753fSRafael Auler void GreedyClusterAlgorithm::EdgeTy::print(raw_ostream &OS) const {
112a34c753fSRafael Auler OS << Src->getName() << " -> " << Dst->getName() << ", count: " << Count;
113a34c753fSRafael Auler }
114a34c753fSRafael Auler
operator ()(const EdgeTy & E) const115a34c753fSRafael Auler size_t GreedyClusterAlgorithm::EdgeHash::operator()(const EdgeTy &E) const {
116a34c753fSRafael Auler HashPair<const BinaryBasicBlock *, const BinaryBasicBlock *> Hasher;
117a34c753fSRafael Auler return Hasher(std::make_pair(E.Src, E.Dst));
118a34c753fSRafael Auler }
119a34c753fSRafael Auler
operator ()(const EdgeTy & A,const EdgeTy & B) const12040c2e0faSMaksim Panchenko bool GreedyClusterAlgorithm::EdgeEqual::operator()(const EdgeTy &A,
12140c2e0faSMaksim Panchenko const EdgeTy &B) const {
122a34c753fSRafael Auler return A.Src == B.Src && A.Dst == B.Dst;
123a34c753fSRafael Auler }
124a34c753fSRafael Auler
// Partition the function's basic blocks into clusters by greedily merging
// the pair of clusters joined by the hottest remaining CFG edge. When
// ComputeEdges is set, inter-/intra-cluster edge counts are accumulated in
// ClusterEdges for use by a later cluster-ordering phase.
void GreedyClusterAlgorithm::clusterBasicBlocks(const BinaryFunction &BF,
                                                bool ComputeEdges) {
  reset();

  // Greedy heuristic implementation for the TSP, applied to BB layout. Try to
  // maximize weight during a path traversing all BBs. In this way, we will
  // convert the hottest branches into fall-throughs.

  // This is the queue of edges from which we will pop edges and use them to
  // cluster basic blocks in a greedy fashion.
  std::vector<EdgeTy> Queue;

  // Initialize inter-cluster weights.
  if (ComputeEdges)
    ClusterEdges.resize(BF.getLayout().block_size());

  // Initialize clusters and edge queue: each block starts in a singleton
  // cluster of its own.
  for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
    // Create a cluster for this BB.
    uint32_t I = Clusters.size();
    Clusters.emplace_back();
    std::vector<BinaryBasicBlock *> &Cluster = Clusters.back();
    Cluster.push_back(BB);
    BBToClusterMap[BB] = I;
    // Populate priority queue with edges. BI walks the branch info in
    // lock-step with the successor list.
    auto BI = BB->branch_info_begin();
    // NOTE(review): this loop variable shadows the cluster index `I` above.
    for (BinaryBasicBlock *&I : BB->successors()) {
      assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
             "attempted reordering blocks of function with no profile data");
      Queue.emplace_back(EdgeTy(BB, I, BI->Count));
      ++BI;
    }
  }
  // Sort and adjust the edge queue (delegated to the concrete subclass).
  initQueue(Queue, BF);

  // Grow clusters in a greedy fashion, consuming the highest-priority edge
  // (the back of the queue) first.
  while (!Queue.empty()) {
    EdgeTy E = Queue.back();
    Queue.pop_back();

    const BinaryBasicBlock *SrcBB = E.Src;
    const BinaryBasicBlock *DstBB = E.Dst;

    LLVM_DEBUG(dbgs() << "Popped edge "; E.print(dbgs()); dbgs() << "\n");

    // Case 1: SrcBB and DstBB are the same, or DstBB is the entry block
    // (which must stay first in the layout). Ignore this edge.
    if (SrcBB == DstBB || DstBB == *BF.getLayout().block_begin()) {
      LLVM_DEBUG(dbgs() << "\tIgnored (same src, dst)\n");
      continue;
    }

    int I = BBToClusterMap[SrcBB];
    int J = BBToClusterMap[DstBB];

    // Case 2: If they are already allocated at the same cluster, just increase
    // the weight of this cluster
    if (I == J) {
      if (ComputeEdges)
        ClusterEdges[I][I] += E.Count;
      LLVM_DEBUG(dbgs() << "\tIgnored (src, dst belong to the same cluster)\n");
      continue;
    }

    std::vector<BinaryBasicBlock *> &ClusterA = Clusters[I];
    std::vector<BinaryBasicBlock *> &ClusterB = Clusters[J];
    if (areClustersCompatible(ClusterA, ClusterB, E)) {
      // Case 3: SrcBB is at the end of a cluster and DstBB is at the start,
      // allowing us to merge two clusters. ClusterB is emptied but left in
      // place so that existing cluster indices remain valid.
      for (BinaryBasicBlock *BB : ClusterB)
        BBToClusterMap[BB] = I;
      ClusterA.insert(ClusterA.end(), ClusterB.begin(), ClusterB.end());
      ClusterB.clear();
      if (ComputeEdges) {
        // Increase the intra-cluster edge count of cluster A with the count of
        // this edge as well as with the total count of previously visited
        // edges from cluster B to cluster A.
        ClusterEdges[I][I] += E.Count;
        ClusterEdges[I][I] += ClusterEdges[J][I];
        // Iterate through all inter-cluster edges and transfer edges targeting
        // cluster B to cluster A.
        for (uint32_t K = 0, E = ClusterEdges.size(); K != E; ++K)
          ClusterEdges[K][I] += ClusterEdges[K][J];
      }
      // Adjust the weights of the remaining edges and re-sort the queue.
      adjustQueue(Queue, BF);
      LLVM_DEBUG(dbgs() << "\tMerged clusters of src, dst\n");
    } else {
      // Case 4: Both SrcBB and DstBB are allocated in positions we cannot
      // merge them. Add the count of this edge to the inter-cluster edge count
      // between clusters A and B to help us decide ordering between these
      // clusters.
      if (ComputeEdges)
        ClusterEdges[I][J] += E.Count;
      LLVM_DEBUG(
          dbgs() << "\tIgnored (src, dst belong to incompatible clusters)\n");
    }
  }
}
224a34c753fSRafael Auler
// Clear base-class state plus the block-to-cluster index used by the greedy
// merging loop.
void GreedyClusterAlgorithm::reset() {
  ClusterAlgorithm::reset();
  BBToClusterMap.clear();
}
229a34c753fSRafael Auler
initQueue(std::vector<EdgeTy> & Queue,const BinaryFunction & BF)23040c2e0faSMaksim Panchenko void PHGreedyClusterAlgorithm::initQueue(std::vector<EdgeTy> &Queue,
23140c2e0faSMaksim Panchenko const BinaryFunction &BF) {
232a34c753fSRafael Auler // Define a comparison function to establish SWO between edges.
233a34c753fSRafael Auler auto Comp = [&BF](const EdgeTy &A, const EdgeTy &B) {
234a34c753fSRafael Auler // With equal weights, prioritize branches with lower index
235a34c753fSRafael Auler // source/destination. This helps to keep original block order for blocks
236a34c753fSRafael Auler // when optimal order cannot be deducted from a profile.
237a34c753fSRafael Auler if (A.Count == B.Count) {
238a34c753fSRafael Auler const signed SrcOrder = BF.getOriginalLayoutRelativeOrder(A.Src, B.Src);
239a34c753fSRafael Auler return (SrcOrder != 0)
240a34c753fSRafael Auler ? SrcOrder > 0
241a34c753fSRafael Auler : BF.getOriginalLayoutRelativeOrder(A.Dst, B.Dst) > 0;
242a34c753fSRafael Auler }
243a34c753fSRafael Auler return A.Count < B.Count;
244a34c753fSRafael Auler };
245a34c753fSRafael Auler
246a34c753fSRafael Auler // Sort edges in increasing profile count order.
247d2c87699SAmir Ayupov llvm::sort(Queue, Comp);
248a34c753fSRafael Auler }
249a34c753fSRafael Auler
adjustQueue(std::vector<EdgeTy> & Queue,const BinaryFunction & BF)25040c2e0faSMaksim Panchenko void PHGreedyClusterAlgorithm::adjustQueue(std::vector<EdgeTy> &Queue,
25140c2e0faSMaksim Panchenko const BinaryFunction &BF) {
252a34c753fSRafael Auler // Nothing to do.
253a34c753fSRafael Auler return;
254a34c753fSRafael Auler }
255a34c753fSRafael Auler
areClustersCompatible(const ClusterTy & Front,const ClusterTy & Back,const EdgeTy & E) const25640c2e0faSMaksim Panchenko bool PHGreedyClusterAlgorithm::areClustersCompatible(const ClusterTy &Front,
25740c2e0faSMaksim Panchenko const ClusterTy &Back,
25840c2e0faSMaksim Panchenko const EdgeTy &E) const {
259a34c753fSRafael Auler return Front.back() == E.Src && Back.front() == E.Dst;
260a34c753fSRafael Auler }
261a34c753fSRafael Auler
// Compute the weight used to prioritize edge E for merging: E's own
// execution count minus the counts of competing branches -- other edges
// leaving E.Src or entering E.Dst that would remain explicit jumps if E
// became a fall-through.
int64_t MinBranchGreedyClusterAlgorithm::calculateWeight(
    const EdgeTy &E, const BinaryFunction &BF) const {
  const BinaryBasicBlock *SrcBB = E.Src;
  const BinaryBasicBlock *DstBB = E.Dst;

  // Initial weight value.
  int64_t W = (int64_t)E.Count;

  // Adjust the weight by taking into account other edges with the same
  // source. BI walks the branch info in lock-step with the successor list.
  auto BI = SrcBB->branch_info_begin();
  for (const BinaryBasicBlock *SuccBB : SrcBB->successors()) {
    assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
           "attempted reordering blocks of function with no profile data");
    assert(BI->Count <= std::numeric_limits<int64_t>::max() &&
           "overflow detected");
    // Ignore edges with same source and destination, edges that target the
    // entry block as well as the edge E itself.
    if (SuccBB != SrcBB && SuccBB != *BF.getLayout().block_begin() &&
        SuccBB != DstBB)
      W -= (int64_t)BI->Count;
    ++BI;
  }

  // Adjust the weight by taking into account other edges with the same
  // destination.
  for (const BinaryBasicBlock *PredBB : DstBB->predecessors()) {
    // Ignore edges with same source and destination as well as the edge E
    // itself.
    if (PredBB == DstBB || PredBB == SrcBB)
      continue;
    // Locate the branch info for the edge PredBB -> DstBB by scanning
    // PredBB's successors in lock-step with its branch info.
    auto BI = PredBB->branch_info_begin();
    for (const BinaryBasicBlock *SuccBB : PredBB->successors()) {
      if (SuccBB == DstBB)
        break;
      ++BI;
    }
    assert(BI != PredBB->branch_info_end() && "invalid control flow graph");
    assert(BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE &&
           "attempted reordering blocks of function with no profile data");
    assert(BI->Count <= std::numeric_limits<int64_t>::max() &&
           "overflow detected");
    W -= (int64_t)BI->Count;
  }

  return W;
}
308a34c753fSRafael Auler
initQueue(std::vector<EdgeTy> & Queue,const BinaryFunction & BF)30940c2e0faSMaksim Panchenko void MinBranchGreedyClusterAlgorithm::initQueue(std::vector<EdgeTy> &Queue,
31040c2e0faSMaksim Panchenko const BinaryFunction &BF) {
311a34c753fSRafael Auler // Initialize edge weights.
312a34c753fSRafael Auler for (const EdgeTy &E : Queue)
313a34c753fSRafael Auler Weight.emplace(std::make_pair(E, calculateWeight(E, BF)));
314a34c753fSRafael Auler
315a34c753fSRafael Auler // Sort edges in increasing weight order.
316a34c753fSRafael Auler adjustQueue(Queue, BF);
317a34c753fSRafael Auler }
318a34c753fSRafael Auler
// Re-filter the queue after clustering progress: drop edges that can no
// longer trigger a merge (compensating the cached weights of their sibling
// edges), then re-sort the survivors by weight.
void MinBranchGreedyClusterAlgorithm::adjustQueue(std::vector<EdgeTy> &Queue,
                                                  const BinaryFunction &BF) {
  // Define a comparison function to establish SWO between edges.
  auto Comp = [&](const EdgeTy &A, const EdgeTy &B) {
    // With equal weights, prioritize branches with lower index
    // source/destination. This helps to keep original block order for blocks
    // when optimal order cannot be deduced from a profile.
    if (Weight[A] == Weight[B]) {
      const signed SrcOrder = BF.getOriginalLayoutRelativeOrder(A.Src, B.Src);
      return (SrcOrder != 0)
                 ? SrcOrder > 0
                 : BF.getOriginalLayoutRelativeOrder(A.Dst, B.Dst) > 0;
    }
    return Weight[A] < Weight[B];
  };

  // Iterate through all remaining edges to find edges that have their
  // source and destination in the same cluster.
  std::vector<EdgeTy> NewQueue;
  for (const EdgeTy &E : Queue) {
    const BinaryBasicBlock *SrcBB = E.Src;
    const BinaryBasicBlock *DstBB = E.Dst;

    // Case 1: SrcBB and DstBB are the same or DstBB is the entry block. Ignore
    // this edge.
    if (SrcBB == DstBB || DstBB == *BF.getLayout().block_begin()) {
      LLVM_DEBUG(dbgs() << "\tAdjustment: Ignored edge "; E.print(dbgs());
                 dbgs() << " (same src, dst)\n");
      continue;
    }

    int I = BBToClusterMap[SrcBB];
    int J = BBToClusterMap[DstBB];
    std::vector<BinaryBasicBlock *> &ClusterA = Clusters[I];
    std::vector<BinaryBasicBlock *> &ClusterB = Clusters[J];

    // Case 2: They are already allocated at the same cluster or incompatible
    // clusters. Adjust the weights of edges with the same source or
    // destination, so that this edge has no effect on them any more, and
    // ignore this edge. Also increase the intra- (or inter-) cluster edge
    // count.
    if (I == J || !areClustersCompatible(ClusterA, ClusterB, E)) {
      if (!ClusterEdges.empty())
        ClusterEdges[I][J] += E.Count;
      LLVM_DEBUG(dbgs() << "\tAdjustment: Ignored edge "; E.print(dbgs());
                 dbgs() << " (src, dst belong to same cluster or incompatible "
                           "clusters)\n");
      // E is being discarded, so add its count back to every sibling edge
      // whose weight calculateWeight() had reduced on E's behalf. The Weight
      // map hashes/compares on (Src, Dst) only, so a zero-count probe edge
      // suffices for the lookup.
      for (const BinaryBasicBlock *SuccBB : SrcBB->successors()) {
        if (SuccBB == DstBB)
          continue;
        auto WI = Weight.find(EdgeTy(SrcBB, SuccBB, 0));
        assert(WI != Weight.end() && "CFG edge not found in Weight map");
        WI->second += (int64_t)E.Count;
      }
      for (const BinaryBasicBlock *PredBB : DstBB->predecessors()) {
        if (PredBB == SrcBB)
          continue;
        auto WI = Weight.find(EdgeTy(PredBB, DstBB, 0));
        assert(WI != Weight.end() && "CFG edge not found in Weight map");
        WI->second += (int64_t)E.Count;
      }
      continue;
    }

    // Case 3: None of the previous cases is true, so just keep this edge in
    // the queue.
    NewQueue.emplace_back(E);
  }

  // Sort remaining edges in increasing weight order.
  Queue.swap(NewQueue);
  llvm::sort(Queue, Comp);
}
391a34c753fSRafael Auler
areClustersCompatible(const ClusterTy & Front,const ClusterTy & Back,const EdgeTy & E) const392a34c753fSRafael Auler bool MinBranchGreedyClusterAlgorithm::areClustersCompatible(
393a34c753fSRafael Auler const ClusterTy &Front, const ClusterTy &Back, const EdgeTy &E) const {
394a34c753fSRafael Auler return Front.back() == E.Src && Back.front() == E.Dst;
395a34c753fSRafael Auler }
396a34c753fSRafael Auler
// Clear inherited state plus the cached per-edge weights.
void MinBranchGreedyClusterAlgorithm::reset() {
  GreedyClusterAlgorithm::reset();
  Weight.clear();
}
401a34c753fSRafael Auler
reorderBasicBlocks(const BinaryFunction & BF,BasicBlockOrder & Order) const40240c2e0faSMaksim Panchenko void TSPReorderAlgorithm::reorderBasicBlocks(const BinaryFunction &BF,
40340c2e0faSMaksim Panchenko BasicBlockOrder &Order) const {
404a34c753fSRafael Auler std::vector<std::vector<uint64_t>> Weight;
405a34c753fSRafael Auler std::vector<BinaryBasicBlock *> IndexToBB;
406a34c753fSRafael Auler
407*8477bc67SFabian Parzefall const size_t N = BF.getLayout().block_size();
408a34c753fSRafael Auler assert(N <= std::numeric_limits<uint64_t>::digits &&
409a34c753fSRafael Auler "cannot use TSP solution for sizes larger than bits in uint64_t");
410a34c753fSRafael Auler
411a34c753fSRafael Auler // Populating weight map and index map
412*8477bc67SFabian Parzefall for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
413a34c753fSRafael Auler BB->setLayoutIndex(IndexToBB.size());
414a34c753fSRafael Auler IndexToBB.push_back(BB);
415a34c753fSRafael Auler }
416a34c753fSRafael Auler Weight.resize(N);
417*8477bc67SFabian Parzefall for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
418a34c753fSRafael Auler auto BI = BB->branch_info_begin();
419a34c753fSRafael Auler Weight[BB->getLayoutIndex()].resize(N);
420a34c753fSRafael Auler for (BinaryBasicBlock *SuccBB : BB->successors()) {
421a34c753fSRafael Auler if (BI->Count != BinaryBasicBlock::COUNT_NO_PROFILE)
422a34c753fSRafael Auler Weight[BB->getLayoutIndex()][SuccBB->getLayoutIndex()] = BI->Count;
423a34c753fSRafael Auler ++BI;
424a34c753fSRafael Auler }
425a34c753fSRafael Auler }
426a34c753fSRafael Auler
427a34c753fSRafael Auler std::vector<std::vector<int64_t>> DP;
428a34c753fSRafael Auler DP.resize(1 << N);
429f92ab6afSAmir Ayupov for (std::vector<int64_t> &Elmt : DP)
430a34c753fSRafael Auler Elmt.resize(N, -1);
431f92ab6afSAmir Ayupov
432a34c753fSRafael Auler // Start with the entry basic block being allocated with cost zero
433a34c753fSRafael Auler DP[1][0] = 0;
434a34c753fSRafael Auler // Walk through TSP solutions using a bitmask to represent state (current set
435a34c753fSRafael Auler // of BBs in the layout)
436a34c753fSRafael Auler uint64_t BestSet = 1;
437a34c753fSRafael Auler uint64_t BestLast = 0;
438a34c753fSRafael Auler int64_t BestWeight = 0;
439a34c753fSRafael Auler for (uint64_t Set = 1; Set < (1ULL << N); ++Set) {
440a34c753fSRafael Auler // Traverse each possibility of Last BB visited in this layout
441a34c753fSRafael Auler for (uint64_t Last = 0; Last < N; ++Last) {
442a34c753fSRafael Auler // Case 1: There is no possible layout with this BB as Last
443a34c753fSRafael Auler if (DP[Set][Last] == -1)
444a34c753fSRafael Auler continue;
445a34c753fSRafael Auler
446a34c753fSRafael Auler // Case 2: There is a layout with this Set and this Last, and we try
447a34c753fSRafael Auler // to expand this set with New
448a34c753fSRafael Auler for (uint64_t New = 1; New < N; ++New) {
449a34c753fSRafael Auler // Case 2a: BB "New" is already in this Set
450a34c753fSRafael Auler if ((Set & (1ULL << New)) != 0)
451a34c753fSRafael Auler continue;
452a34c753fSRafael Auler
453a34c753fSRafael Auler // Case 2b: BB "New" is not in this set and we add it to this Set and
454a34c753fSRafael Auler // record total weight of this layout with "New" as the last BB.
455a34c753fSRafael Auler uint64_t NewSet = (Set | (1ULL << New));
456a34c753fSRafael Auler if (DP[NewSet][New] == -1)
457a34c753fSRafael Auler DP[NewSet][New] = DP[Set][Last] + (int64_t)Weight[Last][New];
458a34c753fSRafael Auler DP[NewSet][New] = std::max(DP[NewSet][New],
459a34c753fSRafael Auler DP[Set][Last] + (int64_t)Weight[Last][New]);
460a34c753fSRafael Auler
461a34c753fSRafael Auler if (DP[NewSet][New] > BestWeight) {
462a34c753fSRafael Auler BestWeight = DP[NewSet][New];
463a34c753fSRafael Auler BestSet = NewSet;
464a34c753fSRafael Auler BestLast = New;
465a34c753fSRafael Auler }
466a34c753fSRafael Auler }
467a34c753fSRafael Auler }
468a34c753fSRafael Auler }
469a34c753fSRafael Auler
470a34c753fSRafael Auler // Define final function layout based on layout that maximizes weight
471a34c753fSRafael Auler uint64_t Last = BestLast;
472a34c753fSRafael Auler uint64_t Set = BestSet;
47318bc405aSAmir Ayupov BitVector Visited;
474a34c753fSRafael Auler Visited.resize(N);
475a34c753fSRafael Auler Visited[Last] = true;
476a34c753fSRafael Auler Order.push_back(IndexToBB[Last]);
477a34c753fSRafael Auler Set = Set & ~(1ULL << Last);
478a34c753fSRafael Auler while (Set != 0) {
479a34c753fSRafael Auler int64_t Best = -1;
480a34c753fSRafael Auler uint64_t NewLast;
481a34c753fSRafael Auler for (uint64_t I = 0; I < N; ++I) {
482a34c753fSRafael Auler if (DP[Set][I] == -1)
483a34c753fSRafael Auler continue;
484a34c753fSRafael Auler int64_t AdjWeight = Weight[I][Last] > 0 ? Weight[I][Last] : 0;
485a34c753fSRafael Auler if (DP[Set][I] + AdjWeight > Best) {
486a34c753fSRafael Auler NewLast = I;
487a34c753fSRafael Auler Best = DP[Set][I] + AdjWeight;
488a34c753fSRafael Auler }
489a34c753fSRafael Auler }
490a34c753fSRafael Auler Last = NewLast;
491a34c753fSRafael Auler Visited[Last] = true;
492a34c753fSRafael Auler Order.push_back(IndexToBB[Last]);
493a34c753fSRafael Auler Set = Set & ~(1ULL << Last);
494a34c753fSRafael Auler }
495a34c753fSRafael Auler std::reverse(Order.begin(), Order.end());
496a34c753fSRafael Auler
497a34c753fSRafael Auler // Finalize layout with BBs that weren't assigned to the layout using the
498a34c753fSRafael Auler // input layout.
499*8477bc67SFabian Parzefall for (BinaryBasicBlock *BB : BF.getLayout().blocks())
500a34c753fSRafael Auler if (Visited[BB->getLayoutIndex()] == false)
501a34c753fSRafael Auler Order.push_back(BB);
502a34c753fSRafael Auler }
503a34c753fSRafael Auler
reorderBasicBlocks(const BinaryFunction & BF,BasicBlockOrder & Order) const504a34c753fSRafael Auler void OptimizeReorderAlgorithm::reorderBasicBlocks(
505a34c753fSRafael Auler const BinaryFunction &BF, BasicBlockOrder &Order) const {
506*8477bc67SFabian Parzefall if (BF.getLayout().block_empty())
507a34c753fSRafael Auler return;
508a34c753fSRafael Auler
509a34c753fSRafael Auler // Cluster basic blocks.
510a34c753fSRafael Auler CAlgo->clusterBasicBlocks(BF);
511a34c753fSRafael Auler
512a34c753fSRafael Auler if (opts::PrintClusters)
513a34c753fSRafael Auler CAlgo->printClusters();
514a34c753fSRafael Auler
515a34c753fSRafael Auler // Arrange basic blocks according to clusters.
516a34c753fSRafael Auler for (ClusterAlgorithm::ClusterTy &Cluster : CAlgo->Clusters)
517a34c753fSRafael Auler Order.insert(Order.end(), Cluster.begin(), Cluster.end());
518a34c753fSRafael Auler }
519a34c753fSRafael Auler
/// Lay out basic blocks by clustering them (with inter-cluster edge weights),
/// then emitting clusters in an approximate topological order that visits
/// heavier successors first, and finally stable-sorting clusters of equal
/// precedence by average execution frequency.
void OptimizeBranchReorderAlgorithm::reorderBasicBlocks(
    const BinaryFunction &BF, BasicBlockOrder &Order) const {
  if (BF.getLayout().block_empty())
    return;

  // Cluster basic blocks. ComputeEdges is requested because the topological
  // traversal below needs the inter-cluster edge weights.
  CAlgo->clusterBasicBlocks(BF, /* ComputeEdges = */ true);
  std::vector<ClusterAlgorithm::ClusterTy> &Clusters = CAlgo->Clusters;
  // ClusterEdges[I][J] is the (non-negative) weight of the edge from cluster
  // I to cluster J, as computed by the clustering algorithm.
  std::vector<std::unordered_map<uint32_t, uint64_t>> &ClusterEdges =
      CAlgo->ClusterEdges;

  // Compute clusters' average frequencies.
  CAlgo->computeClusterAverageFrequency(BF.getBinaryContext());
  std::vector<double> &AvgFreq = CAlgo->AvgFreq;

  if (opts::PrintClusters)
    CAlgo->printClusters();

  // Cluster layout order
  std::vector<uint32_t> ClusterOrder;

  // Do a topological sort for clusters, prioritizing frequently-executed BBs
  // during the traversal.
  //
  // Iterative DFS from cluster 0 (which holds the function entry point).
  // Status[I] is a bit set: STACKED is set once I has ever been pushed onto
  // the stack (so it is never pushed twice); VISITED is set the first time I
  // reaches the top of the stack and its successors are expanded. Parent[I]
  // records the DFS parent, used later by the stable_sort comparator.
  std::stack<uint32_t> Stack;
  std::vector<uint32_t> Status;
  std::vector<uint32_t> Parent;
  Status.resize(Clusters.size(), 0);
  Parent.resize(Clusters.size(), 0);
  constexpr uint32_t STACKED = 1;
  constexpr uint32_t VISITED = 2;
  Status[0] = STACKED;
  Stack.push(0);
  while (!Stack.empty()) {
    uint32_t I = Stack.top();
    if (!(Status[I] & VISITED)) {
      // First visit: expand successors but keep I on the stack; it is popped
      // (and appended to ClusterOrder) only after all its successors finish.
      Status[I] |= VISITED;
      // Order successors by weight
      // Comparator makes the priority queue yield the lightest edge first;
      // since elements are pushed onto the stack in pop order, the heaviest
      // successor ends up on top of the stack and is explored first. All
      // keys passed to the comparator come from ClusterEdges[I] itself, so
      // operator[] here never inserts new entries.
      auto ClusterComp = [&ClusterEdges, I](uint32_t A, uint32_t B) {
        return ClusterEdges[I][A] > ClusterEdges[I][B];
      };
      std::priority_queue<uint32_t, std::vector<uint32_t>,
                          decltype(ClusterComp)>
          SuccQueue(ClusterComp);
      for (std::pair<const uint32_t, uint64_t> &Target : ClusterEdges[I]) {
        // Only follow edges with positive weight, into clusters that are
        // non-empty and have not been scheduled for a visit yet.
        if (Target.second > 0 && !(Status[Target.first] & STACKED) &&
            !Clusters[Target.first].empty()) {
          Parent[Target.first] = I;
          Status[Target.first] = STACKED;
          SuccQueue.push(Target.first);
        }
      }
      while (!SuccQueue.empty()) {
        Stack.push(SuccQueue.top());
        SuccQueue.pop();
      }
      continue;
    }
    // Already visited this node
    Stack.pop();
    ClusterOrder.push_back(I);
  }
  // Post-order was accumulated; reverse it to get a topological-like order.
  std::reverse(ClusterOrder.begin(), ClusterOrder.end());
  // Put unreachable clusters at the end
  for (uint32_t I = 0, E = Clusters.size(); I < E; ++I)
    if (!(Status[I] & VISITED) && !Clusters[I].empty())
      ClusterOrder.push_back(I);

  // Sort nodes with equal precedence
  auto Beg = ClusterOrder.begin();
  // Don't reorder the first cluster, which contains the function entry point
  ++Beg;
  // Stable-sort by descending average frequency, except that a cluster is
  // never moved ahead of a (strict) DFS ancestor: the comparator walks the
  // Parent chains of A and B (chains terminate at cluster 0, whose Parent is
  // 0) before falling back to the frequency comparison.
  std::stable_sort(Beg, ClusterOrder.end(),
                   [&AvgFreq, &Parent](uint32_t A, uint32_t B) {
                     uint32_t P = Parent[A];
                     while (Parent[P] != 0) {
                       if (Parent[P] == B)
                         return false;
                       P = Parent[P];
                     }
                     P = Parent[B];
                     while (Parent[P] != 0) {
                       if (Parent[P] == A)
                         return true;
                       P = Parent[P];
                     }
                     return AvgFreq[A] > AvgFreq[B];
                   });

  if (opts::PrintClusters) {
    errs() << "New cluster order: ";
    const char *Sep = "";
    for (uint32_t O : ClusterOrder) {
      errs() << Sep << O;
      Sep = ", ";
    }
    errs() << '\n';
  }

  // Arrange basic blocks according to cluster order.
  for (uint32_t ClusterIndex : ClusterOrder) {
    ClusterAlgorithm::ClusterTy &Cluster = Clusters[ClusterIndex];
    Order.insert(Order.end(), Cluster.begin(), Cluster.end());
  }
}
624a34c753fSRafael Auler
reorderBasicBlocks(const BinaryFunction & BF,BasicBlockOrder & Order) const625a34c753fSRafael Auler void OptimizeCacheReorderAlgorithm::reorderBasicBlocks(
626a34c753fSRafael Auler const BinaryFunction &BF, BasicBlockOrder &Order) const {
627*8477bc67SFabian Parzefall if (BF.getLayout().block_empty())
628a34c753fSRafael Auler return;
629a34c753fSRafael Auler
630a34c753fSRafael Auler const uint64_t ColdThreshold =
631*8477bc67SFabian Parzefall opts::ColdThreshold *
632*8477bc67SFabian Parzefall (*BF.getLayout().block_begin())->getExecutionCount() / 1000;
633a34c753fSRafael Auler
634a34c753fSRafael Auler // Cluster basic blocks.
635a34c753fSRafael Auler CAlgo->clusterBasicBlocks(BF);
636a34c753fSRafael Auler std::vector<ClusterAlgorithm::ClusterTy> &Clusters = CAlgo->Clusters;
637a34c753fSRafael Auler
638a34c753fSRafael Auler // Compute clusters' average frequencies.
639a34c753fSRafael Auler CAlgo->computeClusterAverageFrequency(BF.getBinaryContext());
640a34c753fSRafael Auler std::vector<double> &AvgFreq = CAlgo->AvgFreq;
641a34c753fSRafael Auler
642a34c753fSRafael Auler if (opts::PrintClusters)
643a34c753fSRafael Auler CAlgo->printClusters();
644a34c753fSRafael Auler
645a34c753fSRafael Auler // Cluster layout order
646a34c753fSRafael Auler std::vector<uint32_t> ClusterOrder;
647a34c753fSRafael Auler
648a34c753fSRafael Auler // Order clusters based on average instruction execution frequency
649a34c753fSRafael Auler for (uint32_t I = 0, E = Clusters.size(); I < E; ++I)
650a34c753fSRafael Auler if (!Clusters[I].empty())
651a34c753fSRafael Auler ClusterOrder.push_back(I);
652a34c753fSRafael Auler // Don't reorder the first cluster, which contains the function entry point
65340c2e0faSMaksim Panchenko std::stable_sort(
65440c2e0faSMaksim Panchenko std::next(ClusterOrder.begin()), ClusterOrder.end(),
65540c2e0faSMaksim Panchenko [&AvgFreq](uint32_t A, uint32_t B) { return AvgFreq[A] > AvgFreq[B]; });
656a34c753fSRafael Auler
657a34c753fSRafael Auler if (opts::PrintClusters) {
658a34c753fSRafael Auler errs() << "New cluster order: ";
659a34c753fSRafael Auler const char *Sep = "";
660a34c753fSRafael Auler for (uint32_t O : ClusterOrder) {
661a34c753fSRafael Auler errs() << Sep << O;
662a34c753fSRafael Auler Sep = ", ";
663a34c753fSRafael Auler }
664a34c753fSRafael Auler errs() << '\n';
665a34c753fSRafael Auler }
666a34c753fSRafael Auler
667a34c753fSRafael Auler // Arrange basic blocks according to cluster order.
668a34c753fSRafael Auler for (uint32_t ClusterIndex : ClusterOrder) {
669a34c753fSRafael Auler ClusterAlgorithm::ClusterTy &Cluster = Clusters[ClusterIndex];
670a34c753fSRafael Auler Order.insert(Order.end(), Cluster.begin(), Cluster.end());
671a34c753fSRafael Auler // Force zero execution count on clusters that do not meet the cut off
672a34c753fSRafael Auler // specified by --cold-threshold.
673f92ab6afSAmir Ayupov if (AvgFreq[ClusterIndex] < static_cast<double>(ColdThreshold))
674f92ab6afSAmir Ayupov for (BinaryBasicBlock *BBPtr : Cluster)
675a34c753fSRafael Auler BBPtr->setExecutionCount(0);
676a34c753fSRafael Auler }
677a34c753fSRafael Auler }
678a34c753fSRafael Auler
reorderBasicBlocks(const BinaryFunction & BF,BasicBlockOrder & Order) const67940c2e0faSMaksim Panchenko void ReverseReorderAlgorithm::reorderBasicBlocks(const BinaryFunction &BF,
68040c2e0faSMaksim Panchenko BasicBlockOrder &Order) const {
681*8477bc67SFabian Parzefall if (BF.getLayout().block_empty())
682a34c753fSRafael Auler return;
683a34c753fSRafael Auler
684*8477bc67SFabian Parzefall BinaryBasicBlock *FirstBB = *BF.getLayout().block_begin();
685a34c753fSRafael Auler Order.push_back(FirstBB);
686*8477bc67SFabian Parzefall for (auto RLI = BF.getLayout().block_rbegin(); *RLI != FirstBB; ++RLI)
687a34c753fSRafael Auler Order.push_back(*RLI);
688a34c753fSRafael Auler }
689a34c753fSRafael Auler
reorderBasicBlocks(const BinaryFunction & BF,BasicBlockOrder & Order) const690a34c753fSRafael Auler void RandomClusterReorderAlgorithm::reorderBasicBlocks(
691a34c753fSRafael Auler const BinaryFunction &BF, BasicBlockOrder &Order) const {
692*8477bc67SFabian Parzefall if (BF.getLayout().block_empty())
693a34c753fSRafael Auler return;
694a34c753fSRafael Auler
695a34c753fSRafael Auler // Cluster basic blocks.
696a34c753fSRafael Auler CAlgo->clusterBasicBlocks(BF);
697a34c753fSRafael Auler std::vector<ClusterAlgorithm::ClusterTy> &Clusters = CAlgo->Clusters;
698a34c753fSRafael Auler
699a34c753fSRafael Auler if (opts::PrintClusters)
700a34c753fSRafael Auler CAlgo->printClusters();
701a34c753fSRafael Auler
702a34c753fSRafael Auler // Cluster layout order
703a34c753fSRafael Auler std::vector<uint32_t> ClusterOrder;
704a34c753fSRafael Auler
705a34c753fSRafael Auler // Order clusters based on average instruction execution frequency
706a34c753fSRafael Auler for (uint32_t I = 0, E = Clusters.size(); I < E; ++I)
707a34c753fSRafael Auler if (!Clusters[I].empty())
708a34c753fSRafael Auler ClusterOrder.push_back(I);
709a34c753fSRafael Auler
710a34c753fSRafael Auler std::shuffle(std::next(ClusterOrder.begin()), ClusterOrder.end(),
711a34c753fSRafael Auler std::default_random_engine(opts::RandomSeed.getValue()));
712a34c753fSRafael Auler
713a34c753fSRafael Auler if (opts::PrintClusters) {
714a34c753fSRafael Auler errs() << "New cluster order: ";
715a34c753fSRafael Auler const char *Sep = "";
716a34c753fSRafael Auler for (uint32_t O : ClusterOrder) {
717a34c753fSRafael Auler errs() << Sep << O;
718a34c753fSRafael Auler Sep = ", ";
719a34c753fSRafael Auler }
720a34c753fSRafael Auler errs() << '\n';
721a34c753fSRafael Auler }
722a34c753fSRafael Auler
723a34c753fSRafael Auler // Arrange basic blocks according to cluster order.
724a34c753fSRafael Auler for (uint32_t ClusterIndex : ClusterOrder) {
725a34c753fSRafael Auler ClusterAlgorithm::ClusterTy &Cluster = Clusters[ClusterIndex];
726a34c753fSRafael Auler Order.insert(Order.end(), Cluster.begin(), Cluster.end());
727a34c753fSRafael Auler }
728a34c753fSRafael Auler }
729