//===- bolt/Passes/HFSort.cpp - Cluster functions by hotness --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Implementation of the HFSort algorithm for function ordering:
// https://research.fb.com/wp-content/uploads/2017/01/cgo2017-hfsort-final1.pdf
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/HFSort.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <unordered_set>

#define DEBUG_TYPE "hfsort"

namespace opts {
extern llvm::cl::opt<unsigned> Verbosity;
}

namespace llvm {
namespace bolt {

using NodeId = CallGraph::NodeId;
using Arc = CallGraph::Arc;
using Node = CallGraph::Node;

namespace {

// The number of pages to reserve for the functions with highest
// density (samples / size). The functions put in these pages are not
// considered for clustering.
constexpr uint32_t FrozenPages = 0;

// The minimum approximate probability of a callee being called from a
// particular arc to consider merging with the caller's cluster.
constexpr double MinArcProbability = 0.1;

// This is a factor to determine by how much a caller cluster is
// willing to degrade its density by merging a callee.
constexpr int CallerDegradeFactor = 8;

} // namespace

////////////////////////////////////////////////////////////////////////////////

Cluster::Cluster(NodeId Id, const Node &Func)
    : Samples(Func.samples()), Size(Func.size()),
      Density((double)Samples / Size) {
  Targets.push_back(Id);
}

Cluster::Cluster(const std::vector<NodeId> &Nodes, const CallGraph &Cg) {
  Samples = 0;
  Size = 0;
  for (NodeId TargetId : Nodes) {
    Targets.push_back(TargetId);
    Samples += Cg.samples(TargetId);
    Size += Cg.size(TargetId);
  }
  Density = (double)Samples / Size;
}

std::string Cluster::toString() const {
  std::string Str;
  raw_string_ostream CS(Str);
  bool PrintComma = false;
  CS << "funcs = [";
  for (const NodeId &Target : Targets) {
    if (PrintComma)
      CS << ", ";
    CS << Target;
    PrintComma = true;
  }
  CS << "]";
  return CS.str();
}

namespace {

void freezeClusters(const CallGraph &Cg, std::vector<Cluster> &Clusters) {
  uint32_t TotalSize = 0;
  std::sort(Clusters.begin(), Clusters.end(), compareClustersDensity);
  for (Cluster &C : Clusters) {
    uint32_t NewSize = TotalSize + C.size();
    if (NewSize > FrozenPages * HugePageSize)
      break;
    C.freeze();
    TotalSize = NewSize;
    LLVM_DEBUG(NodeId Fid = C.target(0);
               dbgs() << format(
                   "freezing cluster for func %d, size = %u, samples = %lu\n",
                   Fid, Cg.size(Fid), Cg.samples(Fid)););
  }
}

} // namespace

void Cluster::reverseTargets() {
  std::reverse(Targets.begin(), Targets.end());
}

void Cluster::merge(const Cluster &Other, const double Aw) {
  Targets.insert(Targets.end(), Other.Targets.begin(), Other.Targets.end());
  Size += Other.Size;
  Samples += Other.Samples;
  Density = (double)Samples / Size;
}

void Cluster::merge(const Cluster &Other,
                    const std::vector<CallGraph::NodeId> &Targets_) {
  Targets = Targets_;
  Size += Other.Size;
  Samples += Other.Samples;
  Density = (double)Samples / Size;
}

void Cluster::clear() {
  Id = -1u;
  Size = 0;
  Samples = 0;
  Density = 0.0;
  Targets.clear();
  Frozen = false;
}

std::vector<Cluster> clusterize(const CallGraph &Cg) {
  std::vector<NodeId> SortedFuncs;

  // Indexed by NodeId; tracks each function's current cluster.
  std::vector<Cluster *> FuncCluster(Cg.numNodes(), nullptr);
  std::vector<Cluster> Clusters;
  Clusters.reserve(Cg.numNodes());

  for (NodeId F = 0; F < Cg.numNodes(); F++) {
    if (Cg.samples(F) == 0)
      continue;
    Clusters.emplace_back(F, Cg.getNode(F));
    SortedFuncs.push_back(F);
  }

  freezeClusters(Cg, Clusters);

  // The size and order of Clusters are fixed until we reshuffle them
  // immediately before returning.
  for (Cluster &Cluster : Clusters)
    FuncCluster[Cluster.targets().front()] = &Cluster;

  // Sort functions by density (samples / size) in decreasing order, comparing
  // via cross-multiplication to avoid division.
  std::sort(SortedFuncs.begin(), SortedFuncs.end(),
            [&](const NodeId F1, const NodeId F2) {
              const CallGraph::Node &Func1 = Cg.getNode(F1);
              const CallGraph::Node &Func2 = Cg.getNode(F2);
              return Func1.samples() * Func2.size() > // TODO: is this correct?
                     Func2.samples() * Func1.size();
            });

  // Process each function, and consider merging its cluster with the
  // one containing its most likely predecessor.
  for (const NodeId Fid : SortedFuncs) {
    Cluster *Cluster = FuncCluster[Fid];
    if (Cluster->frozen())
      continue;

    // Find the best predecessor.
    NodeId BestPred = CallGraph::InvalidId;
    double BestProb = 0;

    for (const NodeId Src : Cg.predecessors(Fid)) {
      const Arc &Arc = *Cg.findArc(Src, Fid);
      if (BestPred == CallGraph::InvalidId ||
          Arc.normalizedWeight() > BestProb) {
        BestPred = Arc.src();
        BestProb = Arc.normalizedWeight();
      }
    }

    // Check if the merge is good for the callee.
    // Don't merge if the probability of getting to the callee from the
    // caller is too low.
    if (BestProb < MinArcProbability)
      continue;

    assert(BestPred != CallGraph::InvalidId);

    class Cluster *PredCluster = FuncCluster[BestPred];

    // Skip if there is no PredCluster (the predecessor has no samples), if it
    // is the same as Cluster, or if it is frozen.
    if (PredCluster == nullptr || PredCluster == Cluster ||
        PredCluster->frozen())
      continue;

    // Skip if the merged cluster would be bigger than the threshold.
    if (Cluster->size() + PredCluster->size() > MaxClusterSize)
      continue;

    // Check if the merge is good for the caller.
    // Don't merge if the caller's density is significantly better
    // than the density resulting from the merge.
    const double NewDensity =
        ((double)PredCluster->samples() + Cluster->samples()) /
        (PredCluster->size() + Cluster->size());
    if (PredCluster->density() > NewDensity * CallerDegradeFactor) {
      continue;
    }

    LLVM_DEBUG(if (opts::Verbosity > 1) {
      dbgs() << format("merging %s -> %s: %u\n",
                       PredCluster->toString().c_str(),
                       Cluster->toString().c_str(), Cg.samples(Fid));
    });

    for (NodeId F : Cluster->targets())
      FuncCluster[F] = PredCluster;

    PredCluster->merge(*Cluster);
    Cluster->clear();
  }

  // Return the set of clusters that are left, which are the ones that
  // didn't get merged (so their first function is still their original
  // function).
  std::vector<Cluster> SortedClusters;
  std::unordered_set<Cluster *> Visited;
  for (const NodeId Func : SortedFuncs) {
    Cluster *Cluster = FuncCluster[Func];
    if (!Cluster || Visited.count(Cluster) == 1 || Cluster->target(0) != Func)
      continue;

    SortedClusters.emplace_back(std::move(*Cluster));
    Visited.insert(Cluster);
  }

  std::sort(SortedClusters.begin(), SortedClusters.end(),
            compareClustersDensity);

  return SortedClusters;
}

std::vector<Cluster> randomClusters(const CallGraph &Cg) {
  std::vector<NodeId> FuncIds(Cg.numNodes(), 0);
  std::vector<Cluster> Clusters;
  Clusters.reserve(Cg.numNodes());

  for (NodeId F = 0; F < Cg.numNodes(); F++) {
    if (Cg.samples(F) == 0)
      continue;
    Clusters.emplace_back(F, Cg.getNode(F));
  }

  std::sort(
      Clusters.begin(), Clusters.end(),
      [](const Cluster &A, const Cluster &B) { return A.size() < B.size(); });

  // Pick a random cluster among those that can be merged with Clusters[Idx]
  // without exceeding MaxClusterSize; return Clusters.size() if none exists.
  auto pickMergeCluster = [&Clusters](const size_t Idx) {
    size_t MaxIdx = Idx + 1;

    while (MaxIdx < Clusters.size() &&
           Clusters[Idx].size() + Clusters[MaxIdx].size() <= MaxClusterSize)
      ++MaxIdx;

    if (MaxIdx - Idx > 1) {
      size_t MergeIdx = (std::rand() % (MaxIdx - Idx - 1)) + Idx + 1;
      assert(Clusters[MergeIdx].size() + Clusters[Idx].size() <=
             MaxClusterSize);
      return MergeIdx;
    }
    return Clusters.size();
  };

  size_t Idx = 0;
  while (Idx < Clusters.size()) {
    size_t MergeIdx = pickMergeCluster(Idx);
    if (MergeIdx == Clusters.size()) {
      ++Idx;
    } else {
      Clusters[Idx].merge(Clusters[MergeIdx]);
      Clusters.erase(Clusters.begin() + MergeIdx);
    }
  }

  return Clusters;
}

} // namespace bolt
} // namespace llvm