//===- bolt/Passes/BinaryFunctionCallGraph.cpp ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the BinaryFunctionCallGraph class.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/BinaryFunctionCallGraph.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Timer.h"
#include <stack>

#define DEBUG_TYPE "callgraph"

namespace opts {
extern llvm::cl::opt<bool> TimeOpts;
extern llvm::cl::opt<unsigned> Verbosity;
} // namespace opts

namespace llvm {
namespace bolt {

CallGraph::NodeId BinaryFunctionCallGraph::addNode(BinaryFunction *BF,
                                                   uint32_t Size,
                                                   uint64_t Samples) {
  NodeId Id = CallGraph::addNode(Size, Samples);
  assert(size_t(Id) == Funcs.size());
  Funcs.push_back(BF);
  FuncToNodeId[BF] = Id;
  assert(Funcs[Id] == BF);
  return Id;
}

std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() {
  NamedRegionTimer T1("buildcgorder", "Build cg traversal order",
                      "CG breakdown", "CG breakdown", opts::TimeOpts);
  std::deque<BinaryFunction *> TopologicalOrder;
  enum NodeStatus { NEW, VISITING, VISITED };
  std::vector<NodeStatus> NodeStatus(Funcs.size());
  std::stack<NodeId> Worklist;

  // Seed the worklist with every node; all nodes start out NEW.
  for (BinaryFunction *Func : Funcs) {
    const NodeId Id = FuncToNodeId.at(Func);
    Worklist.push(Id);
    NodeStatus[Id] = NEW;
  }

  // Iterative DFS: each node is pushed twice. The first pop marks it
  // VISITING and re-pushes it beneath its unvisited callees; the second pop
  // emits it, so every function is emitted after its callees (up to cycles).
  while (!Worklist.empty()) {
    const NodeId FuncId = Worklist.top();
    Worklist.pop();

    if (NodeStatus[FuncId] == VISITED)
      continue;

    if (NodeStatus[FuncId] == VISITING) {
      TopologicalOrder.push_back(Funcs[FuncId]);
      NodeStatus[FuncId] = VISITED;
      continue;
    }

    assert(NodeStatus[FuncId] == NEW);
    NodeStatus[FuncId] = VISITING;
    Worklist.push(FuncId);
    for (const NodeId Callee : successors(FuncId)) {
      if (NodeStatus[Callee] == VISITING || NodeStatus[Callee] == VISITED)
        continue;
      Worklist.push(Callee);
    }
  }

  return TopologicalOrder;
}
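
// A minimal usage sketch (illustrative only; processBottomUp is a
// hypothetical helper, not part of BOLT): because buildTraversalOrder()
// places callees before their callers, a bottom-up pass can simply iterate
// the returned order front to back:
//
//   std::deque<BinaryFunction *> Order = Cg.buildTraversalOrder();
//   for (BinaryFunction *BF : Order)
//     processBottomUp(*BF); // Callees of BF have already been processed.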

BinaryFunctionCallGraph
buildCallGraph(BinaryContext &BC, CgFilterFunction Filter, bool CgFromPerfData,
               bool IncludeColdCalls, bool UseFunctionHotSize,
               bool UseSplitHotSize, bool UseEdgeCounts,
               bool IgnoreRecursiveCalls) {
  NamedRegionTimer T1("buildcg", "Callgraph construction", "CG breakdown",
                      "CG breakdown", opts::TimeOpts);
  BinaryFunctionCallGraph Cg;
  static constexpr uint64_t COUNT_NO_PROFILE =
      BinaryBasicBlock::COUNT_NO_PROFILE;

  // Compute function size.
  auto functionSize = [&](const BinaryFunction *Function) {
    return UseFunctionHotSize && Function->isSplit()
               ? Function->estimateHotSize(UseSplitHotSize)
               : Function->estimateSize();
  };

  // Add call graph nodes.
  auto lookupNode = [&](BinaryFunction *Function) {
    const CallGraph::NodeId Id = Cg.maybeGetNodeId(Function);
    if (Id == CallGraph::InvalidId) {
      // It's ok to use the hot size here when the function is split. This is
      // because emitFunctions will emit the hot part first in the order that
      // is computed by ReorderFunctions. The cold part will be emitted with
      // the rest of the cold functions and code.
      const size_t Size = functionSize(Function);
      // NOTE: for functions without a profile, we set the number of samples
      // to zero. This will keep these functions from appearing in the hot
      // section. This is a little weird because we wouldn't be trying to
      // create a node for a function unless it was the target of a call from
      // a hot block. The alternative would be to set the count to one or
      // accumulate the number of calls from the callsite into the function
      // samples. Results from performance testing seem to favor the zero
      // count though, so I'm leaving it this way for now.
      return Cg.addNode(Function, Size, Function->getKnownExecutionCount());
    }
    return Id;
  };

  // Add call graph edges.
  uint64_t NotProcessed = 0;
  uint64_t TotalCallsites = 0;
  uint64_t NoProfileCallsites = 0;
  uint64_t NumFallbacks = 0;
  uint64_t RecursiveCallsites = 0;
  for (auto &It : BC.getBinaryFunctions()) {
    BinaryFunction *Function = &It.second;

    if (Filter(*Function))
      continue;

    const CallGraph::NodeId SrcId = lookupNode(Function);
    // Offset of the current basic block from the beginning of the function.
    uint64_t Offset = 0;

    auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) {
      if (BinaryFunction *DstFunc =
              DestSymbol ? BC.getFunctionForSymbol(DestSymbol) : nullptr) {
        if (DstFunc == Function) {
          LLVM_DEBUG(dbgs() << "BOLT-INFO: recursive call detected in "
                            << *DstFunc << "\n");
          ++RecursiveCallsites;
          if (IgnoreRecursiveCalls)
            return false;
        }
        if (Filter(*DstFunc))
          return false;

        const CallGraph::NodeId DstId = lookupNode(DstFunc);
        const bool IsValidCount = Count != COUNT_NO_PROFILE;
        const uint64_t AdjCount = UseEdgeCounts && IsValidCount ? Count : 1;
        if (!IsValidCount)
          ++NoProfileCallsites;
        Cg.incArcWeight(SrcId, DstId, AdjCount, Offset);
        LLVM_DEBUG(if (opts::Verbosity > 1) {
          dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function << " -> "
                 << *DstFunc << " @ " << Offset << "\n";
        });
        return true;
      }

      return false;
    };

    // Pairs of (symbol, count) for each target at this callsite.
    using TargetDesc = std::pair<const MCSymbol *, uint64_t>;
    using CallInfoTy = std::vector<TargetDesc>;

    // Get pairs of (symbol, count) for each target at this callsite.
    // If the call is to an unknown function, the symbol will be nullptr.
    // If there is no profiling data, the count will be COUNT_NO_PROFILE.
    auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) {
      CallInfoTy Counts;
      const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);

      // If this is an indirect call, use perf data directly.
      if (!DstSym && BC.MIB->hasAnnotation(Inst, "CallProfile")) {
        const auto &ICSP = BC.MIB->getAnnotationAs<IndirectCallSiteProfile>(
            Inst, "CallProfile");
        for (const IndirectCallProfile &CSI : ICSP)
          if (CSI.Symbol)
            Counts.emplace_back(CSI.Symbol, CSI.Count);
      } else {
        const uint64_t Count = BB->getExecutionCount();
        Counts.emplace_back(DstSym, Count);
      }

      return Counts;
    };
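
    // Illustrative example (all values hypothetical): for a direct call to
    // foo in a block executed 100 times, getCallInfo returns {(foo, 100)}.
    // For an indirect call whose "CallProfile" annotation recorded targets
    // bar (60 samples) and baz (40 samples), it returns {(bar, 60),
    // (baz, 40)}; profiled targets without a symbol are dropped.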

    // If the function has an invalid profile, try to use the perf data
    // directly (if requested). If there is no perf data for this function,
    // fall back to the CFG walker which attempts to handle missing data.
    if (!Function->hasValidProfile() && CgFromPerfData &&
        !Function->getAllCallSites().empty()) {
      LLVM_DEBUG(
          dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data"
                 << " for " << *Function << "\n");
      ++NumFallbacks;
      const size_t Size = functionSize(Function);
      for (const IndirectCallProfile &CSI : Function->getAllCallSites()) {
        ++TotalCallsites;

        if (!CSI.Symbol)
          continue;

        // The computed offset may exceed the hot part of the function; hence,
        // bound it by the size.
        Offset = CSI.Offset;
        if (Offset > Size)
          Offset = Size;

        if (!recordCall(CSI.Symbol, CSI.Count))
          ++NotProcessed;
      }
    } else {
      for (BinaryBasicBlock *BB : Function->layout()) {
        // Don't count calls from cold blocks unless requested.
        if (BB->isCold() && !IncludeColdCalls)
          continue;

        // Determine whether the block is included in Function's (hot) size.
        // See BinaryFunction::estimateHotSize.
        bool BBIncludedInFunctionSize = false;
        if (UseFunctionHotSize && Function->isSplit()) {
          if (UseSplitHotSize)
            BBIncludedInFunctionSize = !BB->isCold();
          else
            BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0;
        } else {
          BBIncludedInFunctionSize = true;
        }

        for (MCInst &Inst : *BB) {
          // Find call instructions and extract target symbols from each one.
          if (BC.MIB->isCall(Inst)) {
            const CallInfoTy CallInfo = getCallInfo(BB, Inst);

            if (!CallInfo.empty()) {
              for (const TargetDesc &CI : CallInfo) {
                ++TotalCallsites;
                if (!recordCall(CI.first, CI.second))
                  ++NotProcessed;
              }
            } else {
              ++TotalCallsites;
              ++NotProcessed;
            }
          }
          // Increase Offset if needed.
          if (BBIncludedInFunctionSize)
            Offset += BC.computeCodeSize(&Inst, &Inst + 1);
        }
      }
    }
  }

#ifndef NDEBUG
  bool PrintInfo = DebugFlag && isCurrentDebugType("callgraph");
#else
  bool PrintInfo = false;
#endif
  if (PrintInfo || opts::Verbosity > 0)
    outs() << format("BOLT-INFO: buildCallGraph: %u nodes, %u callsites "
                     "(%u recursive), density = %.6lf, %u callsites not "
                     "processed, %u callsites with invalid profile, "
                     "used perf data for %u stale functions.\n",
                     Cg.numNodes(), TotalCallsites, RecursiveCallsites,
                     Cg.density(), NotProcessed, NoProfileCallsites,
                     NumFallbacks);

  return Cg;
}

} // namespace bolt
} // namespace llvm
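
// Usage sketch (illustrative only; the filter below is an example, not the
// one BOLT's passes actually install): build a call graph over functions
// that have profile data, dropping recursive edges and cold-block calls:
//
//   BinaryFunctionCallGraph Cg = buildCallGraph(
//       BC, [](const BinaryFunction &BF) { return !BF.hasProfile(); },
//       /*CgFromPerfData=*/true, /*IncludeColdCalls=*/false,
//       /*UseFunctionHotSize=*/true, /*UseSplitHotSize=*/false,
//       /*UseEdgeCounts=*/true, /*IgnoreRecursiveCalls=*/true);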