//===--- Passes/BinaryFunctionCallGraph.cpp -------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/BinaryFunctionCallGraph.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Timer.h"
#include <cinttypes>
#include <stack>

#define DEBUG_TYPE "callgraph"

namespace opts {
extern llvm::cl::opt<bool> TimeOpts;
extern llvm::cl::opt<unsigned> Verbosity;
} // namespace opts

namespace llvm {
namespace bolt {

CallGraph::NodeId BinaryFunctionCallGraph::addNode(BinaryFunction *BF,
                                                   uint32_t Size,
                                                   uint64_t Samples) {
  NodeId Id = CallGraph::addNode(Size, Samples);
  assert(size_t(Id) == Funcs.size());
  Funcs.push_back(BF);
  FuncToNodeId[BF] = Id;
  assert(Funcs[Id] == BF);
  return Id;
}

// Compute a post-order traversal of the call graph using an iterative DFS:
// a function is appended only after all of its callees have been visited,
// so callees precede their callers. Cycles are broken in visitation order.
std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() {
  NamedRegionTimer T1("buildcgorder", "Build cg traversal order",
                      "CG breakdown", "CG breakdown", opts::TimeOpts);
  std::deque<BinaryFunction *> TopologicalOrder;
  enum NodeStatus { NEW, VISITING, VISITED };
  std::vector<NodeStatus> NodeStatus(Funcs.size());
  std::stack<NodeId> Worklist;

  for (BinaryFunction *Func : Funcs) {
    const NodeId Id = FuncToNodeId.at(Func);
    Worklist.push(Id);
    NodeStatus[Id] = NEW;
  }

  while (!Worklist.empty()) {
    const NodeId FuncId = Worklist.top();
    Worklist.pop();

    if (NodeStatus[FuncId] == VISITED)
      continue;

    if (NodeStatus[FuncId] == VISITING) {
      TopologicalOrder.push_back(Funcs[FuncId]);
      NodeStatus[FuncId] = VISITED;
      continue;
    }

    assert(NodeStatus[FuncId] == NEW);
    NodeStatus[FuncId] = VISITING;
    Worklist.push(FuncId);
    for (const NodeId Callee : successors(FuncId)) {
      if (NodeStatus[Callee] == VISITING || NodeStatus[Callee] == VISITED)
        continue;
      Worklist.push(Callee);
    }
  }

  return TopologicalOrder;
}
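
/// Build a call graph over the binary functions in \p BC.
///
/// \p Filter excludes functions from the graph entirely; filtered functions
/// get neither nodes nor edges. \p CgFromPerfData makes functions with an
/// invalid (stale) CFG profile fall back to raw perf call-site data when
/// available. \p IncludeColdCalls also counts calls made from cold basic
/// blocks. \p UseFunctionHotSize sizes split functions by their hot part
/// (see \p UseSplitHotSize and BinaryFunction::estimateHotSize) instead of
/// their full size. \p UseEdgeCounts weights arcs with profile counts rather
/// than one per callsite. \p IgnoreRecursiveCalls drops self-call edges.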
BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
                                       CgFilterFunction Filter,
                                       bool CgFromPerfData,
                                       bool IncludeColdCalls,
                                       bool UseFunctionHotSize,
                                       bool UseSplitHotSize,
                                       bool UseEdgeCounts,
                                       bool IgnoreRecursiveCalls) {
  NamedRegionTimer T1("buildcg", "Callgraph construction", "CG breakdown",
                      "CG breakdown", opts::TimeOpts);
  BinaryFunctionCallGraph Cg;
  static constexpr uint64_t COUNT_NO_PROFILE =
      BinaryBasicBlock::COUNT_NO_PROFILE;

  // Compute the size of a function, using the hot-part estimate for split
  // functions when requested.
  auto functionSize = [&](const BinaryFunction *Function) {
    return UseFunctionHotSize && Function->isSplit()
               ? Function->estimateHotSize(UseSplitHotSize)
               : Function->estimateSize();
  };

  // Add call graph nodes.
  auto lookupNode = [&](BinaryFunction *Function) {
    const CallGraph::NodeId Id = Cg.maybeGetNodeId(Function);
    if (Id == CallGraph::InvalidId) {
      // It's ok to use the hot size here when the function is split. This is
      // because emitFunctions will emit the hot part first in the order that
      // is computed by ReorderFunctions. The cold part will be emitted with
      // the rest of the cold functions and code.
      const size_t Size = functionSize(Function);
      // NOTE: for functions without a profile, we set the number of samples
      // to zero. This will keep these functions from appearing in the hot
      // section. This is a little weird because we wouldn't be trying to
      // create a node for a function unless it was the target of a call from
      // a hot block. The alternative would be to set the count to one or
      // accumulate the number of calls from the callsite into the function
      // samples. Results from performance testing seem to favor the zero
      // count though, so I'm leaving it this way for now.
      return Cg.addNode(Function, Size, Function->getKnownExecutionCount());
    }
    return Id;
  };

  // Add call graph edges.
  uint64_t NotProcessed = 0;
  uint64_t TotalCallsites = 0;
  uint64_t NoProfileCallsites = 0;
  uint64_t NumFallbacks = 0;
  uint64_t RecursiveCallsites = 0;
  for (auto &It : BC.getBinaryFunctions()) {
    BinaryFunction *Function = &It.second;

    if (Filter(*Function))
      continue;

    const CallGraph::NodeId SrcId = lookupNode(Function);
    // Offset of the current basic block from the beginning of the function.
    uint64_t Offset = 0;

    auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) {
      if (BinaryFunction *DstFunc =
              DestSymbol ? BC.getFunctionForSymbol(DestSymbol) : nullptr) {
        if (DstFunc == Function) {
          LLVM_DEBUG(dbgs() << "BOLT-INFO: recursive call detected in "
                            << *DstFunc << "\n");
          ++RecursiveCallsites;
          if (IgnoreRecursiveCalls)
            return false;
        }
        if (Filter(*DstFunc))
          return false;

        const CallGraph::NodeId DstId = lookupNode(DstFunc);
        const bool IsValidCount = Count != COUNT_NO_PROFILE;
        const uint64_t AdjCount = UseEdgeCounts && IsValidCount ? Count : 1;
        if (!IsValidCount)
          ++NoProfileCallsites;
        Cg.incArcWeight(SrcId, DstId, AdjCount, Offset);
        LLVM_DEBUG(if (opts::Verbosity > 1) {
          dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function << " -> "
                 << *DstFunc << " @ " << Offset << "\n";
        });
        return true;
      }

      return false;
    };

    // Pairs of (symbol, count) for each target at this callsite.
    using TargetDesc = std::pair<const MCSymbol *, uint64_t>;
    using CallInfoTy = std::vector<TargetDesc>;

    // Get pairs of (symbol, count) for each target at this callsite.
    // If the call is to an unknown function the symbol will be nullptr.
    // If there is no profiling data the count will be COUNT_NO_PROFILE.
    auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) {
      CallInfoTy Counts;
      const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);

      // If this is an indirect call, use perf data directly.
      if (!DstSym && BC.MIB->hasAnnotation(Inst, "CallProfile")) {
        const auto &ICSP = BC.MIB->getAnnotationAs<IndirectCallSiteProfile>(
            Inst, "CallProfile");
        for (const IndirectCallProfile &CSI : ICSP) {
          if (CSI.Symbol)
            Counts.emplace_back(CSI.Symbol, CSI.Count);
        }
      } else {
        const uint64_t Count = BB->getExecutionCount();
        Counts.emplace_back(DstSym, Count);
      }

      return Counts;
    };

    // If the function has an invalid profile, try to use the perf data
    // directly (if requested). If there is no perf data for this function,
    // fall back to the CFG walker which attempts to handle missing data.
    if (!Function->hasValidProfile() && CgFromPerfData &&
        !Function->getAllCallSites().empty()) {
      LLVM_DEBUG(dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf"
                        << " data for " << *Function << "\n");
      ++NumFallbacks;
      const size_t Size = functionSize(Function);
      for (const IndirectCallProfile &CSI : Function->getAllCallSites()) {
        ++TotalCallsites;

        if (!CSI.Symbol)
          continue;

        // The computed offset may exceed the hot part of the function; hence,
        // bound it by the size.
        Offset = CSI.Offset;
        if (Offset > Size)
          Offset = Size;

        if (!recordCall(CSI.Symbol, CSI.Count))
          ++NotProcessed;
      }
    } else {
      for (BinaryBasicBlock *BB : Function->layout()) {
        // Don't count calls from cold blocks unless requested.
        if (BB->isCold() && !IncludeColdCalls)
          continue;

        // Determine whether the block is included in Function's (hot) size.
        // See BinaryFunction::estimateHotSize.
        bool BBIncludedInFunctionSize = false;
        if (UseFunctionHotSize && Function->isSplit()) {
          if (UseSplitHotSize)
            BBIncludedInFunctionSize = !BB->isCold();
          else
            BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0;
        } else {
          BBIncludedInFunctionSize = true;
        }

        for (MCInst &Inst : *BB) {
          // Find call instructions and extract target symbols from each one.
          if (BC.MIB->isCall(Inst)) {
            const CallInfoTy CallInfo = getCallInfo(BB, Inst);

            if (!CallInfo.empty()) {
              for (const TargetDesc &CI : CallInfo) {
                ++TotalCallsites;
                if (!recordCall(CI.first, CI.second))
                  ++NotProcessed;
              }
            } else {
              ++TotalCallsites;
              ++NotProcessed;
            }
          }
          // Advance Offset only past instructions that count towards the
          // function size estimate computed above.
          if (BBIncludedInFunctionSize)
            Offset += BC.computeCodeSize(&Inst, &Inst + 1);
        }
      }
    }
  }

#ifndef NDEBUG
  bool PrintInfo = DebugFlag && isCurrentDebugType("callgraph");
#else
  bool PrintInfo = false;
#endif
  if (PrintInfo || opts::Verbosity > 0)
    outs() << format("BOLT-INFO: buildCallGraph: %u nodes, %" PRIu64
                     " callsites (%" PRIu64
                     " recursive), density = %.6lf, %" PRIu64
                     " callsites not processed, %" PRIu64
                     " callsites with invalid profile, "
                     "used perf data for %" PRIu64 " stale functions.\n",
                     Cg.numNodes(), TotalCallsites, RecursiveCallsites,
                     Cg.density(), NotProcessed, NoProfileCallsites,
                     NumFallbacks);

  return Cg;
}

} // namespace bolt
} // namespace llvm