1 //===- bolt/Passes/BinaryFunctionCallGraph.cpp ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the BinaryFunctionCallGraph class.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "bolt/Passes/BinaryFunctionCallGraph.h"
14 #include "bolt/Core/BinaryContext.h"
15 #include "bolt/Core/BinaryFunction.h"
16 #include "llvm/Support/CommandLine.h"
17 #include "llvm/Support/Timer.h"
18 #include <stack>
19
20 #define DEBUG_TYPE "callgraph"
21
22 namespace opts {
23 extern llvm::cl::opt<bool> TimeOpts;
24 extern llvm::cl::opt<unsigned> Verbosity;
25 } // namespace opts
26
27 namespace llvm {
28 namespace bolt {
29
addNode(BinaryFunction * BF,uint32_t Size,uint64_t Samples)30 CallGraph::NodeId BinaryFunctionCallGraph::addNode(BinaryFunction *BF,
31 uint32_t Size,
32 uint64_t Samples) {
33 NodeId Id = CallGraph::addNode(Size, Samples);
34 assert(size_t(Id) == Funcs.size());
35 Funcs.push_back(BF);
36 FuncToNodeId[BF] = Id;
37 assert(Funcs[Id] == BF);
38 return Id;
39 }
40
buildTraversalOrder()41 std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() {
42 NamedRegionTimer T1("buildcgorder", "Build cg traversal order",
43 "CG breakdown", "CG breakdown", opts::TimeOpts);
44 std::deque<BinaryFunction *> TopologicalOrder;
45 enum NodeStatus { NEW, VISITING, VISITED };
46 std::vector<NodeStatus> NodeStatus(Funcs.size());
47 std::stack<NodeId> Worklist;
48
49 for (BinaryFunction *Func : Funcs) {
50 const NodeId Id = FuncToNodeId.at(Func);
51 Worklist.push(Id);
52 NodeStatus[Id] = NEW;
53 }
54
55 while (!Worklist.empty()) {
56 const NodeId FuncId = Worklist.top();
57 Worklist.pop();
58
59 if (NodeStatus[FuncId] == VISITED)
60 continue;
61
62 if (NodeStatus[FuncId] == VISITING) {
63 TopologicalOrder.push_back(Funcs[FuncId]);
64 NodeStatus[FuncId] = VISITED;
65 continue;
66 }
67
68 assert(NodeStatus[FuncId] == NEW);
69 NodeStatus[FuncId] = VISITING;
70 Worklist.push(FuncId);
71 for (const NodeId Callee : successors(FuncId)) {
72 if (NodeStatus[Callee] == VISITING || NodeStatus[Callee] == VISITED)
73 continue;
74 Worklist.push(Callee);
75 }
76 }
77
78 return TopologicalOrder;
79 }
80
81 BinaryFunctionCallGraph
buildCallGraph(BinaryContext & BC,CgFilterFunction Filter,bool CgFromPerfData,bool IncludeColdCalls,bool UseFunctionHotSize,bool UseSplitHotSize,bool UseEdgeCounts,bool IgnoreRecursiveCalls)82 buildCallGraph(BinaryContext &BC, CgFilterFunction Filter, bool CgFromPerfData,
83 bool IncludeColdCalls, bool UseFunctionHotSize,
84 bool UseSplitHotSize, bool UseEdgeCounts,
85 bool IgnoreRecursiveCalls) {
86 NamedRegionTimer T1("buildcg", "Callgraph construction", "CG breakdown",
87 "CG breakdown", opts::TimeOpts);
88 BinaryFunctionCallGraph Cg;
89 static constexpr uint64_t COUNT_NO_PROFILE =
90 BinaryBasicBlock::COUNT_NO_PROFILE;
91
92 // Compute function size
93 auto functionSize = [&](const BinaryFunction *Function) {
94 return UseFunctionHotSize && Function->isSplit()
95 ? Function->estimateHotSize(UseSplitHotSize)
96 : Function->estimateSize();
97 };
98
99 // Add call graph nodes.
100 auto lookupNode = [&](BinaryFunction *Function) {
101 const CallGraph::NodeId Id = Cg.maybeGetNodeId(Function);
102 if (Id == CallGraph::InvalidId) {
103 // It's ok to use the hot size here when the function is split. This is
104 // because emitFunctions will emit the hot part first in the order that is
105 // computed by ReorderFunctions. The cold part will be emitted with the
106 // rest of the cold functions and code.
107 const size_t Size = functionSize(Function);
108 // NOTE: for functions without a profile, we set the number of samples
109 // to zero. This will keep these functions from appearing in the hot
110 // section. This is a little weird because we wouldn't be trying to
111 // create a node for a function unless it was the target of a call from
112 // a hot block. The alternative would be to set the count to one or
113 // accumulate the number of calls from the callsite into the function
114 // samples. Results from perfomance testing seem to favor the zero
115 // count though, so I'm leaving it this way for now.
116 return Cg.addNode(Function, Size, Function->getKnownExecutionCount());
117 }
118 return Id;
119 };
120
121 // Add call graph edges.
122 uint64_t NotProcessed = 0;
123 uint64_t TotalCallsites = 0;
124 uint64_t NoProfileCallsites = 0;
125 uint64_t NumFallbacks = 0;
126 uint64_t RecursiveCallsites = 0;
127 for (auto &It : BC.getBinaryFunctions()) {
128 BinaryFunction *Function = &It.second;
129
130 if (Filter(*Function))
131 continue;
132
133 const CallGraph::NodeId SrcId = lookupNode(Function);
134 // Offset of the current basic block from the beginning of the function
135 uint64_t Offset = 0;
136
137 auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) {
138 if (BinaryFunction *DstFunc =
139 DestSymbol ? BC.getFunctionForSymbol(DestSymbol) : nullptr) {
140 if (DstFunc == Function) {
141 LLVM_DEBUG(dbgs() << "BOLT-INFO: recursive call detected in "
142 << *DstFunc << "\n");
143 ++RecursiveCallsites;
144 if (IgnoreRecursiveCalls)
145 return false;
146 }
147 if (Filter(*DstFunc))
148 return false;
149
150 const CallGraph::NodeId DstId = lookupNode(DstFunc);
151 const bool IsValidCount = Count != COUNT_NO_PROFILE;
152 const uint64_t AdjCount = UseEdgeCounts && IsValidCount ? Count : 1;
153 if (!IsValidCount)
154 ++NoProfileCallsites;
155 Cg.incArcWeight(SrcId, DstId, AdjCount, Offset);
156 LLVM_DEBUG(if (opts::Verbosity > 1) {
157 dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function << " -> "
158 << *DstFunc << " @ " << Offset << "\n";
159 });
160 return true;
161 }
162
163 return false;
164 };
165
166 // Pairs of (symbol, count) for each target at this callsite.
167 using TargetDesc = std::pair<const MCSymbol *, uint64_t>;
168 using CallInfoTy = std::vector<TargetDesc>;
169
170 // Get pairs of (symbol, count) for each target at this callsite.
171 // If the call is to an unknown function the symbol will be nullptr.
172 // If there is no profiling data the count will be COUNT_NO_PROFILE.
173 auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) {
174 CallInfoTy Counts;
175 const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);
176
177 // If this is an indirect call use perf data directly.
178 if (!DstSym && BC.MIB->hasAnnotation(Inst, "CallProfile")) {
179 const auto &ICSP = BC.MIB->getAnnotationAs<IndirectCallSiteProfile>(
180 Inst, "CallProfile");
181 for (const IndirectCallProfile &CSI : ICSP)
182 if (CSI.Symbol)
183 Counts.emplace_back(CSI.Symbol, CSI.Count);
184 } else {
185 const uint64_t Count = BB->getExecutionCount();
186 Counts.emplace_back(DstSym, Count);
187 }
188
189 return Counts;
190 };
191
192 // If the function has an invalid profile, try to use the perf data
193 // directly (if requested). If there is no perf data for this function,
194 // fall back to the CFG walker which attempts to handle missing data.
195 if (!Function->hasValidProfile() && CgFromPerfData &&
196 !Function->getAllCallSites().empty()) {
197 LLVM_DEBUG(
198 dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data"
199 << " for " << *Function << "\n");
200 ++NumFallbacks;
201 const size_t Size = functionSize(Function);
202 for (const IndirectCallProfile &CSI : Function->getAllCallSites()) {
203 ++TotalCallsites;
204
205 if (!CSI.Symbol)
206 continue;
207
208 // The computed offset may exceed the hot part of the function; hence,
209 // bound it by the size.
210 Offset = CSI.Offset;
211 if (Offset > Size)
212 Offset = Size;
213
214 if (!recordCall(CSI.Symbol, CSI.Count))
215 ++NotProcessed;
216 }
217 } else {
218 for (BinaryBasicBlock *BB : Function->getLayout().blocks()) {
219 // Don't count calls from cold blocks unless requested.
220 if (BB->isCold() && !IncludeColdCalls)
221 continue;
222
223 // Determine whether the block is included in Function's (hot) size
224 // See BinaryFunction::estimateHotSize
225 bool BBIncludedInFunctionSize = false;
226 if (UseFunctionHotSize && Function->isSplit()) {
227 if (UseSplitHotSize)
228 BBIncludedInFunctionSize = !BB->isCold();
229 else
230 BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0;
231 } else {
232 BBIncludedInFunctionSize = true;
233 }
234
235 for (MCInst &Inst : *BB) {
236 // Find call instructions and extract target symbols from each one.
237 if (BC.MIB->isCall(Inst)) {
238 const CallInfoTy CallInfo = getCallInfo(BB, Inst);
239
240 if (!CallInfo.empty()) {
241 for (const TargetDesc &CI : CallInfo) {
242 ++TotalCallsites;
243 if (!recordCall(CI.first, CI.second))
244 ++NotProcessed;
245 }
246 } else {
247 ++TotalCallsites;
248 ++NotProcessed;
249 }
250 }
251 // Increase Offset if needed
252 if (BBIncludedInFunctionSize)
253 Offset += BC.computeCodeSize(&Inst, &Inst + 1);
254 }
255 }
256 }
257 }
258
259 #ifndef NDEBUG
260 bool PrintInfo = DebugFlag && isCurrentDebugType("callgraph");
261 #else
262 bool PrintInfo = false;
263 #endif
264 if (PrintInfo || opts::Verbosity > 0)
265 outs() << format("BOLT-INFO: buildCallGraph: %u nodes, %u callsites "
266 "(%u recursive), density = %.6lf, %u callsites not "
267 "processed, %u callsites with invalid profile, "
268 "used perf data for %u stale functions.\n",
269 Cg.numNodes(), TotalCallsites, RecursiveCallsites,
270 Cg.density(), NotProcessed, NoProfileCallsites,
271 NumFallbacks);
272
273 return Cg;
274 }
275
276 } // namespace bolt
277 } // namespace llvm
278