//===- bolt/Passes/BinaryFunctionCallGraph.cpp ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the BinaryFunctionCallGraph class.
//
//===----------------------------------------------------------------------===//

#include "bolt/Passes/BinaryFunctionCallGraph.h"
#include "bolt/Core/BinaryContext.h"
#include "bolt/Core/BinaryFunction.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Timer.h"
#include <cinttypes>
#include <stack>

#define DEBUG_TYPE "callgraph"

namespace opts {
extern llvm::cl::opt<bool> TimeOpts;
extern llvm::cl::opt<unsigned> Verbosity;
} // namespace opts

namespace llvm {
namespace bolt {

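// Register BF as a new call graph node, keeping the parallel Funcs vector and
// the FuncToNodeId map in sync with the node ids assigned by the base class.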
CallGraph::NodeId BinaryFunctionCallGraph::addNode(BinaryFunction *BF,
                                                   uint32_t Size,
                                                   uint64_t Samples) {
  NodeId Id = CallGraph::addNode(Size, Samples);
  assert(size_t(Id) == Funcs.size());
  Funcs.push_back(BF);
  FuncToNodeId[BF] = Id;
  assert(Funcs[Id] == BF);
  return Id;
}

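// Return the functions in a post-order traversal of the call graph: barring
// cycles, a function appears only after all of the functions it calls. The
// traversal uses an explicit worklist with three-state marking
// (NEW/VISITING/VISITED) instead of recursion, so deep call chains cannot
// overflow the native stack.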
std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() {
  NamedRegionTimer T1("buildcgorder", "Build cg traversal order",
                      "CG breakdown", "CG breakdown", opts::TimeOpts);
  std::deque<BinaryFunction *> TopologicalOrder;
  enum NodeStatus { NEW, VISITING, VISITED };
  std::vector<NodeStatus> NodeStatus(Funcs.size());
  std::stack<NodeId> Worklist;

  for (BinaryFunction *Func : Funcs) {
    const NodeId Id = FuncToNodeId.at(Func);
    Worklist.push(Id);
    NodeStatus[Id] = NEW;
  }

  while (!Worklist.empty()) {
    const NodeId FuncId = Worklist.top();
    Worklist.pop();

    if (NodeStatus[FuncId] == VISITED)
      continue;

    if (NodeStatus[FuncId] == VISITING) {
      TopologicalOrder.push_back(Funcs[FuncId]);
      NodeStatus[FuncId] = VISITED;
      continue;
    }

    assert(NodeStatus[FuncId] == NEW);
    NodeStatus[FuncId] = VISITING;
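    // Re-push this node; its unvisited callees are pushed on top of it below,
    // so they are fully processed before the node is popped again in the
    // VISITING state and appended to the order.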
    Worklist.push(FuncId);
    for (const NodeId Callee : successors(FuncId)) {
      if (NodeStatus[Callee] == VISITING || NodeStatus[Callee] == VISITED)
        continue;
      Worklist.push(Callee);
    }
  }

  return TopologicalOrder;
}

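// Build a call graph over the functions in BC, skipping any function rejected
// by Filter. Node sizes come from estimateSize(), or from estimateHotSize()
// when UseFunctionHotSize is set and the function is split (UseSplitHotSize
// selects how the hot size is estimated). Arc weights use profile counts when
// UseEdgeCounts is set and the count is valid; otherwise each recorded call
// contributes a weight of one. When CgFromPerfData is set, functions with
// invalid profiles fall back to their raw perf-data callsites.
// IncludeColdCalls and IgnoreRecursiveCalls control whether calls from cold
// blocks and self-calls are recorded.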
BinaryFunctionCallGraph
buildCallGraph(BinaryContext &BC, CgFilterFunction Filter, bool CgFromPerfData,
               bool IncludeColdCalls, bool UseFunctionHotSize,
               bool UseSplitHotSize, bool UseEdgeCounts,
               bool IgnoreRecursiveCalls) {
  NamedRegionTimer T1("buildcg", "Callgraph construction", "CG breakdown",
                      "CG breakdown", opts::TimeOpts);
  BinaryFunctionCallGraph Cg;
  static constexpr uint64_t COUNT_NO_PROFILE =
      BinaryBasicBlock::COUNT_NO_PROFILE;

  // Compute the size of a function, using the estimated size of its hot part
  // when the function is split and UseFunctionHotSize is set.
  auto functionSize = [&](const BinaryFunction *Function) {
    return UseFunctionHotSize && Function->isSplit()
               ? Function->estimateHotSize(UseSplitHotSize)
               : Function->estimateSize();
  };

  // Look up a function's call graph node, adding a new node on first use.
  auto lookupNode = [&](BinaryFunction *Function) {
    const CallGraph::NodeId Id = Cg.maybeGetNodeId(Function);
    if (Id == CallGraph::InvalidId) {
      // It's ok to use the hot size here when the function is split.  This is
      // because emitFunctions will emit the hot part first in the order that
      // is computed by ReorderFunctions.  The cold part will be emitted with
      // the rest of the cold functions and code.
      const size_t Size = functionSize(Function);
      // NOTE: for functions without a profile, we set the number of samples
      // to zero.  This will keep these functions from appearing in the hot
      // section.  This is a little weird because we wouldn't be trying to
      // create a node for a function unless it was the target of a call from
      // a hot block.  The alternative would be to set the count to one or
      // accumulate the number of calls from the callsite into the function
      // samples.  Results from performance testing seem to favor the zero
      // count though, so I'm leaving it this way for now.
      return Cg.addNode(Function, Size, Function->getKnownExecutionCount());
    }
    return Id;
  };

  // Add call graph edges.
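  // Counters for the summary statistics reported at the end of this function.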
  uint64_t NotProcessed = 0;
  uint64_t TotalCallsites = 0;
  uint64_t NoProfileCallsites = 0;
  uint64_t NumFallbacks = 0;
  uint64_t RecursiveCallsites = 0;
  for (auto &It : BC.getBinaryFunctions()) {
    BinaryFunction *Function = &It.second;

    if (Filter(*Function))
      continue;

    const CallGraph::NodeId SrcId = lookupNode(Function);
    // Offset of the current instruction from the beginning of the function;
    // used as the callsite offset when recording arcs.
    uint64_t Offset = 0;

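    // Record a call from Function to the function that owns DestSymbol by
    // accumulating Count into the corresponding arc's weight. Returns false
    // when no arc is updated: an unknown target, a filtered-out callee, or a
    // recursive call with IgnoreRecursiveCalls set.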
    auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) {
      if (BinaryFunction *DstFunc =
              DestSymbol ? BC.getFunctionForSymbol(DestSymbol) : nullptr) {
        if (DstFunc == Function) {
          LLVM_DEBUG(dbgs() << "BOLT-INFO: recursive call detected in "
                            << *DstFunc << "\n");
          ++RecursiveCallsites;
          if (IgnoreRecursiveCalls)
            return false;
        }
        if (Filter(*DstFunc))
          return false;
        const CallGraph::NodeId DstId = lookupNode(DstFunc);
        const bool IsValidCount = Count != COUNT_NO_PROFILE;
        const uint64_t AdjCount = UseEdgeCounts && IsValidCount ? Count : 1;
        if (!IsValidCount)
          ++NoProfileCallsites;
        Cg.incArcWeight(SrcId, DstId, AdjCount, Offset);
        LLVM_DEBUG(if (opts::Verbosity > 1) {
          dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function << " -> "
                 << *DstFunc << " @ " << Offset << "\n";
        });
        return true;
      }

      return false;
    };

    // Pairs of (symbol, count) for each target at this callsite.
    using TargetDesc = std::pair<const MCSymbol *, uint64_t>;
    using CallInfoTy = std::vector<TargetDesc>;

    // Get pairs of (symbol, count) for each target at this callsite.
    // If the call is to an unknown function the symbol will be nullptr.
    // If there is no profiling data the count will be COUNT_NO_PROFILE.
    auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) {
      CallInfoTy Counts;
      const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);

      // If this is an indirect call use perf data directly.
      if (!DstSym && BC.MIB->hasAnnotation(Inst, "CallProfile")) {
        const auto &ICSP = BC.MIB->getAnnotationAs<IndirectCallSiteProfile>(
            Inst, "CallProfile");
        for (const IndirectCallProfile &CSI : ICSP) {
          if (CSI.Symbol)
            Counts.emplace_back(CSI.Symbol, CSI.Count);
        }
      } else {
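        // Direct call, or an indirect call without a "CallProfile"
        // annotation: use the containing block's execution count.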
        const uint64_t Count = BB->getExecutionCount();
        Counts.emplace_back(DstSym, Count);
      }

      return Counts;
    };

    // If the function has an invalid profile, try to use the perf data
    // directly (if requested).  If there is no perf data for this function,
    // fall back to the CFG walker which attempts to handle missing data.
    if (!Function->hasValidProfile() && CgFromPerfData &&
        !Function->getAllCallSites().empty()) {
      LLVM_DEBUG(
          dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data"
                 << " for " << *Function << "\n");
      ++NumFallbacks;
      const size_t Size = functionSize(Function);
      for (const IndirectCallProfile &CSI : Function->getAllCallSites()) {
        ++TotalCallsites;

        if (!CSI.Symbol)
          continue;

        // The computed offset may exceed the hot part of the function; hence,
        // bound it by the size.
        Offset = CSI.Offset;
        if (Offset > Size)
          Offset = Size;

        if (!recordCall(CSI.Symbol, CSI.Count))
          ++NotProcessed;
      }
    } else {
      for (BinaryBasicBlock *BB : Function->layout()) {
        // Don't count calls from cold blocks unless requested.
        if (BB->isCold() && !IncludeColdCalls)
          continue;

        // Determine whether the block is included in Function's (hot) size.
        // See BinaryFunction::estimateHotSize().
        bool BBIncludedInFunctionSize = false;
        if (UseFunctionHotSize && Function->isSplit()) {
          if (UseSplitHotSize)
            BBIncludedInFunctionSize = !BB->isCold();
          else
            BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0;
        } else {
          BBIncludedInFunctionSize = true;
        }

        for (MCInst &Inst : *BB) {
          // Find call instructions and extract target symbols from each one.
          if (BC.MIB->isCall(Inst)) {
            const CallInfoTy CallInfo = getCallInfo(BB, Inst);

            if (!CallInfo.empty()) {
              for (const TargetDesc &CI : CallInfo) {
                ++TotalCallsites;
                if (!recordCall(CI.first, CI.second))
                  ++NotProcessed;
              }
            } else {
              ++TotalCallsites;
              ++NotProcessed;
            }
          }
          // Advance Offset past this instruction, but only when the block
          // counts toward the size recorded for this function's node.
          if (BBIncludedInFunctionSize)
            Offset += BC.computeCodeSize(&Inst, &Inst + 1);
        }
      }
    }
  }

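  // Report the summary statistics under -debug-only=callgraph (debug builds
  // only) or when verbosity is enabled.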
#ifndef NDEBUG
  bool PrintInfo = DebugFlag && isCurrentDebugType("callgraph");
#else
  bool PrintInfo = false;
#endif
  if (PrintInfo || opts::Verbosity > 0)
    outs() << format("BOLT-INFO: buildCallGraph: %zu nodes, %" PRIu64
                     " callsites (%" PRIu64 " recursive), density = %.6lf, "
                     "%" PRIu64 " callsites not processed, %" PRIu64
                     " callsites with invalid profile, used perf data for "
                     "%" PRIu64 " stale functions.\n",
                     static_cast<size_t>(Cg.numNodes()), TotalCallsites,
                     RecursiveCallsites, Cg.density(), NotProcessed,
                     NoProfileCallsites, NumFallbacks);

  return Cg;
}

} // namespace bolt
} // namespace llvm