1 //===--- Passes/BinaryFunctionCallGraph.cpp -------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
// This file implements the BinaryFunctionCallGraph class.
9 //===----------------------------------------------------------------------===//
10 
11 #include "bolt/Passes/BinaryFunctionCallGraph.h"
12 #include "bolt/Core/BinaryContext.h"
13 #include "bolt/Core/BinaryFunction.h"
14 #include "llvm/Support/CommandLine.h"
15 #include "llvm/Support/Timer.h"
16 #include <stack>
17 
18 #define DEBUG_TYPE "callgraph"
19 
20 namespace opts {
21 extern llvm::cl::opt<bool> TimeOpts;
22 extern llvm::cl::opt<unsigned> Verbosity;
23 }
24 
25 namespace llvm {
26 namespace bolt {
27 
28 CallGraph::NodeId BinaryFunctionCallGraph::addNode(BinaryFunction *BF,
29                                                    uint32_t Size,
30                                                    uint64_t Samples) {
31   NodeId Id = CallGraph::addNode(Size, Samples);
32   assert(size_t(Id) == Funcs.size());
33   Funcs.push_back(BF);
34   FuncToNodeId[BF] = Id;
35   assert(Funcs[Id] == BF);
36   return Id;
37 }
38 
39 std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() {
40   NamedRegionTimer T1("buildcgorder", "Build cg traversal order",
41                       "CG breakdown", "CG breakdown", opts::TimeOpts);
42   std::deque<BinaryFunction *> TopologicalOrder;
43   enum NodeStatus { NEW, VISITING, VISITED };
44   std::vector<NodeStatus> NodeStatus(Funcs.size());
45   std::stack<NodeId> Worklist;
46 
47   for (BinaryFunction *Func : Funcs) {
48     const NodeId Id = FuncToNodeId.at(Func);
49     Worklist.push(Id);
50     NodeStatus[Id] = NEW;
51   }
52 
53   while (!Worklist.empty()) {
54     const NodeId FuncId = Worklist.top();
55     Worklist.pop();
56 
57     if (NodeStatus[FuncId] == VISITED)
58       continue;
59 
60     if (NodeStatus[FuncId] == VISITING) {
61       TopologicalOrder.push_back(Funcs[FuncId]);
62       NodeStatus[FuncId] = VISITED;
63       continue;
64     }
65 
66     assert(NodeStatus[FuncId] == NEW);
67     NodeStatus[FuncId] = VISITING;
68     Worklist.push(FuncId);
69     for (const NodeId Callee : successors(FuncId)) {
70       if (NodeStatus[Callee] == VISITING || NodeStatus[Callee] == VISITED)
71         continue;
72       Worklist.push(Callee);
73     }
74   }
75 
76   return TopologicalOrder;
77 }
78 
79 BinaryFunctionCallGraph buildCallGraph(BinaryContext &BC,
80                                        CgFilterFunction Filter,
81                                        bool CgFromPerfData,
82                                        bool IncludeColdCalls,
83                                        bool UseFunctionHotSize,
84                                        bool UseSplitHotSize,
85                                        bool UseEdgeCounts,
86                                        bool IgnoreRecursiveCalls) {
87   NamedRegionTimer T1("buildcg", "Callgraph construction", "CG breakdown",
88                       "CG breakdown", opts::TimeOpts);
89   BinaryFunctionCallGraph Cg;
90   static constexpr uint64_t COUNT_NO_PROFILE =
91       BinaryBasicBlock::COUNT_NO_PROFILE;
92 
93   // Compute function size
94   auto functionSize = [&](const BinaryFunction *Function) {
95     return UseFunctionHotSize && Function->isSplit()
96       ? Function->estimateHotSize(UseSplitHotSize)
97       : Function->estimateSize();
98   };
99 
100   // Add call graph nodes.
101   auto lookupNode = [&](BinaryFunction *Function) {
102     const CallGraph::NodeId Id = Cg.maybeGetNodeId(Function);
103     if (Id == CallGraph::InvalidId) {
104       // It's ok to use the hot size here when the function is split.  This is
105       // because emitFunctions will emit the hot part first in the order that is
106       // computed by ReorderFunctions.  The cold part will be emitted with the
107       // rest of the cold functions and code.
108       const size_t Size = functionSize(Function);
109       // NOTE: for functions without a profile, we set the number of samples
110       // to zero.  This will keep these functions from appearing in the hot
111       // section.  This is a little weird because we wouldn't be trying to
112       // create a node for a function unless it was the target of a call from
113       // a hot block.  The alternative would be to set the count to one or
114       // accumulate the number of calls from the callsite into the function
115       // samples.  Results from perfomance testing seem to favor the zero
116       // count though, so I'm leaving it this way for now.
117       return Cg.addNode(Function, Size, Function->getKnownExecutionCount());
118     } else {
119       return Id;
120     }
121   };
122 
123   // Add call graph edges.
124   uint64_t NotProcessed = 0;
125   uint64_t TotalCallsites = 0;
126   uint64_t NoProfileCallsites = 0;
127   uint64_t NumFallbacks = 0;
128   uint64_t RecursiveCallsites = 0;
129   for (auto &It : BC.getBinaryFunctions()) {
130     BinaryFunction *Function = &It.second;
131 
132     if (Filter(*Function)) {
133       continue;
134     }
135 
136     const CallGraph::NodeId SrcId = lookupNode(Function);
137     // Offset of the current basic block from the beginning of the function
138     uint64_t Offset = 0;
139 
140     auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) {
141       if (BinaryFunction *DstFunc =
142               DestSymbol ? BC.getFunctionForSymbol(DestSymbol) : nullptr) {
143         if (DstFunc == Function) {
144           LLVM_DEBUG(dbgs() << "BOLT-INFO: recursive call detected in "
145                             << *DstFunc << "\n");
146           ++RecursiveCallsites;
147           if (IgnoreRecursiveCalls)
148             return false;
149         }
150         if (Filter(*DstFunc)) {
151           return false;
152         }
153         const CallGraph::NodeId DstId = lookupNode(DstFunc);
154         const bool IsValidCount = Count != COUNT_NO_PROFILE;
155         const uint64_t AdjCount = UseEdgeCounts && IsValidCount ? Count : 1;
156         if (!IsValidCount)
157           ++NoProfileCallsites;
158         Cg.incArcWeight(SrcId, DstId, AdjCount, Offset);
159         LLVM_DEBUG(
160           if (opts::Verbosity > 1) {
161             dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function
162                    << " -> " << *DstFunc << " @ " << Offset << "\n";
163           });
164         return true;
165       }
166 
167       return false;
168     };
169 
170     // Pairs of (symbol, count) for each target at this callsite.
171     using TargetDesc = std::pair<const MCSymbol *, uint64_t>;
172     using CallInfoTy = std::vector<TargetDesc>;
173 
174     // Get pairs of (symbol, count) for each target at this callsite.
175     // If the call is to an unknown function the symbol will be nullptr.
176     // If there is no profiling data the count will be COUNT_NO_PROFILE.
177     auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) {
178       CallInfoTy Counts;
179       const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);
180 
181       // If this is an indirect call use perf data directly.
182       if (!DstSym && BC.MIB->hasAnnotation(Inst, "CallProfile")) {
183         const auto &ICSP =
184           BC.MIB->getAnnotationAs<IndirectCallSiteProfile>(Inst, "CallProfile");
185         for (const IndirectCallProfile &CSI : ICSP) {
186           if (CSI.Symbol)
187             Counts.emplace_back(CSI.Symbol, CSI.Count);
188         }
189       } else {
190         const uint64_t Count = BB->getExecutionCount();
191         Counts.emplace_back(DstSym, Count);
192       }
193 
194       return Counts;
195     };
196 
197     // If the function has an invalid profile, try to use the perf data
198     // directly (if requested).  If there is no perf data for this function,
199     // fall back to the CFG walker which attempts to handle missing data.
200     if (!Function->hasValidProfile() && CgFromPerfData &&
201         !Function->getAllCallSites().empty()) {
202       LLVM_DEBUG(
203           dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data"
204                  << " for " << *Function << "\n");
205       ++NumFallbacks;
206       const size_t Size = functionSize(Function);
207       for (const IndirectCallProfile &CSI : Function->getAllCallSites()) {
208         ++TotalCallsites;
209 
210         if (!CSI.Symbol)
211           continue;
212 
213         // The computed offset may exceed the hot part of the function; hence,
214         // bound it by the size.
215         Offset = CSI.Offset;
216         if (Offset > Size)
217           Offset = Size;
218 
219         if (!recordCall(CSI.Symbol, CSI.Count)) {
220           ++NotProcessed;
221         }
222       }
223     } else {
224       for (BinaryBasicBlock *BB : Function->layout()) {
225         // Don't count calls from cold blocks unless requested.
226         if (BB->isCold() && !IncludeColdCalls)
227           continue;
228 
229         // Determine whether the block is included in Function's (hot) size
230         // See BinaryFunction::estimateHotSize
231         bool BBIncludedInFunctionSize = false;
232         if (UseFunctionHotSize && Function->isSplit()) {
233           if (UseSplitHotSize)
234             BBIncludedInFunctionSize = !BB->isCold();
235           else
236             BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0;
237         } else {
238           BBIncludedInFunctionSize = true;
239         }
240 
241         for (MCInst &Inst : *BB) {
242           // Find call instructions and extract target symbols from each one.
243           if (BC.MIB->isCall(Inst)) {
244             const CallInfoTy CallInfo = getCallInfo(BB, Inst);
245 
246             if (!CallInfo.empty()) {
247               for (const TargetDesc &CI : CallInfo) {
248                 ++TotalCallsites;
249                 if (!recordCall(CI.first, CI.second))
250                   ++NotProcessed;
251               }
252             } else {
253               ++TotalCallsites;
254               ++NotProcessed;
255             }
256           }
257           // Increase Offset if needed
258           if (BBIncludedInFunctionSize) {
259             Offset += BC.computeCodeSize(&Inst, &Inst + 1);
260           }
261         }
262       }
263     }
264   }
265 
266 #ifndef NDEBUG
267   bool PrintInfo = DebugFlag && isCurrentDebugType("callgraph");
268 #else
269   bool PrintInfo = false;
270 #endif
271   if (PrintInfo || opts::Verbosity > 0) {
272     outs() << format("BOLT-INFO: buildCallGraph: %u nodes, %u callsites "
273                      "(%u recursive), density = %.6lf, %u callsites not "
274                      "processed, %u callsites with invalid profile, "
275                      "used perf data for %u stale functions.\n",
276                      Cg.numNodes(), TotalCallsites, RecursiveCallsites,
277                      Cg.density(), NotProcessed, NoProfileCallsites,
278                      NumFallbacks);
279   }
280 
281   return Cg;
282 }
283 
284 }
285 }
286