12f09f445SMaksim Panchenko //===- bolt/Passes/BinaryFunctionCallGraph.cpp ----------------------------===//
2a34c753fSRafael Auler //
3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information.
5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a34c753fSRafael Auler //
7a34c753fSRafael Auler //===----------------------------------------------------------------------===//
8a34c753fSRafael Auler //
92f09f445SMaksim Panchenko // This file implements the BinaryFunctionCallGraph class.
102f09f445SMaksim Panchenko //
11a34c753fSRafael Auler //===----------------------------------------------------------------------===//
12a34c753fSRafael Auler 
13a34c753fSRafael Auler #include "bolt/Passes/BinaryFunctionCallGraph.h"
14a34c753fSRafael Auler #include "bolt/Core/BinaryContext.h"
15a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h"
16a34c753fSRafael Auler #include "llvm/Support/CommandLine.h"
17a34c753fSRafael Auler #include "llvm/Support/Timer.h"
18a34c753fSRafael Auler #include <stack>
19a34c753fSRafael Auler 
20a34c753fSRafael Auler #define DEBUG_TYPE "callgraph"
21a34c753fSRafael Auler 
22a34c753fSRafael Auler namespace opts {
23a34c753fSRafael Auler extern llvm::cl::opt<bool> TimeOpts;
24a34c753fSRafael Auler extern llvm::cl::opt<unsigned> Verbosity;
2540c2e0faSMaksim Panchenko } // namespace opts
26a34c753fSRafael Auler 
27a34c753fSRafael Auler namespace llvm {
28a34c753fSRafael Auler namespace bolt {
29a34c753fSRafael Auler 
addNode(BinaryFunction * BF,uint32_t Size,uint64_t Samples)30a34c753fSRafael Auler CallGraph::NodeId BinaryFunctionCallGraph::addNode(BinaryFunction *BF,
31a34c753fSRafael Auler                                                    uint32_t Size,
32a34c753fSRafael Auler                                                    uint64_t Samples) {
33a34c753fSRafael Auler   NodeId Id = CallGraph::addNode(Size, Samples);
34a34c753fSRafael Auler   assert(size_t(Id) == Funcs.size());
35a34c753fSRafael Auler   Funcs.push_back(BF);
36a34c753fSRafael Auler   FuncToNodeId[BF] = Id;
37a34c753fSRafael Auler   assert(Funcs[Id] == BF);
38a34c753fSRafael Auler   return Id;
39a34c753fSRafael Auler }
40a34c753fSRafael Auler 
buildTraversalOrder()41a34c753fSRafael Auler std::deque<BinaryFunction *> BinaryFunctionCallGraph::buildTraversalOrder() {
42a34c753fSRafael Auler   NamedRegionTimer T1("buildcgorder", "Build cg traversal order",
43a34c753fSRafael Auler                       "CG breakdown", "CG breakdown", opts::TimeOpts);
44a34c753fSRafael Auler   std::deque<BinaryFunction *> TopologicalOrder;
45a34c753fSRafael Auler   enum NodeStatus { NEW, VISITING, VISITED };
46a34c753fSRafael Auler   std::vector<NodeStatus> NodeStatus(Funcs.size());
47a34c753fSRafael Auler   std::stack<NodeId> Worklist;
48a34c753fSRafael Auler 
49a34c753fSRafael Auler   for (BinaryFunction *Func : Funcs) {
50a34c753fSRafael Auler     const NodeId Id = FuncToNodeId.at(Func);
51a34c753fSRafael Auler     Worklist.push(Id);
52a34c753fSRafael Auler     NodeStatus[Id] = NEW;
53a34c753fSRafael Auler   }
54a34c753fSRafael Auler 
55a34c753fSRafael Auler   while (!Worklist.empty()) {
56a34c753fSRafael Auler     const NodeId FuncId = Worklist.top();
57a34c753fSRafael Auler     Worklist.pop();
58a34c753fSRafael Auler 
59a34c753fSRafael Auler     if (NodeStatus[FuncId] == VISITED)
60a34c753fSRafael Auler       continue;
61a34c753fSRafael Auler 
62a34c753fSRafael Auler     if (NodeStatus[FuncId] == VISITING) {
63a34c753fSRafael Auler       TopologicalOrder.push_back(Funcs[FuncId]);
64a34c753fSRafael Auler       NodeStatus[FuncId] = VISITED;
65a34c753fSRafael Auler       continue;
66a34c753fSRafael Auler     }
67a34c753fSRafael Auler 
68a34c753fSRafael Auler     assert(NodeStatus[FuncId] == NEW);
69a34c753fSRafael Auler     NodeStatus[FuncId] = VISITING;
70a34c753fSRafael Auler     Worklist.push(FuncId);
71a34c753fSRafael Auler     for (const NodeId Callee : successors(FuncId)) {
72a34c753fSRafael Auler       if (NodeStatus[Callee] == VISITING || NodeStatus[Callee] == VISITED)
73a34c753fSRafael Auler         continue;
74a34c753fSRafael Auler       Worklist.push(Callee);
75a34c753fSRafael Auler     }
76a34c753fSRafael Auler   }
77a34c753fSRafael Auler 
78a34c753fSRafael Auler   return TopologicalOrder;
79a34c753fSRafael Auler }
80a34c753fSRafael Auler 
8140c2e0faSMaksim Panchenko BinaryFunctionCallGraph
buildCallGraph(BinaryContext & BC,CgFilterFunction Filter,bool CgFromPerfData,bool IncludeColdCalls,bool UseFunctionHotSize,bool UseSplitHotSize,bool UseEdgeCounts,bool IgnoreRecursiveCalls)8240c2e0faSMaksim Panchenko buildCallGraph(BinaryContext &BC, CgFilterFunction Filter, bool CgFromPerfData,
8340c2e0faSMaksim Panchenko                bool IncludeColdCalls, bool UseFunctionHotSize,
8440c2e0faSMaksim Panchenko                bool UseSplitHotSize, bool UseEdgeCounts,
85a34c753fSRafael Auler                bool IgnoreRecursiveCalls) {
86a34c753fSRafael Auler   NamedRegionTimer T1("buildcg", "Callgraph construction", "CG breakdown",
87a34c753fSRafael Auler                       "CG breakdown", opts::TimeOpts);
88a34c753fSRafael Auler   BinaryFunctionCallGraph Cg;
89a34c753fSRafael Auler   static constexpr uint64_t COUNT_NO_PROFILE =
90a34c753fSRafael Auler       BinaryBasicBlock::COUNT_NO_PROFILE;
91a34c753fSRafael Auler 
92a34c753fSRafael Auler   // Compute function size
93a34c753fSRafael Auler   auto functionSize = [&](const BinaryFunction *Function) {
94a34c753fSRafael Auler     return UseFunctionHotSize && Function->isSplit()
95a34c753fSRafael Auler                ? Function->estimateHotSize(UseSplitHotSize)
96a34c753fSRafael Auler                : Function->estimateSize();
97a34c753fSRafael Auler   };
98a34c753fSRafael Auler 
99a34c753fSRafael Auler   // Add call graph nodes.
100a34c753fSRafael Auler   auto lookupNode = [&](BinaryFunction *Function) {
101a34c753fSRafael Auler     const CallGraph::NodeId Id = Cg.maybeGetNodeId(Function);
102a34c753fSRafael Auler     if (Id == CallGraph::InvalidId) {
103a34c753fSRafael Auler       // It's ok to use the hot size here when the function is split.  This is
104a34c753fSRafael Auler       // because emitFunctions will emit the hot part first in the order that is
105a34c753fSRafael Auler       // computed by ReorderFunctions.  The cold part will be emitted with the
106a34c753fSRafael Auler       // rest of the cold functions and code.
107a34c753fSRafael Auler       const size_t Size = functionSize(Function);
108a34c753fSRafael Auler       // NOTE: for functions without a profile, we set the number of samples
109a34c753fSRafael Auler       // to zero.  This will keep these functions from appearing in the hot
110a34c753fSRafael Auler       // section.  This is a little weird because we wouldn't be trying to
111a34c753fSRafael Auler       // create a node for a function unless it was the target of a call from
112a34c753fSRafael Auler       // a hot block.  The alternative would be to set the count to one or
113a34c753fSRafael Auler       // accumulate the number of calls from the callsite into the function
114a34c753fSRafael Auler       // samples.  Results from perfomance testing seem to favor the zero
115a34c753fSRafael Auler       // count though, so I'm leaving it this way for now.
116a34c753fSRafael Auler       return Cg.addNode(Function, Size, Function->getKnownExecutionCount());
117a34c753fSRafael Auler     }
118f92ab6afSAmir Ayupov     return Id;
119a34c753fSRafael Auler   };
120a34c753fSRafael Auler 
121a34c753fSRafael Auler   // Add call graph edges.
122a34c753fSRafael Auler   uint64_t NotProcessed = 0;
123a34c753fSRafael Auler   uint64_t TotalCallsites = 0;
124a34c753fSRafael Auler   uint64_t NoProfileCallsites = 0;
125a34c753fSRafael Auler   uint64_t NumFallbacks = 0;
126a34c753fSRafael Auler   uint64_t RecursiveCallsites = 0;
127a34c753fSRafael Auler   for (auto &It : BC.getBinaryFunctions()) {
128a34c753fSRafael Auler     BinaryFunction *Function = &It.second;
129a34c753fSRafael Auler 
130f92ab6afSAmir Ayupov     if (Filter(*Function))
131a34c753fSRafael Auler       continue;
132a34c753fSRafael Auler 
133a34c753fSRafael Auler     const CallGraph::NodeId SrcId = lookupNode(Function);
134a34c753fSRafael Auler     // Offset of the current basic block from the beginning of the function
135a34c753fSRafael Auler     uint64_t Offset = 0;
136a34c753fSRafael Auler 
137a34c753fSRafael Auler     auto recordCall = [&](const MCSymbol *DestSymbol, const uint64_t Count) {
138a34c753fSRafael Auler       if (BinaryFunction *DstFunc =
139a34c753fSRafael Auler               DestSymbol ? BC.getFunctionForSymbol(DestSymbol) : nullptr) {
140a34c753fSRafael Auler         if (DstFunc == Function) {
141a34c753fSRafael Auler           LLVM_DEBUG(dbgs() << "BOLT-INFO: recursive call detected in "
142a34c753fSRafael Auler                             << *DstFunc << "\n");
143a34c753fSRafael Auler           ++RecursiveCallsites;
144a34c753fSRafael Auler           if (IgnoreRecursiveCalls)
145a34c753fSRafael Auler             return false;
146a34c753fSRafael Auler         }
147f92ab6afSAmir Ayupov         if (Filter(*DstFunc))
148a34c753fSRafael Auler           return false;
149f92ab6afSAmir Ayupov 
150a34c753fSRafael Auler         const CallGraph::NodeId DstId = lookupNode(DstFunc);
151a34c753fSRafael Auler         const bool IsValidCount = Count != COUNT_NO_PROFILE;
152a34c753fSRafael Auler         const uint64_t AdjCount = UseEdgeCounts && IsValidCount ? Count : 1;
153a34c753fSRafael Auler         if (!IsValidCount)
154a34c753fSRafael Auler           ++NoProfileCallsites;
155a34c753fSRafael Auler         Cg.incArcWeight(SrcId, DstId, AdjCount, Offset);
15640c2e0faSMaksim Panchenko         LLVM_DEBUG(if (opts::Verbosity > 1) {
15740c2e0faSMaksim Panchenko           dbgs() << "BOLT-DEBUG: buildCallGraph: call " << *Function << " -> "
15840c2e0faSMaksim Panchenko                  << *DstFunc << " @ " << Offset << "\n";
159a34c753fSRafael Auler         });
160a34c753fSRafael Auler         return true;
161a34c753fSRafael Auler       }
162a34c753fSRafael Auler 
163a34c753fSRafael Auler       return false;
164a34c753fSRafael Auler     };
165a34c753fSRafael Auler 
166a34c753fSRafael Auler     // Pairs of (symbol, count) for each target at this callsite.
167a34c753fSRafael Auler     using TargetDesc = std::pair<const MCSymbol *, uint64_t>;
168a34c753fSRafael Auler     using CallInfoTy = std::vector<TargetDesc>;
169a34c753fSRafael Auler 
170a34c753fSRafael Auler     // Get pairs of (symbol, count) for each target at this callsite.
171a34c753fSRafael Auler     // If the call is to an unknown function the symbol will be nullptr.
172a34c753fSRafael Auler     // If there is no profiling data the count will be COUNT_NO_PROFILE.
173a34c753fSRafael Auler     auto getCallInfo = [&](const BinaryBasicBlock *BB, const MCInst &Inst) {
174a34c753fSRafael Auler       CallInfoTy Counts;
175a34c753fSRafael Auler       const MCSymbol *DstSym = BC.MIB->getTargetSymbol(Inst);
176a34c753fSRafael Auler 
177a34c753fSRafael Auler       // If this is an indirect call use perf data directly.
178a34c753fSRafael Auler       if (!DstSym && BC.MIB->hasAnnotation(Inst, "CallProfile")) {
17940c2e0faSMaksim Panchenko         const auto &ICSP = BC.MIB->getAnnotationAs<IndirectCallSiteProfile>(
18040c2e0faSMaksim Panchenko             Inst, "CallProfile");
181f92ab6afSAmir Ayupov         for (const IndirectCallProfile &CSI : ICSP)
182a34c753fSRafael Auler           if (CSI.Symbol)
183a34c753fSRafael Auler             Counts.emplace_back(CSI.Symbol, CSI.Count);
184a34c753fSRafael Auler       } else {
185a34c753fSRafael Auler         const uint64_t Count = BB->getExecutionCount();
186a34c753fSRafael Auler         Counts.emplace_back(DstSym, Count);
187a34c753fSRafael Auler       }
188a34c753fSRafael Auler 
189a34c753fSRafael Auler       return Counts;
190a34c753fSRafael Auler     };
191a34c753fSRafael Auler 
192a34c753fSRafael Auler     // If the function has an invalid profile, try to use the perf data
193a34c753fSRafael Auler     // directly (if requested).  If there is no perf data for this function,
194a34c753fSRafael Auler     // fall back to the CFG walker which attempts to handle missing data.
195a34c753fSRafael Auler     if (!Function->hasValidProfile() && CgFromPerfData &&
196a34c753fSRafael Auler         !Function->getAllCallSites().empty()) {
197a34c753fSRafael Auler       LLVM_DEBUG(
198a34c753fSRafael Auler           dbgs() << "BOLT-DEBUG: buildCallGraph: Falling back to perf data"
199a34c753fSRafael Auler                  << " for " << *Function << "\n");
200a34c753fSRafael Auler       ++NumFallbacks;
201a34c753fSRafael Auler       const size_t Size = functionSize(Function);
202a34c753fSRafael Auler       for (const IndirectCallProfile &CSI : Function->getAllCallSites()) {
203a34c753fSRafael Auler         ++TotalCallsites;
204a34c753fSRafael Auler 
205a34c753fSRafael Auler         if (!CSI.Symbol)
206a34c753fSRafael Auler           continue;
207a34c753fSRafael Auler 
208a34c753fSRafael Auler         // The computed offset may exceed the hot part of the function; hence,
209a34c753fSRafael Auler         // bound it by the size.
210a34c753fSRafael Auler         Offset = CSI.Offset;
211a34c753fSRafael Auler         if (Offset > Size)
212a34c753fSRafael Auler           Offset = Size;
213a34c753fSRafael Auler 
214f92ab6afSAmir Ayupov         if (!recordCall(CSI.Symbol, CSI.Count))
215a34c753fSRafael Auler           ++NotProcessed;
216a34c753fSRafael Auler       }
217a34c753fSRafael Auler     } else {
218*8477bc67SFabian Parzefall       for (BinaryBasicBlock *BB : Function->getLayout().blocks()) {
219a34c753fSRafael Auler         // Don't count calls from cold blocks unless requested.
220a34c753fSRafael Auler         if (BB->isCold() && !IncludeColdCalls)
221a34c753fSRafael Auler           continue;
222a34c753fSRafael Auler 
223a34c753fSRafael Auler         // Determine whether the block is included in Function's (hot) size
224a34c753fSRafael Auler         // See BinaryFunction::estimateHotSize
225a34c753fSRafael Auler         bool BBIncludedInFunctionSize = false;
226a34c753fSRafael Auler         if (UseFunctionHotSize && Function->isSplit()) {
227a34c753fSRafael Auler           if (UseSplitHotSize)
228a34c753fSRafael Auler             BBIncludedInFunctionSize = !BB->isCold();
229a34c753fSRafael Auler           else
230a34c753fSRafael Auler             BBIncludedInFunctionSize = BB->getKnownExecutionCount() != 0;
231a34c753fSRafael Auler         } else {
232a34c753fSRafael Auler           BBIncludedInFunctionSize = true;
233a34c753fSRafael Auler         }
234a34c753fSRafael Auler 
235a34c753fSRafael Auler         for (MCInst &Inst : *BB) {
236a34c753fSRafael Auler           // Find call instructions and extract target symbols from each one.
237a34c753fSRafael Auler           if (BC.MIB->isCall(Inst)) {
238a34c753fSRafael Auler             const CallInfoTy CallInfo = getCallInfo(BB, Inst);
239a34c753fSRafael Auler 
240a34c753fSRafael Auler             if (!CallInfo.empty()) {
241a34c753fSRafael Auler               for (const TargetDesc &CI : CallInfo) {
242a34c753fSRafael Auler                 ++TotalCallsites;
243a34c753fSRafael Auler                 if (!recordCall(CI.first, CI.second))
244a34c753fSRafael Auler                   ++NotProcessed;
245a34c753fSRafael Auler               }
246a34c753fSRafael Auler             } else {
247a34c753fSRafael Auler               ++TotalCallsites;
248a34c753fSRafael Auler               ++NotProcessed;
249a34c753fSRafael Auler             }
250a34c753fSRafael Auler           }
251a34c753fSRafael Auler           // Increase Offset if needed
252f92ab6afSAmir Ayupov           if (BBIncludedInFunctionSize)
253a34c753fSRafael Auler             Offset += BC.computeCodeSize(&Inst, &Inst + 1);
254a34c753fSRafael Auler         }
255a34c753fSRafael Auler       }
256a34c753fSRafael Auler     }
257a34c753fSRafael Auler   }
258a34c753fSRafael Auler 
259a34c753fSRafael Auler #ifndef NDEBUG
260a34c753fSRafael Auler   bool PrintInfo = DebugFlag && isCurrentDebugType("callgraph");
261a34c753fSRafael Auler #else
262a34c753fSRafael Auler   bool PrintInfo = false;
263a34c753fSRafael Auler #endif
264f92ab6afSAmir Ayupov   if (PrintInfo || opts::Verbosity > 0)
265a34c753fSRafael Auler     outs() << format("BOLT-INFO: buildCallGraph: %u nodes, %u callsites "
266a34c753fSRafael Auler                      "(%u recursive), density = %.6lf, %u callsites not "
267a34c753fSRafael Auler                      "processed, %u callsites with invalid profile, "
268a34c753fSRafael Auler                      "used perf data for %u stale functions.\n",
269a34c753fSRafael Auler                      Cg.numNodes(), TotalCallsites, RecursiveCallsites,
270a34c753fSRafael Auler                      Cg.density(), NotProcessed, NoProfileCallsites,
271a34c753fSRafael Auler                      NumFallbacks);
272a34c753fSRafael Auler 
273a34c753fSRafael Auler   return Cg;
274a34c753fSRafael Auler }
275a34c753fSRafael Auler 
27640c2e0faSMaksim Panchenko } // namespace bolt
27740c2e0faSMaksim Panchenko } // namespace llvm
278