17d523365SDimitry Andric //===- SampleProfile.cpp - Incorporate sample profiles into the IR --------===//
27d523365SDimitry Andric //
37d523365SDimitry Andric // The LLVM Compiler Infrastructure
47d523365SDimitry Andric //
57d523365SDimitry Andric // This file is distributed under the University of Illinois Open Source
67d523365SDimitry Andric // License. See LICENSE.TXT for details.
77d523365SDimitry Andric //
87d523365SDimitry Andric //===----------------------------------------------------------------------===//
97d523365SDimitry Andric //
107d523365SDimitry Andric // This file implements the SampleProfileLoader transformation. This pass
117d523365SDimitry Andric // reads a profile file generated by a sampling profiler (e.g. Linux Perf -
127d523365SDimitry Andric // http://perf.wiki.kernel.org/) and generates IR metadata to reflect the
137d523365SDimitry Andric // profile information in the given profile.
147d523365SDimitry Andric //
157d523365SDimitry Andric // This pass generates branch weight annotations on the IR:
167d523365SDimitry Andric //
177d523365SDimitry Andric // - prof: Represents branch weights. This annotation is added to branches
187d523365SDimitry Andric // to indicate the weights of each edge coming out of the branch.
197d523365SDimitry Andric // The weight of each edge is the weight of the target block for
207d523365SDimitry Andric // that edge. The weight of a block B is computed as the maximum
217d523365SDimitry Andric // number of samples found in B.
227d523365SDimitry Andric //
237d523365SDimitry Andric //===----------------------------------------------------------------------===//
247d523365SDimitry Andric
254ba319b5SDimitry Andric #include "llvm/Transforms/IPO/SampleProfile.h"
262cab237bSDimitry Andric #include "llvm/ADT/ArrayRef.h"
277d523365SDimitry Andric #include "llvm/ADT/DenseMap.h"
282cab237bSDimitry Andric #include "llvm/ADT/DenseSet.h"
292cab237bSDimitry Andric #include "llvm/ADT/None.h"
307d523365SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
317d523365SDimitry Andric #include "llvm/ADT/SmallSet.h"
322cab237bSDimitry Andric #include "llvm/ADT/SmallVector.h"
332cab237bSDimitry Andric #include "llvm/ADT/StringMap.h"
347d523365SDimitry Andric #include "llvm/ADT/StringRef.h"
352cab237bSDimitry Andric #include "llvm/ADT/Twine.h"
363ca95b02SDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
372cab237bSDimitry Andric #include "llvm/Analysis/InlineCost.h"
387d523365SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
392cab237bSDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
404ba319b5SDimitry Andric #include "llvm/Analysis/PostDominators.h"
414ba319b5SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h"
422cab237bSDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
432cab237bSDimitry Andric #include "llvm/IR/BasicBlock.h"
442cab237bSDimitry Andric #include "llvm/IR/CFG.h"
452cab237bSDimitry Andric #include "llvm/IR/CallSite.h"
462cab237bSDimitry Andric #include "llvm/IR/DebugInfoMetadata.h"
472cab237bSDimitry Andric #include "llvm/IR/DebugLoc.h"
487d523365SDimitry Andric #include "llvm/IR/DiagnosticInfo.h"
497d523365SDimitry Andric #include "llvm/IR/Dominators.h"
507d523365SDimitry Andric #include "llvm/IR/Function.h"
517a7e6055SDimitry Andric #include "llvm/IR/GlobalValue.h"
522cab237bSDimitry Andric #include "llvm/IR/InstrTypes.h"
532cab237bSDimitry Andric #include "llvm/IR/Instruction.h"
547d523365SDimitry Andric #include "llvm/IR/Instructions.h"
553ca95b02SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
567d523365SDimitry Andric #include "llvm/IR/LLVMContext.h"
577d523365SDimitry Andric #include "llvm/IR/MDBuilder.h"
587d523365SDimitry Andric #include "llvm/IR/Module.h"
592cab237bSDimitry Andric #include "llvm/IR/PassManager.h"
606bc11b14SDimitry Andric #include "llvm/IR/ValueSymbolTable.h"
617d523365SDimitry Andric #include "llvm/Pass.h"
627a7e6055SDimitry Andric #include "llvm/ProfileData/InstrProf.h"
632cab237bSDimitry Andric #include "llvm/ProfileData/SampleProf.h"
647d523365SDimitry Andric #include "llvm/ProfileData/SampleProfReader.h"
652cab237bSDimitry Andric #include "llvm/Support/Casting.h"
667d523365SDimitry Andric #include "llvm/Support/CommandLine.h"
677d523365SDimitry Andric #include "llvm/Support/Debug.h"
682cab237bSDimitry Andric #include "llvm/Support/ErrorHandling.h"
697d523365SDimitry Andric #include "llvm/Support/ErrorOr.h"
702cab237bSDimitry Andric #include "llvm/Support/GenericDomTree.h"
717d523365SDimitry Andric #include "llvm/Support/raw_ostream.h"
727d523365SDimitry Andric #include "llvm/Transforms/IPO.h"
737a7e6055SDimitry Andric #include "llvm/Transforms/Instrumentation.h"
742cab237bSDimitry Andric #include "llvm/Transforms/Utils/CallPromotionUtils.h"
757d523365SDimitry Andric #include "llvm/Transforms/Utils/Cloning.h"
762cab237bSDimitry Andric #include <algorithm>
772cab237bSDimitry Andric #include <cassert>
782cab237bSDimitry Andric #include <cstdint>
792cab237bSDimitry Andric #include <functional>
802cab237bSDimitry Andric #include <limits>
812cab237bSDimitry Andric #include <map>
822cab237bSDimitry Andric #include <memory>
832cab237bSDimitry Andric #include <string>
842cab237bSDimitry Andric #include <system_error>
852cab237bSDimitry Andric #include <utility>
862cab237bSDimitry Andric #include <vector>
877d523365SDimitry Andric
887d523365SDimitry Andric using namespace llvm;
897d523365SDimitry Andric using namespace sampleprof;
904ba319b5SDimitry Andric using ProfileCount = Function::ProfileCount;
917d523365SDimitry Andric #define DEBUG_TYPE "sample-profile"
927d523365SDimitry Andric
937d523365SDimitry Andric // Command line option to specify the file to read samples from. This is
947d523365SDimitry Andric // mainly used for debugging.
957d523365SDimitry Andric static cl::opt<std::string> SampleProfileFile(
967d523365SDimitry Andric "sample-profile-file", cl::init(""), cl::value_desc("filename"),
977d523365SDimitry Andric cl::desc("Profile file loaded by -sample-profile"), cl::Hidden);
982cab237bSDimitry Andric
99*b5893f02SDimitry Andric // The named file contains a set of transformations that may have been applied
100*b5893f02SDimitry Andric // to the symbol names between the program from which the sample data was
101*b5893f02SDimitry Andric // collected and the current program's symbols.
102*b5893f02SDimitry Andric static cl::opt<std::string> SampleProfileRemappingFile(
103*b5893f02SDimitry Andric "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
104*b5893f02SDimitry Andric cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
105*b5893f02SDimitry Andric
1067d523365SDimitry Andric static cl::opt<unsigned> SampleProfileMaxPropagateIterations(
1077d523365SDimitry Andric "sample-profile-max-propagate-iterations", cl::init(100),
1087d523365SDimitry Andric cl::desc("Maximum number of iterations to go through when propagating "
1097d523365SDimitry Andric "sample block/edge weights through the CFG."));
1102cab237bSDimitry Andric
1117d523365SDimitry Andric static cl::opt<unsigned> SampleProfileRecordCoverage(
1127d523365SDimitry Andric "sample-profile-check-record-coverage", cl::init(0), cl::value_desc("N"),
1137d523365SDimitry Andric cl::desc("Emit a warning if less than N% of records in the input profile "
1147d523365SDimitry Andric "are matched to the IR."));
1152cab237bSDimitry Andric
1167d523365SDimitry Andric static cl::opt<unsigned> SampleProfileSampleCoverage(
1177d523365SDimitry Andric "sample-profile-check-sample-coverage", cl::init(0), cl::value_desc("N"),
1187d523365SDimitry Andric cl::desc("Emit a warning if less than N% of samples in the input profile "
1197d523365SDimitry Andric "are matched to the IR."));
1202cab237bSDimitry Andric
1214ba319b5SDimitry Andric static cl::opt<bool> NoWarnSampleUnused(
1224ba319b5SDimitry Andric "no-warn-sample-unused", cl::init(false), cl::Hidden,
1234ba319b5SDimitry Andric cl::desc("Use this option to turn off/on warnings about function with "
1244ba319b5SDimitry Andric "samples but without debug information to use those samples. "));
1257d523365SDimitry Andric
126*b5893f02SDimitry Andric static cl::opt<bool> ProfileSampleAccurate(
127*b5893f02SDimitry Andric "profile-sample-accurate", cl::Hidden, cl::init(false),
128*b5893f02SDimitry Andric cl::desc("If the sample profile is accurate, we will mark all un-sampled "
129*b5893f02SDimitry Andric "callsite and function as having 0 samples. Otherwise, treat "
130*b5893f02SDimitry Andric "un-sampled callsites and functions conservatively as unknown. "));
131*b5893f02SDimitry Andric
1327d523365SDimitry Andric namespace {
1332cab237bSDimitry Andric
1342cab237bSDimitry Andric using BlockWeightMap = DenseMap<const BasicBlock *, uint64_t>;
1352cab237bSDimitry Andric using EquivalenceClassMap = DenseMap<const BasicBlock *, const BasicBlock *>;
1362cab237bSDimitry Andric using Edge = std::pair<const BasicBlock *, const BasicBlock *>;
1372cab237bSDimitry Andric using EdgeWeightMap = DenseMap<Edge, uint64_t>;
1382cab237bSDimitry Andric using BlockEdgeMap =
1392cab237bSDimitry Andric DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>;
1407d523365SDimitry Andric
1417d523365SDimitry Andric class SampleCoverageTracker {
1427d523365SDimitry Andric public:
1432cab237bSDimitry Andric SampleCoverageTracker() = default;
1447d523365SDimitry Andric
1457d523365SDimitry Andric bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
1467d523365SDimitry Andric uint32_t Discriminator, uint64_t Samples);
1477d523365SDimitry Andric unsigned computeCoverage(unsigned Used, unsigned Total) const;
1484ba319b5SDimitry Andric unsigned countUsedRecords(const FunctionSamples *FS,
1494ba319b5SDimitry Andric ProfileSummaryInfo *PSI) const;
1504ba319b5SDimitry Andric unsigned countBodyRecords(const FunctionSamples *FS,
1514ba319b5SDimitry Andric ProfileSummaryInfo *PSI) const;
getTotalUsedSamples() const1527d523365SDimitry Andric uint64_t getTotalUsedSamples() const { return TotalUsedSamples; }
1534ba319b5SDimitry Andric uint64_t countBodySamples(const FunctionSamples *FS,
1544ba319b5SDimitry Andric ProfileSummaryInfo *PSI) const;
1552cab237bSDimitry Andric
clear()1567d523365SDimitry Andric void clear() {
1577d523365SDimitry Andric SampleCoverage.clear();
1587d523365SDimitry Andric TotalUsedSamples = 0;
1597d523365SDimitry Andric }
1607d523365SDimitry Andric
1617d523365SDimitry Andric private:
1622cab237bSDimitry Andric using BodySampleCoverageMap = std::map<LineLocation, unsigned>;
1632cab237bSDimitry Andric using FunctionSamplesCoverageMap =
1642cab237bSDimitry Andric DenseMap<const FunctionSamples *, BodySampleCoverageMap>;
1657d523365SDimitry Andric
1667d523365SDimitry Andric /// Coverage map for sampling records.
1677d523365SDimitry Andric ///
1687d523365SDimitry Andric /// This map keeps a record of sampling records that have been matched to
1697d523365SDimitry Andric /// an IR instruction. This is used to detect some form of staleness in
1707d523365SDimitry Andric /// profiles (see flag -sample-profile-check-coverage).
1717d523365SDimitry Andric ///
1727d523365SDimitry Andric /// Each entry in the map corresponds to a FunctionSamples instance. This is
1737d523365SDimitry Andric /// another map that counts how many times the sample record at the
1747d523365SDimitry Andric /// given location has been used.
1757d523365SDimitry Andric FunctionSamplesCoverageMap SampleCoverage;
1767d523365SDimitry Andric
1777d523365SDimitry Andric /// Number of samples used from the profile.
1787d523365SDimitry Andric ///
1797d523365SDimitry Andric /// When a sampling record is used for the first time, the samples from
1807d523365SDimitry Andric /// that record are added to this accumulator. Coverage is later computed
1817d523365SDimitry Andric /// based on the total number of samples available in this function and
1827d523365SDimitry Andric /// its callsites.
1837d523365SDimitry Andric ///
1847d523365SDimitry Andric /// Note that this accumulator tracks samples used from a single function
1857d523365SDimitry Andric /// and all the inlined callsites. Strictly, we should have a map of counters
1867d523365SDimitry Andric /// keyed by FunctionSamples pointers, but these stats are cleared after
1877d523365SDimitry Andric /// every function, so we just need to keep a single counter.
1882cab237bSDimitry Andric uint64_t TotalUsedSamples = 0;
1897d523365SDimitry Andric };
1907d523365SDimitry Andric
1914ba319b5SDimitry Andric /// Sample profile pass.
192d88c1a5aSDimitry Andric ///
193d88c1a5aSDimitry Andric /// This pass reads profile data from the file specified by
194d88c1a5aSDimitry Andric /// -sample-profile-file and annotates every affected function with the
195d88c1a5aSDimitry Andric /// profile information found in that file.
196d88c1a5aSDimitry Andric class SampleProfileLoader {
197d88c1a5aSDimitry Andric public:
SampleProfileLoader(StringRef Name,StringRef RemapName,bool IsThinLTOPreLink,std::function<AssumptionCache & (Function &)> GetAssumptionCache,std::function<TargetTransformInfo & (Function &)> GetTargetTransformInfo)1982cab237bSDimitry Andric SampleProfileLoader(
199*b5893f02SDimitry Andric StringRef Name, StringRef RemapName, bool IsThinLTOPreLink,
2002cab237bSDimitry Andric std::function<AssumptionCache &(Function &)> GetAssumptionCache,
2012cab237bSDimitry Andric std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo)
202fe4fed2eSDimitry Andric : GetAC(std::move(GetAssumptionCache)),
203fe4fed2eSDimitry Andric GetTTI(std::move(GetTargetTransformInfo)), Filename(Name),
204*b5893f02SDimitry Andric RemappingFilename(RemapName), IsThinLTOPreLink(IsThinLTOPreLink) {}
205d88c1a5aSDimitry Andric
206d88c1a5aSDimitry Andric bool doInitialization(Module &M);
2074ba319b5SDimitry Andric bool runOnModule(Module &M, ModuleAnalysisManager *AM,
2084ba319b5SDimitry Andric ProfileSummaryInfo *_PSI);
209d88c1a5aSDimitry Andric
dump()210d88c1a5aSDimitry Andric void dump() { Reader->dump(); }
211d88c1a5aSDimitry Andric
212d88c1a5aSDimitry Andric protected:
2132cab237bSDimitry Andric bool runOnFunction(Function &F, ModuleAnalysisManager *AM);
214d88c1a5aSDimitry Andric unsigned getFunctionLoc(Function &F);
215d88c1a5aSDimitry Andric bool emitAnnotations(Function &F);
216d88c1a5aSDimitry Andric ErrorOr<uint64_t> getInstWeight(const Instruction &I);
217d88c1a5aSDimitry Andric ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
218d88c1a5aSDimitry Andric const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
2197a7e6055SDimitry Andric std::vector<const FunctionSamples *>
2202cab237bSDimitry Andric findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const;
221*b5893f02SDimitry Andric mutable DenseMap<const DILocation *, const FunctionSamples *> DILocation2SampleMap;
222d88c1a5aSDimitry Andric const FunctionSamples *findFunctionSamples(const Instruction &I) const;
2232cab237bSDimitry Andric bool inlineCallInstruction(Instruction *I);
2247a7e6055SDimitry Andric bool inlineHotFunctions(Function &F,
2252cab237bSDimitry Andric DenseSet<GlobalValue::GUID> &InlinedGUIDs);
226d88c1a5aSDimitry Andric void printEdgeWeight(raw_ostream &OS, Edge E);
227d88c1a5aSDimitry Andric void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const;
228d88c1a5aSDimitry Andric void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB);
229d88c1a5aSDimitry Andric bool computeBlockWeights(Function &F);
230d88c1a5aSDimitry Andric void findEquivalenceClasses(Function &F);
231b40b48b8SDimitry Andric template <bool IsPostDom>
232d88c1a5aSDimitry Andric void findEquivalencesFor(BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,
233b40b48b8SDimitry Andric DominatorTreeBase<BasicBlock, IsPostDom> *DomTree);
234b40b48b8SDimitry Andric
235d88c1a5aSDimitry Andric void propagateWeights(Function &F);
236d88c1a5aSDimitry Andric uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
237d88c1a5aSDimitry Andric void buildEdges(Function &F);
238d88c1a5aSDimitry Andric bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
239d88c1a5aSDimitry Andric void computeDominanceAndLoopInfo(Function &F);
240d88c1a5aSDimitry Andric void clearFunctionData();
241d88c1a5aSDimitry Andric
2424ba319b5SDimitry Andric /// Map basic blocks to their computed weights.
243d88c1a5aSDimitry Andric ///
244d88c1a5aSDimitry Andric /// The weight of a basic block is defined to be the maximum
245d88c1a5aSDimitry Andric /// of all the instruction weights in that block.
246d88c1a5aSDimitry Andric BlockWeightMap BlockWeights;
247d88c1a5aSDimitry Andric
2484ba319b5SDimitry Andric /// Map edges to their computed weights.
249d88c1a5aSDimitry Andric ///
250d88c1a5aSDimitry Andric /// Edge weights are computed by propagating basic block weights in
251d88c1a5aSDimitry Andric /// SampleProfile::propagateWeights.
252d88c1a5aSDimitry Andric EdgeWeightMap EdgeWeights;
253d88c1a5aSDimitry Andric
2544ba319b5SDimitry Andric /// Set of visited blocks during propagation.
255d88c1a5aSDimitry Andric SmallPtrSet<const BasicBlock *, 32> VisitedBlocks;
256d88c1a5aSDimitry Andric
2574ba319b5SDimitry Andric /// Set of visited edges during propagation.
258d88c1a5aSDimitry Andric SmallSet<Edge, 32> VisitedEdges;
259d88c1a5aSDimitry Andric
2604ba319b5SDimitry Andric /// Equivalence classes for block weights.
261d88c1a5aSDimitry Andric ///
262d88c1a5aSDimitry Andric /// Two blocks BB1 and BB2 are in the same equivalence class if they
263d88c1a5aSDimitry Andric /// dominate and post-dominate each other, and they are in the same loop
264d88c1a5aSDimitry Andric /// nest. When this happens, the two blocks are guaranteed to execute
265d88c1a5aSDimitry Andric /// the same number of times.
266d88c1a5aSDimitry Andric EquivalenceClassMap EquivalenceClass;
267d88c1a5aSDimitry Andric
2686bc11b14SDimitry Andric /// Map from function name to Function *. Used to find the function from
2696bc11b14SDimitry Andric /// the function name. If the function name contains suffix, additional
2706bc11b14SDimitry Andric /// entry is added to map from the stripped name to the function if there
2716bc11b14SDimitry Andric /// is one-to-one mapping.
2726bc11b14SDimitry Andric StringMap<Function *> SymbolMap;
2736bc11b14SDimitry Andric
2744ba319b5SDimitry Andric /// Dominance, post-dominance and loop information.
275d88c1a5aSDimitry Andric std::unique_ptr<DominatorTree> DT;
2764ba319b5SDimitry Andric std::unique_ptr<PostDominatorTree> PDT;
277d88c1a5aSDimitry Andric std::unique_ptr<LoopInfo> LI;
278d88c1a5aSDimitry Andric
2792cab237bSDimitry Andric std::function<AssumptionCache &(Function &)> GetAC;
2802cab237bSDimitry Andric std::function<TargetTransformInfo &(Function &)> GetTTI;
281d88c1a5aSDimitry Andric
2824ba319b5SDimitry Andric /// Predecessors for each basic block in the CFG.
283d88c1a5aSDimitry Andric BlockEdgeMap Predecessors;
284d88c1a5aSDimitry Andric
2854ba319b5SDimitry Andric /// Successors for each basic block in the CFG.
286d88c1a5aSDimitry Andric BlockEdgeMap Successors;
287d88c1a5aSDimitry Andric
2887d523365SDimitry Andric SampleCoverageTracker CoverageTracker;
2897d523365SDimitry Andric
2904ba319b5SDimitry Andric /// Profile reader object.
291d88c1a5aSDimitry Andric std::unique_ptr<SampleProfileReader> Reader;
292d88c1a5aSDimitry Andric
2934ba319b5SDimitry Andric /// Samples collected for the body of this function.
2942cab237bSDimitry Andric FunctionSamples *Samples = nullptr;
295d88c1a5aSDimitry Andric
2964ba319b5SDimitry Andric /// Name of the profile file to load.
297d88c1a5aSDimitry Andric std::string Filename;
298d88c1a5aSDimitry Andric
299*b5893f02SDimitry Andric /// Name of the profile remapping file to load.
300*b5893f02SDimitry Andric std::string RemappingFilename;
301*b5893f02SDimitry Andric
3024ba319b5SDimitry Andric /// Flag indicating whether the profile input loaded successfully.
3032cab237bSDimitry Andric bool ProfileIsValid = false;
3042cab237bSDimitry Andric
3054ba319b5SDimitry Andric /// Flag indicating if the pass is invoked in ThinLTO compile phase.
3062cab237bSDimitry Andric ///
3072cab237bSDimitry Andric /// In this phase, in annotation, we should not promote indirect calls.
3082cab237bSDimitry Andric /// Instead, we will mark GUIDs that needs to be annotated to the function.
3092cab237bSDimitry Andric bool IsThinLTOPreLink;
310d88c1a5aSDimitry Andric
3114ba319b5SDimitry Andric /// Profile Summary Info computed from sample profile.
3124ba319b5SDimitry Andric ProfileSummaryInfo *PSI = nullptr;
3134ba319b5SDimitry Andric
3144ba319b5SDimitry Andric /// Total number of samples collected in this profile.
315d88c1a5aSDimitry Andric ///
316d88c1a5aSDimitry Andric /// This is the sum of all the samples collected in all the functions executed
317d88c1a5aSDimitry Andric /// at runtime.
3182cab237bSDimitry Andric uint64_t TotalCollectedSamples = 0;
3192cab237bSDimitry Andric
3204ba319b5SDimitry Andric /// Optimization Remark Emitter used to emit diagnostic remarks.
3212cab237bSDimitry Andric OptimizationRemarkEmitter *ORE = nullptr;
322d88c1a5aSDimitry Andric };
323d88c1a5aSDimitry Andric
324d88c1a5aSDimitry Andric class SampleProfileLoaderLegacyPass : public ModulePass {
325d88c1a5aSDimitry Andric public:
326d88c1a5aSDimitry Andric // Class identification, replacement for typeinfo
327d88c1a5aSDimitry Andric static char ID;
328d88c1a5aSDimitry Andric
SampleProfileLoaderLegacyPass(StringRef Name=SampleProfileFile,bool IsThinLTOPreLink=false)3292cab237bSDimitry Andric SampleProfileLoaderLegacyPass(StringRef Name = SampleProfileFile,
3302cab237bSDimitry Andric bool IsThinLTOPreLink = false)
331*b5893f02SDimitry Andric : ModulePass(ID),
332*b5893f02SDimitry Andric SampleLoader(Name, SampleProfileRemappingFile, IsThinLTOPreLink,
3332cab237bSDimitry Andric [&](Function &F) -> AssumptionCache & {
3342cab237bSDimitry Andric return ACT->getAssumptionCache(F);
3352cab237bSDimitry Andric },
__anon696b38bd0302(Function &F) 3362cab237bSDimitry Andric [&](Function &F) -> TargetTransformInfo & {
3372cab237bSDimitry Andric return TTIWP->getTTI(F);
3382cab237bSDimitry Andric }) {
339d88c1a5aSDimitry Andric initializeSampleProfileLoaderLegacyPassPass(
340d88c1a5aSDimitry Andric *PassRegistry::getPassRegistry());
341d88c1a5aSDimitry Andric }
342d88c1a5aSDimitry Andric
dump()343d88c1a5aSDimitry Andric void dump() { SampleLoader.dump(); }
344d88c1a5aSDimitry Andric
doInitialization(Module & M)345d88c1a5aSDimitry Andric bool doInitialization(Module &M) override {
346d88c1a5aSDimitry Andric return SampleLoader.doInitialization(M);
347d88c1a5aSDimitry Andric }
3482cab237bSDimitry Andric
getPassName() const349d88c1a5aSDimitry Andric StringRef getPassName() const override { return "Sample profile pass"; }
350d88c1a5aSDimitry Andric bool runOnModule(Module &M) override;
351d88c1a5aSDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const352d88c1a5aSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
353d88c1a5aSDimitry Andric AU.addRequired<AssumptionCacheTracker>();
3542cab237bSDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>();
3554ba319b5SDimitry Andric AU.addRequired<ProfileSummaryInfoWrapperPass>();
356d88c1a5aSDimitry Andric }
357d88c1a5aSDimitry Andric
358d88c1a5aSDimitry Andric private:
359d88c1a5aSDimitry Andric SampleProfileLoader SampleLoader;
3602cab237bSDimitry Andric AssumptionCacheTracker *ACT = nullptr;
3612cab237bSDimitry Andric TargetTransformInfoWrapperPass *TTIWP = nullptr;
362d88c1a5aSDimitry Andric };
363d88c1a5aSDimitry Andric
3642cab237bSDimitry Andric } // end anonymous namespace
3652cab237bSDimitry Andric
3664ba319b5SDimitry Andric /// Return true if the given callsite is hot wrt to hot cutoff threshold.
3677d523365SDimitry Andric ///
3687d523365SDimitry Andric /// Functions that were inlined in the original binary will be represented
3697d523365SDimitry Andric /// in the inline stack in the sample profile. If the profile shows that
3707d523365SDimitry Andric /// the original inline decision was "good" (i.e., the callsite is executed
3717d523365SDimitry Andric /// frequently), then we will recreate the inline decision and apply the
3727d523365SDimitry Andric /// profile from the inlined callsite.
3737d523365SDimitry Andric ///
3744ba319b5SDimitry Andric /// To decide whether an inlined callsite is hot, we compare the callsite
3754ba319b5SDimitry Andric /// sample count with the hot cutoff computed by ProfileSummaryInfo, it is
3764ba319b5SDimitry Andric /// regarded as hot if the count is above the cutoff value.
callsiteIsHot(const FunctionSamples * CallsiteFS,ProfileSummaryInfo * PSI)3774ba319b5SDimitry Andric static bool callsiteIsHot(const FunctionSamples *CallsiteFS,
3784ba319b5SDimitry Andric ProfileSummaryInfo *PSI) {
3797d523365SDimitry Andric if (!CallsiteFS)
3807d523365SDimitry Andric return false; // The callsite was not inlined in the original binary.
3817d523365SDimitry Andric
3824ba319b5SDimitry Andric assert(PSI && "PSI is expected to be non null");
3837d523365SDimitry Andric uint64_t CallsiteTotalSamples = CallsiteFS->getTotalSamples();
3844ba319b5SDimitry Andric return PSI->isHotCount(CallsiteTotalSamples);
3857d523365SDimitry Andric }
3867d523365SDimitry Andric
3877d523365SDimitry Andric /// Mark as used the sample record for the given function samples at
3887d523365SDimitry Andric /// (LineOffset, Discriminator).
3897d523365SDimitry Andric ///
3907d523365SDimitry Andric /// \returns true if this is the first time we mark the given record.
markSamplesUsed(const FunctionSamples * FS,uint32_t LineOffset,uint32_t Discriminator,uint64_t Samples)3917d523365SDimitry Andric bool SampleCoverageTracker::markSamplesUsed(const FunctionSamples *FS,
3927d523365SDimitry Andric uint32_t LineOffset,
3937d523365SDimitry Andric uint32_t Discriminator,
3947d523365SDimitry Andric uint64_t Samples) {
3957d523365SDimitry Andric LineLocation Loc(LineOffset, Discriminator);
3967d523365SDimitry Andric unsigned &Count = SampleCoverage[FS][Loc];
3977d523365SDimitry Andric bool FirstTime = (++Count == 1);
3987d523365SDimitry Andric if (FirstTime)
3997d523365SDimitry Andric TotalUsedSamples += Samples;
4007d523365SDimitry Andric return FirstTime;
4017d523365SDimitry Andric }
4027d523365SDimitry Andric
4037d523365SDimitry Andric /// Return the number of sample records that were applied from this profile.
4047d523365SDimitry Andric ///
4057d523365SDimitry Andric /// This count does not include records from cold inlined callsites.
4067d523365SDimitry Andric unsigned
countUsedRecords(const FunctionSamples * FS,ProfileSummaryInfo * PSI) const4074ba319b5SDimitry Andric SampleCoverageTracker::countUsedRecords(const FunctionSamples *FS,
4084ba319b5SDimitry Andric ProfileSummaryInfo *PSI) const {
4097d523365SDimitry Andric auto I = SampleCoverage.find(FS);
4107d523365SDimitry Andric
4117d523365SDimitry Andric // The size of the coverage map for FS represents the number of records
4127d523365SDimitry Andric // that were marked used at least once.
4137d523365SDimitry Andric unsigned Count = (I != SampleCoverage.end()) ? I->second.size() : 0;
4147d523365SDimitry Andric
4157d523365SDimitry Andric // If there are inlined callsites in this function, count the samples found
4167d523365SDimitry Andric // in the respective bodies. However, do not bother counting callees with 0
4177d523365SDimitry Andric // total samples, these are callees that were never invoked at runtime.
4187a7e6055SDimitry Andric for (const auto &I : FS->getCallsiteSamples())
4197a7e6055SDimitry Andric for (const auto &J : I.second) {
4207a7e6055SDimitry Andric const FunctionSamples *CalleeSamples = &J.second;
4214ba319b5SDimitry Andric if (callsiteIsHot(CalleeSamples, PSI))
4224ba319b5SDimitry Andric Count += countUsedRecords(CalleeSamples, PSI);
4237d523365SDimitry Andric }
4247d523365SDimitry Andric
4257d523365SDimitry Andric return Count;
4267d523365SDimitry Andric }
4277d523365SDimitry Andric
4287d523365SDimitry Andric /// Return the number of sample records in the body of this profile.
4297d523365SDimitry Andric ///
4307d523365SDimitry Andric /// This count does not include records from cold inlined callsites.
4317d523365SDimitry Andric unsigned
countBodyRecords(const FunctionSamples * FS,ProfileSummaryInfo * PSI) const4324ba319b5SDimitry Andric SampleCoverageTracker::countBodyRecords(const FunctionSamples *FS,
4334ba319b5SDimitry Andric ProfileSummaryInfo *PSI) const {
4347d523365SDimitry Andric unsigned Count = FS->getBodySamples().size();
4357d523365SDimitry Andric
4367d523365SDimitry Andric // Only count records in hot callsites.
4377a7e6055SDimitry Andric for (const auto &I : FS->getCallsiteSamples())
4387a7e6055SDimitry Andric for (const auto &J : I.second) {
4397a7e6055SDimitry Andric const FunctionSamples *CalleeSamples = &J.second;
4404ba319b5SDimitry Andric if (callsiteIsHot(CalleeSamples, PSI))
4414ba319b5SDimitry Andric Count += countBodyRecords(CalleeSamples, PSI);
4427d523365SDimitry Andric }
4437d523365SDimitry Andric
4447d523365SDimitry Andric return Count;
4457d523365SDimitry Andric }
4467d523365SDimitry Andric
4477d523365SDimitry Andric /// Return the number of samples collected in the body of this profile.
4487d523365SDimitry Andric ///
4497d523365SDimitry Andric /// This count does not include samples from cold inlined callsites.
4507d523365SDimitry Andric uint64_t
countBodySamples(const FunctionSamples * FS,ProfileSummaryInfo * PSI) const4514ba319b5SDimitry Andric SampleCoverageTracker::countBodySamples(const FunctionSamples *FS,
4524ba319b5SDimitry Andric ProfileSummaryInfo *PSI) const {
4537d523365SDimitry Andric uint64_t Total = 0;
4547d523365SDimitry Andric for (const auto &I : FS->getBodySamples())
4557d523365SDimitry Andric Total += I.second.getSamples();
4567d523365SDimitry Andric
4577d523365SDimitry Andric // Only count samples in hot callsites.
4587a7e6055SDimitry Andric for (const auto &I : FS->getCallsiteSamples())
4597a7e6055SDimitry Andric for (const auto &J : I.second) {
4607a7e6055SDimitry Andric const FunctionSamples *CalleeSamples = &J.second;
4614ba319b5SDimitry Andric if (callsiteIsHot(CalleeSamples, PSI))
4624ba319b5SDimitry Andric Total += countBodySamples(CalleeSamples, PSI);
4637d523365SDimitry Andric }
4647d523365SDimitry Andric
4657d523365SDimitry Andric return Total;
4667d523365SDimitry Andric }
4677d523365SDimitry Andric
4687d523365SDimitry Andric /// Return the fraction of sample records used in this profile.
4697d523365SDimitry Andric ///
4707d523365SDimitry Andric /// The returned value is an unsigned integer in the range 0-100 indicating
4717d523365SDimitry Andric /// the percentage of sample records that were used while applying this
4727d523365SDimitry Andric /// profile to the associated function.
computeCoverage(unsigned Used,unsigned Total) const4737d523365SDimitry Andric unsigned SampleCoverageTracker::computeCoverage(unsigned Used,
4747d523365SDimitry Andric unsigned Total) const {
4757d523365SDimitry Andric assert(Used <= Total &&
4767d523365SDimitry Andric "number of used records cannot exceed the total number of records");
4777d523365SDimitry Andric return Total > 0 ? Used * 100 / Total : 100;
4787d523365SDimitry Andric }
4797d523365SDimitry Andric
4807d523365SDimitry Andric /// Clear all the per-function data used to load samples and propagate weights.
clearFunctionData()4817d523365SDimitry Andric void SampleProfileLoader::clearFunctionData() {
4827d523365SDimitry Andric BlockWeights.clear();
4837d523365SDimitry Andric EdgeWeights.clear();
4847d523365SDimitry Andric VisitedBlocks.clear();
4857d523365SDimitry Andric VisitedEdges.clear();
4867d523365SDimitry Andric EquivalenceClass.clear();
4877d523365SDimitry Andric DT = nullptr;
4887d523365SDimitry Andric PDT = nullptr;
4897d523365SDimitry Andric LI = nullptr;
4907d523365SDimitry Andric Predecessors.clear();
4917d523365SDimitry Andric Successors.clear();
4927d523365SDimitry Andric CoverageTracker.clear();
4937d523365SDimitry Andric }
4947d523365SDimitry Andric
4952cab237bSDimitry Andric #ifndef NDEBUG
4964ba319b5SDimitry Andric /// Print the weight of edge \p E on stream \p OS.
4977d523365SDimitry Andric ///
4987d523365SDimitry Andric /// \param OS Stream to emit the output to.
4997d523365SDimitry Andric /// \param E Edge to print.
printEdgeWeight(raw_ostream & OS,Edge E)5007d523365SDimitry Andric void SampleProfileLoader::printEdgeWeight(raw_ostream &OS, Edge E) {
5017d523365SDimitry Andric OS << "weight[" << E.first->getName() << "->" << E.second->getName()
5027d523365SDimitry Andric << "]: " << EdgeWeights[E] << "\n";
5037d523365SDimitry Andric }
5047d523365SDimitry Andric
5054ba319b5SDimitry Andric /// Print the equivalence class of block \p BB on stream \p OS.
5067d523365SDimitry Andric ///
5077d523365SDimitry Andric /// \param OS Stream to emit the output to.
5087d523365SDimitry Andric /// \param BB Block to print.
printBlockEquivalence(raw_ostream & OS,const BasicBlock * BB)5097d523365SDimitry Andric void SampleProfileLoader::printBlockEquivalence(raw_ostream &OS,
5107d523365SDimitry Andric const BasicBlock *BB) {
5117d523365SDimitry Andric const BasicBlock *Equiv = EquivalenceClass[BB];
5127d523365SDimitry Andric OS << "equivalence[" << BB->getName()
5137d523365SDimitry Andric << "]: " << ((Equiv) ? EquivalenceClass[BB]->getName() : "NONE") << "\n";
5147d523365SDimitry Andric }
5157d523365SDimitry Andric
5164ba319b5SDimitry Andric /// Print the weight of block \p BB on stream \p OS.
5177d523365SDimitry Andric ///
5187d523365SDimitry Andric /// \param OS Stream to emit the output to.
5197d523365SDimitry Andric /// \param BB Block to print.
printBlockWeight(raw_ostream & OS,const BasicBlock * BB) const5207d523365SDimitry Andric void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
5217d523365SDimitry Andric const BasicBlock *BB) const {
5227d523365SDimitry Andric const auto &I = BlockWeights.find(BB);
5237d523365SDimitry Andric uint64_t W = (I == BlockWeights.end() ? 0 : I->second);
5247d523365SDimitry Andric OS << "weight[" << BB->getName() << "]: " << W << "\n";
5257d523365SDimitry Andric }
5262cab237bSDimitry Andric #endif
5277d523365SDimitry Andric
5284ba319b5SDimitry Andric /// Get the weight for an instruction.
5297d523365SDimitry Andric ///
5307d523365SDimitry Andric /// The "weight" of an instruction \p Inst is the number of samples
5317d523365SDimitry Andric /// collected on that instruction at runtime. To retrieve it, we
5327d523365SDimitry Andric /// need to compute the line number of \p Inst relative to the start of its
5337d523365SDimitry Andric /// function. We use HeaderLineno to compute the offset. We then
5347d523365SDimitry Andric /// look up the samples collected for \p Inst using BodySamples.
5357d523365SDimitry Andric ///
5367d523365SDimitry Andric /// \param Inst Instruction to query.
5377d523365SDimitry Andric ///
5387d523365SDimitry Andric /// \returns the weight of \p Inst.
getInstWeight(const Instruction & Inst)5397a7e6055SDimitry Andric ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
5403ca95b02SDimitry Andric const DebugLoc &DLoc = Inst.getDebugLoc();
5417d523365SDimitry Andric if (!DLoc)
5427d523365SDimitry Andric return std::error_code();
5437d523365SDimitry Andric
5447d523365SDimitry Andric const FunctionSamples *FS = findFunctionSamples(Inst);
5457d523365SDimitry Andric if (!FS)
5467d523365SDimitry Andric return std::error_code();
5477d523365SDimitry Andric
548*b5893f02SDimitry Andric // Ignore all intrinsics, phinodes and branch instructions.
549*b5893f02SDimitry Andric // Branch and phinodes instruction usually contains debug info from sources outside of
550d88c1a5aSDimitry Andric // the residing basic block, thus we ignore them during annotation.
551*b5893f02SDimitry Andric if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst) || isa<PHINode>(Inst))
5523ca95b02SDimitry Andric return std::error_code();
5533ca95b02SDimitry Andric
5542cab237bSDimitry Andric // If a direct call/invoke instruction is inlined in profile
5552cab237bSDimitry Andric // (findCalleeFunctionSamples returns non-empty result), but not inlined here,
556d88c1a5aSDimitry Andric // it means that the inlined callsite has no sample, thus the call
557d88c1a5aSDimitry Andric // instruction should have 0 count.
5587a7e6055SDimitry Andric if ((isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) &&
5592cab237bSDimitry Andric !ImmutableCallSite(&Inst).isIndirectCall() &&
5607a7e6055SDimitry Andric findCalleeFunctionSamples(Inst))
561d88c1a5aSDimitry Andric return 0;
562d88c1a5aSDimitry Andric
5637d523365SDimitry Andric const DILocation *DIL = DLoc;
5644ba319b5SDimitry Andric uint32_t LineOffset = FunctionSamples::getOffset(DIL);
5657a7e6055SDimitry Andric uint32_t Discriminator = DIL->getBaseDiscriminator();
5667a7e6055SDimitry Andric ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
5677d523365SDimitry Andric if (R) {
5687d523365SDimitry Andric bool FirstMark =
5697d523365SDimitry Andric CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
5707d523365SDimitry Andric if (FirstMark) {
5712cab237bSDimitry Andric ORE->emit([&]() {
5722cab237bSDimitry Andric OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
5732cab237bSDimitry Andric Remark << "Applied " << ore::NV("NumSamples", *R);
5742cab237bSDimitry Andric Remark << " samples from profile (offset: ";
5752cab237bSDimitry Andric Remark << ore::NV("LineOffset", LineOffset);
5762cab237bSDimitry Andric if (Discriminator) {
5772cab237bSDimitry Andric Remark << ".";
5782cab237bSDimitry Andric Remark << ore::NV("Discriminator", Discriminator);
5792cab237bSDimitry Andric }
5802cab237bSDimitry Andric Remark << ")";
5812cab237bSDimitry Andric return Remark;
5822cab237bSDimitry Andric });
5837d523365SDimitry Andric }
5844ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << " " << DLoc.getLine() << "."
5857a7e6055SDimitry Andric << DIL->getBaseDiscriminator() << ":" << Inst
5867a7e6055SDimitry Andric << " (line offset: " << LineOffset << "."
5877a7e6055SDimitry Andric << DIL->getBaseDiscriminator() << " - weight: " << R.get()
5887d523365SDimitry Andric << ")\n");
5897d523365SDimitry Andric }
5907d523365SDimitry Andric return R;
5917d523365SDimitry Andric }
5927d523365SDimitry Andric
5934ba319b5SDimitry Andric /// Compute the weight of a basic block.
5947d523365SDimitry Andric ///
5957d523365SDimitry Andric /// The weight of basic block \p BB is the maximum weight of all the
5967d523365SDimitry Andric /// instructions in BB.
5977d523365SDimitry Andric ///
5987d523365SDimitry Andric /// \param BB The basic block to query.
5997d523365SDimitry Andric ///
6007d523365SDimitry Andric /// \returns the weight for \p BB.
getBlockWeight(const BasicBlock * BB)6017a7e6055SDimitry Andric ErrorOr<uint64_t> SampleProfileLoader::getBlockWeight(const BasicBlock *BB) {
602d88c1a5aSDimitry Andric uint64_t Max = 0;
603d88c1a5aSDimitry Andric bool HasWeight = false;
6047d523365SDimitry Andric for (auto &I : BB->getInstList()) {
6057d523365SDimitry Andric const ErrorOr<uint64_t> &R = getInstWeight(I);
606d88c1a5aSDimitry Andric if (R) {
607d88c1a5aSDimitry Andric Max = std::max(Max, R.get());
608d88c1a5aSDimitry Andric HasWeight = true;
6097d523365SDimitry Andric }
6107d523365SDimitry Andric }
611d88c1a5aSDimitry Andric return HasWeight ? ErrorOr<uint64_t>(Max) : std::error_code();
6127d523365SDimitry Andric }
6137d523365SDimitry Andric
6144ba319b5SDimitry Andric /// Compute and store the weights of every basic block.
6157d523365SDimitry Andric ///
6167d523365SDimitry Andric /// This populates the BlockWeights map by computing
6177d523365SDimitry Andric /// the weights of every basic block in the CFG.
6187d523365SDimitry Andric ///
6197d523365SDimitry Andric /// \param F The function to query.
computeBlockWeights(Function & F)6207d523365SDimitry Andric bool SampleProfileLoader::computeBlockWeights(Function &F) {
6217d523365SDimitry Andric bool Changed = false;
6224ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Block weights\n");
6237d523365SDimitry Andric for (const auto &BB : F) {
6247d523365SDimitry Andric ErrorOr<uint64_t> Weight = getBlockWeight(&BB);
6257d523365SDimitry Andric if (Weight) {
6267d523365SDimitry Andric BlockWeights[&BB] = Weight.get();
6277d523365SDimitry Andric VisitedBlocks.insert(&BB);
6287d523365SDimitry Andric Changed = true;
6297d523365SDimitry Andric }
6304ba319b5SDimitry Andric LLVM_DEBUG(printBlockWeight(dbgs(), &BB));
6317d523365SDimitry Andric }
6327d523365SDimitry Andric
6337d523365SDimitry Andric return Changed;
6347d523365SDimitry Andric }
6357d523365SDimitry Andric
6364ba319b5SDimitry Andric /// Get the FunctionSamples for a call instruction.
6377d523365SDimitry Andric ///
638d88c1a5aSDimitry Andric /// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
6397d523365SDimitry Andric /// instance in which that call instruction is calling to. It contains
6407d523365SDimitry Andric /// all samples that resides in the inlined instance. We first find the
6417d523365SDimitry Andric /// inlined instance in which the call instruction is from, then we
6427d523365SDimitry Andric /// traverse its children to find the callsite with the matching
643d88c1a5aSDimitry Andric /// location.
6447d523365SDimitry Andric ///
645d88c1a5aSDimitry Andric /// \param Inst Call/Invoke instruction to query.
6467d523365SDimitry Andric ///
6477d523365SDimitry Andric /// \returns The FunctionSamples pointer to the inlined instance.
6487d523365SDimitry Andric const FunctionSamples *
findCalleeFunctionSamples(const Instruction & Inst) const649d88c1a5aSDimitry Andric SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
6507d523365SDimitry Andric const DILocation *DIL = Inst.getDebugLoc();
6517d523365SDimitry Andric if (!DIL) {
6527d523365SDimitry Andric return nullptr;
6537d523365SDimitry Andric }
6547a7e6055SDimitry Andric
6557a7e6055SDimitry Andric StringRef CalleeName;
6567a7e6055SDimitry Andric if (const CallInst *CI = dyn_cast<CallInst>(&Inst))
6577a7e6055SDimitry Andric if (Function *Callee = CI->getCalledFunction())
6587a7e6055SDimitry Andric CalleeName = Callee->getName();
6597d523365SDimitry Andric
6607d523365SDimitry Andric const FunctionSamples *FS = findFunctionSamples(Inst);
6617d523365SDimitry Andric if (FS == nullptr)
6627d523365SDimitry Andric return nullptr;
6637d523365SDimitry Andric
6644ba319b5SDimitry Andric return FS->findFunctionSamplesAt(LineLocation(FunctionSamples::getOffset(DIL),
6654ba319b5SDimitry Andric DIL->getBaseDiscriminator()),
6664ba319b5SDimitry Andric CalleeName);
6677a7e6055SDimitry Andric }
6687a7e6055SDimitry Andric
6697a7e6055SDimitry Andric /// Returns a vector of FunctionSamples that are the indirect call targets
6702cab237bSDimitry Andric /// of \p Inst. The vector is sorted by the total number of samples. Stores
6712cab237bSDimitry Andric /// the total call count of the indirect call in \p Sum.
6727a7e6055SDimitry Andric std::vector<const FunctionSamples *>
findIndirectCallFunctionSamples(const Instruction & Inst,uint64_t & Sum) const6737a7e6055SDimitry Andric SampleProfileLoader::findIndirectCallFunctionSamples(
6742cab237bSDimitry Andric const Instruction &Inst, uint64_t &Sum) const {
6757a7e6055SDimitry Andric const DILocation *DIL = Inst.getDebugLoc();
6767a7e6055SDimitry Andric std::vector<const FunctionSamples *> R;
6777a7e6055SDimitry Andric
6787a7e6055SDimitry Andric if (!DIL) {
6797a7e6055SDimitry Andric return R;
6807a7e6055SDimitry Andric }
6817a7e6055SDimitry Andric
6827a7e6055SDimitry Andric const FunctionSamples *FS = findFunctionSamples(Inst);
6837a7e6055SDimitry Andric if (FS == nullptr)
6847a7e6055SDimitry Andric return R;
6857a7e6055SDimitry Andric
6864ba319b5SDimitry Andric uint32_t LineOffset = FunctionSamples::getOffset(DIL);
6872cab237bSDimitry Andric uint32_t Discriminator = DIL->getBaseDiscriminator();
6882cab237bSDimitry Andric
6892cab237bSDimitry Andric auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
6902cab237bSDimitry Andric Sum = 0;
6912cab237bSDimitry Andric if (T)
6922cab237bSDimitry Andric for (const auto &T_C : T.get())
6932cab237bSDimitry Andric Sum += T_C.second;
6944ba319b5SDimitry Andric if (const FunctionSamplesMap *M = FS->findFunctionSamplesMapAt(LineLocation(
6954ba319b5SDimitry Andric FunctionSamples::getOffset(DIL), DIL->getBaseDiscriminator()))) {
6962cab237bSDimitry Andric if (M->empty())
6977a7e6055SDimitry Andric return R;
6987a7e6055SDimitry Andric for (const auto &NameFS : *M) {
6992cab237bSDimitry Andric Sum += NameFS.second.getEntrySamples();
7007a7e6055SDimitry Andric R.push_back(&NameFS.second);
7017a7e6055SDimitry Andric }
702*b5893f02SDimitry Andric llvm::sort(R, [](const FunctionSamples *L, const FunctionSamples *R) {
703*b5893f02SDimitry Andric if (L->getEntrySamples() != R->getEntrySamples())
7042cab237bSDimitry Andric return L->getEntrySamples() > R->getEntrySamples();
705*b5893f02SDimitry Andric return FunctionSamples::getGUID(L->getName()) <
706*b5893f02SDimitry Andric FunctionSamples::getGUID(R->getName());
7077a7e6055SDimitry Andric });
7087a7e6055SDimitry Andric }
7097a7e6055SDimitry Andric return R;
7107d523365SDimitry Andric }
7117d523365SDimitry Andric
7124ba319b5SDimitry Andric /// Get the FunctionSamples for an instruction.
7137d523365SDimitry Andric ///
7147d523365SDimitry Andric /// The FunctionSamples of an instruction \p Inst is the inlined instance
7157d523365SDimitry Andric /// in which that instruction is coming from. We traverse the inline stack
7167d523365SDimitry Andric /// of that instruction, and match it with the tree nodes in the profile.
7177d523365SDimitry Andric ///
7187d523365SDimitry Andric /// \param Inst Instruction to query.
7197d523365SDimitry Andric ///
7207d523365SDimitry Andric /// \returns the FunctionSamples pointer to the inlined instance.
7217d523365SDimitry Andric const FunctionSamples *
findFunctionSamples(const Instruction & Inst) const7227d523365SDimitry Andric SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
7237d523365SDimitry Andric const DILocation *DIL = Inst.getDebugLoc();
7247a7e6055SDimitry Andric if (!DIL)
7257d523365SDimitry Andric return Samples;
7267a7e6055SDimitry Andric
727*b5893f02SDimitry Andric auto it = DILocation2SampleMap.try_emplace(DIL,nullptr);
728*b5893f02SDimitry Andric if (it.second)
729*b5893f02SDimitry Andric it.first->second = Samples->findFunctionSamples(DIL);
730*b5893f02SDimitry Andric return it.first->second;
7317d523365SDimitry Andric }
7327d523365SDimitry Andric
inlineCallInstruction(Instruction * I)7332cab237bSDimitry Andric bool SampleProfileLoader::inlineCallInstruction(Instruction *I) {
7342cab237bSDimitry Andric assert(isa<CallInst>(I) || isa<InvokeInst>(I));
7352cab237bSDimitry Andric CallSite CS(I);
7362cab237bSDimitry Andric Function *CalledFunction = CS.getCalledFunction();
7372cab237bSDimitry Andric assert(CalledFunction);
7382cab237bSDimitry Andric DebugLoc DLoc = I->getDebugLoc();
7392cab237bSDimitry Andric BasicBlock *BB = I->getParent();
7402cab237bSDimitry Andric InlineParams Params = getInlineParams();
7412cab237bSDimitry Andric Params.ComputeFullInlineCost = true;
7422cab237bSDimitry Andric // Checks if there is anything in the reachable portion of the callee at
7432cab237bSDimitry Andric // this callsite that makes this inlining potentially illegal. Need to
7442cab237bSDimitry Andric // set ComputeFullInlineCost, otherwise getInlineCost may return early
7452cab237bSDimitry Andric // when cost exceeds threshold without checking all IRs in the callee.
7462cab237bSDimitry Andric // The acutal cost does not matter because we only checks isNever() to
7472cab237bSDimitry Andric // see if it is legal to inline the callsite.
7482cab237bSDimitry Andric InlineCost Cost = getInlineCost(CS, Params, GetTTI(*CalledFunction), GetAC,
7492cab237bSDimitry Andric None, nullptr, nullptr);
7502cab237bSDimitry Andric if (Cost.isNever()) {
7512cab237bSDimitry Andric ORE->emit(OptimizationRemark(DEBUG_TYPE, "Not inline", DLoc, BB)
7522cab237bSDimitry Andric << "incompatible inlining");
7532cab237bSDimitry Andric return false;
7542cab237bSDimitry Andric }
7552cab237bSDimitry Andric InlineFunctionInfo IFI(nullptr, &GetAC);
7562cab237bSDimitry Andric if (InlineFunction(CS, IFI)) {
7572cab237bSDimitry Andric // The call to InlineFunction erases I, so we can't pass it here.
7582cab237bSDimitry Andric ORE->emit(OptimizationRemark(DEBUG_TYPE, "HotInline", DLoc, BB)
7592cab237bSDimitry Andric << "inlined hot callee '" << ore::NV("Callee", CalledFunction)
7602cab237bSDimitry Andric << "' into '" << ore::NV("Caller", BB->getParent()) << "'");
7612cab237bSDimitry Andric return true;
7622cab237bSDimitry Andric }
7632cab237bSDimitry Andric return false;
7642cab237bSDimitry Andric }
7652cab237bSDimitry Andric
7664ba319b5SDimitry Andric /// Iteratively inline hot callsites of a function.
7677d523365SDimitry Andric ///
7687d523365SDimitry Andric /// Iteratively traverse all callsites of the function \p F, and find if
7697d523365SDimitry Andric /// the corresponding inlined instance exists and is hot in profile. If
7707d523365SDimitry Andric /// it is hot enough, inline the callsites and adds new callsites of the
7717a7e6055SDimitry Andric /// callee into the caller. If the call is an indirect call, first promote
7727a7e6055SDimitry Andric /// it to direct call. Each indirect call is limited with a single target.
7737d523365SDimitry Andric ///
7747d523365SDimitry Andric /// \param F function to perform iterative inlining.
7752cab237bSDimitry Andric /// \param InlinedGUIDs a set to be updated to include all GUIDs that are
7762cab237bSDimitry Andric /// inlined in the profiled binary.
7777d523365SDimitry Andric ///
7787d523365SDimitry Andric /// \returns True if there is any inline happened.
inlineHotFunctions(Function & F,DenseSet<GlobalValue::GUID> & InlinedGUIDs)7797a7e6055SDimitry Andric bool SampleProfileLoader::inlineHotFunctions(
7802cab237bSDimitry Andric Function &F, DenseSet<GlobalValue::GUID> &InlinedGUIDs) {
7817a7e6055SDimitry Andric DenseSet<Instruction *> PromotedInsns;
7827d523365SDimitry Andric bool Changed = false;
7837d523365SDimitry Andric while (true) {
7847d523365SDimitry Andric bool LocalChanged = false;
785d88c1a5aSDimitry Andric SmallVector<Instruction *, 10> CIS;
7867d523365SDimitry Andric for (auto &BB : F) {
787d88c1a5aSDimitry Andric bool Hot = false;
788d88c1a5aSDimitry Andric SmallVector<Instruction *, 10> Candidates;
7897d523365SDimitry Andric for (auto &I : BB.getInstList()) {
790d88c1a5aSDimitry Andric const FunctionSamples *FS = nullptr;
791d88c1a5aSDimitry Andric if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
7926bc11b14SDimitry Andric !isa<IntrinsicInst>(I) && (FS = findCalleeFunctionSamples(I))) {
793d88c1a5aSDimitry Andric Candidates.push_back(&I);
7944ba319b5SDimitry Andric if (callsiteIsHot(FS, PSI))
795d88c1a5aSDimitry Andric Hot = true;
7967d523365SDimitry Andric }
7977d523365SDimitry Andric }
798d88c1a5aSDimitry Andric if (Hot) {
799d88c1a5aSDimitry Andric CIS.insert(CIS.begin(), Candidates.begin(), Candidates.end());
800d88c1a5aSDimitry Andric }
801d88c1a5aSDimitry Andric }
802d88c1a5aSDimitry Andric for (auto I : CIS) {
8037a7e6055SDimitry Andric Function *CalledFunction = CallSite(I).getCalledFunction();
804edd7eaddSDimitry Andric // Do not inline recursive calls.
805edd7eaddSDimitry Andric if (CalledFunction == &F)
806edd7eaddSDimitry Andric continue;
8072cab237bSDimitry Andric if (CallSite(I).isIndirectCall()) {
8082cab237bSDimitry Andric if (PromotedInsns.count(I))
8092cab237bSDimitry Andric continue;
8102cab237bSDimitry Andric uint64_t Sum;
8112cab237bSDimitry Andric for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
8122cab237bSDimitry Andric if (IsThinLTOPreLink) {
8132cab237bSDimitry Andric FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
814*b5893f02SDimitry Andric PSI->getOrCompHotCountThreshold());
8152cab237bSDimitry Andric continue;
8162cab237bSDimitry Andric }
817*b5893f02SDimitry Andric auto CalleeFunctionName = FS->getFuncNameInModule(F.getParent());
818db17bf38SDimitry Andric // If it is a recursive call, we do not inline it as it could bloat
819db17bf38SDimitry Andric // the code exponentially. There is way to better handle this, e.g.
820db17bf38SDimitry Andric // clone the caller first, and inline the cloned caller if it is
8212cab237bSDimitry Andric // recursive. As llvm does not inline recursive calls, we will
8222cab237bSDimitry Andric // simply ignore it instead of handling it explicitly.
823*b5893f02SDimitry Andric if (CalleeFunctionName == F.getName())
824db17bf38SDimitry Andric continue;
8252cab237bSDimitry Andric
8267a7e6055SDimitry Andric const char *Reason = "Callee function not available";
8276bc11b14SDimitry Andric auto R = SymbolMap.find(CalleeFunctionName);
8282cab237bSDimitry Andric if (R != SymbolMap.end() && R->getValue() &&
8292cab237bSDimitry Andric !R->getValue()->isDeclaration() &&
8302cab237bSDimitry Andric R->getValue()->getSubprogram() &&
8312cab237bSDimitry Andric isLegalToPromote(CallSite(I), R->getValue(), &Reason)) {
8322cab237bSDimitry Andric uint64_t C = FS->getEntrySamples();
8332cab237bSDimitry Andric Instruction *DI =
8342cab237bSDimitry Andric pgo::promoteIndirectCall(I, R->getValue(), C, Sum, false, ORE);
8352cab237bSDimitry Andric Sum -= C;
8367a7e6055SDimitry Andric PromotedInsns.insert(I);
8372cab237bSDimitry Andric // If profile mismatches, we should not attempt to inline DI.
8382cab237bSDimitry Andric if ((isa<CallInst>(DI) || isa<InvokeInst>(DI)) &&
8392cab237bSDimitry Andric inlineCallInstruction(DI))
8407d523365SDimitry Andric LocalChanged = true;
8412cab237bSDimitry Andric } else {
8424ba319b5SDimitry Andric LLVM_DEBUG(dbgs()
8432cab237bSDimitry Andric << "\nFailed to promote indirect call to "
8442cab237bSDimitry Andric << CalleeFunctionName << " because " << Reason << "\n");
8452cab237bSDimitry Andric }
8462cab237bSDimitry Andric }
8472cab237bSDimitry Andric } else if (CalledFunction && CalledFunction->getSubprogram() &&
8482cab237bSDimitry Andric !CalledFunction->isDeclaration()) {
8492cab237bSDimitry Andric if (inlineCallInstruction(I))
8502cab237bSDimitry Andric LocalChanged = true;
8512cab237bSDimitry Andric } else if (IsThinLTOPreLink) {
8522cab237bSDimitry Andric findCalleeFunctionSamples(*I)->findInlinedFunctions(
853*b5893f02SDimitry Andric InlinedGUIDs, F.getParent(), PSI->getOrCompHotCountThreshold());
8547d523365SDimitry Andric }
8557d523365SDimitry Andric }
8567d523365SDimitry Andric if (LocalChanged) {
8577d523365SDimitry Andric Changed = true;
8587d523365SDimitry Andric } else {
8597d523365SDimitry Andric break;
8607d523365SDimitry Andric }
8617d523365SDimitry Andric }
8627d523365SDimitry Andric return Changed;
8637d523365SDimitry Andric }
8647d523365SDimitry Andric
8654ba319b5SDimitry Andric /// Find equivalence classes for the given block.
8667d523365SDimitry Andric ///
8677d523365SDimitry Andric /// This finds all the blocks that are guaranteed to execute the same
8687d523365SDimitry Andric /// number of times as \p BB1. To do this, it traverses all the
8697d523365SDimitry Andric /// descendants of \p BB1 in the dominator or post-dominator tree.
8707d523365SDimitry Andric ///
8717d523365SDimitry Andric /// A block BB2 will be in the same equivalence class as \p BB1 if
8727d523365SDimitry Andric /// the following holds:
8737d523365SDimitry Andric ///
8747d523365SDimitry Andric /// 1- \p BB1 is a descendant of BB2 in the opposite tree. So, if BB2
8757d523365SDimitry Andric /// is a descendant of \p BB1 in the dominator tree, then BB2 should
8767d523365SDimitry Andric /// dominate BB1 in the post-dominator tree.
8777d523365SDimitry Andric ///
8787d523365SDimitry Andric /// 2- Both BB2 and \p BB1 must be in the same loop.
8797d523365SDimitry Andric ///
8807d523365SDimitry Andric /// For every block BB2 that meets those two requirements, we set BB2's
8817d523365SDimitry Andric /// equivalence class to \p BB1.
8827d523365SDimitry Andric ///
8837d523365SDimitry Andric /// \param BB1 Block to check.
8847d523365SDimitry Andric /// \param Descendants Descendants of \p BB1 in either the dom or pdom tree.
8857d523365SDimitry Andric /// \param DomTree Opposite dominator tree. If \p Descendants is filled
8867d523365SDimitry Andric /// with blocks from \p BB1's dominator tree, then
8877d523365SDimitry Andric /// this is the post-dominator tree, and vice versa.
888b40b48b8SDimitry Andric template <bool IsPostDom>
findEquivalencesFor(BasicBlock * BB1,ArrayRef<BasicBlock * > Descendants,DominatorTreeBase<BasicBlock,IsPostDom> * DomTree)8897d523365SDimitry Andric void SampleProfileLoader::findEquivalencesFor(
8903ca95b02SDimitry Andric BasicBlock *BB1, ArrayRef<BasicBlock *> Descendants,
891b40b48b8SDimitry Andric DominatorTreeBase<BasicBlock, IsPostDom> *DomTree) {
8927d523365SDimitry Andric const BasicBlock *EC = EquivalenceClass[BB1];
8937d523365SDimitry Andric uint64_t Weight = BlockWeights[EC];
8947d523365SDimitry Andric for (const auto *BB2 : Descendants) {
8957d523365SDimitry Andric bool IsDomParent = DomTree->dominates(BB2, BB1);
8967d523365SDimitry Andric bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
8977d523365SDimitry Andric if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
8987d523365SDimitry Andric EquivalenceClass[BB2] = EC;
899d88c1a5aSDimitry Andric // If BB2 is visited, then the entire EC should be marked as visited.
900d88c1a5aSDimitry Andric if (VisitedBlocks.count(BB2)) {
901d88c1a5aSDimitry Andric VisitedBlocks.insert(EC);
902d88c1a5aSDimitry Andric }
9037d523365SDimitry Andric
9047d523365SDimitry Andric // If BB2 is heavier than BB1, make BB2 have the same weight
9057d523365SDimitry Andric // as BB1.
9067d523365SDimitry Andric //
9077d523365SDimitry Andric // Note that we don't worry about the opposite situation here
9087d523365SDimitry Andric // (when BB2 is lighter than BB1). We will deal with this
9097d523365SDimitry Andric // during the propagation phase. Right now, we just want to
9107d523365SDimitry Andric // make sure that BB1 has the largest weight of all the
9117d523365SDimitry Andric // members of its equivalence set.
9127d523365SDimitry Andric Weight = std::max(Weight, BlockWeights[BB2]);
9137d523365SDimitry Andric }
9147d523365SDimitry Andric }
915d88c1a5aSDimitry Andric if (EC == &EC->getParent()->getEntryBlock()) {
916d88c1a5aSDimitry Andric BlockWeights[EC] = Samples->getHeadSamples() + 1;
917d88c1a5aSDimitry Andric } else {
9187d523365SDimitry Andric BlockWeights[EC] = Weight;
9197d523365SDimitry Andric }
920d88c1a5aSDimitry Andric }
9217d523365SDimitry Andric
9224ba319b5SDimitry Andric /// Find equivalence classes.
9237d523365SDimitry Andric ///
9247d523365SDimitry Andric /// Since samples may be missing from blocks, we can fill in the gaps by setting
9257d523365SDimitry Andric /// the weights of all the blocks in the same equivalence class to the same
9267d523365SDimitry Andric /// weight. To compute the concept of equivalence, we use dominance and loop
9277d523365SDimitry Andric /// information. Two blocks B1 and B2 are in the same equivalence class if B1
9287d523365SDimitry Andric /// dominates B2, B2 post-dominates B1 and both are in the same loop.
9297d523365SDimitry Andric ///
9307d523365SDimitry Andric /// \param F The function to query.
findEquivalenceClasses(Function & F)9317d523365SDimitry Andric void SampleProfileLoader::findEquivalenceClasses(Function &F) {
9327d523365SDimitry Andric SmallVector<BasicBlock *, 8> DominatedBBs;
9334ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "\nBlock equivalence classes\n");
9347d523365SDimitry Andric // Find equivalence sets based on dominance and post-dominance information.
9357d523365SDimitry Andric for (auto &BB : F) {
9367d523365SDimitry Andric BasicBlock *BB1 = &BB;
9377d523365SDimitry Andric
9387d523365SDimitry Andric // Compute BB1's equivalence class once.
9397d523365SDimitry Andric if (EquivalenceClass.count(BB1)) {
9404ba319b5SDimitry Andric LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
9417d523365SDimitry Andric continue;
9427d523365SDimitry Andric }
9437d523365SDimitry Andric
9447d523365SDimitry Andric // By default, blocks are in their own equivalence class.
9457d523365SDimitry Andric EquivalenceClass[BB1] = BB1;
9467d523365SDimitry Andric
9477d523365SDimitry Andric // Traverse all the blocks dominated by BB1. We are looking for
9487d523365SDimitry Andric // every basic block BB2 such that:
9497d523365SDimitry Andric //
9507d523365SDimitry Andric // 1- BB1 dominates BB2.
9517d523365SDimitry Andric // 2- BB2 post-dominates BB1.
9527d523365SDimitry Andric // 3- BB1 and BB2 are in the same loop nest.
9537d523365SDimitry Andric //
9547d523365SDimitry Andric // If all those conditions hold, it means that BB2 is executed
9557d523365SDimitry Andric // as many times as BB1, so they are placed in the same equivalence
9567d523365SDimitry Andric // class by making BB2's equivalence class be BB1.
9577d523365SDimitry Andric DominatedBBs.clear();
9587d523365SDimitry Andric DT->getDescendants(BB1, DominatedBBs);
9597d523365SDimitry Andric findEquivalencesFor(BB1, DominatedBBs, PDT.get());
9607d523365SDimitry Andric
9614ba319b5SDimitry Andric LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
9627d523365SDimitry Andric }
9637d523365SDimitry Andric
9647d523365SDimitry Andric // Assign weights to equivalence classes.
9657d523365SDimitry Andric //
9667d523365SDimitry Andric // All the basic blocks in the same equivalence class will execute
9677d523365SDimitry Andric // the same number of times. Since we know that the head block in
9687d523365SDimitry Andric // each equivalence class has the largest weight, assign that weight
9697d523365SDimitry Andric // to all the blocks in that equivalence class.
9704ba319b5SDimitry Andric LLVM_DEBUG(
9714ba319b5SDimitry Andric dbgs() << "\nAssign the same weight to all blocks in the same class\n");
9727d523365SDimitry Andric for (auto &BI : F) {
9737d523365SDimitry Andric const BasicBlock *BB = &BI;
9747d523365SDimitry Andric const BasicBlock *EquivBB = EquivalenceClass[BB];
9757d523365SDimitry Andric if (BB != EquivBB)
9767d523365SDimitry Andric BlockWeights[BB] = BlockWeights[EquivBB];
9774ba319b5SDimitry Andric LLVM_DEBUG(printBlockWeight(dbgs(), BB));
9787d523365SDimitry Andric }
9797d523365SDimitry Andric }
9807d523365SDimitry Andric
9814ba319b5SDimitry Andric /// Visit the given edge to decide if it has a valid weight.
9827d523365SDimitry Andric ///
9837d523365SDimitry Andric /// If \p E has not been visited before, we copy to \p UnknownEdge
9847d523365SDimitry Andric /// and increment the count of unknown edges.
9857d523365SDimitry Andric ///
9867d523365SDimitry Andric /// \param E Edge to visit.
9877d523365SDimitry Andric /// \param NumUnknownEdges Current number of unknown edges.
9887d523365SDimitry Andric /// \param UnknownEdge Set if E has not been visited before.
9897d523365SDimitry Andric ///
9907d523365SDimitry Andric /// \returns E's weight, if known. Otherwise, return 0.
visitEdge(Edge E,unsigned * NumUnknownEdges,Edge * UnknownEdge)9917d523365SDimitry Andric uint64_t SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
9927d523365SDimitry Andric Edge *UnknownEdge) {
9937d523365SDimitry Andric if (!VisitedEdges.count(E)) {
9947d523365SDimitry Andric (*NumUnknownEdges)++;
9957d523365SDimitry Andric *UnknownEdge = E;
9967d523365SDimitry Andric return 0;
9977d523365SDimitry Andric }
9987d523365SDimitry Andric
9997d523365SDimitry Andric return EdgeWeights[E];
10007d523365SDimitry Andric }
10017d523365SDimitry Andric
10024ba319b5SDimitry Andric /// Propagate weights through incoming/outgoing edges.
10037d523365SDimitry Andric ///
10047d523365SDimitry Andric /// If the weight of a basic block is known, and there is only one edge
10057d523365SDimitry Andric /// with an unknown weight, we can calculate the weight of that edge.
10067d523365SDimitry Andric ///
10077d523365SDimitry Andric /// Similarly, if all the edges have a known count, we can calculate the
10087d523365SDimitry Andric /// count of the basic block, if needed.
10097d523365SDimitry Andric ///
10107d523365SDimitry Andric /// \param F Function to process.
1011d88c1a5aSDimitry Andric /// \param UpdateBlockCount Whether we should update basic block counts that
1012d88c1a5aSDimitry Andric /// has already been annotated.
10137d523365SDimitry Andric ///
10147d523365SDimitry Andric /// \returns True if new weights were assigned to edges or blocks.
propagateThroughEdges(Function & F,bool UpdateBlockCount)1015d88c1a5aSDimitry Andric bool SampleProfileLoader::propagateThroughEdges(Function &F,
1016d88c1a5aSDimitry Andric bool UpdateBlockCount) {
10177d523365SDimitry Andric bool Changed = false;
10184ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "\nPropagation through edges\n");
10197d523365SDimitry Andric for (const auto &BI : F) {
10207d523365SDimitry Andric const BasicBlock *BB = &BI;
10217d523365SDimitry Andric const BasicBlock *EC = EquivalenceClass[BB];
10227d523365SDimitry Andric
10237d523365SDimitry Andric // Visit all the predecessor and successor edges to determine
10247d523365SDimitry Andric // which ones have a weight assigned already. Note that it doesn't
10257d523365SDimitry Andric // matter that we only keep track of a single unknown edge. The
10267d523365SDimitry Andric // only case we are interested in handling is when only a single
10277d523365SDimitry Andric // edge is unknown (see setEdgeOrBlockWeight).
10287d523365SDimitry Andric for (unsigned i = 0; i < 2; i++) {
10297d523365SDimitry Andric uint64_t TotalWeight = 0;
10303ca95b02SDimitry Andric unsigned NumUnknownEdges = 0, NumTotalEdges = 0;
10313ca95b02SDimitry Andric Edge UnknownEdge, SelfReferentialEdge, SingleEdge;
10327d523365SDimitry Andric
10337d523365SDimitry Andric if (i == 0) {
10347d523365SDimitry Andric // First, visit all predecessor edges.
10353ca95b02SDimitry Andric NumTotalEdges = Predecessors[BB].size();
10367d523365SDimitry Andric for (auto *Pred : Predecessors[BB]) {
10377d523365SDimitry Andric Edge E = std::make_pair(Pred, BB);
10387d523365SDimitry Andric TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
10397d523365SDimitry Andric if (E.first == E.second)
10407d523365SDimitry Andric SelfReferentialEdge = E;
10417d523365SDimitry Andric }
10423ca95b02SDimitry Andric if (NumTotalEdges == 1) {
10433ca95b02SDimitry Andric SingleEdge = std::make_pair(Predecessors[BB][0], BB);
10443ca95b02SDimitry Andric }
10457d523365SDimitry Andric } else {
10467d523365SDimitry Andric // On the second round, visit all successor edges.
10473ca95b02SDimitry Andric NumTotalEdges = Successors[BB].size();
10487d523365SDimitry Andric for (auto *Succ : Successors[BB]) {
10497d523365SDimitry Andric Edge E = std::make_pair(BB, Succ);
10507d523365SDimitry Andric TotalWeight += visitEdge(E, &NumUnknownEdges, &UnknownEdge);
10517d523365SDimitry Andric }
10523ca95b02SDimitry Andric if (NumTotalEdges == 1) {
10533ca95b02SDimitry Andric SingleEdge = std::make_pair(BB, Successors[BB][0]);
10543ca95b02SDimitry Andric }
10557d523365SDimitry Andric }
10567d523365SDimitry Andric
10577d523365SDimitry Andric // After visiting all the edges, there are three cases that we
10587d523365SDimitry Andric // can handle immediately:
10597d523365SDimitry Andric //
10607d523365SDimitry Andric // - All the edge weights are known (i.e., NumUnknownEdges == 0).
10617d523365SDimitry Andric // In this case, we simply check that the sum of all the edges
10627d523365SDimitry Andric // is the same as BB's weight. If not, we change BB's weight
10637d523365SDimitry Andric // to match. Additionally, if BB had not been visited before,
10647d523365SDimitry Andric // we mark it visited.
10657d523365SDimitry Andric //
10667d523365SDimitry Andric // - Only one edge is unknown and BB has already been visited.
10677d523365SDimitry Andric // In this case, we can compute the weight of the edge by
10687d523365SDimitry Andric // subtracting the total block weight from all the known
10697d523365SDimitry Andric // edge weights. If the edges weight more than BB, then the
10707d523365SDimitry Andric // edge of the last remaining edge is set to zero.
10717d523365SDimitry Andric //
10727d523365SDimitry Andric // - There exists a self-referential edge and the weight of BB is
10737d523365SDimitry Andric // known. In this case, this edge can be based on BB's weight.
10747d523365SDimitry Andric // We add up all the other known edges and set the weight on
10757d523365SDimitry Andric // the self-referential edge as we did in the previous case.
10767d523365SDimitry Andric //
10777d523365SDimitry Andric // In any other case, we must continue iterating. Eventually,
10787d523365SDimitry Andric // all edges will get a weight, or iteration will stop when
10797d523365SDimitry Andric // it reaches SampleProfileMaxPropagateIterations.
10807d523365SDimitry Andric if (NumUnknownEdges <= 1) {
10817d523365SDimitry Andric uint64_t &BBWeight = BlockWeights[EC];
10827d523365SDimitry Andric if (NumUnknownEdges == 0) {
10833ca95b02SDimitry Andric if (!VisitedBlocks.count(EC)) {
10847d523365SDimitry Andric // If we already know the weight of all edges, the weight of the
10857d523365SDimitry Andric // basic block can be computed. It should be no larger than the sum
10867d523365SDimitry Andric // of all edge weights.
10877d523365SDimitry Andric if (TotalWeight > BBWeight) {
10887d523365SDimitry Andric BBWeight = TotalWeight;
10897d523365SDimitry Andric Changed = true;
10904ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "All edge weights for " << BB->getName()
10917d523365SDimitry Andric << " known. Set weight for block: ";
10927d523365SDimitry Andric printBlockWeight(dbgs(), BB););
10937d523365SDimitry Andric }
10943ca95b02SDimitry Andric } else if (NumTotalEdges == 1 &&
10953ca95b02SDimitry Andric EdgeWeights[SingleEdge] < BlockWeights[EC]) {
10963ca95b02SDimitry Andric // If there is only one edge for the visited basic block, use the
10973ca95b02SDimitry Andric // block weight to adjust edge weight if edge weight is smaller.
10983ca95b02SDimitry Andric EdgeWeights[SingleEdge] = BlockWeights[EC];
10997d523365SDimitry Andric Changed = true;
11003ca95b02SDimitry Andric }
11017d523365SDimitry Andric } else if (NumUnknownEdges == 1 && VisitedBlocks.count(EC)) {
11027d523365SDimitry Andric // If there is a single unknown edge and the block has been
11037d523365SDimitry Andric // visited, then we can compute E's weight.
11047d523365SDimitry Andric if (BBWeight >= TotalWeight)
11057d523365SDimitry Andric EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
11067d523365SDimitry Andric else
11077d523365SDimitry Andric EdgeWeights[UnknownEdge] = 0;
1108d88c1a5aSDimitry Andric const BasicBlock *OtherEC;
1109d88c1a5aSDimitry Andric if (i == 0)
1110d88c1a5aSDimitry Andric OtherEC = EquivalenceClass[UnknownEdge.first];
1111d88c1a5aSDimitry Andric else
1112d88c1a5aSDimitry Andric OtherEC = EquivalenceClass[UnknownEdge.second];
1113d88c1a5aSDimitry Andric // Edge weights should never exceed the BB weights it connects.
1114d88c1a5aSDimitry Andric if (VisitedBlocks.count(OtherEC) &&
1115d88c1a5aSDimitry Andric EdgeWeights[UnknownEdge] > BlockWeights[OtherEC])
1116d88c1a5aSDimitry Andric EdgeWeights[UnknownEdge] = BlockWeights[OtherEC];
11177d523365SDimitry Andric VisitedEdges.insert(UnknownEdge);
11187d523365SDimitry Andric Changed = true;
11194ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Set weight for edge: ";
11207d523365SDimitry Andric printEdgeWeight(dbgs(), UnknownEdge));
11217d523365SDimitry Andric }
1122d88c1a5aSDimitry Andric } else if (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) {
1123d88c1a5aSDimitry Andric // If a block Weights 0, all its in/out edges should weight 0.
1124d88c1a5aSDimitry Andric if (i == 0) {
1125d88c1a5aSDimitry Andric for (auto *Pred : Predecessors[BB]) {
1126d88c1a5aSDimitry Andric Edge E = std::make_pair(Pred, BB);
1127d88c1a5aSDimitry Andric EdgeWeights[E] = 0;
1128d88c1a5aSDimitry Andric VisitedEdges.insert(E);
1129d88c1a5aSDimitry Andric }
1130d88c1a5aSDimitry Andric } else {
1131d88c1a5aSDimitry Andric for (auto *Succ : Successors[BB]) {
1132d88c1a5aSDimitry Andric Edge E = std::make_pair(BB, Succ);
1133d88c1a5aSDimitry Andric EdgeWeights[E] = 0;
1134d88c1a5aSDimitry Andric VisitedEdges.insert(E);
1135d88c1a5aSDimitry Andric }
1136d88c1a5aSDimitry Andric }
11377d523365SDimitry Andric } else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
11387d523365SDimitry Andric uint64_t &BBWeight = BlockWeights[BB];
11397d523365SDimitry Andric // We have a self-referential edge and the weight of BB is known.
11407d523365SDimitry Andric if (BBWeight >= TotalWeight)
11417d523365SDimitry Andric EdgeWeights[SelfReferentialEdge] = BBWeight - TotalWeight;
11427d523365SDimitry Andric else
11437d523365SDimitry Andric EdgeWeights[SelfReferentialEdge] = 0;
11447d523365SDimitry Andric VisitedEdges.insert(SelfReferentialEdge);
11457d523365SDimitry Andric Changed = true;
11464ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Set self-referential edge weight to: ";
11477d523365SDimitry Andric printEdgeWeight(dbgs(), SelfReferentialEdge));
11487d523365SDimitry Andric }
1149d88c1a5aSDimitry Andric if (UpdateBlockCount && !VisitedBlocks.count(EC) && TotalWeight > 0) {
1150d88c1a5aSDimitry Andric BlockWeights[EC] = TotalWeight;
1151d88c1a5aSDimitry Andric VisitedBlocks.insert(EC);
1152d88c1a5aSDimitry Andric Changed = true;
1153d88c1a5aSDimitry Andric }
11547d523365SDimitry Andric }
11557d523365SDimitry Andric }
11567d523365SDimitry Andric
11577d523365SDimitry Andric return Changed;
11587d523365SDimitry Andric }
11597d523365SDimitry Andric
11604ba319b5SDimitry Andric /// Build in/out edge lists for each basic block in the CFG.
11617d523365SDimitry Andric ///
11627d523365SDimitry Andric /// We are interested in unique edges. If a block B1 has multiple
11637d523365SDimitry Andric /// edges to another block B2, we only add a single B1->B2 edge.
buildEdges(Function & F)11647d523365SDimitry Andric void SampleProfileLoader::buildEdges(Function &F) {
11657d523365SDimitry Andric for (auto &BI : F) {
11667d523365SDimitry Andric BasicBlock *B1 = &BI;
11677d523365SDimitry Andric
11687d523365SDimitry Andric // Add predecessors for B1.
11697d523365SDimitry Andric SmallPtrSet<BasicBlock *, 16> Visited;
11707d523365SDimitry Andric if (!Predecessors[B1].empty())
11717d523365SDimitry Andric llvm_unreachable("Found a stale predecessors list in a basic block.");
11727d523365SDimitry Andric for (pred_iterator PI = pred_begin(B1), PE = pred_end(B1); PI != PE; ++PI) {
11737d523365SDimitry Andric BasicBlock *B2 = *PI;
11747d523365SDimitry Andric if (Visited.insert(B2).second)
11757d523365SDimitry Andric Predecessors[B1].push_back(B2);
11767d523365SDimitry Andric }
11777d523365SDimitry Andric
11787d523365SDimitry Andric // Add successors for B1.
11797d523365SDimitry Andric Visited.clear();
11807d523365SDimitry Andric if (!Successors[B1].empty())
11817d523365SDimitry Andric llvm_unreachable("Found a stale successors list in a basic block.");
11827d523365SDimitry Andric for (succ_iterator SI = succ_begin(B1), SE = succ_end(B1); SI != SE; ++SI) {
11837d523365SDimitry Andric BasicBlock *B2 = *SI;
11847d523365SDimitry Andric if (Visited.insert(B2).second)
11857d523365SDimitry Andric Successors[B1].push_back(B2);
11867d523365SDimitry Andric }
11877d523365SDimitry Andric }
11887d523365SDimitry Andric }
11897d523365SDimitry Andric
11902cab237bSDimitry Andric /// Returns the sorted CallTargetMap \p M by count in descending order.
SortCallTargets(const SampleRecord::CallTargetMap & M)11912cab237bSDimitry Andric static SmallVector<InstrProfValueData, 2> SortCallTargets(
11927a7e6055SDimitry Andric const SampleRecord::CallTargetMap &M) {
11932cab237bSDimitry Andric SmallVector<InstrProfValueData, 2> R;
11942cab237bSDimitry Andric for (auto I = M.begin(); I != M.end(); ++I)
1195*b5893f02SDimitry Andric R.push_back({FunctionSamples::getGUID(I->getKey()), I->getValue()});
1196*b5893f02SDimitry Andric llvm::sort(R, [](const InstrProfValueData &L, const InstrProfValueData &R) {
11977a7e6055SDimitry Andric if (L.Count == R.Count)
11987a7e6055SDimitry Andric return L.Value > R.Value;
11997a7e6055SDimitry Andric else
12007a7e6055SDimitry Andric return L.Count > R.Count;
12017a7e6055SDimitry Andric });
12022cab237bSDimitry Andric return R;
12037a7e6055SDimitry Andric }
12047a7e6055SDimitry Andric
12054ba319b5SDimitry Andric /// Propagate weights into edges
12067d523365SDimitry Andric ///
12077d523365SDimitry Andric /// The following rules are applied to every block BB in the CFG:
12087d523365SDimitry Andric ///
12097d523365SDimitry Andric /// - If BB has a single predecessor/successor, then the weight
12107d523365SDimitry Andric /// of that edge is the weight of the block.
12117d523365SDimitry Andric ///
12127d523365SDimitry Andric /// - If all incoming or outgoing edges are known except one, and the
12137d523365SDimitry Andric /// weight of the block is already known, the weight of the unknown
12147d523365SDimitry Andric /// edge will be the weight of the block minus the sum of all the known
12157d523365SDimitry Andric /// edges. If the sum of all the known edges is larger than BB's weight,
12167d523365SDimitry Andric /// we set the unknown edge weight to zero.
12177d523365SDimitry Andric ///
12187d523365SDimitry Andric /// - If there is a self-referential edge, and the weight of the block is
12197d523365SDimitry Andric /// known, the weight for that edge is set to the weight of the block
12207d523365SDimitry Andric /// minus the weight of the other incoming edges to that block (if
12217d523365SDimitry Andric /// known).
propagateWeights(Function & F)12227d523365SDimitry Andric void SampleProfileLoader::propagateWeights(Function &F) {
12237d523365SDimitry Andric bool Changed = true;
12247d523365SDimitry Andric unsigned I = 0;
12257d523365SDimitry Andric
1226d88c1a5aSDimitry Andric // If BB weight is larger than its corresponding loop's header BB weight,
1227d88c1a5aSDimitry Andric // use the BB weight to replace the loop header BB weight.
1228d88c1a5aSDimitry Andric for (auto &BI : F) {
1229d88c1a5aSDimitry Andric BasicBlock *BB = &BI;
1230d88c1a5aSDimitry Andric Loop *L = LI->getLoopFor(BB);
1231d88c1a5aSDimitry Andric if (!L) {
1232d88c1a5aSDimitry Andric continue;
1233d88c1a5aSDimitry Andric }
1234d88c1a5aSDimitry Andric BasicBlock *Header = L->getHeader();
1235d88c1a5aSDimitry Andric if (Header && BlockWeights[BB] > BlockWeights[Header]) {
1236d88c1a5aSDimitry Andric BlockWeights[Header] = BlockWeights[BB];
1237d88c1a5aSDimitry Andric }
1238d88c1a5aSDimitry Andric }
12397d523365SDimitry Andric
12407d523365SDimitry Andric // Before propagation starts, build, for each block, a list of
12417d523365SDimitry Andric // unique predecessors and successors. This is necessary to handle
12427d523365SDimitry Andric // identical edges in multiway branches. Since we visit all blocks and all
12437d523365SDimitry Andric // edges of the CFG, it is cleaner to build these lists once at the start
12447d523365SDimitry Andric // of the pass.
12457d523365SDimitry Andric buildEdges(F);
12467d523365SDimitry Andric
12477d523365SDimitry Andric // Propagate until we converge or we go past the iteration limit.
12487d523365SDimitry Andric while (Changed && I++ < SampleProfileMaxPropagateIterations) {
1249d88c1a5aSDimitry Andric Changed = propagateThroughEdges(F, false);
1250d88c1a5aSDimitry Andric }
1251d88c1a5aSDimitry Andric
1252d88c1a5aSDimitry Andric // The first propagation propagates BB counts from annotated BBs to unknown
1253d88c1a5aSDimitry Andric // BBs. The 2nd propagation pass resets edges weights, and use all BB weights
1254d88c1a5aSDimitry Andric // to propagate edge weights.
1255d88c1a5aSDimitry Andric VisitedEdges.clear();
1256d88c1a5aSDimitry Andric Changed = true;
1257d88c1a5aSDimitry Andric while (Changed && I++ < SampleProfileMaxPropagateIterations) {
1258d88c1a5aSDimitry Andric Changed = propagateThroughEdges(F, false);
1259d88c1a5aSDimitry Andric }
1260d88c1a5aSDimitry Andric
1261d88c1a5aSDimitry Andric // The 3rd propagation pass allows adjust annotated BB weights that are
1262d88c1a5aSDimitry Andric // obviously wrong.
1263d88c1a5aSDimitry Andric Changed = true;
1264d88c1a5aSDimitry Andric while (Changed && I++ < SampleProfileMaxPropagateIterations) {
1265d88c1a5aSDimitry Andric Changed = propagateThroughEdges(F, true);
12667d523365SDimitry Andric }
12677d523365SDimitry Andric
12687d523365SDimitry Andric // Generate MD_prof metadata for every branch instruction using the
12697d523365SDimitry Andric // edge weights computed during propagation.
12704ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch weights\n");
12717d523365SDimitry Andric LLVMContext &Ctx = F.getContext();
12727d523365SDimitry Andric MDBuilder MDB(Ctx);
12737d523365SDimitry Andric for (auto &BI : F) {
12747d523365SDimitry Andric BasicBlock *BB = &BI;
12753ca95b02SDimitry Andric
12763ca95b02SDimitry Andric if (BlockWeights[BB]) {
12773ca95b02SDimitry Andric for (auto &I : BB->getInstList()) {
12787a7e6055SDimitry Andric if (!isa<CallInst>(I) && !isa<InvokeInst>(I))
12797a7e6055SDimitry Andric continue;
12807a7e6055SDimitry Andric CallSite CS(&I);
12817a7e6055SDimitry Andric if (!CS.getCalledFunction()) {
12827a7e6055SDimitry Andric const DebugLoc &DLoc = I.getDebugLoc();
12837a7e6055SDimitry Andric if (!DLoc)
12847a7e6055SDimitry Andric continue;
12857a7e6055SDimitry Andric const DILocation *DIL = DLoc;
12864ba319b5SDimitry Andric uint32_t LineOffset = FunctionSamples::getOffset(DIL);
12877a7e6055SDimitry Andric uint32_t Discriminator = DIL->getBaseDiscriminator();
12887a7e6055SDimitry Andric
12897a7e6055SDimitry Andric const FunctionSamples *FS = findFunctionSamples(I);
12907a7e6055SDimitry Andric if (!FS)
12917a7e6055SDimitry Andric continue;
12927a7e6055SDimitry Andric auto T = FS->findCallTargetMapAt(LineOffset, Discriminator);
12932cab237bSDimitry Andric if (!T || T.get().empty())
12947a7e6055SDimitry Andric continue;
12952cab237bSDimitry Andric SmallVector<InstrProfValueData, 2> SortedCallTargets =
12962cab237bSDimitry Andric SortCallTargets(T.get());
12972cab237bSDimitry Andric uint64_t Sum;
12982cab237bSDimitry Andric findIndirectCallFunctionSamples(I, Sum);
12997a7e6055SDimitry Andric annotateValueSite(*I.getParent()->getParent()->getParent(), I,
13007a7e6055SDimitry Andric SortedCallTargets, Sum, IPVK_IndirectCallTarget,
13017a7e6055SDimitry Andric SortedCallTargets.size());
13027a7e6055SDimitry Andric } else if (!dyn_cast<IntrinsicInst>(&I)) {
13033ca95b02SDimitry Andric SmallVector<uint32_t, 1> Weights;
13043ca95b02SDimitry Andric Weights.push_back(BlockWeights[BB]);
13057a7e6055SDimitry Andric I.setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
13063ca95b02SDimitry Andric }
13073ca95b02SDimitry Andric }
13083ca95b02SDimitry Andric }
1309*b5893f02SDimitry Andric Instruction *TI = BB->getTerminator();
13107d523365SDimitry Andric if (TI->getNumSuccessors() == 1)
13117d523365SDimitry Andric continue;
13127d523365SDimitry Andric if (!isa<BranchInst>(TI) && !isa<SwitchInst>(TI))
13137d523365SDimitry Andric continue;
13147d523365SDimitry Andric
13156bc11b14SDimitry Andric DebugLoc BranchLoc = TI->getDebugLoc();
13164ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "\nGetting weights for branch at line "
13176bc11b14SDimitry Andric << ((BranchLoc) ? Twine(BranchLoc.getLine())
13186bc11b14SDimitry Andric : Twine("<UNKNOWN LOCATION>"))
13196bc11b14SDimitry Andric << ".\n");
13207d523365SDimitry Andric SmallVector<uint32_t, 4> Weights;
13217d523365SDimitry Andric uint32_t MaxWeight = 0;
13222cab237bSDimitry Andric Instruction *MaxDestInst;
13237d523365SDimitry Andric for (unsigned I = 0; I < TI->getNumSuccessors(); ++I) {
13247d523365SDimitry Andric BasicBlock *Succ = TI->getSuccessor(I);
13257d523365SDimitry Andric Edge E = std::make_pair(BB, Succ);
13267d523365SDimitry Andric uint64_t Weight = EdgeWeights[E];
13274ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "\t"; printEdgeWeight(dbgs(), E));
13287d523365SDimitry Andric // Use uint32_t saturated arithmetic to adjust the incoming weights,
13297d523365SDimitry Andric // if needed. Sample counts in profiles are 64-bit unsigned values,
13307d523365SDimitry Andric // but internally branch weights are expressed as 32-bit values.
13317d523365SDimitry Andric if (Weight > std::numeric_limits<uint32_t>::max()) {
13324ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
13337d523365SDimitry Andric Weight = std::numeric_limits<uint32_t>::max();
13347d523365SDimitry Andric }
1335d88c1a5aSDimitry Andric // Weight is added by one to avoid propagation errors introduced by
1336d88c1a5aSDimitry Andric // 0 weights.
1337d88c1a5aSDimitry Andric Weights.push_back(static_cast<uint32_t>(Weight + 1));
13387d523365SDimitry Andric if (Weight != 0) {
13397d523365SDimitry Andric if (Weight > MaxWeight) {
13407d523365SDimitry Andric MaxWeight = Weight;
13412cab237bSDimitry Andric MaxDestInst = Succ->getFirstNonPHIOrDbgOrLifetime();
13427d523365SDimitry Andric }
13437d523365SDimitry Andric }
13447d523365SDimitry Andric }
13457d523365SDimitry Andric
13467a7e6055SDimitry Andric uint64_t TempWeight;
13477d523365SDimitry Andric // Only set weights if there is at least one non-zero weight.
13487d523365SDimitry Andric // In any other case, let the analyzer set weights.
13497a7e6055SDimitry Andric // Do not set weights if the weights are present. In ThinLTO, the profile
13507a7e6055SDimitry Andric // annotation is done twice. If the first annotation already set the
13517a7e6055SDimitry Andric // weights, the second pass does not need to set it.
13527a7e6055SDimitry Andric if (MaxWeight > 0 && !TI->extractProfTotalWeight(TempWeight)) {
13534ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "SUCCESS. Found non-zero weights.\n");
13542cab237bSDimitry Andric TI->setMetadata(LLVMContext::MD_prof,
13557d523365SDimitry Andric MDB.createBranchWeights(Weights));
13562cab237bSDimitry Andric ORE->emit([&]() {
13572cab237bSDimitry Andric return OptimizationRemark(DEBUG_TYPE, "PopularDest", MaxDestInst)
13582cab237bSDimitry Andric << "most popular destination for conditional branches at "
13592cab237bSDimitry Andric << ore::NV("CondBranchesLoc", BranchLoc);
13602cab237bSDimitry Andric });
13617d523365SDimitry Andric } else {
13624ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
13637d523365SDimitry Andric }
13647d523365SDimitry Andric }
13657d523365SDimitry Andric }
13667d523365SDimitry Andric
13674ba319b5SDimitry Andric /// Get the line number for the function header.
13687d523365SDimitry Andric ///
13697d523365SDimitry Andric /// This looks up function \p F in the current compilation unit and
13707d523365SDimitry Andric /// retrieves the line number where the function is defined. This is
13717d523365SDimitry Andric /// line 0 for all the samples read from the profile file. Every line
13727d523365SDimitry Andric /// number is relative to this line.
13737d523365SDimitry Andric ///
13747d523365SDimitry Andric /// \param F Function object to query.
13757d523365SDimitry Andric ///
13767d523365SDimitry Andric /// \returns the line number where \p F is defined. If it returns 0,
13777d523365SDimitry Andric /// it means that there is no debug information available for \p F.
getFunctionLoc(Function & F)13787d523365SDimitry Andric unsigned SampleProfileLoader::getFunctionLoc(Function &F) {
13793ca95b02SDimitry Andric if (DISubprogram *S = F.getSubprogram())
13807d523365SDimitry Andric return S->getLine();
13817d523365SDimitry Andric
13824ba319b5SDimitry Andric if (NoWarnSampleUnused)
13834ba319b5SDimitry Andric return 0;
13844ba319b5SDimitry Andric
13857d523365SDimitry Andric // If the start of \p F is missing, emit a diagnostic to inform the user
13867d523365SDimitry Andric // about the missed opportunity.
13877d523365SDimitry Andric F.getContext().diagnose(DiagnosticInfoSampleProfile(
13887d523365SDimitry Andric "No debug information found in function " + F.getName() +
13897d523365SDimitry Andric ": Function profile not used",
13907d523365SDimitry Andric DS_Warning));
13917d523365SDimitry Andric return 0;
13927d523365SDimitry Andric }
13937d523365SDimitry Andric
computeDominanceAndLoopInfo(Function & F)13947d523365SDimitry Andric void SampleProfileLoader::computeDominanceAndLoopInfo(Function &F) {
13957d523365SDimitry Andric DT.reset(new DominatorTree);
13967d523365SDimitry Andric DT->recalculate(F);
13977d523365SDimitry Andric
13984ba319b5SDimitry Andric PDT.reset(new PostDominatorTree(F));
13997d523365SDimitry Andric
14007d523365SDimitry Andric LI.reset(new LoopInfo);
14017d523365SDimitry Andric LI->analyze(*DT);
14027d523365SDimitry Andric }
14037d523365SDimitry Andric
14044ba319b5SDimitry Andric /// Generate branch weight metadata for all branches in \p F.
14057d523365SDimitry Andric ///
14067d523365SDimitry Andric /// Branch weights are computed out of instruction samples using a
14077d523365SDimitry Andric /// propagation heuristic. Propagation proceeds in 3 phases:
14087d523365SDimitry Andric ///
14097d523365SDimitry Andric /// 1- Assignment of block weights. All the basic blocks in the function
14107d523365SDimitry Andric /// are initial assigned the same weight as their most frequently
14117d523365SDimitry Andric /// executed instruction.
14127d523365SDimitry Andric ///
14137d523365SDimitry Andric /// 2- Creation of equivalence classes. Since samples may be missing from
14147d523365SDimitry Andric /// blocks, we can fill in the gaps by setting the weights of all the
14157d523365SDimitry Andric /// blocks in the same equivalence class to the same weight. To compute
14167d523365SDimitry Andric /// the concept of equivalence, we use dominance and loop information.
14177d523365SDimitry Andric /// Two blocks B1 and B2 are in the same equivalence class if B1
14187d523365SDimitry Andric /// dominates B2, B2 post-dominates B1 and both are in the same loop.
14197d523365SDimitry Andric ///
14207d523365SDimitry Andric /// 3- Propagation of block weights into edges. This uses a simple
14217d523365SDimitry Andric /// propagation heuristic. The following rules are applied to every
14227d523365SDimitry Andric /// block BB in the CFG:
14237d523365SDimitry Andric ///
14247d523365SDimitry Andric /// - If BB has a single predecessor/successor, then the weight
14257d523365SDimitry Andric /// of that edge is the weight of the block.
14267d523365SDimitry Andric ///
14277d523365SDimitry Andric /// - If all the edges are known except one, and the weight of the
14287d523365SDimitry Andric /// block is already known, the weight of the unknown edge will
14297d523365SDimitry Andric /// be the weight of the block minus the sum of all the known
14307d523365SDimitry Andric /// edges. If the sum of all the known edges is larger than BB's weight,
14317d523365SDimitry Andric /// we set the unknown edge weight to zero.
14327d523365SDimitry Andric ///
14337d523365SDimitry Andric /// - If there is a self-referential edge, and the weight of the block is
14347d523365SDimitry Andric /// known, the weight for that edge is set to the weight of the block
14357d523365SDimitry Andric /// minus the weight of the other incoming edges to that block (if
14367d523365SDimitry Andric /// known).
14377d523365SDimitry Andric ///
14387d523365SDimitry Andric /// Since this propagation is not guaranteed to finalize for every CFG, we
14397d523365SDimitry Andric /// only allow it to proceed for a limited number of iterations (controlled
14407d523365SDimitry Andric /// by -sample-profile-max-propagate-iterations).
14417d523365SDimitry Andric ///
14427d523365SDimitry Andric /// FIXME: Try to replace this propagation heuristic with a scheme
14437d523365SDimitry Andric /// that is guaranteed to finalize. A work-list approach similar to
14447d523365SDimitry Andric /// the standard value propagation algorithm used by SSA-CCP might
14457d523365SDimitry Andric /// work here.
14467d523365SDimitry Andric ///
14477d523365SDimitry Andric /// Once all the branch weights are computed, we emit the MD_prof
14487d523365SDimitry Andric /// metadata on BB using the computed values for each of its branches.
14497d523365SDimitry Andric ///
14507d523365SDimitry Andric /// \param F The function to query.
14517d523365SDimitry Andric ///
14527d523365SDimitry Andric /// \returns true if \p F was modified. Returns false, otherwise.
emitAnnotations(Function & F)14537d523365SDimitry Andric bool SampleProfileLoader::emitAnnotations(Function &F) {
14547d523365SDimitry Andric bool Changed = false;
14557d523365SDimitry Andric
14567d523365SDimitry Andric if (getFunctionLoc(F) == 0)
14577d523365SDimitry Andric return false;
14587d523365SDimitry Andric
14594ba319b5SDimitry Andric LLVM_DEBUG(dbgs() << "Line number for the first instruction in "
14604ba319b5SDimitry Andric << F.getName() << ": " << getFunctionLoc(F) << "\n");
14617d523365SDimitry Andric
14622cab237bSDimitry Andric DenseSet<GlobalValue::GUID> InlinedGUIDs;
14632cab237bSDimitry Andric Changed |= inlineHotFunctions(F, InlinedGUIDs);
14647d523365SDimitry Andric
14657d523365SDimitry Andric // Compute basic block weights.
14667d523365SDimitry Andric Changed |= computeBlockWeights(F);
14677d523365SDimitry Andric
14687d523365SDimitry Andric if (Changed) {
14697a7e6055SDimitry Andric // Add an entry count to the function using the samples gathered at the
14702cab237bSDimitry Andric // function entry.
14712cab237bSDimitry Andric // Sets the GUIDs that are inlined in the profiled binary. This is used
14722cab237bSDimitry Andric // for ThinLink to make correct liveness analysis, and also make the IR
14732cab237bSDimitry Andric // match the profiled binary before annotation.
14744ba319b5SDimitry Andric F.setEntryCount(
14754ba319b5SDimitry Andric ProfileCount(Samples->getHeadSamples() + 1, Function::PCT_Real),
14764ba319b5SDimitry Andric &InlinedGUIDs);
14777a7e6055SDimitry Andric
14787d523365SDimitry Andric // Compute dominance and loop info needed for propagation.
14797d523365SDimitry Andric computeDominanceAndLoopInfo(F);
14807d523365SDimitry Andric
14817d523365SDimitry Andric // Find equivalence classes.
14827d523365SDimitry Andric findEquivalenceClasses(F);
14837d523365SDimitry Andric
14847d523365SDimitry Andric // Propagate weights to all edges.
14857d523365SDimitry Andric propagateWeights(F);
14867d523365SDimitry Andric }
14877d523365SDimitry Andric
14887d523365SDimitry Andric // If coverage checking was requested, compute it now.
14897d523365SDimitry Andric if (SampleProfileRecordCoverage) {
14904ba319b5SDimitry Andric unsigned Used = CoverageTracker.countUsedRecords(Samples, PSI);
14914ba319b5SDimitry Andric unsigned Total = CoverageTracker.countBodyRecords(Samples, PSI);
14927d523365SDimitry Andric unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
14937d523365SDimitry Andric if (Coverage < SampleProfileRecordCoverage) {
14947d523365SDimitry Andric F.getContext().diagnose(DiagnosticInfoSampleProfile(
14953ca95b02SDimitry Andric F.getSubprogram()->getFilename(), getFunctionLoc(F),
14967d523365SDimitry Andric Twine(Used) + " of " + Twine(Total) + " available profile records (" +
14977d523365SDimitry Andric Twine(Coverage) + "%) were applied",
14987d523365SDimitry Andric DS_Warning));
14997d523365SDimitry Andric }
15007d523365SDimitry Andric }
15017d523365SDimitry Andric
15027d523365SDimitry Andric if (SampleProfileSampleCoverage) {
15037d523365SDimitry Andric uint64_t Used = CoverageTracker.getTotalUsedSamples();
15044ba319b5SDimitry Andric uint64_t Total = CoverageTracker.countBodySamples(Samples, PSI);
15057d523365SDimitry Andric unsigned Coverage = CoverageTracker.computeCoverage(Used, Total);
15067d523365SDimitry Andric if (Coverage < SampleProfileSampleCoverage) {
15077d523365SDimitry Andric F.getContext().diagnose(DiagnosticInfoSampleProfile(
15083ca95b02SDimitry Andric F.getSubprogram()->getFilename(), getFunctionLoc(F),
15097d523365SDimitry Andric Twine(Used) + " of " + Twine(Total) + " available profile samples (" +
15107d523365SDimitry Andric Twine(Coverage) + "%) were applied",
15117d523365SDimitry Andric DS_Warning));
15127d523365SDimitry Andric }
15137d523365SDimitry Andric }
15147d523365SDimitry Andric return Changed;
15157d523365SDimitry Andric }
15167d523365SDimitry Andric
15173ca95b02SDimitry Andric char SampleProfileLoaderLegacyPass::ID = 0;
15182cab237bSDimitry Andric
15193ca95b02SDimitry Andric INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
15207d523365SDimitry Andric "Sample Profile loader", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)15213ca95b02SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
15222cab237bSDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
15234ba319b5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
15243ca95b02SDimitry Andric INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
15257d523365SDimitry Andric "Sample Profile loader", false, false)
15267d523365SDimitry Andric
15277d523365SDimitry Andric bool SampleProfileLoader::doInitialization(Module &M) {
15287d523365SDimitry Andric auto &Ctx = M.getContext();
15297d523365SDimitry Andric auto ReaderOrErr = SampleProfileReader::create(Filename, Ctx);
15307d523365SDimitry Andric if (std::error_code EC = ReaderOrErr.getError()) {
15317d523365SDimitry Andric std::string Msg = "Could not open profile: " + EC.message();
15327d523365SDimitry Andric Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
15337d523365SDimitry Andric return false;
15347d523365SDimitry Andric }
15357d523365SDimitry Andric Reader = std::move(ReaderOrErr.get());
1536*b5893f02SDimitry Andric Reader->collectFuncsToUse(M);
15377d523365SDimitry Andric ProfileIsValid = (Reader->read() == sampleprof_error::success);
1538*b5893f02SDimitry Andric
1539*b5893f02SDimitry Andric if (!RemappingFilename.empty()) {
1540*b5893f02SDimitry Andric // Apply profile remappings to the loaded profile data if requested.
1541*b5893f02SDimitry Andric // For now, we only support remapping symbols encoded using the Itanium
1542*b5893f02SDimitry Andric // C++ ABI's name mangling scheme.
1543*b5893f02SDimitry Andric ReaderOrErr = SampleProfileReaderItaniumRemapper::create(
1544*b5893f02SDimitry Andric RemappingFilename, Ctx, std::move(Reader));
1545*b5893f02SDimitry Andric if (std::error_code EC = ReaderOrErr.getError()) {
1546*b5893f02SDimitry Andric std::string Msg = "Could not open profile remapping file: " + EC.message();
1547*b5893f02SDimitry Andric Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
1548*b5893f02SDimitry Andric return false;
1549*b5893f02SDimitry Andric }
1550*b5893f02SDimitry Andric Reader = std::move(ReaderOrErr.get());
1551*b5893f02SDimitry Andric ProfileIsValid = (Reader->read() == sampleprof_error::success);
1552*b5893f02SDimitry Andric }
15537d523365SDimitry Andric return true;
15547d523365SDimitry Andric }
15557d523365SDimitry Andric
createSampleProfileLoaderPass()15567d523365SDimitry Andric ModulePass *llvm::createSampleProfileLoaderPass() {
1557*b5893f02SDimitry Andric return new SampleProfileLoaderLegacyPass();
15587d523365SDimitry Andric }
15597d523365SDimitry Andric
createSampleProfileLoaderPass(StringRef Name)15607d523365SDimitry Andric ModulePass *llvm::createSampleProfileLoaderPass(StringRef Name) {
15613ca95b02SDimitry Andric return new SampleProfileLoaderLegacyPass(Name);
15627d523365SDimitry Andric }
15637d523365SDimitry Andric
runOnModule(Module & M,ModuleAnalysisManager * AM,ProfileSummaryInfo * _PSI)15644ba319b5SDimitry Andric bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
15654ba319b5SDimitry Andric ProfileSummaryInfo *_PSI) {
1566*b5893f02SDimitry Andric FunctionSamples::GUIDToFuncNameMapper Mapper(M);
15677d523365SDimitry Andric if (!ProfileIsValid)
15687d523365SDimitry Andric return false;
15697d523365SDimitry Andric
15704ba319b5SDimitry Andric PSI = _PSI;
15714ba319b5SDimitry Andric if (M.getProfileSummary() == nullptr)
15724ba319b5SDimitry Andric M.setProfileSummary(Reader->getSummary().getMD(M.getContext()));
15734ba319b5SDimitry Andric
15747d523365SDimitry Andric // Compute the total number of samples collected in this profile.
15757d523365SDimitry Andric for (const auto &I : Reader->getProfiles())
15767d523365SDimitry Andric TotalCollectedSamples += I.second.getTotalSamples();
15777d523365SDimitry Andric
15786bc11b14SDimitry Andric // Populate the symbol map.
15796bc11b14SDimitry Andric for (const auto &N_F : M.getValueSymbolTable()) {
1580fe4fed2eSDimitry Andric StringRef OrigName = N_F.getKey();
15816bc11b14SDimitry Andric Function *F = dyn_cast<Function>(N_F.getValue());
15826bc11b14SDimitry Andric if (F == nullptr)
15836bc11b14SDimitry Andric continue;
15846bc11b14SDimitry Andric SymbolMap[OrigName] = F;
15856bc11b14SDimitry Andric auto pos = OrigName.find('.');
1586fe4fed2eSDimitry Andric if (pos != StringRef::npos) {
1587fe4fed2eSDimitry Andric StringRef NewName = OrigName.substr(0, pos);
15886bc11b14SDimitry Andric auto r = SymbolMap.insert(std::make_pair(NewName, F));
15896bc11b14SDimitry Andric // Failiing to insert means there is already an entry in SymbolMap,
15906bc11b14SDimitry Andric // thus there are multiple functions that are mapped to the same
15916bc11b14SDimitry Andric // stripped name. In this case of name conflicting, set the value
15926bc11b14SDimitry Andric // to nullptr to avoid confusion.
15936bc11b14SDimitry Andric if (!r.second)
15946bc11b14SDimitry Andric r.first->second = nullptr;
15956bc11b14SDimitry Andric }
15966bc11b14SDimitry Andric }
15976bc11b14SDimitry Andric
15987d523365SDimitry Andric bool retval = false;
15997d523365SDimitry Andric for (auto &F : M)
16007d523365SDimitry Andric if (!F.isDeclaration()) {
16017d523365SDimitry Andric clearFunctionData();
16022cab237bSDimitry Andric retval |= runOnFunction(F, AM);
16037d523365SDimitry Andric }
16047d523365SDimitry Andric return retval;
16057d523365SDimitry Andric }
16067d523365SDimitry Andric
runOnModule(Module & M)16073ca95b02SDimitry Andric bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
16082cab237bSDimitry Andric ACT = &getAnalysis<AssumptionCacheTracker>();
16092cab237bSDimitry Andric TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
16104ba319b5SDimitry Andric ProfileSummaryInfo *PSI =
1611*b5893f02SDimitry Andric &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
16124ba319b5SDimitry Andric return SampleLoader.runOnModule(M, nullptr, PSI);
16133ca95b02SDimitry Andric }
16143ca95b02SDimitry Andric
runOnFunction(Function & F,ModuleAnalysisManager * AM)16152cab237bSDimitry Andric bool SampleProfileLoader::runOnFunction(Function &F, ModuleAnalysisManager *AM) {
1616*b5893f02SDimitry Andric
1617*b5893f02SDimitry Andric DILocation2SampleMap.clear();
1618*b5893f02SDimitry Andric // By default the entry count is initialized to -1, which will be treated
1619*b5893f02SDimitry Andric // conservatively by getEntryCount as the same as unknown (None). This is
1620*b5893f02SDimitry Andric // to avoid newly added code to be treated as cold. If we have samples
1621*b5893f02SDimitry Andric // this will be overwritten in emitAnnotations.
1622*b5893f02SDimitry Andric // If ProfileSampleAccurate is true or F has profile-sample-accurate
1623*b5893f02SDimitry Andric // attribute, initialize the entry count to 0 so callsites or functions
1624*b5893f02SDimitry Andric // unsampled will be treated as cold.
1625*b5893f02SDimitry Andric uint64_t initialEntryCount =
1626*b5893f02SDimitry Andric (ProfileSampleAccurate || F.hasFnAttribute("profile-sample-accurate"))
1627*b5893f02SDimitry Andric ? 0
1628*b5893f02SDimitry Andric : -1;
1629*b5893f02SDimitry Andric F.setEntryCount(ProfileCount(initialEntryCount, Function::PCT_Real));
16302cab237bSDimitry Andric std::unique_ptr<OptimizationRemarkEmitter> OwnedORE;
16312cab237bSDimitry Andric if (AM) {
16322cab237bSDimitry Andric auto &FAM =
16332cab237bSDimitry Andric AM->getResult<FunctionAnalysisManagerModuleProxy>(*F.getParent())
16342cab237bSDimitry Andric .getManager();
16352cab237bSDimitry Andric ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
16362cab237bSDimitry Andric } else {
16372cab237bSDimitry Andric OwnedORE = make_unique<OptimizationRemarkEmitter>(&F);
16382cab237bSDimitry Andric ORE = OwnedORE.get();
16392cab237bSDimitry Andric }
16407d523365SDimitry Andric Samples = Reader->getSamplesFor(F);
16417a7e6055SDimitry Andric if (Samples && !Samples->empty())
16427d523365SDimitry Andric return emitAnnotations(F);
16437d523365SDimitry Andric return false;
16447d523365SDimitry Andric }
16453ca95b02SDimitry Andric
run(Module & M,ModuleAnalysisManager & AM)16463ca95b02SDimitry Andric PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
1647d88c1a5aSDimitry Andric ModuleAnalysisManager &AM) {
16482cab237bSDimitry Andric FunctionAnalysisManager &FAM =
16492cab237bSDimitry Andric AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
16502cab237bSDimitry Andric
16512cab237bSDimitry Andric auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
16522cab237bSDimitry Andric return FAM.getResult<AssumptionAnalysis>(F);
16532cab237bSDimitry Andric };
16542cab237bSDimitry Andric auto GetTTI = [&](Function &F) -> TargetTransformInfo & {
16552cab237bSDimitry Andric return FAM.getResult<TargetIRAnalysis>(F);
16562cab237bSDimitry Andric };
16573ca95b02SDimitry Andric
1658a580b014SDimitry Andric SampleProfileLoader SampleLoader(
16592cab237bSDimitry Andric ProfileFileName.empty() ? SampleProfileFile : ProfileFileName,
1660*b5893f02SDimitry Andric ProfileRemappingFileName.empty() ? SampleProfileRemappingFile
1661*b5893f02SDimitry Andric : ProfileRemappingFileName,
16622cab237bSDimitry Andric IsThinLTOPreLink, GetAssumptionCache, GetTTI);
16633ca95b02SDimitry Andric
16643ca95b02SDimitry Andric SampleLoader.doInitialization(M);
16653ca95b02SDimitry Andric
16664ba319b5SDimitry Andric ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
16674ba319b5SDimitry Andric if (!SampleLoader.runOnModule(M, &AM, PSI))
16683ca95b02SDimitry Andric return PreservedAnalyses::all();
16693ca95b02SDimitry Andric
16703ca95b02SDimitry Andric return PreservedAnalyses::none();
16713ca95b02SDimitry Andric }
1672