15a7056faSEaswaran Raman //==-SummaryBasedOptimizations.cpp - Optimizations based on ThinLTO summary-==//
25a7056faSEaswaran Raman //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65a7056faSEaswaran Raman //
75a7056faSEaswaran Raman //===----------------------------------------------------------------------===//
85a7056faSEaswaran Raman //
95a7056faSEaswaran Raman // This file implements optimizations that are based on the module summaries.
105a7056faSEaswaran Raman // These optimizations are performed during the thinlink phase of the
115a7056faSEaswaran Raman // compilation.
125a7056faSEaswaran Raman //
135a7056faSEaswaran Raman //===----------------------------------------------------------------------===//
145a7056faSEaswaran Raman 
155a7056faSEaswaran Raman #include "llvm/LTO/SummaryBasedOptimizations.h"
165a7056faSEaswaran Raman #include "llvm/Analysis/SyntheticCountsUtils.h"
175a7056faSEaswaran Raman #include "llvm/IR/ModuleSummaryIndex.h"
184c1a1d3cSReid Kleckner #include "llvm/Support/CommandLine.h"
195a7056faSEaswaran Raman 
205a7056faSEaswaran Raman using namespace llvm;
215a7056faSEaswaran Raman 
22dc5f805dSBenjamin Kramer static cl::opt<bool> ThinLTOSynthesizeEntryCounts(
235a7056faSEaswaran Raman     "thinlto-synthesize-entry-counts", cl::init(false), cl::Hidden,
245a7056faSEaswaran Raman     cl::desc("Synthesize entry counts based on the summary"));
255a7056faSEaswaran Raman 
26d8aba75aSFangrui Song namespace llvm {
275a7056faSEaswaran Raman extern cl::opt<int> InitialSyntheticCount;
28d8aba75aSFangrui Song }
295a7056faSEaswaran Raman 
initializeCounts(ModuleSummaryIndex & Index)305a7056faSEaswaran Raman static void initializeCounts(ModuleSummaryIndex &Index) {
315a7056faSEaswaran Raman   auto Root = Index.calculateCallGraphRoot();
325a7056faSEaswaran Raman   // Root is a fake node. All its successors are the actual roots of the
335a7056faSEaswaran Raman   // callgraph.
345a7056faSEaswaran Raman   // FIXME: This initializes the entry counts of only the root nodes. This makes
355a7056faSEaswaran Raman   // sense when compiling a binary with ThinLTO, but for libraries any of the
365a7056faSEaswaran Raman   // non-root nodes could be called from outside.
375a7056faSEaswaran Raman   for (auto &C : Root.calls()) {
385a7056faSEaswaran Raman     auto &V = C.first;
395a7056faSEaswaran Raman     for (auto &GVS : V.getSummaryList()) {
405a7056faSEaswaran Raman       auto S = GVS.get()->getBaseObject();
415a7056faSEaswaran Raman       auto *F = cast<FunctionSummary>(S);
425a7056faSEaswaran Raman       F->setEntryCount(InitialSyntheticCount);
435a7056faSEaswaran Raman     }
445a7056faSEaswaran Raman   }
455a7056faSEaswaran Raman }
465a7056faSEaswaran Raman 
computeSyntheticCounts(ModuleSummaryIndex & Index)475a7056faSEaswaran Raman void llvm::computeSyntheticCounts(ModuleSummaryIndex &Index) {
485a7056faSEaswaran Raman   if (!ThinLTOSynthesizeEntryCounts)
495a7056faSEaswaran Raman     return;
505a7056faSEaswaran Raman 
515a7056faSEaswaran Raman   using Scaled64 = ScaledNumber<uint64_t>;
525a7056faSEaswaran Raman   initializeCounts(Index);
535a7056faSEaswaran Raman   auto GetCallSiteRelFreq = [](FunctionSummary::EdgeTy &Edge) {
545a7056faSEaswaran Raman     return Scaled64(Edge.second.RelBlockFreq, -CalleeInfo::ScaleShift);
555a7056faSEaswaran Raman   };
565a7056faSEaswaran Raman   auto GetEntryCount = [](ValueInfo V) {
575a7056faSEaswaran Raman     if (V.getSummaryList().size()) {
58*7b563e34SKazu Hirata       auto S = V.getSummaryList().front()->getBaseObject();
595a7056faSEaswaran Raman       auto *F = cast<FunctionSummary>(S);
605a7056faSEaswaran Raman       return F->entryCount();
615a7056faSEaswaran Raman     } else {
625a7056faSEaswaran Raman       return UINT64_C(0);
635a7056faSEaswaran Raman     }
645a7056faSEaswaran Raman   };
65b45994b8SEaswaran Raman   auto AddToEntryCount = [](ValueInfo V, Scaled64 New) {
665a7056faSEaswaran Raman     if (!V.getSummaryList().size())
675a7056faSEaswaran Raman       return;
685a7056faSEaswaran Raman     for (auto &GVS : V.getSummaryList()) {
695a7056faSEaswaran Raman       auto S = GVS.get()->getBaseObject();
705a7056faSEaswaran Raman       auto *F = cast<FunctionSummary>(S);
71b45994b8SEaswaran Raman       F->setEntryCount(
72b45994b8SEaswaran Raman           SaturatingAdd(F->entryCount(), New.template toInt<uint64_t>()));
735a7056faSEaswaran Raman     }
745a7056faSEaswaran Raman   };
755a7056faSEaswaran Raman 
76b45994b8SEaswaran Raman   auto GetProfileCount = [&](ValueInfo V, FunctionSummary::EdgeTy &Edge) {
77b45994b8SEaswaran Raman     auto RelFreq = GetCallSiteRelFreq(Edge);
78b45994b8SEaswaran Raman     Scaled64 EC(GetEntryCount(V), 0);
79b45994b8SEaswaran Raman     return RelFreq * EC;
80b45994b8SEaswaran Raman   };
815a7056faSEaswaran Raman   // After initializing the counts in initializeCounts above, the counts have to
825a7056faSEaswaran Raman   // be propagated across the combined callgraph.
835a7056faSEaswaran Raman   // SyntheticCountsUtils::propagate takes care of this propagation on any
845a7056faSEaswaran Raman   // callgraph that specialized GraphTraits.
85b45994b8SEaswaran Raman   SyntheticCountsUtils<ModuleSummaryIndex *>::propagate(&Index, GetProfileCount,
86b45994b8SEaswaran Raman                                                         AddToEntryCount);
875a7056faSEaswaran Raman   Index.setHasSyntheticEntryCounts();
885a7056faSEaswaran Raman }
89