1*94faadacSSnehasish Kumar //===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
2*94faadacSSnehasish Kumar //
3*94faadacSSnehasish Kumar // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*94faadacSSnehasish Kumar // See https://llvm.org/LICENSE.txt for license information.
5*94faadacSSnehasish Kumar // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*94faadacSSnehasish Kumar //
7*94faadacSSnehasish Kumar //===----------------------------------------------------------------------===//
8*94faadacSSnehasish Kumar //
9*94faadacSSnehasish Kumar // \file
10*94faadacSSnehasish Kumar // Uses profile information to split out cold blocks.
11*94faadacSSnehasish Kumar //
12*94faadacSSnehasish Kumar // This pass splits out cold machine basic blocks from the parent function. This
13*94faadacSSnehasish Kumar // implementation leverages the basic block section framework. Blocks marked
14*94faadacSSnehasish Kumar // cold by this pass are grouped together in a separate section prefixed with
15*94faadacSSnehasish Kumar // ".text.unlikely.*". The linker can then group these together as a cold
16*94faadacSSnehasish Kumar // section. The split part of the function is a contiguous region identified by
17*94faadacSSnehasish Kumar // the symbol "foo.cold". Grouping all cold blocks across functions together
18*94faadacSSnehasish Kumar // decreases fragmentation and improves icache and itlb utilization. Note that
19*94faadacSSnehasish Kumar // the overall changes to the binary size are negligible; only a small number of
20*94faadacSSnehasish Kumar // additional jump instructions may be introduced.
21*94faadacSSnehasish Kumar //
22*94faadacSSnehasish Kumar // For the original RFC of this pass please see
23*94faadacSSnehasish Kumar // https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
24*94faadacSSnehasish Kumar //===----------------------------------------------------------------------===//
25*94faadacSSnehasish Kumar 
26*94faadacSSnehasish Kumar #include "llvm/ADT/Statistic.h"
27*94faadacSSnehasish Kumar #include "llvm/Analysis/ProfileSummaryInfo.h"
28*94faadacSSnehasish Kumar #include "llvm/CodeGen/BasicBlockSectionUtils.h"
29*94faadacSSnehasish Kumar #include "llvm/CodeGen/MachineBasicBlock.h"
30*94faadacSSnehasish Kumar #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
31*94faadacSSnehasish Kumar #include "llvm/CodeGen/MachineFunction.h"
32*94faadacSSnehasish Kumar #include "llvm/CodeGen/MachineFunctionPass.h"
33*94faadacSSnehasish Kumar #include "llvm/CodeGen/MachineModuleInfo.h"
34*94faadacSSnehasish Kumar #include "llvm/CodeGen/Passes.h"
35*94faadacSSnehasish Kumar #include "llvm/IR/Function.h"
36*94faadacSSnehasish Kumar #include "llvm/IR/Module.h"
37*94faadacSSnehasish Kumar #include "llvm/InitializePasses.h"
38*94faadacSSnehasish Kumar #include "llvm/Support/CommandLine.h"
39*94faadacSSnehasish Kumar 
40*94faadacSSnehasish Kumar using namespace llvm;
41*94faadacSSnehasish Kumar 
42*94faadacSSnehasish Kumar static cl::opt<unsigned>
43*94faadacSSnehasish Kumar     PercentileCutoff("mfs-psi-cutoff",
44*94faadacSSnehasish Kumar                      cl::desc("Percentile profile summary cutoff used to "
45*94faadacSSnehasish Kumar                               "determine cold blocks. Unused if set to zero."),
46*94faadacSSnehasish Kumar                      cl::init(0), cl::Hidden);
47*94faadacSSnehasish Kumar 
48*94faadacSSnehasish Kumar static cl::opt<unsigned> ColdCountThreshold(
49*94faadacSSnehasish Kumar     "mfs-count-threshold",
50*94faadacSSnehasish Kumar     cl::desc(
51*94faadacSSnehasish Kumar         "Minimum number of times a block must be executed to be retained."),
52*94faadacSSnehasish Kumar     cl::init(1), cl::Hidden);
53*94faadacSSnehasish Kumar 
54*94faadacSSnehasish Kumar namespace {
55*94faadacSSnehasish Kumar 
56*94faadacSSnehasish Kumar class MachineFunctionSplitter : public MachineFunctionPass {
57*94faadacSSnehasish Kumar public:
58*94faadacSSnehasish Kumar   static char ID;
59*94faadacSSnehasish Kumar   MachineFunctionSplitter() : MachineFunctionPass(ID) {
60*94faadacSSnehasish Kumar     initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry());
61*94faadacSSnehasish Kumar   }
62*94faadacSSnehasish Kumar 
63*94faadacSSnehasish Kumar   StringRef getPassName() const override {
64*94faadacSSnehasish Kumar     return "Machine Function Splitter Transformation";
65*94faadacSSnehasish Kumar   }
66*94faadacSSnehasish Kumar 
67*94faadacSSnehasish Kumar   void getAnalysisUsage(AnalysisUsage &AU) const override;
68*94faadacSSnehasish Kumar 
69*94faadacSSnehasish Kumar   bool runOnMachineFunction(MachineFunction &F) override;
70*94faadacSSnehasish Kumar };
71*94faadacSSnehasish Kumar } // end anonymous namespace
72*94faadacSSnehasish Kumar 
73*94faadacSSnehasish Kumar static bool isColdBlock(MachineBasicBlock &MBB,
74*94faadacSSnehasish Kumar                         const MachineBlockFrequencyInfo *MBFI,
75*94faadacSSnehasish Kumar                         ProfileSummaryInfo *PSI) {
76*94faadacSSnehasish Kumar   Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
77*94faadacSSnehasish Kumar   if (!Count.hasValue())
78*94faadacSSnehasish Kumar     return true;
79*94faadacSSnehasish Kumar 
80*94faadacSSnehasish Kumar   if (PercentileCutoff > 0) {
81*94faadacSSnehasish Kumar     return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
82*94faadacSSnehasish Kumar   }
83*94faadacSSnehasish Kumar   return (*Count < ColdCountThreshold);
84*94faadacSSnehasish Kumar }
85*94faadacSSnehasish Kumar 
86*94faadacSSnehasish Kumar bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
87*94faadacSSnehasish Kumar   // TODO: We only target functions with profile data. Static information may
88*94faadacSSnehasish Kumar   // also be considered but we don't see performance improvements yet.
89*94faadacSSnehasish Kumar   if (!MF.getFunction().hasProfileData())
90*94faadacSSnehasish Kumar     return false;
91*94faadacSSnehasish Kumar 
92*94faadacSSnehasish Kumar   // TODO: We don't split functions where a section attribute has been set
93*94faadacSSnehasish Kumar   // since the split part may not be placed in a contiguous region. It may also
94*94faadacSSnehasish Kumar   // be more beneficial to augment the linker to ensure contiguous layout of
95*94faadacSSnehasish Kumar   // split functions within the same section as specified by the attribute.
96*94faadacSSnehasish Kumar   if (!MF.getFunction().getSection().empty())
97*94faadacSSnehasish Kumar     return false;
98*94faadacSSnehasish Kumar 
99*94faadacSSnehasish Kumar   // We don't want to proceed further for cold functions
100*94faadacSSnehasish Kumar   // or functions of unknown hotness. Lukewarm functions have no prefix.
101*94faadacSSnehasish Kumar   Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
102*94faadacSSnehasish Kumar   if (SectionPrefix.hasValue() &&
103*94faadacSSnehasish Kumar       (SectionPrefix.getValue().equals(".unlikely") ||
104*94faadacSSnehasish Kumar        SectionPrefix.getValue().equals(".unknown"))) {
105*94faadacSSnehasish Kumar     return false;
106*94faadacSSnehasish Kumar   }
107*94faadacSSnehasish Kumar 
108*94faadacSSnehasish Kumar   // Renumbering blocks here preserves the order of the blocks as
109*94faadacSSnehasish Kumar   // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
110*94faadacSSnehasish Kumar   // blocks. Preserving the order of blocks is essential to retaining decisions
111*94faadacSSnehasish Kumar   // made by prior passes such as MachineBlockPlacement.
112*94faadacSSnehasish Kumar   MF.RenumberBlocks();
113*94faadacSSnehasish Kumar   MF.setBBSectionsType(BasicBlockSection::Preset);
114*94faadacSSnehasish Kumar   auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
115*94faadacSSnehasish Kumar   auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
116*94faadacSSnehasish Kumar 
117*94faadacSSnehasish Kumar   for (auto &MBB : MF) {
118*94faadacSSnehasish Kumar     // FIXME: We retain the entry block and conservatively keep all landing pad
119*94faadacSSnehasish Kumar     // blocks as part of the original function. Once D73739 is submitted, we can
120*94faadacSSnehasish Kumar     // improve the handling of ehpads.
121*94faadacSSnehasish Kumar     if ((MBB.pred_empty() || MBB.isEHPad()))
122*94faadacSSnehasish Kumar       continue;
123*94faadacSSnehasish Kumar     if (isColdBlock(MBB, MBFI, PSI))
124*94faadacSSnehasish Kumar       MBB.setSectionID(MBBSectionID::ColdSectionID);
125*94faadacSSnehasish Kumar   }
126*94faadacSSnehasish Kumar 
127*94faadacSSnehasish Kumar   auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
128*94faadacSSnehasish Kumar     return X.getSectionID().Type < Y.getSectionID().Type;
129*94faadacSSnehasish Kumar   };
130*94faadacSSnehasish Kumar   llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
131*94faadacSSnehasish Kumar 
132*94faadacSSnehasish Kumar   return true;
133*94faadacSSnehasish Kumar }
134*94faadacSSnehasish Kumar 
135*94faadacSSnehasish Kumar void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
136*94faadacSSnehasish Kumar   AU.addRequired<MachineModuleInfoWrapperPass>();
137*94faadacSSnehasish Kumar   AU.addRequired<MachineBlockFrequencyInfo>();
138*94faadacSSnehasish Kumar   AU.addRequired<ProfileSummaryInfoWrapperPass>();
139*94faadacSSnehasish Kumar }
140*94faadacSSnehasish Kumar 
141*94faadacSSnehasish Kumar char MachineFunctionSplitter::ID = 0;
142*94faadacSSnehasish Kumar INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
143*94faadacSSnehasish Kumar                 "Split machine functions using profile information", false,
144*94faadacSSnehasish Kumar                 false)
145*94faadacSSnehasish Kumar 
146*94faadacSSnehasish Kumar MachineFunctionPass *llvm::createMachineFunctionSplitterPass() {
147*94faadacSSnehasish Kumar   return new MachineFunctionSplitter();
148*94faadacSSnehasish Kumar }
149