1af732203SDimitry Andric //===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
2af732203SDimitry Andric //
3af732203SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4af732203SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5af732203SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6af732203SDimitry Andric //
7af732203SDimitry Andric //===----------------------------------------------------------------------===//
8af732203SDimitry Andric //
9af732203SDimitry Andric // \file
10af732203SDimitry Andric // Uses profile information to split out cold blocks.
11af732203SDimitry Andric //
12af732203SDimitry Andric // This pass splits out cold machine basic blocks from the parent function. This
13af732203SDimitry Andric // implementation leverages the basic block section framework. Blocks marked
14af732203SDimitry Andric // cold by this pass are grouped together in a separate section prefixed with
15af732203SDimitry Andric // ".text.unlikely.*". The linker can then group these together as a cold
16af732203SDimitry Andric // section. The split part of the function is a contiguous region identified by
17af732203SDimitry Andric // the symbol "foo.cold". Grouping all cold blocks across functions together
18af732203SDimitry Andric // decreases fragmentation and improves icache and itlb utilization. Note that
19af732203SDimitry Andric // the overall changes to the binary size are negligible; only a small number of
20af732203SDimitry Andric // additional jump instructions may be introduced.
21af732203SDimitry Andric //
22af732203SDimitry Andric // For the original RFC of this pass please see
23af732203SDimitry Andric // https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
24af732203SDimitry Andric //===----------------------------------------------------------------------===//
25af732203SDimitry Andric 
26*5f7ddb14SDimitry Andric #include "llvm/ADT/SmallVector.h"
27af732203SDimitry Andric #include "llvm/ADT/Statistic.h"
28af732203SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h"
29af732203SDimitry Andric #include "llvm/CodeGen/BasicBlockSectionUtils.h"
30af732203SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
31af732203SDimitry Andric #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
32af732203SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
33af732203SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
34af732203SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h"
35af732203SDimitry Andric #include "llvm/CodeGen/Passes.h"
36af732203SDimitry Andric #include "llvm/IR/Function.h"
37af732203SDimitry Andric #include "llvm/IR/Module.h"
38af732203SDimitry Andric #include "llvm/InitializePasses.h"
39af732203SDimitry Andric #include "llvm/Support/CommandLine.h"
40af732203SDimitry Andric 
41af732203SDimitry Andric using namespace llvm;
42af732203SDimitry Andric 
43af732203SDimitry Andric // FIXME: This cutoff value is CPU dependent and should be moved to
44af732203SDimitry Andric // TargetTransformInfo once we consider enabling this on other platforms.
45af732203SDimitry Andric // The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
46af732203SDimitry Andric // Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
47af732203SDimitry Andric // The default was empirically determined to be optimal when considering cutoff
48af732203SDimitry Andric // values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on
49af732203SDimitry Andric // Intel CPUs.
50af732203SDimitry Andric static cl::opt<unsigned>
51af732203SDimitry Andric     PercentileCutoff("mfs-psi-cutoff",
52af732203SDimitry Andric                      cl::desc("Percentile profile summary cutoff used to "
53af732203SDimitry Andric                               "determine cold blocks. Unused if set to zero."),
54af732203SDimitry Andric                      cl::init(999950), cl::Hidden);
55af732203SDimitry Andric 
56af732203SDimitry Andric static cl::opt<unsigned> ColdCountThreshold(
57af732203SDimitry Andric     "mfs-count-threshold",
58af732203SDimitry Andric     cl::desc(
59af732203SDimitry Andric         "Minimum number of times a block must be executed to be retained."),
60af732203SDimitry Andric     cl::init(1), cl::Hidden);
61af732203SDimitry Andric 
62af732203SDimitry Andric namespace {
63af732203SDimitry Andric 
64af732203SDimitry Andric class MachineFunctionSplitter : public MachineFunctionPass {
65af732203SDimitry Andric public:
66af732203SDimitry Andric   static char ID;
MachineFunctionSplitter()67af732203SDimitry Andric   MachineFunctionSplitter() : MachineFunctionPass(ID) {
68af732203SDimitry Andric     initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry());
69af732203SDimitry Andric   }
70af732203SDimitry Andric 
getPassName() const71af732203SDimitry Andric   StringRef getPassName() const override {
72af732203SDimitry Andric     return "Machine Function Splitter Transformation";
73af732203SDimitry Andric   }
74af732203SDimitry Andric 
75af732203SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override;
76af732203SDimitry Andric 
77af732203SDimitry Andric   bool runOnMachineFunction(MachineFunction &F) override;
78af732203SDimitry Andric };
79af732203SDimitry Andric } // end anonymous namespace
80af732203SDimitry Andric 
isColdBlock(const MachineBasicBlock & MBB,const MachineBlockFrequencyInfo * MBFI,ProfileSummaryInfo * PSI)81*5f7ddb14SDimitry Andric static bool isColdBlock(const MachineBasicBlock &MBB,
82af732203SDimitry Andric                         const MachineBlockFrequencyInfo *MBFI,
83af732203SDimitry Andric                         ProfileSummaryInfo *PSI) {
84af732203SDimitry Andric   Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
85af732203SDimitry Andric   if (!Count.hasValue())
86af732203SDimitry Andric     return true;
87af732203SDimitry Andric 
88af732203SDimitry Andric   if (PercentileCutoff > 0) {
89af732203SDimitry Andric     return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
90af732203SDimitry Andric   }
91af732203SDimitry Andric   return (*Count < ColdCountThreshold);
92af732203SDimitry Andric }
93af732203SDimitry Andric 
runOnMachineFunction(MachineFunction & MF)94af732203SDimitry Andric bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
95af732203SDimitry Andric   // TODO: We only target functions with profile data. Static information may
96af732203SDimitry Andric   // also be considered but we don't see performance improvements yet.
97af732203SDimitry Andric   if (!MF.getFunction().hasProfileData())
98af732203SDimitry Andric     return false;
99af732203SDimitry Andric 
100af732203SDimitry Andric   // TODO: We don't split functions where a section attribute has been set
101af732203SDimitry Andric   // since the split part may not be placed in a contiguous region. It may also
102af732203SDimitry Andric   // be more beneficial to augment the linker to ensure contiguous layout of
103af732203SDimitry Andric   // split functions within the same section as specified by the attribute.
104*5f7ddb14SDimitry Andric   if (MF.getFunction().hasSection() ||
105*5f7ddb14SDimitry Andric       MF.getFunction().hasFnAttribute("implicit-section-name"))
106af732203SDimitry Andric     return false;
107af732203SDimitry Andric 
108af732203SDimitry Andric   // We don't want to proceed further for cold functions
109af732203SDimitry Andric   // or functions of unknown hotness. Lukewarm functions have no prefix.
110af732203SDimitry Andric   Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
111af732203SDimitry Andric   if (SectionPrefix.hasValue() &&
112af732203SDimitry Andric       (SectionPrefix.getValue().equals("unlikely") ||
113af732203SDimitry Andric        SectionPrefix.getValue().equals("unknown"))) {
114af732203SDimitry Andric     return false;
115af732203SDimitry Andric   }
116af732203SDimitry Andric 
117af732203SDimitry Andric   // Renumbering blocks here preserves the order of the blocks as
118af732203SDimitry Andric   // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
119af732203SDimitry Andric   // blocks. Preserving the order of blocks is essential to retaining decisions
120af732203SDimitry Andric   // made by prior passes such as MachineBlockPlacement.
121af732203SDimitry Andric   MF.RenumberBlocks();
122af732203SDimitry Andric   MF.setBBSectionsType(BasicBlockSection::Preset);
123af732203SDimitry Andric   auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
124af732203SDimitry Andric   auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
125af732203SDimitry Andric 
126*5f7ddb14SDimitry Andric   SmallVector<MachineBasicBlock *, 2> LandingPads;
127af732203SDimitry Andric   for (auto &MBB : MF) {
128*5f7ddb14SDimitry Andric     if (MBB.isEntryBlock())
129af732203SDimitry Andric       continue;
130*5f7ddb14SDimitry Andric 
131*5f7ddb14SDimitry Andric     if (MBB.isEHPad())
132*5f7ddb14SDimitry Andric       LandingPads.push_back(&MBB);
133*5f7ddb14SDimitry Andric     else if (isColdBlock(MBB, MBFI, PSI))
134af732203SDimitry Andric       MBB.setSectionID(MBBSectionID::ColdSectionID);
135af732203SDimitry Andric   }
136af732203SDimitry Andric 
137*5f7ddb14SDimitry Andric   // We only split out eh pads if all of them are cold.
138*5f7ddb14SDimitry Andric   bool HasHotLandingPads = false;
139*5f7ddb14SDimitry Andric   for (const MachineBasicBlock *LP : LandingPads) {
140*5f7ddb14SDimitry Andric     if (!isColdBlock(*LP, MBFI, PSI))
141*5f7ddb14SDimitry Andric       HasHotLandingPads = true;
142*5f7ddb14SDimitry Andric   }
143*5f7ddb14SDimitry Andric   if (!HasHotLandingPads) {
144*5f7ddb14SDimitry Andric     for (MachineBasicBlock *LP : LandingPads)
145*5f7ddb14SDimitry Andric       LP->setSectionID(MBBSectionID::ColdSectionID);
146*5f7ddb14SDimitry Andric   }
147*5f7ddb14SDimitry Andric 
148af732203SDimitry Andric   auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
149af732203SDimitry Andric     return X.getSectionID().Type < Y.getSectionID().Type;
150af732203SDimitry Andric   };
151af732203SDimitry Andric   llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
152af732203SDimitry Andric 
153af732203SDimitry Andric   return true;
154af732203SDimitry Andric }
155af732203SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const156af732203SDimitry Andric void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
157af732203SDimitry Andric   AU.addRequired<MachineModuleInfoWrapperPass>();
158af732203SDimitry Andric   AU.addRequired<MachineBlockFrequencyInfo>();
159af732203SDimitry Andric   AU.addRequired<ProfileSummaryInfoWrapperPass>();
160af732203SDimitry Andric }
161af732203SDimitry Andric 
162af732203SDimitry Andric char MachineFunctionSplitter::ID = 0;
163af732203SDimitry Andric INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
164af732203SDimitry Andric                 "Split machine functions using profile information", false,
165af732203SDimitry Andric                 false)
166af732203SDimitry Andric 
createMachineFunctionSplitterPass()167af732203SDimitry Andric MachineFunctionPass *llvm::createMachineFunctionSplitterPass() {
168af732203SDimitry Andric   return new MachineFunctionSplitter();
169af732203SDimitry Andric }
170