1e8d8bef9SDimitry Andric //===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric //
9e8d8bef9SDimitry Andric // \file
10e8d8bef9SDimitry Andric // Uses profile information to split out cold blocks.
11e8d8bef9SDimitry Andric //
12e8d8bef9SDimitry Andric // This pass splits out cold machine basic blocks from the parent function. This
13e8d8bef9SDimitry Andric // implementation leverages the basic block section framework. Blocks marked
14e8d8bef9SDimitry Andric // cold by this pass are grouped together in a separate section prefixed with
15e8d8bef9SDimitry Andric // ".text.unlikely.*". The linker can then group these together as a cold
16e8d8bef9SDimitry Andric // section. The split part of the function is a contiguous region identified by
17e8d8bef9SDimitry Andric // the symbol "foo.cold". Grouping all cold blocks across functions together
18e8d8bef9SDimitry Andric // decreases fragmentation and improves icache and itlb utilization. Note that
19e8d8bef9SDimitry Andric // the overall changes to the binary size are negligible; only a small number of
20e8d8bef9SDimitry Andric // additional jump instructions may be introduced.
21e8d8bef9SDimitry Andric //
22e8d8bef9SDimitry Andric // For the original RFC of this pass please see
23e8d8bef9SDimitry Andric // https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
24e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
25e8d8bef9SDimitry Andric 
26fe6060f1SDimitry Andric #include "llvm/ADT/SmallVector.h"
27fe013be4SDimitry Andric #include "llvm/Analysis/BlockFrequencyInfo.h"
28fe013be4SDimitry Andric #include "llvm/Analysis/BranchProbabilityInfo.h"
29fe013be4SDimitry Andric #include "llvm/Analysis/EHUtils.h"
30e8d8bef9SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h"
31e8d8bef9SDimitry Andric #include "llvm/CodeGen/BasicBlockSectionUtils.h"
32e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
33e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
34e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
35e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
36e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h"
37e8d8bef9SDimitry Andric #include "llvm/CodeGen/Passes.h"
38*c9157d92SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
39e8d8bef9SDimitry Andric #include "llvm/IR/Function.h"
40e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h"
41e8d8bef9SDimitry Andric #include "llvm/Support/CommandLine.h"
42bdd1243dSDimitry Andric #include <optional>
43e8d8bef9SDimitry Andric 
44e8d8bef9SDimitry Andric using namespace llvm;
45e8d8bef9SDimitry Andric 
46e8d8bef9SDimitry Andric // FIXME: This cutoff value is CPU dependent and should be moved to
47e8d8bef9SDimitry Andric // TargetTransformInfo once we consider enabling this on other platforms.
48e8d8bef9SDimitry Andric // The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
49e8d8bef9SDimitry Andric // Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
50e8d8bef9SDimitry Andric // The default was empirically determined to be optimal when considering cutoff
51e8d8bef9SDimitry Andric // values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on
52e8d8bef9SDimitry Andric // Intel CPUs.
53e8d8bef9SDimitry Andric static cl::opt<unsigned>
54e8d8bef9SDimitry Andric     PercentileCutoff("mfs-psi-cutoff",
55e8d8bef9SDimitry Andric                      cl::desc("Percentile profile summary cutoff used to "
56e8d8bef9SDimitry Andric                               "determine cold blocks. Unused if set to zero."),
57e8d8bef9SDimitry Andric                      cl::init(999950), cl::Hidden);
58e8d8bef9SDimitry Andric 
59e8d8bef9SDimitry Andric static cl::opt<unsigned> ColdCountThreshold(
60e8d8bef9SDimitry Andric     "mfs-count-threshold",
61e8d8bef9SDimitry Andric     cl::desc(
62e8d8bef9SDimitry Andric         "Minimum number of times a block must be executed to be retained."),
63e8d8bef9SDimitry Andric     cl::init(1), cl::Hidden);
64e8d8bef9SDimitry Andric 
65bdd1243dSDimitry Andric static cl::opt<bool> SplitAllEHCode(
66bdd1243dSDimitry Andric     "mfs-split-ehcode",
67bdd1243dSDimitry Andric     cl::desc("Splits all EH code and it's descendants by default."),
68bdd1243dSDimitry Andric     cl::init(false), cl::Hidden);
69bdd1243dSDimitry Andric 
70e8d8bef9SDimitry Andric namespace {
71e8d8bef9SDimitry Andric 
72e8d8bef9SDimitry Andric class MachineFunctionSplitter : public MachineFunctionPass {
73e8d8bef9SDimitry Andric public:
74e8d8bef9SDimitry Andric   static char ID;
MachineFunctionSplitter()75e8d8bef9SDimitry Andric   MachineFunctionSplitter() : MachineFunctionPass(ID) {
76e8d8bef9SDimitry Andric     initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry());
77e8d8bef9SDimitry Andric   }
78e8d8bef9SDimitry Andric 
getPassName() const79e8d8bef9SDimitry Andric   StringRef getPassName() const override {
80e8d8bef9SDimitry Andric     return "Machine Function Splitter Transformation";
81e8d8bef9SDimitry Andric   }
82e8d8bef9SDimitry Andric 
83e8d8bef9SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override;
84e8d8bef9SDimitry Andric 
85e8d8bef9SDimitry Andric   bool runOnMachineFunction(MachineFunction &F) override;
86e8d8bef9SDimitry Andric };
87e8d8bef9SDimitry Andric } // end anonymous namespace
88e8d8bef9SDimitry Andric 
89bdd1243dSDimitry Andric /// setDescendantEHBlocksCold - This splits all EH pads and blocks reachable
90fe013be4SDimitry Andric /// only by EH pad as cold. This will help mark EH pads statically cold
91fe013be4SDimitry Andric /// instead of relying on profile data.
setDescendantEHBlocksCold(MachineFunction & MF)92fe013be4SDimitry Andric static void setDescendantEHBlocksCold(MachineFunction &MF) {
93fe013be4SDimitry Andric   DenseSet<MachineBasicBlock *> EHBlocks;
94fe013be4SDimitry Andric   computeEHOnlyBlocks(MF, EHBlocks);
95fe013be4SDimitry Andric   for (auto Block : EHBlocks) {
96fe013be4SDimitry Andric     Block->setSectionID(MBBSectionID::ColdSectionID);
97fe013be4SDimitry Andric   }
98fe013be4SDimitry Andric }
99bdd1243dSDimitry Andric 
finishAdjustingBasicBlocksAndLandingPads(MachineFunction & MF)100fe013be4SDimitry Andric static void finishAdjustingBasicBlocksAndLandingPads(MachineFunction &MF) {
101fe013be4SDimitry Andric   auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
102fe013be4SDimitry Andric     return X.getSectionID().Type < Y.getSectionID().Type;
103bdd1243dSDimitry Andric   };
104fe013be4SDimitry Andric   llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
105fe013be4SDimitry Andric   llvm::avoidZeroOffsetLandingPad(MF);
106bdd1243dSDimitry Andric }
107bdd1243dSDimitry Andric 
isColdBlock(const MachineBasicBlock & MBB,const MachineBlockFrequencyInfo * MBFI,ProfileSummaryInfo * PSI)108fe6060f1SDimitry Andric static bool isColdBlock(const MachineBasicBlock &MBB,
109e8d8bef9SDimitry Andric                         const MachineBlockFrequencyInfo *MBFI,
110e8d8bef9SDimitry Andric                         ProfileSummaryInfo *PSI) {
111bdd1243dSDimitry Andric   std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
112*c9157d92SDimitry Andric 
113*c9157d92SDimitry Andric   // Temporary hack to cope with AArch64's jump table encoding
114*c9157d92SDimitry Andric   const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo();
115*c9157d92SDimitry Andric   if (!TII.isMBBSafeToSplitToCold(MBB))
116*c9157d92SDimitry Andric     return false;
117*c9157d92SDimitry Andric 
118fe013be4SDimitry Andric   // For instrumentation profiles and sample profiles, we use different ways
119fe013be4SDimitry Andric   // to judge whether a block is cold and should be split.
120fe013be4SDimitry Andric   if (PSI->hasInstrumentationProfile() || PSI->hasCSInstrumentationProfile()) {
121fe013be4SDimitry Andric     // If using instrument profile, which is deemed "accurate", no count means
122fe013be4SDimitry Andric     // cold.
12381ad6265SDimitry Andric     if (!Count)
124e8d8bef9SDimitry Andric       return true;
125fe013be4SDimitry Andric     if (PercentileCutoff > 0)
126e8d8bef9SDimitry Andric       return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
127fe013be4SDimitry Andric     // Fallthrough to end of function.
128fe013be4SDimitry Andric   } else if (PSI->hasSampleProfile()) {
129fe013be4SDimitry Andric     // For sample profile, no count means "do not judege coldness".
130fe013be4SDimitry Andric     if (!Count)
131fe013be4SDimitry Andric       return false;
132e8d8bef9SDimitry Andric   }
133fe013be4SDimitry Andric 
134e8d8bef9SDimitry Andric   return (*Count < ColdCountThreshold);
135e8d8bef9SDimitry Andric }
136e8d8bef9SDimitry Andric 
runOnMachineFunction(MachineFunction & MF)137e8d8bef9SDimitry Andric bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
138bdd1243dSDimitry Andric   // We target functions with profile data. Static information in the form
139bdd1243dSDimitry Andric   // of exception handling code may be split to cold if user passes the
140bdd1243dSDimitry Andric   // mfs-split-ehcode flag.
141bdd1243dSDimitry Andric   bool UseProfileData = MF.getFunction().hasProfileData();
142bdd1243dSDimitry Andric   if (!UseProfileData && !SplitAllEHCode)
143e8d8bef9SDimitry Andric     return false;
144e8d8bef9SDimitry Andric 
145*c9157d92SDimitry Andric   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
146*c9157d92SDimitry Andric   if (!TII.isFunctionSafeToSplit(MF))
147e8d8bef9SDimitry Andric     return false;
148e8d8bef9SDimitry Andric 
149e8d8bef9SDimitry Andric   // Renumbering blocks here preserves the order of the blocks as
150e8d8bef9SDimitry Andric   // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
151e8d8bef9SDimitry Andric   // blocks. Preserving the order of blocks is essential to retaining decisions
152e8d8bef9SDimitry Andric   // made by prior passes such as MachineBlockPlacement.
153e8d8bef9SDimitry Andric   MF.RenumberBlocks();
154e8d8bef9SDimitry Andric   MF.setBBSectionsType(BasicBlockSection::Preset);
155bdd1243dSDimitry Andric 
156bdd1243dSDimitry Andric   MachineBlockFrequencyInfo *MBFI = nullptr;
157bdd1243dSDimitry Andric   ProfileSummaryInfo *PSI = nullptr;
158bdd1243dSDimitry Andric   if (UseProfileData) {
159bdd1243dSDimitry Andric     MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
160bdd1243dSDimitry Andric     PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
161fe013be4SDimitry Andric     // If we don't have a good profile (sample profile is not deemed
162fe013be4SDimitry Andric     // as a "good profile") and the function is not hot, then early
163fe013be4SDimitry Andric     // return. (Because we can only trust hot functions when profile
164fe013be4SDimitry Andric     // quality is not good.)
165fe013be4SDimitry Andric     if (PSI->hasSampleProfile() && !PSI->isFunctionHotInCallGraph(&MF, *MBFI)) {
166fe013be4SDimitry Andric       // Split all EH code and it's descendant statically by default.
167fe013be4SDimitry Andric       if (SplitAllEHCode)
168fe013be4SDimitry Andric         setDescendantEHBlocksCold(MF);
169fe013be4SDimitry Andric       finishAdjustingBasicBlocksAndLandingPads(MF);
170fe013be4SDimitry Andric       return true;
171fe013be4SDimitry Andric     }
172bdd1243dSDimitry Andric   }
173e8d8bef9SDimitry Andric 
174fe6060f1SDimitry Andric   SmallVector<MachineBasicBlock *, 2> LandingPads;
175e8d8bef9SDimitry Andric   for (auto &MBB : MF) {
176fe6060f1SDimitry Andric     if (MBB.isEntryBlock())
177e8d8bef9SDimitry Andric       continue;
178fe6060f1SDimitry Andric 
179fe6060f1SDimitry Andric     if (MBB.isEHPad())
180fe6060f1SDimitry Andric       LandingPads.push_back(&MBB);
181bdd1243dSDimitry Andric     else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && !SplitAllEHCode)
182e8d8bef9SDimitry Andric       MBB.setSectionID(MBBSectionID::ColdSectionID);
183e8d8bef9SDimitry Andric   }
184e8d8bef9SDimitry Andric 
185bdd1243dSDimitry Andric   // Split all EH code and it's descendant statically by default.
186bdd1243dSDimitry Andric   if (SplitAllEHCode)
187fe013be4SDimitry Andric     setDescendantEHBlocksCold(MF);
188fe6060f1SDimitry Andric   // We only split out eh pads if all of them are cold.
189bdd1243dSDimitry Andric   else {
190fe013be4SDimitry Andric     // Here we have UseProfileData == true.
191fe6060f1SDimitry Andric     bool HasHotLandingPads = false;
192fe6060f1SDimitry Andric     for (const MachineBasicBlock *LP : LandingPads) {
193fe6060f1SDimitry Andric       if (!isColdBlock(*LP, MBFI, PSI))
194fe6060f1SDimitry Andric         HasHotLandingPads = true;
195fe6060f1SDimitry Andric     }
196fe6060f1SDimitry Andric     if (!HasHotLandingPads) {
197fe6060f1SDimitry Andric       for (MachineBasicBlock *LP : LandingPads)
198fe6060f1SDimitry Andric         LP->setSectionID(MBBSectionID::ColdSectionID);
199fe6060f1SDimitry Andric     }
200bdd1243dSDimitry Andric   }
201fe013be4SDimitry Andric 
202fe013be4SDimitry Andric   finishAdjustingBasicBlocksAndLandingPads(MF);
203e8d8bef9SDimitry Andric   return true;
204e8d8bef9SDimitry Andric }
205e8d8bef9SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const206e8d8bef9SDimitry Andric void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
207e8d8bef9SDimitry Andric   AU.addRequired<MachineModuleInfoWrapperPass>();
208e8d8bef9SDimitry Andric   AU.addRequired<MachineBlockFrequencyInfo>();
209e8d8bef9SDimitry Andric   AU.addRequired<ProfileSummaryInfoWrapperPass>();
210e8d8bef9SDimitry Andric }
211e8d8bef9SDimitry Andric 
212e8d8bef9SDimitry Andric char MachineFunctionSplitter::ID = 0;
213e8d8bef9SDimitry Andric INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
214e8d8bef9SDimitry Andric                 "Split machine functions using profile information", false,
215e8d8bef9SDimitry Andric                 false)
216e8d8bef9SDimitry Andric 
createMachineFunctionSplitterPass()217e8d8bef9SDimitry Andric MachineFunctionPass *llvm::createMachineFunctionSplitterPass() {
218e8d8bef9SDimitry Andric   return new MachineFunctionSplitter();
219e8d8bef9SDimitry Andric }
220