1e8d8bef9SDimitry Andric //===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric //
9e8d8bef9SDimitry Andric // \file
10e8d8bef9SDimitry Andric // Uses profile information to split out cold blocks.
11e8d8bef9SDimitry Andric //
12e8d8bef9SDimitry Andric // This pass splits out cold machine basic blocks from the parent function. This
13e8d8bef9SDimitry Andric // implementation leverages the basic block section framework. Blocks marked
14e8d8bef9SDimitry Andric // cold by this pass are grouped together in a separate section prefixed with
15e8d8bef9SDimitry Andric // ".text.unlikely.*". The linker can then group these together as a cold
16e8d8bef9SDimitry Andric // section. The split part of the function is a contiguous region identified by
17e8d8bef9SDimitry Andric // the symbol "foo.cold". Grouping all cold blocks across functions together
18e8d8bef9SDimitry Andric // decreases fragmentation and improves icache and itlb utilization. Note that
19e8d8bef9SDimitry Andric // the overall changes to the binary size are negligible; only a small number of
20e8d8bef9SDimitry Andric // additional jump instructions may be introduced.
21e8d8bef9SDimitry Andric //
22e8d8bef9SDimitry Andric // For the original RFC of this pass please see
23e8d8bef9SDimitry Andric // https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
24e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
25e8d8bef9SDimitry Andric
26fe6060f1SDimitry Andric #include "llvm/ADT/SmallVector.h"
27fe013be4SDimitry Andric #include "llvm/Analysis/BlockFrequencyInfo.h"
28fe013be4SDimitry Andric #include "llvm/Analysis/BranchProbabilityInfo.h"
29fe013be4SDimitry Andric #include "llvm/Analysis/EHUtils.h"
30e8d8bef9SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h"
31e8d8bef9SDimitry Andric #include "llvm/CodeGen/BasicBlockSectionUtils.h"
32e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
33e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
34e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
35e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
36e8d8bef9SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h"
37e8d8bef9SDimitry Andric #include "llvm/CodeGen/Passes.h"
38*c9157d92SDimitry Andric #include "llvm/CodeGen/TargetInstrInfo.h"
39e8d8bef9SDimitry Andric #include "llvm/IR/Function.h"
40e8d8bef9SDimitry Andric #include "llvm/InitializePasses.h"
41e8d8bef9SDimitry Andric #include "llvm/Support/CommandLine.h"
42bdd1243dSDimitry Andric #include <optional>
43e8d8bef9SDimitry Andric
44e8d8bef9SDimitry Andric using namespace llvm;
45e8d8bef9SDimitry Andric
46e8d8bef9SDimitry Andric // FIXME: This cutoff value is CPU dependent and should be moved to
47e8d8bef9SDimitry Andric // TargetTransformInfo once we consider enabling this on other platforms.
48e8d8bef9SDimitry Andric // The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
49e8d8bef9SDimitry Andric // Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
50e8d8bef9SDimitry Andric // The default was empirically determined to be optimal when considering cutoff
51e8d8bef9SDimitry Andric // values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on
52e8d8bef9SDimitry Andric // Intel CPUs.
53e8d8bef9SDimitry Andric static cl::opt<unsigned>
54e8d8bef9SDimitry Andric PercentileCutoff("mfs-psi-cutoff",
55e8d8bef9SDimitry Andric cl::desc("Percentile profile summary cutoff used to "
56e8d8bef9SDimitry Andric "determine cold blocks. Unused if set to zero."),
57e8d8bef9SDimitry Andric cl::init(999950), cl::Hidden);
58e8d8bef9SDimitry Andric
59e8d8bef9SDimitry Andric static cl::opt<unsigned> ColdCountThreshold(
60e8d8bef9SDimitry Andric "mfs-count-threshold",
61e8d8bef9SDimitry Andric cl::desc(
62e8d8bef9SDimitry Andric "Minimum number of times a block must be executed to be retained."),
63e8d8bef9SDimitry Andric cl::init(1), cl::Hidden);
64e8d8bef9SDimitry Andric
65bdd1243dSDimitry Andric static cl::opt<bool> SplitAllEHCode(
66bdd1243dSDimitry Andric "mfs-split-ehcode",
67bdd1243dSDimitry Andric cl::desc("Splits all EH code and it's descendants by default."),
68bdd1243dSDimitry Andric cl::init(false), cl::Hidden);
69bdd1243dSDimitry Andric
70e8d8bef9SDimitry Andric namespace {
71e8d8bef9SDimitry Andric
72e8d8bef9SDimitry Andric class MachineFunctionSplitter : public MachineFunctionPass {
73e8d8bef9SDimitry Andric public:
74e8d8bef9SDimitry Andric static char ID;
MachineFunctionSplitter()75e8d8bef9SDimitry Andric MachineFunctionSplitter() : MachineFunctionPass(ID) {
76e8d8bef9SDimitry Andric initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry());
77e8d8bef9SDimitry Andric }
78e8d8bef9SDimitry Andric
getPassName() const79e8d8bef9SDimitry Andric StringRef getPassName() const override {
80e8d8bef9SDimitry Andric return "Machine Function Splitter Transformation";
81e8d8bef9SDimitry Andric }
82e8d8bef9SDimitry Andric
83e8d8bef9SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override;
84e8d8bef9SDimitry Andric
85e8d8bef9SDimitry Andric bool runOnMachineFunction(MachineFunction &F) override;
86e8d8bef9SDimitry Andric };
87e8d8bef9SDimitry Andric } // end anonymous namespace
88e8d8bef9SDimitry Andric
89bdd1243dSDimitry Andric /// setDescendantEHBlocksCold - This splits all EH pads and blocks reachable
90fe013be4SDimitry Andric /// only by EH pad as cold. This will help mark EH pads statically cold
91fe013be4SDimitry Andric /// instead of relying on profile data.
setDescendantEHBlocksCold(MachineFunction & MF)92fe013be4SDimitry Andric static void setDescendantEHBlocksCold(MachineFunction &MF) {
93fe013be4SDimitry Andric DenseSet<MachineBasicBlock *> EHBlocks;
94fe013be4SDimitry Andric computeEHOnlyBlocks(MF, EHBlocks);
95fe013be4SDimitry Andric for (auto Block : EHBlocks) {
96fe013be4SDimitry Andric Block->setSectionID(MBBSectionID::ColdSectionID);
97fe013be4SDimitry Andric }
98fe013be4SDimitry Andric }
99bdd1243dSDimitry Andric
finishAdjustingBasicBlocksAndLandingPads(MachineFunction & MF)100fe013be4SDimitry Andric static void finishAdjustingBasicBlocksAndLandingPads(MachineFunction &MF) {
101fe013be4SDimitry Andric auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
102fe013be4SDimitry Andric return X.getSectionID().Type < Y.getSectionID().Type;
103bdd1243dSDimitry Andric };
104fe013be4SDimitry Andric llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
105fe013be4SDimitry Andric llvm::avoidZeroOffsetLandingPad(MF);
106bdd1243dSDimitry Andric }
107bdd1243dSDimitry Andric
isColdBlock(const MachineBasicBlock & MBB,const MachineBlockFrequencyInfo * MBFI,ProfileSummaryInfo * PSI)108fe6060f1SDimitry Andric static bool isColdBlock(const MachineBasicBlock &MBB,
109e8d8bef9SDimitry Andric const MachineBlockFrequencyInfo *MBFI,
110e8d8bef9SDimitry Andric ProfileSummaryInfo *PSI) {
111bdd1243dSDimitry Andric std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
112*c9157d92SDimitry Andric
113*c9157d92SDimitry Andric // Temporary hack to cope with AArch64's jump table encoding
114*c9157d92SDimitry Andric const TargetInstrInfo &TII = *MBB.getParent()->getSubtarget().getInstrInfo();
115*c9157d92SDimitry Andric if (!TII.isMBBSafeToSplitToCold(MBB))
116*c9157d92SDimitry Andric return false;
117*c9157d92SDimitry Andric
118fe013be4SDimitry Andric // For instrumentation profiles and sample profiles, we use different ways
119fe013be4SDimitry Andric // to judge whether a block is cold and should be split.
120fe013be4SDimitry Andric if (PSI->hasInstrumentationProfile() || PSI->hasCSInstrumentationProfile()) {
121fe013be4SDimitry Andric // If using instrument profile, which is deemed "accurate", no count means
122fe013be4SDimitry Andric // cold.
12381ad6265SDimitry Andric if (!Count)
124e8d8bef9SDimitry Andric return true;
125fe013be4SDimitry Andric if (PercentileCutoff > 0)
126e8d8bef9SDimitry Andric return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
127fe013be4SDimitry Andric // Fallthrough to end of function.
128fe013be4SDimitry Andric } else if (PSI->hasSampleProfile()) {
129fe013be4SDimitry Andric // For sample profile, no count means "do not judege coldness".
130fe013be4SDimitry Andric if (!Count)
131fe013be4SDimitry Andric return false;
132e8d8bef9SDimitry Andric }
133fe013be4SDimitry Andric
134e8d8bef9SDimitry Andric return (*Count < ColdCountThreshold);
135e8d8bef9SDimitry Andric }
136e8d8bef9SDimitry Andric
runOnMachineFunction(MachineFunction & MF)137e8d8bef9SDimitry Andric bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
138bdd1243dSDimitry Andric // We target functions with profile data. Static information in the form
139bdd1243dSDimitry Andric // of exception handling code may be split to cold if user passes the
140bdd1243dSDimitry Andric // mfs-split-ehcode flag.
141bdd1243dSDimitry Andric bool UseProfileData = MF.getFunction().hasProfileData();
142bdd1243dSDimitry Andric if (!UseProfileData && !SplitAllEHCode)
143e8d8bef9SDimitry Andric return false;
144e8d8bef9SDimitry Andric
145*c9157d92SDimitry Andric const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
146*c9157d92SDimitry Andric if (!TII.isFunctionSafeToSplit(MF))
147e8d8bef9SDimitry Andric return false;
148e8d8bef9SDimitry Andric
149e8d8bef9SDimitry Andric // Renumbering blocks here preserves the order of the blocks as
150e8d8bef9SDimitry Andric // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
151e8d8bef9SDimitry Andric // blocks. Preserving the order of blocks is essential to retaining decisions
152e8d8bef9SDimitry Andric // made by prior passes such as MachineBlockPlacement.
153e8d8bef9SDimitry Andric MF.RenumberBlocks();
154e8d8bef9SDimitry Andric MF.setBBSectionsType(BasicBlockSection::Preset);
155bdd1243dSDimitry Andric
156bdd1243dSDimitry Andric MachineBlockFrequencyInfo *MBFI = nullptr;
157bdd1243dSDimitry Andric ProfileSummaryInfo *PSI = nullptr;
158bdd1243dSDimitry Andric if (UseProfileData) {
159bdd1243dSDimitry Andric MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
160bdd1243dSDimitry Andric PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
161fe013be4SDimitry Andric // If we don't have a good profile (sample profile is not deemed
162fe013be4SDimitry Andric // as a "good profile") and the function is not hot, then early
163fe013be4SDimitry Andric // return. (Because we can only trust hot functions when profile
164fe013be4SDimitry Andric // quality is not good.)
165fe013be4SDimitry Andric if (PSI->hasSampleProfile() && !PSI->isFunctionHotInCallGraph(&MF, *MBFI)) {
166fe013be4SDimitry Andric // Split all EH code and it's descendant statically by default.
167fe013be4SDimitry Andric if (SplitAllEHCode)
168fe013be4SDimitry Andric setDescendantEHBlocksCold(MF);
169fe013be4SDimitry Andric finishAdjustingBasicBlocksAndLandingPads(MF);
170fe013be4SDimitry Andric return true;
171fe013be4SDimitry Andric }
172bdd1243dSDimitry Andric }
173e8d8bef9SDimitry Andric
174fe6060f1SDimitry Andric SmallVector<MachineBasicBlock *, 2> LandingPads;
175e8d8bef9SDimitry Andric for (auto &MBB : MF) {
176fe6060f1SDimitry Andric if (MBB.isEntryBlock())
177e8d8bef9SDimitry Andric continue;
178fe6060f1SDimitry Andric
179fe6060f1SDimitry Andric if (MBB.isEHPad())
180fe6060f1SDimitry Andric LandingPads.push_back(&MBB);
181bdd1243dSDimitry Andric else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && !SplitAllEHCode)
182e8d8bef9SDimitry Andric MBB.setSectionID(MBBSectionID::ColdSectionID);
183e8d8bef9SDimitry Andric }
184e8d8bef9SDimitry Andric
185bdd1243dSDimitry Andric // Split all EH code and it's descendant statically by default.
186bdd1243dSDimitry Andric if (SplitAllEHCode)
187fe013be4SDimitry Andric setDescendantEHBlocksCold(MF);
188fe6060f1SDimitry Andric // We only split out eh pads if all of them are cold.
189bdd1243dSDimitry Andric else {
190fe013be4SDimitry Andric // Here we have UseProfileData == true.
191fe6060f1SDimitry Andric bool HasHotLandingPads = false;
192fe6060f1SDimitry Andric for (const MachineBasicBlock *LP : LandingPads) {
193fe6060f1SDimitry Andric if (!isColdBlock(*LP, MBFI, PSI))
194fe6060f1SDimitry Andric HasHotLandingPads = true;
195fe6060f1SDimitry Andric }
196fe6060f1SDimitry Andric if (!HasHotLandingPads) {
197fe6060f1SDimitry Andric for (MachineBasicBlock *LP : LandingPads)
198fe6060f1SDimitry Andric LP->setSectionID(MBBSectionID::ColdSectionID);
199fe6060f1SDimitry Andric }
200bdd1243dSDimitry Andric }
201fe013be4SDimitry Andric
202fe013be4SDimitry Andric finishAdjustingBasicBlocksAndLandingPads(MF);
203e8d8bef9SDimitry Andric return true;
204e8d8bef9SDimitry Andric }
205e8d8bef9SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const206e8d8bef9SDimitry Andric void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
207e8d8bef9SDimitry Andric AU.addRequired<MachineModuleInfoWrapperPass>();
208e8d8bef9SDimitry Andric AU.addRequired<MachineBlockFrequencyInfo>();
209e8d8bef9SDimitry Andric AU.addRequired<ProfileSummaryInfoWrapperPass>();
210e8d8bef9SDimitry Andric }
211e8d8bef9SDimitry Andric
212e8d8bef9SDimitry Andric char MachineFunctionSplitter::ID = 0;
213e8d8bef9SDimitry Andric INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
214e8d8bef9SDimitry Andric "Split machine functions using profile information", false,
215e8d8bef9SDimitry Andric false)
216e8d8bef9SDimitry Andric
createMachineFunctionSplitterPass()217e8d8bef9SDimitry Andric MachineFunctionPass *llvm::createMachineFunctionSplitterPass() {
218e8d8bef9SDimitry Andric return new MachineFunctionSplitter();
219e8d8bef9SDimitry Andric }
220