//===- BlockExtractor.cpp - Extracts blocks into their own functions ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass extracts the specified basic blocks from the module into their
// own functions.
//
//===----------------------------------------------------------------------===//
13*0b57cec5SDimitry Andric 
#include "llvm/Transforms/IPO/BlockExtractor.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
#include <utility>
26*0b57cec5SDimitry Andric 
27*0b57cec5SDimitry Andric using namespace llvm;
28*0b57cec5SDimitry Andric 
29*0b57cec5SDimitry Andric #define DEBUG_TYPE "block-extractor"
30*0b57cec5SDimitry Andric 
31*0b57cec5SDimitry Andric STATISTIC(NumExtracted, "Number of basic blocks extracted");
32*0b57cec5SDimitry Andric 
33*0b57cec5SDimitry Andric static cl::opt<std::string> BlockExtractorFile(
34*0b57cec5SDimitry Andric     "extract-blocks-file", cl::value_desc("filename"),
35*0b57cec5SDimitry Andric     cl::desc("A file containing list of basic blocks to extract"), cl::Hidden);
36*0b57cec5SDimitry Andric 
37fe6060f1SDimitry Andric static cl::opt<bool>
38fe6060f1SDimitry Andric     BlockExtractorEraseFuncs("extract-blocks-erase-funcs",
39*0b57cec5SDimitry Andric                              cl::desc("Erase the existing functions"),
40*0b57cec5SDimitry Andric                              cl::Hidden);
41*0b57cec5SDimitry Andric namespace {
42e8d8bef9SDimitry Andric class BlockExtractor {
43e8d8bef9SDimitry Andric public:
BlockExtractor(bool EraseFunctions)44e8d8bef9SDimitry Andric   BlockExtractor(bool EraseFunctions) : EraseFunctions(EraseFunctions) {}
45e8d8bef9SDimitry Andric   bool runOnModule(Module &M);
46bdd1243dSDimitry Andric   void
init(const std::vector<std::vector<BasicBlock * >> & GroupsOfBlocksToExtract)47bdd1243dSDimitry Andric   init(const std::vector<std::vector<BasicBlock *>> &GroupsOfBlocksToExtract) {
48bdd1243dSDimitry Andric     GroupsOfBlocks = GroupsOfBlocksToExtract;
49*0b57cec5SDimitry Andric     if (!BlockExtractorFile.empty())
50*0b57cec5SDimitry Andric       loadFile();
51*0b57cec5SDimitry Andric   }
52*0b57cec5SDimitry Andric 
53e8d8bef9SDimitry Andric private:
54bdd1243dSDimitry Andric   std::vector<std::vector<BasicBlock *>> GroupsOfBlocks;
55e8d8bef9SDimitry Andric   bool EraseFunctions;
56e8d8bef9SDimitry Andric   /// Map a function name to groups of blocks.
57e8d8bef9SDimitry Andric   SmallVector<std::pair<std::string, SmallVector<std::string, 4>>, 4>
58e8d8bef9SDimitry Andric       BlocksByName;
59e8d8bef9SDimitry Andric 
60e8d8bef9SDimitry Andric   void loadFile();
61e8d8bef9SDimitry Andric   void splitLandingPadPreds(Function &F);
62e8d8bef9SDimitry Andric };
63e8d8bef9SDimitry Andric 
64*0b57cec5SDimitry Andric } // end anonymous namespace
65*0b57cec5SDimitry Andric 
66*0b57cec5SDimitry Andric /// Gets all of the blocks specified in the input file.
loadFile()67*0b57cec5SDimitry Andric void BlockExtractor::loadFile() {
68*0b57cec5SDimitry Andric   auto ErrOrBuf = MemoryBuffer::getFile(BlockExtractorFile);
69*0b57cec5SDimitry Andric   if (ErrOrBuf.getError())
70*0b57cec5SDimitry Andric     report_fatal_error("BlockExtractor couldn't load the file.");
71*0b57cec5SDimitry Andric   // Read the file.
72*0b57cec5SDimitry Andric   auto &Buf = *ErrOrBuf;
73*0b57cec5SDimitry Andric   SmallVector<StringRef, 16> Lines;
74*0b57cec5SDimitry Andric   Buf->getBuffer().split(Lines, '\n', /*MaxSplit=*/-1,
75*0b57cec5SDimitry Andric                          /*KeepEmpty=*/false);
76*0b57cec5SDimitry Andric   for (const auto &Line : Lines) {
77*0b57cec5SDimitry Andric     SmallVector<StringRef, 4> LineSplit;
78*0b57cec5SDimitry Andric     Line.split(LineSplit, ' ', /*MaxSplit=*/-1,
79*0b57cec5SDimitry Andric                /*KeepEmpty=*/false);
80*0b57cec5SDimitry Andric     if (LineSplit.empty())
81*0b57cec5SDimitry Andric       continue;
828bcb0991SDimitry Andric     if (LineSplit.size()!=2)
8381ad6265SDimitry Andric       report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'",
8481ad6265SDimitry Andric                          /*GenCrashDiag=*/false);
85*0b57cec5SDimitry Andric     SmallVector<StringRef, 4> BBNames;
86*0b57cec5SDimitry Andric     LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1,
87*0b57cec5SDimitry Andric                        /*KeepEmpty=*/false);
88*0b57cec5SDimitry Andric     if (BBNames.empty())
89*0b57cec5SDimitry Andric       report_fatal_error("Missing bbs name");
905ffd83dbSDimitry Andric     BlocksByName.push_back(
915ffd83dbSDimitry Andric         {std::string(LineSplit[0]), {BBNames.begin(), BBNames.end()}});
92*0b57cec5SDimitry Andric   }
93*0b57cec5SDimitry Andric }
94*0b57cec5SDimitry Andric 
95*0b57cec5SDimitry Andric /// Extracts the landing pads to make sure all of them have only one
96*0b57cec5SDimitry Andric /// predecessor.
splitLandingPadPreds(Function & F)97*0b57cec5SDimitry Andric void BlockExtractor::splitLandingPadPreds(Function &F) {
98*0b57cec5SDimitry Andric   for (BasicBlock &BB : F) {
99*0b57cec5SDimitry Andric     for (Instruction &I : BB) {
100*0b57cec5SDimitry Andric       if (!isa<InvokeInst>(&I))
101*0b57cec5SDimitry Andric         continue;
102*0b57cec5SDimitry Andric       InvokeInst *II = cast<InvokeInst>(&I);
103*0b57cec5SDimitry Andric       BasicBlock *Parent = II->getParent();
104*0b57cec5SDimitry Andric       BasicBlock *LPad = II->getUnwindDest();
105*0b57cec5SDimitry Andric 
106*0b57cec5SDimitry Andric       // Look through the landing pad's predecessors. If one of them ends in an
107*0b57cec5SDimitry Andric       // 'invoke', then we want to split the landing pad.
108*0b57cec5SDimitry Andric       bool Split = false;
109bdd1243dSDimitry Andric       for (auto *PredBB : predecessors(LPad)) {
110*0b57cec5SDimitry Andric         if (PredBB->isLandingPad() && PredBB != Parent &&
111*0b57cec5SDimitry Andric             isa<InvokeInst>(Parent->getTerminator())) {
112*0b57cec5SDimitry Andric           Split = true;
113*0b57cec5SDimitry Andric           break;
114*0b57cec5SDimitry Andric         }
115*0b57cec5SDimitry Andric       }
116*0b57cec5SDimitry Andric 
117*0b57cec5SDimitry Andric       if (!Split)
118*0b57cec5SDimitry Andric         continue;
119*0b57cec5SDimitry Andric 
120*0b57cec5SDimitry Andric       SmallVector<BasicBlock *, 2> NewBBs;
121*0b57cec5SDimitry Andric       SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", NewBBs);
122*0b57cec5SDimitry Andric     }
123*0b57cec5SDimitry Andric   }
124*0b57cec5SDimitry Andric }
125*0b57cec5SDimitry Andric 
runOnModule(Module & M)126*0b57cec5SDimitry Andric bool BlockExtractor::runOnModule(Module &M) {
127*0b57cec5SDimitry Andric   bool Changed = false;
128*0b57cec5SDimitry Andric 
129*0b57cec5SDimitry Andric   // Get all the functions.
130*0b57cec5SDimitry Andric   SmallVector<Function *, 4> Functions;
131*0b57cec5SDimitry Andric   for (Function &F : M) {
132*0b57cec5SDimitry Andric     splitLandingPadPreds(F);
133*0b57cec5SDimitry Andric     Functions.push_back(&F);
134*0b57cec5SDimitry Andric   }
135*0b57cec5SDimitry Andric 
136*0b57cec5SDimitry Andric   // Get all the blocks specified in the input file.
137*0b57cec5SDimitry Andric   unsigned NextGroupIdx = GroupsOfBlocks.size();
138*0b57cec5SDimitry Andric   GroupsOfBlocks.resize(NextGroupIdx + BlocksByName.size());
139*0b57cec5SDimitry Andric   for (const auto &BInfo : BlocksByName) {
140*0b57cec5SDimitry Andric     Function *F = M.getFunction(BInfo.first);
141*0b57cec5SDimitry Andric     if (!F)
14281ad6265SDimitry Andric       report_fatal_error("Invalid function name specified in the input file",
14381ad6265SDimitry Andric                          /*GenCrashDiag=*/false);
144*0b57cec5SDimitry Andric     for (const auto &BBInfo : BInfo.second) {
145*0b57cec5SDimitry Andric       auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) {
146*0b57cec5SDimitry Andric         return BB.getName().equals(BBInfo);
147*0b57cec5SDimitry Andric       });
148*0b57cec5SDimitry Andric       if (Res == F->end())
14981ad6265SDimitry Andric         report_fatal_error("Invalid block name specified in the input file",
15081ad6265SDimitry Andric                            /*GenCrashDiag=*/false);
151*0b57cec5SDimitry Andric       GroupsOfBlocks[NextGroupIdx].push_back(&*Res);
152*0b57cec5SDimitry Andric     }
153*0b57cec5SDimitry Andric     ++NextGroupIdx;
154*0b57cec5SDimitry Andric   }
155*0b57cec5SDimitry Andric 
156*0b57cec5SDimitry Andric   // Extract each group of basic blocks.
157*0b57cec5SDimitry Andric   for (auto &BBs : GroupsOfBlocks) {
158*0b57cec5SDimitry Andric     SmallVector<BasicBlock *, 32> BlocksToExtractVec;
159*0b57cec5SDimitry Andric     for (BasicBlock *BB : BBs) {
160*0b57cec5SDimitry Andric       // Check if the module contains BB.
161*0b57cec5SDimitry Andric       if (BB->getParent()->getParent() != &M)
16281ad6265SDimitry Andric         report_fatal_error("Invalid basic block", /*GenCrashDiag=*/false);
163*0b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "BlockExtractor: Extracting "
164*0b57cec5SDimitry Andric                         << BB->getParent()->getName() << ":" << BB->getName()
165*0b57cec5SDimitry Andric                         << "\n");
166*0b57cec5SDimitry Andric       BlocksToExtractVec.push_back(BB);
167*0b57cec5SDimitry Andric       if (const InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
168*0b57cec5SDimitry Andric         BlocksToExtractVec.push_back(II->getUnwindDest());
169*0b57cec5SDimitry Andric       ++NumExtracted;
170*0b57cec5SDimitry Andric       Changed = true;
171*0b57cec5SDimitry Andric     }
1728bcb0991SDimitry Andric     CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent());
1738bcb0991SDimitry Andric     Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC);
174*0b57cec5SDimitry Andric     if (F)
175*0b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName()
176*0b57cec5SDimitry Andric                         << "' in: " << F->getName() << '\n');
177*0b57cec5SDimitry Andric     else
178*0b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Failed to extract for group '"
179*0b57cec5SDimitry Andric                         << (*BBs.begin())->getName() << "'\n");
180*0b57cec5SDimitry Andric   }
181*0b57cec5SDimitry Andric 
182*0b57cec5SDimitry Andric   // Erase the functions.
183*0b57cec5SDimitry Andric   if (EraseFunctions || BlockExtractorEraseFuncs) {
184*0b57cec5SDimitry Andric     for (Function *F : Functions) {
185*0b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "BlockExtractor: Trying to delete " << F->getName()
186*0b57cec5SDimitry Andric                         << "\n");
187*0b57cec5SDimitry Andric       F->deleteBody();
188*0b57cec5SDimitry Andric     }
189*0b57cec5SDimitry Andric     // Set linkage as ExternalLinkage to avoid erasing unreachable functions.
190*0b57cec5SDimitry Andric     for (Function &F : M)
191*0b57cec5SDimitry Andric       F.setLinkage(GlobalValue::ExternalLinkage);
192*0b57cec5SDimitry Andric     Changed = true;
193*0b57cec5SDimitry Andric   }
194*0b57cec5SDimitry Andric 
195*0b57cec5SDimitry Andric   return Changed;
196*0b57cec5SDimitry Andric }
197e8d8bef9SDimitry Andric 
BlockExtractorPass(std::vector<std::vector<BasicBlock * >> && GroupsOfBlocks,bool EraseFunctions)198bdd1243dSDimitry Andric BlockExtractorPass::BlockExtractorPass(
199bdd1243dSDimitry Andric     std::vector<std::vector<BasicBlock *>> &&GroupsOfBlocks,
200bdd1243dSDimitry Andric     bool EraseFunctions)
201bdd1243dSDimitry Andric     : GroupsOfBlocks(GroupsOfBlocks), EraseFunctions(EraseFunctions) {}
202e8d8bef9SDimitry Andric 
run(Module & M,ModuleAnalysisManager & AM)203e8d8bef9SDimitry Andric PreservedAnalyses BlockExtractorPass::run(Module &M,
204e8d8bef9SDimitry Andric                                           ModuleAnalysisManager &AM) {
205bdd1243dSDimitry Andric   BlockExtractor BE(EraseFunctions);
206bdd1243dSDimitry Andric   BE.init(GroupsOfBlocks);
207e8d8bef9SDimitry Andric   return BE.runOnModule(M) ? PreservedAnalyses::none()
208e8d8bef9SDimitry Andric                            : PreservedAnalyses::all();
209e8d8bef9SDimitry Andric }
210