1dc40be75SVolkan Keles //===- BlockExtractor.cpp - Extracts blocks into their own functions ------===// 2dc40be75SVolkan Keles // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6dc40be75SVolkan Keles // 7dc40be75SVolkan Keles //===----------------------------------------------------------------------===// 8dc40be75SVolkan Keles // 9dc40be75SVolkan Keles // This pass extracts the specified basic blocks from the module into their 10dc40be75SVolkan Keles // own functions. 11dc40be75SVolkan Keles // 12dc40be75SVolkan Keles //===----------------------------------------------------------------------===// 13dc40be75SVolkan Keles 14dc40be75SVolkan Keles #include "llvm/ADT/STLExtras.h" 15dc40be75SVolkan Keles #include "llvm/ADT/Statistic.h" 16dc40be75SVolkan Keles #include "llvm/IR/Instructions.h" 17dc40be75SVolkan Keles #include "llvm/IR/Module.h" 18dc40be75SVolkan Keles #include "llvm/Pass.h" 19dc40be75SVolkan Keles #include "llvm/Support/CommandLine.h" 20dc40be75SVolkan Keles #include "llvm/Support/Debug.h" 21dc40be75SVolkan Keles #include "llvm/Support/MemoryBuffer.h" 22dc40be75SVolkan Keles #include "llvm/Transforms/IPO.h" 23dc40be75SVolkan Keles #include "llvm/Transforms/Utils/BasicBlockUtils.h" 24dc40be75SVolkan Keles #include "llvm/Transforms/Utils/CodeExtractor.h" 25*ea3364bfSQuentin Colombet 26dc40be75SVolkan Keles using namespace llvm; 27dc40be75SVolkan Keles 28dc40be75SVolkan Keles #define DEBUG_TYPE "block-extractor" 29dc40be75SVolkan Keles 30dc40be75SVolkan Keles STATISTIC(NumExtracted, "Number of basic blocks extracted"); 31dc40be75SVolkan Keles 32dc40be75SVolkan Keles static cl::opt<std::string> BlockExtractorFile( 33dc40be75SVolkan Keles "extract-blocks-file", cl::value_desc("filename"), 34dc40be75SVolkan Keles cl::desc("A file containing list of basic blocks to extract"), cl::Hidden); 35dc40be75SVolkan Keles 36dc40be75SVolkan Keles cl::opt<bool> BlockExtractorEraseFuncs("extract-blocks-erase-funcs", 37dc40be75SVolkan Keles cl::desc("Erase the existing functions"), 38dc40be75SVolkan Keles cl::Hidden); 39dc40be75SVolkan Keles namespace { 40dc40be75SVolkan Keles class BlockExtractor : public ModulePass { 41*ea3364bfSQuentin Colombet SmallVector<SmallVector<BasicBlock *, 16>, 4> GroupsOfBlocks; 42dc40be75SVolkan Keles bool EraseFunctions; 43*ea3364bfSQuentin Colombet /// Map a function name to groups of blocks. 44*ea3364bfSQuentin Colombet SmallVector<std::pair<std::string, SmallVector<std::string, 4>>, 4> 45*ea3364bfSQuentin Colombet BlocksByName; 46dc40be75SVolkan Keles 47dc40be75SVolkan Keles public: 48dc40be75SVolkan Keles static char ID; 49dc40be75SVolkan Keles BlockExtractor(const SmallVectorImpl<BasicBlock *> &BlocksToExtract, 50dc40be75SVolkan Keles bool EraseFunctions) 51*ea3364bfSQuentin Colombet : ModulePass(ID), EraseFunctions(EraseFunctions) { 52*ea3364bfSQuentin Colombet // We want one group per element of the input list. 53*ea3364bfSQuentin Colombet for (BasicBlock *BB : BlocksToExtract) { 54*ea3364bfSQuentin Colombet SmallVector<BasicBlock *, 16> NewGroup; 55*ea3364bfSQuentin Colombet NewGroup.push_back(BB); 56*ea3364bfSQuentin Colombet GroupsOfBlocks.push_back(NewGroup); 57*ea3364bfSQuentin Colombet } 58dc40be75SVolkan Keles if (!BlockExtractorFile.empty()) 59dc40be75SVolkan Keles loadFile(); 60dc40be75SVolkan Keles } 61dc40be75SVolkan Keles BlockExtractor() : BlockExtractor(SmallVector<BasicBlock *, 0>(), false) {} 62dc40be75SVolkan Keles bool runOnModule(Module &M) override; 63dc40be75SVolkan Keles 64dc40be75SVolkan Keles private: 65dc40be75SVolkan Keles void loadFile(); 66dc40be75SVolkan Keles void splitLandingPadPreds(Function &F); 67dc40be75SVolkan Keles }; 68dc40be75SVolkan Keles } // end anonymous namespace 69dc40be75SVolkan Keles 70dc40be75SVolkan Keles char BlockExtractor::ID = 0; 71dc40be75SVolkan Keles INITIALIZE_PASS(BlockExtractor, "extract-blocks", 72dc40be75SVolkan Keles "Extract basic blocks from module", false, false) 73dc40be75SVolkan Keles 74dc40be75SVolkan Keles ModulePass *llvm::createBlockExtractorPass() { return new BlockExtractor(); } 75dc40be75SVolkan Keles ModulePass *llvm::createBlockExtractorPass( 76dc40be75SVolkan Keles const SmallVectorImpl<BasicBlock *> &BlocksToExtract, bool EraseFunctions) { 77dc40be75SVolkan Keles return new BlockExtractor(BlocksToExtract, EraseFunctions); 78dc40be75SVolkan Keles } 79dc40be75SVolkan Keles 80dc40be75SVolkan Keles /// Gets all of the blocks specified in the input file. 81dc40be75SVolkan Keles void BlockExtractor::loadFile() { 82dc40be75SVolkan Keles auto ErrOrBuf = MemoryBuffer::getFile(BlockExtractorFile); 83ebf34ea3SVolkan Keles if (ErrOrBuf.getError()) 84dc40be75SVolkan Keles report_fatal_error("BlockExtractor couldn't load the file."); 85dc40be75SVolkan Keles // Read the file. 86dc40be75SVolkan Keles auto &Buf = *ErrOrBuf; 87dc40be75SVolkan Keles SmallVector<StringRef, 16> Lines; 88dc40be75SVolkan Keles Buf->getBuffer().split(Lines, '\n', /*MaxSplit=*/-1, 89dc40be75SVolkan Keles /*KeepEmpty=*/false); 90dc40be75SVolkan Keles for (const auto &Line : Lines) { 91*ea3364bfSQuentin Colombet SmallVector<StringRef, 4> LineSplit; 92*ea3364bfSQuentin Colombet Line.split(LineSplit, ' ', /*MaxSplit=*/-1, 93*ea3364bfSQuentin Colombet /*KeepEmpty=*/false); 94*ea3364bfSQuentin Colombet if (LineSplit.empty()) 95*ea3364bfSQuentin Colombet continue; 96*ea3364bfSQuentin Colombet SmallVector<StringRef, 4> BBNames; 97*ea3364bfSQuentin Colombet LineSplit[1].split(BBNames, ',', /*MaxSplit=*/-1, 98*ea3364bfSQuentin Colombet /*KeepEmpty=*/false); 99*ea3364bfSQuentin Colombet if (BBNames.empty()) 100*ea3364bfSQuentin Colombet report_fatal_error("Missing bbs name"); 101*ea3364bfSQuentin Colombet BlocksByName.push_back({LineSplit[0], {BBNames.begin(), BBNames.end()}}); 102dc40be75SVolkan Keles } 103dc40be75SVolkan Keles } 104dc40be75SVolkan Keles 105dc40be75SVolkan Keles /// Extracts the landing pads to make sure all of them have only one 106dc40be75SVolkan Keles /// predecessor. 107dc40be75SVolkan Keles void BlockExtractor::splitLandingPadPreds(Function &F) { 108dc40be75SVolkan Keles for (BasicBlock &BB : F) { 109dc40be75SVolkan Keles for (Instruction &I : BB) { 110dc40be75SVolkan Keles if (!isa<InvokeInst>(&I)) 111dc40be75SVolkan Keles continue; 112dc40be75SVolkan Keles InvokeInst *II = cast<InvokeInst>(&I); 113dc40be75SVolkan Keles BasicBlock *Parent = II->getParent(); 114dc40be75SVolkan Keles BasicBlock *LPad = II->getUnwindDest(); 115dc40be75SVolkan Keles 116dc40be75SVolkan Keles // Look through the landing pad's predecessors. If one of them ends in an 117dc40be75SVolkan Keles // 'invoke', then we want to split the landing pad. 118dc40be75SVolkan Keles bool Split = false; 119dc40be75SVolkan Keles for (auto PredBB : predecessors(LPad)) { 120dc40be75SVolkan Keles if (PredBB->isLandingPad() && PredBB != Parent && 121dc40be75SVolkan Keles isa<InvokeInst>(Parent->getTerminator())) { 122dc40be75SVolkan Keles Split = true; 123dc40be75SVolkan Keles break; 124dc40be75SVolkan Keles } 125dc40be75SVolkan Keles } 126dc40be75SVolkan Keles 127dc40be75SVolkan Keles if (!Split) 128dc40be75SVolkan Keles continue; 129dc40be75SVolkan Keles 130dc40be75SVolkan Keles SmallVector<BasicBlock *, 2> NewBBs; 131dc40be75SVolkan Keles SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", NewBBs); 132dc40be75SVolkan Keles } 133dc40be75SVolkan Keles } 134dc40be75SVolkan Keles } 135dc40be75SVolkan Keles 136dc40be75SVolkan Keles bool BlockExtractor::runOnModule(Module &M) { 137dc40be75SVolkan Keles 138dc40be75SVolkan Keles bool Changed = false; 139dc40be75SVolkan Keles 140dc40be75SVolkan Keles // Get all the functions. 141dc40be75SVolkan Keles SmallVector<Function *, 4> Functions; 142dc40be75SVolkan Keles for (Function &F : M) { 143dc40be75SVolkan Keles splitLandingPadPreds(F); 144dc40be75SVolkan Keles Functions.push_back(&F); 145dc40be75SVolkan Keles } 146dc40be75SVolkan Keles 147dc40be75SVolkan Keles // Get all the blocks specified in the input file. 148*ea3364bfSQuentin Colombet unsigned NextGroupIdx = GroupsOfBlocks.size(); 149*ea3364bfSQuentin Colombet GroupsOfBlocks.resize(NextGroupIdx + BlocksByName.size()); 150dc40be75SVolkan Keles for (const auto &BInfo : BlocksByName) { 151dc40be75SVolkan Keles Function *F = M.getFunction(BInfo.first); 152dc40be75SVolkan Keles if (!F) 153dc40be75SVolkan Keles report_fatal_error("Invalid function name specified in the input file"); 154*ea3364bfSQuentin Colombet for (const auto &BBInfo : BInfo.second) { 155dc40be75SVolkan Keles auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) { 156*ea3364bfSQuentin Colombet return BB.getName().equals(BBInfo); 157dc40be75SVolkan Keles }); 158dc40be75SVolkan Keles if (Res == F->end()) 159dc40be75SVolkan Keles report_fatal_error("Invalid block name specified in the input file"); 160*ea3364bfSQuentin Colombet GroupsOfBlocks[NextGroupIdx].push_back(&*Res); 161*ea3364bfSQuentin Colombet } 162*ea3364bfSQuentin Colombet ++NextGroupIdx; 163dc40be75SVolkan Keles } 164dc40be75SVolkan Keles 165*ea3364bfSQuentin Colombet // Extract each group of basic blocks. 166*ea3364bfSQuentin Colombet for (auto &BBs : GroupsOfBlocks) { 167*ea3364bfSQuentin Colombet SmallVector<BasicBlock *, 32> BlocksToExtractVec; 168*ea3364bfSQuentin Colombet for (BasicBlock *BB : BBs) { 169dc40be75SVolkan Keles // Check if the module contains BB. 170dc40be75SVolkan Keles if (BB->getParent()->getParent() != &M) 171dc40be75SVolkan Keles report_fatal_error("Invalid basic block"); 172d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "BlockExtractor: Extracting " 173d34e60caSNicola Zaghen << BB->getParent()->getName() << ":" << BB->getName() 174d34e60caSNicola Zaghen << "\n"); 175dc40be75SVolkan Keles BlocksToExtractVec.push_back(BB); 176dc40be75SVolkan Keles if (const InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) 177dc40be75SVolkan Keles BlocksToExtractVec.push_back(II->getUnwindDest()); 178dc40be75SVolkan Keles ++NumExtracted; 179dc40be75SVolkan Keles Changed = true; 180dc40be75SVolkan Keles } 181*ea3364bfSQuentin Colombet Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(); 182*ea3364bfSQuentin Colombet if (F) 183*ea3364bfSQuentin Colombet LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName() 184*ea3364bfSQuentin Colombet << "' in: " << F->getName() << '\n'); 185*ea3364bfSQuentin Colombet else 186*ea3364bfSQuentin Colombet LLVM_DEBUG(dbgs() << "Failed to extract for group '" 187*ea3364bfSQuentin Colombet << (*BBs.begin())->getName() << "'\n"); 188*ea3364bfSQuentin Colombet } 189dc40be75SVolkan Keles 190dc40be75SVolkan Keles // Erase the functions. 191dc40be75SVolkan Keles if (EraseFunctions || BlockExtractorEraseFuncs) { 192dc40be75SVolkan Keles for (Function *F : Functions) { 193d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "BlockExtractor: Trying to delete " << F->getName() 1944ecdb44aSVolkan Keles << "\n"); 1954ecdb44aSVolkan Keles F->deleteBody(); 196dc40be75SVolkan Keles } 197dc40be75SVolkan Keles // Set linkage as ExternalLinkage to avoid erasing unreachable functions. 198dc40be75SVolkan Keles for (Function &F : M) 199dc40be75SVolkan Keles F.setLinkage(GlobalValue::ExternalLinkage); 200dc40be75SVolkan Keles Changed = true; 201dc40be75SVolkan Keles } 202dc40be75SVolkan Keles 203dc40be75SVolkan Keles return Changed; 204dc40be75SVolkan Keles } 205