11829512dSManman Ren //===- InstrOrderFile.cpp ---- Late IR instrumentation for order file ----===//
21829512dSManman Ren //
3*c874dd53SChristopher Di Bella // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*c874dd53SChristopher Di Bella // See https://llvm.org/LICENSE.txt for license information.
5*c874dd53SChristopher Di Bella // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
61829512dSManman Ren //
71829512dSManman Ren //===----------------------------------------------------------------------===//
81829512dSManman Ren //
91829512dSManman Ren //===----------------------------------------------------------------------===//
101829512dSManman Ren 
1105da2fe5SReid Kleckner #include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
121829512dSManman Ren #include "llvm/ADT/Statistic.h"
131829512dSManman Ren #include "llvm/IR/Constants.h"
141829512dSManman Ren #include "llvm/IR/Function.h"
151829512dSManman Ren #include "llvm/IR/GlobalValue.h"
161829512dSManman Ren #include "llvm/IR/IRBuilder.h"
171829512dSManman Ren #include "llvm/IR/Instruction.h"
181829512dSManman Ren #include "llvm/IR/Instructions.h"
191829512dSManman Ren #include "llvm/IR/Metadata.h"
201829512dSManman Ren #include "llvm/IR/Module.h"
2105da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
221829512dSManman Ren #include "llvm/Pass.h"
231829512dSManman Ren #include "llvm/PassRegistry.h"
241829512dSManman Ren #include "llvm/ProfileData/InstrProf.h"
251829512dSManman Ren #include "llvm/Support/CommandLine.h"
261829512dSManman Ren #include "llvm/Support/Debug.h"
271829512dSManman Ren #include "llvm/Support/FileSystem.h"
281829512dSManman Ren #include "llvm/Support/Path.h"
291829512dSManman Ren #include "llvm/Support/raw_ostream.h"
301829512dSManman Ren #include "llvm/Transforms/Instrumentation.h"
311829512dSManman Ren #include <fstream>
321829512dSManman Ren #include <map>
33576124a3SManman Ren #include <mutex>
341829512dSManman Ren #include <set>
351829512dSManman Ren #include <sstream>
361829512dSManman Ren 
371829512dSManman Ren using namespace llvm;
381829512dSManman Ren #define DEBUG_TYPE "instrorderfile"
391829512dSManman Ren 
401829512dSManman Ren static cl::opt<std::string> ClOrderFileWriteMapping(
411829512dSManman Ren     "orderfile-write-mapping", cl::init(""),
421829512dSManman Ren     cl::desc(
431829512dSManman Ren         "Dump functions and their MD5 hash to deobfuscate profile data"),
441829512dSManman Ren     cl::Hidden);
451829512dSManman Ren 
461829512dSManman Ren namespace {
471829512dSManman Ren 
// We need a global bitmap to tell whether a function has been executed. We
// also need a global variable to save the order of functions: a fixed-size
// buffer that stores the MD5 hash of each function's name. Finally, we need
// a global variable to save the current index into that buffer.
521829512dSManman Ren 
/// Held while the function-name mapping file is opened and appended to, so
/// that writers in this process that go through this mutex do not interleave
/// their output.
std::mutex MappingMutex;
541829512dSManman Ren 
551829512dSManman Ren struct InstrOrderFile {
561829512dSManman Ren private:
571829512dSManman Ren   GlobalVariable *OrderFileBuffer;
581829512dSManman Ren   GlobalVariable *BufferIdx;
591829512dSManman Ren   GlobalVariable *BitMap;
601829512dSManman Ren   ArrayType *BufferTy;
611829512dSManman Ren   ArrayType *MapTy;
621829512dSManman Ren 
631829512dSManman Ren public:
641829512dSManman Ren   InstrOrderFile() {}
651829512dSManman Ren 
661829512dSManman Ren   void createOrderFileData(Module &M) {
671829512dSManman Ren     LLVMContext &Ctx = M.getContext();
681829512dSManman Ren     int NumFunctions = 0;
691829512dSManman Ren     for (Function &F : M) {
701829512dSManman Ren       if (!F.isDeclaration())
711829512dSManman Ren         NumFunctions++;
721829512dSManman Ren     }
731829512dSManman Ren 
741829512dSManman Ren     BufferTy =
751829512dSManman Ren         ArrayType::get(Type::getInt64Ty(Ctx), INSTR_ORDER_FILE_BUFFER_SIZE);
761829512dSManman Ren     Type *IdxTy = Type::getInt32Ty(Ctx);
771829512dSManman Ren     MapTy = ArrayType::get(Type::getInt8Ty(Ctx), NumFunctions);
781829512dSManman Ren 
791829512dSManman Ren     // Create the global variables.
801829512dSManman Ren     std::string SymbolName = INSTR_PROF_ORDERFILE_BUFFER_NAME_STR;
811829512dSManman Ren     OrderFileBuffer = new GlobalVariable(M, BufferTy, false, GlobalValue::LinkOnceODRLinkage,
821829512dSManman Ren                            Constant::getNullValue(BufferTy), SymbolName);
831829512dSManman Ren     Triple TT = Triple(M.getTargetTriple());
841829512dSManman Ren     OrderFileBuffer->setSection(
851829512dSManman Ren         getInstrProfSectionName(IPSK_orderfile, TT.getObjectFormat()));
861829512dSManman Ren 
871829512dSManman Ren     std::string IndexName = INSTR_PROF_ORDERFILE_BUFFER_IDX_NAME_STR;
881829512dSManman Ren     BufferIdx = new GlobalVariable(M, IdxTy, false, GlobalValue::LinkOnceODRLinkage,
891829512dSManman Ren                            Constant::getNullValue(IdxTy), IndexName);
901829512dSManman Ren 
911829512dSManman Ren     std::string BitMapName = "bitmap_0";
921829512dSManman Ren     BitMap = new GlobalVariable(M, MapTy, false, GlobalValue::PrivateLinkage,
931829512dSManman Ren                                 Constant::getNullValue(MapTy), BitMapName);
941829512dSManman Ren   }
951829512dSManman Ren 
961829512dSManman Ren   // Generate the code sequence in the entry block of each function to
971829512dSManman Ren   // update the buffer.
981829512dSManman Ren   void generateCodeSequence(Module &M, Function &F, int FuncId) {
991829512dSManman Ren     if (!ClOrderFileWriteMapping.empty()) {
1001829512dSManman Ren       std::lock_guard<std::mutex> LogLock(MappingMutex);
1011829512dSManman Ren       std::error_code EC;
102d9b948b6SFangrui Song       llvm::raw_fd_ostream OS(ClOrderFileWriteMapping, EC,
103d9b948b6SFangrui Song                               llvm::sys::fs::OF_Append);
1041829512dSManman Ren       if (EC) {
1051829512dSManman Ren         report_fatal_error(Twine("Failed to open ") + ClOrderFileWriteMapping +
1061829512dSManman Ren                            " to save mapping file for order file instrumentation\n");
1071829512dSManman Ren       } else {
1081829512dSManman Ren         std::stringstream stream;
1091829512dSManman Ren         stream << std::hex << MD5Hash(F.getName());
1101829512dSManman Ren         std::string singleLine = "MD5 " + stream.str() + " " +
1111829512dSManman Ren                                  std::string(F.getName()) + '\n';
1121829512dSManman Ren         OS << singleLine;
1131829512dSManman Ren       }
1141829512dSManman Ren     }
1151829512dSManman Ren 
1161829512dSManman Ren     BasicBlock *OrigEntry = &F.getEntryBlock();
1171829512dSManman Ren 
1181829512dSManman Ren     LLVMContext &Ctx = M.getContext();
1191829512dSManman Ren     IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
1201829512dSManman Ren     IntegerType *Int8Ty = Type::getInt8Ty(Ctx);
1211829512dSManman Ren 
1221829512dSManman Ren     // Create a new entry block for instrumentation. We will check the bitmap
1231829512dSManman Ren     // in this basic block.
1241829512dSManman Ren     BasicBlock *NewEntry =
1251829512dSManman Ren         BasicBlock::Create(M.getContext(), "order_file_entry", &F, OrigEntry);
1261829512dSManman Ren     IRBuilder<> entryB(NewEntry);
1271829512dSManman Ren     // Create a basic block for updating the circular buffer.
1281829512dSManman Ren     BasicBlock *UpdateOrderFileBB =
1291829512dSManman Ren         BasicBlock::Create(M.getContext(), "order_file_set", &F, OrigEntry);
1301829512dSManman Ren     IRBuilder<> updateB(UpdateOrderFileBB);
1311829512dSManman Ren 
1321829512dSManman Ren     // Check the bitmap, if it is already 1, do nothing.
1331829512dSManman Ren     // Otherwise, set the bit, grab the index, update the buffer.
1341829512dSManman Ren     Value *IdxFlags[] = {ConstantInt::get(Int32Ty, 0),
1351829512dSManman Ren                          ConstantInt::get(Int32Ty, FuncId)};
1361829512dSManman Ren     Value *MapAddr = entryB.CreateGEP(MapTy, BitMap, IdxFlags, "");
1371829512dSManman Ren     LoadInst *loadBitMap = entryB.CreateLoad(Int8Ty, MapAddr, "");
1381829512dSManman Ren     entryB.CreateStore(ConstantInt::get(Int8Ty, 1), MapAddr);
1391829512dSManman Ren     Value *IsNotExecuted =
1401829512dSManman Ren         entryB.CreateICmpEQ(loadBitMap, ConstantInt::get(Int8Ty, 0));
1411829512dSManman Ren     entryB.CreateCondBr(IsNotExecuted, UpdateOrderFileBB, OrigEntry);
1421829512dSManman Ren 
1431829512dSManman Ren     // Fill up UpdateOrderFileBB: grab the index, update the buffer!
1441829512dSManman Ren     Value *IdxVal = updateB.CreateAtomicRMW(
1451829512dSManman Ren         AtomicRMWInst::Add, BufferIdx, ConstantInt::get(Int32Ty, 1),
14624539f1eSJames Y Knight         MaybeAlign(), AtomicOrdering::SequentiallyConsistent);
1471829512dSManman Ren     // We need to wrap around the index to fit it inside the buffer.
1481829512dSManman Ren     Value *WrappedIdx = updateB.CreateAnd(
1491829512dSManman Ren         IdxVal, ConstantInt::get(Int32Ty, INSTR_ORDER_FILE_BUFFER_MASK));
1501829512dSManman Ren     Value *BufferGEPIdx[] = {ConstantInt::get(Int32Ty, 0), WrappedIdx};
1511829512dSManman Ren     Value *BufferAddr =
1521829512dSManman Ren         updateB.CreateGEP(BufferTy, OrderFileBuffer, BufferGEPIdx, "");
1531829512dSManman Ren     updateB.CreateStore(ConstantInt::get(Type::getInt64Ty(Ctx), MD5Hash(F.getName())),
1541829512dSManman Ren                         BufferAddr);
1551829512dSManman Ren     updateB.CreateBr(OrigEntry);
1561829512dSManman Ren   }
1571829512dSManman Ren 
1581829512dSManman Ren   bool run(Module &M) {
1591829512dSManman Ren     createOrderFileData(M);
1601829512dSManman Ren 
1611829512dSManman Ren     int FuncId = 0;
1621829512dSManman Ren     for (Function &F : M) {
1631829512dSManman Ren       if (F.isDeclaration())
1641829512dSManman Ren         continue;
1651829512dSManman Ren       generateCodeSequence(M, F, FuncId);
1661829512dSManman Ren       ++FuncId;
1671829512dSManman Ren     }
1681829512dSManman Ren 
1691829512dSManman Ren     return true;
1701829512dSManman Ren   }
1711829512dSManman Ren 
1721829512dSManman Ren }; // End of InstrOrderFile struct
1731829512dSManman Ren 
1741829512dSManman Ren class InstrOrderFileLegacyPass : public ModulePass {
1751829512dSManman Ren public:
1761829512dSManman Ren   static char ID;
1771829512dSManman Ren 
1781829512dSManman Ren   InstrOrderFileLegacyPass() : ModulePass(ID) {
1791829512dSManman Ren     initializeInstrOrderFileLegacyPassPass(
1801829512dSManman Ren         *PassRegistry::getPassRegistry());
1811829512dSManman Ren   }
1821829512dSManman Ren 
1831829512dSManman Ren   bool runOnModule(Module &M) override;
1841829512dSManman Ren };
1851829512dSManman Ren 
1861829512dSManman Ren } // End anonymous namespace
1871829512dSManman Ren 
1881829512dSManman Ren bool InstrOrderFileLegacyPass::runOnModule(Module &M) {
1891829512dSManman Ren   if (skipModule(M))
1901829512dSManman Ren     return false;
1911829512dSManman Ren 
1921829512dSManman Ren   return InstrOrderFile().run(M);
1931829512dSManman Ren }
1941829512dSManman Ren 
1951829512dSManman Ren PreservedAnalyses
1961829512dSManman Ren InstrOrderFilePass::run(Module &M, ModuleAnalysisManager &AM) {
1971829512dSManman Ren   if (InstrOrderFile().run(M))
1981829512dSManman Ren     return PreservedAnalyses::none();
1991829512dSManman Ren   return PreservedAnalyses::all();
2001829512dSManman Ren }
2011829512dSManman Ren 
2021829512dSManman Ren INITIALIZE_PASS_BEGIN(InstrOrderFileLegacyPass, "instrorderfile",
2031829512dSManman Ren                       "Instrumentation for Order File", false, false)
2041829512dSManman Ren INITIALIZE_PASS_END(InstrOrderFileLegacyPass, "instrorderfile",
2051829512dSManman Ren                     "Instrumentation for Order File", false, false)
2061829512dSManman Ren 
2071829512dSManman Ren char InstrOrderFileLegacyPass::ID = 0;
2081829512dSManman Ren 
2091829512dSManman Ren ModulePass *llvm::createInstrOrderFilePass() {
2101829512dSManman Ren   return new InstrOrderFileLegacyPass();
2111829512dSManman Ren }
212