1 //===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 //===----------------------------------------------------------------------===// 11 12 #include "polly/CodeGen/PerfMonitor.h" 13 #include "polly/CodeGen/RuntimeDebugBuilder.h" 14 #include "llvm/ADT/Triple.h" 15 16 using namespace llvm; 17 using namespace polly; 18 19 Function *PerfMonitor::getAtExit() { 20 const char *Name = "atexit"; 21 Function *F = M->getFunction(Name); 22 23 if (!F) { 24 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 25 FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), 26 {Builder.getInt8PtrTy()}, false); 27 F = Function::Create(Ty, Linkage, Name, M); 28 } 29 30 return F; 31 } 32 33 void PerfMonitor::addToGlobalConstructors(Function *Fn) { 34 const char *Name = "llvm.global_ctors"; 35 GlobalVariable *GV = M->getGlobalVariable(Name); 36 std::vector<Constant *> V; 37 38 if (GV) { 39 Constant *Array = GV->getInitializer(); 40 for (Value *X : Array->operand_values()) 41 V.push_back(cast<Constant>(X)); 42 GV->eraseFromParent(); 43 } 44 45 StructType *ST = StructType::get(Builder.getInt32Ty(), Fn->getType(), 46 Builder.getInt8PtrTy(), nullptr); 47 48 V.push_back(ConstantStruct::get( 49 ST, Builder.getInt32(10), Fn, 50 ConstantPointerNull::get(Builder.getInt8PtrTy()), nullptr)); 51 ArrayType *Ty = ArrayType::get(ST, V.size()); 52 53 GV = new GlobalVariable(*M, Ty, true, GlobalValue::AppendingLinkage, 54 ConstantArray::get(Ty, V), Name, nullptr, 55 GlobalVariable::NotThreadLocal); 56 } 57 58 Function *PerfMonitor::getRDTSCP() { 59 const char *Name = "llvm.x86.rdtscp"; 60 Function *F = M->getFunction(Name); 61 62 if (!F) { 63 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 64 FunctionType *Ty = FunctionType::get(Builder.getInt64Ty(), 65 {Builder.getInt8PtrTy()}, false); 66 F = Function::Create(Ty, Linkage, Name, M); 67 } 68 69 return F; 70 } 71 72 PerfMonitor::PerfMonitor(Module *M) : M(M), Builder(M->getContext()) { 73 if (Triple(M->getTargetTriple()).getArch() == llvm::Triple::x86_64) 74 Supported = true; 75 else 76 Supported = false; 77 } 78 79 void PerfMonitor::addGlobalVariables() { 80 auto TryRegisterGlobal = [=](const char *Name, Constant *InitialValue, 81 Value **Location) { 82 *Location = M->getGlobalVariable(Name); 83 84 if (!*Location) 85 *Location = new GlobalVariable( 86 *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage, 87 InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel); 88 }; 89 90 TryRegisterGlobal("__polly_perf_cycles_total_start", Builder.getInt64(0), 91 &CyclesTotalStartPtr); 92 93 TryRegisterGlobal("__polly_perf_initialized", Builder.getInt1(0), 94 &AlreadyInitializedPtr); 95 96 TryRegisterGlobal("__polly_perf_cycles_in_scops", Builder.getInt64(0), 97 &CyclesInScopsPtr); 98 99 TryRegisterGlobal("__polly_perf_cycles_in_scop_start", Builder.getInt64(0), 100 &CyclesInScopStartPtr); 101 102 TryRegisterGlobal("__polly_perf_write_loation", Builder.getInt32(0), 103 &RDTSCPWriteLocation); 104 } 105 106 static const char *InitFunctionName = "__polly_perf_init"; 107 static const char *FinalReportingFunctionName = "__polly_perf_final"; 108 109 Function *PerfMonitor::insertFinalReporting() { 110 // Create new function. 111 GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage; 112 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false); 113 Function *ExitFn = 114 Function::Create(Ty, Linkage, FinalReportingFunctionName, M); 115 BasicBlock *Start = BasicBlock::Create(M->getContext(), "start", ExitFn); 116 Builder.SetInsertPoint(Start); 117 118 if (!Supported) { 119 RuntimeDebugBuilder::createCPUPrinter( 120 Builder, "Polly runtime information generation not supported\n"); 121 Builder.CreateRetVoid(); 122 return ExitFn; 123 } 124 125 // Measure current cycles and compute final timings. 126 Function *RDTSCPFn = getRDTSCP(); 127 Value *CurrentCycles = Builder.CreateCall( 128 RDTSCPFn, 129 Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy())); 130 Value *CyclesStart = Builder.CreateLoad(CyclesTotalStartPtr, true); 131 Value *CyclesTotal = Builder.CreateSub(CurrentCycles, CyclesStart); 132 Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true); 133 134 // Print the runtime information. 135 RuntimeDebugBuilder::createCPUPrinter(Builder, "Polly runtime information\n"); 136 RuntimeDebugBuilder::createCPUPrinter(Builder, "-------------------------\n"); 137 RuntimeDebugBuilder::createCPUPrinter(Builder, "Total: ", CyclesTotal, "\n"); 138 RuntimeDebugBuilder::createCPUPrinter(Builder, "Scops: ", CyclesInScops, 139 "\n"); 140 141 // Finalize function. 142 Builder.CreateRetVoid(); 143 return ExitFn; 144 } 145 146 void PerfMonitor::initialize() { 147 addGlobalVariables(); 148 149 Function *F = M->getFunction(InitFunctionName); 150 if (F) 151 return; 152 153 // initialize 154 Function *FinalReporting = insertFinalReporting(); 155 Function *InitFn = insertInitFunction(FinalReporting); 156 addToGlobalConstructors(InitFn); 157 } 158 159 Function *PerfMonitor::insertInitFunction(Function *FinalReporting) { 160 // Insert function definition and BBs. 161 GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage; 162 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false); 163 Function *InitFn = Function::Create(Ty, Linkage, InitFunctionName, M); 164 BasicBlock *Start = BasicBlock::Create(M->getContext(), "start", InitFn); 165 BasicBlock *EarlyReturn = 166 BasicBlock::Create(M->getContext(), "earlyreturn", InitFn); 167 BasicBlock *InitBB = BasicBlock::Create(M->getContext(), "initbb", InitFn); 168 169 Builder.SetInsertPoint(Start); 170 171 // Check if this function was already run. If yes, return. 172 // 173 // In case profiling has been enabled in multiple translation units, the 174 // initializer function will be added to the global constructors list of 175 // each translation unit. When merging translation units, the global 176 // constructor lists are just appended, such that the initializer will appear 177 // multiple times. To avoid initializations being run multiple times (and 178 // especially to avoid that atExitFn is called more than once), we bail 179 // out if the intializer is run more than once. 180 Value *HasRunBefore = Builder.CreateLoad(AlreadyInitializedPtr); 181 Builder.CreateCondBr(HasRunBefore, EarlyReturn, InitBB); 182 Builder.SetInsertPoint(EarlyReturn); 183 Builder.CreateRetVoid(); 184 185 // Keep track that this function has been run once. 186 Builder.SetInsertPoint(InitBB); 187 Value *True = Builder.getInt1(true); 188 Builder.CreateStore(True, AlreadyInitializedPtr); 189 190 // Register the final reporting function with atexit(). 191 Value *FinalReportingPtr = 192 Builder.CreatePointerCast(FinalReporting, Builder.getInt8PtrTy()); 193 Function *AtExitFn = getAtExit(); 194 Builder.CreateCall(AtExitFn, {FinalReportingPtr}); 195 196 if (Supported) { 197 // Read the currently cycle counter and store the result for later. 198 Function *RDTSCPFn = getRDTSCP(); 199 Value *CurrentCycles = Builder.CreateCall( 200 RDTSCPFn, 201 Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy())); 202 Builder.CreateStore(CurrentCycles, CyclesTotalStartPtr, true); 203 } 204 Builder.CreateRetVoid(); 205 206 return InitFn; 207 } 208 209 void PerfMonitor::insertRegionStart(Instruction *InsertBefore) { 210 if (!Supported) 211 return; 212 213 Builder.SetInsertPoint(InsertBefore); 214 Function *RDTSCPFn = getRDTSCP(); 215 Value *CurrentCycles = Builder.CreateCall( 216 RDTSCPFn, 217 Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy())); 218 Builder.CreateStore(CurrentCycles, CyclesInScopStartPtr, true); 219 } 220 221 void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) { 222 if (!Supported) 223 return; 224 225 Builder.SetInsertPoint(InsertBefore); 226 Function *RDTSCPFn = getRDTSCP(); 227 LoadInst *CyclesStart = Builder.CreateLoad(CyclesInScopStartPtr, true); 228 Value *CurrentCycles = Builder.CreateCall( 229 RDTSCPFn, 230 Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy())); 231 Value *CyclesInScop = Builder.CreateSub(CurrentCycles, CyclesStart); 232 Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true); 233 CyclesInScops = Builder.CreateAdd(CyclesInScops, CyclesInScop); 234 Builder.CreateStore(CyclesInScops, CyclesInScopsPtr, true); 235 } 236