1 //===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 //===----------------------------------------------------------------------===// 11 12 #include "polly/CodeGen/PerfMonitor.h" 13 #include "polly/CodeGen/RuntimeDebugBuilder.h" 14 #include "polly/ScopInfo.h" 15 #include "llvm/ADT/Triple.h" 16 #include "llvm/IR/Intrinsics.h" 17 #include <sstream> 18 19 using namespace llvm; 20 using namespace polly; 21 22 Function *PerfMonitor::getAtExit() { 23 const char *Name = "atexit"; 24 Function *F = M->getFunction(Name); 25 26 if (!F) { 27 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 28 FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), 29 {Builder.getInt8PtrTy()}, false); 30 F = Function::Create(Ty, Linkage, Name, M); 31 } 32 33 return F; 34 } 35 36 void PerfMonitor::addToGlobalConstructors(Function *Fn) { 37 const char *Name = "llvm.global_ctors"; 38 GlobalVariable *GV = M->getGlobalVariable(Name); 39 std::vector<Constant *> V; 40 41 if (GV) { 42 Constant *Array = GV->getInitializer(); 43 for (Value *X : Array->operand_values()) 44 V.push_back(cast<Constant>(X)); 45 GV->eraseFromParent(); 46 } 47 48 StructType *ST = StructType::get(Builder.getInt32Ty(), Fn->getType(), 49 Builder.getInt8PtrTy()); 50 51 V.push_back( 52 ConstantStruct::get(ST, Builder.getInt32(10), Fn, 53 ConstantPointerNull::get(Builder.getInt8PtrTy()))); 54 ArrayType *Ty = ArrayType::get(ST, V.size()); 55 56 GV = new GlobalVariable(*M, Ty, true, GlobalValue::AppendingLinkage, 57 ConstantArray::get(Ty, V), Name, nullptr, 58 GlobalVariable::NotThreadLocal); 59 } 60 61 Function *PerfMonitor::getRDTSCP() { 62 return Intrinsic::getDeclaration(M, Intrinsic::x86_rdtscp); 63 } 64 65 PerfMonitor::PerfMonitor(const Scop &S, Module *M) 66 : M(M), Builder(M->getContext()), S(S) { 67 if (Triple(M->getTargetTriple()).getArch() == llvm::Triple::x86_64) 68 Supported = true; 69 else 70 Supported = false; 71 } 72 73 static void TryRegisterGlobal(Module *M, const char *Name, 74 Constant *InitialValue, Value **Location) { 75 *Location = M->getGlobalVariable(Name); 76 77 if (!*Location) 78 *Location = new GlobalVariable( 79 *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage, 80 InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel); 81 } 82 83 // Generate a unique name that is usable as a LLVM name for a scop to name its 84 // performance counter. 85 static std::string GetScopUniqueVarname(const Scop &S) { 86 std::stringstream Name; 87 std::string EntryString, ExitString; 88 std::tie(EntryString, ExitString) = S.getEntryExitStr(); 89 90 Name << "__polly_perf_in_" << std::string(S.getFunction().getName()) 91 << "_from__" << EntryString << "__to__" << ExitString; 92 return Name.str(); 93 } 94 95 void PerfMonitor::addScopCounter() { 96 const std::string varname = GetScopUniqueVarname(S); 97 TryRegisterGlobal(M, (varname + "_cycles").c_str(), Builder.getInt64(0), 98 &CyclesInCurrentScopPtr); 99 100 TryRegisterGlobal(M, (varname + "_trip_count").c_str(), Builder.getInt64(0), 101 &TripCountForCurrentScopPtr); 102 } 103 104 void PerfMonitor::addGlobalVariables() { 105 TryRegisterGlobal(M, "__polly_perf_cycles_total_start", Builder.getInt64(0), 106 &CyclesTotalStartPtr); 107 108 TryRegisterGlobal(M, "__polly_perf_initialized", Builder.getInt1(0), 109 &AlreadyInitializedPtr); 110 111 TryRegisterGlobal(M, "__polly_perf_cycles_in_scops", Builder.getInt64(0), 112 &CyclesInScopsPtr); 113 114 TryRegisterGlobal(M, "__polly_perf_cycles_in_scop_start", Builder.getInt64(0), 115 &CyclesInScopStartPtr); 116 117 TryRegisterGlobal(M, "__polly_perf_write_loation", Builder.getInt32(0), 118 &RDTSCPWriteLocation); 119 } 120 121 static const char *InitFunctionName = "__polly_perf_init"; 122 static const char *FinalReportingFunctionName = "__polly_perf_final"; 123 124 static BasicBlock *FinalStartBB = nullptr; 125 static ReturnInst *ReturnFromFinal = nullptr; 126 127 Function *PerfMonitor::insertFinalReporting() { 128 // Create new function. 129 GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage; 130 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false); 131 Function *ExitFn = 132 Function::Create(Ty, Linkage, FinalReportingFunctionName, M); 133 FinalStartBB = BasicBlock::Create(M->getContext(), "start", ExitFn); 134 Builder.SetInsertPoint(FinalStartBB); 135 136 if (!Supported) { 137 RuntimeDebugBuilder::createCPUPrinter( 138 Builder, "Polly runtime information generation not supported\n"); 139 Builder.CreateRetVoid(); 140 return ExitFn; 141 } 142 143 // Measure current cycles and compute final timings. 144 Function *RDTSCPFn = getRDTSCP(); 145 Value *CurrentCycles = Builder.CreateCall( 146 RDTSCPFn, 147 Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy())); 148 Value *CyclesStart = Builder.CreateLoad(CyclesTotalStartPtr, true); 149 Value *CyclesTotal = Builder.CreateSub(CurrentCycles, CyclesStart); 150 Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true); 151 152 // Print the runtime information. 153 RuntimeDebugBuilder::createCPUPrinter(Builder, "Polly runtime information\n"); 154 RuntimeDebugBuilder::createCPUPrinter(Builder, "-------------------------\n"); 155 RuntimeDebugBuilder::createCPUPrinter(Builder, "Total: ", CyclesTotal, "\n"); 156 RuntimeDebugBuilder::createCPUPrinter(Builder, "Scops: ", CyclesInScops, 157 "\n"); 158 159 // Print the preamble for per-scop information. 160 RuntimeDebugBuilder::createCPUPrinter(Builder, "\n"); 161 RuntimeDebugBuilder::createCPUPrinter(Builder, "Per SCoP information\n"); 162 RuntimeDebugBuilder::createCPUPrinter(Builder, "--------------------\n"); 163 164 RuntimeDebugBuilder::createCPUPrinter( 165 Builder, "scop function, " 166 "entry block name, exit block name, total time, trip count\n"); 167 ReturnFromFinal = Builder.CreateRetVoid(); 168 return ExitFn; 169 } 170 171 void PerfMonitor::AppendScopReporting() { 172 if (!Supported) 173 return; 174 175 assert(FinalStartBB && "Expected FinalStartBB to be initialized by " 176 "PerfMonitor::insertFinalReporting."); 177 assert(ReturnFromFinal && "Expected ReturnFromFinal to be initialized by " 178 "PerfMonitor::insertFinalReporting."); 179 180 Builder.SetInsertPoint(FinalStartBB); 181 ReturnFromFinal->eraseFromParent(); 182 183 Value *CyclesInCurrentScop = 184 Builder.CreateLoad(this->CyclesInCurrentScopPtr, true); 185 186 Value *TripCountForCurrentScop = 187 Builder.CreateLoad(this->TripCountForCurrentScopPtr, true); 188 189 std::string EntryName, ExitName; 190 std::tie(EntryName, ExitName) = S.getEntryExitStr(); 191 192 // print in CSV for easy parsing with other tools. 193 RuntimeDebugBuilder::createCPUPrinter( 194 Builder, S.getFunction().getName(), ", ", EntryName, ", ", ExitName, ", ", 195 CyclesInCurrentScop, ", ", TripCountForCurrentScop, "\n"); 196 197 ReturnFromFinal = Builder.CreateRetVoid(); 198 } 199 200 static Function *FinalReporting = nullptr; 201 202 void PerfMonitor::initialize() { 203 addGlobalVariables(); 204 addScopCounter(); 205 206 // Ensure that we only add the final reporting function once. 207 // On later invocations, append to the reporting function. 208 if (!FinalReporting) { 209 FinalReporting = insertFinalReporting(); 210 211 Function *InitFn = insertInitFunction(FinalReporting); 212 addToGlobalConstructors(InitFn); 213 } 214 215 AppendScopReporting(); 216 } 217 218 Function *PerfMonitor::insertInitFunction(Function *FinalReporting) { 219 // Insert function definition and BBs. 220 GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage; 221 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false); 222 Function *InitFn = Function::Create(Ty, Linkage, InitFunctionName, M); 223 BasicBlock *Start = BasicBlock::Create(M->getContext(), "start", InitFn); 224 BasicBlock *EarlyReturn = 225 BasicBlock::Create(M->getContext(), "earlyreturn", InitFn); 226 BasicBlock *InitBB = BasicBlock::Create(M->getContext(), "initbb", InitFn); 227 228 Builder.SetInsertPoint(Start); 229 230 // Check if this function was already run. If yes, return. 231 // 232 // In case profiling has been enabled in multiple translation units, the 233 // initializer function will be added to the global constructors list of 234 // each translation unit. When merging translation units, the global 235 // constructor lists are just appended, such that the initializer will appear 236 // multiple times. To avoid initializations being run multiple times (and 237 // especially to avoid that atExitFn is called more than once), we bail 238 // out if the intializer is run more than once. 239 Value *HasRunBefore = Builder.CreateLoad(AlreadyInitializedPtr); 240 Builder.CreateCondBr(HasRunBefore, EarlyReturn, InitBB); 241 Builder.SetInsertPoint(EarlyReturn); 242 Builder.CreateRetVoid(); 243 244 // Keep track that this function has been run once. 245 Builder.SetInsertPoint(InitBB); 246 Value *True = Builder.getInt1(true); 247 Builder.CreateStore(True, AlreadyInitializedPtr); 248 249 // Register the final reporting function with atexit(). 250 Value *FinalReportingPtr = 251 Builder.CreatePointerCast(FinalReporting, Builder.getInt8PtrTy()); 252 Function *AtExitFn = getAtExit(); 253 Builder.CreateCall(AtExitFn, {FinalReportingPtr}); 254 255 if (Supported) { 256 // Read the currently cycle counter and store the result for later. 257 Function *RDTSCPFn = getRDTSCP(); 258 Value *CurrentCycles = Builder.CreateCall( 259 RDTSCPFn, 260 Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy())); 261 Builder.CreateStore(CurrentCycles, CyclesTotalStartPtr, true); 262 } 263 Builder.CreateRetVoid(); 264 265 return InitFn; 266 } 267 268 void PerfMonitor::insertRegionStart(Instruction *InsertBefore) { 269 if (!Supported) 270 return; 271 272 Builder.SetInsertPoint(InsertBefore); 273 Function *RDTSCPFn = getRDTSCP(); 274 Value *CurrentCycles = Builder.CreateCall( 275 RDTSCPFn, 276 Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy())); 277 Builder.CreateStore(CurrentCycles, CyclesInScopStartPtr, true); 278 } 279 280 void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) { 281 if (!Supported) 282 return; 283 284 Builder.SetInsertPoint(InsertBefore); 285 Function *RDTSCPFn = getRDTSCP(); 286 LoadInst *CyclesStart = Builder.CreateLoad(CyclesInScopStartPtr, true); 287 Value *CurrentCycles = Builder.CreateCall( 288 RDTSCPFn, 289 Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy())); 290 Value *CyclesInScop = Builder.CreateSub(CurrentCycles, CyclesStart); 291 Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true); 292 CyclesInScops = Builder.CreateAdd(CyclesInScops, CyclesInScop); 293 Builder.CreateStore(CyclesInScops, CyclesInScopsPtr, true); 294 295 Value *CyclesInCurrentScop = Builder.CreateLoad(CyclesInCurrentScopPtr, true); 296 CyclesInCurrentScop = Builder.CreateAdd(CyclesInCurrentScop, CyclesInScop); 297 Builder.CreateStore(CyclesInCurrentScop, CyclesInCurrentScopPtr, true); 298 299 Value *TripCountForCurrentScop = 300 Builder.CreateLoad(TripCountForCurrentScopPtr, true); 301 TripCountForCurrentScop = 302 Builder.CreateAdd(TripCountForCurrentScop, Builder.getInt64(1)); 303 Builder.CreateStore(TripCountForCurrentScop, TripCountForCurrentScopPtr, 304 true); 305 } 306