1 //===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 //===----------------------------------------------------------------------===// 10 11 #include "polly/CodeGen/PerfMonitor.h" 12 #include "polly/CodeGen/RuntimeDebugBuilder.h" 13 #include "polly/ScopInfo.h" 14 #include "llvm/ADT/Triple.h" 15 #include "llvm/ADT/Twine.h" 16 #include "llvm/IR/IntrinsicsX86.h" 17 18 using namespace llvm; 19 using namespace polly; 20 21 Function *PerfMonitor::getAtExit() { 22 const char *Name = "atexit"; 23 Function *F = M->getFunction(Name); 24 25 if (!F) { 26 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 27 FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), 28 {Builder.getInt8PtrTy()}, false); 29 F = Function::Create(Ty, Linkage, Name, M); 30 } 31 32 return F; 33 } 34 35 void PerfMonitor::addToGlobalConstructors(Function *Fn) { 36 const char *Name = "llvm.global_ctors"; 37 GlobalVariable *GV = M->getGlobalVariable(Name); 38 std::vector<Constant *> V; 39 40 if (GV) { 41 Constant *Array = GV->getInitializer(); 42 for (Value *X : Array->operand_values()) 43 V.push_back(cast<Constant>(X)); 44 GV->eraseFromParent(); 45 } 46 47 StructType *ST = StructType::get(Builder.getInt32Ty(), Fn->getType(), 48 Builder.getInt8PtrTy()); 49 50 V.push_back( 51 ConstantStruct::get(ST, Builder.getInt32(10), Fn, 52 ConstantPointerNull::get(Builder.getInt8PtrTy()))); 53 ArrayType *Ty = ArrayType::get(ST, V.size()); 54 55 GV = new GlobalVariable(*M, Ty, true, GlobalValue::AppendingLinkage, 56 ConstantArray::get(Ty, V), Name, nullptr, 57 GlobalVariable::NotThreadLocal); 58 } 59 60 Function *PerfMonitor::getRDTSCP() { 61 return Intrinsic::getDeclaration(M, Intrinsic::x86_rdtscp); 62 } 63 64 PerfMonitor::PerfMonitor(const Scop &S, Module *M) 65 : M(M), Builder(M->getContext()), S(S) { 66 if (Triple(M->getTargetTriple()).getArch() == llvm::Triple::x86_64) 67 Supported = true; 68 else 69 Supported = false; 70 } 71 72 static void TryRegisterGlobal(Module *M, const char *Name, 73 Constant *InitialValue, Value **Location) { 74 *Location = M->getGlobalVariable(Name); 75 76 if (!*Location) 77 *Location = new GlobalVariable( 78 *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage, 79 InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel); 80 } 81 82 // Generate a unique name that is usable as a LLVM name for a scop to name its 83 // performance counter. 84 static std::string GetScopUniqueVarname(const Scop &S) { 85 std::string EntryString, ExitString; 86 std::tie(EntryString, ExitString) = S.getEntryExitStr(); 87 88 return (Twine("__polly_perf_in_") + S.getFunction().getName() + "_from__" + 89 EntryString + "__to__" + ExitString) 90 .str(); 91 } 92 93 void PerfMonitor::addScopCounter() { 94 const std::string varname = GetScopUniqueVarname(S); 95 TryRegisterGlobal(M, (varname + "_cycles").c_str(), Builder.getInt64(0), 96 &CyclesInCurrentScopPtr); 97 98 TryRegisterGlobal(M, (varname + "_trip_count").c_str(), Builder.getInt64(0), 99 &TripCountForCurrentScopPtr); 100 } 101 102 void PerfMonitor::addGlobalVariables() { 103 TryRegisterGlobal(M, "__polly_perf_cycles_total_start", Builder.getInt64(0), 104 &CyclesTotalStartPtr); 105 106 TryRegisterGlobal(M, "__polly_perf_initialized", Builder.getInt1(false), 107 &AlreadyInitializedPtr); 108 109 TryRegisterGlobal(M, "__polly_perf_cycles_in_scops", Builder.getInt64(0), 110 &CyclesInScopsPtr); 111 112 TryRegisterGlobal(M, "__polly_perf_cycles_in_scop_start", Builder.getInt64(0), 113 &CyclesInScopStartPtr); 114 } 115 116 static const char *InitFunctionName = "__polly_perf_init"; 117 static const char *FinalReportingFunctionName = "__polly_perf_final"; 118 119 static BasicBlock *FinalStartBB = nullptr; 120 static ReturnInst *ReturnFromFinal = nullptr; 121 122 Function *PerfMonitor::insertFinalReporting() { 123 // Create new function. 124 GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage; 125 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false); 126 Function *ExitFn = 127 Function::Create(Ty, Linkage, FinalReportingFunctionName, M); 128 FinalStartBB = BasicBlock::Create(M->getContext(), "start", ExitFn); 129 Builder.SetInsertPoint(FinalStartBB); 130 131 if (!Supported) { 132 RuntimeDebugBuilder::createCPUPrinter( 133 Builder, "Polly runtime information generation not supported\n"); 134 Builder.CreateRetVoid(); 135 return ExitFn; 136 } 137 138 // Measure current cycles and compute final timings. 139 Function *RDTSCPFn = getRDTSCP(); 140 141 Type *Int64Ty = Builder.getInt64Ty(); 142 Value *CurrentCycles = 143 Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0}); 144 Value *CyclesStart = Builder.CreateLoad(Int64Ty, CyclesTotalStartPtr, true); 145 Value *CyclesTotal = Builder.CreateSub(CurrentCycles, CyclesStart); 146 Value *CyclesInScops = Builder.CreateLoad(Int64Ty, CyclesInScopsPtr, true); 147 148 // Print the runtime information. 149 RuntimeDebugBuilder::createCPUPrinter(Builder, "Polly runtime information\n"); 150 RuntimeDebugBuilder::createCPUPrinter(Builder, "-------------------------\n"); 151 RuntimeDebugBuilder::createCPUPrinter(Builder, "Total: ", CyclesTotal, "\n"); 152 RuntimeDebugBuilder::createCPUPrinter(Builder, "Scops: ", CyclesInScops, 153 "\n"); 154 155 // Print the preamble for per-scop information. 156 RuntimeDebugBuilder::createCPUPrinter(Builder, "\n"); 157 RuntimeDebugBuilder::createCPUPrinter(Builder, "Per SCoP information\n"); 158 RuntimeDebugBuilder::createCPUPrinter(Builder, "--------------------\n"); 159 160 RuntimeDebugBuilder::createCPUPrinter( 161 Builder, "scop function, " 162 "entry block name, exit block name, total time, trip count\n"); 163 ReturnFromFinal = Builder.CreateRetVoid(); 164 return ExitFn; 165 } 166 167 void PerfMonitor::AppendScopReporting() { 168 if (!Supported) 169 return; 170 171 assert(FinalStartBB && "Expected FinalStartBB to be initialized by " 172 "PerfMonitor::insertFinalReporting."); 173 assert(ReturnFromFinal && "Expected ReturnFromFinal to be initialized by " 174 "PerfMonitor::insertFinalReporting."); 175 176 Builder.SetInsertPoint(FinalStartBB); 177 ReturnFromFinal->eraseFromParent(); 178 179 Type *Int64Ty = Builder.getInt64Ty(); 180 Value *CyclesInCurrentScop = 181 Builder.CreateLoad(Int64Ty, this->CyclesInCurrentScopPtr, true); 182 183 Value *TripCountForCurrentScop = 184 Builder.CreateLoad(Int64Ty, this->TripCountForCurrentScopPtr, true); 185 186 std::string EntryName, ExitName; 187 std::tie(EntryName, ExitName) = S.getEntryExitStr(); 188 189 // print in CSV for easy parsing with other tools. 190 RuntimeDebugBuilder::createCPUPrinter( 191 Builder, S.getFunction().getName(), ", ", EntryName, ", ", ExitName, ", ", 192 CyclesInCurrentScop, ", ", TripCountForCurrentScop, "\n"); 193 194 ReturnFromFinal = Builder.CreateRetVoid(); 195 } 196 197 static Function *FinalReporting = nullptr; 198 199 void PerfMonitor::initialize() { 200 addGlobalVariables(); 201 addScopCounter(); 202 203 // Ensure that we only add the final reporting function once. 204 // On later invocations, append to the reporting function. 205 if (!FinalReporting) { 206 FinalReporting = insertFinalReporting(); 207 208 Function *InitFn = insertInitFunction(FinalReporting); 209 addToGlobalConstructors(InitFn); 210 } 211 212 AppendScopReporting(); 213 } 214 215 Function *PerfMonitor::insertInitFunction(Function *FinalReporting) { 216 // Insert function definition and BBs. 217 GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage; 218 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false); 219 Function *InitFn = Function::Create(Ty, Linkage, InitFunctionName, M); 220 BasicBlock *Start = BasicBlock::Create(M->getContext(), "start", InitFn); 221 BasicBlock *EarlyReturn = 222 BasicBlock::Create(M->getContext(), "earlyreturn", InitFn); 223 BasicBlock *InitBB = BasicBlock::Create(M->getContext(), "initbb", InitFn); 224 225 Builder.SetInsertPoint(Start); 226 227 // Check if this function was already run. If yes, return. 228 // 229 // In case profiling has been enabled in multiple translation units, the 230 // initializer function will be added to the global constructors list of 231 // each translation unit. When merging translation units, the global 232 // constructor lists are just appended, such that the initializer will appear 233 // multiple times. To avoid initializations being run multiple times (and 234 // especially to avoid that atExitFn is called more than once), we bail 235 // out if the initializer is run more than once. 236 Value *HasRunBefore = 237 Builder.CreateLoad(Builder.getInt1Ty(), AlreadyInitializedPtr); 238 Builder.CreateCondBr(HasRunBefore, EarlyReturn, InitBB); 239 Builder.SetInsertPoint(EarlyReturn); 240 Builder.CreateRetVoid(); 241 242 // Keep track that this function has been run once. 243 Builder.SetInsertPoint(InitBB); 244 Value *True = Builder.getInt1(true); 245 Builder.CreateStore(True, AlreadyInitializedPtr); 246 247 // Register the final reporting function with atexit(). 248 Value *FinalReportingPtr = 249 Builder.CreatePointerCast(FinalReporting, Builder.getInt8PtrTy()); 250 Function *AtExitFn = getAtExit(); 251 Builder.CreateCall(AtExitFn, {FinalReportingPtr}); 252 253 if (Supported) { 254 // Read the currently cycle counter and store the result for later. 255 Function *RDTSCPFn = getRDTSCP(); 256 Value *CurrentCycles = 257 Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0}); 258 Builder.CreateStore(CurrentCycles, CyclesTotalStartPtr, true); 259 } 260 Builder.CreateRetVoid(); 261 262 return InitFn; 263 } 264 265 void PerfMonitor::insertRegionStart(Instruction *InsertBefore) { 266 if (!Supported) 267 return; 268 269 Builder.SetInsertPoint(InsertBefore); 270 Function *RDTSCPFn = getRDTSCP(); 271 Value *CurrentCycles = 272 Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0}); 273 Builder.CreateStore(CurrentCycles, CyclesInScopStartPtr, true); 274 } 275 276 void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) { 277 if (!Supported) 278 return; 279 280 Builder.SetInsertPoint(InsertBefore); 281 Function *RDTSCPFn = getRDTSCP(); 282 Type *Int64Ty = Builder.getInt64Ty(); 283 LoadInst *CyclesStart = 284 Builder.CreateLoad(Int64Ty, CyclesInScopStartPtr, true); 285 Value *CurrentCycles = 286 Builder.CreateExtractValue(Builder.CreateCall(RDTSCPFn), {0}); 287 Value *CyclesInScop = Builder.CreateSub(CurrentCycles, CyclesStart); 288 Value *CyclesInScops = Builder.CreateLoad(Int64Ty, CyclesInScopsPtr, true); 289 CyclesInScops = Builder.CreateAdd(CyclesInScops, CyclesInScop); 290 Builder.CreateStore(CyclesInScops, CyclesInScopsPtr, true); 291 292 Value *CyclesInCurrentScop = 293 Builder.CreateLoad(Int64Ty, CyclesInCurrentScopPtr, true); 294 CyclesInCurrentScop = Builder.CreateAdd(CyclesInCurrentScop, CyclesInScop); 295 Builder.CreateStore(CyclesInCurrentScop, CyclesInCurrentScopPtr, true); 296 297 Value *TripCountForCurrentScop = 298 Builder.CreateLoad(Int64Ty, TripCountForCurrentScopPtr, true); 299 TripCountForCurrentScop = 300 Builder.CreateAdd(TripCountForCurrentScop, Builder.getInt64(1)); 301 Builder.CreateStore(TripCountForCurrentScop, TripCountForCurrentScopPtr, 302 true); 303 } 304