1 //===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //===----------------------------------------------------------------------===//
11 
12 #include "polly/CodeGen/PerfMonitor.h"
13 #include "polly/CodeGen/RuntimeDebugBuilder.h"
14 #include "polly/ScopInfo.h"
15 #include "llvm/ADT/Triple.h"
16 #include "llvm/IR/Intrinsics.h"
17 #include <sstream>
18 
19 using namespace llvm;
20 using namespace polly;
21 
22 Function *PerfMonitor::getAtExit() {
23   const char *Name = "atexit";
24   Function *F = M->getFunction(Name);
25 
26   if (!F) {
27     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
28     FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(),
29                                          {Builder.getInt8PtrTy()}, false);
30     F = Function::Create(Ty, Linkage, Name, M);
31   }
32 
33   return F;
34 }
35 
36 void PerfMonitor::addToGlobalConstructors(Function *Fn) {
37   const char *Name = "llvm.global_ctors";
38   GlobalVariable *GV = M->getGlobalVariable(Name);
39   std::vector<Constant *> V;
40 
41   if (GV) {
42     Constant *Array = GV->getInitializer();
43     for (Value *X : Array->operand_values())
44       V.push_back(cast<Constant>(X));
45     GV->eraseFromParent();
46   }
47 
48   StructType *ST = StructType::get(Builder.getInt32Ty(), Fn->getType(),
49                                    Builder.getInt8PtrTy());
50 
51   V.push_back(
52       ConstantStruct::get(ST, Builder.getInt32(10), Fn,
53                           ConstantPointerNull::get(Builder.getInt8PtrTy())));
54   ArrayType *Ty = ArrayType::get(ST, V.size());
55 
56   GV = new GlobalVariable(*M, Ty, true, GlobalValue::AppendingLinkage,
57                           ConstantArray::get(Ty, V), Name, nullptr,
58                           GlobalVariable::NotThreadLocal);
59 }
60 
61 Function *PerfMonitor::getRDTSCP() {
62   return Intrinsic::getDeclaration(M, Intrinsic::x86_rdtscp);
63 }
64 
65 PerfMonitor::PerfMonitor(const Scop &S, Module *M)
66     : M(M), Builder(M->getContext()), S(S) {
67   if (Triple(M->getTargetTriple()).getArch() == llvm::Triple::x86_64)
68     Supported = true;
69   else
70     Supported = false;
71 }
72 
73 static void TryRegisterGlobal(Module *M, const char *Name,
74                               Constant *InitialValue, Value **Location) {
75   *Location = M->getGlobalVariable(Name);
76 
77   if (!*Location)
78     *Location = new GlobalVariable(
79         *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage,
80         InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel);
81 }
82 
83 // Generate a unique name that is usable as a LLVM name for a scop to name its
84 // performance counter.
85 static std::string GetScopUniqueVarname(const Scop &S) {
86   std::stringstream Name;
87   std::string EntryString, ExitString;
88   std::tie(EntryString, ExitString) = S.getEntryExitStr();
89 
90   Name << "__polly_perf_in_" << std::string(S.getFunction().getName())
91        << "_from__" << EntryString << "__to__" << ExitString;
92   return Name.str();
93 }
94 
95 void PerfMonitor::addScopCounter() {
96   const std::string varname = GetScopUniqueVarname(S);
97   TryRegisterGlobal(M, (varname + "_cycles").c_str(), Builder.getInt64(0),
98                     &CyclesInCurrentScopPtr);
99 
100   TryRegisterGlobal(M, (varname + "_trip_count").c_str(), Builder.getInt64(0),
101                     &TripCountForCurrentScopPtr);
102 }
103 
104 void PerfMonitor::addGlobalVariables() {
105   TryRegisterGlobal(M, "__polly_perf_cycles_total_start", Builder.getInt64(0),
106                     &CyclesTotalStartPtr);
107 
108   TryRegisterGlobal(M, "__polly_perf_initialized", Builder.getInt1(0),
109                     &AlreadyInitializedPtr);
110 
111   TryRegisterGlobal(M, "__polly_perf_cycles_in_scops", Builder.getInt64(0),
112                     &CyclesInScopsPtr);
113 
114   TryRegisterGlobal(M, "__polly_perf_cycles_in_scop_start", Builder.getInt64(0),
115                     &CyclesInScopStartPtr);
116 
117   TryRegisterGlobal(M, "__polly_perf_write_loation", Builder.getInt32(0),
118                     &RDTSCPWriteLocation);
119 }
120 
121 static const char *InitFunctionName = "__polly_perf_init";
122 static const char *FinalReportingFunctionName = "__polly_perf_final";
123 
124 static BasicBlock *FinalStartBB = nullptr;
125 static ReturnInst *ReturnFromFinal = nullptr;
126 
127 Function *PerfMonitor::insertFinalReporting() {
128   // Create new function.
129   GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
130   FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false);
131   Function *ExitFn =
132       Function::Create(Ty, Linkage, FinalReportingFunctionName, M);
133   FinalStartBB = BasicBlock::Create(M->getContext(), "start", ExitFn);
134   Builder.SetInsertPoint(FinalStartBB);
135 
136   if (!Supported) {
137     RuntimeDebugBuilder::createCPUPrinter(
138         Builder, "Polly runtime information generation not supported\n");
139     Builder.CreateRetVoid();
140     return ExitFn;
141   }
142 
143   // Measure current cycles and compute final timings.
144   Function *RDTSCPFn = getRDTSCP();
145   Value *CurrentCycles = Builder.CreateCall(
146       RDTSCPFn,
147       Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy()));
148   Value *CyclesStart = Builder.CreateLoad(CyclesTotalStartPtr, true);
149   Value *CyclesTotal = Builder.CreateSub(CurrentCycles, CyclesStart);
150   Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true);
151 
152   // Print the runtime information.
153   RuntimeDebugBuilder::createCPUPrinter(Builder, "Polly runtime information\n");
154   RuntimeDebugBuilder::createCPUPrinter(Builder, "-------------------------\n");
155   RuntimeDebugBuilder::createCPUPrinter(Builder, "Total: ", CyclesTotal, "\n");
156   RuntimeDebugBuilder::createCPUPrinter(Builder, "Scops: ", CyclesInScops,
157                                         "\n");
158 
159   // Print the preamble for per-scop information.
160   RuntimeDebugBuilder::createCPUPrinter(Builder, "\n");
161   RuntimeDebugBuilder::createCPUPrinter(Builder, "Per SCoP information\n");
162   RuntimeDebugBuilder::createCPUPrinter(Builder, "--------------------\n");
163 
164   RuntimeDebugBuilder::createCPUPrinter(
165       Builder, "scop function, "
166                "entry block name, exit block name, total time, trip count\n");
167   ReturnFromFinal = Builder.CreateRetVoid();
168   return ExitFn;
169 }
170 
171 void PerfMonitor::AppendScopReporting() {
172   if (!Supported)
173     return;
174 
175   assert(FinalStartBB && "Expected FinalStartBB to be initialized by "
176                          "PerfMonitor::insertFinalReporting.");
177   assert(ReturnFromFinal && "Expected ReturnFromFinal to be initialized by "
178                             "PerfMonitor::insertFinalReporting.");
179 
180   Builder.SetInsertPoint(FinalStartBB);
181   ReturnFromFinal->eraseFromParent();
182 
183   Value *CyclesInCurrentScop =
184       Builder.CreateLoad(this->CyclesInCurrentScopPtr, true);
185 
186   Value *TripCountForCurrentScop =
187       Builder.CreateLoad(this->TripCountForCurrentScopPtr, true);
188 
189   std::string EntryName, ExitName;
190   std::tie(EntryName, ExitName) = S.getEntryExitStr();
191 
192   // print in CSV for easy parsing with other tools.
193   RuntimeDebugBuilder::createCPUPrinter(
194       Builder, S.getFunction().getName(), ", ", EntryName, ", ", ExitName, ", ",
195       CyclesInCurrentScop, ", ", TripCountForCurrentScop, "\n");
196 
197   ReturnFromFinal = Builder.CreateRetVoid();
198 }
199 
200 static Function *FinalReporting = nullptr;
201 
202 void PerfMonitor::initialize() {
203   addGlobalVariables();
204   addScopCounter();
205 
206   // Ensure that we only add the final reporting function once.
207   // On later invocations, append to the reporting function.
208   if (!FinalReporting) {
209     FinalReporting = insertFinalReporting();
210 
211     Function *InitFn = insertInitFunction(FinalReporting);
212     addToGlobalConstructors(InitFn);
213   }
214 
215   AppendScopReporting();
216 }
217 
218 Function *PerfMonitor::insertInitFunction(Function *FinalReporting) {
219   // Insert function definition and BBs.
220   GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
221   FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false);
222   Function *InitFn = Function::Create(Ty, Linkage, InitFunctionName, M);
223   BasicBlock *Start = BasicBlock::Create(M->getContext(), "start", InitFn);
224   BasicBlock *EarlyReturn =
225       BasicBlock::Create(M->getContext(), "earlyreturn", InitFn);
226   BasicBlock *InitBB = BasicBlock::Create(M->getContext(), "initbb", InitFn);
227 
228   Builder.SetInsertPoint(Start);
229 
230   // Check if this function was already run. If yes, return.
231   //
232   // In case profiling has been enabled in multiple translation units, the
233   // initializer function will be added to the global constructors list of
234   // each translation unit. When merging translation units, the global
235   // constructor lists are just appended, such that the initializer will appear
236   // multiple times. To avoid initializations being run multiple times (and
237   // especially to avoid that atExitFn is called more than once), we bail
238   // out if the intializer is run more than once.
239   Value *HasRunBefore = Builder.CreateLoad(AlreadyInitializedPtr);
240   Builder.CreateCondBr(HasRunBefore, EarlyReturn, InitBB);
241   Builder.SetInsertPoint(EarlyReturn);
242   Builder.CreateRetVoid();
243 
244   // Keep track that this function has been run once.
245   Builder.SetInsertPoint(InitBB);
246   Value *True = Builder.getInt1(true);
247   Builder.CreateStore(True, AlreadyInitializedPtr);
248 
249   // Register the final reporting function with atexit().
250   Value *FinalReportingPtr =
251       Builder.CreatePointerCast(FinalReporting, Builder.getInt8PtrTy());
252   Function *AtExitFn = getAtExit();
253   Builder.CreateCall(AtExitFn, {FinalReportingPtr});
254 
255   if (Supported) {
256     // Read the currently cycle counter and store the result for later.
257     Function *RDTSCPFn = getRDTSCP();
258     Value *CurrentCycles = Builder.CreateCall(
259         RDTSCPFn,
260         Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy()));
261     Builder.CreateStore(CurrentCycles, CyclesTotalStartPtr, true);
262   }
263   Builder.CreateRetVoid();
264 
265   return InitFn;
266 }
267 
268 void PerfMonitor::insertRegionStart(Instruction *InsertBefore) {
269   if (!Supported)
270     return;
271 
272   Builder.SetInsertPoint(InsertBefore);
273   Function *RDTSCPFn = getRDTSCP();
274   Value *CurrentCycles = Builder.CreateCall(
275       RDTSCPFn,
276       Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy()));
277   Builder.CreateStore(CurrentCycles, CyclesInScopStartPtr, true);
278 }
279 
280 void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) {
281   if (!Supported)
282     return;
283 
284   Builder.SetInsertPoint(InsertBefore);
285   Function *RDTSCPFn = getRDTSCP();
286   LoadInst *CyclesStart = Builder.CreateLoad(CyclesInScopStartPtr, true);
287   Value *CurrentCycles = Builder.CreateCall(
288       RDTSCPFn,
289       Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy()));
290   Value *CyclesInScop = Builder.CreateSub(CurrentCycles, CyclesStart);
291   Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true);
292   CyclesInScops = Builder.CreateAdd(CyclesInScops, CyclesInScop);
293   Builder.CreateStore(CyclesInScops, CyclesInScopsPtr, true);
294 
295   Value *CyclesInCurrentScop = Builder.CreateLoad(CyclesInCurrentScopPtr, true);
296   CyclesInCurrentScop = Builder.CreateAdd(CyclesInCurrentScop, CyclesInScop);
297   Builder.CreateStore(CyclesInCurrentScop, CyclesInCurrentScopPtr, true);
298 
299   Value *TripCountForCurrentScop =
300       Builder.CreateLoad(TripCountForCurrentScopPtr, true);
301   TripCountForCurrentScop =
302       Builder.CreateAdd(TripCountForCurrentScop, Builder.getInt64(1));
303   Builder.CreateStore(TripCountForCurrentScop, TripCountForCurrentScopPtr,
304                       true);
305 }
306