1 //===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //===----------------------------------------------------------------------===//
11 
12 #include "polly/CodeGen/PerfMonitor.h"
13 #include "polly/CodeGen/RuntimeDebugBuilder.h"
14 #include "llvm/ADT/Triple.h"
15 
16 using namespace llvm;
17 using namespace polly;
18 
19 Function *PerfMonitor::getAtExit() {
20   const char *Name = "atexit";
21   Function *F = M->getFunction(Name);
22 
23   if (!F) {
24     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
25     FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(),
26                                          {Builder.getInt8PtrTy()}, false);
27     F = Function::Create(Ty, Linkage, Name, M);
28   }
29 
30   return F;
31 }
32 
33 void PerfMonitor::addToGlobalConstructors(Function *Fn) {
34   const char *Name = "llvm.global_ctors";
35   GlobalVariable *GV = M->getGlobalVariable(Name);
36   std::vector<Constant *> V;
37 
38   if (GV) {
39     Constant *Array = GV->getInitializer();
40     for (Value *X : Array->operand_values())
41       V.push_back(cast<Constant>(X));
42     GV->eraseFromParent();
43   }
44 
45   StructType *ST = StructType::get(Builder.getInt32Ty(), Fn->getType(),
46                                    Builder.getInt8PtrTy(), nullptr);
47 
48   V.push_back(ConstantStruct::get(
49       ST, Builder.getInt32(10), Fn,
50       ConstantPointerNull::get(Builder.getInt8PtrTy()), nullptr));
51   ArrayType *Ty = ArrayType::get(ST, V.size());
52 
53   GV = new GlobalVariable(*M, Ty, true, GlobalValue::AppendingLinkage,
54                           ConstantArray::get(Ty, V), Name, nullptr,
55                           GlobalVariable::NotThreadLocal);
56 }
57 
58 Function *PerfMonitor::getRDTSCP() {
59   const char *Name = "llvm.x86.rdtscp";
60   Function *F = M->getFunction(Name);
61 
62   if (!F) {
63     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
64     FunctionType *Ty = FunctionType::get(Builder.getInt64Ty(),
65                                          {Builder.getInt8PtrTy()}, false);
66     F = Function::Create(Ty, Linkage, Name, M);
67   }
68 
69   return F;
70 }
71 
72 PerfMonitor::PerfMonitor(Module *M) : M(M), Builder(M->getContext()) {
73   if (Triple(M->getTargetTriple()).getArch() == llvm::Triple::x86_64)
74     Supported = true;
75   else
76     Supported = false;
77 }
78 
79 void PerfMonitor::addGlobalVariables() {
80   auto TryRegisterGlobal = [=](const char *Name, Constant *InitialValue,
81                                Value **Location) {
82     *Location = M->getGlobalVariable(Name);
83 
84     if (!*Location)
85       *Location = new GlobalVariable(
86           *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage,
87           InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel);
88   };
89 
90   TryRegisterGlobal("__polly_perf_cycles_total_start", Builder.getInt64(0),
91                     &CyclesTotalStartPtr);
92 
93   TryRegisterGlobal("__polly_perf_initialized", Builder.getInt1(0),
94                     &AlreadyInitializedPtr);
95 
96   TryRegisterGlobal("__polly_perf_cycles_in_scops", Builder.getInt64(0),
97                     &CyclesInScopsPtr);
98 
99   TryRegisterGlobal("__polly_perf_cycles_in_scop_start", Builder.getInt64(0),
100                     &CyclesInScopStartPtr);
101 
102   TryRegisterGlobal("__polly_perf_write_loation", Builder.getInt32(0),
103                     &RDTSCPWriteLocation);
104 }
105 
/// Name of the function that is registered as a global constructor and sets
/// up the performance monitor.
static const char *const InitFunctionName = "__polly_perf_init";
/// Name of the function that is registered with atexit() and prints the
/// collected cycle counts.
static const char *const FinalReportingFunctionName = "__polly_perf_final";
108 
109 Function *PerfMonitor::insertFinalReporting() {
110   // Create new function.
111   GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
112   FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false);
113   Function *ExitFn =
114       Function::Create(Ty, Linkage, FinalReportingFunctionName, M);
115   BasicBlock *Start = BasicBlock::Create(M->getContext(), "start", ExitFn);
116   Builder.SetInsertPoint(Start);
117 
118   if (!Supported) {
119     RuntimeDebugBuilder::createCPUPrinter(
120         Builder, "Polly runtime information generation not supported\n");
121     Builder.CreateRetVoid();
122     return ExitFn;
123   }
124 
125   // Measure current cycles and compute final timings.
126   Function *RDTSCPFn = getRDTSCP();
127   Value *CurrentCycles = Builder.CreateCall(
128       RDTSCPFn,
129       Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy()));
130   Value *CyclesStart = Builder.CreateLoad(CyclesTotalStartPtr, true);
131   Value *CyclesTotal = Builder.CreateSub(CurrentCycles, CyclesStart);
132   Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true);
133 
134   // Print the runtime information.
135   RuntimeDebugBuilder::createCPUPrinter(Builder, "Polly runtime information\n");
136   RuntimeDebugBuilder::createCPUPrinter(Builder, "-------------------------\n");
137   RuntimeDebugBuilder::createCPUPrinter(Builder, "Total: ", CyclesTotal, "\n");
138   RuntimeDebugBuilder::createCPUPrinter(Builder, "Scops: ", CyclesInScops,
139                                         "\n");
140 
141   // Finalize function.
142   Builder.CreateRetVoid();
143   return ExitFn;
144 }
145 
146 void PerfMonitor::initialize() {
147   addGlobalVariables();
148 
149   Function *F = M->getFunction(InitFunctionName);
150   if (F)
151     return;
152 
153   // initialize
154   Function *FinalReporting = insertFinalReporting();
155   Function *InitFn = insertInitFunction(FinalReporting);
156   addToGlobalConstructors(InitFn);
157 }
158 
159 Function *PerfMonitor::insertInitFunction(Function *FinalReporting) {
160   // Insert function definition and BBs.
161   GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
162   FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false);
163   Function *InitFn = Function::Create(Ty, Linkage, InitFunctionName, M);
164   BasicBlock *Start = BasicBlock::Create(M->getContext(), "start", InitFn);
165   BasicBlock *EarlyReturn =
166       BasicBlock::Create(M->getContext(), "earlyreturn", InitFn);
167   BasicBlock *InitBB = BasicBlock::Create(M->getContext(), "initbb", InitFn);
168 
169   Builder.SetInsertPoint(Start);
170 
171   // Check if this function was already run. If yes, return.
172   //
173   // In case profiling has been enabled in multiple translation units, the
174   // initializer function will be added to the global constructors list of
175   // each translation unit. When merging translation units, the global
176   // constructor lists are just appended, such that the initializer will appear
177   // multiple times. To avoid initializations being run multiple times (and
178   // especially to avoid that atExitFn is called more than once), we bail
179   // out if the intializer is run more than once.
180   Value *HasRunBefore = Builder.CreateLoad(AlreadyInitializedPtr);
181   Builder.CreateCondBr(HasRunBefore, EarlyReturn, InitBB);
182   Builder.SetInsertPoint(EarlyReturn);
183   Builder.CreateRetVoid();
184 
185   // Keep track that this function has been run once.
186   Builder.SetInsertPoint(InitBB);
187   Value *True = Builder.getInt1(true);
188   Builder.CreateStore(True, AlreadyInitializedPtr);
189 
190   // Register the final reporting function with atexit().
191   Value *FinalReportingPtr =
192       Builder.CreatePointerCast(FinalReporting, Builder.getInt8PtrTy());
193   Function *AtExitFn = getAtExit();
194   Builder.CreateCall(AtExitFn, {FinalReportingPtr});
195 
196   if (Supported) {
197     // Read the currently cycle counter and store the result for later.
198     Function *RDTSCPFn = getRDTSCP();
199     Value *CurrentCycles = Builder.CreateCall(
200         RDTSCPFn,
201         Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy()));
202     Builder.CreateStore(CurrentCycles, CyclesTotalStartPtr, true);
203   }
204   Builder.CreateRetVoid();
205 
206   return InitFn;
207 }
208 
209 void PerfMonitor::insertRegionStart(Instruction *InsertBefore) {
210   if (!Supported)
211     return;
212 
213   Builder.SetInsertPoint(InsertBefore);
214   Function *RDTSCPFn = getRDTSCP();
215   Value *CurrentCycles = Builder.CreateCall(
216       RDTSCPFn,
217       Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy()));
218   Builder.CreateStore(CurrentCycles, CyclesInScopStartPtr, true);
219 }
220 
221 void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) {
222   if (!Supported)
223     return;
224 
225   Builder.SetInsertPoint(InsertBefore);
226   Function *RDTSCPFn = getRDTSCP();
227   LoadInst *CyclesStart = Builder.CreateLoad(CyclesInScopStartPtr, true);
228   Value *CurrentCycles = Builder.CreateCall(
229       RDTSCPFn,
230       Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy()));
231   Value *CyclesInScop = Builder.CreateSub(CurrentCycles, CyclesStart);
232   Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true);
233   CyclesInScops = Builder.CreateAdd(CyclesInScops, CyclesInScop);
234   Builder.CreateStore(CyclesInScops, CyclesInScopsPtr, true);
235 }
236