1 //===------ PerfMonitor.cpp - Generate a run-time performance monitor. -======//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //===----------------------------------------------------------------------===//
11 
12 #include "polly/CodeGen/PerfMonitor.h"
13 #include "polly/CodeGen/RuntimeDebugBuilder.h"
14 #include "llvm/ADT/Triple.h"
15 #include "llvm/IR/Intrinsics.h"
16 
17 using namespace llvm;
18 using namespace polly;
19 
20 Function *PerfMonitor::getAtExit() {
21   const char *Name = "atexit";
22   Function *F = M->getFunction(Name);
23 
24   if (!F) {
25     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
26     FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(),
27                                          {Builder.getInt8PtrTy()}, false);
28     F = Function::Create(Ty, Linkage, Name, M);
29   }
30 
31   return F;
32 }
33 
34 void PerfMonitor::addToGlobalConstructors(Function *Fn) {
35   const char *Name = "llvm.global_ctors";
36   GlobalVariable *GV = M->getGlobalVariable(Name);
37   std::vector<Constant *> V;
38 
39   if (GV) {
40     Constant *Array = GV->getInitializer();
41     for (Value *X : Array->operand_values())
42       V.push_back(cast<Constant>(X));
43     GV->eraseFromParent();
44   }
45 
46   StructType *ST = StructType::get(Builder.getInt32Ty(), Fn->getType(),
47                                    Builder.getInt8PtrTy());
48 
49   V.push_back(ConstantStruct::get(
50       ST, Builder.getInt32(10), Fn,
51       ConstantPointerNull::get(Builder.getInt8PtrTy())));
52   ArrayType *Ty = ArrayType::get(ST, V.size());
53 
54   GV = new GlobalVariable(*M, Ty, true, GlobalValue::AppendingLinkage,
55                           ConstantArray::get(Ty, V), Name, nullptr,
56                           GlobalVariable::NotThreadLocal);
57 }
58 
59 Function *PerfMonitor::getRDTSCP() {
60   return Intrinsic::getDeclaration(M, Intrinsic::x86_rdtscp);
61 }
62 
63 PerfMonitor::PerfMonitor(Module *M) : M(M), Builder(M->getContext()) {
64   if (Triple(M->getTargetTriple()).getArch() == llvm::Triple::x86_64)
65     Supported = true;
66   else
67     Supported = false;
68 }
69 
70 void PerfMonitor::addGlobalVariables() {
71   auto TryRegisterGlobal = [=](const char *Name, Constant *InitialValue,
72                                Value **Location) {
73     *Location = M->getGlobalVariable(Name);
74 
75     if (!*Location)
76       *Location = new GlobalVariable(
77           *M, InitialValue->getType(), true, GlobalValue::WeakAnyLinkage,
78           InitialValue, Name, nullptr, GlobalVariable::InitialExecTLSModel);
79   };
80 
81   TryRegisterGlobal("__polly_perf_cycles_total_start", Builder.getInt64(0),
82                     &CyclesTotalStartPtr);
83 
84   TryRegisterGlobal("__polly_perf_initialized", Builder.getInt1(0),
85                     &AlreadyInitializedPtr);
86 
87   TryRegisterGlobal("__polly_perf_cycles_in_scops", Builder.getInt64(0),
88                     &CyclesInScopsPtr);
89 
90   TryRegisterGlobal("__polly_perf_cycles_in_scop_start", Builder.getInt64(0),
91                     &CyclesInScopStartPtr);
92 
93   TryRegisterGlobal("__polly_perf_write_loation", Builder.getInt32(0),
94                     &RDTSCPWriteLocation);
95 }
96 
/// Symbol name of the generated initialization function.
static const char *const InitFunctionName = "__polly_perf_init";
/// Symbol name of the generated function that prints the final report.
static const char *const FinalReportingFunctionName = "__polly_perf_final";
99 
100 Function *PerfMonitor::insertFinalReporting() {
101   // Create new function.
102   GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
103   FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false);
104   Function *ExitFn =
105       Function::Create(Ty, Linkage, FinalReportingFunctionName, M);
106   BasicBlock *Start = BasicBlock::Create(M->getContext(), "start", ExitFn);
107   Builder.SetInsertPoint(Start);
108 
109   if (!Supported) {
110     RuntimeDebugBuilder::createCPUPrinter(
111         Builder, "Polly runtime information generation not supported\n");
112     Builder.CreateRetVoid();
113     return ExitFn;
114   }
115 
116   // Measure current cycles and compute final timings.
117   Function *RDTSCPFn = getRDTSCP();
118   Value *CurrentCycles = Builder.CreateCall(
119       RDTSCPFn,
120       Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy()));
121   Value *CyclesStart = Builder.CreateLoad(CyclesTotalStartPtr, true);
122   Value *CyclesTotal = Builder.CreateSub(CurrentCycles, CyclesStart);
123   Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true);
124 
125   // Print the runtime information.
126   RuntimeDebugBuilder::createCPUPrinter(Builder, "Polly runtime information\n");
127   RuntimeDebugBuilder::createCPUPrinter(Builder, "-------------------------\n");
128   RuntimeDebugBuilder::createCPUPrinter(Builder, "Total: ", CyclesTotal, "\n");
129   RuntimeDebugBuilder::createCPUPrinter(Builder, "Scops: ", CyclesInScops,
130                                         "\n");
131 
132   // Finalize function.
133   Builder.CreateRetVoid();
134   return ExitFn;
135 }
136 
137 void PerfMonitor::initialize() {
138   addGlobalVariables();
139 
140   Function *F = M->getFunction(InitFunctionName);
141   if (F)
142     return;
143 
144   // initialize
145   Function *FinalReporting = insertFinalReporting();
146   Function *InitFn = insertInitFunction(FinalReporting);
147   addToGlobalConstructors(InitFn);
148 }
149 
150 Function *PerfMonitor::insertInitFunction(Function *FinalReporting) {
151   // Insert function definition and BBs.
152   GlobalValue::LinkageTypes Linkage = Function::WeakODRLinkage;
153   FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), {}, false);
154   Function *InitFn = Function::Create(Ty, Linkage, InitFunctionName, M);
155   BasicBlock *Start = BasicBlock::Create(M->getContext(), "start", InitFn);
156   BasicBlock *EarlyReturn =
157       BasicBlock::Create(M->getContext(), "earlyreturn", InitFn);
158   BasicBlock *InitBB = BasicBlock::Create(M->getContext(), "initbb", InitFn);
159 
160   Builder.SetInsertPoint(Start);
161 
162   // Check if this function was already run. If yes, return.
163   //
164   // In case profiling has been enabled in multiple translation units, the
165   // initializer function will be added to the global constructors list of
166   // each translation unit. When merging translation units, the global
167   // constructor lists are just appended, such that the initializer will appear
168   // multiple times. To avoid initializations being run multiple times (and
169   // especially to avoid that atExitFn is called more than once), we bail
170   // out if the intializer is run more than once.
171   Value *HasRunBefore = Builder.CreateLoad(AlreadyInitializedPtr);
172   Builder.CreateCondBr(HasRunBefore, EarlyReturn, InitBB);
173   Builder.SetInsertPoint(EarlyReturn);
174   Builder.CreateRetVoid();
175 
176   // Keep track that this function has been run once.
177   Builder.SetInsertPoint(InitBB);
178   Value *True = Builder.getInt1(true);
179   Builder.CreateStore(True, AlreadyInitializedPtr);
180 
181   // Register the final reporting function with atexit().
182   Value *FinalReportingPtr =
183       Builder.CreatePointerCast(FinalReporting, Builder.getInt8PtrTy());
184   Function *AtExitFn = getAtExit();
185   Builder.CreateCall(AtExitFn, {FinalReportingPtr});
186 
187   if (Supported) {
188     // Read the currently cycle counter and store the result for later.
189     Function *RDTSCPFn = getRDTSCP();
190     Value *CurrentCycles = Builder.CreateCall(
191         RDTSCPFn,
192         Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy()));
193     Builder.CreateStore(CurrentCycles, CyclesTotalStartPtr, true);
194   }
195   Builder.CreateRetVoid();
196 
197   return InitFn;
198 }
199 
200 void PerfMonitor::insertRegionStart(Instruction *InsertBefore) {
201   if (!Supported)
202     return;
203 
204   Builder.SetInsertPoint(InsertBefore);
205   Function *RDTSCPFn = getRDTSCP();
206   Value *CurrentCycles = Builder.CreateCall(
207       RDTSCPFn,
208       Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy()));
209   Builder.CreateStore(CurrentCycles, CyclesInScopStartPtr, true);
210 }
211 
212 void PerfMonitor::insertRegionEnd(Instruction *InsertBefore) {
213   if (!Supported)
214     return;
215 
216   Builder.SetInsertPoint(InsertBefore);
217   Function *RDTSCPFn = getRDTSCP();
218   LoadInst *CyclesStart = Builder.CreateLoad(CyclesInScopStartPtr, true);
219   Value *CurrentCycles = Builder.CreateCall(
220       RDTSCPFn,
221       Builder.CreatePointerCast(RDTSCPWriteLocation, Builder.getInt8PtrTy()));
222   Value *CyclesInScop = Builder.CreateSub(CurrentCycles, CyclesStart);
223   Value *CyclesInScops = Builder.CreateLoad(CyclesInScopsPtr, true);
224   CyclesInScops = Builder.CreateAdd(CyclesInScops, CyclesInScop);
225   Builder.CreateStore(CyclesInScops, CyclesInScopsPtr, true);
226 }
227