1 //===------ LoopGenerators.cpp -  IR helper to create loops ---------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains functions to create scalar and OpenMP parallel loops
11 // as LLVM-IR.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "polly/ScopDetection.h"
16 #include "polly/CodeGen/LoopGenerators.h"
17 #include "llvm/Analysis/LoopInfo.h"
18 #include "llvm/IR/DataLayout.h"
19 #include "llvm/IR/Dominators.h"
20 #include "llvm/IR/Module.h"
21 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
22 
23 using namespace llvm;
24 using namespace polly;
25 
26 // We generate a loop of the following structure
27 //
28 //              BeforeBB
29 //                 |
30 //                 v
31 //              GuardBB
32 //              /      |
33 //     __  PreHeaderBB  |
34 //    /  \    /         |
35 // latch  HeaderBB      |
36 //    \  /    \         /
37 //     <       \       /
38 //              \     /
39 //              ExitBB
40 //
41 // GuardBB checks if the loop is executed at least once. If this is the case
42 // we branch to PreHeaderBB and subsequently to the HeaderBB, which contains the
43 // loop iv 'polly.indvar', the incremented loop iv 'polly.indvar_next' as well
44 // as the condition to check if we execute another iteration of the loop. After
45 // the loop has finished, we branch to ExitBB.
46 //
47 // TODO: We currently always create the GuardBB. If we can prove the loop is
48 //       always executed at least once, we can get rid of this branch.
49 Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
50                          PollyIRBuilder &Builder, Pass *P, LoopInfo &LI,
51                          DominatorTree &DT, BasicBlock *&ExitBB,
52                          ICmpInst::Predicate Predicate,
53                          LoopAnnotator *Annotator, bool Parallel) {
54   Function *F = Builder.GetInsertBlock()->getParent();
55   LLVMContext &Context = F->getContext();
56 
57   assert(LB->getType() == UB->getType() && "Types of loop bounds do not match");
58   IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
59   assert(LoopIVType && "UB is not integer?");
60 
61   BasicBlock *BeforeBB = Builder.GetInsertBlock();
62   BasicBlock *GuardBB = BasicBlock::Create(Context, "polly.loop_if", F);
63   BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
64   BasicBlock *PreHeaderBB =
65       BasicBlock::Create(Context, "polly.loop_preheader", F);
66 
67   if (Annotator) {
68     Annotator->Begin(HeaderBB);
69     if (Parallel)
70       Annotator->SetCurrentParallel();
71   }
72 
73   // Update LoopInfo
74   Loop *OuterLoop = LI.getLoopFor(BeforeBB);
75   Loop *NewLoop = new Loop();
76 
77   if (OuterLoop) {
78     OuterLoop->addChildLoop(NewLoop);
79   } else {
80     LI.addTopLevelLoop(NewLoop);
81   }
82 
83   if (OuterLoop) {
84     OuterLoop->addBasicBlockToLoop(GuardBB, LI.getBase());
85     OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI.getBase());
86   }
87 
88   NewLoop->addBasicBlockToLoop(HeaderBB, LI.getBase());
89 
90   // ExitBB
91   ExitBB = SplitBlock(BeforeBB, Builder.GetInsertPoint()++, P);
92   ExitBB->setName("polly.loop_exit");
93 
94   // BeforeBB
95   BeforeBB->getTerminator()->setSuccessor(0, GuardBB);
96 
97   // GuardBB
98   DT.addNewBlock(GuardBB, BeforeBB);
99   Builder.SetInsertPoint(GuardBB);
100   Value *LoopGuard;
101   LoopGuard = Builder.CreateICmp(Predicate, LB, UB);
102   LoopGuard->setName("polly.loop_guard");
103   Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB);
104 
105   // PreHeaderBB
106   DT.addNewBlock(PreHeaderBB, GuardBB);
107   Builder.SetInsertPoint(PreHeaderBB);
108   Builder.CreateBr(HeaderBB);
109 
110   // HeaderBB
111   DT.addNewBlock(HeaderBB, PreHeaderBB);
112   Builder.SetInsertPoint(HeaderBB);
113   PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar");
114   IV->addIncoming(LB, PreHeaderBB);
115   Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
116   Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
117   Value *LoopCondition;
118   UB = Builder.CreateSub(UB, Stride, "polly.adjust_ub");
119   LoopCondition = Builder.CreateICmp(Predicate, IV, UB);
120   LoopCondition->setName("polly.loop_cond");
121   Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
122   IV->addIncoming(IncrementedIV, HeaderBB);
123   DT.changeImmediateDominator(ExitBB, GuardBB);
124 
125   // The loop body should be added here.
126   Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
127   return IV;
128 }
129 
130 void OMPGenerator::createCallParallelLoopStart(
131     Value *SubFunction, Value *SubfunctionParam, Value *NumberOfThreads,
132     Value *LowerBound, Value *UpperBound, Value *Stride) {
133   Module *M = getModule();
134   const char *Name = "GOMP_parallel_loop_runtime_start";
135   Function *F = M->getFunction(Name);
136 
137   // If F is not available, declare it.
138   if (!F) {
139     Type *LongTy = getIntPtrTy();
140     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
141 
142     Type *Params[] = {PointerType::getUnqual(FunctionType::get(
143                           Builder.getVoidTy(), Builder.getInt8PtrTy(), false)),
144                       Builder.getInt8PtrTy(), Builder.getInt32Ty(), LongTy,
145                       LongTy, LongTy};
146 
147     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
148     F = Function::Create(Ty, Linkage, Name, M);
149   }
150 
151   Value *Args[] = {SubFunction, SubfunctionParam, NumberOfThreads,
152                    LowerBound,  UpperBound,       Stride};
153 
154   Builder.CreateCall(F, Args);
155 }
156 
157 Value *OMPGenerator::createCallLoopNext(Value *LowerBoundPtr,
158                                         Value *UpperBoundPtr) {
159   Module *M = getModule();
160   const char *Name = "GOMP_loop_runtime_next";
161   Function *F = M->getFunction(Name);
162 
163   // If F is not available, declare it.
164   if (!F) {
165     Type *LongPtrTy = PointerType::getUnqual(getIntPtrTy());
166     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
167 
168     Type *Params[] = {LongPtrTy, LongPtrTy};
169 
170     FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false);
171     F = Function::Create(Ty, Linkage, Name, M);
172   }
173 
174   Value *Args[] = {LowerBoundPtr, UpperBoundPtr};
175 
176   Value *Return = Builder.CreateCall(F, Args);
177   Return = Builder.CreateICmpNE(
178       Return, Builder.CreateZExt(Builder.getFalse(), Return->getType()));
179   return Return;
180 }
181 
182 void OMPGenerator::createCallParallelEnd() {
183   const char *Name = "GOMP_parallel_end";
184   Module *M = getModule();
185   Function *F = M->getFunction(Name);
186 
187   // If F is not available, declare it.
188   if (!F) {
189     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
190 
191     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
192     F = Function::Create(Ty, Linkage, Name, M);
193   }
194 
195   Builder.CreateCall(F);
196 }
197 
198 void OMPGenerator::createCallLoopEndNowait() {
199   const char *Name = "GOMP_loop_end_nowait";
200   Module *M = getModule();
201   Function *F = M->getFunction(Name);
202 
203   // If F is not available, declare it.
204   if (!F) {
205     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
206 
207     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
208     F = Function::Create(Ty, Linkage, Name, M);
209   }
210 
211   Builder.CreateCall(F);
212 }
213 
214 IntegerType *OMPGenerator::getIntPtrTy() {
215   return P->getAnalysis<DataLayoutPass>().getDataLayout().getIntPtrType(
216       Builder.getContext());
217 }
218 
219 Module *OMPGenerator::getModule() {
220   return Builder.GetInsertBlock()->getParent()->getParent();
221 }
222 
223 Function *OMPGenerator::createSubfunctionDefinition() {
224   Module *M = getModule();
225   Function *F = Builder.GetInsertBlock()->getParent();
226   std::vector<Type *> Arguments(1, Builder.getInt8PtrTy());
227   FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
228   Function *FN = Function::Create(FT, Function::InternalLinkage,
229                                   F->getName() + ".omp_subfn", M);
230   // Do not run any polly pass on the new function.
231   FN->addFnAttr(PollySkipFnAttr);
232 
233   Function::arg_iterator AI = FN->arg_begin();
234   AI->setName("omp.userContext");
235 
236   return FN;
237 }
238 
239 Value *OMPGenerator::loadValuesIntoStruct(SetVector<Value *> &Values) {
240   std::vector<Type *> Members;
241 
242   for (Value *V : Values)
243     Members.push_back(V->getType());
244 
245   StructType *Ty = StructType::get(Builder.getContext(), Members);
246   Value *Struct = Builder.CreateAlloca(Ty, 0, "omp.userContext");
247 
248   for (unsigned i = 0; i < Values.size(); i++) {
249     Value *Address = Builder.CreateStructGEP(Struct, i);
250     Builder.CreateStore(Values[i], Address);
251   }
252 
253   return Struct;
254 }
255 
256 void OMPGenerator::extractValuesFromStruct(SetVector<Value *> OldValues,
257                                            Value *Struct,
258                                            ValueToValueMapTy &Map) {
259   for (unsigned i = 0; i < OldValues.size(); i++) {
260     Value *Address = Builder.CreateStructGEP(Struct, i);
261     Value *NewValue = Builder.CreateLoad(Address);
262     Map.insert(std::make_pair(OldValues[i], NewValue));
263   }
264 }
265 
266 Value *OMPGenerator::createSubfunction(Value *Stride, Value *StructData,
267                                        SetVector<Value *> Data,
268                                        ValueToValueMapTy &Map,
269                                        Function **SubFunction) {
270   Function *FN = createSubfunctionDefinition();
271 
272   BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *LoadIVBoundsBB,
273       *AfterBB;
274   Value *LowerBoundPtr, *UpperBoundPtr, *UserContext, *Ret1, *HasNextSchedule,
275       *LowerBound, *UpperBound, *IV;
276   Type *IntPtrTy = getIntPtrTy();
277   LLVMContext &Context = FN->getContext();
278 
279   // Store the previous basic block.
280   PrevBB = Builder.GetInsertBlock();
281 
282   // Create basic blocks.
283   HeaderBB = BasicBlock::Create(Context, "omp.setup", FN);
284   ExitBB = BasicBlock::Create(Context, "omp.exit", FN);
285   CheckNextBB = BasicBlock::Create(Context, "omp.checkNext", FN);
286   LoadIVBoundsBB = BasicBlock::Create(Context, "omp.loadIVBounds", FN);
287 
288   DominatorTree &DT = P->getAnalysis<DominatorTreeWrapperPass>().getDomTree();
289   DT.addNewBlock(HeaderBB, PrevBB);
290   DT.addNewBlock(ExitBB, HeaderBB);
291   DT.addNewBlock(CheckNextBB, HeaderBB);
292   DT.addNewBlock(LoadIVBoundsBB, HeaderBB);
293 
294   // Fill up basic block HeaderBB.
295   Builder.SetInsertPoint(HeaderBB);
296   LowerBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.lowerBoundPtr");
297   UpperBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.upperBoundPtr");
298   UserContext = Builder.CreateBitCast(FN->arg_begin(), StructData->getType(),
299                                       "omp.userContext");
300 
301   extractValuesFromStruct(Data, UserContext, Map);
302   Builder.CreateBr(CheckNextBB);
303 
304   // Add code to check if another set of iterations will be executed.
305   Builder.SetInsertPoint(CheckNextBB);
306   Ret1 = createCallLoopNext(LowerBoundPtr, UpperBoundPtr);
307   HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(),
308                                         "omp.hasNextScheduleBlock");
309   Builder.CreateCondBr(HasNextSchedule, LoadIVBoundsBB, ExitBB);
310 
311   // Add code to to load the iv bounds for this set of iterations.
312   Builder.SetInsertPoint(LoadIVBoundsBB);
313   LowerBound = Builder.CreateLoad(LowerBoundPtr, "omp.lowerBound");
314   UpperBound = Builder.CreateLoad(UpperBoundPtr, "omp.upperBound");
315 
316   // Subtract one as the upper bound provided by openmp is a < comparison
317   // whereas the codegenForSequential function creates a <= comparison.
318   UpperBound = Builder.CreateSub(UpperBound, ConstantInt::get(IntPtrTy, 1),
319                                  "omp.upperBoundAdjusted");
320 
321   Builder.CreateBr(CheckNextBB);
322   Builder.SetInsertPoint(--Builder.GetInsertPoint());
323   LoopInfo &LI = P->getAnalysis<LoopInfo>();
324   IV = createLoop(LowerBound, UpperBound, Stride, Builder, P, LI, DT, AfterBB,
325                   ICmpInst::ICMP_SLE);
326 
327   BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
328   Builder.SetInsertPoint(AfterBB->begin());
329 
330   // Add code to terminate this openmp subfunction.
331   Builder.SetInsertPoint(ExitBB);
332   createCallLoopEndNowait();
333   Builder.CreateRetVoid();
334 
335   Builder.SetInsertPoint(LoopBody);
336   *SubFunction = FN;
337 
338   return IV;
339 }
340 
341 Value *OMPGenerator::createParallelLoop(Value *LowerBound, Value *UpperBound,
342                                         Value *Stride,
343                                         SetVector<Value *> &Values,
344                                         ValueToValueMapTy &Map,
345                                         BasicBlock::iterator *LoopBody) {
346   Value *Struct, *IV, *SubfunctionParam, *NumberOfThreads;
347   Function *SubFunction;
348 
349   Struct = loadValuesIntoStruct(Values);
350 
351   BasicBlock::iterator PrevInsertPoint = Builder.GetInsertPoint();
352   IV = createSubfunction(Stride, Struct, Values, Map, &SubFunction);
353   *LoopBody = Builder.GetInsertPoint();
354   Builder.SetInsertPoint(PrevInsertPoint);
355 
356   // Create call for GOMP_parallel_loop_runtime_start.
357   SubfunctionParam =
358       Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), "omp_data");
359 
360   NumberOfThreads = Builder.getInt32(0);
361 
362   // Add one as the upper bound provided by openmp is a < comparison
363   // whereas the codegenForSequential function creates a <= comparison.
364   UpperBound =
365       Builder.CreateAdd(UpperBound, ConstantInt::get(getIntPtrTy(), 1));
366 
367   createCallParallelLoopStart(SubFunction, SubfunctionParam, NumberOfThreads,
368                               LowerBound, UpperBound, Stride);
369   Builder.CreateCall(SubFunction, SubfunctionParam);
370   createCallParallelEnd();
371 
372   return IV;
373 }
374