1 //===------ LoopGenerators.cpp -  IR helper to create loops ---------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains functions to create scalar and OpenMP parallel loops
11 // as LLVM-IR.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "polly/ScopDetection.h"
16 #include "polly/CodeGen/LoopGenerators.h"
17 #include "llvm/Analysis/Dominators.h"
18 #include "llvm/Analysis/LoopInfo.h"
19 #include "llvm/IR/DataLayout.h"
20 #include "llvm/IR/Module.h"
21 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
22 
23 using namespace llvm;
24 using namespace polly;
25 
26 // We generate a loop of the following structure
27 //
28 //              BeforeBB
29 //                 |
30 //                 v
31 //              GuardBB
32 //              /      \
33 //     __  PreHeaderBB  \
34 //    /  \    /         |
35 // latch  HeaderBB      |
36 //    \  /    \         /
37 //     <       \       /
38 //              \     /
39 //              ExitBB
40 //
41 // GuardBB checks if the loop is executed at least once. If this is the case
42 // we branch to PreHeaderBB and subsequently to the HeaderBB, which contains the
43 // loop iv 'polly.indvar', the incremented loop iv 'polly.indvar_next' as well
44 // as the condition to check if we execute another iteration of the loop. After
45 // the loop has finished, we branch to ExitBB.
46 //
47 // TODO: We currently always create the GuardBB. If we can prove the loop is
48 //       always executed at least once, we can get rid of this branch.
49 Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
50                          IRBuilder<> &Builder, Pass *P, BasicBlock *&ExitBB,
51                          ICmpInst::Predicate Predicate) {
52 
53   DominatorTree &DT = P->getAnalysis<DominatorTree>();
54   LoopInfo &LI = P->getAnalysis<LoopInfo>();
55   Function *F = Builder.GetInsertBlock()->getParent();
56   LLVMContext &Context = F->getContext();
57 
58   assert(LB->getType() == UB->getType() && "Types of loop bounds do not match");
59   IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
60   assert(LoopIVType && "UB is not integer?");
61 
62   BasicBlock *BeforeBB = Builder.GetInsertBlock();
63   BasicBlock *GuardBB = BasicBlock::Create(Context, "polly.loop_if", F);
64   BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
65   BasicBlock *PreHeaderBB =
66       BasicBlock::Create(Context, "polly.loop_preheader", F);
67 
68   // Update LoopInfo
69   Loop *OuterLoop = LI.getLoopFor(BeforeBB);
70   Loop *NewLoop = new Loop();
71 
72   if (OuterLoop) {
73     OuterLoop->addChildLoop(NewLoop);
74   } else {
75     LI.addTopLevelLoop(NewLoop);
76   }
77 
78   if (OuterLoop) {
79     OuterLoop->addBasicBlockToLoop(GuardBB, LI.getBase());
80     OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI.getBase());
81   }
82 
83   NewLoop->addBasicBlockToLoop(HeaderBB, LI.getBase());
84 
85   // ExitBB
86   ExitBB = SplitBlock(BeforeBB, Builder.GetInsertPoint()++, P);
87   ExitBB->setName("polly.loop_exit");
88 
89   // BeforeBB
90   BeforeBB->getTerminator()->setSuccessor(0, GuardBB);
91 
92   // GuardBB
93   DT.addNewBlock(GuardBB, BeforeBB);
94   Builder.SetInsertPoint(GuardBB);
95   Value *LoopGuard;
96   LoopGuard = Builder.CreateICmp(Predicate, LB, UB);
97   LoopGuard->setName("polly.loop_guard");
98   Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB);
99 
100   // PreHeaderBB
101   DT.addNewBlock(PreHeaderBB, GuardBB);
102   Builder.SetInsertPoint(PreHeaderBB);
103   Builder.CreateBr(HeaderBB);
104 
105   // HeaderBB
106   DT.addNewBlock(HeaderBB, PreHeaderBB);
107   Builder.SetInsertPoint(HeaderBB);
108   PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar");
109   IV->addIncoming(LB, PreHeaderBB);
110   Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
111   Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
112   Value *LoopCondition;
113   UB = Builder.CreateSub(UB, Stride, "polly.adjust_ub");
114   LoopCondition = Builder.CreateICmp(Predicate, IV, UB);
115   LoopCondition->setName("polly.loop_cond");
116   Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
117   IV->addIncoming(IncrementedIV, HeaderBB);
118   DT.changeImmediateDominator(ExitBB, GuardBB);
119 
120   // The loop body should be added here.
121   Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
122   return IV;
123 }
124 
125 void OMPGenerator::createCallParallelLoopStart(
126     Value *SubFunction, Value *SubfunctionParam, Value *NumberOfThreads,
127     Value *LowerBound, Value *UpperBound, Value *Stride) {
128   Module *M = getModule();
129   const char *Name = "GOMP_parallel_loop_runtime_start";
130   Function *F = M->getFunction(Name);
131 
132   // If F is not available, declare it.
133   if (!F) {
134     Type *LongTy = getIntPtrTy();
135     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
136 
137     Type *Params[] = { PointerType::getUnqual(FunctionType::get(
138                            Builder.getVoidTy(), Builder.getInt8PtrTy(), false)),
139                        Builder.getInt8PtrTy(), Builder.getInt32Ty(), LongTy,
140                        LongTy, LongTy, };
141 
142     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
143     F = Function::Create(Ty, Linkage, Name, M);
144   }
145 
146   Value *Args[] = { SubFunction, SubfunctionParam, NumberOfThreads,
147                     LowerBound,  UpperBound,       Stride };
148 
149   Builder.CreateCall(F, Args);
150 }
151 
152 Value *OMPGenerator::createCallLoopNext(Value *LowerBoundPtr,
153                                         Value *UpperBoundPtr) {
154   Module *M = getModule();
155   const char *Name = "GOMP_loop_runtime_next";
156   Function *F = M->getFunction(Name);
157 
158   // If F is not available, declare it.
159   if (!F) {
160     Type *LongPtrTy = PointerType::getUnqual(getIntPtrTy());
161     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
162 
163     Type *Params[] = { LongPtrTy, LongPtrTy, };
164 
165     FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false);
166     F = Function::Create(Ty, Linkage, Name, M);
167   }
168 
169   Value *Args[] = { LowerBoundPtr, UpperBoundPtr, };
170 
171   Value *Return = Builder.CreateCall(F, Args);
172   Return = Builder.CreateICmpNE(
173       Return, Builder.CreateZExt(Builder.getFalse(), Return->getType()));
174   return Return;
175 }
176 
177 void OMPGenerator::createCallParallelEnd() {
178   const char *Name = "GOMP_parallel_end";
179   Module *M = getModule();
180   Function *F = M->getFunction(Name);
181 
182   // If F is not available, declare it.
183   if (!F) {
184     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
185 
186     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
187     F = Function::Create(Ty, Linkage, Name, M);
188   }
189 
190   Builder.CreateCall(F);
191 }
192 
193 void OMPGenerator::createCallLoopEndNowait() {
194   const char *Name = "GOMP_loop_end_nowait";
195   Module *M = getModule();
196   Function *F = M->getFunction(Name);
197 
198   // If F is not available, declare it.
199   if (!F) {
200     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
201 
202     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
203     F = Function::Create(Ty, Linkage, Name, M);
204   }
205 
206   Builder.CreateCall(F);
207 }
208 
209 IntegerType *OMPGenerator::getIntPtrTy() {
210   return P->getAnalysis<DataLayout>().getIntPtrType(Builder.getContext());
211 }
212 
213 Module *OMPGenerator::getModule() {
214   return Builder.GetInsertBlock()->getParent()->getParent();
215 }
216 
217 Function *OMPGenerator::createSubfunctionDefinition() {
218   Module *M = getModule();
219   Function *F = Builder.GetInsertBlock()->getParent();
220   std::vector<Type *> Arguments(1, Builder.getInt8PtrTy());
221   FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
222   Function *FN = Function::Create(FT, Function::InternalLinkage,
223                                   F->getName() + ".omp_subfn", M);
224   // Do not run any polly pass on the new function.
225   P->getAnalysis<polly::ScopDetection>().markFunctionAsInvalid(FN);
226 
227   Function::arg_iterator AI = FN->arg_begin();
228   AI->setName("omp.userContext");
229 
230   return FN;
231 }
232 
233 Value *OMPGenerator::loadValuesIntoStruct(SetVector<Value *> &Values) {
234   std::vector<Type *> Members;
235 
236   for (unsigned i = 0; i < Values.size(); i++)
237     Members.push_back(Values[i]->getType());
238 
239   StructType *Ty = StructType::get(Builder.getContext(), Members);
240   Value *Struct = Builder.CreateAlloca(Ty, 0, "omp.userContext");
241 
242   for (unsigned i = 0; i < Values.size(); i++) {
243     Value *Address = Builder.CreateStructGEP(Struct, i);
244     Builder.CreateStore(Values[i], Address);
245   }
246 
247   return Struct;
248 }
249 
250 void OMPGenerator::extractValuesFromStruct(SetVector<Value *> OldValues,
251                                            Value *Struct,
252                                            ValueToValueMapTy &Map) {
253   for (unsigned i = 0; i < OldValues.size(); i++) {
254     Value *Address = Builder.CreateStructGEP(Struct, i);
255     Value *NewValue = Builder.CreateLoad(Address);
256     Map.insert(std::make_pair(OldValues[i], NewValue));
257   }
258 }
259 
260 Value *OMPGenerator::createSubfunction(Value *Stride, Value *StructData,
261                                        SetVector<Value *> Data,
262                                        ValueToValueMapTy &Map,
263                                        Function **SubFunction) {
264   Function *FN = createSubfunctionDefinition();
265 
266   BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *LoadIVBoundsBB,
267       *AfterBB;
268   Value *LowerBoundPtr, *UpperBoundPtr, *UserContext, *Ret1, *HasNextSchedule,
269       *LowerBound, *UpperBound, *IV;
270   Type *IntPtrTy = getIntPtrTy();
271   LLVMContext &Context = FN->getContext();
272 
273   // Store the previous basic block.
274   PrevBB = Builder.GetInsertBlock();
275 
276   // Create basic blocks.
277   HeaderBB = BasicBlock::Create(Context, "omp.setup", FN);
278   ExitBB = BasicBlock::Create(Context, "omp.exit", FN);
279   CheckNextBB = BasicBlock::Create(Context, "omp.checkNext", FN);
280   LoadIVBoundsBB = BasicBlock::Create(Context, "omp.loadIVBounds", FN);
281 
282   DominatorTree &DT = P->getAnalysis<DominatorTree>();
283   DT.addNewBlock(HeaderBB, PrevBB);
284   DT.addNewBlock(ExitBB, HeaderBB);
285   DT.addNewBlock(CheckNextBB, HeaderBB);
286   DT.addNewBlock(LoadIVBoundsBB, HeaderBB);
287 
288   // Fill up basic block HeaderBB.
289   Builder.SetInsertPoint(HeaderBB);
290   LowerBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.lowerBoundPtr");
291   UpperBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.upperBoundPtr");
292   UserContext = Builder.CreateBitCast(FN->arg_begin(), StructData->getType(),
293                                       "omp.userContext");
294 
295   extractValuesFromStruct(Data, UserContext, Map);
296   Builder.CreateBr(CheckNextBB);
297 
298   // Add code to check if another set of iterations will be executed.
299   Builder.SetInsertPoint(CheckNextBB);
300   Ret1 = createCallLoopNext(LowerBoundPtr, UpperBoundPtr);
301   HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(),
302                                         "omp.hasNextScheduleBlock");
303   Builder.CreateCondBr(HasNextSchedule, LoadIVBoundsBB, ExitBB);
304 
305   // Add code to to load the iv bounds for this set of iterations.
306   Builder.SetInsertPoint(LoadIVBoundsBB);
307   LowerBound = Builder.CreateLoad(LowerBoundPtr, "omp.lowerBound");
308   UpperBound = Builder.CreateLoad(UpperBoundPtr, "omp.upperBound");
309 
310   // Subtract one as the upper bound provided by openmp is a < comparison
311   // whereas the codegenForSequential function creates a <= comparison.
312   UpperBound = Builder.CreateSub(UpperBound, ConstantInt::get(IntPtrTy, 1),
313                                  "omp.upperBoundAdjusted");
314 
315   Builder.CreateBr(CheckNextBB);
316   Builder.SetInsertPoint(--Builder.GetInsertPoint());
317   IV = createLoop(LowerBound, UpperBound, Stride, Builder, P, AfterBB,
318                   ICmpInst::ICMP_SLE);
319 
320   BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
321   Builder.SetInsertPoint(AfterBB->begin());
322 
323   // Add code to terminate this openmp subfunction.
324   Builder.SetInsertPoint(ExitBB);
325   createCallLoopEndNowait();
326   Builder.CreateRetVoid();
327 
328   Builder.SetInsertPoint(LoopBody);
329   *SubFunction = FN;
330 
331   return IV;
332 }
333 
334 Value *OMPGenerator::createParallelLoop(Value *LowerBound, Value *UpperBound,
335                                         Value *Stride,
336                                         SetVector<Value *> &Values,
337                                         ValueToValueMapTy &Map,
338                                         BasicBlock::iterator *LoopBody) {
339   Value *Struct, *IV, *SubfunctionParam, *NumberOfThreads;
340   Function *SubFunction;
341 
342   Struct = loadValuesIntoStruct(Values);
343 
344   BasicBlock::iterator PrevInsertPoint = Builder.GetInsertPoint();
345   IV = createSubfunction(Stride, Struct, Values, Map, &SubFunction);
346   *LoopBody = Builder.GetInsertPoint();
347   Builder.SetInsertPoint(PrevInsertPoint);
348 
349   // Create call for GOMP_parallel_loop_runtime_start.
350   SubfunctionParam =
351       Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), "omp_data");
352 
353   NumberOfThreads = Builder.getInt32(0);
354 
355   // Add one as the upper bound provided by openmp is a < comparison
356   // whereas the codegenForSequential function creates a <= comparison.
357   UpperBound =
358       Builder.CreateAdd(UpperBound, ConstantInt::get(getIntPtrTy(), 1));
359 
360   createCallParallelLoopStart(SubFunction, SubfunctionParam, NumberOfThreads,
361                               LowerBound, UpperBound, Stride);
362   Builder.CreateCall(SubFunction, SubfunctionParam);
363   createCallParallelEnd();
364 
365   return IV;
366 }
367