1 //===------ LoopGenerators.cpp -  IR helper to create loops ---------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains functions to create scalar and parallel loops as LLVM-IR.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "polly/ScopDetection.h"
15 #include "polly/CodeGen/LoopGenerators.h"
16 #include "llvm/Analysis/LoopInfo.h"
17 #include "llvm/IR/DataLayout.h"
18 #include "llvm/IR/Dominators.h"
19 #include "llvm/IR/Module.h"
20 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
21 #include "llvm/Support/CommandLine.h"
22 
23 using namespace llvm;
24 using namespace polly;
25 
26 static cl::opt<int>
27     PollyNumThreads("polly-num-threads",
28                     cl::desc("Number of threads to use (0 = auto)"), cl::Hidden,
29                     cl::init(0));
30 
31 // We generate a loop of either of the following structures:
32 //
33 //              BeforeBB                      BeforeBB
34 //                 |                             |
35 //                 v                             v
36 //              GuardBB                      PreHeaderBB
37 //              /      |                         |   _____
38 //     __  PreHeaderBB  |                        v  \/    |
39 //    /  \    /         |                     HeaderBB  latch
40 // latch  HeaderBB      |                        |\       |
41 //    \  /    \         /                        | \------/
42 //     <       \       /                         |
43 //              \     /                          v
44 //              ExitBB                         ExitBB
45 //
46 // depending on whether or not we know that it is executed at least once. If
47 // not, GuardBB checks if the loop is executed at least once. If this is the
48 // case we branch to PreHeaderBB and subsequently to the HeaderBB, which
49 // contains the loop iv 'polly.indvar', the incremented loop iv
50 // 'polly.indvar_next' as well as the condition to check if we execute another
51 // iteration of the loop. After the loop has finished, we branch to ExitBB.
52 Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
53                          PollyIRBuilder &Builder, Pass *P, LoopInfo &LI,
54                          DominatorTree &DT, BasicBlock *&ExitBB,
55                          ICmpInst::Predicate Predicate,
56                          ScopAnnotator *Annotator, bool Parallel,
57                          bool UseGuard) {
58   Function *F = Builder.GetInsertBlock()->getParent();
59   LLVMContext &Context = F->getContext();
60 
61   assert(LB->getType() == UB->getType() && "Types of loop bounds do not match");
62   IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
63   assert(LoopIVType && "UB is not integer?");
64 
65   BasicBlock *BeforeBB = Builder.GetInsertBlock();
66   BasicBlock *GuardBB =
67       UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr;
68   BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
69   BasicBlock *PreHeaderBB =
70       BasicBlock::Create(Context, "polly.loop_preheader", F);
71 
72   // Update LoopInfo
73   Loop *OuterLoop = LI.getLoopFor(BeforeBB);
74   Loop *NewLoop = new Loop();
75 
76   if (OuterLoop)
77     OuterLoop->addChildLoop(NewLoop);
78   else
79     LI.addTopLevelLoop(NewLoop);
80 
81   if (OuterLoop && GuardBB)
82     OuterLoop->addBasicBlockToLoop(GuardBB, LI.getBase());
83   else if (OuterLoop)
84     OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI.getBase());
85 
86   NewLoop->addBasicBlockToLoop(HeaderBB, LI.getBase());
87 
88   // Notify the annotator (if present) that we have a new loop, but only
89   // after the header block is set.
90   if (Annotator)
91     Annotator->pushLoop(NewLoop, Parallel);
92 
93   // ExitBB
94   ExitBB = SplitBlock(BeforeBB, Builder.GetInsertPoint()++, P);
95   ExitBB->setName("polly.loop_exit");
96 
97   // BeforeBB
98   if (GuardBB) {
99     BeforeBB->getTerminator()->setSuccessor(0, GuardBB);
100     DT.addNewBlock(GuardBB, BeforeBB);
101 
102     // GuardBB
103     Builder.SetInsertPoint(GuardBB);
104     Value *LoopGuard;
105     LoopGuard = Builder.CreateICmp(Predicate, LB, UB);
106     LoopGuard->setName("polly.loop_guard");
107     Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB);
108     DT.addNewBlock(PreHeaderBB, GuardBB);
109   } else {
110     BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB);
111     DT.addNewBlock(PreHeaderBB, BeforeBB);
112   }
113 
114   // PreHeaderBB
115   Builder.SetInsertPoint(PreHeaderBB);
116   Builder.CreateBr(HeaderBB);
117 
118   // HeaderBB
119   DT.addNewBlock(HeaderBB, PreHeaderBB);
120   Builder.SetInsertPoint(HeaderBB);
121   PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar");
122   IV->addIncoming(LB, PreHeaderBB);
123   Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
124   Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
125   Value *LoopCondition;
126   UB = Builder.CreateSub(UB, Stride, "polly.adjust_ub");
127   LoopCondition = Builder.CreateICmp(Predicate, IV, UB);
128   LoopCondition->setName("polly.loop_cond");
129 
130   // Create the loop latch and annotate it as such.
131   BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
132   if (Annotator)
133     Annotator->annotateLoopLatch(B, NewLoop, Parallel);
134 
135   IV->addIncoming(IncrementedIV, HeaderBB);
136   if (GuardBB)
137     DT.changeImmediateDominator(ExitBB, GuardBB);
138   else
139     DT.changeImmediateDominator(ExitBB, HeaderBB);
140 
141   // The loop body should be added here.
142   Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
143   return IV;
144 }
145 
146 Value *ParallelLoopGenerator::createParallelLoop(
147     Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues,
148     ValueToValueMapTy &Map, BasicBlock::iterator *LoopBody) {
149   Value *Struct, *IV, *SubFnParam;
150   Function *SubFn;
151 
152   Struct = storeValuesIntoStruct(UsedValues);
153 
154   BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint();
155   IV = createSubFn(Stride, Struct, UsedValues, Map, &SubFn);
156   *LoopBody = Builder.GetInsertPoint();
157   Builder.SetInsertPoint(BeforeLoop);
158 
159   SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(),
160                                      "polly.par.userContext");
161 
162   // Add one as the upper bound provided by openmp is a < comparison
163   // whereas the codegenForSequential function creates a <= comparison.
164   UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1));
165 
166   // Tell the runtime we start a parallel loop
167   createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
168   Builder.CreateCall(SubFn, SubFnParam);
169   createCallJoinThreads();
170 
171   // Mark the end of the lifetime for the parameter struct.
172   Type *Ty = Struct->getType();
173   ConstantInt *SizeOf = Builder.getInt64(DL.getTypeAllocSize(Ty));
174   Builder.CreateLifetimeEnd(Struct, SizeOf);
175 
176   return IV;
177 }
178 
179 void ParallelLoopGenerator::createCallSpawnThreads(Value *SubFn,
180                                                    Value *SubFnParam, Value *LB,
181                                                    Value *UB, Value *Stride) {
182   const std::string Name = "GOMP_parallel_loop_runtime_start";
183 
184   Function *F = M->getFunction(Name);
185 
186   // If F is not available, declare it.
187   if (!F) {
188     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
189 
190     Type *Params[] = {PointerType::getUnqual(FunctionType::get(
191                           Builder.getVoidTy(), Builder.getInt8PtrTy(), false)),
192                       Builder.getInt8PtrTy(), Builder.getInt32Ty(), LongType,
193                       LongType, LongType};
194 
195     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
196     F = Function::Create(Ty, Linkage, Name, M);
197   }
198 
199   Value *NumberOfThreads = Builder.getInt32(PollyNumThreads);
200   Value *Args[] = {SubFn, SubFnParam, NumberOfThreads, LB, UB, Stride};
201 
202   Builder.CreateCall(F, Args);
203 }
204 
205 Value *ParallelLoopGenerator::createCallGetWorkItem(Value *LBPtr,
206                                                     Value *UBPtr) {
207   const std::string Name = "GOMP_loop_runtime_next";
208 
209   Function *F = M->getFunction(Name);
210 
211   // If F is not available, declare it.
212   if (!F) {
213     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
214     Type *Params[] = {LongType->getPointerTo(), LongType->getPointerTo()};
215     FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false);
216     F = Function::Create(Ty, Linkage, Name, M);
217   }
218 
219   Value *Args[] = {LBPtr, UBPtr};
220   Value *Return = Builder.CreateCall(F, Args);
221   Return = Builder.CreateICmpNE(
222       Return, Builder.CreateZExt(Builder.getFalse(), Return->getType()));
223   return Return;
224 }
225 
226 void ParallelLoopGenerator::createCallJoinThreads() {
227   const std::string Name = "GOMP_parallel_end";
228 
229   Function *F = M->getFunction(Name);
230 
231   // If F is not available, declare it.
232   if (!F) {
233     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
234 
235     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
236     F = Function::Create(Ty, Linkage, Name, M);
237   }
238 
239   Builder.CreateCall(F);
240 }
241 
242 void ParallelLoopGenerator::createCallCleanupThread() {
243   const std::string Name = "GOMP_loop_end_nowait";
244 
245   Function *F = M->getFunction(Name);
246 
247   // If F is not available, declare it.
248   if (!F) {
249     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
250 
251     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
252     F = Function::Create(Ty, Linkage, Name, M);
253   }
254 
255   Builder.CreateCall(F);
256 }
257 
258 Function *ParallelLoopGenerator::createSubFnDefinition() {
259   Function *F = Builder.GetInsertBlock()->getParent();
260   std::vector<Type *> Arguments(1, Builder.getInt8PtrTy());
261   FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
262   Function *SubFn = Function::Create(FT, Function::InternalLinkage,
263                                      F->getName() + ".polly.subfn", M);
264 
265   // Do not run any polly pass on the new function.
266   SubFn->addFnAttr(PollySkipFnAttr);
267 
268   Function::arg_iterator AI = SubFn->arg_begin();
269   AI->setName("polly.par.userContext");
270 
271   return SubFn;
272 }
273 
274 Value *
275 ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) {
276   SmallVector<Type *, 8> Members;
277 
278   for (Value *V : Values)
279     Members.push_back(V->getType());
280 
281   // We do not want to allocate the alloca inside any loop, thus we allocate it
282   // in the entry block of the function and use annotations to denote the actual
283   // live span (similar to clang).
284   BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock();
285   Instruction *IP = EntryBB.getFirstInsertionPt();
286   StructType *Ty = StructType::get(Builder.getContext(), Members);
287   Value *Struct = new AllocaInst(Ty, 0, "polly.par.userContext", IP);
288 
289   // Mark the start of the lifetime for the parameter struct.
290   ConstantInt *SizeOf = Builder.getInt64(DL.getTypeAllocSize(Ty));
291   Builder.CreateLifetimeStart(Struct, SizeOf);
292 
293   for (unsigned i = 0; i < Values.size(); i++) {
294     Value *Address = Builder.CreateStructGEP(Struct, i);
295     Builder.CreateStore(Values[i], Address);
296   }
297 
298   return Struct;
299 }
300 
301 void ParallelLoopGenerator::extractValuesFromStruct(
302     SetVector<Value *> OldValues, Value *Struct, ValueToValueMapTy &Map) {
303   for (unsigned i = 0; i < OldValues.size(); i++) {
304     Value *Address = Builder.CreateStructGEP(Struct, i);
305     Value *NewValue = Builder.CreateLoad(Address);
306     Map[OldValues[i]] = NewValue;
307   }
308 }
309 
310 Value *ParallelLoopGenerator::createSubFn(Value *Stride, Value *StructData,
311                                           SetVector<Value *> Data,
312                                           ValueToValueMapTy &Map,
313                                           Function **SubFnPtr) {
314   BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *PreHeaderBB, *AfterBB;
315   Value *LBPtr, *UBPtr, *UserContext, *Ret1, *HasNextSchedule, *LB, *UB, *IV;
316   Function *SubFn = createSubFnDefinition();
317   LLVMContext &Context = SubFn->getContext();
318 
319   // Store the previous basic block.
320   PrevBB = Builder.GetInsertBlock();
321 
322   // Create basic blocks.
323   HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
324   ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
325   CheckNextBB = BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
326   PreHeaderBB = BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
327 
328   DT.addNewBlock(HeaderBB, PrevBB);
329   DT.addNewBlock(ExitBB, HeaderBB);
330   DT.addNewBlock(CheckNextBB, HeaderBB);
331   DT.addNewBlock(PreHeaderBB, HeaderBB);
332 
333   // Fill up basic block HeaderBB.
334   Builder.SetInsertPoint(HeaderBB);
335   LBPtr = Builder.CreateAlloca(LongType, 0, "polly.par.LBPtr");
336   UBPtr = Builder.CreateAlloca(LongType, 0, "polly.par.UBPtr");
337   UserContext = Builder.CreateBitCast(SubFn->arg_begin(), StructData->getType(),
338                                       "polly.par.userContext");
339 
340   extractValuesFromStruct(Data, UserContext, Map);
341   Builder.CreateBr(CheckNextBB);
342 
343   // Add code to check if another set of iterations will be executed.
344   Builder.SetInsertPoint(CheckNextBB);
345   Ret1 = createCallGetWorkItem(LBPtr, UBPtr);
346   HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(),
347                                         "polly.par.hasNextScheduleBlock");
348   Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB);
349 
350   // Add code to to load the iv bounds for this set of iterations.
351   Builder.SetInsertPoint(PreHeaderBB);
352   LB = Builder.CreateLoad(LBPtr, "polly.par.LB");
353   UB = Builder.CreateLoad(UBPtr, "polly.par.UB");
354 
355   // Subtract one as the upper bound provided by openmp is a < comparison
356   // whereas the codegenForSequential function creates a <= comparison.
357   UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1),
358                          "polly.par.UBAdjusted");
359 
360   Builder.CreateBr(CheckNextBB);
361   Builder.SetInsertPoint(--Builder.GetInsertPoint());
362   IV = createLoop(LB, UB, Stride, Builder, P, LI, DT, AfterBB,
363                   ICmpInst::ICMP_SLE, nullptr, true, /* UseGuard */ false);
364 
365   BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
366 
367   // Add code to terminate this subfunction.
368   Builder.SetInsertPoint(ExitBB);
369   createCallCleanupThread();
370   Builder.CreateRetVoid();
371 
372   Builder.SetInsertPoint(LoopBody);
373   *SubFnPtr = SubFn;
374 
375   return IV;
376 }
377