1 //===------ LoopGenerators.cpp -  IR helper to create loops ---------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains functions to create scalar and OpenMP parallel loops
11 // as LLVM-IR.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "polly/ScopDetection.h"
16 #include "polly/CodeGen/LoopGenerators.h"
17 
18 #include "llvm/IR/Module.h"
19 #include "llvm/Analysis/Dominators.h"
20 #include "llvm/IR/DataLayout.h"
21 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
22 
23 using namespace llvm;
24 using namespace polly;
25 
26 Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
27                          IRBuilder<> &Builder, Pass *P, BasicBlock *&AfterBlock,
28                          ICmpInst::Predicate Predicate) {
29   DominatorTree &DT = P->getAnalysis<DominatorTree>();
30   Function *F = Builder.GetInsertBlock()->getParent();
31   LLVMContext &Context = F->getContext();
32 
33   BasicBlock *PreheaderBB = Builder.GetInsertBlock();
34   BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
35   BasicBlock *BodyBB = BasicBlock::Create(Context, "polly.loop_body", F);
36   BasicBlock *AfterBB = SplitBlock(PreheaderBB, Builder.GetInsertPoint()++, P);
37   AfterBB->setName("polly.loop_after");
38 
39   PreheaderBB->getTerminator()->setSuccessor(0, HeaderBB);
40   DT.addNewBlock(HeaderBB, PreheaderBB);
41 
42   Builder.SetInsertPoint(HeaderBB);
43 
44   // Use the type of upper and lower bound.
45   assert(LB->getType() == UB->getType() &&
46          "Different types for upper and lower bound.");
47 
48   IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
49   assert(LoopIVType && "UB is not integer?");
50 
51   // IV
52   PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.loopiv");
53   IV->addIncoming(LB, PreheaderBB);
54 
55   Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
56   Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.next_loopiv");
57 
58   // Exit condition.
59   Value *CMP;
60   CMP = Builder.CreateICmp(Predicate, IV, UB);
61 
62   Builder.CreateCondBr(CMP, BodyBB, AfterBB);
63   DT.addNewBlock(BodyBB, HeaderBB);
64 
65   Builder.SetInsertPoint(BodyBB);
66   Builder.CreateBr(HeaderBB);
67   IV->addIncoming(IncrementedIV, BodyBB);
68   DT.changeImmediateDominator(AfterBB, HeaderBB);
69 
70   Builder.SetInsertPoint(BodyBB->begin());
71   AfterBlock = AfterBB;
72 
73   return IV;
74 }
75 
76 void OMPGenerator::createCallParallelLoopStart(
77     Value *SubFunction, Value *SubfunctionParam, Value *NumberOfThreads,
78     Value *LowerBound, Value *UpperBound, Value *Stride) {
79   Module *M = getModule();
80   const char *Name = "GOMP_parallel_loop_runtime_start";
81   Function *F = M->getFunction(Name);
82 
83   // If F is not available, declare it.
84   if (!F) {
85     Type *LongTy = getIntPtrTy();
86     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
87 
88     Type *Params[] = { PointerType::getUnqual(FunctionType::get(
89                            Builder.getVoidTy(), Builder.getInt8PtrTy(), false)),
90                        Builder.getInt8PtrTy(), Builder.getInt32Ty(), LongTy,
91                        LongTy, LongTy, };
92 
93     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
94     F = Function::Create(Ty, Linkage, Name, M);
95   }
96 
97   Value *Args[] = { SubFunction, SubfunctionParam, NumberOfThreads, LowerBound,
98                     UpperBound, Stride, };
99 
100   Builder.CreateCall(F, Args);
101 }
102 
103 Value *
104 OMPGenerator::createCallLoopNext(Value *LowerBoundPtr, Value *UpperBoundPtr) {
105   Module *M = getModule();
106   const char *Name = "GOMP_loop_runtime_next";
107   Function *F = M->getFunction(Name);
108 
109   // If F is not available, declare it.
110   if (!F) {
111     Type *LongPtrTy = PointerType::getUnqual(getIntPtrTy());
112     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
113 
114     Type *Params[] = { LongPtrTy, LongPtrTy, };
115 
116     FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false);
117     F = Function::Create(Ty, Linkage, Name, M);
118   }
119 
120   Value *Args[] = { LowerBoundPtr, UpperBoundPtr, };
121 
122   Value *Return = Builder.CreateCall(F, Args);
123   Return = Builder.CreateICmpNE(
124       Return, Builder.CreateZExt(Builder.getFalse(), Return->getType()));
125   return Return;
126 }
127 
128 void OMPGenerator::createCallParallelEnd() {
129   const char *Name = "GOMP_parallel_end";
130   Module *M = getModule();
131   Function *F = M->getFunction(Name);
132 
133   // If F is not available, declare it.
134   if (!F) {
135     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
136 
137     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
138     F = Function::Create(Ty, Linkage, Name, M);
139   }
140 
141   Builder.CreateCall(F);
142 }
143 
144 void OMPGenerator::createCallLoopEndNowait() {
145   const char *Name = "GOMP_loop_end_nowait";
146   Module *M = getModule();
147   Function *F = M->getFunction(Name);
148 
149   // If F is not available, declare it.
150   if (!F) {
151     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
152 
153     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
154     F = Function::Create(Ty, Linkage, Name, M);
155   }
156 
157   Builder.CreateCall(F);
158 }
159 
160 IntegerType *OMPGenerator::getIntPtrTy() {
161   return P->getAnalysis<DataLayout>().getIntPtrType(Builder.getContext());
162 }
163 
164 Module *OMPGenerator::getModule() {
165   return Builder.GetInsertBlock()->getParent()->getParent();
166 }
167 
168 Function *OMPGenerator::createSubfunctionDefinition() {
169   Module *M = getModule();
170   Function *F = Builder.GetInsertBlock()->getParent();
171   std::vector<Type *> Arguments(1, Builder.getInt8PtrTy());
172   FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
173   Function *FN = Function::Create(FT, Function::InternalLinkage,
174                                   F->getName() + ".omp_subfn", M);
175   // Do not run any polly pass on the new function.
176   P->getAnalysis<polly::ScopDetection>().markFunctionAsInvalid(FN);
177 
178   Function::arg_iterator AI = FN->arg_begin();
179   AI->setName("omp.userContext");
180 
181   return FN;
182 }
183 
184 Value *OMPGenerator::loadValuesIntoStruct(SetVector<Value *> &Values) {
185   std::vector<Type *> Members;
186 
187   for (unsigned i = 0; i < Values.size(); i++)
188     Members.push_back(Values[i]->getType());
189 
190   StructType *Ty = StructType::get(Builder.getContext(), Members);
191   Value *Struct = Builder.CreateAlloca(Ty, 0, "omp.userContext");
192 
193   for (unsigned i = 0; i < Values.size(); i++) {
194     Value *Address = Builder.CreateStructGEP(Struct, i);
195     Builder.CreateStore(Values[i], Address);
196   }
197 
198   return Struct;
199 }
200 
201 void OMPGenerator::extractValuesFromStruct(
202     SetVector<Value *> OldValues, Value *Struct, ValueToValueMapTy &Map) {
203   for (unsigned i = 0; i < OldValues.size(); i++) {
204     Value *Address = Builder.CreateStructGEP(Struct, i);
205     Value *NewValue = Builder.CreateLoad(Address);
206     Map.insert(std::make_pair(OldValues[i], NewValue));
207   }
208 }
209 
210 Value *OMPGenerator::createSubfunction(
211     Value *Stride, Value *StructData, SetVector<Value *> Data,
212     ValueToValueMapTy &Map, Function **SubFunction) {
213   Function *FN = createSubfunctionDefinition();
214 
215   BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *LoadIVBoundsBB,
216       *AfterBB;
217   Value *LowerBoundPtr, *UpperBoundPtr, *UserContext, *Ret1, *HasNextSchedule,
218       *LowerBound, *UpperBound, *IV;
219   Type *IntPtrTy = getIntPtrTy();
220   LLVMContext &Context = FN->getContext();
221 
222   // Store the previous basic block.
223   PrevBB = Builder.GetInsertBlock();
224 
225   // Create basic blocks.
226   HeaderBB = BasicBlock::Create(Context, "omp.setup", FN);
227   ExitBB = BasicBlock::Create(Context, "omp.exit", FN);
228   CheckNextBB = BasicBlock::Create(Context, "omp.checkNext", FN);
229   LoadIVBoundsBB = BasicBlock::Create(Context, "omp.loadIVBounds", FN);
230 
231   DominatorTree &DT = P->getAnalysis<DominatorTree>();
232   DT.addNewBlock(HeaderBB, PrevBB);
233   DT.addNewBlock(ExitBB, HeaderBB);
234   DT.addNewBlock(CheckNextBB, HeaderBB);
235   DT.addNewBlock(LoadIVBoundsBB, HeaderBB);
236 
237   // Fill up basic block HeaderBB.
238   Builder.SetInsertPoint(HeaderBB);
239   LowerBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.lowerBoundPtr");
240   UpperBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.upperBoundPtr");
241   UserContext = Builder.CreateBitCast(FN->arg_begin(), StructData->getType(),
242                                       "omp.userContext");
243 
244   extractValuesFromStruct(Data, UserContext, Map);
245   Builder.CreateBr(CheckNextBB);
246 
247   // Add code to check if another set of iterations will be executed.
248   Builder.SetInsertPoint(CheckNextBB);
249   Ret1 = createCallLoopNext(LowerBoundPtr, UpperBoundPtr);
250   HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(),
251                                         "omp.hasNextScheduleBlock");
252   Builder.CreateCondBr(HasNextSchedule, LoadIVBoundsBB, ExitBB);
253 
254   // Add code to to load the iv bounds for this set of iterations.
255   Builder.SetInsertPoint(LoadIVBoundsBB);
256   LowerBound = Builder.CreateLoad(LowerBoundPtr, "omp.lowerBound");
257   UpperBound = Builder.CreateLoad(UpperBoundPtr, "omp.upperBound");
258 
259   // Subtract one as the upper bound provided by openmp is a < comparison
260   // whereas the codegenForSequential function creates a <= comparison.
261   UpperBound = Builder.CreateSub(UpperBound, ConstantInt::get(IntPtrTy, 1),
262                                  "omp.upperBoundAdjusted");
263 
264   Builder.CreateBr(CheckNextBB);
265   Builder.SetInsertPoint(--Builder.GetInsertPoint());
266   IV = createLoop(LowerBound, UpperBound, Stride, Builder, P, AfterBB,
267                   ICmpInst::ICMP_SLE);
268 
269   BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
270   Builder.SetInsertPoint(AfterBB->begin());
271 
272   // Add code to terminate this openmp subfunction.
273   Builder.SetInsertPoint(ExitBB);
274   createCallLoopEndNowait();
275   Builder.CreateRetVoid();
276 
277   Builder.SetInsertPoint(LoopBody);
278   *SubFunction = FN;
279 
280   return IV;
281 }
282 
283 Value *OMPGenerator::createParallelLoop(
284     Value *LowerBound, Value *UpperBound, Value *Stride,
285     SetVector<Value *> &Values, ValueToValueMapTy &Map,
286     BasicBlock::iterator *LoopBody) {
287   Value *Struct, *IV, *SubfunctionParam, *NumberOfThreads;
288   Function *SubFunction;
289 
290   Struct = loadValuesIntoStruct(Values);
291 
292   BasicBlock::iterator PrevInsertPoint = Builder.GetInsertPoint();
293   IV = createSubfunction(Stride, Struct, Values, Map, &SubFunction);
294   *LoopBody = Builder.GetInsertPoint();
295   Builder.SetInsertPoint(PrevInsertPoint);
296 
297   // Create call for GOMP_parallel_loop_runtime_start.
298   SubfunctionParam =
299       Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), "omp_data");
300 
301   NumberOfThreads = Builder.getInt32(0);
302 
303   // Add one as the upper bound provided by openmp is a < comparison
304   // whereas the codegenForSequential function creates a <= comparison.
305   UpperBound =
306       Builder.CreateAdd(UpperBound, ConstantInt::get(getIntPtrTy(), 1));
307 
308   createCallParallelLoopStart(SubFunction, SubfunctionParam, NumberOfThreads,
309                               LowerBound, UpperBound, Stride);
310   Builder.CreateCall(SubFunction, SubfunctionParam);
311   createCallParallelEnd();
312 
313   return IV;
314 }
315