1 //===------ LoopGenerators.cpp -  IR helper to create loops ---------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains functions to create scalar and OpenMP parallel loops
11 // as LLVM-IR.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "polly/ScopDetection.h"
16 #include "polly/CodeGen/LoopGenerators.h"
17 
18 #include "llvm/Module.h"
19 #include "llvm/Analysis/Dominators.h"
20 #include "llvm/DataLayout.h"
21 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
22 
23 using namespace llvm;
24 using namespace polly;
25 
26 Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
27                          IRBuilder<> &Builder, Pass *P,
28                          BasicBlock *&AfterBlock,
29                          ICmpInst::Predicate Predicate) {
30   DominatorTree &DT = P->getAnalysis<DominatorTree>();
31   Function *F = Builder.GetInsertBlock()->getParent();
32   LLVMContext &Context = F->getContext();
33 
34   BasicBlock *PreheaderBB = Builder.GetInsertBlock();
35   BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
36   BasicBlock *BodyBB = BasicBlock::Create(Context, "polly.loop_body", F);
37   BasicBlock *AfterBB = SplitBlock(PreheaderBB, Builder.GetInsertPoint()++, P);
38   AfterBB->setName("polly.loop_after");
39 
40   PreheaderBB->getTerminator()->setSuccessor(0, HeaderBB);
41   DT.addNewBlock(HeaderBB, PreheaderBB);
42 
43   Builder.SetInsertPoint(HeaderBB);
44 
45   // Use the type of upper and lower bound.
46   assert(LB->getType() == UB->getType()
47          && "Different types for upper and lower bound.");
48 
49   IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
50   assert(LoopIVType && "UB is not integer?");
51 
52   // IV
53   PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.loopiv");
54   IV->addIncoming(LB, PreheaderBB);
55 
56   Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
57   Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.next_loopiv");
58 
59   // Exit condition.
60   Value *CMP;
61   CMP = Builder.CreateICmp(Predicate, IV, UB);
62 
63   Builder.CreateCondBr(CMP, BodyBB, AfterBB);
64   DT.addNewBlock(BodyBB, HeaderBB);
65 
66   Builder.SetInsertPoint(BodyBB);
67   Builder.CreateBr(HeaderBB);
68   IV->addIncoming(IncrementedIV, BodyBB);
69   DT.changeImmediateDominator(AfterBB, HeaderBB);
70 
71   Builder.SetInsertPoint(BodyBB->begin());
72   AfterBlock = AfterBB;
73 
74   return IV;
75 }
76 
77 void OMPGenerator::createCallParallelLoopStart(Value *SubFunction,
78                                                Value *SubfunctionParam,
79                                                Value *NumberOfThreads,
80                                                Value *LowerBound,
81                                                Value *UpperBound,
82                                                Value *Stride) {
83   Module *M = getModule();
84   const char *Name = "GOMP_parallel_loop_runtime_start";
85   Function *F = M->getFunction(Name);
86 
87   // If F is not available, declare it.
88   if (!F) {
89     Type *LongTy = getIntPtrTy();
90     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
91 
92     Type *Params[] = {
93       PointerType::getUnqual(FunctionType::get(Builder.getVoidTy(),
94                                                Builder.getInt8PtrTy(),
95                                                false)),
96       Builder.getInt8PtrTy(),
97       Builder.getInt32Ty(),
98       LongTy,
99       LongTy,
100       LongTy,
101     };
102 
103     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
104     F = Function::Create(Ty, Linkage, Name, M);
105   }
106 
107   Value *Args[] = {
108     SubFunction,
109     SubfunctionParam,
110     NumberOfThreads,
111     LowerBound,
112     UpperBound,
113     Stride,
114   };
115 
116   Builder.CreateCall(F, Args);
117 }
118 
119 Value *OMPGenerator::createCallLoopNext(Value *LowerBoundPtr,
120                                         Value *UpperBoundPtr) {
121   Module *M = getModule();
122   const char *Name = "GOMP_loop_runtime_next";
123   Function *F = M->getFunction(Name);
124 
125   // If F is not available, declare it.
126   if (!F) {
127     Type *LongPtrTy = PointerType::getUnqual(getIntPtrTy());
128     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
129 
130     Type *Params[] = {
131       LongPtrTy,
132       LongPtrTy,
133     };
134 
135     FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false);
136     F = Function::Create(Ty, Linkage, Name, M);
137   }
138 
139   Value *Args[] = {
140     LowerBoundPtr,
141     UpperBoundPtr,
142   };
143 
144   Value *Return = Builder.CreateCall(F, Args);
145   Return = Builder.CreateICmpNE(Return, Builder.CreateZExt(Builder.getFalse(),
146                                                            Return->getType()));
147   return Return;
148 }
149 
150 void OMPGenerator::createCallParallelEnd() {
151   const char *Name = "GOMP_parallel_end";
152   Module *M = getModule();
153   Function *F = M->getFunction(Name);
154 
155   // If F is not available, declare it.
156   if (!F) {
157     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
158 
159     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
160     F = Function::Create(Ty, Linkage, Name, M);
161   }
162 
163   Builder.CreateCall(F);
164 }
165 
166 void OMPGenerator::createCallLoopEndNowait() {
167   const char *Name = "GOMP_loop_end_nowait";
168   Module *M = getModule();
169   Function *F = M->getFunction(Name);
170 
171   // If F is not available, declare it.
172   if (!F) {
173     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
174 
175     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
176     F = Function::Create(Ty, Linkage, Name, M);
177   }
178 
179   Builder.CreateCall(F);
180 }
181 
182 IntegerType *OMPGenerator::getIntPtrTy() {
183   // FIXME: This might need to get a proper address space. Hard code 0 for now.
184   return P->getAnalysis<DataLayout>().getIntPtrType(Builder.getContext(), 0u);
185 }
186 
187 Module *OMPGenerator::getModule() {
188   return Builder.GetInsertBlock()->getParent()->getParent();
189 }
190 
191 Function *OMPGenerator::createSubfunctionDefinition() {
192   Module *M = getModule();
193   Function *F = Builder.GetInsertBlock()->getParent();
194   std::vector<Type*> Arguments(1, Builder.getInt8PtrTy());
195   FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
196   Function *FN = Function::Create(FT, Function::InternalLinkage,
197                                   F->getName() + ".omp_subfn", M);
198   // Do not run any polly pass on the new function.
199   P->getAnalysis<polly::ScopDetection>().markFunctionAsInvalid(FN);
200 
201   Function::arg_iterator AI = FN->arg_begin();
202   AI->setName("omp.userContext");
203 
204   return FN;
205 }
206 
207 Value *OMPGenerator::loadValuesIntoStruct(SetVector<Value*> &Values) {
208   std::vector<Type*> Members;
209 
210   for (unsigned i = 0; i < Values.size(); i++)
211     Members.push_back(Values[i]->getType());
212 
213   StructType *Ty = StructType::get(Builder.getContext(), Members);
214   Value *Struct = Builder.CreateAlloca(Ty, 0, "omp.userContext");
215 
216   for (unsigned i = 0; i < Values.size(); i++) {
217     Value *Address = Builder.CreateStructGEP(Struct, i);
218     Builder.CreateStore(Values[i], Address);
219   }
220 
221   return Struct;
222 }
223 
224 void OMPGenerator::extractValuesFromStruct(SetVector<Value*> OldValues,
225                                            Value *Struct,
226                                            ValueToValueMapTy &Map) {
227   for (unsigned i = 0; i < OldValues.size(); i++) {
228     Value *Address = Builder.CreateStructGEP(Struct, i);
229     Value *NewValue = Builder.CreateLoad(Address);
230     Map.insert(std::make_pair<Value*, Value*>(OldValues[i], NewValue));
231   }
232 }
233 
234 Value *OMPGenerator::createSubfunction(Value *Stride, Value *StructData,
235                                        SetVector<Value*> Data,
236                                        ValueToValueMapTy &Map,
237                                        Function **SubFunction) {
238   Function *FN = createSubfunctionDefinition();
239 
240   BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *LoadIVBoundsBB,
241              *AfterBB;
242   Value *LowerBoundPtr, *UpperBoundPtr, *UserContext, *Ret1, *HasNextSchedule,
243         *LowerBound, *UpperBound, *IV;
244   Type *IntPtrTy = getIntPtrTy();
245   LLVMContext &Context = FN->getContext();
246 
247   // Store the previous basic block.
248   PrevBB = Builder.GetInsertBlock();
249 
250   // Create basic blocks.
251   HeaderBB = BasicBlock::Create(Context, "omp.setup", FN);
252   ExitBB = BasicBlock::Create(Context, "omp.exit", FN);
253   CheckNextBB = BasicBlock::Create(Context, "omp.checkNext", FN);
254   LoadIVBoundsBB = BasicBlock::Create(Context, "omp.loadIVBounds", FN);
255 
256   DominatorTree &DT = P->getAnalysis<DominatorTree>();
257   DT.addNewBlock(HeaderBB, PrevBB);
258   DT.addNewBlock(ExitBB, HeaderBB);
259   DT.addNewBlock(CheckNextBB, HeaderBB);
260   DT.addNewBlock(LoadIVBoundsBB, HeaderBB);
261 
262   // Fill up basic block HeaderBB.
263   Builder.SetInsertPoint(HeaderBB);
264   LowerBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.lowerBoundPtr");
265   UpperBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.upperBoundPtr");
266   UserContext = Builder.CreateBitCast(FN->arg_begin(), StructData->getType(),
267                                       "omp.userContext");
268 
269   extractValuesFromStruct(Data, UserContext, Map);
270   Builder.CreateBr(CheckNextBB);
271 
272   // Add code to check if another set of iterations will be executed.
273   Builder.SetInsertPoint(CheckNextBB);
274   Ret1 = createCallLoopNext(LowerBoundPtr, UpperBoundPtr);
275   HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(),
276                                         "omp.hasNextScheduleBlock");
277   Builder.CreateCondBr(HasNextSchedule, LoadIVBoundsBB, ExitBB);
278 
279   // Add code to to load the iv bounds for this set of iterations.
280   Builder.SetInsertPoint(LoadIVBoundsBB);
281   LowerBound = Builder.CreateLoad(LowerBoundPtr, "omp.lowerBound");
282   UpperBound = Builder.CreateLoad(UpperBoundPtr, "omp.upperBound");
283 
284   // Subtract one as the upper bound provided by openmp is a < comparison
285   // whereas the codegenForSequential function creates a <= comparison.
286   UpperBound = Builder.CreateSub(UpperBound, ConstantInt::get(IntPtrTy, 1),
287                                  "omp.upperBoundAdjusted");
288 
289   Builder.CreateBr(CheckNextBB);
290   Builder.SetInsertPoint(--Builder.GetInsertPoint());
291   IV = createLoop(LowerBound, UpperBound, Stride, Builder, P, AfterBB,
292                   ICmpInst::ICMP_SLE);
293 
294   BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
295   Builder.SetInsertPoint(AfterBB->begin());
296 
297   // Add code to terminate this openmp subfunction.
298   Builder.SetInsertPoint(ExitBB);
299   createCallLoopEndNowait();
300   Builder.CreateRetVoid();
301 
302   Builder.SetInsertPoint(LoopBody);
303   *SubFunction = FN;
304 
305   return IV;
306 }
307 
308 Value *OMPGenerator::createParallelLoop(Value *LowerBound, Value *UpperBound,
309                                         Value *Stride,
310                                         SetVector<Value*> &Values,
311                                         ValueToValueMapTy &Map,
312                                         BasicBlock::iterator *LoopBody) {
313   Value *Struct, *IV, *SubfunctionParam, *NumberOfThreads;
314   Function *SubFunction;
315 
316   Struct = loadValuesIntoStruct(Values);
317 
318   BasicBlock::iterator PrevInsertPoint = Builder.GetInsertPoint();
319   IV = createSubfunction(Stride, Struct, Values, Map, &SubFunction);
320   *LoopBody = Builder.GetInsertPoint();
321   Builder.SetInsertPoint(PrevInsertPoint);
322 
323   // Create call for GOMP_parallel_loop_runtime_start.
324   SubfunctionParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(),
325                                            "omp_data");
326 
327   NumberOfThreads = Builder.getInt32(0);
328 
329   // Add one as the upper bound provided by openmp is a < comparison
330   // whereas the codegenForSequential function creates a <= comparison.
331   UpperBound = Builder.CreateAdd(UpperBound,
332                                  ConstantInt::get(getIntPtrTy(), 1));
333 
334   createCallParallelLoopStart(SubFunction, SubfunctionParam, NumberOfThreads,
335                               LowerBound, UpperBound, Stride);
336   Builder.CreateCall(SubFunction, SubfunctionParam);
337   createCallParallelEnd();
338 
339   return IV;
340 }
341