1*89251edeSMichael Kruse //===------ LoopGeneratorsKMP.cpp - IR helper to create loops -------------===//
2*89251edeSMichael Kruse //
3*89251edeSMichael Kruse // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*89251edeSMichael Kruse // See https://llvm.org/LICENSE.txt for license information.
5*89251edeSMichael Kruse // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*89251edeSMichael Kruse //
7*89251edeSMichael Kruse //===----------------------------------------------------------------------===//
8*89251edeSMichael Kruse //
9*89251edeSMichael Kruse // This file contains functions to create parallel loops as LLVM-IR.
10*89251edeSMichael Kruse //
11*89251edeSMichael Kruse //===----------------------------------------------------------------------===//
12*89251edeSMichael Kruse 
13*89251edeSMichael Kruse #include "polly/CodeGen/LoopGeneratorsKMP.h"
14*89251edeSMichael Kruse #include "polly/Options.h"
15*89251edeSMichael Kruse #include "polly/ScopDetection.h"
16*89251edeSMichael Kruse #include "llvm/Analysis/LoopInfo.h"
17*89251edeSMichael Kruse #include "llvm/IR/DataLayout.h"
18*89251edeSMichael Kruse #include "llvm/IR/Dominators.h"
19*89251edeSMichael Kruse #include "llvm/IR/Module.h"
20*89251edeSMichael Kruse #include "llvm/Support/CommandLine.h"
21*89251edeSMichael Kruse #include "llvm/Transforms/Utils/BasicBlockUtils.h"
22*89251edeSMichael Kruse 
23*89251edeSMichael Kruse using namespace llvm;
24*89251edeSMichael Kruse using namespace polly;
25*89251edeSMichael Kruse 
26*89251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallSpawnThreads(Value *SubFn,
27*89251edeSMichael Kruse                                                       Value *SubFnParam,
28*89251edeSMichael Kruse                                                       Value *LB, Value *UB,
29*89251edeSMichael Kruse                                                       Value *Stride) {
30*89251edeSMichael Kruse   const std::string Name = "__kmpc_fork_call";
31*89251edeSMichael Kruse   Function *F = M->getFunction(Name);
32*89251edeSMichael Kruse   Type *KMPCMicroTy = M->getTypeByName("kmpc_micro");
33*89251edeSMichael Kruse 
34*89251edeSMichael Kruse   if (!KMPCMicroTy) {
35*89251edeSMichael Kruse     // void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...)
36*89251edeSMichael Kruse     Type *MicroParams[] = {Builder.getInt32Ty()->getPointerTo(),
37*89251edeSMichael Kruse                            Builder.getInt32Ty()->getPointerTo()};
38*89251edeSMichael Kruse 
39*89251edeSMichael Kruse     KMPCMicroTy = FunctionType::get(Builder.getVoidTy(), MicroParams, true);
40*89251edeSMichael Kruse   }
41*89251edeSMichael Kruse 
42*89251edeSMichael Kruse   // If F is not available, declare it.
43*89251edeSMichael Kruse   if (!F) {
44*89251edeSMichael Kruse     StructType *IdentTy = M->getTypeByName("struct.ident_t");
45*89251edeSMichael Kruse 
46*89251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
47*89251edeSMichael Kruse     Type *Params[] = {IdentTy->getPointerTo(), Builder.getInt32Ty(),
48*89251edeSMichael Kruse                       KMPCMicroTy->getPointerTo()};
49*89251edeSMichael Kruse 
50*89251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, true);
51*89251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
52*89251edeSMichael Kruse   }
53*89251edeSMichael Kruse 
54*89251edeSMichael Kruse   Value *Task = Builder.CreatePointerBitCastOrAddrSpaceCast(
55*89251edeSMichael Kruse       SubFn, KMPCMicroTy->getPointerTo());
56*89251edeSMichael Kruse 
57*89251edeSMichael Kruse   Value *Args[] = {SourceLocationInfo,
58*89251edeSMichael Kruse                    Builder.getInt32(4) /* Number of arguments (w/o Task) */,
59*89251edeSMichael Kruse                    Task,
60*89251edeSMichael Kruse                    LB,
61*89251edeSMichael Kruse                    UB,
62*89251edeSMichael Kruse                    Stride,
63*89251edeSMichael Kruse                    SubFnParam};
64*89251edeSMichael Kruse 
65*89251edeSMichael Kruse   Builder.CreateCall(F, Args);
66*89251edeSMichael Kruse }
67*89251edeSMichael Kruse 
68*89251edeSMichael Kruse void ParallelLoopGeneratorKMP::deployParallelExecution(Value *SubFn,
69*89251edeSMichael Kruse                                                        Value *SubFnParam,
70*89251edeSMichael Kruse                                                        Value *LB, Value *UB,
71*89251edeSMichael Kruse                                                        Value *Stride) {
72*89251edeSMichael Kruse   // Inform OpenMP runtime about the number of threads if greater than zero
73*89251edeSMichael Kruse   if (PollyNumThreads > 0) {
74*89251edeSMichael Kruse     Value *GlobalThreadID = createCallGlobalThreadNum();
75*89251edeSMichael Kruse     createCallPushNumThreads(GlobalThreadID, Builder.getInt32(PollyNumThreads));
76*89251edeSMichael Kruse   }
77*89251edeSMichael Kruse 
78*89251edeSMichael Kruse   // Tell the runtime we start a parallel loop
79*89251edeSMichael Kruse   createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
80*89251edeSMichael Kruse }
81*89251edeSMichael Kruse 
82*89251edeSMichael Kruse Function *ParallelLoopGeneratorKMP::prepareSubFnDefinition(Function *F) const {
83*89251edeSMichael Kruse   std::vector<Type *> Arguments = {Builder.getInt32Ty()->getPointerTo(),
84*89251edeSMichael Kruse                                    Builder.getInt32Ty()->getPointerTo(),
85*89251edeSMichael Kruse                                    LongType,
86*89251edeSMichael Kruse                                    LongType,
87*89251edeSMichael Kruse                                    LongType,
88*89251edeSMichael Kruse                                    Builder.getInt8PtrTy()};
89*89251edeSMichael Kruse 
90*89251edeSMichael Kruse   FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
91*89251edeSMichael Kruse   Function *SubFn = Function::Create(FT, Function::InternalLinkage,
92*89251edeSMichael Kruse                                      F->getName() + "_polly_subfn", M);
93*89251edeSMichael Kruse   // Name the function's arguments
94*89251edeSMichael Kruse   Function::arg_iterator AI = SubFn->arg_begin();
95*89251edeSMichael Kruse   AI->setName("polly.kmpc.global_tid");
96*89251edeSMichael Kruse   std::advance(AI, 1);
97*89251edeSMichael Kruse   AI->setName("polly.kmpc.bound_tid");
98*89251edeSMichael Kruse   std::advance(AI, 1);
99*89251edeSMichael Kruse   AI->setName("polly.kmpc.lb");
100*89251edeSMichael Kruse   std::advance(AI, 1);
101*89251edeSMichael Kruse   AI->setName("polly.kmpc.ub");
102*89251edeSMichael Kruse   std::advance(AI, 1);
103*89251edeSMichael Kruse   AI->setName("polly.kmpc.inc");
104*89251edeSMichael Kruse   std::advance(AI, 1);
105*89251edeSMichael Kruse   AI->setName("polly.kmpc.shared");
106*89251edeSMichael Kruse 
107*89251edeSMichael Kruse   return SubFn;
108*89251edeSMichael Kruse }
109*89251edeSMichael Kruse 
110*89251edeSMichael Kruse // Create a subfunction of the following (preliminary) structure:
111*89251edeSMichael Kruse //
112*89251edeSMichael Kruse //    PrevBB
113*89251edeSMichael Kruse //       |
114*89251edeSMichael Kruse //       v
115*89251edeSMichael Kruse //    HeaderBB
116*89251edeSMichael Kruse //       |   _____
117*89251edeSMichael Kruse //       v  v    |
118*89251edeSMichael Kruse //   CheckNextBB  PreHeaderBB
119*89251edeSMichael Kruse //       |\       |
120*89251edeSMichael Kruse //       | \______/
121*89251edeSMichael Kruse //       |
122*89251edeSMichael Kruse //       v
123*89251edeSMichael Kruse //     ExitBB
124*89251edeSMichael Kruse //
125*89251edeSMichael Kruse // HeaderBB will hold allocations, loading of variables and kmp-init calls.
126*89251edeSMichael Kruse // CheckNextBB will check for more work (dynamic) or will be "empty" (static).
127*89251edeSMichael Kruse // If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB.
128*89251edeSMichael Kruse // PreHeaderBB loads the new boundaries (& will lead to the loop body later on).
129*89251edeSMichael Kruse // Just like CheckNextBB: PreHeaderBB is empty in the static scheduling case.
130*89251edeSMichael Kruse // ExitBB marks the end of the parallel execution.
131*89251edeSMichael Kruse // The possibly empty BasicBlocks will automatically be removed.
132*89251edeSMichael Kruse std::tuple<Value *, Function *>
133*89251edeSMichael Kruse ParallelLoopGeneratorKMP::createSubFn(Value *StrideNotUsed,
134*89251edeSMichael Kruse                                       AllocaInst *StructData,
135*89251edeSMichael Kruse                                       SetVector<Value *> Data, ValueMapT &Map) {
136*89251edeSMichael Kruse   Function *SubFn = createSubFnDefinition();
137*89251edeSMichael Kruse   LLVMContext &Context = SubFn->getContext();
138*89251edeSMichael Kruse 
139*89251edeSMichael Kruse   // Store the previous basic block.
140*89251edeSMichael Kruse   BasicBlock *PrevBB = Builder.GetInsertBlock();
141*89251edeSMichael Kruse 
142*89251edeSMichael Kruse   // Create basic blocks.
143*89251edeSMichael Kruse   BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
144*89251edeSMichael Kruse   BasicBlock *ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
145*89251edeSMichael Kruse   BasicBlock *CheckNextBB =
146*89251edeSMichael Kruse       BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
147*89251edeSMichael Kruse   BasicBlock *PreHeaderBB =
148*89251edeSMichael Kruse       BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
149*89251edeSMichael Kruse 
150*89251edeSMichael Kruse   DT.addNewBlock(HeaderBB, PrevBB);
151*89251edeSMichael Kruse   DT.addNewBlock(ExitBB, HeaderBB);
152*89251edeSMichael Kruse   DT.addNewBlock(CheckNextBB, HeaderBB);
153*89251edeSMichael Kruse   DT.addNewBlock(PreHeaderBB, HeaderBB);
154*89251edeSMichael Kruse 
155*89251edeSMichael Kruse   // Fill up basic block HeaderBB.
156*89251edeSMichael Kruse   Builder.SetInsertPoint(HeaderBB);
157*89251edeSMichael Kruse   Value *LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr");
158*89251edeSMichael Kruse   Value *UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr");
159*89251edeSMichael Kruse   Value *IsLastPtr = Builder.CreateAlloca(Builder.getInt32Ty(), nullptr,
160*89251edeSMichael Kruse                                           "polly.par.lastIterPtr");
161*89251edeSMichael Kruse   Value *StridePtr =
162*89251edeSMichael Kruse       Builder.CreateAlloca(LongType, nullptr, "polly.par.StridePtr");
163*89251edeSMichael Kruse 
164*89251edeSMichael Kruse   // Get iterator for retrieving the previously defined parameters.
165*89251edeSMichael Kruse   Function::arg_iterator AI = SubFn->arg_begin();
166*89251edeSMichael Kruse   // First argument holds "global thread ID".
167*89251edeSMichael Kruse   Value *IDPtr = &*AI;
168*89251edeSMichael Kruse   // Skip "bound thread ID" since it is not used (but had to be defined).
169*89251edeSMichael Kruse   std::advance(AI, 2);
170*89251edeSMichael Kruse   // Move iterator to: LB, UB, Stride, Shared variable struct.
171*89251edeSMichael Kruse   Value *LB = &*AI;
172*89251edeSMichael Kruse   std::advance(AI, 1);
173*89251edeSMichael Kruse   Value *UB = &*AI;
174*89251edeSMichael Kruse   std::advance(AI, 1);
175*89251edeSMichael Kruse   Value *Stride = &*AI;
176*89251edeSMichael Kruse   std::advance(AI, 1);
177*89251edeSMichael Kruse   Value *Shared = &*AI;
178*89251edeSMichael Kruse 
179*89251edeSMichael Kruse   Value *UserContext = Builder.CreateBitCast(Shared, StructData->getType(),
180*89251edeSMichael Kruse                                              "polly.par.userContext");
181*89251edeSMichael Kruse 
182*89251edeSMichael Kruse   extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext,
183*89251edeSMichael Kruse                           Map);
184*89251edeSMichael Kruse 
185*89251edeSMichael Kruse   const int Alignment = (is64BitArch()) ? 8 : 4;
186*89251edeSMichael Kruse   Value *ID =
187*89251edeSMichael Kruse       Builder.CreateAlignedLoad(IDPtr, Alignment, "polly.par.global_tid");
188*89251edeSMichael Kruse 
189*89251edeSMichael Kruse   Builder.CreateAlignedStore(LB, LBPtr, Alignment);
190*89251edeSMichael Kruse   Builder.CreateAlignedStore(UB, UBPtr, Alignment);
191*89251edeSMichael Kruse   Builder.CreateAlignedStore(Builder.getInt32(0), IsLastPtr, Alignment);
192*89251edeSMichael Kruse   Builder.CreateAlignedStore(Stride, StridePtr, Alignment);
193*89251edeSMichael Kruse 
194*89251edeSMichael Kruse   // Subtract one as the upper bound provided by openmp is a < comparison
195*89251edeSMichael Kruse   // whereas the codegenForSequential function creates a <= comparison.
196*89251edeSMichael Kruse   Value *AdjustedUB = Builder.CreateAdd(UB, ConstantInt::get(LongType, -1),
197*89251edeSMichael Kruse                                         "polly.indvar.UBAdjusted");
198*89251edeSMichael Kruse 
199*89251edeSMichael Kruse   Value *ChunkSize =
200*89251edeSMichael Kruse       ConstantInt::get(LongType, std::max<int>(PollyChunkSize, 1));
201*89251edeSMichael Kruse 
202*89251edeSMichael Kruse   switch (PollyScheduling) {
203*89251edeSMichael Kruse   case OMPGeneralSchedulingType::Dynamic:
204*89251edeSMichael Kruse   case OMPGeneralSchedulingType::Guided:
205*89251edeSMichael Kruse   case OMPGeneralSchedulingType::Runtime:
206*89251edeSMichael Kruse     // "DYNAMIC" scheduling types are handled below (including 'runtime')
207*89251edeSMichael Kruse     {
208*89251edeSMichael Kruse       UB = AdjustedUB;
209*89251edeSMichael Kruse       createCallDispatchInit(ID, LB, UB, Stride, ChunkSize);
210*89251edeSMichael Kruse       Value *HasWork =
211*89251edeSMichael Kruse           createCallDispatchNext(ID, IsLastPtr, LBPtr, UBPtr, StridePtr);
212*89251edeSMichael Kruse       Value *HasIteration =
213*89251edeSMichael Kruse           Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ, HasWork,
214*89251edeSMichael Kruse                              Builder.getInt32(1), "polly.hasIteration");
215*89251edeSMichael Kruse       Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
216*89251edeSMichael Kruse 
217*89251edeSMichael Kruse       Builder.SetInsertPoint(CheckNextBB);
218*89251edeSMichael Kruse       HasWork = createCallDispatchNext(ID, IsLastPtr, LBPtr, UBPtr, StridePtr);
219*89251edeSMichael Kruse       HasIteration =
220*89251edeSMichael Kruse           Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ, HasWork,
221*89251edeSMichael Kruse                              Builder.getInt32(1), "polly.hasWork");
222*89251edeSMichael Kruse       Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
223*89251edeSMichael Kruse 
224*89251edeSMichael Kruse       Builder.SetInsertPoint(PreHeaderBB);
225*89251edeSMichael Kruse       LB = Builder.CreateAlignedLoad(LBPtr, Alignment, "polly.indvar.LB");
226*89251edeSMichael Kruse       UB = Builder.CreateAlignedLoad(UBPtr, Alignment, "polly.indvar.UB");
227*89251edeSMichael Kruse     }
228*89251edeSMichael Kruse     break;
229*89251edeSMichael Kruse   case OMPGeneralSchedulingType::StaticChunked:
230*89251edeSMichael Kruse   case OMPGeneralSchedulingType::StaticNonChunked:
231*89251edeSMichael Kruse     // "STATIC" scheduling types are handled below
232*89251edeSMichael Kruse     {
233*89251edeSMichael Kruse       createCallStaticInit(ID, IsLastPtr, LBPtr, UBPtr, StridePtr, ChunkSize);
234*89251edeSMichael Kruse 
235*89251edeSMichael Kruse       LB = Builder.CreateAlignedLoad(LBPtr, Alignment, "polly.indvar.LB");
236*89251edeSMichael Kruse       UB = Builder.CreateAlignedLoad(UBPtr, Alignment, "polly.indvar.UB");
237*89251edeSMichael Kruse 
238*89251edeSMichael Kruse       Value *AdjUBOutOfBounds =
239*89251edeSMichael Kruse           Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SLT, UB, AdjustedUB,
240*89251edeSMichael Kruse                              "polly.adjustedUBOutOfBounds");
241*89251edeSMichael Kruse 
242*89251edeSMichael Kruse       UB = Builder.CreateSelect(AdjUBOutOfBounds, UB, AdjustedUB);
243*89251edeSMichael Kruse       Builder.CreateAlignedStore(UB, UBPtr, Alignment);
244*89251edeSMichael Kruse 
245*89251edeSMichael Kruse       Value *HasIteration = Builder.CreateICmp(
246*89251edeSMichael Kruse           llvm::CmpInst::Predicate::ICMP_SLE, LB, UB, "polly.hasIteration");
247*89251edeSMichael Kruse       Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
248*89251edeSMichael Kruse 
249*89251edeSMichael Kruse       Builder.SetInsertPoint(CheckNextBB);
250*89251edeSMichael Kruse       Builder.CreateBr(ExitBB);
251*89251edeSMichael Kruse 
252*89251edeSMichael Kruse       Builder.SetInsertPoint(PreHeaderBB);
253*89251edeSMichael Kruse     }
254*89251edeSMichael Kruse     break;
255*89251edeSMichael Kruse   }
256*89251edeSMichael Kruse 
257*89251edeSMichael Kruse   Builder.CreateBr(CheckNextBB);
258*89251edeSMichael Kruse   Builder.SetInsertPoint(&*--Builder.GetInsertPoint());
259*89251edeSMichael Kruse   BasicBlock *AfterBB;
260*89251edeSMichael Kruse   Value *IV = createLoop(LB, UB, Stride, Builder, LI, DT, AfterBB,
261*89251edeSMichael Kruse                          ICmpInst::ICMP_SLE, nullptr, true,
262*89251edeSMichael Kruse                          /* UseGuard */ false);
263*89251edeSMichael Kruse 
264*89251edeSMichael Kruse   BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
265*89251edeSMichael Kruse 
266*89251edeSMichael Kruse   // Add code to terminate this subfunction.
267*89251edeSMichael Kruse   Builder.SetInsertPoint(ExitBB);
268*89251edeSMichael Kruse   // Static (i.e. non-dynamic) scheduling types, are terminated with a fini-call
269*89251edeSMichael Kruse   if (PollyScheduling == OMPGeneralSchedulingType::StaticChunked) {
270*89251edeSMichael Kruse     createCallStaticFini(ID);
271*89251edeSMichael Kruse   }
272*89251edeSMichael Kruse   Builder.CreateRetVoid();
273*89251edeSMichael Kruse   Builder.SetInsertPoint(&*LoopBody);
274*89251edeSMichael Kruse 
275*89251edeSMichael Kruse   return std::make_tuple(IV, SubFn);
276*89251edeSMichael Kruse }
277*89251edeSMichael Kruse 
278*89251edeSMichael Kruse Value *ParallelLoopGeneratorKMP::createCallGlobalThreadNum() {
279*89251edeSMichael Kruse   const std::string Name = "__kmpc_global_thread_num";
280*89251edeSMichael Kruse   Function *F = M->getFunction(Name);
281*89251edeSMichael Kruse 
282*89251edeSMichael Kruse   // If F is not available, declare it.
283*89251edeSMichael Kruse   if (!F) {
284*89251edeSMichael Kruse     StructType *IdentTy = M->getTypeByName("struct.ident_t");
285*89251edeSMichael Kruse 
286*89251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
287*89251edeSMichael Kruse     Type *Params[] = {IdentTy->getPointerTo()};
288*89251edeSMichael Kruse 
289*89251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), Params, false);
290*89251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
291*89251edeSMichael Kruse   }
292*89251edeSMichael Kruse 
293*89251edeSMichael Kruse   return Builder.CreateCall(F, {SourceLocationInfo});
294*89251edeSMichael Kruse }
295*89251edeSMichael Kruse 
296*89251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallPushNumThreads(Value *GlobalThreadID,
297*89251edeSMichael Kruse                                                         Value *NumThreads) {
298*89251edeSMichael Kruse   const std::string Name = "__kmpc_push_num_threads";
299*89251edeSMichael Kruse   Function *F = M->getFunction(Name);
300*89251edeSMichael Kruse 
301*89251edeSMichael Kruse   // If F is not available, declare it.
302*89251edeSMichael Kruse   if (!F) {
303*89251edeSMichael Kruse     StructType *IdentTy = M->getTypeByName("struct.ident_t");
304*89251edeSMichael Kruse 
305*89251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
306*89251edeSMichael Kruse     Type *Params[] = {IdentTy->getPointerTo(), Builder.getInt32Ty(),
307*89251edeSMichael Kruse                       Builder.getInt32Ty()};
308*89251edeSMichael Kruse 
309*89251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
310*89251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
311*89251edeSMichael Kruse   }
312*89251edeSMichael Kruse 
313*89251edeSMichael Kruse   Value *Args[] = {SourceLocationInfo, GlobalThreadID, NumThreads};
314*89251edeSMichael Kruse 
315*89251edeSMichael Kruse   Builder.CreateCall(F, Args);
316*89251edeSMichael Kruse }
317*89251edeSMichael Kruse 
318*89251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallStaticInit(Value *GlobalThreadID,
319*89251edeSMichael Kruse                                                     Value *IsLastPtr,
320*89251edeSMichael Kruse                                                     Value *LBPtr, Value *UBPtr,
321*89251edeSMichael Kruse                                                     Value *StridePtr,
322*89251edeSMichael Kruse                                                     Value *ChunkSize) {
323*89251edeSMichael Kruse   const std::string Name =
324*89251edeSMichael Kruse       is64BitArch() ? "__kmpc_for_static_init_8" : "__kmpc_for_static_init_4";
325*89251edeSMichael Kruse   Function *F = M->getFunction(Name);
326*89251edeSMichael Kruse   StructType *IdentTy = M->getTypeByName("struct.ident_t");
327*89251edeSMichael Kruse 
328*89251edeSMichael Kruse   // If F is not available, declare it.
329*89251edeSMichael Kruse   if (!F) {
330*89251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
331*89251edeSMichael Kruse 
332*89251edeSMichael Kruse     Type *Params[] = {IdentTy->getPointerTo(),
333*89251edeSMichael Kruse                       Builder.getInt32Ty(),
334*89251edeSMichael Kruse                       Builder.getInt32Ty(),
335*89251edeSMichael Kruse                       Builder.getInt32Ty()->getPointerTo(),
336*89251edeSMichael Kruse                       LongType->getPointerTo(),
337*89251edeSMichael Kruse                       LongType->getPointerTo(),
338*89251edeSMichael Kruse                       LongType->getPointerTo(),
339*89251edeSMichael Kruse                       LongType,
340*89251edeSMichael Kruse                       LongType};
341*89251edeSMichael Kruse 
342*89251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
343*89251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
344*89251edeSMichael Kruse   }
345*89251edeSMichael Kruse 
346*89251edeSMichael Kruse   // The parameter 'ChunkSize' will hold strictly positive integer values,
347*89251edeSMichael Kruse   // regardless of PollyChunkSize's value
348*89251edeSMichael Kruse   Value *Args[] = {
349*89251edeSMichael Kruse       SourceLocationInfo,
350*89251edeSMichael Kruse       GlobalThreadID,
351*89251edeSMichael Kruse       Builder.getInt32(int(getSchedType(PollyChunkSize, PollyScheduling))),
352*89251edeSMichael Kruse       IsLastPtr,
353*89251edeSMichael Kruse       LBPtr,
354*89251edeSMichael Kruse       UBPtr,
355*89251edeSMichael Kruse       StridePtr,
356*89251edeSMichael Kruse       ConstantInt::get(LongType, 1),
357*89251edeSMichael Kruse       ChunkSize};
358*89251edeSMichael Kruse 
359*89251edeSMichael Kruse   Builder.CreateCall(F, Args);
360*89251edeSMichael Kruse }
361*89251edeSMichael Kruse 
362*89251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallStaticFini(Value *GlobalThreadID) {
363*89251edeSMichael Kruse   const std::string Name = "__kmpc_for_static_fini";
364*89251edeSMichael Kruse   Function *F = M->getFunction(Name);
365*89251edeSMichael Kruse   StructType *IdentTy = M->getTypeByName("struct.ident_t");
366*89251edeSMichael Kruse 
367*89251edeSMichael Kruse   // If F is not available, declare it.
368*89251edeSMichael Kruse   if (!F) {
369*89251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
370*89251edeSMichael Kruse     Type *Params[] = {IdentTy->getPointerTo(), Builder.getInt32Ty()};
371*89251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
372*89251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
373*89251edeSMichael Kruse   }
374*89251edeSMichael Kruse 
375*89251edeSMichael Kruse   Value *Args[] = {SourceLocationInfo, GlobalThreadID};
376*89251edeSMichael Kruse 
377*89251edeSMichael Kruse   Builder.CreateCall(F, Args);
378*89251edeSMichael Kruse }
379*89251edeSMichael Kruse 
380*89251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallDispatchInit(Value *GlobalThreadID,
381*89251edeSMichael Kruse                                                       Value *LB, Value *UB,
382*89251edeSMichael Kruse                                                       Value *Inc,
383*89251edeSMichael Kruse                                                       Value *ChunkSize) {
384*89251edeSMichael Kruse   const std::string Name =
385*89251edeSMichael Kruse       is64BitArch() ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_4";
386*89251edeSMichael Kruse   Function *F = M->getFunction(Name);
387*89251edeSMichael Kruse   StructType *IdentTy = M->getTypeByName("struct.ident_t");
388*89251edeSMichael Kruse 
389*89251edeSMichael Kruse   // If F is not available, declare it.
390*89251edeSMichael Kruse   if (!F) {
391*89251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
392*89251edeSMichael Kruse 
393*89251edeSMichael Kruse     Type *Params[] = {IdentTy->getPointerTo(),
394*89251edeSMichael Kruse                       Builder.getInt32Ty(),
395*89251edeSMichael Kruse                       Builder.getInt32Ty(),
396*89251edeSMichael Kruse                       LongType,
397*89251edeSMichael Kruse                       LongType,
398*89251edeSMichael Kruse                       LongType,
399*89251edeSMichael Kruse                       LongType};
400*89251edeSMichael Kruse 
401*89251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
402*89251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
403*89251edeSMichael Kruse   }
404*89251edeSMichael Kruse 
405*89251edeSMichael Kruse   // The parameter 'ChunkSize' will hold strictly positive integer values,
406*89251edeSMichael Kruse   // regardless of PollyChunkSize's value
407*89251edeSMichael Kruse   Value *Args[] = {
408*89251edeSMichael Kruse       SourceLocationInfo,
409*89251edeSMichael Kruse       GlobalThreadID,
410*89251edeSMichael Kruse       Builder.getInt32(int(getSchedType(PollyChunkSize, PollyScheduling))),
411*89251edeSMichael Kruse       LB,
412*89251edeSMichael Kruse       UB,
413*89251edeSMichael Kruse       Inc,
414*89251edeSMichael Kruse       ChunkSize};
415*89251edeSMichael Kruse 
416*89251edeSMichael Kruse   Builder.CreateCall(F, Args);
417*89251edeSMichael Kruse }
418*89251edeSMichael Kruse 
419*89251edeSMichael Kruse Value *ParallelLoopGeneratorKMP::createCallDispatchNext(Value *GlobalThreadID,
420*89251edeSMichael Kruse                                                         Value *IsLastPtr,
421*89251edeSMichael Kruse                                                         Value *LBPtr,
422*89251edeSMichael Kruse                                                         Value *UBPtr,
423*89251edeSMichael Kruse                                                         Value *StridePtr) {
424*89251edeSMichael Kruse   const std::string Name =
425*89251edeSMichael Kruse       is64BitArch() ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_4";
426*89251edeSMichael Kruse   Function *F = M->getFunction(Name);
427*89251edeSMichael Kruse   StructType *IdentTy = M->getTypeByName("struct.ident_t");
428*89251edeSMichael Kruse 
429*89251edeSMichael Kruse   // If F is not available, declare it.
430*89251edeSMichael Kruse   if (!F) {
431*89251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
432*89251edeSMichael Kruse 
433*89251edeSMichael Kruse     Type *Params[] = {IdentTy->getPointerTo(),
434*89251edeSMichael Kruse                       Builder.getInt32Ty(),
435*89251edeSMichael Kruse                       Builder.getInt32Ty()->getPointerTo(),
436*89251edeSMichael Kruse                       LongType->getPointerTo(),
437*89251edeSMichael Kruse                       LongType->getPointerTo(),
438*89251edeSMichael Kruse                       LongType->getPointerTo()};
439*89251edeSMichael Kruse 
440*89251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), Params, false);
441*89251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
442*89251edeSMichael Kruse   }
443*89251edeSMichael Kruse 
444*89251edeSMichael Kruse   Value *Args[] = {SourceLocationInfo, GlobalThreadID, IsLastPtr, LBPtr, UBPtr,
445*89251edeSMichael Kruse                    StridePtr};
446*89251edeSMichael Kruse 
447*89251edeSMichael Kruse   return Builder.CreateCall(F, Args);
448*89251edeSMichael Kruse }
449*89251edeSMichael Kruse 
450*89251edeSMichael Kruse // TODO: This function currently creates a source location dummy. It might be
451*89251edeSMichael Kruse // necessary to (actually) provide information, in the future.
452*89251edeSMichael Kruse GlobalVariable *ParallelLoopGeneratorKMP::createSourceLocation() {
453*89251edeSMichael Kruse   const std::string LocName = ".loc.dummy";
454*89251edeSMichael Kruse   GlobalVariable *SourceLocDummy = M->getGlobalVariable(LocName);
455*89251edeSMichael Kruse 
456*89251edeSMichael Kruse   if (SourceLocDummy == nullptr) {
457*89251edeSMichael Kruse     const std::string StructName = "struct.ident_t";
458*89251edeSMichael Kruse     StructType *IdentTy = M->getTypeByName(StructName);
459*89251edeSMichael Kruse 
460*89251edeSMichael Kruse     // If the ident_t StructType is not available, declare it.
461*89251edeSMichael Kruse     // in LLVM-IR: ident_t = type { i32, i32, i32, i32, i8* }
462*89251edeSMichael Kruse     if (!IdentTy) {
463*89251edeSMichael Kruse       Type *LocMembers[] = {Builder.getInt32Ty(), Builder.getInt32Ty(),
464*89251edeSMichael Kruse                             Builder.getInt32Ty(), Builder.getInt32Ty(),
465*89251edeSMichael Kruse                             Builder.getInt8PtrTy()};
466*89251edeSMichael Kruse 
467*89251edeSMichael Kruse       IdentTy =
468*89251edeSMichael Kruse           StructType::create(M->getContext(), LocMembers, StructName, false);
469*89251edeSMichael Kruse     }
470*89251edeSMichael Kruse 
471*89251edeSMichael Kruse     const auto ArrayType =
472*89251edeSMichael Kruse         llvm::ArrayType::get(Builder.getInt8Ty(), /* Length */ 23);
473*89251edeSMichael Kruse 
474*89251edeSMichael Kruse     // Global Variable Definitions
475*89251edeSMichael Kruse     GlobalVariable *StrVar = new GlobalVariable(
476*89251edeSMichael Kruse         *M, ArrayType, true, GlobalValue::PrivateLinkage, 0, ".str.ident");
477*89251edeSMichael Kruse     StrVar->setAlignment(1);
478*89251edeSMichael Kruse 
479*89251edeSMichael Kruse     SourceLocDummy = new GlobalVariable(
480*89251edeSMichael Kruse         *M, IdentTy, true, GlobalValue::PrivateLinkage, nullptr, LocName);
481*89251edeSMichael Kruse     SourceLocDummy->setAlignment(8);
482*89251edeSMichael Kruse 
483*89251edeSMichael Kruse     // Constant Definitions
484*89251edeSMichael Kruse     Constant *InitStr = ConstantDataArray::getString(
485*89251edeSMichael Kruse         M->getContext(), "Source location dummy.", true);
486*89251edeSMichael Kruse 
487*89251edeSMichael Kruse     Constant *StrPtr = static_cast<Constant *>(Builder.CreateInBoundsGEP(
488*89251edeSMichael Kruse         ArrayType, StrVar, {Builder.getInt32(0), Builder.getInt32(0)}));
489*89251edeSMichael Kruse 
490*89251edeSMichael Kruse     Constant *LocInitStruct = ConstantStruct::get(
491*89251edeSMichael Kruse         IdentTy, {Builder.getInt32(0), Builder.getInt32(0), Builder.getInt32(0),
492*89251edeSMichael Kruse                   Builder.getInt32(0), StrPtr});
493*89251edeSMichael Kruse 
494*89251edeSMichael Kruse     // Initialize variables
495*89251edeSMichael Kruse     StrVar->setInitializer(InitStr);
496*89251edeSMichael Kruse     SourceLocDummy->setInitializer(LocInitStruct);
497*89251edeSMichael Kruse   }
498*89251edeSMichael Kruse 
499*89251edeSMichael Kruse   return SourceLocDummy;
500*89251edeSMichael Kruse }
501*89251edeSMichael Kruse 
502*89251edeSMichael Kruse bool ParallelLoopGeneratorKMP::is64BitArch() {
503*89251edeSMichael Kruse   return (LongType->getIntegerBitWidth() == 64);
504*89251edeSMichael Kruse }
505*89251edeSMichael Kruse 
506*89251edeSMichael Kruse OMPGeneralSchedulingType ParallelLoopGeneratorKMP::getSchedType(
507*89251edeSMichael Kruse     int ChunkSize, OMPGeneralSchedulingType Scheduling) const {
508*89251edeSMichael Kruse   if (ChunkSize == 0 && Scheduling == OMPGeneralSchedulingType::StaticChunked)
509*89251edeSMichael Kruse     return OMPGeneralSchedulingType::StaticNonChunked;
510*89251edeSMichael Kruse 
511*89251edeSMichael Kruse   return Scheduling;
512*89251edeSMichael Kruse }
513