189251edeSMichael Kruse //===------ LoopGeneratorsKMP.cpp - IR helper to create loops -------------===//
289251edeSMichael Kruse //
389251edeSMichael Kruse // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
489251edeSMichael Kruse // See https://llvm.org/LICENSE.txt for license information.
589251edeSMichael Kruse // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
689251edeSMichael Kruse //
789251edeSMichael Kruse //===----------------------------------------------------------------------===//
889251edeSMichael Kruse //
989251edeSMichael Kruse // This file contains functions to create parallel loops as LLVM-IR.
1089251edeSMichael Kruse //
1189251edeSMichael Kruse //===----------------------------------------------------------------------===//
1289251edeSMichael Kruse 
1389251edeSMichael Kruse #include "polly/CodeGen/LoopGeneratorsKMP.h"
1489251edeSMichael Kruse #include "llvm/IR/Dominators.h"
1589251edeSMichael Kruse #include "llvm/IR/Module.h"
1689251edeSMichael Kruse 
1789251edeSMichael Kruse using namespace llvm;
1889251edeSMichael Kruse using namespace polly;
1989251edeSMichael Kruse 
2089251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallSpawnThreads(Value *SubFn,
2189251edeSMichael Kruse                                                       Value *SubFnParam,
2289251edeSMichael Kruse                                                       Value *LB, Value *UB,
2389251edeSMichael Kruse                                                       Value *Stride) {
2489251edeSMichael Kruse   const std::string Name = "__kmpc_fork_call";
2589251edeSMichael Kruse   Function *F = M->getFunction(Name);
2689251edeSMichael Kruse   Type *KMPCMicroTy = M->getTypeByName("kmpc_micro");
2789251edeSMichael Kruse 
2889251edeSMichael Kruse   if (!KMPCMicroTy) {
2989251edeSMichael Kruse     // void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...)
3089251edeSMichael Kruse     Type *MicroParams[] = {Builder.getInt32Ty()->getPointerTo(),
3189251edeSMichael Kruse                            Builder.getInt32Ty()->getPointerTo()};
3289251edeSMichael Kruse 
3389251edeSMichael Kruse     KMPCMicroTy = FunctionType::get(Builder.getVoidTy(), MicroParams, true);
3489251edeSMichael Kruse   }
3589251edeSMichael Kruse 
3689251edeSMichael Kruse   // If F is not available, declare it.
3789251edeSMichael Kruse   if (!F) {
3889251edeSMichael Kruse     StructType *IdentTy = M->getTypeByName("struct.ident_t");
3989251edeSMichael Kruse 
4089251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
4189251edeSMichael Kruse     Type *Params[] = {IdentTy->getPointerTo(), Builder.getInt32Ty(),
4289251edeSMichael Kruse                       KMPCMicroTy->getPointerTo()};
4389251edeSMichael Kruse 
4489251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, true);
4589251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
4689251edeSMichael Kruse   }
4789251edeSMichael Kruse 
4889251edeSMichael Kruse   Value *Task = Builder.CreatePointerBitCastOrAddrSpaceCast(
4989251edeSMichael Kruse       SubFn, KMPCMicroTy->getPointerTo());
5089251edeSMichael Kruse 
5189251edeSMichael Kruse   Value *Args[] = {SourceLocationInfo,
5289251edeSMichael Kruse                    Builder.getInt32(4) /* Number of arguments (w/o Task) */,
5389251edeSMichael Kruse                    Task,
5489251edeSMichael Kruse                    LB,
5589251edeSMichael Kruse                    UB,
5689251edeSMichael Kruse                    Stride,
5789251edeSMichael Kruse                    SubFnParam};
5889251edeSMichael Kruse 
5989251edeSMichael Kruse   Builder.CreateCall(F, Args);
6089251edeSMichael Kruse }
6189251edeSMichael Kruse 
6289251edeSMichael Kruse void ParallelLoopGeneratorKMP::deployParallelExecution(Value *SubFn,
6389251edeSMichael Kruse                                                        Value *SubFnParam,
6489251edeSMichael Kruse                                                        Value *LB, Value *UB,
6589251edeSMichael Kruse                                                        Value *Stride) {
6689251edeSMichael Kruse   // Inform OpenMP runtime about the number of threads if greater than zero
6789251edeSMichael Kruse   if (PollyNumThreads > 0) {
6889251edeSMichael Kruse     Value *GlobalThreadID = createCallGlobalThreadNum();
6989251edeSMichael Kruse     createCallPushNumThreads(GlobalThreadID, Builder.getInt32(PollyNumThreads));
7089251edeSMichael Kruse   }
7189251edeSMichael Kruse 
7289251edeSMichael Kruse   // Tell the runtime we start a parallel loop
7389251edeSMichael Kruse   createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
7489251edeSMichael Kruse }
7589251edeSMichael Kruse 
7689251edeSMichael Kruse Function *ParallelLoopGeneratorKMP::prepareSubFnDefinition(Function *F) const {
7789251edeSMichael Kruse   std::vector<Type *> Arguments = {Builder.getInt32Ty()->getPointerTo(),
7889251edeSMichael Kruse                                    Builder.getInt32Ty()->getPointerTo(),
7989251edeSMichael Kruse                                    LongType,
8089251edeSMichael Kruse                                    LongType,
8189251edeSMichael Kruse                                    LongType,
8289251edeSMichael Kruse                                    Builder.getInt8PtrTy()};
8389251edeSMichael Kruse 
8489251edeSMichael Kruse   FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
8589251edeSMichael Kruse   Function *SubFn = Function::Create(FT, Function::InternalLinkage,
8689251edeSMichael Kruse                                      F->getName() + "_polly_subfn", M);
8789251edeSMichael Kruse   // Name the function's arguments
8889251edeSMichael Kruse   Function::arg_iterator AI = SubFn->arg_begin();
8989251edeSMichael Kruse   AI->setName("polly.kmpc.global_tid");
9089251edeSMichael Kruse   std::advance(AI, 1);
9189251edeSMichael Kruse   AI->setName("polly.kmpc.bound_tid");
9289251edeSMichael Kruse   std::advance(AI, 1);
9389251edeSMichael Kruse   AI->setName("polly.kmpc.lb");
9489251edeSMichael Kruse   std::advance(AI, 1);
9589251edeSMichael Kruse   AI->setName("polly.kmpc.ub");
9689251edeSMichael Kruse   std::advance(AI, 1);
9789251edeSMichael Kruse   AI->setName("polly.kmpc.inc");
9889251edeSMichael Kruse   std::advance(AI, 1);
9989251edeSMichael Kruse   AI->setName("polly.kmpc.shared");
10089251edeSMichael Kruse 
10189251edeSMichael Kruse   return SubFn;
10289251edeSMichael Kruse }
10389251edeSMichael Kruse 
10489251edeSMichael Kruse // Create a subfunction of the following (preliminary) structure:
10589251edeSMichael Kruse //
10689251edeSMichael Kruse //        PrevBB
10789251edeSMichael Kruse //           |
10889251edeSMichael Kruse //           v
10989251edeSMichael Kruse //        HeaderBB
110*1e0be76eSMichael Halkenhäuser //       /   |    _____
111*1e0be76eSMichael Halkenhäuser //      /    v   v     |
112*1e0be76eSMichael Halkenhäuser //     / PreHeaderBB   |
113*1e0be76eSMichael Halkenhäuser //    |      |         |
114*1e0be76eSMichael Halkenhäuser //    |      v         |
115*1e0be76eSMichael Halkenhäuser //    |  CheckNextBB   |
116*1e0be76eSMichael Halkenhäuser //     \     |   \_____/
117*1e0be76eSMichael Halkenhäuser //      \    |
118*1e0be76eSMichael Halkenhäuser //       v   v
11989251edeSMichael Kruse //       ExitBB
12089251edeSMichael Kruse //
12189251edeSMichael Kruse // HeaderBB will hold allocations, loading of variables and kmp-init calls.
122*1e0be76eSMichael Halkenhäuser // CheckNextBB will check for more work (dynamic / static chunked) or will be
123*1e0be76eSMichael Halkenhäuser // empty (static non chunked).
12489251edeSMichael Kruse // If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB.
12589251edeSMichael Kruse // PreHeaderBB loads the new boundaries (& will lead to the loop body later on).
126*1e0be76eSMichael Halkenhäuser // Just like CheckNextBB: PreHeaderBB is (preliminary) empty in the static non
127*1e0be76eSMichael Halkenhäuser // chunked scheduling case. ExitBB marks the end of the parallel execution.
12889251edeSMichael Kruse // The possibly empty BasicBlocks will automatically be removed.
12989251edeSMichael Kruse std::tuple<Value *, Function *>
130*1e0be76eSMichael Halkenhäuser ParallelLoopGeneratorKMP::createSubFn(Value *SequentialLoopStride,
13189251edeSMichael Kruse                                       AllocaInst *StructData,
13289251edeSMichael Kruse                                       SetVector<Value *> Data, ValueMapT &Map) {
13389251edeSMichael Kruse   Function *SubFn = createSubFnDefinition();
13489251edeSMichael Kruse   LLVMContext &Context = SubFn->getContext();
13589251edeSMichael Kruse 
13689251edeSMichael Kruse   // Store the previous basic block.
13789251edeSMichael Kruse   BasicBlock *PrevBB = Builder.GetInsertBlock();
13889251edeSMichael Kruse 
13989251edeSMichael Kruse   // Create basic blocks.
14089251edeSMichael Kruse   BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
14189251edeSMichael Kruse   BasicBlock *ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
14289251edeSMichael Kruse   BasicBlock *CheckNextBB =
14389251edeSMichael Kruse       BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
14489251edeSMichael Kruse   BasicBlock *PreHeaderBB =
14589251edeSMichael Kruse       BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
14689251edeSMichael Kruse 
14789251edeSMichael Kruse   DT.addNewBlock(HeaderBB, PrevBB);
14889251edeSMichael Kruse   DT.addNewBlock(ExitBB, HeaderBB);
14989251edeSMichael Kruse   DT.addNewBlock(CheckNextBB, HeaderBB);
15089251edeSMichael Kruse   DT.addNewBlock(PreHeaderBB, HeaderBB);
15189251edeSMichael Kruse 
15289251edeSMichael Kruse   // Fill up basic block HeaderBB.
15389251edeSMichael Kruse   Builder.SetInsertPoint(HeaderBB);
15489251edeSMichael Kruse   Value *LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr");
15589251edeSMichael Kruse   Value *UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr");
15689251edeSMichael Kruse   Value *IsLastPtr = Builder.CreateAlloca(Builder.getInt32Ty(), nullptr,
15789251edeSMichael Kruse                                           "polly.par.lastIterPtr");
15889251edeSMichael Kruse   Value *StridePtr =
15989251edeSMichael Kruse       Builder.CreateAlloca(LongType, nullptr, "polly.par.StridePtr");
16089251edeSMichael Kruse 
16189251edeSMichael Kruse   // Get iterator for retrieving the previously defined parameters.
16289251edeSMichael Kruse   Function::arg_iterator AI = SubFn->arg_begin();
16389251edeSMichael Kruse   // First argument holds "global thread ID".
16489251edeSMichael Kruse   Value *IDPtr = &*AI;
16589251edeSMichael Kruse   // Skip "bound thread ID" since it is not used (but had to be defined).
16689251edeSMichael Kruse   std::advance(AI, 2);
16789251edeSMichael Kruse   // Move iterator to: LB, UB, Stride, Shared variable struct.
16889251edeSMichael Kruse   Value *LB = &*AI;
16989251edeSMichael Kruse   std::advance(AI, 1);
17089251edeSMichael Kruse   Value *UB = &*AI;
17189251edeSMichael Kruse   std::advance(AI, 1);
17289251edeSMichael Kruse   Value *Stride = &*AI;
17389251edeSMichael Kruse   std::advance(AI, 1);
17489251edeSMichael Kruse   Value *Shared = &*AI;
17589251edeSMichael Kruse 
17689251edeSMichael Kruse   Value *UserContext = Builder.CreateBitCast(Shared, StructData->getType(),
17789251edeSMichael Kruse                                              "polly.par.userContext");
17889251edeSMichael Kruse 
17989251edeSMichael Kruse   extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext,
18089251edeSMichael Kruse                           Map);
18189251edeSMichael Kruse 
18259f95222SGuillaume Chatelet   const auto Alignment = llvm::Align(is64BitArch() ? 8 : 4);
18389251edeSMichael Kruse   Value *ID =
18489251edeSMichael Kruse       Builder.CreateAlignedLoad(IDPtr, Alignment, "polly.par.global_tid");
18589251edeSMichael Kruse 
18689251edeSMichael Kruse   Builder.CreateAlignedStore(LB, LBPtr, Alignment);
18789251edeSMichael Kruse   Builder.CreateAlignedStore(UB, UBPtr, Alignment);
18889251edeSMichael Kruse   Builder.CreateAlignedStore(Builder.getInt32(0), IsLastPtr, Alignment);
18989251edeSMichael Kruse   Builder.CreateAlignedStore(Stride, StridePtr, Alignment);
19089251edeSMichael Kruse 
19189251edeSMichael Kruse   // Subtract one as the upper bound provided by openmp is a < comparison
19289251edeSMichael Kruse   // whereas the codegenForSequential function creates a <= comparison.
19389251edeSMichael Kruse   Value *AdjustedUB = Builder.CreateAdd(UB, ConstantInt::get(LongType, -1),
19489251edeSMichael Kruse                                         "polly.indvar.UBAdjusted");
19589251edeSMichael Kruse 
19689251edeSMichael Kruse   Value *ChunkSize =
19789251edeSMichael Kruse       ConstantInt::get(LongType, std::max<int>(PollyChunkSize, 1));
19889251edeSMichael Kruse 
199*1e0be76eSMichael Halkenhäuser   OMPGeneralSchedulingType Scheduling =
200*1e0be76eSMichael Halkenhäuser       getSchedType(PollyChunkSize, PollyScheduling);
201*1e0be76eSMichael Halkenhäuser 
202*1e0be76eSMichael Halkenhäuser   switch (Scheduling) {
20389251edeSMichael Kruse   case OMPGeneralSchedulingType::Dynamic:
20489251edeSMichael Kruse   case OMPGeneralSchedulingType::Guided:
20589251edeSMichael Kruse   case OMPGeneralSchedulingType::Runtime:
20689251edeSMichael Kruse     // "DYNAMIC" scheduling types are handled below (including 'runtime')
20789251edeSMichael Kruse     {
20889251edeSMichael Kruse       UB = AdjustedUB;
20989251edeSMichael Kruse       createCallDispatchInit(ID, LB, UB, Stride, ChunkSize);
21089251edeSMichael Kruse       Value *HasWork =
21189251edeSMichael Kruse           createCallDispatchNext(ID, IsLastPtr, LBPtr, UBPtr, StridePtr);
21289251edeSMichael Kruse       Value *HasIteration =
21389251edeSMichael Kruse           Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ, HasWork,
21489251edeSMichael Kruse                              Builder.getInt32(1), "polly.hasIteration");
21589251edeSMichael Kruse       Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
21689251edeSMichael Kruse 
21789251edeSMichael Kruse       Builder.SetInsertPoint(CheckNextBB);
21889251edeSMichael Kruse       HasWork = createCallDispatchNext(ID, IsLastPtr, LBPtr, UBPtr, StridePtr);
21989251edeSMichael Kruse       HasIteration =
22089251edeSMichael Kruse           Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ, HasWork,
22189251edeSMichael Kruse                              Builder.getInt32(1), "polly.hasWork");
22289251edeSMichael Kruse       Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
22389251edeSMichael Kruse 
22489251edeSMichael Kruse       Builder.SetInsertPoint(PreHeaderBB);
22589251edeSMichael Kruse       LB = Builder.CreateAlignedLoad(LBPtr, Alignment, "polly.indvar.LB");
22689251edeSMichael Kruse       UB = Builder.CreateAlignedLoad(UBPtr, Alignment, "polly.indvar.UB");
22789251edeSMichael Kruse     }
22889251edeSMichael Kruse     break;
22989251edeSMichael Kruse   case OMPGeneralSchedulingType::StaticChunked:
23089251edeSMichael Kruse   case OMPGeneralSchedulingType::StaticNonChunked:
23189251edeSMichael Kruse     // "STATIC" scheduling types are handled below
23289251edeSMichael Kruse     {
233*1e0be76eSMichael Halkenhäuser       Builder.CreateAlignedStore(AdjustedUB, UBPtr, Alignment);
23489251edeSMichael Kruse       createCallStaticInit(ID, IsLastPtr, LBPtr, UBPtr, StridePtr, ChunkSize);
23589251edeSMichael Kruse 
236*1e0be76eSMichael Halkenhäuser       Value *ChunkedStride =
237*1e0be76eSMichael Halkenhäuser           Builder.CreateAlignedLoad(StridePtr, Alignment, "polly.kmpc.stride");
238*1e0be76eSMichael Halkenhäuser 
23989251edeSMichael Kruse       LB = Builder.CreateAlignedLoad(LBPtr, Alignment, "polly.indvar.LB");
240*1e0be76eSMichael Halkenhäuser       UB = Builder.CreateAlignedLoad(UBPtr, Alignment, "polly.indvar.UB.temp");
24189251edeSMichael Kruse 
242*1e0be76eSMichael Halkenhäuser       Value *UBInRange =
243*1e0be76eSMichael Halkenhäuser           Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SLE, UB, AdjustedUB,
244*1e0be76eSMichael Halkenhäuser                              "polly.indvar.UB.inRange");
245*1e0be76eSMichael Halkenhäuser       UB = Builder.CreateSelect(UBInRange, UB, AdjustedUB, "polly.indvar.UB");
24689251edeSMichael Kruse       Builder.CreateAlignedStore(UB, UBPtr, Alignment);
24789251edeSMichael Kruse 
24889251edeSMichael Kruse       Value *HasIteration = Builder.CreateICmp(
24989251edeSMichael Kruse           llvm::CmpInst::Predicate::ICMP_SLE, LB, UB, "polly.hasIteration");
25089251edeSMichael Kruse       Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB);
25189251edeSMichael Kruse 
252*1e0be76eSMichael Halkenhäuser       if (Scheduling == OMPGeneralSchedulingType::StaticChunked) {
253*1e0be76eSMichael Halkenhäuser         Builder.SetInsertPoint(PreHeaderBB);
254*1e0be76eSMichael Halkenhäuser         LB = Builder.CreateAlignedLoad(LBPtr, Alignment,
255*1e0be76eSMichael Halkenhäuser                                        "polly.indvar.LB.entry");
256*1e0be76eSMichael Halkenhäuser         UB = Builder.CreateAlignedLoad(UBPtr, Alignment,
257*1e0be76eSMichael Halkenhäuser                                        "polly.indvar.UB.entry");
258*1e0be76eSMichael Halkenhäuser       }
259*1e0be76eSMichael Halkenhäuser 
26089251edeSMichael Kruse       Builder.SetInsertPoint(CheckNextBB);
261*1e0be76eSMichael Halkenhäuser 
262*1e0be76eSMichael Halkenhäuser       if (Scheduling == OMPGeneralSchedulingType::StaticChunked) {
263*1e0be76eSMichael Halkenhäuser         Value *NextLB =
264*1e0be76eSMichael Halkenhäuser             Builder.CreateAdd(LB, ChunkedStride, "polly.indvar.nextLB");
265*1e0be76eSMichael Halkenhäuser         Value *NextUB = Builder.CreateAdd(UB, ChunkedStride);
266*1e0be76eSMichael Halkenhäuser 
267*1e0be76eSMichael Halkenhäuser         Value *NextUBOutOfBounds =
268*1e0be76eSMichael Halkenhäuser             Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SGT, NextUB,
269*1e0be76eSMichael Halkenhäuser                                AdjustedUB, "polly.indvar.nextUB.outOfBounds");
270*1e0be76eSMichael Halkenhäuser         NextUB = Builder.CreateSelect(NextUBOutOfBounds, AdjustedUB, NextUB,
271*1e0be76eSMichael Halkenhäuser                                       "polly.indvar.nextUB");
272*1e0be76eSMichael Halkenhäuser 
273*1e0be76eSMichael Halkenhäuser         Builder.CreateAlignedStore(NextLB, LBPtr, Alignment);
274*1e0be76eSMichael Halkenhäuser         Builder.CreateAlignedStore(NextUB, UBPtr, Alignment);
275*1e0be76eSMichael Halkenhäuser 
276*1e0be76eSMichael Halkenhäuser         Value *HasWork =
277*1e0be76eSMichael Halkenhäuser             Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SLE, NextLB,
278*1e0be76eSMichael Halkenhäuser                                AdjustedUB, "polly.hasWork");
279*1e0be76eSMichael Halkenhäuser         Builder.CreateCondBr(HasWork, PreHeaderBB, ExitBB);
280*1e0be76eSMichael Halkenhäuser       } else {
28189251edeSMichael Kruse         Builder.CreateBr(ExitBB);
282*1e0be76eSMichael Halkenhäuser       }
28389251edeSMichael Kruse 
28489251edeSMichael Kruse       Builder.SetInsertPoint(PreHeaderBB);
28589251edeSMichael Kruse     }
28689251edeSMichael Kruse     break;
28789251edeSMichael Kruse   }
28889251edeSMichael Kruse 
28989251edeSMichael Kruse   Builder.CreateBr(CheckNextBB);
29089251edeSMichael Kruse   Builder.SetInsertPoint(&*--Builder.GetInsertPoint());
29189251edeSMichael Kruse   BasicBlock *AfterBB;
292*1e0be76eSMichael Halkenhäuser   Value *IV = createLoop(LB, UB, SequentialLoopStride, Builder, LI, DT, AfterBB,
29389251edeSMichael Kruse                          ICmpInst::ICMP_SLE, nullptr, true,
29489251edeSMichael Kruse                          /* UseGuard */ false);
29589251edeSMichael Kruse 
29689251edeSMichael Kruse   BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
29789251edeSMichael Kruse 
29889251edeSMichael Kruse   // Add code to terminate this subfunction.
29989251edeSMichael Kruse   Builder.SetInsertPoint(ExitBB);
30089251edeSMichael Kruse   // Static (i.e. non-dynamic) scheduling types, are terminated with a fini-call
301*1e0be76eSMichael Halkenhäuser   if (Scheduling == OMPGeneralSchedulingType::StaticChunked ||
302*1e0be76eSMichael Halkenhäuser       Scheduling == OMPGeneralSchedulingType::StaticNonChunked) {
30389251edeSMichael Kruse     createCallStaticFini(ID);
30489251edeSMichael Kruse   }
30589251edeSMichael Kruse   Builder.CreateRetVoid();
30689251edeSMichael Kruse   Builder.SetInsertPoint(&*LoopBody);
30789251edeSMichael Kruse 
30889251edeSMichael Kruse   return std::make_tuple(IV, SubFn);
30989251edeSMichael Kruse }
31089251edeSMichael Kruse 
31189251edeSMichael Kruse Value *ParallelLoopGeneratorKMP::createCallGlobalThreadNum() {
31289251edeSMichael Kruse   const std::string Name = "__kmpc_global_thread_num";
31389251edeSMichael Kruse   Function *F = M->getFunction(Name);
31489251edeSMichael Kruse 
31589251edeSMichael Kruse   // If F is not available, declare it.
31689251edeSMichael Kruse   if (!F) {
31789251edeSMichael Kruse     StructType *IdentTy = M->getTypeByName("struct.ident_t");
31889251edeSMichael Kruse 
31989251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
32089251edeSMichael Kruse     Type *Params[] = {IdentTy->getPointerTo()};
32189251edeSMichael Kruse 
32289251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), Params, false);
32389251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
32489251edeSMichael Kruse   }
32589251edeSMichael Kruse 
32689251edeSMichael Kruse   return Builder.CreateCall(F, {SourceLocationInfo});
32789251edeSMichael Kruse }
32889251edeSMichael Kruse 
32989251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallPushNumThreads(Value *GlobalThreadID,
33089251edeSMichael Kruse                                                         Value *NumThreads) {
33189251edeSMichael Kruse   const std::string Name = "__kmpc_push_num_threads";
33289251edeSMichael Kruse   Function *F = M->getFunction(Name);
33389251edeSMichael Kruse 
33489251edeSMichael Kruse   // If F is not available, declare it.
33589251edeSMichael Kruse   if (!F) {
33689251edeSMichael Kruse     StructType *IdentTy = M->getTypeByName("struct.ident_t");
33789251edeSMichael Kruse 
33889251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
33989251edeSMichael Kruse     Type *Params[] = {IdentTy->getPointerTo(), Builder.getInt32Ty(),
34089251edeSMichael Kruse                       Builder.getInt32Ty()};
34189251edeSMichael Kruse 
34289251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
34389251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
34489251edeSMichael Kruse   }
34589251edeSMichael Kruse 
34689251edeSMichael Kruse   Value *Args[] = {SourceLocationInfo, GlobalThreadID, NumThreads};
34789251edeSMichael Kruse 
34889251edeSMichael Kruse   Builder.CreateCall(F, Args);
34989251edeSMichael Kruse }
35089251edeSMichael Kruse 
35189251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallStaticInit(Value *GlobalThreadID,
35289251edeSMichael Kruse                                                     Value *IsLastPtr,
35389251edeSMichael Kruse                                                     Value *LBPtr, Value *UBPtr,
35489251edeSMichael Kruse                                                     Value *StridePtr,
35589251edeSMichael Kruse                                                     Value *ChunkSize) {
35689251edeSMichael Kruse   const std::string Name =
35789251edeSMichael Kruse       is64BitArch() ? "__kmpc_for_static_init_8" : "__kmpc_for_static_init_4";
35889251edeSMichael Kruse   Function *F = M->getFunction(Name);
35989251edeSMichael Kruse   StructType *IdentTy = M->getTypeByName("struct.ident_t");
36089251edeSMichael Kruse 
36189251edeSMichael Kruse   // If F is not available, declare it.
36289251edeSMichael Kruse   if (!F) {
36389251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
36489251edeSMichael Kruse 
36589251edeSMichael Kruse     Type *Params[] = {IdentTy->getPointerTo(),
36689251edeSMichael Kruse                       Builder.getInt32Ty(),
36789251edeSMichael Kruse                       Builder.getInt32Ty(),
36889251edeSMichael Kruse                       Builder.getInt32Ty()->getPointerTo(),
36989251edeSMichael Kruse                       LongType->getPointerTo(),
37089251edeSMichael Kruse                       LongType->getPointerTo(),
37189251edeSMichael Kruse                       LongType->getPointerTo(),
37289251edeSMichael Kruse                       LongType,
37389251edeSMichael Kruse                       LongType};
37489251edeSMichael Kruse 
37589251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
37689251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
37789251edeSMichael Kruse   }
37889251edeSMichael Kruse 
37989251edeSMichael Kruse   // The parameter 'ChunkSize' will hold strictly positive integer values,
38089251edeSMichael Kruse   // regardless of PollyChunkSize's value
38189251edeSMichael Kruse   Value *Args[] = {
38289251edeSMichael Kruse       SourceLocationInfo,
38389251edeSMichael Kruse       GlobalThreadID,
38489251edeSMichael Kruse       Builder.getInt32(int(getSchedType(PollyChunkSize, PollyScheduling))),
38589251edeSMichael Kruse       IsLastPtr,
38689251edeSMichael Kruse       LBPtr,
38789251edeSMichael Kruse       UBPtr,
38889251edeSMichael Kruse       StridePtr,
38989251edeSMichael Kruse       ConstantInt::get(LongType, 1),
39089251edeSMichael Kruse       ChunkSize};
39189251edeSMichael Kruse 
39289251edeSMichael Kruse   Builder.CreateCall(F, Args);
39389251edeSMichael Kruse }
39489251edeSMichael Kruse 
39589251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallStaticFini(Value *GlobalThreadID) {
39689251edeSMichael Kruse   const std::string Name = "__kmpc_for_static_fini";
39789251edeSMichael Kruse   Function *F = M->getFunction(Name);
39889251edeSMichael Kruse   StructType *IdentTy = M->getTypeByName("struct.ident_t");
39989251edeSMichael Kruse 
40089251edeSMichael Kruse   // If F is not available, declare it.
40189251edeSMichael Kruse   if (!F) {
40289251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
40389251edeSMichael Kruse     Type *Params[] = {IdentTy->getPointerTo(), Builder.getInt32Ty()};
40489251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
40589251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
40689251edeSMichael Kruse   }
40789251edeSMichael Kruse 
40889251edeSMichael Kruse   Value *Args[] = {SourceLocationInfo, GlobalThreadID};
40989251edeSMichael Kruse 
41089251edeSMichael Kruse   Builder.CreateCall(F, Args);
41189251edeSMichael Kruse }
41289251edeSMichael Kruse 
41389251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallDispatchInit(Value *GlobalThreadID,
41489251edeSMichael Kruse                                                       Value *LB, Value *UB,
41589251edeSMichael Kruse                                                       Value *Inc,
41689251edeSMichael Kruse                                                       Value *ChunkSize) {
41789251edeSMichael Kruse   const std::string Name =
41889251edeSMichael Kruse       is64BitArch() ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_4";
41989251edeSMichael Kruse   Function *F = M->getFunction(Name);
42089251edeSMichael Kruse   StructType *IdentTy = M->getTypeByName("struct.ident_t");
42189251edeSMichael Kruse 
42289251edeSMichael Kruse   // If F is not available, declare it.
42389251edeSMichael Kruse   if (!F) {
42489251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
42589251edeSMichael Kruse 
42689251edeSMichael Kruse     Type *Params[] = {IdentTy->getPointerTo(),
42789251edeSMichael Kruse                       Builder.getInt32Ty(),
42889251edeSMichael Kruse                       Builder.getInt32Ty(),
42989251edeSMichael Kruse                       LongType,
43089251edeSMichael Kruse                       LongType,
43189251edeSMichael Kruse                       LongType,
43289251edeSMichael Kruse                       LongType};
43389251edeSMichael Kruse 
43489251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
43589251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
43689251edeSMichael Kruse   }
43789251edeSMichael Kruse 
43889251edeSMichael Kruse   // The parameter 'ChunkSize' will hold strictly positive integer values,
43989251edeSMichael Kruse   // regardless of PollyChunkSize's value
44089251edeSMichael Kruse   Value *Args[] = {
44189251edeSMichael Kruse       SourceLocationInfo,
44289251edeSMichael Kruse       GlobalThreadID,
44389251edeSMichael Kruse       Builder.getInt32(int(getSchedType(PollyChunkSize, PollyScheduling))),
44489251edeSMichael Kruse       LB,
44589251edeSMichael Kruse       UB,
44689251edeSMichael Kruse       Inc,
44789251edeSMichael Kruse       ChunkSize};
44889251edeSMichael Kruse 
44989251edeSMichael Kruse   Builder.CreateCall(F, Args);
45089251edeSMichael Kruse }
45189251edeSMichael Kruse 
45289251edeSMichael Kruse Value *ParallelLoopGeneratorKMP::createCallDispatchNext(Value *GlobalThreadID,
45389251edeSMichael Kruse                                                         Value *IsLastPtr,
45489251edeSMichael Kruse                                                         Value *LBPtr,
45589251edeSMichael Kruse                                                         Value *UBPtr,
45689251edeSMichael Kruse                                                         Value *StridePtr) {
45789251edeSMichael Kruse   const std::string Name =
45889251edeSMichael Kruse       is64BitArch() ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_4";
45989251edeSMichael Kruse   Function *F = M->getFunction(Name);
46089251edeSMichael Kruse   StructType *IdentTy = M->getTypeByName("struct.ident_t");
46189251edeSMichael Kruse 
46289251edeSMichael Kruse   // If F is not available, declare it.
46389251edeSMichael Kruse   if (!F) {
46489251edeSMichael Kruse     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
46589251edeSMichael Kruse 
46689251edeSMichael Kruse     Type *Params[] = {IdentTy->getPointerTo(),
46789251edeSMichael Kruse                       Builder.getInt32Ty(),
46889251edeSMichael Kruse                       Builder.getInt32Ty()->getPointerTo(),
46989251edeSMichael Kruse                       LongType->getPointerTo(),
47089251edeSMichael Kruse                       LongType->getPointerTo(),
47189251edeSMichael Kruse                       LongType->getPointerTo()};
47289251edeSMichael Kruse 
47389251edeSMichael Kruse     FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), Params, false);
47489251edeSMichael Kruse     F = Function::Create(Ty, Linkage, Name, M);
47589251edeSMichael Kruse   }
47689251edeSMichael Kruse 
47789251edeSMichael Kruse   Value *Args[] = {SourceLocationInfo, GlobalThreadID, IsLastPtr, LBPtr, UBPtr,
47889251edeSMichael Kruse                    StridePtr};
47989251edeSMichael Kruse 
48089251edeSMichael Kruse   return Builder.CreateCall(F, Args);
48189251edeSMichael Kruse }
48289251edeSMichael Kruse 
48389251edeSMichael Kruse // TODO: This function currently creates a source location dummy. It might be
48489251edeSMichael Kruse // necessary to (actually) provide information, in the future.
48589251edeSMichael Kruse GlobalVariable *ParallelLoopGeneratorKMP::createSourceLocation() {
48689251edeSMichael Kruse   const std::string LocName = ".loc.dummy";
48789251edeSMichael Kruse   GlobalVariable *SourceLocDummy = M->getGlobalVariable(LocName);
48889251edeSMichael Kruse 
48989251edeSMichael Kruse   if (SourceLocDummy == nullptr) {
49089251edeSMichael Kruse     const std::string StructName = "struct.ident_t";
49189251edeSMichael Kruse     StructType *IdentTy = M->getTypeByName(StructName);
49289251edeSMichael Kruse 
49389251edeSMichael Kruse     // If the ident_t StructType is not available, declare it.
49489251edeSMichael Kruse     // in LLVM-IR: ident_t = type { i32, i32, i32, i32, i8* }
49589251edeSMichael Kruse     if (!IdentTy) {
49689251edeSMichael Kruse       Type *LocMembers[] = {Builder.getInt32Ty(), Builder.getInt32Ty(),
49789251edeSMichael Kruse                             Builder.getInt32Ty(), Builder.getInt32Ty(),
49889251edeSMichael Kruse                             Builder.getInt8PtrTy()};
49989251edeSMichael Kruse 
50089251edeSMichael Kruse       IdentTy =
50189251edeSMichael Kruse           StructType::create(M->getContext(), LocMembers, StructName, false);
50289251edeSMichael Kruse     }
50389251edeSMichael Kruse 
50489251edeSMichael Kruse     const auto ArrayType =
50589251edeSMichael Kruse         llvm::ArrayType::get(Builder.getInt8Ty(), /* Length */ 23);
50689251edeSMichael Kruse 
50789251edeSMichael Kruse     // Global Variable Definitions
50889251edeSMichael Kruse     GlobalVariable *StrVar = new GlobalVariable(
50989251edeSMichael Kruse         *M, ArrayType, true, GlobalValue::PrivateLinkage, 0, ".str.ident");
510805c157eSGuillaume Chatelet     StrVar->setAlignment(llvm::Align(1));
51189251edeSMichael Kruse 
51289251edeSMichael Kruse     SourceLocDummy = new GlobalVariable(
51389251edeSMichael Kruse         *M, IdentTy, true, GlobalValue::PrivateLinkage, nullptr, LocName);
5140e62011dSGuillaume Chatelet     SourceLocDummy->setAlignment(llvm::Align(8));
51589251edeSMichael Kruse 
51689251edeSMichael Kruse     // Constant Definitions
51789251edeSMichael Kruse     Constant *InitStr = ConstantDataArray::getString(
51889251edeSMichael Kruse         M->getContext(), "Source location dummy.", true);
51989251edeSMichael Kruse 
52089251edeSMichael Kruse     Constant *StrPtr = static_cast<Constant *>(Builder.CreateInBoundsGEP(
52189251edeSMichael Kruse         ArrayType, StrVar, {Builder.getInt32(0), Builder.getInt32(0)}));
52289251edeSMichael Kruse 
52389251edeSMichael Kruse     Constant *LocInitStruct = ConstantStruct::get(
52489251edeSMichael Kruse         IdentTy, {Builder.getInt32(0), Builder.getInt32(0), Builder.getInt32(0),
52589251edeSMichael Kruse                   Builder.getInt32(0), StrPtr});
52689251edeSMichael Kruse 
52789251edeSMichael Kruse     // Initialize variables
52889251edeSMichael Kruse     StrVar->setInitializer(InitStr);
52989251edeSMichael Kruse     SourceLocDummy->setInitializer(LocInitStruct);
53089251edeSMichael Kruse   }
53189251edeSMichael Kruse 
53289251edeSMichael Kruse   return SourceLocDummy;
53389251edeSMichael Kruse }
53489251edeSMichael Kruse 
53589251edeSMichael Kruse bool ParallelLoopGeneratorKMP::is64BitArch() {
53689251edeSMichael Kruse   return (LongType->getIntegerBitWidth() == 64);
53789251edeSMichael Kruse }
53889251edeSMichael Kruse 
53989251edeSMichael Kruse OMPGeneralSchedulingType ParallelLoopGeneratorKMP::getSchedType(
54089251edeSMichael Kruse     int ChunkSize, OMPGeneralSchedulingType Scheduling) const {
54189251edeSMichael Kruse   if (ChunkSize == 0 && Scheduling == OMPGeneralSchedulingType::StaticChunked)
54289251edeSMichael Kruse     return OMPGeneralSchedulingType::StaticNonChunked;
54389251edeSMichael Kruse 
54489251edeSMichael Kruse   return Scheduling;
54589251edeSMichael Kruse }
546