1*89251edeSMichael Kruse //===------ LoopGeneratorsKMP.cpp - IR helper to create loops -------------===// 2*89251edeSMichael Kruse // 3*89251edeSMichael Kruse // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*89251edeSMichael Kruse // See https://llvm.org/LICENSE.txt for license information. 5*89251edeSMichael Kruse // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*89251edeSMichael Kruse // 7*89251edeSMichael Kruse //===----------------------------------------------------------------------===// 8*89251edeSMichael Kruse // 9*89251edeSMichael Kruse // This file contains functions to create parallel loops as LLVM-IR. 10*89251edeSMichael Kruse // 11*89251edeSMichael Kruse //===----------------------------------------------------------------------===// 12*89251edeSMichael Kruse 13*89251edeSMichael Kruse #include "polly/CodeGen/LoopGeneratorsKMP.h" 14*89251edeSMichael Kruse #include "polly/Options.h" 15*89251edeSMichael Kruse #include "polly/ScopDetection.h" 16*89251edeSMichael Kruse #include "llvm/Analysis/LoopInfo.h" 17*89251edeSMichael Kruse #include "llvm/IR/DataLayout.h" 18*89251edeSMichael Kruse #include "llvm/IR/Dominators.h" 19*89251edeSMichael Kruse #include "llvm/IR/Module.h" 20*89251edeSMichael Kruse #include "llvm/Support/CommandLine.h" 21*89251edeSMichael Kruse #include "llvm/Transforms/Utils/BasicBlockUtils.h" 22*89251edeSMichael Kruse 23*89251edeSMichael Kruse using namespace llvm; 24*89251edeSMichael Kruse using namespace polly; 25*89251edeSMichael Kruse 26*89251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallSpawnThreads(Value *SubFn, 27*89251edeSMichael Kruse Value *SubFnParam, 28*89251edeSMichael Kruse Value *LB, Value *UB, 29*89251edeSMichael Kruse Value *Stride) { 30*89251edeSMichael Kruse const std::string Name = "__kmpc_fork_call"; 31*89251edeSMichael Kruse Function *F = M->getFunction(Name); 32*89251edeSMichael Kruse Type *KMPCMicroTy = M->getTypeByName("kmpc_micro"); 33*89251edeSMichael Kruse 34*89251edeSMichael Kruse if (!KMPCMicroTy) { 35*89251edeSMichael Kruse // void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...) 36*89251edeSMichael Kruse Type *MicroParams[] = {Builder.getInt32Ty()->getPointerTo(), 37*89251edeSMichael Kruse Builder.getInt32Ty()->getPointerTo()}; 38*89251edeSMichael Kruse 39*89251edeSMichael Kruse KMPCMicroTy = FunctionType::get(Builder.getVoidTy(), MicroParams, true); 40*89251edeSMichael Kruse } 41*89251edeSMichael Kruse 42*89251edeSMichael Kruse // If F is not available, declare it. 43*89251edeSMichael Kruse if (!F) { 44*89251edeSMichael Kruse StructType *IdentTy = M->getTypeByName("struct.ident_t"); 45*89251edeSMichael Kruse 46*89251edeSMichael Kruse GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 47*89251edeSMichael Kruse Type *Params[] = {IdentTy->getPointerTo(), Builder.getInt32Ty(), 48*89251edeSMichael Kruse KMPCMicroTy->getPointerTo()}; 49*89251edeSMichael Kruse 50*89251edeSMichael Kruse FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, true); 51*89251edeSMichael Kruse F = Function::Create(Ty, Linkage, Name, M); 52*89251edeSMichael Kruse } 53*89251edeSMichael Kruse 54*89251edeSMichael Kruse Value *Task = Builder.CreatePointerBitCastOrAddrSpaceCast( 55*89251edeSMichael Kruse SubFn, KMPCMicroTy->getPointerTo()); 56*89251edeSMichael Kruse 57*89251edeSMichael Kruse Value *Args[] = {SourceLocationInfo, 58*89251edeSMichael Kruse Builder.getInt32(4) /* Number of arguments (w/o Task) */, 59*89251edeSMichael Kruse Task, 60*89251edeSMichael Kruse LB, 61*89251edeSMichael Kruse UB, 62*89251edeSMichael Kruse Stride, 63*89251edeSMichael Kruse SubFnParam}; 64*89251edeSMichael Kruse 65*89251edeSMichael Kruse Builder.CreateCall(F, Args); 66*89251edeSMichael Kruse } 67*89251edeSMichael Kruse 68*89251edeSMichael Kruse void ParallelLoopGeneratorKMP::deployParallelExecution(Value *SubFn, 69*89251edeSMichael Kruse Value *SubFnParam, 70*89251edeSMichael Kruse Value *LB, Value *UB, 71*89251edeSMichael Kruse Value *Stride) { 72*89251edeSMichael Kruse // Inform OpenMP runtime about the number of threads if greater than zero 73*89251edeSMichael Kruse if (PollyNumThreads > 0) { 74*89251edeSMichael Kruse Value *GlobalThreadID = createCallGlobalThreadNum(); 75*89251edeSMichael Kruse createCallPushNumThreads(GlobalThreadID, Builder.getInt32(PollyNumThreads)); 76*89251edeSMichael Kruse } 77*89251edeSMichael Kruse 78*89251edeSMichael Kruse // Tell the runtime we start a parallel loop 79*89251edeSMichael Kruse createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride); 80*89251edeSMichael Kruse } 81*89251edeSMichael Kruse 82*89251edeSMichael Kruse Function *ParallelLoopGeneratorKMP::prepareSubFnDefinition(Function *F) const { 83*89251edeSMichael Kruse std::vector<Type *> Arguments = {Builder.getInt32Ty()->getPointerTo(), 84*89251edeSMichael Kruse Builder.getInt32Ty()->getPointerTo(), 85*89251edeSMichael Kruse LongType, 86*89251edeSMichael Kruse LongType, 87*89251edeSMichael Kruse LongType, 88*89251edeSMichael Kruse Builder.getInt8PtrTy()}; 89*89251edeSMichael Kruse 90*89251edeSMichael Kruse FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false); 91*89251edeSMichael Kruse Function *SubFn = Function::Create(FT, Function::InternalLinkage, 92*89251edeSMichael Kruse F->getName() + "_polly_subfn", M); 93*89251edeSMichael Kruse // Name the function's arguments 94*89251edeSMichael Kruse Function::arg_iterator AI = SubFn->arg_begin(); 95*89251edeSMichael Kruse AI->setName("polly.kmpc.global_tid"); 96*89251edeSMichael Kruse std::advance(AI, 1); 97*89251edeSMichael Kruse AI->setName("polly.kmpc.bound_tid"); 98*89251edeSMichael Kruse std::advance(AI, 1); 99*89251edeSMichael Kruse AI->setName("polly.kmpc.lb"); 100*89251edeSMichael Kruse std::advance(AI, 1); 101*89251edeSMichael Kruse AI->setName("polly.kmpc.ub"); 102*89251edeSMichael Kruse std::advance(AI, 1); 103*89251edeSMichael Kruse AI->setName("polly.kmpc.inc"); 104*89251edeSMichael Kruse std::advance(AI, 1); 105*89251edeSMichael Kruse AI->setName("polly.kmpc.shared"); 106*89251edeSMichael Kruse 107*89251edeSMichael Kruse return SubFn; 108*89251edeSMichael Kruse } 109*89251edeSMichael Kruse 110*89251edeSMichael Kruse // Create a subfunction of the following (preliminary) structure: 111*89251edeSMichael Kruse // 112*89251edeSMichael Kruse // PrevBB 113*89251edeSMichael Kruse // | 114*89251edeSMichael Kruse // v 115*89251edeSMichael Kruse // HeaderBB 116*89251edeSMichael Kruse // | _____ 117*89251edeSMichael Kruse // v v | 118*89251edeSMichael Kruse // CheckNextBB PreHeaderBB 119*89251edeSMichael Kruse // |\ | 120*89251edeSMichael Kruse // | \______/ 121*89251edeSMichael Kruse // | 122*89251edeSMichael Kruse // v 123*89251edeSMichael Kruse // ExitBB 124*89251edeSMichael Kruse // 125*89251edeSMichael Kruse // HeaderBB will hold allocations, loading of variables and kmp-init calls. 126*89251edeSMichael Kruse // CheckNextBB will check for more work (dynamic) or will be "empty" (static). 127*89251edeSMichael Kruse // If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB. 128*89251edeSMichael Kruse // PreHeaderBB loads the new boundaries (& will lead to the loop body later on). 129*89251edeSMichael Kruse // Just like CheckNextBB: PreHeaderBB is empty in the static scheduling case. 130*89251edeSMichael Kruse // ExitBB marks the end of the parallel execution. 131*89251edeSMichael Kruse // The possibly empty BasicBlocks will automatically be removed. 132*89251edeSMichael Kruse std::tuple<Value *, Function *> 133*89251edeSMichael Kruse ParallelLoopGeneratorKMP::createSubFn(Value *StrideNotUsed, 134*89251edeSMichael Kruse AllocaInst *StructData, 135*89251edeSMichael Kruse SetVector<Value *> Data, ValueMapT &Map) { 136*89251edeSMichael Kruse Function *SubFn = createSubFnDefinition(); 137*89251edeSMichael Kruse LLVMContext &Context = SubFn->getContext(); 138*89251edeSMichael Kruse 139*89251edeSMichael Kruse // Store the previous basic block. 140*89251edeSMichael Kruse BasicBlock *PrevBB = Builder.GetInsertBlock(); 141*89251edeSMichael Kruse 142*89251edeSMichael Kruse // Create basic blocks. 143*89251edeSMichael Kruse BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn); 144*89251edeSMichael Kruse BasicBlock *ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn); 145*89251edeSMichael Kruse BasicBlock *CheckNextBB = 146*89251edeSMichael Kruse BasicBlock::Create(Context, "polly.par.checkNext", SubFn); 147*89251edeSMichael Kruse BasicBlock *PreHeaderBB = 148*89251edeSMichael Kruse BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn); 149*89251edeSMichael Kruse 150*89251edeSMichael Kruse DT.addNewBlock(HeaderBB, PrevBB); 151*89251edeSMichael Kruse DT.addNewBlock(ExitBB, HeaderBB); 152*89251edeSMichael Kruse DT.addNewBlock(CheckNextBB, HeaderBB); 153*89251edeSMichael Kruse DT.addNewBlock(PreHeaderBB, HeaderBB); 154*89251edeSMichael Kruse 155*89251edeSMichael Kruse // Fill up basic block HeaderBB. 156*89251edeSMichael Kruse Builder.SetInsertPoint(HeaderBB); 157*89251edeSMichael Kruse Value *LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr"); 158*89251edeSMichael Kruse Value *UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr"); 159*89251edeSMichael Kruse Value *IsLastPtr = Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, 160*89251edeSMichael Kruse "polly.par.lastIterPtr"); 161*89251edeSMichael Kruse Value *StridePtr = 162*89251edeSMichael Kruse Builder.CreateAlloca(LongType, nullptr, "polly.par.StridePtr"); 163*89251edeSMichael Kruse 164*89251edeSMichael Kruse // Get iterator for retrieving the previously defined parameters. 165*89251edeSMichael Kruse Function::arg_iterator AI = SubFn->arg_begin(); 166*89251edeSMichael Kruse // First argument holds "global thread ID". 167*89251edeSMichael Kruse Value *IDPtr = &*AI; 168*89251edeSMichael Kruse // Skip "bound thread ID" since it is not used (but had to be defined). 169*89251edeSMichael Kruse std::advance(AI, 2); 170*89251edeSMichael Kruse // Move iterator to: LB, UB, Stride, Shared variable struct. 171*89251edeSMichael Kruse Value *LB = &*AI; 172*89251edeSMichael Kruse std::advance(AI, 1); 173*89251edeSMichael Kruse Value *UB = &*AI; 174*89251edeSMichael Kruse std::advance(AI, 1); 175*89251edeSMichael Kruse Value *Stride = &*AI; 176*89251edeSMichael Kruse std::advance(AI, 1); 177*89251edeSMichael Kruse Value *Shared = &*AI; 178*89251edeSMichael Kruse 179*89251edeSMichael Kruse Value *UserContext = Builder.CreateBitCast(Shared, StructData->getType(), 180*89251edeSMichael Kruse "polly.par.userContext"); 181*89251edeSMichael Kruse 182*89251edeSMichael Kruse extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext, 183*89251edeSMichael Kruse Map); 184*89251edeSMichael Kruse 185*89251edeSMichael Kruse const int Alignment = (is64BitArch()) ? 8 : 4; 186*89251edeSMichael Kruse Value *ID = 187*89251edeSMichael Kruse Builder.CreateAlignedLoad(IDPtr, Alignment, "polly.par.global_tid"); 188*89251edeSMichael Kruse 189*89251edeSMichael Kruse Builder.CreateAlignedStore(LB, LBPtr, Alignment); 190*89251edeSMichael Kruse Builder.CreateAlignedStore(UB, UBPtr, Alignment); 191*89251edeSMichael Kruse Builder.CreateAlignedStore(Builder.getInt32(0), IsLastPtr, Alignment); 192*89251edeSMichael Kruse Builder.CreateAlignedStore(Stride, StridePtr, Alignment); 193*89251edeSMichael Kruse 194*89251edeSMichael Kruse // Subtract one as the upper bound provided by openmp is a < comparison 195*89251edeSMichael Kruse // whereas the codegenForSequential function creates a <= comparison. 196*89251edeSMichael Kruse Value *AdjustedUB = Builder.CreateAdd(UB, ConstantInt::get(LongType, -1), 197*89251edeSMichael Kruse "polly.indvar.UBAdjusted"); 198*89251edeSMichael Kruse 199*89251edeSMichael Kruse Value *ChunkSize = 200*89251edeSMichael Kruse ConstantInt::get(LongType, std::max<int>(PollyChunkSize, 1)); 201*89251edeSMichael Kruse 202*89251edeSMichael Kruse switch (PollyScheduling) { 203*89251edeSMichael Kruse case OMPGeneralSchedulingType::Dynamic: 204*89251edeSMichael Kruse case OMPGeneralSchedulingType::Guided: 205*89251edeSMichael Kruse case OMPGeneralSchedulingType::Runtime: 206*89251edeSMichael Kruse // "DYNAMIC" scheduling types are handled below (including 'runtime') 207*89251edeSMichael Kruse { 208*89251edeSMichael Kruse UB = AdjustedUB; 209*89251edeSMichael Kruse createCallDispatchInit(ID, LB, UB, Stride, ChunkSize); 210*89251edeSMichael Kruse Value *HasWork = 211*89251edeSMichael Kruse createCallDispatchNext(ID, IsLastPtr, LBPtr, UBPtr, StridePtr); 212*89251edeSMichael Kruse Value *HasIteration = 213*89251edeSMichael Kruse Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ, HasWork, 214*89251edeSMichael Kruse Builder.getInt32(1), "polly.hasIteration"); 215*89251edeSMichael Kruse Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB); 216*89251edeSMichael Kruse 217*89251edeSMichael Kruse Builder.SetInsertPoint(CheckNextBB); 218*89251edeSMichael Kruse HasWork = createCallDispatchNext(ID, IsLastPtr, LBPtr, UBPtr, StridePtr); 219*89251edeSMichael Kruse HasIteration = 220*89251edeSMichael Kruse Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_EQ, HasWork, 221*89251edeSMichael Kruse Builder.getInt32(1), "polly.hasWork"); 222*89251edeSMichael Kruse Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB); 223*89251edeSMichael Kruse 224*89251edeSMichael Kruse Builder.SetInsertPoint(PreHeaderBB); 225*89251edeSMichael Kruse LB = Builder.CreateAlignedLoad(LBPtr, Alignment, "polly.indvar.LB"); 226*89251edeSMichael Kruse UB = Builder.CreateAlignedLoad(UBPtr, Alignment, "polly.indvar.UB"); 227*89251edeSMichael Kruse } 228*89251edeSMichael Kruse break; 229*89251edeSMichael Kruse case OMPGeneralSchedulingType::StaticChunked: 230*89251edeSMichael Kruse case OMPGeneralSchedulingType::StaticNonChunked: 231*89251edeSMichael Kruse // "STATIC" scheduling types are handled below 232*89251edeSMichael Kruse { 233*89251edeSMichael Kruse createCallStaticInit(ID, IsLastPtr, LBPtr, UBPtr, StridePtr, ChunkSize); 234*89251edeSMichael Kruse 235*89251edeSMichael Kruse LB = Builder.CreateAlignedLoad(LBPtr, Alignment, "polly.indvar.LB"); 236*89251edeSMichael Kruse UB = Builder.CreateAlignedLoad(UBPtr, Alignment, "polly.indvar.UB"); 237*89251edeSMichael Kruse 238*89251edeSMichael Kruse Value *AdjUBOutOfBounds = 239*89251edeSMichael Kruse Builder.CreateICmp(llvm::CmpInst::Predicate::ICMP_SLT, UB, AdjustedUB, 240*89251edeSMichael Kruse "polly.adjustedUBOutOfBounds"); 241*89251edeSMichael Kruse 242*89251edeSMichael Kruse UB = Builder.CreateSelect(AdjUBOutOfBounds, UB, AdjustedUB); 243*89251edeSMichael Kruse Builder.CreateAlignedStore(UB, UBPtr, Alignment); 244*89251edeSMichael Kruse 245*89251edeSMichael Kruse Value *HasIteration = Builder.CreateICmp( 246*89251edeSMichael Kruse llvm::CmpInst::Predicate::ICMP_SLE, LB, UB, "polly.hasIteration"); 247*89251edeSMichael Kruse Builder.CreateCondBr(HasIteration, PreHeaderBB, ExitBB); 248*89251edeSMichael Kruse 249*89251edeSMichael Kruse Builder.SetInsertPoint(CheckNextBB); 250*89251edeSMichael Kruse Builder.CreateBr(ExitBB); 251*89251edeSMichael Kruse 252*89251edeSMichael Kruse Builder.SetInsertPoint(PreHeaderBB); 253*89251edeSMichael Kruse } 254*89251edeSMichael Kruse break; 255*89251edeSMichael Kruse } 256*89251edeSMichael Kruse 257*89251edeSMichael Kruse Builder.CreateBr(CheckNextBB); 258*89251edeSMichael Kruse Builder.SetInsertPoint(&*--Builder.GetInsertPoint()); 259*89251edeSMichael Kruse BasicBlock *AfterBB; 260*89251edeSMichael Kruse Value *IV = createLoop(LB, UB, Stride, Builder, LI, DT, AfterBB, 261*89251edeSMichael Kruse ICmpInst::ICMP_SLE, nullptr, true, 262*89251edeSMichael Kruse /* UseGuard */ false); 263*89251edeSMichael Kruse 264*89251edeSMichael Kruse BasicBlock::iterator LoopBody = Builder.GetInsertPoint(); 265*89251edeSMichael Kruse 266*89251edeSMichael Kruse // Add code to terminate this subfunction. 267*89251edeSMichael Kruse Builder.SetInsertPoint(ExitBB); 268*89251edeSMichael Kruse // Static (i.e. non-dynamic) scheduling types, are terminated with a fini-call 269*89251edeSMichael Kruse if (PollyScheduling == OMPGeneralSchedulingType::StaticChunked) { 270*89251edeSMichael Kruse createCallStaticFini(ID); 271*89251edeSMichael Kruse } 272*89251edeSMichael Kruse Builder.CreateRetVoid(); 273*89251edeSMichael Kruse Builder.SetInsertPoint(&*LoopBody); 274*89251edeSMichael Kruse 275*89251edeSMichael Kruse return std::make_tuple(IV, SubFn); 276*89251edeSMichael Kruse } 277*89251edeSMichael Kruse 278*89251edeSMichael Kruse Value *ParallelLoopGeneratorKMP::createCallGlobalThreadNum() { 279*89251edeSMichael Kruse const std::string Name = "__kmpc_global_thread_num"; 280*89251edeSMichael Kruse Function *F = M->getFunction(Name); 281*89251edeSMichael Kruse 282*89251edeSMichael Kruse // If F is not available, declare it. 283*89251edeSMichael Kruse if (!F) { 284*89251edeSMichael Kruse StructType *IdentTy = M->getTypeByName("struct.ident_t"); 285*89251edeSMichael Kruse 286*89251edeSMichael Kruse GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 287*89251edeSMichael Kruse Type *Params[] = {IdentTy->getPointerTo()}; 288*89251edeSMichael Kruse 289*89251edeSMichael Kruse FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), Params, false); 290*89251edeSMichael Kruse F = Function::Create(Ty, Linkage, Name, M); 291*89251edeSMichael Kruse } 292*89251edeSMichael Kruse 293*89251edeSMichael Kruse return Builder.CreateCall(F, {SourceLocationInfo}); 294*89251edeSMichael Kruse } 295*89251edeSMichael Kruse 296*89251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallPushNumThreads(Value *GlobalThreadID, 297*89251edeSMichael Kruse Value *NumThreads) { 298*89251edeSMichael Kruse const std::string Name = "__kmpc_push_num_threads"; 299*89251edeSMichael Kruse Function *F = M->getFunction(Name); 300*89251edeSMichael Kruse 301*89251edeSMichael Kruse // If F is not available, declare it. 302*89251edeSMichael Kruse if (!F) { 303*89251edeSMichael Kruse StructType *IdentTy = M->getTypeByName("struct.ident_t"); 304*89251edeSMichael Kruse 305*89251edeSMichael Kruse GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 306*89251edeSMichael Kruse Type *Params[] = {IdentTy->getPointerTo(), Builder.getInt32Ty(), 307*89251edeSMichael Kruse Builder.getInt32Ty()}; 308*89251edeSMichael Kruse 309*89251edeSMichael Kruse FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); 310*89251edeSMichael Kruse F = Function::Create(Ty, Linkage, Name, M); 311*89251edeSMichael Kruse } 312*89251edeSMichael Kruse 313*89251edeSMichael Kruse Value *Args[] = {SourceLocationInfo, GlobalThreadID, NumThreads}; 314*89251edeSMichael Kruse 315*89251edeSMichael Kruse Builder.CreateCall(F, Args); 316*89251edeSMichael Kruse } 317*89251edeSMichael Kruse 318*89251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallStaticInit(Value *GlobalThreadID, 319*89251edeSMichael Kruse Value *IsLastPtr, 320*89251edeSMichael Kruse Value *LBPtr, Value *UBPtr, 321*89251edeSMichael Kruse Value *StridePtr, 322*89251edeSMichael Kruse Value *ChunkSize) { 323*89251edeSMichael Kruse const std::string Name = 324*89251edeSMichael Kruse is64BitArch() ? "__kmpc_for_static_init_8" : "__kmpc_for_static_init_4"; 325*89251edeSMichael Kruse Function *F = M->getFunction(Name); 326*89251edeSMichael Kruse StructType *IdentTy = M->getTypeByName("struct.ident_t"); 327*89251edeSMichael Kruse 328*89251edeSMichael Kruse // If F is not available, declare it. 329*89251edeSMichael Kruse if (!F) { 330*89251edeSMichael Kruse GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 331*89251edeSMichael Kruse 332*89251edeSMichael Kruse Type *Params[] = {IdentTy->getPointerTo(), 333*89251edeSMichael Kruse Builder.getInt32Ty(), 334*89251edeSMichael Kruse Builder.getInt32Ty(), 335*89251edeSMichael Kruse Builder.getInt32Ty()->getPointerTo(), 336*89251edeSMichael Kruse LongType->getPointerTo(), 337*89251edeSMichael Kruse LongType->getPointerTo(), 338*89251edeSMichael Kruse LongType->getPointerTo(), 339*89251edeSMichael Kruse LongType, 340*89251edeSMichael Kruse LongType}; 341*89251edeSMichael Kruse 342*89251edeSMichael Kruse FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); 343*89251edeSMichael Kruse F = Function::Create(Ty, Linkage, Name, M); 344*89251edeSMichael Kruse } 345*89251edeSMichael Kruse 346*89251edeSMichael Kruse // The parameter 'ChunkSize' will hold strictly positive integer values, 347*89251edeSMichael Kruse // regardless of PollyChunkSize's value 348*89251edeSMichael Kruse Value *Args[] = { 349*89251edeSMichael Kruse SourceLocationInfo, 350*89251edeSMichael Kruse GlobalThreadID, 351*89251edeSMichael Kruse Builder.getInt32(int(getSchedType(PollyChunkSize, PollyScheduling))), 352*89251edeSMichael Kruse IsLastPtr, 353*89251edeSMichael Kruse LBPtr, 354*89251edeSMichael Kruse UBPtr, 355*89251edeSMichael Kruse StridePtr, 356*89251edeSMichael Kruse ConstantInt::get(LongType, 1), 357*89251edeSMichael Kruse ChunkSize}; 358*89251edeSMichael Kruse 359*89251edeSMichael Kruse Builder.CreateCall(F, Args); 360*89251edeSMichael Kruse } 361*89251edeSMichael Kruse 362*89251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallStaticFini(Value *GlobalThreadID) { 363*89251edeSMichael Kruse const std::string Name = "__kmpc_for_static_fini"; 364*89251edeSMichael Kruse Function *F = M->getFunction(Name); 365*89251edeSMichael Kruse StructType *IdentTy = M->getTypeByName("struct.ident_t"); 366*89251edeSMichael Kruse 367*89251edeSMichael Kruse // If F is not available, declare it. 368*89251edeSMichael Kruse if (!F) { 369*89251edeSMichael Kruse GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 370*89251edeSMichael Kruse Type *Params[] = {IdentTy->getPointerTo(), Builder.getInt32Ty()}; 371*89251edeSMichael Kruse FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); 372*89251edeSMichael Kruse F = Function::Create(Ty, Linkage, Name, M); 373*89251edeSMichael Kruse } 374*89251edeSMichael Kruse 375*89251edeSMichael Kruse Value *Args[] = {SourceLocationInfo, GlobalThreadID}; 376*89251edeSMichael Kruse 377*89251edeSMichael Kruse Builder.CreateCall(F, Args); 378*89251edeSMichael Kruse } 379*89251edeSMichael Kruse 380*89251edeSMichael Kruse void ParallelLoopGeneratorKMP::createCallDispatchInit(Value *GlobalThreadID, 381*89251edeSMichael Kruse Value *LB, Value *UB, 382*89251edeSMichael Kruse Value *Inc, 383*89251edeSMichael Kruse Value *ChunkSize) { 384*89251edeSMichael Kruse const std::string Name = 385*89251edeSMichael Kruse is64BitArch() ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_4"; 386*89251edeSMichael Kruse Function *F = M->getFunction(Name); 387*89251edeSMichael Kruse StructType *IdentTy = M->getTypeByName("struct.ident_t"); 388*89251edeSMichael Kruse 389*89251edeSMichael Kruse // If F is not available, declare it. 390*89251edeSMichael Kruse if (!F) { 391*89251edeSMichael Kruse GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 392*89251edeSMichael Kruse 393*89251edeSMichael Kruse Type *Params[] = {IdentTy->getPointerTo(), 394*89251edeSMichael Kruse Builder.getInt32Ty(), 395*89251edeSMichael Kruse Builder.getInt32Ty(), 396*89251edeSMichael Kruse LongType, 397*89251edeSMichael Kruse LongType, 398*89251edeSMichael Kruse LongType, 399*89251edeSMichael Kruse LongType}; 400*89251edeSMichael Kruse 401*89251edeSMichael Kruse FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); 402*89251edeSMichael Kruse F = Function::Create(Ty, Linkage, Name, M); 403*89251edeSMichael Kruse } 404*89251edeSMichael Kruse 405*89251edeSMichael Kruse // The parameter 'ChunkSize' will hold strictly positive integer values, 406*89251edeSMichael Kruse // regardless of PollyChunkSize's value 407*89251edeSMichael Kruse Value *Args[] = { 408*89251edeSMichael Kruse SourceLocationInfo, 409*89251edeSMichael Kruse GlobalThreadID, 410*89251edeSMichael Kruse Builder.getInt32(int(getSchedType(PollyChunkSize, PollyScheduling))), 411*89251edeSMichael Kruse LB, 412*89251edeSMichael Kruse UB, 413*89251edeSMichael Kruse Inc, 414*89251edeSMichael Kruse ChunkSize}; 415*89251edeSMichael Kruse 416*89251edeSMichael Kruse Builder.CreateCall(F, Args); 417*89251edeSMichael Kruse } 418*89251edeSMichael Kruse 419*89251edeSMichael Kruse Value *ParallelLoopGeneratorKMP::createCallDispatchNext(Value *GlobalThreadID, 420*89251edeSMichael Kruse Value *IsLastPtr, 421*89251edeSMichael Kruse Value *LBPtr, 422*89251edeSMichael Kruse Value *UBPtr, 423*89251edeSMichael Kruse Value *StridePtr) { 424*89251edeSMichael Kruse const std::string Name = 425*89251edeSMichael Kruse is64BitArch() ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_4"; 426*89251edeSMichael Kruse Function *F = M->getFunction(Name); 427*89251edeSMichael Kruse StructType *IdentTy = M->getTypeByName("struct.ident_t"); 428*89251edeSMichael Kruse 429*89251edeSMichael Kruse // If F is not available, declare it. 430*89251edeSMichael Kruse if (!F) { 431*89251edeSMichael Kruse GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 432*89251edeSMichael Kruse 433*89251edeSMichael Kruse Type *Params[] = {IdentTy->getPointerTo(), 434*89251edeSMichael Kruse Builder.getInt32Ty(), 435*89251edeSMichael Kruse Builder.getInt32Ty()->getPointerTo(), 436*89251edeSMichael Kruse LongType->getPointerTo(), 437*89251edeSMichael Kruse LongType->getPointerTo(), 438*89251edeSMichael Kruse LongType->getPointerTo()}; 439*89251edeSMichael Kruse 440*89251edeSMichael Kruse FunctionType *Ty = FunctionType::get(Builder.getInt32Ty(), Params, false); 441*89251edeSMichael Kruse F = Function::Create(Ty, Linkage, Name, M); 442*89251edeSMichael Kruse } 443*89251edeSMichael Kruse 444*89251edeSMichael Kruse Value *Args[] = {SourceLocationInfo, GlobalThreadID, IsLastPtr, LBPtr, UBPtr, 445*89251edeSMichael Kruse StridePtr}; 446*89251edeSMichael Kruse 447*89251edeSMichael Kruse return Builder.CreateCall(F, Args); 448*89251edeSMichael Kruse } 449*89251edeSMichael Kruse 450*89251edeSMichael Kruse // TODO: This function currently creates a source location dummy. It might be 451*89251edeSMichael Kruse // necessary to (actually) provide information, in the future. 452*89251edeSMichael Kruse GlobalVariable *ParallelLoopGeneratorKMP::createSourceLocation() { 453*89251edeSMichael Kruse const std::string LocName = ".loc.dummy"; 454*89251edeSMichael Kruse GlobalVariable *SourceLocDummy = M->getGlobalVariable(LocName); 455*89251edeSMichael Kruse 456*89251edeSMichael Kruse if (SourceLocDummy == nullptr) { 457*89251edeSMichael Kruse const std::string StructName = "struct.ident_t"; 458*89251edeSMichael Kruse StructType *IdentTy = M->getTypeByName(StructName); 459*89251edeSMichael Kruse 460*89251edeSMichael Kruse // If the ident_t StructType is not available, declare it. 461*89251edeSMichael Kruse // in LLVM-IR: ident_t = type { i32, i32, i32, i32, i8* } 462*89251edeSMichael Kruse if (!IdentTy) { 463*89251edeSMichael Kruse Type *LocMembers[] = {Builder.getInt32Ty(), Builder.getInt32Ty(), 464*89251edeSMichael Kruse Builder.getInt32Ty(), Builder.getInt32Ty(), 465*89251edeSMichael Kruse Builder.getInt8PtrTy()}; 466*89251edeSMichael Kruse 467*89251edeSMichael Kruse IdentTy = 468*89251edeSMichael Kruse StructType::create(M->getContext(), LocMembers, StructName, false); 469*89251edeSMichael Kruse } 470*89251edeSMichael Kruse 471*89251edeSMichael Kruse const auto ArrayType = 472*89251edeSMichael Kruse llvm::ArrayType::get(Builder.getInt8Ty(), /* Length */ 23); 473*89251edeSMichael Kruse 474*89251edeSMichael Kruse // Global Variable Definitions 475*89251edeSMichael Kruse GlobalVariable *StrVar = new GlobalVariable( 476*89251edeSMichael Kruse *M, ArrayType, true, GlobalValue::PrivateLinkage, 0, ".str.ident"); 477*89251edeSMichael Kruse StrVar->setAlignment(1); 478*89251edeSMichael Kruse 479*89251edeSMichael Kruse SourceLocDummy = new GlobalVariable( 480*89251edeSMichael Kruse *M, IdentTy, true, GlobalValue::PrivateLinkage, nullptr, LocName); 481*89251edeSMichael Kruse SourceLocDummy->setAlignment(8); 482*89251edeSMichael Kruse 483*89251edeSMichael Kruse // Constant Definitions 484*89251edeSMichael Kruse Constant *InitStr = ConstantDataArray::getString( 485*89251edeSMichael Kruse M->getContext(), "Source location dummy.", true); 486*89251edeSMichael Kruse 487*89251edeSMichael Kruse Constant *StrPtr = static_cast<Constant *>(Builder.CreateInBoundsGEP( 488*89251edeSMichael Kruse ArrayType, StrVar, {Builder.getInt32(0), Builder.getInt32(0)})); 489*89251edeSMichael Kruse 490*89251edeSMichael Kruse Constant *LocInitStruct = ConstantStruct::get( 491*89251edeSMichael Kruse IdentTy, {Builder.getInt32(0), Builder.getInt32(0), Builder.getInt32(0), 492*89251edeSMichael Kruse Builder.getInt32(0), StrPtr}); 493*89251edeSMichael Kruse 494*89251edeSMichael Kruse // Initialize variables 495*89251edeSMichael Kruse StrVar->setInitializer(InitStr); 496*89251edeSMichael Kruse SourceLocDummy->setInitializer(LocInitStruct); 497*89251edeSMichael Kruse } 498*89251edeSMichael Kruse 499*89251edeSMichael Kruse return SourceLocDummy; 500*89251edeSMichael Kruse } 501*89251edeSMichael Kruse 502*89251edeSMichael Kruse bool ParallelLoopGeneratorKMP::is64BitArch() { 503*89251edeSMichael Kruse return (LongType->getIntegerBitWidth() == 64); 504*89251edeSMichael Kruse } 505*89251edeSMichael Kruse 506*89251edeSMichael Kruse OMPGeneralSchedulingType ParallelLoopGeneratorKMP::getSchedType( 507*89251edeSMichael Kruse int ChunkSize, OMPGeneralSchedulingType Scheduling) const { 508*89251edeSMichael Kruse if (ChunkSize == 0 && Scheduling == OMPGeneralSchedulingType::StaticChunked) 509*89251edeSMichael Kruse return OMPGeneralSchedulingType::StaticNonChunked; 510*89251edeSMichael Kruse 511*89251edeSMichael Kruse return Scheduling; 512*89251edeSMichael Kruse } 513