1609089f2SHongbin Zheng //===------ LoopGenerators.cpp - IR helper to create loops ---------------===// 2609089f2SHongbin Zheng // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6609089f2SHongbin Zheng // 7609089f2SHongbin Zheng //===----------------------------------------------------------------------===// 8609089f2SHongbin Zheng // 9*89251edeSMichael Kruse // This file contains functions to create scalar loops and orchestrate the 10*89251edeSMichael Kruse // creation of parallel loops as LLVM-IR. 11609089f2SHongbin Zheng // 12609089f2SHongbin Zheng //===----------------------------------------------------------------------===// 13609089f2SHongbin Zheng 148a846610SHongbin Zheng #include "polly/CodeGen/LoopGenerators.h" 15*89251edeSMichael Kruse #include "polly/Options.h" 165624d3c9STobias Grosser #include "polly/ScopDetection.h" 173081b0f5STobias Grosser #include "llvm/Analysis/LoopInfo.h" 18535d52c7SChandler Carruth #include "llvm/IR/DataLayout.h" 19e87c6a81SChandler Carruth #include "llvm/IR/Dominators.h" 2083628182STobias Grosser #include "llvm/IR/Module.h" 21990cd4c2SJohannes Doerfert #include "llvm/Support/CommandLine.h" 22ba0d0922STobias Grosser #include "llvm/Transforms/Utils/BasicBlockUtils.h" 23609089f2SHongbin Zheng 24609089f2SHongbin Zheng using namespace llvm; 254ac4e155SHongbin Zheng using namespace polly; 26609089f2SHongbin Zheng 27*89251edeSMichael Kruse int polly::PollyNumThreads; 28*89251edeSMichael Kruse OMPGeneralSchedulingType polly::PollyScheduling; 29*89251edeSMichael Kruse int polly::PollyChunkSize; 30*89251edeSMichael Kruse 31*89251edeSMichael Kruse static cl::opt<int, true> 32*89251edeSMichael Kruse XPollyNumThreads("polly-num-threads", 33*89251edeSMichael Kruse cl::desc("Number of threads to use (0 = auto)"), 34*89251edeSMichael Kruse cl::Hidden, cl::location(polly::PollyNumThreads), 35*89251edeSMichael Kruse cl::init(0), cl::cat(PollyCategory)); 36*89251edeSMichael Kruse 37*89251edeSMichael Kruse static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling( 38*89251edeSMichael Kruse "polly-scheduling", 39*89251edeSMichael Kruse cl::desc("Scheduling type of parallel OpenMP for loops"), 40*89251edeSMichael Kruse cl::values(clEnumValN(OMPGeneralSchedulingType::StaticChunked, "static", 41*89251edeSMichael Kruse "Static scheduling"), 42*89251edeSMichael Kruse clEnumValN(OMPGeneralSchedulingType::Dynamic, "dynamic", 43*89251edeSMichael Kruse "Dynamic scheduling"), 44*89251edeSMichael Kruse clEnumValN(OMPGeneralSchedulingType::Guided, "guided", 45*89251edeSMichael Kruse "Guided scheduling"), 46*89251edeSMichael Kruse clEnumValN(OMPGeneralSchedulingType::Runtime, "runtime", 47*89251edeSMichael Kruse "Runtime determined (OMP_SCHEDULE)")), 48*89251edeSMichael Kruse cl::Hidden, cl::location(polly::PollyScheduling), 49*89251edeSMichael Kruse cl::init(OMPGeneralSchedulingType::Runtime), cl::Optional, 50*89251edeSMichael Kruse cl::cat(PollyCategory)); 51*89251edeSMichael Kruse 52*89251edeSMichael Kruse static cl::opt<int, true> 53*89251edeSMichael Kruse XPollyChunkSize("polly-scheduling-chunksize", 54*89251edeSMichael Kruse cl::desc("Chunksize to use by the OpenMP runtime calls"), 55*89251edeSMichael Kruse cl::Hidden, cl::location(polly::PollyChunkSize), 56*89251edeSMichael Kruse cl::init(0), cl::Optional, cl::cat(PollyCategory)); 57990cd4c2SJohannes Doerfert 58dd5c1442SJohannes Doerfert // We generate a loop of either of the following structures: 595db6ffd7STobias Grosser // 60dd5c1442SJohannes Doerfert // BeforeBB BeforeBB 61dd5c1442SJohannes Doerfert // | | 62dd5c1442SJohannes Doerfert // v v 63dd5c1442SJohannes Doerfert // GuardBB PreHeaderBB 64dd5c1442SJohannes Doerfert // / | | _____ 65dd5c1442SJohannes Doerfert // __ PreHeaderBB | v \/ | 66dd5c1442SJohannes Doerfert // / \ / | HeaderBB latch 67dd5c1442SJohannes Doerfert // latch HeaderBB | |\ | 68dd5c1442SJohannes Doerfert // \ / \ / | \------/ 69dd5c1442SJohannes Doerfert // < \ / | 70dd5c1442SJohannes Doerfert // \ / v 71dd5c1442SJohannes Doerfert // ExitBB ExitBB 725db6ffd7STobias Grosser // 73dd5c1442SJohannes Doerfert // depending on whether or not we know that it is executed at least once. If 74dd5c1442SJohannes Doerfert // not, GuardBB checks if the loop is executed at least once. If this is the 75dd5c1442SJohannes Doerfert // case we branch to PreHeaderBB and subsequently to the HeaderBB, which 76dd5c1442SJohannes Doerfert // contains the loop iv 'polly.indvar', the incremented loop iv 77dd5c1442SJohannes Doerfert // 'polly.indvar_next' as well as the condition to check if we execute another 78dd5c1442SJohannes Doerfert // iteration of the loop. After the loop has finished, we branch to ExitBB. 794fe342cbSHongbin Zheng // We expect the type of UB, LB, UB+Stride to be large enough for values that 804fe342cbSHongbin Zheng // UB may take throughout the execution of the loop, including the computation 814fe342cbSHongbin Zheng // of indvar + Stride before the final abort. 824ac4e155SHongbin Zheng Value *polly::createLoop(Value *LB, Value *UB, Value *Stride, 832d950f36SPhilip Pfaffe PollyIRBuilder &Builder, LoopInfo &LI, 842ef3f4fdSJohannes Doerfert DominatorTree &DT, BasicBlock *&ExitBB, 8537c9b8e0STobias Grosser ICmpInst::Predicate Predicate, 860956a606SRoman Gareev ScopAnnotator *Annotator, bool Parallel, bool UseGuard, 870956a606SRoman Gareev bool LoopVectDisabled) { 884ac4e155SHongbin Zheng Function *F = Builder.GetInsertBlock()->getParent(); 89609089f2SHongbin Zheng LLVMContext &Context = F->getContext(); 90609089f2SHongbin Zheng 915db6ffd7STobias Grosser assert(LB->getType() == UB->getType() && "Types of loop bounds do not match"); 92609089f2SHongbin Zheng IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType()); 93609089f2SHongbin Zheng assert(LoopIVType && "UB is not integer?"); 94609089f2SHongbin Zheng 955db6ffd7STobias Grosser BasicBlock *BeforeBB = Builder.GetInsertBlock(); 96dd5c1442SJohannes Doerfert BasicBlock *GuardBB = 97dd5c1442SJohannes Doerfert UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr; 985db6ffd7STobias Grosser BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F); 995db6ffd7STobias Grosser BasicBlock *PreHeaderBB = 1005db6ffd7STobias Grosser BasicBlock::Create(Context, "polly.loop_preheader", F); 101609089f2SHongbin Zheng 1023081b0f5STobias Grosser // Update LoopInfo 1033081b0f5STobias Grosser Loop *OuterLoop = LI.getLoopFor(BeforeBB); 104859ef1c0SPhilip Pfaffe Loop *NewLoop = LI.AllocateLoop(); 1053081b0f5STobias Grosser 106dd5c1442SJohannes Doerfert if (OuterLoop) 1073081b0f5STobias Grosser OuterLoop->addChildLoop(NewLoop); 108dd5c1442SJohannes Doerfert else 1093081b0f5STobias Grosser LI.addTopLevelLoop(NewLoop); 1103081b0f5STobias Grosser 111154d9469STobias Grosser if (OuterLoop) { 112154d9469STobias Grosser if (GuardBB) 1136adcf56bSChandler Carruth OuterLoop->addBasicBlockToLoop(GuardBB, LI); 1146adcf56bSChandler Carruth OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI); 115154d9469STobias Grosser } 1163081b0f5STobias Grosser 1176adcf56bSChandler Carruth NewLoop->addBasicBlockToLoop(HeaderBB, LI); 1183081b0f5STobias Grosser 119c7b719fcSJohannes Doerfert // Notify the annotator (if present) that we have a new loop, but only 120c7b719fcSJohannes Doerfert // after the header block is set. 121c7b719fcSJohannes Doerfert if (Annotator) 122c7b719fcSJohannes Doerfert Annotator->pushLoop(NewLoop, Parallel); 123c7b719fcSJohannes Doerfert 1245db6ffd7STobias Grosser // ExitBB 125b8f58b53SDuncan P. N. Exon Smith ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI); 1265db6ffd7STobias Grosser ExitBB->setName("polly.loop_exit"); 127609089f2SHongbin Zheng 1285db6ffd7STobias Grosser // BeforeBB 129dd5c1442SJohannes Doerfert if (GuardBB) { 1305db6ffd7STobias Grosser BeforeBB->getTerminator()->setSuccessor(0, GuardBB); 131dd5c1442SJohannes Doerfert DT.addNewBlock(GuardBB, BeforeBB); 132609089f2SHongbin Zheng 1335db6ffd7STobias Grosser // GuardBB 1345db6ffd7STobias Grosser Builder.SetInsertPoint(GuardBB); 1355db6ffd7STobias Grosser Value *LoopGuard; 1365db6ffd7STobias Grosser LoopGuard = Builder.CreateICmp(Predicate, LB, UB); 1375db6ffd7STobias Grosser LoopGuard->setName("polly.loop_guard"); 1385db6ffd7STobias Grosser Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB); 139dd5c1442SJohannes Doerfert DT.addNewBlock(PreHeaderBB, GuardBB); 140dd5c1442SJohannes Doerfert } else { 141dd5c1442SJohannes Doerfert BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB); 142dd5c1442SJohannes Doerfert DT.addNewBlock(PreHeaderBB, BeforeBB); 143dd5c1442SJohannes Doerfert } 144609089f2SHongbin Zheng 1455db6ffd7STobias Grosser // PreHeaderBB 1465db6ffd7STobias Grosser Builder.SetInsertPoint(PreHeaderBB); 1474ac4e155SHongbin Zheng Builder.CreateBr(HeaderBB); 148609089f2SHongbin Zheng 1495db6ffd7STobias Grosser // HeaderBB 1505db6ffd7STobias Grosser DT.addNewBlock(HeaderBB, PreHeaderBB); 1515db6ffd7STobias Grosser Builder.SetInsertPoint(HeaderBB); 1525db6ffd7STobias Grosser PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar"); 1535db6ffd7STobias Grosser IV->addIncoming(LB, PreHeaderBB); 1543717aa5dSTobias Grosser Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType); 1555db6ffd7STobias Grosser Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next"); 1564fe342cbSHongbin Zheng Value *LoopCondition = 1574fe342cbSHongbin Zheng Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond"); 158c7b719fcSJohannes Doerfert 159c7b719fcSJohannes Doerfert // Create the loop latch and annotate it as such. 160c7b719fcSJohannes Doerfert BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB); 161c7b719fcSJohannes Doerfert if (Annotator) 1620956a606SRoman Gareev Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled); 163c7b719fcSJohannes Doerfert 1645db6ffd7STobias Grosser IV->addIncoming(IncrementedIV, HeaderBB); 165dd5c1442SJohannes Doerfert if (GuardBB) 1665db6ffd7STobias Grosser DT.changeImmediateDominator(ExitBB, GuardBB); 167dd5c1442SJohannes Doerfert else 168f8a678d2STobias Grosser DT.changeImmediateDominator(ExitBB, HeaderBB); 169609089f2SHongbin Zheng 1705db6ffd7STobias Grosser // The loop body should be added here. 1715db6ffd7STobias Grosser Builder.SetInsertPoint(HeaderBB->getFirstNonPHI()); 172609089f2SHongbin Zheng return IV; 173609089f2SHongbin Zheng } 174609089f2SHongbin Zheng 17512b355a2SJohannes Doerfert Value *ParallelLoopGenerator::createParallelLoop( 17612b355a2SJohannes Doerfert Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues, 177521dd584SJohannes Doerfert ValueMapT &Map, BasicBlock::iterator *LoopBody) { 17812b355a2SJohannes Doerfert 179f0e3d50dSDavid Blaikie AllocaInst *Struct = storeValuesIntoStruct(UsedValues); 18012b355a2SJohannes Doerfert BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint(); 181*89251edeSMichael Kruse 182*89251edeSMichael Kruse Value *IV; 183*89251edeSMichael Kruse Function *SubFn; 184*89251edeSMichael Kruse std::tie(IV, SubFn) = createSubFn(Stride, Struct, UsedValues, Map); 18512b355a2SJohannes Doerfert *LoopBody = Builder.GetInsertPoint(); 186b8f58b53SDuncan P. N. Exon Smith Builder.SetInsertPoint(&*BeforeLoop); 18712b355a2SJohannes Doerfert 188f0e3d50dSDavid Blaikie Value *SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), 18912b355a2SJohannes Doerfert "polly.par.userContext"); 19012b355a2SJohannes Doerfert 191a6d48f59SMichael Kruse // Add one as the upper bound provided by OpenMP is a < comparison 19212b355a2SJohannes Doerfert // whereas the codegenForSequential function creates a <= comparison. 19312b355a2SJohannes Doerfert UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1)); 19412b355a2SJohannes Doerfert 195*89251edeSMichael Kruse // Execute the prepared subfunction in parallel. 196*89251edeSMichael Kruse deployParallelExecution(SubFn, SubFnParam, LB, UB, Stride); 19712b355a2SJohannes Doerfert 19812b355a2SJohannes Doerfert return IV; 19912b355a2SJohannes Doerfert } 20012b355a2SJohannes Doerfert 20112b355a2SJohannes Doerfert Function *ParallelLoopGenerator::createSubFnDefinition() { 202609089f2SHongbin Zheng Function *F = Builder.GetInsertBlock()->getParent(); 203*89251edeSMichael Kruse Function *SubFn = prepareSubFnDefinition(F); 204a89dc57bSTobias Grosser 205a89dc57bSTobias Grosser // Certain backends (e.g., NVPTX) do not support '.'s in function names. 206a89dc57bSTobias Grosser // Hence, we ensure that all '.'s are replaced by '_'s. 207a89dc57bSTobias Grosser std::string FunctionName = SubFn->getName(); 208a89dc57bSTobias Grosser std::replace(FunctionName.begin(), FunctionName.end(), '.', '_'); 209a89dc57bSTobias Grosser SubFn->setName(FunctionName); 21012b355a2SJohannes Doerfert 211609089f2SHongbin Zheng // Do not run any polly pass on the new function. 21212b355a2SJohannes Doerfert SubFn->addFnAttr(PollySkipFnAttr); 213609089f2SHongbin Zheng 21412b355a2SJohannes Doerfert return SubFn; 215609089f2SHongbin Zheng } 216609089f2SHongbin Zheng 217f0e3d50dSDavid Blaikie AllocaInst * 21812b355a2SJohannes Doerfert ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) { 21912b355a2SJohannes Doerfert SmallVector<Type *, 8> Members; 220609089f2SHongbin Zheng 22191f5b262STobias Grosser for (Value *V : Values) 22291f5b262STobias Grosser Members.push_back(V->getType()); 223609089f2SHongbin Zheng 2247b5a4dfdSTobias Grosser const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout(); 225b3e30c32SMatt Arsenault 2261356ac75SJohannes Doerfert // We do not want to allocate the alloca inside any loop, thus we allocate it 2271356ac75SJohannes Doerfert // in the entry block of the function and use annotations to denote the actual 2281356ac75SJohannes Doerfert // live span (similar to clang). 2291356ac75SJohannes Doerfert BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock(); 230b8f58b53SDuncan P. N. Exon Smith Instruction *IP = &*EntryBB.getFirstInsertionPt(); 231609089f2SHongbin Zheng StructType *Ty = StructType::get(Builder.getContext(), Members); 232b3e30c32SMatt Arsenault AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr, 233b3e30c32SMatt Arsenault "polly.par.userContext", IP); 2341356ac75SJohannes Doerfert 235609089f2SHongbin Zheng for (unsigned i = 0; i < Values.size(); i++) { 236f0e3d50dSDavid Blaikie Value *Address = Builder.CreateStructGEP(Ty, Struct, i); 23795e59aaaSTobias Grosser Address->setName("polly.subfn.storeaddr." + Values[i]->getName()); 238609089f2SHongbin Zheng Builder.CreateStore(Values[i], Address); 239609089f2SHongbin Zheng } 240609089f2SHongbin Zheng 241609089f2SHongbin Zheng return Struct; 242609089f2SHongbin Zheng } 243609089f2SHongbin Zheng 24412b355a2SJohannes Doerfert void ParallelLoopGenerator::extractValuesFromStruct( 245521dd584SJohannes Doerfert SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) { 246609089f2SHongbin Zheng for (unsigned i = 0; i < OldValues.size(); i++) { 247f0e3d50dSDavid Blaikie Value *Address = Builder.CreateStructGEP(Ty, Struct, i); 248609089f2SHongbin Zheng Value *NewValue = Builder.CreateLoad(Address); 24972b80672STobias Grosser NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName()); 25012b355a2SJohannes Doerfert Map[OldValues[i]] = NewValue; 251609089f2SHongbin Zheng } 252609089f2SHongbin Zheng } 253