1609089f2SHongbin Zheng //===------ LoopGenerators.cpp - IR helper to create loops ---------------===//
2609089f2SHongbin Zheng //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6609089f2SHongbin Zheng //
7609089f2SHongbin Zheng //===----------------------------------------------------------------------===//
8609089f2SHongbin Zheng //
989251edeSMichael Kruse // This file contains functions to create scalar loops and orchestrate the
1089251edeSMichael Kruse // creation of parallel loops as LLVM-IR.
11609089f2SHongbin Zheng //
12609089f2SHongbin Zheng //===----------------------------------------------------------------------===//
13609089f2SHongbin Zheng
148a846610SHongbin Zheng #include "polly/CodeGen/LoopGenerators.h"
1589251edeSMichael Kruse #include "polly/Options.h"
165624d3c9STobias Grosser #include "polly/ScopDetection.h"
173081b0f5STobias Grosser #include "llvm/Analysis/LoopInfo.h"
18535d52c7SChandler Carruth #include "llvm/IR/DataLayout.h"
19*fe0e5b3eSMichael Kruse #include "llvm/IR/DebugInfoMetadata.h"
20e87c6a81SChandler Carruth #include "llvm/IR/Dominators.h"
2183628182STobias Grosser #include "llvm/IR/Module.h"
22990cd4c2SJohannes Doerfert #include "llvm/Support/CommandLine.h"
23ba0d0922STobias Grosser #include "llvm/Transforms/Utils/BasicBlockUtils.h"
24609089f2SHongbin Zheng
25609089f2SHongbin Zheng using namespace llvm;
264ac4e155SHongbin Zheng using namespace polly;
27609089f2SHongbin Zheng
2889251edeSMichael Kruse int polly::PollyNumThreads;
2989251edeSMichael Kruse OMPGeneralSchedulingType polly::PollyScheduling;
3089251edeSMichael Kruse int polly::PollyChunkSize;
3189251edeSMichael Kruse
3289251edeSMichael Kruse static cl::opt<int, true>
3389251edeSMichael Kruse XPollyNumThreads("polly-num-threads",
3489251edeSMichael Kruse cl::desc("Number of threads to use (0 = auto)"),
3589251edeSMichael Kruse cl::Hidden, cl::location(polly::PollyNumThreads),
3689251edeSMichael Kruse cl::init(0), cl::cat(PollyCategory));
3789251edeSMichael Kruse
3889251edeSMichael Kruse static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling(
3989251edeSMichael Kruse "polly-scheduling",
4089251edeSMichael Kruse cl::desc("Scheduling type of parallel OpenMP for loops"),
4189251edeSMichael Kruse cl::values(clEnumValN(OMPGeneralSchedulingType::StaticChunked, "static",
4289251edeSMichael Kruse "Static scheduling"),
4389251edeSMichael Kruse clEnumValN(OMPGeneralSchedulingType::Dynamic, "dynamic",
4489251edeSMichael Kruse "Dynamic scheduling"),
4589251edeSMichael Kruse clEnumValN(OMPGeneralSchedulingType::Guided, "guided",
4689251edeSMichael Kruse "Guided scheduling"),
4789251edeSMichael Kruse clEnumValN(OMPGeneralSchedulingType::Runtime, "runtime",
4889251edeSMichael Kruse "Runtime determined (OMP_SCHEDULE)")),
4989251edeSMichael Kruse cl::Hidden, cl::location(polly::PollyScheduling),
5089251edeSMichael Kruse cl::init(OMPGeneralSchedulingType::Runtime), cl::Optional,
5189251edeSMichael Kruse cl::cat(PollyCategory));
5289251edeSMichael Kruse
5389251edeSMichael Kruse static cl::opt<int, true>
5489251edeSMichael Kruse XPollyChunkSize("polly-scheduling-chunksize",
5589251edeSMichael Kruse cl::desc("Chunksize to use by the OpenMP runtime calls"),
5689251edeSMichael Kruse cl::Hidden, cl::location(polly::PollyChunkSize),
5789251edeSMichael Kruse cl::init(0), cl::Optional, cl::cat(PollyCategory));
58990cd4c2SJohannes Doerfert
59dd5c1442SJohannes Doerfert // We generate a loop of either of the following structures:
605db6ffd7STobias Grosser //
61dd5c1442SJohannes Doerfert // BeforeBB BeforeBB
62dd5c1442SJohannes Doerfert // | |
63dd5c1442SJohannes Doerfert // v v
64dd5c1442SJohannes Doerfert // GuardBB PreHeaderBB
65dd5c1442SJohannes Doerfert // / | | _____
66dd5c1442SJohannes Doerfert // __ PreHeaderBB | v \/ |
67dd5c1442SJohannes Doerfert // / \ / | HeaderBB latch
68dd5c1442SJohannes Doerfert // latch HeaderBB | |\ |
69dd5c1442SJohannes Doerfert // \ / \ / | \------/
70dd5c1442SJohannes Doerfert // < \ / |
71dd5c1442SJohannes Doerfert // \ / v
72dd5c1442SJohannes Doerfert // ExitBB ExitBB
735db6ffd7STobias Grosser //
74dd5c1442SJohannes Doerfert // depending on whether or not we know that it is executed at least once. If
75dd5c1442SJohannes Doerfert // not, GuardBB checks if the loop is executed at least once. If this is the
76dd5c1442SJohannes Doerfert // case we branch to PreHeaderBB and subsequently to the HeaderBB, which
77dd5c1442SJohannes Doerfert // contains the loop iv 'polly.indvar', the incremented loop iv
78dd5c1442SJohannes Doerfert // 'polly.indvar_next' as well as the condition to check if we execute another
79dd5c1442SJohannes Doerfert // iteration of the loop. After the loop has finished, we branch to ExitBB.
804fe342cbSHongbin Zheng // We expect the type of UB, LB, UB+Stride to be large enough for values that
814fe342cbSHongbin Zheng // UB may take throughout the execution of the loop, including the computation
824fe342cbSHongbin Zheng // of indvar + Stride before the final abort.
createLoop(Value * LB,Value * UB,Value * Stride,PollyIRBuilder & Builder,LoopInfo & LI,DominatorTree & DT,BasicBlock * & ExitBB,ICmpInst::Predicate Predicate,ScopAnnotator * Annotator,bool Parallel,bool UseGuard,bool LoopVectDisabled)834ac4e155SHongbin Zheng Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
842d950f36SPhilip Pfaffe PollyIRBuilder &Builder, LoopInfo &LI,
852ef3f4fdSJohannes Doerfert DominatorTree &DT, BasicBlock *&ExitBB,
8637c9b8e0STobias Grosser ICmpInst::Predicate Predicate,
870956a606SRoman Gareev ScopAnnotator *Annotator, bool Parallel, bool UseGuard,
880956a606SRoman Gareev bool LoopVectDisabled) {
894ac4e155SHongbin Zheng Function *F = Builder.GetInsertBlock()->getParent();
90609089f2SHongbin Zheng LLVMContext &Context = F->getContext();
91609089f2SHongbin Zheng
925db6ffd7STobias Grosser assert(LB->getType() == UB->getType() && "Types of loop bounds do not match");
93609089f2SHongbin Zheng IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
94609089f2SHongbin Zheng assert(LoopIVType && "UB is not integer?");
95609089f2SHongbin Zheng
965db6ffd7STobias Grosser BasicBlock *BeforeBB = Builder.GetInsertBlock();
97dd5c1442SJohannes Doerfert BasicBlock *GuardBB =
98dd5c1442SJohannes Doerfert UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr;
995db6ffd7STobias Grosser BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
1005db6ffd7STobias Grosser BasicBlock *PreHeaderBB =
1015db6ffd7STobias Grosser BasicBlock::Create(Context, "polly.loop_preheader", F);
102609089f2SHongbin Zheng
1033081b0f5STobias Grosser // Update LoopInfo
1043081b0f5STobias Grosser Loop *OuterLoop = LI.getLoopFor(BeforeBB);
105859ef1c0SPhilip Pfaffe Loop *NewLoop = LI.AllocateLoop();
1063081b0f5STobias Grosser
107dd5c1442SJohannes Doerfert if (OuterLoop)
1083081b0f5STobias Grosser OuterLoop->addChildLoop(NewLoop);
109dd5c1442SJohannes Doerfert else
1103081b0f5STobias Grosser LI.addTopLevelLoop(NewLoop);
1113081b0f5STobias Grosser
112154d9469STobias Grosser if (OuterLoop) {
113154d9469STobias Grosser if (GuardBB)
1146adcf56bSChandler Carruth OuterLoop->addBasicBlockToLoop(GuardBB, LI);
1156adcf56bSChandler Carruth OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI);
116154d9469STobias Grosser }
1173081b0f5STobias Grosser
1186adcf56bSChandler Carruth NewLoop->addBasicBlockToLoop(HeaderBB, LI);
1193081b0f5STobias Grosser
120c7b719fcSJohannes Doerfert // Notify the annotator (if present) that we have a new loop, but only
121c7b719fcSJohannes Doerfert // after the header block is set.
122c7b719fcSJohannes Doerfert if (Annotator)
123c7b719fcSJohannes Doerfert Annotator->pushLoop(NewLoop, Parallel);
124c7b719fcSJohannes Doerfert
1255db6ffd7STobias Grosser // ExitBB
126b8f58b53SDuncan P. N. Exon Smith ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI);
1275db6ffd7STobias Grosser ExitBB->setName("polly.loop_exit");
128609089f2SHongbin Zheng
1295db6ffd7STobias Grosser // BeforeBB
130dd5c1442SJohannes Doerfert if (GuardBB) {
1315db6ffd7STobias Grosser BeforeBB->getTerminator()->setSuccessor(0, GuardBB);
132dd5c1442SJohannes Doerfert DT.addNewBlock(GuardBB, BeforeBB);
133609089f2SHongbin Zheng
1345db6ffd7STobias Grosser // GuardBB
1355db6ffd7STobias Grosser Builder.SetInsertPoint(GuardBB);
1365db6ffd7STobias Grosser Value *LoopGuard;
1375db6ffd7STobias Grosser LoopGuard = Builder.CreateICmp(Predicate, LB, UB);
1385db6ffd7STobias Grosser LoopGuard->setName("polly.loop_guard");
1395db6ffd7STobias Grosser Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB);
140dd5c1442SJohannes Doerfert DT.addNewBlock(PreHeaderBB, GuardBB);
141dd5c1442SJohannes Doerfert } else {
142dd5c1442SJohannes Doerfert BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB);
143dd5c1442SJohannes Doerfert DT.addNewBlock(PreHeaderBB, BeforeBB);
144dd5c1442SJohannes Doerfert }
145609089f2SHongbin Zheng
1465db6ffd7STobias Grosser // PreHeaderBB
1475db6ffd7STobias Grosser Builder.SetInsertPoint(PreHeaderBB);
1484ac4e155SHongbin Zheng Builder.CreateBr(HeaderBB);
149609089f2SHongbin Zheng
1505db6ffd7STobias Grosser // HeaderBB
1515db6ffd7STobias Grosser DT.addNewBlock(HeaderBB, PreHeaderBB);
1525db6ffd7STobias Grosser Builder.SetInsertPoint(HeaderBB);
1535db6ffd7STobias Grosser PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar");
1545db6ffd7STobias Grosser IV->addIncoming(LB, PreHeaderBB);
1553717aa5dSTobias Grosser Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
1565db6ffd7STobias Grosser Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
1574fe342cbSHongbin Zheng Value *LoopCondition =
1584fe342cbSHongbin Zheng Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond");
159c7b719fcSJohannes Doerfert
160c7b719fcSJohannes Doerfert // Create the loop latch and annotate it as such.
161c7b719fcSJohannes Doerfert BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
162c7b719fcSJohannes Doerfert if (Annotator)
1630956a606SRoman Gareev Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled);
164c7b719fcSJohannes Doerfert
1655db6ffd7STobias Grosser IV->addIncoming(IncrementedIV, HeaderBB);
166dd5c1442SJohannes Doerfert if (GuardBB)
1675db6ffd7STobias Grosser DT.changeImmediateDominator(ExitBB, GuardBB);
168dd5c1442SJohannes Doerfert else
169f8a678d2STobias Grosser DT.changeImmediateDominator(ExitBB, HeaderBB);
170609089f2SHongbin Zheng
1715db6ffd7STobias Grosser // The loop body should be added here.
1725db6ffd7STobias Grosser Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
173609089f2SHongbin Zheng return IV;
174609089f2SHongbin Zheng }
175609089f2SHongbin Zheng
createParallelLoop(Value * LB,Value * UB,Value * Stride,SetVector<Value * > & UsedValues,ValueMapT & Map,BasicBlock::iterator * LoopBody)17612b355a2SJohannes Doerfert Value *ParallelLoopGenerator::createParallelLoop(
17712b355a2SJohannes Doerfert Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues,
178521dd584SJohannes Doerfert ValueMapT &Map, BasicBlock::iterator *LoopBody) {
17912b355a2SJohannes Doerfert
180f0e3d50dSDavid Blaikie AllocaInst *Struct = storeValuesIntoStruct(UsedValues);
18112b355a2SJohannes Doerfert BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint();
18289251edeSMichael Kruse
18389251edeSMichael Kruse Value *IV;
18489251edeSMichael Kruse Function *SubFn;
18589251edeSMichael Kruse std::tie(IV, SubFn) = createSubFn(Stride, Struct, UsedValues, Map);
18612b355a2SJohannes Doerfert *LoopBody = Builder.GetInsertPoint();
187b8f58b53SDuncan P. N. Exon Smith Builder.SetInsertPoint(&*BeforeLoop);
18812b355a2SJohannes Doerfert
189f0e3d50dSDavid Blaikie Value *SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(),
19012b355a2SJohannes Doerfert "polly.par.userContext");
19112b355a2SJohannes Doerfert
192a6d48f59SMichael Kruse // Add one as the upper bound provided by OpenMP is a < comparison
19312b355a2SJohannes Doerfert // whereas the codegenForSequential function creates a <= comparison.
19412b355a2SJohannes Doerfert UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1));
19512b355a2SJohannes Doerfert
19689251edeSMichael Kruse // Execute the prepared subfunction in parallel.
19789251edeSMichael Kruse deployParallelExecution(SubFn, SubFnParam, LB, UB, Stride);
19812b355a2SJohannes Doerfert
19912b355a2SJohannes Doerfert return IV;
20012b355a2SJohannes Doerfert }
20112b355a2SJohannes Doerfert
createSubFnDefinition()20212b355a2SJohannes Doerfert Function *ParallelLoopGenerator::createSubFnDefinition() {
203609089f2SHongbin Zheng Function *F = Builder.GetInsertBlock()->getParent();
20489251edeSMichael Kruse Function *SubFn = prepareSubFnDefinition(F);
205a89dc57bSTobias Grosser
206a89dc57bSTobias Grosser // Certain backends (e.g., NVPTX) do not support '.'s in function names.
207a89dc57bSTobias Grosser // Hence, we ensure that all '.'s are replaced by '_'s.
2080257a921SEli Friedman std::string FunctionName = SubFn->getName().str();
209a89dc57bSTobias Grosser std::replace(FunctionName.begin(), FunctionName.end(), '.', '_');
210a89dc57bSTobias Grosser SubFn->setName(FunctionName);
21112b355a2SJohannes Doerfert
212609089f2SHongbin Zheng // Do not run any polly pass on the new function.
21312b355a2SJohannes Doerfert SubFn->addFnAttr(PollySkipFnAttr);
214609089f2SHongbin Zheng
21512b355a2SJohannes Doerfert return SubFn;
216609089f2SHongbin Zheng }
217609089f2SHongbin Zheng
218f0e3d50dSDavid Blaikie AllocaInst *
storeValuesIntoStruct(SetVector<Value * > & Values)21912b355a2SJohannes Doerfert ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) {
22012b355a2SJohannes Doerfert SmallVector<Type *, 8> Members;
221609089f2SHongbin Zheng
22291f5b262STobias Grosser for (Value *V : Values)
22391f5b262STobias Grosser Members.push_back(V->getType());
224609089f2SHongbin Zheng
2257b5a4dfdSTobias Grosser const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
226b3e30c32SMatt Arsenault
2271356ac75SJohannes Doerfert // We do not want to allocate the alloca inside any loop, thus we allocate it
2281356ac75SJohannes Doerfert // in the entry block of the function and use annotations to denote the actual
2291356ac75SJohannes Doerfert // live span (similar to clang).
2301356ac75SJohannes Doerfert BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock();
231b8f58b53SDuncan P. N. Exon Smith Instruction *IP = &*EntryBB.getFirstInsertionPt();
232609089f2SHongbin Zheng StructType *Ty = StructType::get(Builder.getContext(), Members);
233b3e30c32SMatt Arsenault AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr,
234b3e30c32SMatt Arsenault "polly.par.userContext", IP);
2351356ac75SJohannes Doerfert
236609089f2SHongbin Zheng for (unsigned i = 0; i < Values.size(); i++) {
237f0e3d50dSDavid Blaikie Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
23895e59aaaSTobias Grosser Address->setName("polly.subfn.storeaddr." + Values[i]->getName());
239609089f2SHongbin Zheng Builder.CreateStore(Values[i], Address);
240609089f2SHongbin Zheng }
241609089f2SHongbin Zheng
242609089f2SHongbin Zheng return Struct;
243609089f2SHongbin Zheng }
244609089f2SHongbin Zheng
extractValuesFromStruct(SetVector<Value * > OldValues,Type * Ty,Value * Struct,ValueMapT & Map)24512b355a2SJohannes Doerfert void ParallelLoopGenerator::extractValuesFromStruct(
246521dd584SJohannes Doerfert SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) {
247609089f2SHongbin Zheng for (unsigned i = 0; i < OldValues.size(); i++) {
248f0e3d50dSDavid Blaikie Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
24946354bacSNikita Popov Type *ElemTy = cast<GetElementPtrInst>(Address)->getResultElementType();
25046354bacSNikita Popov Value *NewValue = Builder.CreateLoad(ElemTy, Address);
25172b80672STobias Grosser NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName());
25212b355a2SJohannes Doerfert Map[OldValues[i]] = NewValue;
253609089f2SHongbin Zheng }
254609089f2SHongbin Zheng }
255*fe0e5b3eSMichael Kruse
createDebugLocForGeneratedCode(Function * F)256*fe0e5b3eSMichael Kruse DebugLoc polly::createDebugLocForGeneratedCode(Function *F) {
257*fe0e5b3eSMichael Kruse if (!F)
258*fe0e5b3eSMichael Kruse return DebugLoc();
259*fe0e5b3eSMichael Kruse
260*fe0e5b3eSMichael Kruse LLVMContext &Ctx = F->getContext();
261*fe0e5b3eSMichael Kruse DISubprogram *DILScope =
262*fe0e5b3eSMichael Kruse dyn_cast_or_null<DISubprogram>(F->getMetadata(LLVMContext::MD_dbg));
263*fe0e5b3eSMichael Kruse if (!DILScope)
264*fe0e5b3eSMichael Kruse return DebugLoc();
265*fe0e5b3eSMichael Kruse return DILocation::get(Ctx, 0, 0, DILScope);
266*fe0e5b3eSMichael Kruse }
267