1609089f2SHongbin Zheng //===------ LoopGenerators.cpp -  IR helper to create loops ---------------===//
2609089f2SHongbin Zheng //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6609089f2SHongbin Zheng //
7609089f2SHongbin Zheng //===----------------------------------------------------------------------===//
8609089f2SHongbin Zheng //
9*89251edeSMichael Kruse // This file contains functions to create scalar loops and orchestrate the
10*89251edeSMichael Kruse // creation of parallel loops as LLVM-IR.
11609089f2SHongbin Zheng //
12609089f2SHongbin Zheng //===----------------------------------------------------------------------===//
13609089f2SHongbin Zheng 
148a846610SHongbin Zheng #include "polly/CodeGen/LoopGenerators.h"
15*89251edeSMichael Kruse #include "polly/Options.h"
165624d3c9STobias Grosser #include "polly/ScopDetection.h"
173081b0f5STobias Grosser #include "llvm/Analysis/LoopInfo.h"
18535d52c7SChandler Carruth #include "llvm/IR/DataLayout.h"
19e87c6a81SChandler Carruth #include "llvm/IR/Dominators.h"
2083628182STobias Grosser #include "llvm/IR/Module.h"
21990cd4c2SJohannes Doerfert #include "llvm/Support/CommandLine.h"
22ba0d0922STobias Grosser #include "llvm/Transforms/Utils/BasicBlockUtils.h"
23609089f2SHongbin Zheng 
24609089f2SHongbin Zheng using namespace llvm;
254ac4e155SHongbin Zheng using namespace polly;
26609089f2SHongbin Zheng 
27*89251edeSMichael Kruse int polly::PollyNumThreads;
28*89251edeSMichael Kruse OMPGeneralSchedulingType polly::PollyScheduling;
29*89251edeSMichael Kruse int polly::PollyChunkSize;
30*89251edeSMichael Kruse 
31*89251edeSMichael Kruse static cl::opt<int, true>
32*89251edeSMichael Kruse     XPollyNumThreads("polly-num-threads",
33*89251edeSMichael Kruse                      cl::desc("Number of threads to use (0 = auto)"),
34*89251edeSMichael Kruse                      cl::Hidden, cl::location(polly::PollyNumThreads),
35*89251edeSMichael Kruse                      cl::init(0), cl::cat(PollyCategory));
36*89251edeSMichael Kruse 
37*89251edeSMichael Kruse static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling(
38*89251edeSMichael Kruse     "polly-scheduling",
39*89251edeSMichael Kruse     cl::desc("Scheduling type of parallel OpenMP for loops"),
40*89251edeSMichael Kruse     cl::values(clEnumValN(OMPGeneralSchedulingType::StaticChunked, "static",
41*89251edeSMichael Kruse                           "Static scheduling"),
42*89251edeSMichael Kruse                clEnumValN(OMPGeneralSchedulingType::Dynamic, "dynamic",
43*89251edeSMichael Kruse                           "Dynamic scheduling"),
44*89251edeSMichael Kruse                clEnumValN(OMPGeneralSchedulingType::Guided, "guided",
45*89251edeSMichael Kruse                           "Guided scheduling"),
46*89251edeSMichael Kruse                clEnumValN(OMPGeneralSchedulingType::Runtime, "runtime",
47*89251edeSMichael Kruse                           "Runtime determined (OMP_SCHEDULE)")),
48*89251edeSMichael Kruse     cl::Hidden, cl::location(polly::PollyScheduling),
49*89251edeSMichael Kruse     cl::init(OMPGeneralSchedulingType::Runtime), cl::Optional,
50*89251edeSMichael Kruse     cl::cat(PollyCategory));
51*89251edeSMichael Kruse 
52*89251edeSMichael Kruse static cl::opt<int, true>
53*89251edeSMichael Kruse     XPollyChunkSize("polly-scheduling-chunksize",
54*89251edeSMichael Kruse                     cl::desc("Chunksize to use by the OpenMP runtime calls"),
55*89251edeSMichael Kruse                     cl::Hidden, cl::location(polly::PollyChunkSize),
56*89251edeSMichael Kruse                     cl::init(0), cl::Optional, cl::cat(PollyCategory));
57990cd4c2SJohannes Doerfert 
58dd5c1442SJohannes Doerfert // We generate a loop of either of the following structures:
595db6ffd7STobias Grosser //
60dd5c1442SJohannes Doerfert //              BeforeBB                      BeforeBB
61dd5c1442SJohannes Doerfert //                 |                             |
62dd5c1442SJohannes Doerfert //                 v                             v
63dd5c1442SJohannes Doerfert //              GuardBB                      PreHeaderBB
64dd5c1442SJohannes Doerfert //              /      |                         |   _____
65dd5c1442SJohannes Doerfert //     __  PreHeaderBB  |                        v  \/    |
66dd5c1442SJohannes Doerfert //    /  \    /         |                     HeaderBB  latch
67dd5c1442SJohannes Doerfert // latch  HeaderBB      |                        |\       |
68dd5c1442SJohannes Doerfert //    \  /    \         /                        | \------/
69dd5c1442SJohannes Doerfert //     <       \       /                         |
70dd5c1442SJohannes Doerfert //              \     /                          v
71dd5c1442SJohannes Doerfert //              ExitBB                         ExitBB
725db6ffd7STobias Grosser //
73dd5c1442SJohannes Doerfert // depending on whether or not we know that it is executed at least once. If
74dd5c1442SJohannes Doerfert // not, GuardBB checks if the loop is executed at least once. If this is the
75dd5c1442SJohannes Doerfert // case we branch to PreHeaderBB and subsequently to the HeaderBB, which
76dd5c1442SJohannes Doerfert // contains the loop iv 'polly.indvar', the incremented loop iv
77dd5c1442SJohannes Doerfert // 'polly.indvar_next' as well as the condition to check if we execute another
78dd5c1442SJohannes Doerfert // iteration of the loop. After the loop has finished, we branch to ExitBB.
794fe342cbSHongbin Zheng // We expect the type of UB, LB, UB+Stride to be large enough for values that
804fe342cbSHongbin Zheng // UB may take throughout the execution of the loop, including the computation
814fe342cbSHongbin Zheng // of indvar + Stride before the final abort.
824ac4e155SHongbin Zheng Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
832d950f36SPhilip Pfaffe                          PollyIRBuilder &Builder, LoopInfo &LI,
842ef3f4fdSJohannes Doerfert                          DominatorTree &DT, BasicBlock *&ExitBB,
8537c9b8e0STobias Grosser                          ICmpInst::Predicate Predicate,
860956a606SRoman Gareev                          ScopAnnotator *Annotator, bool Parallel, bool UseGuard,
870956a606SRoman Gareev                          bool LoopVectDisabled) {
884ac4e155SHongbin Zheng   Function *F = Builder.GetInsertBlock()->getParent();
89609089f2SHongbin Zheng   LLVMContext &Context = F->getContext();
90609089f2SHongbin Zheng 
915db6ffd7STobias Grosser   assert(LB->getType() == UB->getType() && "Types of loop bounds do not match");
92609089f2SHongbin Zheng   IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType());
93609089f2SHongbin Zheng   assert(LoopIVType && "UB is not integer?");
94609089f2SHongbin Zheng 
955db6ffd7STobias Grosser   BasicBlock *BeforeBB = Builder.GetInsertBlock();
96dd5c1442SJohannes Doerfert   BasicBlock *GuardBB =
97dd5c1442SJohannes Doerfert       UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr;
985db6ffd7STobias Grosser   BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F);
995db6ffd7STobias Grosser   BasicBlock *PreHeaderBB =
1005db6ffd7STobias Grosser       BasicBlock::Create(Context, "polly.loop_preheader", F);
101609089f2SHongbin Zheng 
1023081b0f5STobias Grosser   // Update LoopInfo
1033081b0f5STobias Grosser   Loop *OuterLoop = LI.getLoopFor(BeforeBB);
104859ef1c0SPhilip Pfaffe   Loop *NewLoop = LI.AllocateLoop();
1053081b0f5STobias Grosser 
106dd5c1442SJohannes Doerfert   if (OuterLoop)
1073081b0f5STobias Grosser     OuterLoop->addChildLoop(NewLoop);
108dd5c1442SJohannes Doerfert   else
1093081b0f5STobias Grosser     LI.addTopLevelLoop(NewLoop);
1103081b0f5STobias Grosser 
111154d9469STobias Grosser   if (OuterLoop) {
112154d9469STobias Grosser     if (GuardBB)
1136adcf56bSChandler Carruth       OuterLoop->addBasicBlockToLoop(GuardBB, LI);
1146adcf56bSChandler Carruth     OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI);
115154d9469STobias Grosser   }
1163081b0f5STobias Grosser 
1176adcf56bSChandler Carruth   NewLoop->addBasicBlockToLoop(HeaderBB, LI);
1183081b0f5STobias Grosser 
119c7b719fcSJohannes Doerfert   // Notify the annotator (if present) that we have a new loop, but only
120c7b719fcSJohannes Doerfert   // after the header block is set.
121c7b719fcSJohannes Doerfert   if (Annotator)
122c7b719fcSJohannes Doerfert     Annotator->pushLoop(NewLoop, Parallel);
123c7b719fcSJohannes Doerfert 
1245db6ffd7STobias Grosser   // ExitBB
125b8f58b53SDuncan P. N. Exon Smith   ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI);
1265db6ffd7STobias Grosser   ExitBB->setName("polly.loop_exit");
127609089f2SHongbin Zheng 
1285db6ffd7STobias Grosser   // BeforeBB
129dd5c1442SJohannes Doerfert   if (GuardBB) {
1305db6ffd7STobias Grosser     BeforeBB->getTerminator()->setSuccessor(0, GuardBB);
131dd5c1442SJohannes Doerfert     DT.addNewBlock(GuardBB, BeforeBB);
132609089f2SHongbin Zheng 
1335db6ffd7STobias Grosser     // GuardBB
1345db6ffd7STobias Grosser     Builder.SetInsertPoint(GuardBB);
1355db6ffd7STobias Grosser     Value *LoopGuard;
1365db6ffd7STobias Grosser     LoopGuard = Builder.CreateICmp(Predicate, LB, UB);
1375db6ffd7STobias Grosser     LoopGuard->setName("polly.loop_guard");
1385db6ffd7STobias Grosser     Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB);
139dd5c1442SJohannes Doerfert     DT.addNewBlock(PreHeaderBB, GuardBB);
140dd5c1442SJohannes Doerfert   } else {
141dd5c1442SJohannes Doerfert     BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB);
142dd5c1442SJohannes Doerfert     DT.addNewBlock(PreHeaderBB, BeforeBB);
143dd5c1442SJohannes Doerfert   }
144609089f2SHongbin Zheng 
1455db6ffd7STobias Grosser   // PreHeaderBB
1465db6ffd7STobias Grosser   Builder.SetInsertPoint(PreHeaderBB);
1474ac4e155SHongbin Zheng   Builder.CreateBr(HeaderBB);
148609089f2SHongbin Zheng 
1495db6ffd7STobias Grosser   // HeaderBB
1505db6ffd7STobias Grosser   DT.addNewBlock(HeaderBB, PreHeaderBB);
1515db6ffd7STobias Grosser   Builder.SetInsertPoint(HeaderBB);
1525db6ffd7STobias Grosser   PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar");
1535db6ffd7STobias Grosser   IV->addIncoming(LB, PreHeaderBB);
1543717aa5dSTobias Grosser   Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType);
1555db6ffd7STobias Grosser   Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next");
1564fe342cbSHongbin Zheng   Value *LoopCondition =
1574fe342cbSHongbin Zheng       Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond");
158c7b719fcSJohannes Doerfert 
159c7b719fcSJohannes Doerfert   // Create the loop latch and annotate it as such.
160c7b719fcSJohannes Doerfert   BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB);
161c7b719fcSJohannes Doerfert   if (Annotator)
1620956a606SRoman Gareev     Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled);
163c7b719fcSJohannes Doerfert 
1645db6ffd7STobias Grosser   IV->addIncoming(IncrementedIV, HeaderBB);
165dd5c1442SJohannes Doerfert   if (GuardBB)
1665db6ffd7STobias Grosser     DT.changeImmediateDominator(ExitBB, GuardBB);
167dd5c1442SJohannes Doerfert   else
168f8a678d2STobias Grosser     DT.changeImmediateDominator(ExitBB, HeaderBB);
169609089f2SHongbin Zheng 
1705db6ffd7STobias Grosser   // The loop body should be added here.
1715db6ffd7STobias Grosser   Builder.SetInsertPoint(HeaderBB->getFirstNonPHI());
172609089f2SHongbin Zheng   return IV;
173609089f2SHongbin Zheng }
174609089f2SHongbin Zheng 
17512b355a2SJohannes Doerfert Value *ParallelLoopGenerator::createParallelLoop(
17612b355a2SJohannes Doerfert     Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues,
177521dd584SJohannes Doerfert     ValueMapT &Map, BasicBlock::iterator *LoopBody) {
17812b355a2SJohannes Doerfert 
179f0e3d50dSDavid Blaikie   AllocaInst *Struct = storeValuesIntoStruct(UsedValues);
18012b355a2SJohannes Doerfert   BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint();
181*89251edeSMichael Kruse 
182*89251edeSMichael Kruse   Value *IV;
183*89251edeSMichael Kruse   Function *SubFn;
184*89251edeSMichael Kruse   std::tie(IV, SubFn) = createSubFn(Stride, Struct, UsedValues, Map);
18512b355a2SJohannes Doerfert   *LoopBody = Builder.GetInsertPoint();
186b8f58b53SDuncan P. N. Exon Smith   Builder.SetInsertPoint(&*BeforeLoop);
18712b355a2SJohannes Doerfert 
188f0e3d50dSDavid Blaikie   Value *SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(),
18912b355a2SJohannes Doerfert                                             "polly.par.userContext");
19012b355a2SJohannes Doerfert 
191a6d48f59SMichael Kruse   // Add one as the upper bound provided by OpenMP is a < comparison
19212b355a2SJohannes Doerfert   // whereas the codegenForSequential function creates a <= comparison.
19312b355a2SJohannes Doerfert   UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1));
19412b355a2SJohannes Doerfert 
195*89251edeSMichael Kruse   // Execute the prepared subfunction in parallel.
196*89251edeSMichael Kruse   deployParallelExecution(SubFn, SubFnParam, LB, UB, Stride);
19712b355a2SJohannes Doerfert 
19812b355a2SJohannes Doerfert   return IV;
19912b355a2SJohannes Doerfert }
20012b355a2SJohannes Doerfert 
20112b355a2SJohannes Doerfert Function *ParallelLoopGenerator::createSubFnDefinition() {
202609089f2SHongbin Zheng   Function *F = Builder.GetInsertBlock()->getParent();
203*89251edeSMichael Kruse   Function *SubFn = prepareSubFnDefinition(F);
204a89dc57bSTobias Grosser 
205a89dc57bSTobias Grosser   // Certain backends (e.g., NVPTX) do not support '.'s in function names.
206a89dc57bSTobias Grosser   // Hence, we ensure that all '.'s are replaced by '_'s.
207a89dc57bSTobias Grosser   std::string FunctionName = SubFn->getName();
208a89dc57bSTobias Grosser   std::replace(FunctionName.begin(), FunctionName.end(), '.', '_');
209a89dc57bSTobias Grosser   SubFn->setName(FunctionName);
21012b355a2SJohannes Doerfert 
211609089f2SHongbin Zheng   // Do not run any polly pass on the new function.
21212b355a2SJohannes Doerfert   SubFn->addFnAttr(PollySkipFnAttr);
213609089f2SHongbin Zheng 
21412b355a2SJohannes Doerfert   return SubFn;
215609089f2SHongbin Zheng }
216609089f2SHongbin Zheng 
217f0e3d50dSDavid Blaikie AllocaInst *
21812b355a2SJohannes Doerfert ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) {
21912b355a2SJohannes Doerfert   SmallVector<Type *, 8> Members;
220609089f2SHongbin Zheng 
22191f5b262STobias Grosser   for (Value *V : Values)
22291f5b262STobias Grosser     Members.push_back(V->getType());
223609089f2SHongbin Zheng 
2247b5a4dfdSTobias Grosser   const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout();
225b3e30c32SMatt Arsenault 
2261356ac75SJohannes Doerfert   // We do not want to allocate the alloca inside any loop, thus we allocate it
2271356ac75SJohannes Doerfert   // in the entry block of the function and use annotations to denote the actual
2281356ac75SJohannes Doerfert   // live span (similar to clang).
2291356ac75SJohannes Doerfert   BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock();
230b8f58b53SDuncan P. N. Exon Smith   Instruction *IP = &*EntryBB.getFirstInsertionPt();
231609089f2SHongbin Zheng   StructType *Ty = StructType::get(Builder.getContext(), Members);
232b3e30c32SMatt Arsenault   AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr,
233b3e30c32SMatt Arsenault                                       "polly.par.userContext", IP);
2341356ac75SJohannes Doerfert 
235609089f2SHongbin Zheng   for (unsigned i = 0; i < Values.size(); i++) {
236f0e3d50dSDavid Blaikie     Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
23795e59aaaSTobias Grosser     Address->setName("polly.subfn.storeaddr." + Values[i]->getName());
238609089f2SHongbin Zheng     Builder.CreateStore(Values[i], Address);
239609089f2SHongbin Zheng   }
240609089f2SHongbin Zheng 
241609089f2SHongbin Zheng   return Struct;
242609089f2SHongbin Zheng }
243609089f2SHongbin Zheng 
24412b355a2SJohannes Doerfert void ParallelLoopGenerator::extractValuesFromStruct(
245521dd584SJohannes Doerfert     SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) {
246609089f2SHongbin Zheng   for (unsigned i = 0; i < OldValues.size(); i++) {
247f0e3d50dSDavid Blaikie     Value *Address = Builder.CreateStructGEP(Ty, Struct, i);
248609089f2SHongbin Zheng     Value *NewValue = Builder.CreateLoad(Address);
24972b80672STobias Grosser     NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName());
25012b355a2SJohannes Doerfert     Map[OldValues[i]] = NewValue;
251609089f2SHongbin Zheng   }
252609089f2SHongbin Zheng }
253