1 //===------ LoopGeneratorsGOMP.cpp - IR helper to create loops ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains functions to create parallel loops as LLVM-IR.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "polly/CodeGen/LoopGeneratorsGOMP.h"
14 #include "llvm/IR/Dominators.h"
15 #include "llvm/IR/Module.h"
16 
17 using namespace llvm;
18 using namespace polly;
19 
createCallSpawnThreads(Value * SubFn,Value * SubFnParam,Value * LB,Value * UB,Value * Stride)20 void ParallelLoopGeneratorGOMP::createCallSpawnThreads(Value *SubFn,
21                                                        Value *SubFnParam,
22                                                        Value *LB, Value *UB,
23                                                        Value *Stride) {
24   const std::string Name = "GOMP_parallel_loop_runtime_start";
25 
26   Function *F = M->getFunction(Name);
27 
28   // If F is not available, declare it.
29   if (!F) {
30     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
31 
32     Type *Params[] = {PointerType::getUnqual(FunctionType::get(
33                           Builder.getVoidTy(), Builder.getInt8PtrTy(), false)),
34                       Builder.getInt8PtrTy(),
35                       Builder.getInt32Ty(),
36                       LongType,
37                       LongType,
38                       LongType};
39 
40     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
41     F = Function::Create(Ty, Linkage, Name, M);
42   }
43 
44   Value *Args[] = {SubFn, SubFnParam, Builder.getInt32(PollyNumThreads),
45                    LB,    UB,         Stride};
46 
47   CallInst *Call = Builder.CreateCall(F, Args);
48   Call->setDebugLoc(DLGenerated);
49 }
50 
deployParallelExecution(Function * SubFn,Value * SubFnParam,Value * LB,Value * UB,Value * Stride)51 void ParallelLoopGeneratorGOMP::deployParallelExecution(Function *SubFn,
52                                                         Value *SubFnParam,
53                                                         Value *LB, Value *UB,
54                                                         Value *Stride) {
55   // Tell the runtime we start a parallel loop
56   createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
57   CallInst *Call = Builder.CreateCall(SubFn, SubFnParam);
58   Call->setDebugLoc(DLGenerated);
59   createCallJoinThreads();
60 }
61 
prepareSubFnDefinition(Function * F) const62 Function *ParallelLoopGeneratorGOMP::prepareSubFnDefinition(Function *F) const {
63   FunctionType *FT =
64       FunctionType::get(Builder.getVoidTy(), {Builder.getInt8PtrTy()}, false);
65   Function *SubFn = Function::Create(FT, Function::InternalLinkage,
66                                      F->getName() + "_polly_subfn", M);
67   // Name the function's arguments
68   SubFn->arg_begin()->setName("polly.par.userContext");
69   return SubFn;
70 }
71 
72 // Create a subfunction of the following (preliminary) structure:
73 //
74 //    PrevBB
75 //       |
76 //       v
77 //    HeaderBB
78 //       |   _____
79 //       v  v    |
80 //   CheckNextBB  PreHeaderBB
81 //       |\       |
82 //       | \______/
83 //       |
84 //       v
85 //     ExitBB
86 //
87 // HeaderBB will hold allocations and loading of variables.
88 // CheckNextBB will check for more work.
89 // If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB.
90 // PreHeaderBB loads the new boundaries (& will lead to the loop body later on).
91 // ExitBB marks the end of the parallel execution.
92 std::tuple<Value *, Function *>
createSubFn(Value * Stride,AllocaInst * StructData,SetVector<Value * > Data,ValueMapT & Map)93 ParallelLoopGeneratorGOMP::createSubFn(Value *Stride, AllocaInst *StructData,
94                                        SetVector<Value *> Data,
95                                        ValueMapT &Map) {
96   if (PollyScheduling != OMPGeneralSchedulingType::Runtime) {
97     // User tried to influence the scheduling type (currently not supported)
98     errs() << "warning: Polly's GNU OpenMP backend solely "
99               "supports the scheduling type 'runtime'.\n";
100   }
101 
102   if (PollyChunkSize != 0) {
103     // User tried to influence the chunk size (currently not supported)
104     errs() << "warning: Polly's GNU OpenMP backend solely "
105               "supports the default chunk size.\n";
106   }
107 
108   Function *SubFn = createSubFnDefinition();
109   LLVMContext &Context = SubFn->getContext();
110 
111   // Store the previous basic block.
112   BasicBlock *PrevBB = Builder.GetInsertBlock();
113 
114   // Create basic blocks.
115   BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
116   BasicBlock *ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
117   BasicBlock *CheckNextBB =
118       BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
119   BasicBlock *PreHeaderBB =
120       BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
121 
122   DT.addNewBlock(HeaderBB, PrevBB);
123   DT.addNewBlock(ExitBB, HeaderBB);
124   DT.addNewBlock(CheckNextBB, HeaderBB);
125   DT.addNewBlock(PreHeaderBB, HeaderBB);
126 
127   // Fill up basic block HeaderBB.
128   Builder.SetInsertPoint(HeaderBB);
129   Value *LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr");
130   Value *UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr");
131   Value *UserContext = Builder.CreateBitCast(
132       &*SubFn->arg_begin(), StructData->getType(), "polly.par.userContext");
133 
134   extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext,
135                           Map);
136   Builder.CreateBr(CheckNextBB);
137 
138   // Add code to check if another set of iterations will be executed.
139   Builder.SetInsertPoint(CheckNextBB);
140   Value *Next = createCallGetWorkItem(LBPtr, UBPtr);
141   Value *HasNextSchedule = Builder.CreateTrunc(
142       Next, Builder.getInt1Ty(), "polly.par.hasNextScheduleBlock");
143   Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB);
144 
145   // Add code to load the iv bounds for this set of iterations.
146   Builder.SetInsertPoint(PreHeaderBB);
147   Value *LB = Builder.CreateLoad(LongType, LBPtr, "polly.par.LB");
148   Value *UB = Builder.CreateLoad(LongType, UBPtr, "polly.par.UB");
149 
150   // Subtract one as the upper bound provided by OpenMP is a < comparison
151   // whereas the codegenForSequential function creates a <= comparison.
152   UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1),
153                          "polly.par.UBAdjusted");
154 
155   Builder.CreateBr(CheckNextBB);
156   Builder.SetInsertPoint(&*--Builder.GetInsertPoint());
157   BasicBlock *AfterBB;
158   Value *IV =
159       createLoop(LB, UB, Stride, Builder, LI, DT, AfterBB, ICmpInst::ICMP_SLE,
160                  nullptr, true, /* UseGuard */ false);
161 
162   BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
163 
164   // Add code to terminate this subfunction.
165   Builder.SetInsertPoint(ExitBB);
166   createCallCleanupThread();
167   Builder.CreateRetVoid();
168 
169   Builder.SetInsertPoint(&*LoopBody);
170 
171   return std::make_tuple(IV, SubFn);
172 }
173 
createCallGetWorkItem(Value * LBPtr,Value * UBPtr)174 Value *ParallelLoopGeneratorGOMP::createCallGetWorkItem(Value *LBPtr,
175                                                         Value *UBPtr) {
176   const std::string Name = "GOMP_loop_runtime_next";
177 
178   Function *F = M->getFunction(Name);
179 
180   // If F is not available, declare it.
181   if (!F) {
182     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
183     Type *Params[] = {LongType->getPointerTo(), LongType->getPointerTo()};
184     FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false);
185     F = Function::Create(Ty, Linkage, Name, M);
186   }
187 
188   Value *Args[] = {LBPtr, UBPtr};
189   CallInst *Call = Builder.CreateCall(F, Args);
190   Call->setDebugLoc(DLGenerated);
191   Value *Return = Builder.CreateICmpNE(
192       Call, Builder.CreateZExt(Builder.getFalse(), Call->getType()));
193   return Return;
194 }
195 
createCallJoinThreads()196 void ParallelLoopGeneratorGOMP::createCallJoinThreads() {
197   const std::string Name = "GOMP_parallel_end";
198 
199   Function *F = M->getFunction(Name);
200 
201   // If F is not available, declare it.
202   if (!F) {
203     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
204 
205     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
206     F = Function::Create(Ty, Linkage, Name, M);
207   }
208 
209   CallInst *Call = Builder.CreateCall(F, {});
210   Call->setDebugLoc(DLGenerated);
211 }
212 
createCallCleanupThread()213 void ParallelLoopGeneratorGOMP::createCallCleanupThread() {
214   const std::string Name = "GOMP_loop_end_nowait";
215 
216   Function *F = M->getFunction(Name);
217 
218   // If F is not available, declare it.
219   if (!F) {
220     GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
221 
222     FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false);
223     F = Function::Create(Ty, Linkage, Name, M);
224   }
225 
226   CallInst *Call = Builder.CreateCall(F, {});
227   Call->setDebugLoc(DLGenerated);
228 }
229