1 //===------ LoopGeneratorsGOMP.cpp - IR helper to create loops ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains functions to create parallel loops as LLVM-IR. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "polly/CodeGen/LoopGeneratorsGOMP.h" 14 #include "llvm/IR/Dominators.h" 15 #include "llvm/IR/Module.h" 16 17 using namespace llvm; 18 using namespace polly; 19 20 void ParallelLoopGeneratorGOMP::createCallSpawnThreads(Value *SubFn, 21 Value *SubFnParam, 22 Value *LB, Value *UB, 23 Value *Stride) { 24 const std::string Name = "GOMP_parallel_loop_runtime_start"; 25 26 Function *F = M->getFunction(Name); 27 28 // If F is not available, declare it. 29 if (!F) { 30 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 31 32 Type *Params[] = {PointerType::getUnqual(FunctionType::get( 33 Builder.getVoidTy(), Builder.getInt8PtrTy(), false)), 34 Builder.getInt8PtrTy(), 35 Builder.getInt32Ty(), 36 LongType, 37 LongType, 38 LongType}; 39 40 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); 41 F = Function::Create(Ty, Linkage, Name, M); 42 } 43 44 Value *Args[] = {SubFn, SubFnParam, Builder.getInt32(PollyNumThreads), 45 LB, UB, Stride}; 46 47 Builder.CreateCall(F, Args); 48 } 49 50 void ParallelLoopGeneratorGOMP::deployParallelExecution(Function *SubFn, 51 Value *SubFnParam, 52 Value *LB, Value *UB, 53 Value *Stride) { 54 // Tell the runtime we start a parallel loop 55 createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride); 56 Builder.CreateCall(SubFn, SubFnParam); 57 createCallJoinThreads(); 58 } 59 60 Function *ParallelLoopGeneratorGOMP::prepareSubFnDefinition(Function *F) const { 61 FunctionType *FT = 62 FunctionType::get(Builder.getVoidTy(), {Builder.getInt8PtrTy()}, false); 63 Function *SubFn = Function::Create(FT, Function::InternalLinkage, 64 F->getName() + "_polly_subfn", M); 65 // Name the function's arguments 66 SubFn->arg_begin()->setName("polly.par.userContext"); 67 return SubFn; 68 } 69 70 // Create a subfunction of the following (preliminary) structure: 71 // 72 // PrevBB 73 // | 74 // v 75 // HeaderBB 76 // | _____ 77 // v v | 78 // CheckNextBB PreHeaderBB 79 // |\ | 80 // | \______/ 81 // | 82 // v 83 // ExitBB 84 // 85 // HeaderBB will hold allocations and loading of variables. 86 // CheckNextBB will check for more work. 87 // If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB. 88 // PreHeaderBB loads the new boundaries (& will lead to the loop body later on). 89 // ExitBB marks the end of the parallel execution. 90 std::tuple<Value *, Function *> 91 ParallelLoopGeneratorGOMP::createSubFn(Value *Stride, AllocaInst *StructData, 92 SetVector<Value *> Data, 93 ValueMapT &Map) { 94 if (PollyScheduling != OMPGeneralSchedulingType::Runtime) { 95 // User tried to influence the scheduling type (currently not supported) 96 errs() << "warning: Polly's GNU OpenMP backend solely " 97 "supports the scheduling type 'runtime'.\n"; 98 } 99 100 if (PollyChunkSize != 0) { 101 // User tried to influence the chunk size (currently not supported) 102 errs() << "warning: Polly's GNU OpenMP backend solely " 103 "supports the default chunk size.\n"; 104 } 105 106 Function *SubFn = createSubFnDefinition(); 107 LLVMContext &Context = SubFn->getContext(); 108 109 // Store the previous basic block. 110 BasicBlock *PrevBB = Builder.GetInsertBlock(); 111 112 // Create basic blocks. 113 BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn); 114 BasicBlock *ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn); 115 BasicBlock *CheckNextBB = 116 BasicBlock::Create(Context, "polly.par.checkNext", SubFn); 117 BasicBlock *PreHeaderBB = 118 BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn); 119 120 DT.addNewBlock(HeaderBB, PrevBB); 121 DT.addNewBlock(ExitBB, HeaderBB); 122 DT.addNewBlock(CheckNextBB, HeaderBB); 123 DT.addNewBlock(PreHeaderBB, HeaderBB); 124 125 // Fill up basic block HeaderBB. 126 Builder.SetInsertPoint(HeaderBB); 127 Value *LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr"); 128 Value *UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr"); 129 Value *UserContext = Builder.CreateBitCast( 130 &*SubFn->arg_begin(), StructData->getType(), "polly.par.userContext"); 131 132 extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext, 133 Map); 134 Builder.CreateBr(CheckNextBB); 135 136 // Add code to check if another set of iterations will be executed. 137 Builder.SetInsertPoint(CheckNextBB); 138 Value *Next = createCallGetWorkItem(LBPtr, UBPtr); 139 Value *HasNextSchedule = Builder.CreateTrunc( 140 Next, Builder.getInt1Ty(), "polly.par.hasNextScheduleBlock"); 141 Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB); 142 143 // Add code to load the iv bounds for this set of iterations. 144 Builder.SetInsertPoint(PreHeaderBB); 145 Value *LB = Builder.CreateLoad(LBPtr, "polly.par.LB"); 146 Value *UB = Builder.CreateLoad(UBPtr, "polly.par.UB"); 147 148 // Subtract one as the upper bound provided by OpenMP is a < comparison 149 // whereas the codegenForSequential function creates a <= comparison. 150 UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1), 151 "polly.par.UBAdjusted"); 152 153 Builder.CreateBr(CheckNextBB); 154 Builder.SetInsertPoint(&*--Builder.GetInsertPoint()); 155 BasicBlock *AfterBB; 156 Value *IV = 157 createLoop(LB, UB, Stride, Builder, LI, DT, AfterBB, ICmpInst::ICMP_SLE, 158 nullptr, true, /* UseGuard */ false); 159 160 BasicBlock::iterator LoopBody = Builder.GetInsertPoint(); 161 162 // Add code to terminate this subfunction. 163 Builder.SetInsertPoint(ExitBB); 164 createCallCleanupThread(); 165 Builder.CreateRetVoid(); 166 167 Builder.SetInsertPoint(&*LoopBody); 168 169 return std::make_tuple(IV, SubFn); 170 } 171 172 Value *ParallelLoopGeneratorGOMP::createCallGetWorkItem(Value *LBPtr, 173 Value *UBPtr) { 174 const std::string Name = "GOMP_loop_runtime_next"; 175 176 Function *F = M->getFunction(Name); 177 178 // If F is not available, declare it. 179 if (!F) { 180 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 181 Type *Params[] = {LongType->getPointerTo(), LongType->getPointerTo()}; 182 FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false); 183 F = Function::Create(Ty, Linkage, Name, M); 184 } 185 186 Value *Args[] = {LBPtr, UBPtr}; 187 Value *Return = Builder.CreateCall(F, Args); 188 Return = Builder.CreateICmpNE( 189 Return, Builder.CreateZExt(Builder.getFalse(), Return->getType())); 190 return Return; 191 } 192 193 void ParallelLoopGeneratorGOMP::createCallJoinThreads() { 194 const std::string Name = "GOMP_parallel_end"; 195 196 Function *F = M->getFunction(Name); 197 198 // If F is not available, declare it. 199 if (!F) { 200 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 201 202 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); 203 F = Function::Create(Ty, Linkage, Name, M); 204 } 205 206 Builder.CreateCall(F, {}); 207 } 208 209 void ParallelLoopGeneratorGOMP::createCallCleanupThread() { 210 const std::string Name = "GOMP_loop_end_nowait"; 211 212 Function *F = M->getFunction(Name); 213 214 // If F is not available, declare it. 215 if (!F) { 216 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 217 218 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); 219 F = Function::Create(Ty, Linkage, Name, M); 220 } 221 222 Builder.CreateCall(F, {}); 223 } 224