1 //===------ LoopGeneratorsGOMP.cpp - IR helper to create loops ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains functions to create parallel loops as LLVM-IR. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "polly/CodeGen/LoopGeneratorsGOMP.h" 14 #include "llvm/IR/Dominators.h" 15 #include "llvm/IR/Module.h" 16 17 using namespace llvm; 18 using namespace polly; 19 20 void ParallelLoopGeneratorGOMP::createCallSpawnThreads(Value *SubFn, 21 Value *SubFnParam, 22 Value *LB, Value *UB, 23 Value *Stride) { 24 const std::string Name = "GOMP_parallel_loop_runtime_start"; 25 26 Function *F = M->getFunction(Name); 27 28 // If F is not available, declare it. 29 if (!F) { 30 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 31 32 Type *Params[] = {PointerType::getUnqual(FunctionType::get( 33 Builder.getVoidTy(), Builder.getInt8PtrTy(), false)), 34 Builder.getInt8PtrTy(), 35 Builder.getInt32Ty(), 36 LongType, 37 LongType, 38 LongType}; 39 40 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); 41 F = Function::Create(Ty, Linkage, Name, M); 42 } 43 44 Value *Args[] = {SubFn, SubFnParam, Builder.getInt32(PollyNumThreads), 45 LB, UB, Stride}; 46 47 CallInst *Call = Builder.CreateCall(F, Args); 48 Call->setDebugLoc(DLGenerated); 49 } 50 51 void ParallelLoopGeneratorGOMP::deployParallelExecution(Function *SubFn, 52 Value *SubFnParam, 53 Value *LB, Value *UB, 54 Value *Stride) { 55 // Tell the runtime we start a parallel loop 56 createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride); 57 CallInst *Call = Builder.CreateCall(SubFn, SubFnParam); 58 Call->setDebugLoc(DLGenerated); 59 createCallJoinThreads(); 60 } 61 62 Function *ParallelLoopGeneratorGOMP::prepareSubFnDefinition(Function *F) const { 63 FunctionType *FT = 64 FunctionType::get(Builder.getVoidTy(), {Builder.getInt8PtrTy()}, false); 65 Function *SubFn = Function::Create(FT, Function::InternalLinkage, 66 F->getName() + "_polly_subfn", M); 67 // Name the function's arguments 68 SubFn->arg_begin()->setName("polly.par.userContext"); 69 return SubFn; 70 } 71 72 // Create a subfunction of the following (preliminary) structure: 73 // 74 // PrevBB 75 // | 76 // v 77 // HeaderBB 78 // | _____ 79 // v v | 80 // CheckNextBB PreHeaderBB 81 // |\ | 82 // | \______/ 83 // | 84 // v 85 // ExitBB 86 // 87 // HeaderBB will hold allocations and loading of variables. 88 // CheckNextBB will check for more work. 89 // If there is more work to do: go to PreHeaderBB, otherwise go to ExitBB. 90 // PreHeaderBB loads the new boundaries (& will lead to the loop body later on). 91 // ExitBB marks the end of the parallel execution. 92 std::tuple<Value *, Function *> 93 ParallelLoopGeneratorGOMP::createSubFn(Value *Stride, AllocaInst *StructData, 94 SetVector<Value *> Data, 95 ValueMapT &Map) { 96 if (PollyScheduling != OMPGeneralSchedulingType::Runtime) { 97 // User tried to influence the scheduling type (currently not supported) 98 errs() << "warning: Polly's GNU OpenMP backend solely " 99 "supports the scheduling type 'runtime'.\n"; 100 } 101 102 if (PollyChunkSize != 0) { 103 // User tried to influence the chunk size (currently not supported) 104 errs() << "warning: Polly's GNU OpenMP backend solely " 105 "supports the default chunk size.\n"; 106 } 107 108 Function *SubFn = createSubFnDefinition(); 109 LLVMContext &Context = SubFn->getContext(); 110 111 // Store the previous basic block. 112 BasicBlock *PrevBB = Builder.GetInsertBlock(); 113 114 // Create basic blocks. 115 BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn); 116 BasicBlock *ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn); 117 BasicBlock *CheckNextBB = 118 BasicBlock::Create(Context, "polly.par.checkNext", SubFn); 119 BasicBlock *PreHeaderBB = 120 BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn); 121 122 DT.addNewBlock(HeaderBB, PrevBB); 123 DT.addNewBlock(ExitBB, HeaderBB); 124 DT.addNewBlock(CheckNextBB, HeaderBB); 125 DT.addNewBlock(PreHeaderBB, HeaderBB); 126 127 // Fill up basic block HeaderBB. 128 Builder.SetInsertPoint(HeaderBB); 129 Value *LBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.LBPtr"); 130 Value *UBPtr = Builder.CreateAlloca(LongType, nullptr, "polly.par.UBPtr"); 131 Value *UserContext = Builder.CreateBitCast( 132 &*SubFn->arg_begin(), StructData->getType(), "polly.par.userContext"); 133 134 extractValuesFromStruct(Data, StructData->getAllocatedType(), UserContext, 135 Map); 136 Builder.CreateBr(CheckNextBB); 137 138 // Add code to check if another set of iterations will be executed. 139 Builder.SetInsertPoint(CheckNextBB); 140 Value *Next = createCallGetWorkItem(LBPtr, UBPtr); 141 Value *HasNextSchedule = Builder.CreateTrunc( 142 Next, Builder.getInt1Ty(), "polly.par.hasNextScheduleBlock"); 143 Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB); 144 145 // Add code to load the iv bounds for this set of iterations. 146 Builder.SetInsertPoint(PreHeaderBB); 147 Value *LB = Builder.CreateLoad(LongType, LBPtr, "polly.par.LB"); 148 Value *UB = Builder.CreateLoad(LongType, UBPtr, "polly.par.UB"); 149 150 // Subtract one as the upper bound provided by OpenMP is a < comparison 151 // whereas the codegenForSequential function creates a <= comparison. 152 UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1), 153 "polly.par.UBAdjusted"); 154 155 Builder.CreateBr(CheckNextBB); 156 Builder.SetInsertPoint(&*--Builder.GetInsertPoint()); 157 BasicBlock *AfterBB; 158 Value *IV = 159 createLoop(LB, UB, Stride, Builder, LI, DT, AfterBB, ICmpInst::ICMP_SLE, 160 nullptr, true, /* UseGuard */ false); 161 162 BasicBlock::iterator LoopBody = Builder.GetInsertPoint(); 163 164 // Add code to terminate this subfunction. 165 Builder.SetInsertPoint(ExitBB); 166 createCallCleanupThread(); 167 Builder.CreateRetVoid(); 168 169 Builder.SetInsertPoint(&*LoopBody); 170 171 return std::make_tuple(IV, SubFn); 172 } 173 174 Value *ParallelLoopGeneratorGOMP::createCallGetWorkItem(Value *LBPtr, 175 Value *UBPtr) { 176 const std::string Name = "GOMP_loop_runtime_next"; 177 178 Function *F = M->getFunction(Name); 179 180 // If F is not available, declare it. 181 if (!F) { 182 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 183 Type *Params[] = {LongType->getPointerTo(), LongType->getPointerTo()}; 184 FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false); 185 F = Function::Create(Ty, Linkage, Name, M); 186 } 187 188 Value *Args[] = {LBPtr, UBPtr}; 189 CallInst *Call = Builder.CreateCall(F, Args); 190 Call->setDebugLoc(DLGenerated); 191 Value *Return = Builder.CreateICmpNE( 192 Call, Builder.CreateZExt(Builder.getFalse(), Call->getType())); 193 return Return; 194 } 195 196 void ParallelLoopGeneratorGOMP::createCallJoinThreads() { 197 const std::string Name = "GOMP_parallel_end"; 198 199 Function *F = M->getFunction(Name); 200 201 // If F is not available, declare it. 202 if (!F) { 203 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 204 205 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); 206 F = Function::Create(Ty, Linkage, Name, M); 207 } 208 209 CallInst *Call = Builder.CreateCall(F, {}); 210 Call->setDebugLoc(DLGenerated); 211 } 212 213 void ParallelLoopGeneratorGOMP::createCallCleanupThread() { 214 const std::string Name = "GOMP_loop_end_nowait"; 215 216 Function *F = M->getFunction(Name); 217 218 // If F is not available, declare it. 219 if (!F) { 220 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 221 222 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); 223 F = Function::Create(Ty, Linkage, Name, M); 224 } 225 226 CallInst *Call = Builder.CreateCall(F, {}); 227 Call->setDebugLoc(DLGenerated); 228 } 229