1 //===------ LoopGenerators.cpp - IR helper to create loops ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains functions to create scalar loops and orchestrate the 10 // creation of parallel loops as LLVM-IR. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "polly/CodeGen/LoopGenerators.h" 15 #include "polly/Options.h" 16 #include "polly/ScopDetection.h" 17 #include "llvm/Analysis/LoopInfo.h" 18 #include "llvm/IR/DataLayout.h" 19 #include "llvm/IR/Dominators.h" 20 #include "llvm/IR/Module.h" 21 #include "llvm/Support/CommandLine.h" 22 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 23 24 using namespace llvm; 25 using namespace polly; 26 27 int polly::PollyNumThreads; 28 OMPGeneralSchedulingType polly::PollyScheduling; 29 int polly::PollyChunkSize; 30 31 static cl::opt<int, true> 32 XPollyNumThreads("polly-num-threads", 33 cl::desc("Number of threads to use (0 = auto)"), 34 cl::Hidden, cl::location(polly::PollyNumThreads), 35 cl::init(0), cl::cat(PollyCategory)); 36 37 static cl::opt<OMPGeneralSchedulingType, true> XPollyScheduling( 38 "polly-scheduling", 39 cl::desc("Scheduling type of parallel OpenMP for loops"), 40 cl::values(clEnumValN(OMPGeneralSchedulingType::StaticChunked, "static", 41 "Static scheduling"), 42 clEnumValN(OMPGeneralSchedulingType::Dynamic, "dynamic", 43 "Dynamic scheduling"), 44 clEnumValN(OMPGeneralSchedulingType::Guided, "guided", 45 "Guided scheduling"), 46 clEnumValN(OMPGeneralSchedulingType::Runtime, "runtime", 47 "Runtime determined (OMP_SCHEDULE)")), 48 cl::Hidden, cl::location(polly::PollyScheduling), 49 cl::init(OMPGeneralSchedulingType::Runtime), cl::Optional, 50 cl::cat(PollyCategory)); 51 52 static cl::opt<int, true> 53 XPollyChunkSize("polly-scheduling-chunksize", 54 cl::desc("Chunksize to use by the OpenMP runtime calls"), 55 cl::Hidden, cl::location(polly::PollyChunkSize), 56 cl::init(0), cl::Optional, cl::cat(PollyCategory)); 57 58 // We generate a loop of either of the following structures: 59 // 60 // BeforeBB BeforeBB 61 // | | 62 // v v 63 // GuardBB PreHeaderBB 64 // / | | _____ 65 // __ PreHeaderBB | v \/ | 66 // / \ / | HeaderBB latch 67 // latch HeaderBB | |\ | 68 // \ / \ / | \------/ 69 // < \ / | 70 // \ / v 71 // ExitBB ExitBB 72 // 73 // depending on whether or not we know that it is executed at least once. If 74 // not, GuardBB checks if the loop is executed at least once. If this is the 75 // case we branch to PreHeaderBB and subsequently to the HeaderBB, which 76 // contains the loop iv 'polly.indvar', the incremented loop iv 77 // 'polly.indvar_next' as well as the condition to check if we execute another 78 // iteration of the loop. After the loop has finished, we branch to ExitBB. 79 // We expect the type of UB, LB, UB+Stride to be large enough for values that 80 // UB may take throughout the execution of the loop, including the computation 81 // of indvar + Stride before the final abort. 82 Value *polly::createLoop(Value *LB, Value *UB, Value *Stride, 83 PollyIRBuilder &Builder, LoopInfo &LI, 84 DominatorTree &DT, BasicBlock *&ExitBB, 85 ICmpInst::Predicate Predicate, 86 ScopAnnotator *Annotator, bool Parallel, bool UseGuard, 87 bool LoopVectDisabled) { 88 Function *F = Builder.GetInsertBlock()->getParent(); 89 LLVMContext &Context = F->getContext(); 90 91 assert(LB->getType() == UB->getType() && "Types of loop bounds do not match"); 92 IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType()); 93 assert(LoopIVType && "UB is not integer?"); 94 95 BasicBlock *BeforeBB = Builder.GetInsertBlock(); 96 BasicBlock *GuardBB = 97 UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr; 98 BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F); 99 BasicBlock *PreHeaderBB = 100 BasicBlock::Create(Context, "polly.loop_preheader", F); 101 102 // Update LoopInfo 103 Loop *OuterLoop = LI.getLoopFor(BeforeBB); 104 Loop *NewLoop = LI.AllocateLoop(); 105 106 if (OuterLoop) 107 OuterLoop->addChildLoop(NewLoop); 108 else 109 LI.addTopLevelLoop(NewLoop); 110 111 if (OuterLoop) { 112 if (GuardBB) 113 OuterLoop->addBasicBlockToLoop(GuardBB, LI); 114 OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI); 115 } 116 117 NewLoop->addBasicBlockToLoop(HeaderBB, LI); 118 119 // Notify the annotator (if present) that we have a new loop, but only 120 // after the header block is set. 121 if (Annotator) 122 Annotator->pushLoop(NewLoop, Parallel); 123 124 // ExitBB 125 ExitBB = SplitBlock(BeforeBB, &*Builder.GetInsertPoint(), &DT, &LI); 126 ExitBB->setName("polly.loop_exit"); 127 128 // BeforeBB 129 if (GuardBB) { 130 BeforeBB->getTerminator()->setSuccessor(0, GuardBB); 131 DT.addNewBlock(GuardBB, BeforeBB); 132 133 // GuardBB 134 Builder.SetInsertPoint(GuardBB); 135 Value *LoopGuard; 136 LoopGuard = Builder.CreateICmp(Predicate, LB, UB); 137 LoopGuard->setName("polly.loop_guard"); 138 Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB); 139 DT.addNewBlock(PreHeaderBB, GuardBB); 140 } else { 141 BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB); 142 DT.addNewBlock(PreHeaderBB, BeforeBB); 143 } 144 145 // PreHeaderBB 146 Builder.SetInsertPoint(PreHeaderBB); 147 Builder.CreateBr(HeaderBB); 148 149 // HeaderBB 150 DT.addNewBlock(HeaderBB, PreHeaderBB); 151 Builder.SetInsertPoint(HeaderBB); 152 PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar"); 153 IV->addIncoming(LB, PreHeaderBB); 154 Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType); 155 Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next"); 156 Value *LoopCondition = 157 Builder.CreateICmp(Predicate, IncrementedIV, UB, "polly.loop_cond"); 158 159 // Create the loop latch and annotate it as such. 160 BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB); 161 if (Annotator) 162 Annotator->annotateLoopLatch(B, NewLoop, Parallel, LoopVectDisabled); 163 164 IV->addIncoming(IncrementedIV, HeaderBB); 165 if (GuardBB) 166 DT.changeImmediateDominator(ExitBB, GuardBB); 167 else 168 DT.changeImmediateDominator(ExitBB, HeaderBB); 169 170 // The loop body should be added here. 171 Builder.SetInsertPoint(HeaderBB->getFirstNonPHI()); 172 return IV; 173 } 174 175 Value *ParallelLoopGenerator::createParallelLoop( 176 Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues, 177 ValueMapT &Map, BasicBlock::iterator *LoopBody) { 178 179 AllocaInst *Struct = storeValuesIntoStruct(UsedValues); 180 BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint(); 181 182 Value *IV; 183 Function *SubFn; 184 std::tie(IV, SubFn) = createSubFn(Stride, Struct, UsedValues, Map); 185 *LoopBody = Builder.GetInsertPoint(); 186 Builder.SetInsertPoint(&*BeforeLoop); 187 188 Value *SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), 189 "polly.par.userContext"); 190 191 // Add one as the upper bound provided by OpenMP is a < comparison 192 // whereas the codegenForSequential function creates a <= comparison. 193 UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1)); 194 195 // Execute the prepared subfunction in parallel. 196 deployParallelExecution(SubFn, SubFnParam, LB, UB, Stride); 197 198 return IV; 199 } 200 201 Function *ParallelLoopGenerator::createSubFnDefinition() { 202 Function *F = Builder.GetInsertBlock()->getParent(); 203 Function *SubFn = prepareSubFnDefinition(F); 204 205 // Certain backends (e.g., NVPTX) do not support '.'s in function names. 206 // Hence, we ensure that all '.'s are replaced by '_'s. 207 std::string FunctionName = SubFn->getName().str(); 208 std::replace(FunctionName.begin(), FunctionName.end(), '.', '_'); 209 SubFn->setName(FunctionName); 210 211 // Do not run any polly pass on the new function. 212 SubFn->addFnAttr(PollySkipFnAttr); 213 214 return SubFn; 215 } 216 217 AllocaInst * 218 ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) { 219 SmallVector<Type *, 8> Members; 220 221 for (Value *V : Values) 222 Members.push_back(V->getType()); 223 224 const DataLayout &DL = Builder.GetInsertBlock()->getModule()->getDataLayout(); 225 226 // We do not want to allocate the alloca inside any loop, thus we allocate it 227 // in the entry block of the function and use annotations to denote the actual 228 // live span (similar to clang). 229 BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock(); 230 Instruction *IP = &*EntryBB.getFirstInsertionPt(); 231 StructType *Ty = StructType::get(Builder.getContext(), Members); 232 AllocaInst *Struct = new AllocaInst(Ty, DL.getAllocaAddrSpace(), nullptr, 233 "polly.par.userContext", IP); 234 235 for (unsigned i = 0; i < Values.size(); i++) { 236 Value *Address = Builder.CreateStructGEP(Ty, Struct, i); 237 Address->setName("polly.subfn.storeaddr." + Values[i]->getName()); 238 Builder.CreateStore(Values[i], Address); 239 } 240 241 return Struct; 242 } 243 244 void ParallelLoopGenerator::extractValuesFromStruct( 245 SetVector<Value *> OldValues, Type *Ty, Value *Struct, ValueMapT &Map) { 246 for (unsigned i = 0; i < OldValues.size(); i++) { 247 Value *Address = Builder.CreateStructGEP(Ty, Struct, i); 248 Type *ElemTy = cast<GetElementPtrInst>(Address)->getResultElementType(); 249 Value *NewValue = Builder.CreateLoad(ElemTy, Address); 250 NewValue->setName("polly.subfunc.arg." + OldValues[i]->getName()); 251 Map[OldValues[i]] = NewValue; 252 } 253 } 254