1 //===------ LoopGenerators.cpp - IR helper to create loops ---------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains functions to create scalar and parallel loops as LLVM-IR. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "polly/ScopDetection.h" 15 #include "polly/CodeGen/LoopGenerators.h" 16 #include "llvm/Analysis/LoopInfo.h" 17 #include "llvm/IR/DataLayout.h" 18 #include "llvm/IR/Dominators.h" 19 #include "llvm/IR/Module.h" 20 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 21 #include "llvm/Support/CommandLine.h" 22 23 using namespace llvm; 24 using namespace polly; 25 26 static cl::opt<int> 27 PollyNumThreads("polly-num-threads", 28 cl::desc("Number of threads to use (0 = auto)"), cl::Hidden, 29 cl::init(0)); 30 31 // We generate a loop of either of the following structures: 32 // 33 // BeforeBB BeforeBB 34 // | | 35 // v v 36 // GuardBB PreHeaderBB 37 // / | | _____ 38 // __ PreHeaderBB | v \/ | 39 // / \ / | HeaderBB latch 40 // latch HeaderBB | |\ | 41 // \ / \ / | \------/ 42 // < \ / | 43 // \ / v 44 // ExitBB ExitBB 45 // 46 // depending on whether or not we know that it is executed at least once. If 47 // not, GuardBB checks if the loop is executed at least once. If this is the 48 // case we branch to PreHeaderBB and subsequently to the HeaderBB, which 49 // contains the loop iv 'polly.indvar', the incremented loop iv 50 // 'polly.indvar_next' as well as the condition to check if we execute another 51 // iteration of the loop. After the loop has finished, we branch to ExitBB. 52 Value *polly::createLoop(Value *LB, Value *UB, Value *Stride, 53 PollyIRBuilder &Builder, Pass *P, LoopInfo &LI, 54 DominatorTree &DT, BasicBlock *&ExitBB, 55 ICmpInst::Predicate Predicate, 56 ScopAnnotator *Annotator, bool Parallel, 57 bool UseGuard) { 58 Function *F = Builder.GetInsertBlock()->getParent(); 59 LLVMContext &Context = F->getContext(); 60 61 assert(LB->getType() == UB->getType() && "Types of loop bounds do not match"); 62 IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType()); 63 assert(LoopIVType && "UB is not integer?"); 64 65 BasicBlock *BeforeBB = Builder.GetInsertBlock(); 66 BasicBlock *GuardBB = 67 UseGuard ? BasicBlock::Create(Context, "polly.loop_if", F) : nullptr; 68 BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F); 69 BasicBlock *PreHeaderBB = 70 BasicBlock::Create(Context, "polly.loop_preheader", F); 71 72 // Update LoopInfo 73 Loop *OuterLoop = LI.getLoopFor(BeforeBB); 74 Loop *NewLoop = new Loop(); 75 76 if (OuterLoop) 77 OuterLoop->addChildLoop(NewLoop); 78 else 79 LI.addTopLevelLoop(NewLoop); 80 81 if (OuterLoop && GuardBB) 82 OuterLoop->addBasicBlockToLoop(GuardBB, LI.getBase()); 83 else if (OuterLoop) 84 OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI.getBase()); 85 86 NewLoop->addBasicBlockToLoop(HeaderBB, LI.getBase()); 87 88 // Notify the annotator (if present) that we have a new loop, but only 89 // after the header block is set. 90 if (Annotator) 91 Annotator->pushLoop(NewLoop, Parallel); 92 93 // ExitBB 94 ExitBB = SplitBlock(BeforeBB, Builder.GetInsertPoint()++, P); 95 ExitBB->setName("polly.loop_exit"); 96 97 // BeforeBB 98 if (GuardBB) { 99 BeforeBB->getTerminator()->setSuccessor(0, GuardBB); 100 DT.addNewBlock(GuardBB, BeforeBB); 101 102 // GuardBB 103 Builder.SetInsertPoint(GuardBB); 104 Value *LoopGuard; 105 LoopGuard = Builder.CreateICmp(Predicate, LB, UB); 106 LoopGuard->setName("polly.loop_guard"); 107 Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB); 108 DT.addNewBlock(PreHeaderBB, GuardBB); 109 } else { 110 BeforeBB->getTerminator()->setSuccessor(0, PreHeaderBB); 111 DT.addNewBlock(PreHeaderBB, BeforeBB); 112 } 113 114 // PreHeaderBB 115 Builder.SetInsertPoint(PreHeaderBB); 116 Builder.CreateBr(HeaderBB); 117 118 // HeaderBB 119 DT.addNewBlock(HeaderBB, PreHeaderBB); 120 Builder.SetInsertPoint(HeaderBB); 121 PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar"); 122 IV->addIncoming(LB, PreHeaderBB); 123 Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType); 124 Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next"); 125 Value *LoopCondition; 126 UB = Builder.CreateSub(UB, Stride, "polly.adjust_ub"); 127 LoopCondition = Builder.CreateICmp(Predicate, IV, UB); 128 LoopCondition->setName("polly.loop_cond"); 129 130 // Create the loop latch and annotate it as such. 131 BranchInst *B = Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB); 132 if (Annotator) 133 Annotator->annotateLoopLatch(B, NewLoop, Parallel); 134 135 IV->addIncoming(IncrementedIV, HeaderBB); 136 if (GuardBB) 137 DT.changeImmediateDominator(ExitBB, GuardBB); 138 else 139 DT.changeImmediateDominator(ExitBB, HeaderBB); 140 141 // The loop body should be added here. 142 Builder.SetInsertPoint(HeaderBB->getFirstNonPHI()); 143 return IV; 144 } 145 146 Value *ParallelLoopGenerator::createParallelLoop( 147 Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues, 148 ValueToValueMapTy &Map, BasicBlock::iterator *LoopBody) { 149 Value *Struct, *IV, *SubFnParam; 150 Function *SubFn; 151 152 Struct = storeValuesIntoStruct(UsedValues); 153 154 BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint(); 155 IV = createSubFn(Stride, Struct, UsedValues, Map, &SubFn); 156 *LoopBody = Builder.GetInsertPoint(); 157 Builder.SetInsertPoint(BeforeLoop); 158 159 SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), 160 "polly.par.userContext"); 161 162 // Add one as the upper bound provided by openmp is a < comparison 163 // whereas the codegenForSequential function creates a <= comparison. 164 UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1)); 165 166 // Tell the runtime we start a parallel loop 167 createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride); 168 Builder.CreateCall(SubFn, SubFnParam); 169 createCallJoinThreads(); 170 171 // Mark the end of the lifetime for the parameter struct. 172 Type *Ty = Struct->getType(); 173 ConstantInt *SizeOf = Builder.getInt64(DL.getTypeAllocSize(Ty)); 174 Builder.CreateLifetimeEnd(Struct, SizeOf); 175 176 return IV; 177 } 178 179 void ParallelLoopGenerator::createCallSpawnThreads(Value *SubFn, 180 Value *SubFnParam, Value *LB, 181 Value *UB, Value *Stride) { 182 const std::string Name = "GOMP_parallel_loop_runtime_start"; 183 184 Function *F = M->getFunction(Name); 185 186 // If F is not available, declare it. 187 if (!F) { 188 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 189 190 Type *Params[] = {PointerType::getUnqual(FunctionType::get( 191 Builder.getVoidTy(), Builder.getInt8PtrTy(), false)), 192 Builder.getInt8PtrTy(), Builder.getInt32Ty(), LongType, 193 LongType, LongType}; 194 195 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); 196 F = Function::Create(Ty, Linkage, Name, M); 197 } 198 199 Value *NumberOfThreads = Builder.getInt32(PollyNumThreads); 200 Value *Args[] = {SubFn, SubFnParam, NumberOfThreads, LB, UB, Stride}; 201 202 Builder.CreateCall(F, Args); 203 } 204 205 Value *ParallelLoopGenerator::createCallGetWorkItem(Value *LBPtr, 206 Value *UBPtr) { 207 const std::string Name = "GOMP_loop_runtime_next"; 208 209 Function *F = M->getFunction(Name); 210 211 // If F is not available, declare it. 212 if (!F) { 213 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 214 Type *Params[] = {LongType->getPointerTo(), LongType->getPointerTo()}; 215 FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false); 216 F = Function::Create(Ty, Linkage, Name, M); 217 } 218 219 Value *Args[] = {LBPtr, UBPtr}; 220 Value *Return = Builder.CreateCall(F, Args); 221 Return = Builder.CreateICmpNE( 222 Return, Builder.CreateZExt(Builder.getFalse(), Return->getType())); 223 return Return; 224 } 225 226 void ParallelLoopGenerator::createCallJoinThreads() { 227 const std::string Name = "GOMP_parallel_end"; 228 229 Function *F = M->getFunction(Name); 230 231 // If F is not available, declare it. 232 if (!F) { 233 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 234 235 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); 236 F = Function::Create(Ty, Linkage, Name, M); 237 } 238 239 Builder.CreateCall(F); 240 } 241 242 void ParallelLoopGenerator::createCallCleanupThread() { 243 const std::string Name = "GOMP_loop_end_nowait"; 244 245 Function *F = M->getFunction(Name); 246 247 // If F is not available, declare it. 248 if (!F) { 249 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 250 251 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); 252 F = Function::Create(Ty, Linkage, Name, M); 253 } 254 255 Builder.CreateCall(F); 256 } 257 258 Function *ParallelLoopGenerator::createSubFnDefinition() { 259 Function *F = Builder.GetInsertBlock()->getParent(); 260 std::vector<Type *> Arguments(1, Builder.getInt8PtrTy()); 261 FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false); 262 Function *SubFn = Function::Create(FT, Function::InternalLinkage, 263 F->getName() + ".polly.subfn", M); 264 265 // Do not run any polly pass on the new function. 266 SubFn->addFnAttr(PollySkipFnAttr); 267 268 Function::arg_iterator AI = SubFn->arg_begin(); 269 AI->setName("polly.par.userContext"); 270 271 return SubFn; 272 } 273 274 Value * 275 ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) { 276 SmallVector<Type *, 8> Members; 277 278 for (Value *V : Values) 279 Members.push_back(V->getType()); 280 281 // We do not want to allocate the alloca inside any loop, thus we allocate it 282 // in the entry block of the function and use annotations to denote the actual 283 // live span (similar to clang). 284 BasicBlock &EntryBB = Builder.GetInsertBlock()->getParent()->getEntryBlock(); 285 Instruction *IP = EntryBB.getFirstInsertionPt(); 286 StructType *Ty = StructType::get(Builder.getContext(), Members); 287 Value *Struct = new AllocaInst(Ty, 0, "polly.par.userContext", IP); 288 289 // Mark the start of the lifetime for the parameter struct. 290 ConstantInt *SizeOf = Builder.getInt64(DL.getTypeAllocSize(Ty)); 291 Builder.CreateLifetimeStart(Struct, SizeOf); 292 293 for (unsigned i = 0; i < Values.size(); i++) { 294 Value *Address = Builder.CreateStructGEP(Struct, i); 295 Builder.CreateStore(Values[i], Address); 296 } 297 298 return Struct; 299 } 300 301 void ParallelLoopGenerator::extractValuesFromStruct( 302 SetVector<Value *> OldValues, Value *Struct, ValueToValueMapTy &Map) { 303 for (unsigned i = 0; i < OldValues.size(); i++) { 304 Value *Address = Builder.CreateStructGEP(Struct, i); 305 Value *NewValue = Builder.CreateLoad(Address); 306 Map[OldValues[i]] = NewValue; 307 } 308 } 309 310 Value *ParallelLoopGenerator::createSubFn(Value *Stride, Value *StructData, 311 SetVector<Value *> Data, 312 ValueToValueMapTy &Map, 313 Function **SubFnPtr) { 314 BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *PreHeaderBB, *AfterBB; 315 Value *LBPtr, *UBPtr, *UserContext, *Ret1, *HasNextSchedule, *LB, *UB, *IV; 316 Function *SubFn = createSubFnDefinition(); 317 LLVMContext &Context = SubFn->getContext(); 318 319 // Store the previous basic block. 320 PrevBB = Builder.GetInsertBlock(); 321 322 // Create basic blocks. 323 HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn); 324 ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn); 325 CheckNextBB = BasicBlock::Create(Context, "polly.par.checkNext", SubFn); 326 PreHeaderBB = BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn); 327 328 DT.addNewBlock(HeaderBB, PrevBB); 329 DT.addNewBlock(ExitBB, HeaderBB); 330 DT.addNewBlock(CheckNextBB, HeaderBB); 331 DT.addNewBlock(PreHeaderBB, HeaderBB); 332 333 // Fill up basic block HeaderBB. 334 Builder.SetInsertPoint(HeaderBB); 335 LBPtr = Builder.CreateAlloca(LongType, 0, "polly.par.LBPtr"); 336 UBPtr = Builder.CreateAlloca(LongType, 0, "polly.par.UBPtr"); 337 UserContext = Builder.CreateBitCast(SubFn->arg_begin(), StructData->getType(), 338 "polly.par.userContext"); 339 340 extractValuesFromStruct(Data, UserContext, Map); 341 Builder.CreateBr(CheckNextBB); 342 343 // Add code to check if another set of iterations will be executed. 344 Builder.SetInsertPoint(CheckNextBB); 345 Ret1 = createCallGetWorkItem(LBPtr, UBPtr); 346 HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(), 347 "polly.par.hasNextScheduleBlock"); 348 Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB); 349 350 // Add code to to load the iv bounds for this set of iterations. 351 Builder.SetInsertPoint(PreHeaderBB); 352 LB = Builder.CreateLoad(LBPtr, "polly.par.LB"); 353 UB = Builder.CreateLoad(UBPtr, "polly.par.UB"); 354 355 // Subtract one as the upper bound provided by openmp is a < comparison 356 // whereas the codegenForSequential function creates a <= comparison. 357 UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1), 358 "polly.par.UBAdjusted"); 359 360 Builder.CreateBr(CheckNextBB); 361 Builder.SetInsertPoint(--Builder.GetInsertPoint()); 362 IV = createLoop(LB, UB, Stride, Builder, P, LI, DT, AfterBB, 363 ICmpInst::ICMP_SLE, nullptr, true, /* UseGuard */ false); 364 365 BasicBlock::iterator LoopBody = Builder.GetInsertPoint(); 366 367 // Add code to terminate this subfunction. 368 Builder.SetInsertPoint(ExitBB); 369 createCallCleanupThread(); 370 Builder.CreateRetVoid(); 371 372 Builder.SetInsertPoint(LoopBody); 373 *SubFnPtr = SubFn; 374 375 return IV; 376 } 377