1 //===------ LoopGenerators.cpp - IR helper to create loops ---------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains functions to create scalar and OpenMP parallel loops 11 // as LLVM-IR. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "polly/ScopDetection.h" 16 #include "polly/CodeGen/LoopGenerators.h" 17 #include "llvm/Analysis/Dominators.h" 18 #include "llvm/Analysis/LoopInfo.h" 19 #include "llvm/IR/DataLayout.h" 20 #include "llvm/IR/Module.h" 21 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 22 23 using namespace llvm; 24 using namespace polly; 25 26 // We generate a loop of the following structure 27 // 28 // BeforeBB 29 // | 30 // v 31 // GuardBB 32 // / \ 33 // __ PreHeaderBB \ 34 // / \ / | 35 // latch HeaderBB | 36 // \ / \ / 37 // < \ / 38 // \ / 39 // ExitBB 40 // 41 // GuardBB checks if the loop is executed at least once. If this is the case 42 // we branch to PreHeaderBB and subsequently to the HeaderBB, which contains the 43 // loop iv 'polly.indvar', the incremented loop iv 'polly.indvar_next' as well 44 // as the condition to check if we execute another iteration of the loop. After 45 // the loop has finished, we branch to ExitBB. 46 // 47 // TODO: We currently always create the GuardBB. If we can prove the loop is 48 // always executed at least once, we can get rid of this branch. 49 Value *polly::createLoop(Value *LB, Value *UB, Value *Stride, 50 IRBuilder<> &Builder, Pass *P, BasicBlock *&ExitBB, 51 ICmpInst::Predicate Predicate) { 52 53 DominatorTree &DT = P->getAnalysis<DominatorTree>(); 54 LoopInfo &LI = P->getAnalysis<LoopInfo>(); 55 Function *F = Builder.GetInsertBlock()->getParent(); 56 LLVMContext &Context = F->getContext(); 57 58 assert(LB->getType() == UB->getType() && "Types of loop bounds do not match"); 59 IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType()); 60 assert(LoopIVType && "UB is not integer?"); 61 62 BasicBlock *BeforeBB = Builder.GetInsertBlock(); 63 BasicBlock *GuardBB = BasicBlock::Create(Context, "polly.loop_if", F); 64 BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F); 65 BasicBlock *PreHeaderBB = 66 BasicBlock::Create(Context, "polly.loop_preheader", F); 67 68 // Update LoopInfo 69 Loop *OuterLoop = LI.getLoopFor(BeforeBB); 70 Loop *NewLoop = new Loop(); 71 72 if (OuterLoop) { 73 OuterLoop->addChildLoop(NewLoop); 74 } else { 75 LI.addTopLevelLoop(NewLoop); 76 } 77 78 if (OuterLoop) { 79 OuterLoop->addBasicBlockToLoop(GuardBB, LI.getBase()); 80 OuterLoop->addBasicBlockToLoop(PreHeaderBB, LI.getBase()); 81 } 82 83 NewLoop->addBasicBlockToLoop(HeaderBB, LI.getBase()); 84 85 // ExitBB 86 ExitBB = SplitBlock(BeforeBB, Builder.GetInsertPoint()++, P); 87 ExitBB->setName("polly.loop_exit"); 88 89 // BeforeBB 90 BeforeBB->getTerminator()->setSuccessor(0, GuardBB); 91 92 // GuardBB 93 DT.addNewBlock(GuardBB, BeforeBB); 94 Builder.SetInsertPoint(GuardBB); 95 Value *LoopGuard; 96 LoopGuard = Builder.CreateICmp(Predicate, LB, UB); 97 LoopGuard->setName("polly.loop_guard"); 98 Builder.CreateCondBr(LoopGuard, PreHeaderBB, ExitBB); 99 100 // PreHeaderBB 101 DT.addNewBlock(PreHeaderBB, GuardBB); 102 Builder.SetInsertPoint(PreHeaderBB); 103 Builder.CreateBr(HeaderBB); 104 105 // HeaderBB 106 DT.addNewBlock(HeaderBB, PreHeaderBB); 107 Builder.SetInsertPoint(HeaderBB); 108 PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.indvar"); 109 IV->addIncoming(LB, PreHeaderBB); 110 Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType); 111 Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.indvar_next"); 112 Value *LoopCondition; 113 UB = Builder.CreateSub(UB, Stride, "polly.adjust_ub"); 114 LoopCondition = Builder.CreateICmp(Predicate, IV, UB); 115 LoopCondition->setName("polly.loop_cond"); 116 Builder.CreateCondBr(LoopCondition, HeaderBB, ExitBB); 117 IV->addIncoming(IncrementedIV, HeaderBB); 118 DT.changeImmediateDominator(ExitBB, GuardBB); 119 120 // The loop body should be added here. 121 Builder.SetInsertPoint(HeaderBB->getFirstNonPHI()); 122 return IV; 123 } 124 125 void OMPGenerator::createCallParallelLoopStart( 126 Value *SubFunction, Value *SubfunctionParam, Value *NumberOfThreads, 127 Value *LowerBound, Value *UpperBound, Value *Stride) { 128 Module *M = getModule(); 129 const char *Name = "GOMP_parallel_loop_runtime_start"; 130 Function *F = M->getFunction(Name); 131 132 // If F is not available, declare it. 133 if (!F) { 134 Type *LongTy = getIntPtrTy(); 135 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 136 137 Type *Params[] = { PointerType::getUnqual(FunctionType::get( 138 Builder.getVoidTy(), Builder.getInt8PtrTy(), false)), 139 Builder.getInt8PtrTy(), Builder.getInt32Ty(), LongTy, 140 LongTy, LongTy, }; 141 142 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); 143 F = Function::Create(Ty, Linkage, Name, M); 144 } 145 146 Value *Args[] = { SubFunction, SubfunctionParam, NumberOfThreads, 147 LowerBound, UpperBound, Stride }; 148 149 Builder.CreateCall(F, Args); 150 } 151 152 Value *OMPGenerator::createCallLoopNext(Value *LowerBoundPtr, 153 Value *UpperBoundPtr) { 154 Module *M = getModule(); 155 const char *Name = "GOMP_loop_runtime_next"; 156 Function *F = M->getFunction(Name); 157 158 // If F is not available, declare it. 159 if (!F) { 160 Type *LongPtrTy = PointerType::getUnqual(getIntPtrTy()); 161 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 162 163 Type *Params[] = { LongPtrTy, LongPtrTy, }; 164 165 FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false); 166 F = Function::Create(Ty, Linkage, Name, M); 167 } 168 169 Value *Args[] = { LowerBoundPtr, UpperBoundPtr, }; 170 171 Value *Return = Builder.CreateCall(F, Args); 172 Return = Builder.CreateICmpNE( 173 Return, Builder.CreateZExt(Builder.getFalse(), Return->getType())); 174 return Return; 175 } 176 177 void OMPGenerator::createCallParallelEnd() { 178 const char *Name = "GOMP_parallel_end"; 179 Module *M = getModule(); 180 Function *F = M->getFunction(Name); 181 182 // If F is not available, declare it. 183 if (!F) { 184 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 185 186 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); 187 F = Function::Create(Ty, Linkage, Name, M); 188 } 189 190 Builder.CreateCall(F); 191 } 192 193 void OMPGenerator::createCallLoopEndNowait() { 194 const char *Name = "GOMP_loop_end_nowait"; 195 Module *M = getModule(); 196 Function *F = M->getFunction(Name); 197 198 // If F is not available, declare it. 199 if (!F) { 200 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 201 202 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); 203 F = Function::Create(Ty, Linkage, Name, M); 204 } 205 206 Builder.CreateCall(F); 207 } 208 209 IntegerType *OMPGenerator::getIntPtrTy() { 210 return P->getAnalysis<DataLayout>().getIntPtrType(Builder.getContext()); 211 } 212 213 Module *OMPGenerator::getModule() { 214 return Builder.GetInsertBlock()->getParent()->getParent(); 215 } 216 217 Function *OMPGenerator::createSubfunctionDefinition() { 218 Module *M = getModule(); 219 Function *F = Builder.GetInsertBlock()->getParent(); 220 std::vector<Type *> Arguments(1, Builder.getInt8PtrTy()); 221 FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false); 222 Function *FN = Function::Create(FT, Function::InternalLinkage, 223 F->getName() + ".omp_subfn", M); 224 // Do not run any polly pass on the new function. 225 P->getAnalysis<polly::ScopDetection>().markFunctionAsInvalid(FN); 226 227 Function::arg_iterator AI = FN->arg_begin(); 228 AI->setName("omp.userContext"); 229 230 return FN; 231 } 232 233 Value *OMPGenerator::loadValuesIntoStruct(SetVector<Value *> &Values) { 234 std::vector<Type *> Members; 235 236 for (unsigned i = 0; i < Values.size(); i++) 237 Members.push_back(Values[i]->getType()); 238 239 StructType *Ty = StructType::get(Builder.getContext(), Members); 240 Value *Struct = Builder.CreateAlloca(Ty, 0, "omp.userContext"); 241 242 for (unsigned i = 0; i < Values.size(); i++) { 243 Value *Address = Builder.CreateStructGEP(Struct, i); 244 Builder.CreateStore(Values[i], Address); 245 } 246 247 return Struct; 248 } 249 250 void OMPGenerator::extractValuesFromStruct(SetVector<Value *> OldValues, 251 Value *Struct, 252 ValueToValueMapTy &Map) { 253 for (unsigned i = 0; i < OldValues.size(); i++) { 254 Value *Address = Builder.CreateStructGEP(Struct, i); 255 Value *NewValue = Builder.CreateLoad(Address); 256 Map.insert(std::make_pair(OldValues[i], NewValue)); 257 } 258 } 259 260 Value *OMPGenerator::createSubfunction(Value *Stride, Value *StructData, 261 SetVector<Value *> Data, 262 ValueToValueMapTy &Map, 263 Function **SubFunction) { 264 Function *FN = createSubfunctionDefinition(); 265 266 BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *LoadIVBoundsBB, 267 *AfterBB; 268 Value *LowerBoundPtr, *UpperBoundPtr, *UserContext, *Ret1, *HasNextSchedule, 269 *LowerBound, *UpperBound, *IV; 270 Type *IntPtrTy = getIntPtrTy(); 271 LLVMContext &Context = FN->getContext(); 272 273 // Store the previous basic block. 274 PrevBB = Builder.GetInsertBlock(); 275 276 // Create basic blocks. 277 HeaderBB = BasicBlock::Create(Context, "omp.setup", FN); 278 ExitBB = BasicBlock::Create(Context, "omp.exit", FN); 279 CheckNextBB = BasicBlock::Create(Context, "omp.checkNext", FN); 280 LoadIVBoundsBB = BasicBlock::Create(Context, "omp.loadIVBounds", FN); 281 282 DominatorTree &DT = P->getAnalysis<DominatorTree>(); 283 DT.addNewBlock(HeaderBB, PrevBB); 284 DT.addNewBlock(ExitBB, HeaderBB); 285 DT.addNewBlock(CheckNextBB, HeaderBB); 286 DT.addNewBlock(LoadIVBoundsBB, HeaderBB); 287 288 // Fill up basic block HeaderBB. 289 Builder.SetInsertPoint(HeaderBB); 290 LowerBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.lowerBoundPtr"); 291 UpperBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.upperBoundPtr"); 292 UserContext = Builder.CreateBitCast(FN->arg_begin(), StructData->getType(), 293 "omp.userContext"); 294 295 extractValuesFromStruct(Data, UserContext, Map); 296 Builder.CreateBr(CheckNextBB); 297 298 // Add code to check if another set of iterations will be executed. 299 Builder.SetInsertPoint(CheckNextBB); 300 Ret1 = createCallLoopNext(LowerBoundPtr, UpperBoundPtr); 301 HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(), 302 "omp.hasNextScheduleBlock"); 303 Builder.CreateCondBr(HasNextSchedule, LoadIVBoundsBB, ExitBB); 304 305 // Add code to to load the iv bounds for this set of iterations. 306 Builder.SetInsertPoint(LoadIVBoundsBB); 307 LowerBound = Builder.CreateLoad(LowerBoundPtr, "omp.lowerBound"); 308 UpperBound = Builder.CreateLoad(UpperBoundPtr, "omp.upperBound"); 309 310 // Subtract one as the upper bound provided by openmp is a < comparison 311 // whereas the codegenForSequential function creates a <= comparison. 312 UpperBound = Builder.CreateSub(UpperBound, ConstantInt::get(IntPtrTy, 1), 313 "omp.upperBoundAdjusted"); 314 315 Builder.CreateBr(CheckNextBB); 316 Builder.SetInsertPoint(--Builder.GetInsertPoint()); 317 IV = createLoop(LowerBound, UpperBound, Stride, Builder, P, AfterBB, 318 ICmpInst::ICMP_SLE); 319 320 BasicBlock::iterator LoopBody = Builder.GetInsertPoint(); 321 Builder.SetInsertPoint(AfterBB->begin()); 322 323 // Add code to terminate this openmp subfunction. 324 Builder.SetInsertPoint(ExitBB); 325 createCallLoopEndNowait(); 326 Builder.CreateRetVoid(); 327 328 Builder.SetInsertPoint(LoopBody); 329 *SubFunction = FN; 330 331 return IV; 332 } 333 334 Value *OMPGenerator::createParallelLoop(Value *LowerBound, Value *UpperBound, 335 Value *Stride, 336 SetVector<Value *> &Values, 337 ValueToValueMapTy &Map, 338 BasicBlock::iterator *LoopBody) { 339 Value *Struct, *IV, *SubfunctionParam, *NumberOfThreads; 340 Function *SubFunction; 341 342 Struct = loadValuesIntoStruct(Values); 343 344 BasicBlock::iterator PrevInsertPoint = Builder.GetInsertPoint(); 345 IV = createSubfunction(Stride, Struct, Values, Map, &SubFunction); 346 *LoopBody = Builder.GetInsertPoint(); 347 Builder.SetInsertPoint(PrevInsertPoint); 348 349 // Create call for GOMP_parallel_loop_runtime_start. 350 SubfunctionParam = 351 Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), "omp_data"); 352 353 NumberOfThreads = Builder.getInt32(0); 354 355 // Add one as the upper bound provided by openmp is a < comparison 356 // whereas the codegenForSequential function creates a <= comparison. 357 UpperBound = 358 Builder.CreateAdd(UpperBound, ConstantInt::get(getIntPtrTy(), 1)); 359 360 createCallParallelLoopStart(SubFunction, SubfunctionParam, NumberOfThreads, 361 LowerBound, UpperBound, Stride); 362 Builder.CreateCall(SubFunction, SubfunctionParam); 363 createCallParallelEnd(); 364 365 return IV; 366 } 367