1 //===------ LoopGenerators.cpp - IR helper to create loops ---------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains functions to create scalar and OpenMP parallel loops 11 // as LLVM-IR. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "polly/ScopDetection.h" 16 #include "polly/CodeGen/LoopGenerators.h" 17 18 #include "llvm/Module.h" 19 #include "llvm/Analysis/Dominators.h" 20 #include "llvm/Target/TargetData.h" 21 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 22 23 using namespace llvm; 24 using namespace polly; 25 26 Value *polly::createLoop(Value *LB, Value *UB, Value *Stride, 27 IRBuilder<> &Builder, Pass *P, 28 BasicBlock *&AfterBlock) { 29 DominatorTree &DT = P->getAnalysis<DominatorTree>(); 30 Function *F = Builder.GetInsertBlock()->getParent(); 31 LLVMContext &Context = F->getContext(); 32 33 BasicBlock *PreheaderBB = Builder.GetInsertBlock(); 34 BasicBlock *HeaderBB = BasicBlock::Create(Context, "polly.loop_header", F); 35 BasicBlock *BodyBB = BasicBlock::Create(Context, "polly.loop_body", F); 36 BasicBlock *AfterBB = SplitBlock(PreheaderBB, Builder.GetInsertPoint()++, P); 37 AfterBB->setName("polly.loop_after"); 38 39 PreheaderBB->getTerminator()->setSuccessor(0, HeaderBB); 40 DT.addNewBlock(HeaderBB, PreheaderBB); 41 42 Builder.SetInsertPoint(HeaderBB); 43 44 // Use the type of upper and lower bound. 45 assert(LB->getType() == UB->getType() 46 && "Different types for upper and lower bound."); 47 48 IntegerType *LoopIVType = dyn_cast<IntegerType>(UB->getType()); 49 assert(LoopIVType && "UB is not integer?"); 50 51 // IV 52 PHINode *IV = Builder.CreatePHI(LoopIVType, 2, "polly.loopiv"); 53 IV->addIncoming(LB, PreheaderBB); 54 55 Stride = Builder.CreateZExtOrBitCast(Stride, LoopIVType); 56 Value *IncrementedIV = Builder.CreateNSWAdd(IV, Stride, "polly.next_loopiv"); 57 58 // Exit condition. 59 Value *CMP; 60 CMP = Builder.CreateICmpSLE(IV, UB); 61 62 Builder.CreateCondBr(CMP, BodyBB, AfterBB); 63 DT.addNewBlock(BodyBB, HeaderBB); 64 65 Builder.SetInsertPoint(BodyBB); 66 Builder.CreateBr(HeaderBB); 67 IV->addIncoming(IncrementedIV, BodyBB); 68 DT.changeImmediateDominator(AfterBB, HeaderBB); 69 70 Builder.SetInsertPoint(BodyBB->begin()); 71 AfterBlock = AfterBB; 72 73 return IV; 74 } 75 76 void OMPGenerator::createCallParallelLoopStart(Value *SubFunction, 77 Value *SubfunctionParam, 78 Value *NumberOfThreads, 79 Value *LowerBound, 80 Value *UpperBound, 81 Value *Stride) { 82 Module *M = getModule(); 83 const char *Name = "GOMP_parallel_loop_runtime_start"; 84 Function *F = M->getFunction(Name); 85 86 // If F is not available, declare it. 87 if (!F) { 88 Type *LongTy = getIntPtrTy(); 89 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 90 91 Type *Params[] = { 92 PointerType::getUnqual(FunctionType::get(Builder.getVoidTy(), 93 Builder.getInt8PtrTy(), 94 false)), 95 Builder.getInt8PtrTy(), 96 Builder.getInt32Ty(), 97 LongTy, 98 LongTy, 99 LongTy, 100 }; 101 102 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false); 103 F = Function::Create(Ty, Linkage, Name, M); 104 } 105 106 Value *Args[] = { 107 SubFunction, 108 SubfunctionParam, 109 NumberOfThreads, 110 LowerBound, 111 UpperBound, 112 Stride, 113 }; 114 115 Builder.CreateCall(F, Args); 116 } 117 118 Value *OMPGenerator::createCallLoopNext(Value *LowerBoundPtr, 119 Value *UpperBoundPtr) { 120 Module *M = getModule(); 121 const char *Name = "GOMP_loop_runtime_next"; 122 Function *F = M->getFunction(Name); 123 124 // If F is not available, declare it. 125 if (!F) { 126 Type *LongPtrTy = PointerType::getUnqual(getIntPtrTy()); 127 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 128 129 Type *Params[] = { 130 LongPtrTy, 131 LongPtrTy, 132 }; 133 134 FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false); 135 F = Function::Create(Ty, Linkage, Name, M); 136 } 137 138 Value *Args[] = { 139 LowerBoundPtr, 140 UpperBoundPtr, 141 }; 142 143 Value *Return = Builder.CreateCall(F, Args); 144 Return = Builder.CreateICmpNE(Return, Builder.CreateZExt(Builder.getFalse(), 145 Return->getType())); 146 return Return; 147 } 148 149 void OMPGenerator::createCallParallelEnd() { 150 const char *Name = "GOMP_parallel_end"; 151 Module *M = getModule(); 152 Function *F = M->getFunction(Name); 153 154 // If F is not available, declare it. 155 if (!F) { 156 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 157 158 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); 159 F = Function::Create(Ty, Linkage, Name, M); 160 } 161 162 Builder.CreateCall(F); 163 } 164 165 void OMPGenerator::createCallLoopEndNowait() { 166 const char *Name = "GOMP_loop_end_nowait"; 167 Module *M = getModule(); 168 Function *F = M->getFunction(Name); 169 170 // If F is not available, declare it. 171 if (!F) { 172 GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage; 173 174 FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), false); 175 F = Function::Create(Ty, Linkage, Name, M); 176 } 177 178 Builder.CreateCall(F); 179 } 180 181 IntegerType *OMPGenerator::getIntPtrTy() { 182 return P->getAnalysis<TargetData>().getIntPtrType(Builder.getContext()); 183 } 184 185 Module *OMPGenerator::getModule() { 186 return Builder.GetInsertBlock()->getParent()->getParent(); 187 } 188 189 Function *OMPGenerator::createSubfunctionDefinition() { 190 Module *M = getModule(); 191 Function *F = Builder.GetInsertBlock()->getParent(); 192 std::vector<Type*> Arguments(1, Builder.getInt8PtrTy()); 193 FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false); 194 Function *FN = Function::Create(FT, Function::InternalLinkage, 195 F->getName() + ".omp_subfn", M); 196 // Do not run any polly pass on the new function. 197 P->getAnalysis<polly::ScopDetection>().markFunctionAsInvalid(FN); 198 199 Function::arg_iterator AI = FN->arg_begin(); 200 AI->setName("omp.userContext"); 201 202 return FN; 203 } 204 205 Value *OMPGenerator::loadValuesIntoStruct(SetVector<Value*> &Values) { 206 std::vector<Type*> Members; 207 208 for (unsigned i = 0; i < Values.size(); i++) 209 Members.push_back(Values[i]->getType()); 210 211 StructType *Ty = StructType::get(Builder.getContext(), Members); 212 Value *Struct = Builder.CreateAlloca(Ty, 0, "omp.userContext"); 213 214 for (unsigned i = 0; i < Values.size(); i++) { 215 Value *Address = Builder.CreateStructGEP(Struct, i); 216 Builder.CreateStore(Values[i], Address); 217 } 218 219 return Struct; 220 } 221 222 void OMPGenerator::extractValuesFromStruct(SetVector<Value*> OldValues, 223 Value *Struct, 224 ValueToValueMapTy &Map) { 225 for (unsigned i = 0; i < OldValues.size(); i++) { 226 Value *Address = Builder.CreateStructGEP(Struct, i); 227 Value *NewValue = Builder.CreateLoad(Address); 228 Map.insert(std::make_pair<Value*, Value*>(OldValues[i], NewValue)); 229 } 230 } 231 232 Value *OMPGenerator::createSubfunction(Value *Stride, Value *StructData, 233 SetVector<Value*> Data, 234 ValueToValueMapTy &Map, 235 Function **SubFunction) { 236 Function *FN = createSubfunctionDefinition(); 237 238 BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *LoadIVBoundsBB, 239 *AfterBB; 240 Value *LowerBoundPtr, *UpperBoundPtr, *UserContext, *Ret1, *HasNextSchedule, 241 *LowerBound, *UpperBound, *IV; 242 Type *IntPtrTy = getIntPtrTy(); 243 LLVMContext &Context = FN->getContext(); 244 245 // Store the previous basic block. 246 PrevBB = Builder.GetInsertBlock(); 247 248 // Create basic blocks. 249 HeaderBB = BasicBlock::Create(Context, "omp.setup", FN); 250 ExitBB = BasicBlock::Create(Context, "omp.exit", FN); 251 CheckNextBB = BasicBlock::Create(Context, "omp.checkNext", FN); 252 LoadIVBoundsBB = BasicBlock::Create(Context, "omp.loadIVBounds", FN); 253 254 DominatorTree &DT = P->getAnalysis<DominatorTree>(); 255 DT.addNewBlock(HeaderBB, PrevBB); 256 DT.addNewBlock(ExitBB, HeaderBB); 257 DT.addNewBlock(CheckNextBB, HeaderBB); 258 DT.addNewBlock(LoadIVBoundsBB, HeaderBB); 259 260 // Fill up basic block HeaderBB. 261 Builder.SetInsertPoint(HeaderBB); 262 LowerBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.lowerBoundPtr"); 263 UpperBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.upperBoundPtr"); 264 UserContext = Builder.CreateBitCast(FN->arg_begin(), StructData->getType(), 265 "omp.userContext"); 266 267 extractValuesFromStruct(Data, UserContext, Map); 268 Builder.CreateBr(CheckNextBB); 269 270 // Add code to check if another set of iterations will be executed. 271 Builder.SetInsertPoint(CheckNextBB); 272 Ret1 = createCallLoopNext(LowerBoundPtr, UpperBoundPtr); 273 HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(), 274 "omp.hasNextScheduleBlock"); 275 Builder.CreateCondBr(HasNextSchedule, LoadIVBoundsBB, ExitBB); 276 277 // Add code to to load the iv bounds for this set of iterations. 278 Builder.SetInsertPoint(LoadIVBoundsBB); 279 LowerBound = Builder.CreateLoad(LowerBoundPtr, "omp.lowerBound"); 280 UpperBound = Builder.CreateLoad(UpperBoundPtr, "omp.upperBound"); 281 282 // Subtract one as the upper bound provided by openmp is a < comparison 283 // whereas the codegenForSequential function creates a <= comparison. 284 UpperBound = Builder.CreateSub(UpperBound, ConstantInt::get(IntPtrTy, 1), 285 "omp.upperBoundAdjusted"); 286 287 Builder.CreateBr(CheckNextBB); 288 Builder.SetInsertPoint(--Builder.GetInsertPoint()); 289 IV = createLoop(LowerBound, UpperBound, Stride, Builder, P, AfterBB); 290 291 BasicBlock::iterator LoopBody = Builder.GetInsertPoint(); 292 Builder.SetInsertPoint(AfterBB->begin()); 293 294 // Add code to terminate this openmp subfunction. 295 Builder.SetInsertPoint(ExitBB); 296 createCallLoopEndNowait(); 297 Builder.CreateRetVoid(); 298 299 Builder.SetInsertPoint(LoopBody); 300 *SubFunction = FN; 301 302 return IV; 303 } 304 305 Value *OMPGenerator::createParallelLoop(Value *LowerBound, Value *UpperBound, 306 Value *Stride, 307 SetVector<Value*> &Values, 308 ValueToValueMapTy &Map, 309 BasicBlock::iterator *LoopBody) { 310 Value *Struct, *IV, *SubfunctionParam, *NumberOfThreads; 311 Function *SubFunction; 312 313 Struct = loadValuesIntoStruct(Values); 314 315 BasicBlock::iterator PrevInsertPoint = Builder.GetInsertPoint(); 316 IV = createSubfunction(Stride, Struct, Values, Map, &SubFunction); 317 *LoopBody = Builder.GetInsertPoint(); 318 Builder.SetInsertPoint(PrevInsertPoint); 319 320 // Create call for GOMP_parallel_loop_runtime_start. 321 SubfunctionParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), 322 "omp_data"); 323 324 NumberOfThreads = Builder.getInt32(0); 325 326 // Add one as the upper bound provided by openmp is a < comparison 327 // whereas the codegenForSequential function creates a <= comparison. 328 UpperBound = Builder.CreateAdd(UpperBound, 329 ConstantInt::get(getIntPtrTy(), 1)); 330 331 createCallParallelLoopStart(SubFunction, SubfunctionParam, NumberOfThreads, 332 LowerBound, UpperBound, Stride); 333 Builder.CreateCall(SubFunction, SubfunctionParam); 334 createCallParallelEnd(); 335 336 return IV; 337 } 338